author    Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>  2021-04-10 03:24:05 +0200
committer Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>  2021-04-10 03:31:58 +0200
commit    59c501ca353f8ec9d2717c98af2bfa1a1dbf4d75 (patch)
tree      dd56c73e05cea59c5c8931605bf9d5efc986677e /src
parent    8451b41022757763a4a46eb597b9392e39a26b6a (diff)
radv: Format.
Using

   find ./src/amd/vulkan -regex '.*/.*\.\(c\|h\|cpp\)' | xargs -P8 -n1 clang-format --style=file -i

Acked-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10091>
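(Note: --style=file tells clang-format to read the .clang-format file found in the source tree; that file itself is not part of this diff. Judging from the reformatted hunks below — 3-space indentation, continuation arguments aligned under the opening parenthesis, lines rewrapped near 100 columns — a minimal hypothetical sketch of such a configuration could look like the following. The option values are inferred assumptions, not Mesa's actual settings.)

   # Hypothetical .clang-format sketch -- values inferred from the hunks below, not copied from Mesa
   BasedOnStyle: LLVM            # start from a stock preset
   IndentWidth: 3                # matches the 3-space indent in the reformatted code
   ColumnLimit: 100              # long declarations are rewrapped near column 100
   AlignAfterOpenBracket: Align  # wrapped arguments line up under the opening parenthesis
   UseTab: Never                 # spaces only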
Diffstat (limited to 'src')
-rw-r--r--  src/amd/vulkan/layers/radv_sqtt_layer.c | 1604
-rw-r--r--  src/amd/vulkan/radv_android.c | 1333
-rw-r--r--  src/amd/vulkan/radv_cmd_buffer.c | 11863
-rw-r--r--  src/amd/vulkan/radv_constants.h | 61
-rw-r--r--  src/amd/vulkan/radv_cs.h | 222
-rw-r--r--  src/amd/vulkan/radv_debug.c | 1507
-rw-r--r--  src/amd/vulkan/radv_debug.h | 104
-rw-r--r--  src/amd/vulkan/radv_descriptor_set.c | 2705
-rw-r--r--  src/amd/vulkan/radv_descriptor_set.h | 27
-rw-r--r--  src/amd/vulkan/radv_device.c | 13492
-rw-r--r--  src/amd/vulkan/radv_formats.c | 3600
-rw-r--r--  src/amd/vulkan/radv_image.c | 3908
-rw-r--r--  src/amd/vulkan/radv_llvm_helper.cpp | 214
-rw-r--r--  src/amd/vulkan/radv_meta.c | 1001
-rw-r--r--  src/amd/vulkan/radv_meta.h | 299
-rw-r--r--  src/amd/vulkan/radv_meta_blit.c | 2149
-rw-r--r--  src/amd/vulkan/radv_meta_blit2d.c | 2401
-rw-r--r--  src/amd/vulkan/radv_meta_buffer.c | 839
-rw-r--r--  src/amd/vulkan/radv_meta_bufimage.c | 3457
-rw-r--r--  src/amd/vulkan/radv_meta_clear.c | 3958
-rw-r--r--  src/amd/vulkan/radv_meta_copy.c | 938
-rw-r--r--  src/amd/vulkan/radv_meta_dcc_retile.c | 514
-rw-r--r--  src/amd/vulkan/radv_meta_decompress.c | 938
-rw-r--r--  src/amd/vulkan/radv_meta_fast_clear.c | 1668
-rw-r--r--  src/amd/vulkan/radv_meta_fmask_expand.c | 486
-rw-r--r--  src/amd/vulkan/radv_meta_resolve.c | 1651
-rw-r--r--  src/amd/vulkan/radv_meta_resolve_cs.c | 1791
-rw-r--r--  src/amd/vulkan/radv_meta_resolve_fs.c | 2204
-rw-r--r--  src/amd/vulkan/radv_nir_lower_ycbcr_textures.c | 490
-rw-r--r--  src/amd/vulkan/radv_nir_to_llvm.c | 6188
-rw-r--r--  src/amd/vulkan/radv_pass.c | 935
-rw-r--r--  src/amd/vulkan/radv_pipeline.c | 10727
-rw-r--r--  src/amd/vulkan/radv_pipeline_cache.c | 965
-rw-r--r--  src/amd/vulkan/radv_private.h | 3614
-rw-r--r--  src/amd/vulkan/radv_query.c | 3130
-rw-r--r--  src/amd/vulkan/radv_radeon_winsys.h | 440
-rw-r--r--  src/amd/vulkan/radv_shader.c | 3247
-rw-r--r--  src/amd/vulkan/radv_shader.h | 856
-rw-r--r--  src/amd/vulkan/radv_shader_args.c | 1235
-rw-r--r--  src/amd/vulkan/radv_shader_args.h | 38
-rw-r--r--  src/amd/vulkan/radv_shader_helper.h | 16
-rw-r--r--  src/amd/vulkan/radv_shader_info.c | 1197
-rw-r--r--  src/amd/vulkan/radv_sqtt.c | 1072
-rw-r--r--  src/amd/vulkan/radv_util.c | 111
-rw-r--r--  src/amd/vulkan/radv_wsi.c | 390
-rw-r--r--  src/amd/vulkan/radv_wsi_display.c | 392
-rw-r--r--  src/amd/vulkan/radv_wsi_wayland.c | 18
-rw-r--r--  src/amd/vulkan/radv_wsi_x11.c | 59
-rw-r--r--  src/amd/vulkan/si_cmd_buffer.c | 3537
-rw-r--r--  src/amd/vulkan/vk_format.h | 340
-rw-r--r--  src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c | 1759
-rw-r--r--  src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h | 61
-rw-r--r--  src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 2954
-rw-r--r--  src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.h | 29
-rw-r--r--  src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c | 106
-rw-r--r--  src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.h | 2
-rw-r--r--  src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c | 417
-rw-r--r--  src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h | 68
-rw-r--r--  src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys_public.h | 2
-rw-r--r--  src/amd/vulkan/winsys/null/radv_null_bo.c | 54
-rw-r--r--  src/amd/vulkan/winsys/null/radv_null_bo.h | 12
-rw-r--r--  src/amd/vulkan/winsys/null/radv_null_cs.c | 82
-rw-r--r--  src/amd/vulkan/winsys/null/radv_null_cs.h | 6
-rw-r--r--  src/amd/vulkan/winsys/null/radv_null_winsys.c | 222
-rw-r--r--  src/amd/vulkan/winsys/null/radv_null_winsys.h | 8
65 files changed, 52880 insertions, 56833 deletions
diff --git a/src/amd/vulkan/layers/radv_sqtt_layer.c b/src/amd/vulkan/layers/radv_sqtt_layer.c
index 30cd453b8e5..fc68d188dab 100644
--- a/src/amd/vulkan/layers/radv_sqtt_layer.c
+++ b/src/amd/vulkan/layers/radv_sqtt_layer.c
@@ -29,1215 +29,1071 @@
static void
radv_write_begin_general_api_marker(struct radv_cmd_buffer *cmd_buffer,
- enum rgp_sqtt_marker_general_api_type api_type)
+ enum rgp_sqtt_marker_general_api_type api_type)
{
- struct rgp_sqtt_marker_general_api marker = {0};
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ struct rgp_sqtt_marker_general_api marker = {0};
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
- marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API;
- marker.api_type = api_type;
+ marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API;
+ marker.api_type = api_type;
- radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
+ radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
}
static void
radv_write_end_general_api_marker(struct radv_cmd_buffer *cmd_buffer,
- enum rgp_sqtt_marker_general_api_type api_type)
+ enum rgp_sqtt_marker_general_api_type api_type)
{
- struct rgp_sqtt_marker_general_api marker = {0};
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ struct rgp_sqtt_marker_general_api marker = {0};
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
- marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API;
- marker.api_type = api_type;
- marker.is_end = 1;
+ marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API;
+ marker.api_type = api_type;
+ marker.is_end = 1;
- radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
+ radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
}
static void
radv_write_event_marker(struct radv_cmd_buffer *cmd_buffer,
- enum rgp_sqtt_marker_event_type api_type,
- uint32_t vertex_offset_user_data,
- uint32_t instance_offset_user_data,
- uint32_t draw_index_user_data)
+ enum rgp_sqtt_marker_event_type api_type, uint32_t vertex_offset_user_data,
+ uint32_t instance_offset_user_data, uint32_t draw_index_user_data)
{
- struct rgp_sqtt_marker_event marker = {0};
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ struct rgp_sqtt_marker_event marker = {0};
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
- marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_EVENT;
- marker.api_type = api_type;
- marker.cmd_id = cmd_buffer->state.num_events++;
- marker.cb_id = 0;
+ marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_EVENT;
+ marker.api_type = api_type;
+ marker.cmd_id = cmd_buffer->state.num_events++;
+ marker.cb_id = 0;
- if (vertex_offset_user_data == UINT_MAX ||
- instance_offset_user_data == UINT_MAX) {
- vertex_offset_user_data = 0;
- instance_offset_user_data = 0;
- }
+ if (vertex_offset_user_data == UINT_MAX || instance_offset_user_data == UINT_MAX) {
+ vertex_offset_user_data = 0;
+ instance_offset_user_data = 0;
+ }
- if (draw_index_user_data == UINT_MAX)
- draw_index_user_data = vertex_offset_user_data;
+ if (draw_index_user_data == UINT_MAX)
+ draw_index_user_data = vertex_offset_user_data;
- marker.vertex_offset_reg_idx = vertex_offset_user_data;
- marker.instance_offset_reg_idx = instance_offset_user_data;
- marker.draw_index_reg_idx = draw_index_user_data;
+ marker.vertex_offset_reg_idx = vertex_offset_user_data;
+ marker.instance_offset_reg_idx = instance_offset_user_data;
+ marker.draw_index_reg_idx = draw_index_user_data;
- radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
+ radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
}
static void
radv_write_event_with_dims_marker(struct radv_cmd_buffer *cmd_buffer,
- enum rgp_sqtt_marker_event_type api_type,
- uint32_t x, uint32_t y, uint32_t z)
+ enum rgp_sqtt_marker_event_type api_type, uint32_t x, uint32_t y,
+ uint32_t z)
{
- struct rgp_sqtt_marker_event_with_dims marker = {0};
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ struct rgp_sqtt_marker_event_with_dims marker = {0};
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
- marker.event.identifier = RGP_SQTT_MARKER_IDENTIFIER_EVENT;
- marker.event.api_type = api_type;
- marker.event.cmd_id = cmd_buffer->state.num_events++;
- marker.event.cb_id = 0;
- marker.event.has_thread_dims = 1;
+ marker.event.identifier = RGP_SQTT_MARKER_IDENTIFIER_EVENT;
+ marker.event.api_type = api_type;
+ marker.event.cmd_id = cmd_buffer->state.num_events++;
+ marker.event.cb_id = 0;
+ marker.event.has_thread_dims = 1;
- marker.thread_x = x;
- marker.thread_y = y;
- marker.thread_z = z;
+ marker.thread_x = x;
+ marker.thread_y = y;
+ marker.thread_z = z;
- radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
+ radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
}
static void
radv_write_user_event_marker(struct radv_cmd_buffer *cmd_buffer,
- enum rgp_sqtt_marker_user_event_type type,
- const char *str)
+ enum rgp_sqtt_marker_user_event_type type, const char *str)
{
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
- if (type == UserEventPop) {
- assert (str == NULL);
- struct rgp_sqtt_marker_user_event marker = { 0 };
- marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT;
- marker.data_type = type;
+ if (type == UserEventPop) {
+ assert(str == NULL);
+ struct rgp_sqtt_marker_user_event marker = {0};
+ marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT;
+ marker.data_type = type;
- radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
- } else {
- assert (str != NULL);
- unsigned len = strlen(str);
- struct rgp_sqtt_marker_user_event_with_length marker = { 0 };
- marker.user_event.identifier = RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT;
- marker.user_event.data_type = type;
- marker.length = align(len, 4);
+ radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
+ } else {
+ assert(str != NULL);
+ unsigned len = strlen(str);
+ struct rgp_sqtt_marker_user_event_with_length marker = {0};
+ marker.user_event.identifier = RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT;
+ marker.user_event.data_type = type;
+ marker.length = align(len, 4);
- uint8_t *buffer = alloca(sizeof(marker) + marker.length);
- memset(buffer, 0, sizeof(marker) + marker.length);
- memcpy(buffer, &marker, sizeof(marker));
- memcpy(buffer + sizeof(marker), str, len);
+ uint8_t *buffer = alloca(sizeof(marker) + marker.length);
+ memset(buffer, 0, sizeof(marker) + marker.length);
+ memcpy(buffer, &marker, sizeof(marker));
+ memcpy(buffer + sizeof(marker), str, len);
- radv_emit_thread_trace_userdata(cmd_buffer->device, cs, buffer, sizeof(marker) / 4 + marker.length / 4);
- }
+ radv_emit_thread_trace_userdata(cmd_buffer->device, cs, buffer,
+ sizeof(marker) / 4 + marker.length / 4);
+ }
}
void
radv_describe_begin_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
{
- uint64_t device_id = (uintptr_t)cmd_buffer->device;
- struct rgp_sqtt_marker_cb_start marker = {0};
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ uint64_t device_id = (uintptr_t)cmd_buffer->device;
+ struct rgp_sqtt_marker_cb_start marker = {0};
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
- if (likely(!cmd_buffer->device->thread_trace.bo))
- return;
+ if (likely(!cmd_buffer->device->thread_trace.bo))
+ return;
- marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_CB_START;
- marker.cb_id = 0;
- marker.device_id_low = device_id;
- marker.device_id_high = device_id >> 32;
- marker.queue = cmd_buffer->queue_family_index;
- marker.queue_flags = VK_QUEUE_COMPUTE_BIT |
- VK_QUEUE_TRANSFER_BIT |
- VK_QUEUE_SPARSE_BINDING_BIT;
+ marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_CB_START;
+ marker.cb_id = 0;
+ marker.device_id_low = device_id;
+ marker.device_id_high = device_id >> 32;
+ marker.queue = cmd_buffer->queue_family_index;
+ marker.queue_flags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT | VK_QUEUE_SPARSE_BINDING_BIT;
- if (cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL)
- marker.queue_flags |= VK_QUEUE_GRAPHICS_BIT;
+ if (cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL)
+ marker.queue_flags |= VK_QUEUE_GRAPHICS_BIT;
- radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
+ radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
}
void
radv_describe_end_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
{
- uint64_t device_id = (uintptr_t)cmd_buffer->device;
- struct rgp_sqtt_marker_cb_end marker = {0};
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ uint64_t device_id = (uintptr_t)cmd_buffer->device;
+ struct rgp_sqtt_marker_cb_end marker = {0};
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
- if (likely(!cmd_buffer->device->thread_trace.bo))
- return;
+ if (likely(!cmd_buffer->device->thread_trace.bo))
+ return;
- marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_CB_END;
- marker.cb_id = 0;
- marker.device_id_low = device_id;
- marker.device_id_high = device_id >> 32;
+ marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_CB_END;
+ marker.cb_id = 0;
+ marker.device_id_low = device_id;
+ marker.device_id_high = device_id >> 32;
- radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
+ radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
}
void
radv_describe_draw(struct radv_cmd_buffer *cmd_buffer)
{
- if (likely(!cmd_buffer->device->thread_trace.bo))
- return;
+ if (likely(!cmd_buffer->device->thread_trace.bo))
+ return;
- radv_write_event_marker(cmd_buffer, cmd_buffer->state.current_event_type,
- UINT_MAX, UINT_MAX, UINT_MAX);
+ radv_write_event_marker(cmd_buffer, cmd_buffer->state.current_event_type, UINT_MAX, UINT_MAX,
+ UINT_MAX);
}
void
radv_describe_dispatch(struct radv_cmd_buffer *cmd_buffer, int x, int y, int z)
{
- if (likely(!cmd_buffer->device->thread_trace.bo))
- return;
+ if (likely(!cmd_buffer->device->thread_trace.bo))
+ return;
- radv_write_event_with_dims_marker(cmd_buffer,
- cmd_buffer->state.current_event_type,
- x, y, z);
+ radv_write_event_with_dims_marker(cmd_buffer, cmd_buffer->state.current_event_type, x, y, z);
}
void
radv_describe_begin_render_pass_clear(struct radv_cmd_buffer *cmd_buffer,
- VkImageAspectFlagBits aspects)
+ VkImageAspectFlagBits aspects)
{
- cmd_buffer->state.current_event_type = (aspects & VK_IMAGE_ASPECT_COLOR_BIT) ?
- EventRenderPassColorClear : EventRenderPassDepthStencilClear;
+ cmd_buffer->state.current_event_type = (aspects & VK_IMAGE_ASPECT_COLOR_BIT)
+ ? EventRenderPassColorClear
+ : EventRenderPassDepthStencilClear;
}
void
radv_describe_end_render_pass_clear(struct radv_cmd_buffer *cmd_buffer)
{
- cmd_buffer->state.current_event_type = EventInternalUnknown;
+ cmd_buffer->state.current_event_type = EventInternalUnknown;
}
void
radv_describe_begin_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer)
{
- cmd_buffer->state.current_event_type = EventRenderPassResolve;
+ cmd_buffer->state.current_event_type = EventRenderPassResolve;
}
void
radv_describe_end_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer)
{
- cmd_buffer->state.current_event_type = EventInternalUnknown;
+ cmd_buffer->state.current_event_type = EventInternalUnknown;
}
void
radv_describe_barrier_end_delayed(struct radv_cmd_buffer *cmd_buffer)
{
- struct rgp_sqtt_marker_barrier_end marker = {0};
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
-
- if (likely(!cmd_buffer->device->thread_trace.bo) ||
- !cmd_buffer->state.pending_sqtt_barrier_end)
- return;
-
- cmd_buffer->state.pending_sqtt_barrier_end = false;
-
- marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_END;
- marker.cb_id = 0;
-
- marker.num_layout_transitions = cmd_buffer->state.num_layout_transitions;
-
- if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_WAIT_ON_EOP_TS)
- marker.wait_on_eop_ts = true;
- if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_VS_PARTIAL_FLUSH)
- marker.vs_partial_flush = true;
- if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_PS_PARTIAL_FLUSH)
- marker.ps_partial_flush = true;
- if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_CS_PARTIAL_FLUSH)
- marker.cs_partial_flush = true;
- if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_PFP_SYNC_ME)
- marker.pfp_sync_me = true;
- if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_SYNC_CP_DMA)
- marker.sync_cp_dma = true;
- if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_VMEM_L0)
- marker.inval_tcp = true;
- if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_ICACHE)
- marker.inval_sqI = true;
- if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_SMEM_L0)
- marker.inval_sqK = true;
- if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_L2)
- marker.flush_tcc = true;
- if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_L2)
- marker.inval_tcc = true;
- if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_CB)
- marker.flush_cb = true;
- if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_CB)
- marker.inval_cb = true;
- if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_DB)
- marker.flush_db = true;
- if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_DB)
- marker.inval_db = true;
- if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_L1)
- marker.inval_gl1 = true;
-
- radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
-
- cmd_buffer->state.num_layout_transitions = 0;
+ struct rgp_sqtt_marker_barrier_end marker = {0};
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+
+ if (likely(!cmd_buffer->device->thread_trace.bo) || !cmd_buffer->state.pending_sqtt_barrier_end)
+ return;
+
+ cmd_buffer->state.pending_sqtt_barrier_end = false;
+
+ marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_END;
+ marker.cb_id = 0;
+
+ marker.num_layout_transitions = cmd_buffer->state.num_layout_transitions;
+
+ if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_WAIT_ON_EOP_TS)
+ marker.wait_on_eop_ts = true;
+ if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_VS_PARTIAL_FLUSH)
+ marker.vs_partial_flush = true;
+ if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_PS_PARTIAL_FLUSH)
+ marker.ps_partial_flush = true;
+ if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_CS_PARTIAL_FLUSH)
+ marker.cs_partial_flush = true;
+ if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_PFP_SYNC_ME)
+ marker.pfp_sync_me = true;
+ if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_SYNC_CP_DMA)
+ marker.sync_cp_dma = true;
+ if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_VMEM_L0)
+ marker.inval_tcp = true;
+ if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_ICACHE)
+ marker.inval_sqI = true;
+ if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_SMEM_L0)
+ marker.inval_sqK = true;
+ if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_L2)
+ marker.flush_tcc = true;
+ if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_L2)
+ marker.inval_tcc = true;
+ if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_CB)
+ marker.flush_cb = true;
+ if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_CB)
+ marker.inval_cb = true;
+ if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_DB)
+ marker.flush_db = true;
+ if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_DB)
+ marker.inval_db = true;
+ if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_L1)
+ marker.inval_gl1 = true;
+
+ radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
+
+ cmd_buffer->state.num_layout_transitions = 0;
}
void
-radv_describe_barrier_start(struct radv_cmd_buffer *cmd_buffer,
- enum rgp_barrier_reason reason)
+radv_describe_barrier_start(struct radv_cmd_buffer *cmd_buffer, enum rgp_barrier_reason reason)
{
- struct rgp_sqtt_marker_barrier_start marker = {0};
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ struct rgp_sqtt_marker_barrier_start marker = {0};
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
- if (likely(!cmd_buffer->device->thread_trace.bo))
- return;
+ if (likely(!cmd_buffer->device->thread_trace.bo))
+ return;
- radv_describe_barrier_end_delayed(cmd_buffer);
- cmd_buffer->state.sqtt_flush_bits = 0;
+ radv_describe_barrier_end_delayed(cmd_buffer);
+ cmd_buffer->state.sqtt_flush_bits = 0;
- marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_START;
- marker.cb_id = 0;
- marker.dword02 = reason;
+ marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_START;
+ marker.cb_id = 0;
+ marker.dword02 = reason;
- radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
+ radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
}
void
radv_describe_barrier_end(struct radv_cmd_buffer *cmd_buffer)
{
- cmd_buffer->state.pending_sqtt_barrier_end = true;
+ cmd_buffer->state.pending_sqtt_barrier_end = true;
}
void
radv_describe_layout_transition(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_barrier_data *barrier)
+ const struct radv_barrier_data *barrier)
{
- struct rgp_sqtt_marker_layout_transition marker = {0};
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ struct rgp_sqtt_marker_layout_transition marker = {0};
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
- if (likely(!cmd_buffer->device->thread_trace.bo))
- return;
+ if (likely(!cmd_buffer->device->thread_trace.bo))
+ return;
- marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_LAYOUT_TRANSITION;
- marker.depth_stencil_expand = barrier->layout_transitions.depth_stencil_expand;
- marker.htile_hiz_range_expand = barrier->layout_transitions.htile_hiz_range_expand;
- marker.depth_stencil_resummarize = barrier->layout_transitions.depth_stencil_resummarize;
- marker.dcc_decompress = barrier->layout_transitions.dcc_decompress;
- marker.fmask_decompress = barrier->layout_transitions.fmask_decompress;
- marker.fast_clear_eliminate = barrier->layout_transitions.fast_clear_eliminate;
- marker.fmask_color_expand = barrier->layout_transitions.fmask_color_expand;
- marker.init_mask_ram = barrier->layout_transitions.init_mask_ram;
+ marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_LAYOUT_TRANSITION;
+ marker.depth_stencil_expand = barrier->layout_transitions.depth_stencil_expand;
+ marker.htile_hiz_range_expand = barrier->layout_transitions.htile_hiz_range_expand;
+ marker.depth_stencil_resummarize = barrier->layout_transitions.depth_stencil_resummarize;
+ marker.dcc_decompress = barrier->layout_transitions.dcc_decompress;
+ marker.fmask_decompress = barrier->layout_transitions.fmask_decompress;
+ marker.fast_clear_eliminate = barrier->layout_transitions.fast_clear_eliminate;
+ marker.fmask_color_expand = barrier->layout_transitions.fmask_color_expand;
+ marker.init_mask_ram = barrier->layout_transitions.init_mask_ram;
- radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
+ radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
- cmd_buffer->state.num_layout_transitions++;
+ cmd_buffer->state.num_layout_transitions++;
}
static void
radv_describe_pipeline_bind(struct radv_cmd_buffer *cmd_buffer,
- VkPipelineBindPoint pipelineBindPoint,
- struct radv_pipeline *pipeline)
+ VkPipelineBindPoint pipelineBindPoint, struct radv_pipeline *pipeline)
{
- struct rgp_sqtt_marker_pipeline_bind marker = {0};
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ struct rgp_sqtt_marker_pipeline_bind marker = {0};
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
- if (likely(!cmd_buffer->device->thread_trace.bo))
- return;
+ if (likely(!cmd_buffer->device->thread_trace.bo))
+ return;
- marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BIND_PIPELINE;
- marker.cb_id = 0;
- marker.bind_point = pipelineBindPoint;
- marker.api_pso_hash[0] = pipeline->pipeline_hash;
- marker.api_pso_hash[1] = pipeline->pipeline_hash >> 32;
+ marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BIND_PIPELINE;
+ marker.cb_id = 0;
+ marker.bind_point = pipelineBindPoint;
+ marker.api_pso_hash[0] = pipeline->pipeline_hash;
+ marker.api_pso_hash[1] = pipeline->pipeline_hash >> 32;
- radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
+ radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
}
/* TODO: Improve the way to trigger capture (overlay, etc). */
static void
radv_handle_thread_trace(VkQueue _queue)
{
- RADV_FROM_HANDLE(radv_queue, queue, _queue);
- static bool thread_trace_enabled = false;
- static uint64_t num_frames = 0;
- bool resize_trigger = false;
-
- if (thread_trace_enabled) {
- struct ac_thread_trace thread_trace = {0};
-
- radv_end_thread_trace(queue);
- thread_trace_enabled = false;
-
- /* TODO: Do something better than this whole sync. */
- radv_QueueWaitIdle(_queue);
-
- if (radv_get_thread_trace(queue, &thread_trace)) {
- ac_dump_thread_trace(&queue->device->physical_device->rad_info,
- &thread_trace,
- &queue->device->thread_trace);
- } else {
- /* Trigger a new capture if the driver failed to get
- * the trace because the buffer was too small.
- */
- resize_trigger = true;
- }
- }
-
- if (!thread_trace_enabled) {
- bool frame_trigger = num_frames == queue->device->thread_trace.start_frame;
- bool file_trigger = false;
+ RADV_FROM_HANDLE(radv_queue, queue, _queue);
+ static bool thread_trace_enabled = false;
+ static uint64_t num_frames = 0;
+ bool resize_trigger = false;
+
+ if (thread_trace_enabled) {
+ struct ac_thread_trace thread_trace = {0};
+
+ radv_end_thread_trace(queue);
+ thread_trace_enabled = false;
+
+ /* TODO: Do something better than this whole sync. */
+ radv_QueueWaitIdle(_queue);
+
+ if (radv_get_thread_trace(queue, &thread_trace)) {
+ ac_dump_thread_trace(&queue->device->physical_device->rad_info, &thread_trace,
+ &queue->device->thread_trace);
+ } else {
+ /* Trigger a new capture if the driver failed to get
+ * the trace because the buffer was too small.
+ */
+ resize_trigger = true;
+ }
+ }
+
+ if (!thread_trace_enabled) {
+ bool frame_trigger = num_frames == queue->device->thread_trace.start_frame;
+ bool file_trigger = false;
#ifndef _WIN32
- if (queue->device->thread_trace.trigger_file &&
- access(queue->device->thread_trace.trigger_file, W_OK) == 0) {
- if (unlink(queue->device->thread_trace.trigger_file) == 0) {
- file_trigger = true;
- } else {
- /* Do not enable tracing if we cannot remove the file,
- * because by then we'll trace every frame ... */
- fprintf(stderr, "RADV: could not remove thread trace trigger file, ignoring\n");
- }
- }
+ if (queue->device->thread_trace.trigger_file &&
+ access(queue->device->thread_trace.trigger_file, W_OK) == 0) {
+ if (unlink(queue->device->thread_trace.trigger_file) == 0) {
+ file_trigger = true;
+ } else {
+ /* Do not enable tracing if we cannot remove the file,
+ * because by then we'll trace every frame ... */
+ fprintf(stderr, "RADV: could not remove thread trace trigger file, ignoring\n");
+ }
+ }
#endif
- if (frame_trigger || file_trigger || resize_trigger) {
- /* FIXME: SQTT on compute hangs. */
- if (queue->queue_family_index == RADV_QUEUE_COMPUTE) {
- fprintf(stderr, "RADV: Capturing a SQTT trace on the compute "
- "queue is currently broken and might hang! "
- "Please, disable presenting on compute if "
- "you can.\n");
- return;
- }
+ if (frame_trigger || file_trigger || resize_trigger) {
+ /* FIXME: SQTT on compute hangs. */
+ if (queue->queue_family_index == RADV_QUEUE_COMPUTE) {
+ fprintf(stderr, "RADV: Capturing a SQTT trace on the compute "
+ "queue is currently broken and might hang! "
+ "Please, disable presenting on compute if "
+ "you can.\n");
+ return;
+ }
- radv_begin_thread_trace(queue);
- assert(!thread_trace_enabled);
- thread_trace_enabled = true;
- }
- }
- num_frames++;
+ radv_begin_thread_trace(queue);
+ assert(!thread_trace_enabled);
+ thread_trace_enabled = true;
+ }
+ }
+ num_frames++;
}
-VkResult sqtt_QueuePresentKHR(
- VkQueue _queue,
- const VkPresentInfoKHR* pPresentInfo)
+VkResult
+sqtt_QueuePresentKHR(VkQueue _queue, const VkPresentInfoKHR *pPresentInfo)
{
- VkResult result;
+ VkResult result;
- result = radv_QueuePresentKHR(_queue, pPresentInfo);
- if (result != VK_SUCCESS)
- return result;
+ result = radv_QueuePresentKHR(_queue, pPresentInfo);
+ if (result != VK_SUCCESS)
+ return result;
- radv_handle_thread_trace(_queue);
+ radv_handle_thread_trace(_queue);
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
-#define EVENT_MARKER_ALIAS(cmd_name, api_name, ...) \
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); \
- radv_write_begin_general_api_marker(cmd_buffer, ApiCmd##api_name); \
- cmd_buffer->state.current_event_type = EventCmd##api_name; \
- radv_Cmd##cmd_name(__VA_ARGS__); \
- cmd_buffer->state.current_event_type = EventInternalUnknown; \
- radv_write_end_general_api_marker(cmd_buffer, ApiCmd##api_name);
+#define EVENT_MARKER_ALIAS(cmd_name, api_name, ...) \
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); \
+ radv_write_begin_general_api_marker(cmd_buffer, ApiCmd##api_name); \
+ cmd_buffer->state.current_event_type = EventCmd##api_name; \
+ radv_Cmd##cmd_name(__VA_ARGS__); \
+ cmd_buffer->state.current_event_type = EventInternalUnknown; \
+ radv_write_end_general_api_marker(cmd_buffer, ApiCmd##api_name);
-#define EVENT_MARKER(cmd_name, ...) \
- EVENT_MARKER_ALIAS(cmd_name, cmd_name, __VA_ARGS__);
+#define EVENT_MARKER(cmd_name, ...) EVENT_MARKER_ALIAS(cmd_name, cmd_name, __VA_ARGS__);
-void sqtt_CmdDraw(
- VkCommandBuffer commandBuffer,
- uint32_t vertexCount,
- uint32_t instanceCount,
- uint32_t firstVertex,
- uint32_t firstInstance)
+void
+sqtt_CmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount,
+ uint32_t firstVertex, uint32_t firstInstance)
{
- EVENT_MARKER(Draw, commandBuffer, vertexCount, instanceCount,
- firstVertex, firstInstance);
+ EVENT_MARKER(Draw, commandBuffer, vertexCount, instanceCount, firstVertex, firstInstance);
}
-void sqtt_CmdDrawIndexed(
- VkCommandBuffer commandBuffer,
- uint32_t indexCount,
- uint32_t instanceCount,
- uint32_t firstIndex,
- int32_t vertexOffset,
- uint32_t firstInstance)
+void
+sqtt_CmdDrawIndexed(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount,
+ uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance)
{
- EVENT_MARKER(DrawIndexed, commandBuffer, indexCount, instanceCount,
- firstIndex, vertexOffset, firstInstance);
+ EVENT_MARKER(DrawIndexed, commandBuffer, indexCount, instanceCount, firstIndex, vertexOffset,
+ firstInstance);
}
-void sqtt_CmdDrawIndirect(
- VkCommandBuffer commandBuffer,
- VkBuffer buffer,
- VkDeviceSize offset,
- uint32_t drawCount,
- uint32_t stride)
+void
+sqtt_CmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
+ uint32_t drawCount, uint32_t stride)
{
- EVENT_MARKER(DrawIndirect, commandBuffer, buffer, offset, drawCount,
- stride);
+ EVENT_MARKER(DrawIndirect, commandBuffer, buffer, offset, drawCount, stride);
}
-void sqtt_CmdDrawIndexedIndirect(
- VkCommandBuffer commandBuffer,
- VkBuffer buffer,
- VkDeviceSize offset,
- uint32_t drawCount,
- uint32_t stride)
+void
+sqtt_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
+ uint32_t drawCount, uint32_t stride)
{
- EVENT_MARKER(DrawIndexedIndirect, commandBuffer, buffer, offset,
- drawCount, stride);
+ EVENT_MARKER(DrawIndexedIndirect, commandBuffer, buffer, offset, drawCount, stride);
}
-void sqtt_CmdDrawIndirectCount(
- VkCommandBuffer commandBuffer,
- VkBuffer buffer,
- VkDeviceSize offset,
- VkBuffer countBuffer,
- VkDeviceSize countBufferOffset,
- uint32_t maxDrawCount,
- uint32_t stride)
+void
+sqtt_CmdDrawIndirectCount(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
+ VkBuffer countBuffer, VkDeviceSize countBufferOffset,
+ uint32_t maxDrawCount, uint32_t stride)
{
- EVENT_MARKER(DrawIndirectCount,commandBuffer, buffer, offset,
- countBuffer, countBufferOffset, maxDrawCount, stride);
+ EVENT_MARKER(DrawIndirectCount, commandBuffer, buffer, offset, countBuffer, countBufferOffset,
+ maxDrawCount, stride);
}
-void sqtt_CmdDrawIndexedIndirectCount(
- VkCommandBuffer commandBuffer,
- VkBuffer buffer,
- VkDeviceSize offset,
- VkBuffer countBuffer,
- VkDeviceSize countBufferOffset,
- uint32_t maxDrawCount,
- uint32_t stride)
+void
+sqtt_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer, VkBuffer buffer,
+ VkDeviceSize offset, VkBuffer countBuffer,
+ VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
+ uint32_t stride)
{
- EVENT_MARKER(DrawIndexedIndirectCount, commandBuffer, buffer, offset,
- countBuffer, countBufferOffset, maxDrawCount, stride);
+ EVENT_MARKER(DrawIndexedIndirectCount, commandBuffer, buffer, offset, countBuffer,
+ countBufferOffset, maxDrawCount, stride);
}
-void sqtt_CmdDispatch(
- VkCommandBuffer commandBuffer,
- uint32_t x,
- uint32_t y,
- uint32_t z)
+void
+sqtt_CmdDispatch(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z)
{
- EVENT_MARKER(Dispatch, commandBuffer, x, y, z);
+ EVENT_MARKER(Dispatch, commandBuffer, x, y, z);
}
-void sqtt_CmdDispatchIndirect(
- VkCommandBuffer commandBuffer,
- VkBuffer buffer,
- VkDeviceSize offset)
+void
+sqtt_CmdDispatchIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset)
{
- EVENT_MARKER(DispatchIndirect, commandBuffer, buffer, offset);
+ EVENT_MARKER(DispatchIndirect, commandBuffer, buffer, offset);
}
-void sqtt_CmdCopyBuffer2KHR(
- VkCommandBuffer commandBuffer,
- const VkCopyBufferInfo2KHR* pCopyBufferInfo)
+void
+sqtt_CmdCopyBuffer2KHR(VkCommandBuffer commandBuffer, const VkCopyBufferInfo2KHR *pCopyBufferInfo)
{
- EVENT_MARKER_ALIAS(CopyBuffer2KHR, CopyBuffer, commandBuffer,
- pCopyBufferInfo);
+ EVENT_MARKER_ALIAS(CopyBuffer2KHR, CopyBuffer, commandBuffer, pCopyBufferInfo);
}
-void sqtt_CmdFillBuffer(
- VkCommandBuffer commandBuffer,
- VkBuffer dstBuffer,
- VkDeviceSize dstOffset,
- VkDeviceSize fillSize,
- uint32_t data)
+void
+sqtt_CmdFillBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset,
+ VkDeviceSize fillSize, uint32_t data)
{
- EVENT_MARKER(FillBuffer, commandBuffer, dstBuffer, dstOffset, fillSize,
- data);
+ EVENT_MARKER(FillBuffer, commandBuffer, dstBuffer, dstOffset, fillSize, data);
}
-void sqtt_CmdUpdateBuffer(
- VkCommandBuffer commandBuffer,
- VkBuffer dstBuffer,
- VkDeviceSize dstOffset,
- VkDeviceSize dataSize,
- const void* pData)
+void
+sqtt_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset,
+ VkDeviceSize dataSize, const void *pData)
{
- EVENT_MARKER(UpdateBuffer, commandBuffer, dstBuffer, dstOffset,
- dataSize, pData);
+ EVENT_MARKER(UpdateBuffer, commandBuffer, dstBuffer, dstOffset, dataSize, pData);
}
-void sqtt_CmdCopyImage2KHR(
- VkCommandBuffer commandBuffer,
- const VkCopyImageInfo2KHR* pCopyImageInfo)
+void
+sqtt_CmdCopyImage2KHR(VkCommandBuffer commandBuffer, const VkCopyImageInfo2KHR *pCopyImageInfo)
{
- EVENT_MARKER_ALIAS(CopyImage2KHR, CopyImage, commandBuffer,
- pCopyImageInfo);
+ EVENT_MARKER_ALIAS(CopyImage2KHR, CopyImage, commandBuffer, pCopyImageInfo);
}
-void sqtt_CmdCopyBufferToImage2KHR(
- VkCommandBuffer commandBuffer,
- const VkCopyBufferToImageInfo2KHR* pCopyBufferToImageInfo)
+void
+sqtt_CmdCopyBufferToImage2KHR(VkCommandBuffer commandBuffer,
+ const VkCopyBufferToImageInfo2KHR *pCopyBufferToImageInfo)
{
- EVENT_MARKER_ALIAS(CopyBufferToImage2KHR, CopyBufferToImage,
- commandBuffer, pCopyBufferToImageInfo);
+ EVENT_MARKER_ALIAS(CopyBufferToImage2KHR, CopyBufferToImage, commandBuffer,
+ pCopyBufferToImageInfo);
}
-void sqtt_CmdCopyImageToBuffer2KHR(
- VkCommandBuffer commandBuffer,
- const VkCopyImageToBufferInfo2KHR* pCopyImageToBufferInfo)
+void
+sqtt_CmdCopyImageToBuffer2KHR(VkCommandBuffer commandBuffer,
+ const VkCopyImageToBufferInfo2KHR *pCopyImageToBufferInfo)
{
- EVENT_MARKER_ALIAS(CopyImageToBuffer2KHR, CopyImageToBuffer,
- commandBuffer, pCopyImageToBufferInfo);
+ EVENT_MARKER_ALIAS(CopyImageToBuffer2KHR, CopyImageToBuffer, commandBuffer,
+ pCopyImageToBufferInfo);
}
-void sqtt_CmdBlitImage2KHR(
- VkCommandBuffer commandBuffer,
- const VkBlitImageInfo2KHR* pBlitImageInfo)
+void
+sqtt_CmdBlitImage2KHR(VkCommandBuffer commandBuffer, const VkBlitImageInfo2KHR *pBlitImageInfo)
{
- EVENT_MARKER_ALIAS(BlitImage2KHR, BlitImage, commandBuffer,
- pBlitImageInfo);
+ EVENT_MARKER_ALIAS(BlitImage2KHR, BlitImage, commandBuffer, pBlitImageInfo);
}
-void sqtt_CmdClearColorImage(
- VkCommandBuffer commandBuffer,
- VkImage image_h,
- VkImageLayout imageLayout,
- const VkClearColorValue* pColor,
- uint32_t rangeCount,
- const VkImageSubresourceRange* pRanges)
+void
+sqtt_CmdClearColorImage(VkCommandBuffer commandBuffer, VkImage image_h, VkImageLayout imageLayout,
+ const VkClearColorValue *pColor, uint32_t rangeCount,
+ const VkImageSubresourceRange *pRanges)
{
- EVENT_MARKER(ClearColorImage, commandBuffer, image_h, imageLayout,
- pColor, rangeCount, pRanges);
+ EVENT_MARKER(ClearColorImage, commandBuffer, image_h, imageLayout, pColor, rangeCount, pRanges);
}
-void sqtt_CmdClearDepthStencilImage(
- VkCommandBuffer commandBuffer,
- VkImage image_h,
- VkImageLayout imageLayout,
- const VkClearDepthStencilValue* pDepthStencil,
- uint32_t rangeCount,
- const VkImageSubresourceRange* pRanges)
+void
+sqtt_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, VkImage image_h,
+ VkImageLayout imageLayout,
+ const VkClearDepthStencilValue *pDepthStencil, uint32_t rangeCount,
+ const VkImageSubresourceRange *pRanges)
{
- EVENT_MARKER(ClearDepthStencilImage, commandBuffer, image_h,
- imageLayout, pDepthStencil, rangeCount, pRanges);
+ EVENT_MARKER(ClearDepthStencilImage, commandBuffer, image_h, imageLayout, pDepthStencil,
+ rangeCount, pRanges);
}
-void sqtt_CmdClearAttachments(
- VkCommandBuffer commandBuffer,
- uint32_t attachmentCount,
- const VkClearAttachment* pAttachments,
- uint32_t rectCount,
- const VkClearRect* pRects)
+void
+sqtt_CmdClearAttachments(VkCommandBuffer commandBuffer, uint32_t attachmentCount,
+ const VkClearAttachment *pAttachments, uint32_t rectCount,
+ const VkClearRect *pRects)
{
- EVENT_MARKER(ClearAttachments, commandBuffer, attachmentCount,
- pAttachments, rectCount, pRects);
+ EVENT_MARKER(ClearAttachments, commandBuffer, attachmentCount, pAttachments, rectCount, pRects);
}
-void sqtt_CmdResolveImage2KHR(
- VkCommandBuffer commandBuffer,
- const VkResolveImageInfo2KHR* pResolveImageInfo)
+void
+sqtt_CmdResolveImage2KHR(VkCommandBuffer commandBuffer,
+ const VkResolveImageInfo2KHR *pResolveImageInfo)
{
- EVENT_MARKER_ALIAS(ResolveImage2KHR, ResolveImage, commandBuffer,
- pResolveImageInfo);
+ EVENT_MARKER_ALIAS(ResolveImage2KHR, ResolveImage, commandBuffer, pResolveImageInfo);
}
-void sqtt_CmdWaitEvents(VkCommandBuffer commandBuffer,
- uint32_t eventCount,
- const VkEvent* pEvents,
- VkPipelineStageFlags srcStageMask,
- VkPipelineStageFlags dstStageMask,
- uint32_t memoryBarrierCount,
- const VkMemoryBarrier* pMemoryBarriers,
- uint32_t bufferMemoryBarrierCount,
- const VkBufferMemoryBarrier* pBufferMemoryBarriers,
- uint32_t imageMemoryBarrierCount,
- const VkImageMemoryBarrier* pImageMemoryBarriers)
+void
+sqtt_CmdWaitEvents(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent *pEvents,
+ VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask,
+ uint32_t memoryBarrierCount, const VkMemoryBarrier *pMemoryBarriers,
+ uint32_t bufferMemoryBarrierCount,
+ const VkBufferMemoryBarrier *pBufferMemoryBarriers,
+ uint32_t imageMemoryBarrierCount,
+ const VkImageMemoryBarrier *pImageMemoryBarriers)
{
- EVENT_MARKER(WaitEvents, commandBuffer, eventCount, pEvents,
- srcStageMask, dstStageMask, memoryBarrierCount,
- pMemoryBarriers, bufferMemoryBarrierCount,
- pBufferMemoryBarriers, imageMemoryBarrierCount,
- pImageMemoryBarriers);
+ EVENT_MARKER(WaitEvents, commandBuffer, eventCount, pEvents, srcStageMask, dstStageMask,
+ memoryBarrierCount, pMemoryBarriers, bufferMemoryBarrierCount,
+ pBufferMemoryBarriers, imageMemoryBarrierCount, pImageMemoryBarriers);
}
-void sqtt_CmdPipelineBarrier(
- VkCommandBuffer commandBuffer,
- VkPipelineStageFlags srcStageMask,
- VkPipelineStageFlags destStageMask,
- VkBool32 byRegion,
- uint32_t memoryBarrierCount,
- const VkMemoryBarrier* pMemoryBarriers,
- uint32_t bufferMemoryBarrierCount,
- const VkBufferMemoryBarrier* pBufferMemoryBarriers,
- uint32_t imageMemoryBarrierCount,
- const VkImageMemoryBarrier* pImageMemoryBarriers)
+void
+sqtt_CmdPipelineBarrier(VkCommandBuffer commandBuffer, VkPipelineStageFlags srcStageMask,
+ VkPipelineStageFlags destStageMask, VkBool32 byRegion,
+ uint32_t memoryBarrierCount, const VkMemoryBarrier *pMemoryBarriers,
+ uint32_t bufferMemoryBarrierCount,
+ const VkBufferMemoryBarrier *pBufferMemoryBarriers,
+ uint32_t imageMemoryBarrierCount,
+ const VkImageMemoryBarrier *pImageMemoryBarriers)
{
- EVENT_MARKER(PipelineBarrier, commandBuffer, srcStageMask,
- destStageMask, byRegion, memoryBarrierCount,
- pMemoryBarriers, bufferMemoryBarrierCount,
- pBufferMemoryBarriers, imageMemoryBarrierCount,
- pImageMemoryBarriers);
+ EVENT_MARKER(PipelineBarrier, commandBuffer, srcStageMask, destStageMask, byRegion,
+ memoryBarrierCount, pMemoryBarriers, bufferMemoryBarrierCount,
+ pBufferMemoryBarriers, imageMemoryBarrierCount, pImageMemoryBarriers);
}
-void sqtt_CmdResetQueryPool(
- VkCommandBuffer commandBuffer,
- VkQueryPool queryPool,
- uint32_t firstQuery,
- uint32_t queryCount)
+void
+sqtt_CmdResetQueryPool(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery,
+ uint32_t queryCount)
{
- EVENT_MARKER(ResetQueryPool, commandBuffer, queryPool, firstQuery,
- queryCount);
+ EVENT_MARKER(ResetQueryPool, commandBuffer, queryPool, firstQuery, queryCount);
}
-void sqtt_CmdCopyQueryPoolResults(
- VkCommandBuffer commandBuffer,
- VkQueryPool queryPool,
- uint32_t firstQuery,
- uint32_t queryCount,
- VkBuffer dstBuffer,
- VkDeviceSize dstOffset,
- VkDeviceSize stride,
- VkQueryResultFlags flags)
-{
- EVENT_MARKER(CopyQueryPoolResults, commandBuffer, queryPool, firstQuery,
- queryCount, dstBuffer, dstOffset, stride,
- flags);
+void
+sqtt_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPool,
+ uint32_t firstQuery, uint32_t queryCount, VkBuffer dstBuffer,
+ VkDeviceSize dstOffset, VkDeviceSize stride, VkQueryResultFlags flags)
+{
+ EVENT_MARKER(CopyQueryPoolResults, commandBuffer, queryPool, firstQuery, queryCount, dstBuffer,
+ dstOffset, stride, flags);
}
#undef EVENT_MARKER
-#define API_MARKER_ALIAS(cmd_name, api_name, ...) \
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); \
- radv_write_begin_general_api_marker(cmd_buffer, ApiCmd##api_name); \
- radv_Cmd##cmd_name(__VA_ARGS__); \
- radv_write_end_general_api_marker(cmd_buffer, ApiCmd##api_name);
+#define API_MARKER_ALIAS(cmd_name, api_name, ...) \
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); \
+ radv_write_begin_general_api_marker(cmd_buffer, ApiCmd##api_name); \
+ radv_Cmd##cmd_name(__VA_ARGS__); \
+ radv_write_end_general_api_marker(cmd_buffer, ApiCmd##api_name);
-#define API_MARKER(cmd_name, ...) \
- API_MARKER_ALIAS(cmd_name, cmd_name, __VA_ARGS__);
+#define API_MARKER(cmd_name, ...) API_MARKER_ALIAS(cmd_name, cmd_name, __VA_ARGS__);
static bool
radv_sqtt_dump_pipeline()
{
- return getenv("RADV_THREAD_TRACE_PIPELINE");
+ return getenv("RADV_THREAD_TRACE_PIPELINE");
}
-void sqtt_CmdBindPipeline(
- VkCommandBuffer commandBuffer,
- VkPipelineBindPoint pipelineBindPoint,
- VkPipeline _pipeline)
+void
+sqtt_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint,
+ VkPipeline _pipeline)
{
- RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
+ RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
- API_MARKER(BindPipeline, commandBuffer, pipelineBindPoint, _pipeline);
+ API_MARKER(BindPipeline, commandBuffer, pipelineBindPoint, _pipeline);
- if (radv_sqtt_dump_pipeline())
- radv_describe_pipeline_bind(cmd_buffer, pipelineBindPoint, pipeline);
+ if (radv_sqtt_dump_pipeline())
+ radv_describe_pipeline_bind(cmd_buffer, pipelineBindPoint, pipeline);
}
-void sqtt_CmdBindDescriptorSets(
- VkCommandBuffer commandBuffer,
- VkPipelineBindPoint pipelineBindPoint,
- VkPipelineLayout layout,
- uint32_t firstSet,
- uint32_t descriptorSetCount,
- const VkDescriptorSet* pDescriptorSets,
- uint32_t dynamicOffsetCount,
- const uint32_t* pDynamicOffsets)
+void
+sqtt_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint,
+ VkPipelineLayout layout, uint32_t firstSet, uint32_t descriptorSetCount,
+ const VkDescriptorSet *pDescriptorSets, uint32_t dynamicOffsetCount,
+ const uint32_t *pDynamicOffsets)
{
- API_MARKER(BindDescriptorSets, commandBuffer, pipelineBindPoint,
- layout, firstSet, descriptorSetCount,
- pDescriptorSets, dynamicOffsetCount, pDynamicOffsets);
+ API_MARKER(BindDescriptorSets, commandBuffer, pipelineBindPoint, layout, firstSet,
+ descriptorSetCount, pDescriptorSets, dynamicOffsetCount, pDynamicOffsets);
}
-void sqtt_CmdBindIndexBuffer(
- VkCommandBuffer commandBuffer,
- VkBuffer buffer,
- VkDeviceSize offset,
- VkIndexType indexType)
+void
+sqtt_CmdBindIndexBuffer(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
+ VkIndexType indexType)
{
- API_MARKER(BindIndexBuffer, commandBuffer, buffer, offset, indexType);
+ API_MARKER(BindIndexBuffer, commandBuffer, buffer, offset, indexType);
}
-void sqtt_CmdBindVertexBuffers(
- VkCommandBuffer commandBuffer,
- uint32_t firstBinding,
- uint32_t bindingCount,
- const VkBuffer* pBuffers,
- const VkDeviceSize* pOffsets)
+void
+sqtt_CmdBindVertexBuffers(VkCommandBuffer commandBuffer, uint32_t firstBinding,
+ uint32_t bindingCount, const VkBuffer *pBuffers,
+ const VkDeviceSize *pOffsets)
{
- API_MARKER(BindVertexBuffers, commandBuffer, firstBinding, bindingCount,
- pBuffers, pOffsets);
+ API_MARKER(BindVertexBuffers, commandBuffer, firstBinding, bindingCount, pBuffers, pOffsets);
}
-void sqtt_CmdBeginQuery(
- VkCommandBuffer commandBuffer,
- VkQueryPool queryPool,
- uint32_t query,
- VkQueryControlFlags flags)
+void
+sqtt_CmdBeginQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query,
+ VkQueryControlFlags flags)
{
- API_MARKER(BeginQuery, commandBuffer, queryPool, query, flags);
+ API_MARKER(BeginQuery, commandBuffer, queryPool, query, flags);
}
-void sqtt_CmdEndQuery(
- VkCommandBuffer commandBuffer,
- VkQueryPool queryPool,
- uint32_t query)
+void
+sqtt_CmdEndQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query)
{
- API_MARKER(EndQuery, commandBuffer, queryPool, query);
+ API_MARKER(EndQuery, commandBuffer, queryPool, query);
}
-void sqtt_CmdWriteTimestamp(
- VkCommandBuffer commandBuffer,
- VkPipelineStageFlagBits pipelineStage,
- VkQueryPool queryPool,
- uint32_t flags)
+void
+sqtt_CmdWriteTimestamp(VkCommandBuffer commandBuffer, VkPipelineStageFlagBits pipelineStage,
+ VkQueryPool queryPool, uint32_t flags)
{
- API_MARKER(WriteTimestamp, commandBuffer, pipelineStage, queryPool, flags);
+ API_MARKER(WriteTimestamp, commandBuffer, pipelineStage, queryPool, flags);
}
-void sqtt_CmdPushConstants(
- VkCommandBuffer commandBuffer,
- VkPipelineLayout layout,
- VkShaderStageFlags stageFlags,
- uint32_t offset,
- uint32_t size,
- const void* pValues)
+void
+sqtt_CmdPushConstants(VkCommandBuffer commandBuffer, VkPipelineLayout layout,
+ VkShaderStageFlags stageFlags, uint32_t offset, uint32_t size,
+ const void *pValues)
{
- API_MARKER(PushConstants, commandBuffer, layout, stageFlags, offset,
- size, pValues);
+ API_MARKER(PushConstants, commandBuffer, layout, stageFlags, offset, size, pValues);
}
-void sqtt_CmdBeginRenderPass2(
- VkCommandBuffer commandBuffer,
- const VkRenderPassBeginInfo* pRenderPassBeginInfo,
- const VkSubpassBeginInfo* pSubpassBeginInfo)
+void
+sqtt_CmdBeginRenderPass2(VkCommandBuffer commandBuffer,
+ const VkRenderPassBeginInfo *pRenderPassBeginInfo,
+ const VkSubpassBeginInfo *pSubpassBeginInfo)
{
- API_MARKER_ALIAS(BeginRenderPass2, BeginRenderPass, commandBuffer,
- pRenderPassBeginInfo, pSubpassBeginInfo);
+ API_MARKER_ALIAS(BeginRenderPass2, BeginRenderPass, commandBuffer, pRenderPassBeginInfo,
+ pSubpassBeginInfo);
}
-void sqtt_CmdNextSubpass2(
- VkCommandBuffer commandBuffer,
- const VkSubpassBeginInfo* pSubpassBeginInfo,
- const VkSubpassEndInfo* pSubpassEndInfo)
+void
+sqtt_CmdNextSubpass2(VkCommandBuffer commandBuffer, const VkSubpassBeginInfo *pSubpassBeginInfo,
+ const VkSubpassEndInfo *pSubpassEndInfo)
{
- API_MARKER_ALIAS(NextSubpass2, NextSubpass, commandBuffer,
- pSubpassBeginInfo, pSubpassEndInfo);
+ API_MARKER_ALIAS(NextSubpass2, NextSubpass, commandBuffer, pSubpassBeginInfo, pSubpassEndInfo);
}
-void sqtt_CmdEndRenderPass2(
- VkCommandBuffer commandBuffer,
- const VkSubpassEndInfo* pSubpassEndInfo)
+void
+sqtt_CmdEndRenderPass2(VkCommandBuffer commandBuffer, const VkSubpassEndInfo *pSubpassEndInfo)
{
- API_MARKER_ALIAS(EndRenderPass2, EndRenderPass, commandBuffer,
- pSubpassEndInfo);
+ API_MARKER_ALIAS(EndRenderPass2, EndRenderPass, commandBuffer, pSubpassEndInfo);
}
-void sqtt_CmdExecuteCommands(
- VkCommandBuffer commandBuffer,
- uint32_t commandBufferCount,
- const VkCommandBuffer* pCmdBuffers)
+void
+sqtt_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCount,
+ const VkCommandBuffer *pCmdBuffers)
{
- API_MARKER(ExecuteCommands, commandBuffer, commandBufferCount,
- pCmdBuffers);
+ API_MARKER(ExecuteCommands, commandBuffer, commandBufferCount, pCmdBuffers);
}
-void sqtt_CmdSetViewport(
- VkCommandBuffer commandBuffer,
- uint32_t firstViewport,
- uint32_t viewportCount,
- const VkViewport* pViewports)
+void
+sqtt_CmdSetViewport(VkCommandBuffer commandBuffer, uint32_t firstViewport, uint32_t viewportCount,
+ const VkViewport *pViewports)
{
- API_MARKER(SetViewport, commandBuffer, firstViewport, viewportCount,
- pViewports);
+ API_MARKER(SetViewport, commandBuffer, firstViewport, viewportCount, pViewports);
}
-void sqtt_CmdSetScissor(
- VkCommandBuffer commandBuffer,
- uint32_t firstScissor,
- uint32_t scissorCount,
- const VkRect2D* pScissors)
+void
+sqtt_CmdSetScissor(VkCommandBuffer commandBuffer, uint32_t firstScissor, uint32_t scissorCount,
+ const VkRect2D *pScissors)
{
- API_MARKER(SetScissor, commandBuffer, firstScissor, scissorCount,
- pScissors);
+ API_MARKER(SetScissor, commandBuffer, firstScissor, scissorCount, pScissors);
}
-void sqtt_CmdSetLineWidth(
- VkCommandBuffer commandBuffer,
- float lineWidth)
+void
+sqtt_CmdSetLineWidth(VkCommandBuffer commandBuffer, float lineWidth)
{
- API_MARKER(SetLineWidth, commandBuffer, lineWidth);
+ API_MARKER(SetLineWidth, commandBuffer, lineWidth);
}
-void sqtt_CmdSetDepthBias(
- VkCommandBuffer commandBuffer,
- float depthBiasConstantFactor,
- float depthBiasClamp,
- float depthBiasSlopeFactor)
+void
+sqtt_CmdSetDepthBias(VkCommandBuffer commandBuffer, float depthBiasConstantFactor,
+ float depthBiasClamp, float depthBiasSlopeFactor)
{
- API_MARKER(SetDepthBias, commandBuffer, depthBiasConstantFactor,
- depthBiasClamp, depthBiasSlopeFactor);
+ API_MARKER(SetDepthBias, commandBuffer, depthBiasConstantFactor, depthBiasClamp,
+ depthBiasSlopeFactor);
}
-void sqtt_CmdSetBlendConstants(
- VkCommandBuffer commandBuffer,
- const float blendConstants[4])
+void
+sqtt_CmdSetBlendConstants(VkCommandBuffer commandBuffer, const float blendConstants[4])
{
- API_MARKER(SetBlendConstants, commandBuffer, blendConstants);
+ API_MARKER(SetBlendConstants, commandBuffer, blendConstants);
}
-void sqtt_CmdSetDepthBounds(
- VkCommandBuffer commandBuffer,
- float minDepthBounds,
- float maxDepthBounds)
+void
+sqtt_CmdSetDepthBounds(VkCommandBuffer commandBuffer, float minDepthBounds, float maxDepthBounds)
{
- API_MARKER(SetDepthBounds, commandBuffer, minDepthBounds,
- maxDepthBounds);
+ API_MARKER(SetDepthBounds, commandBuffer, minDepthBounds, maxDepthBounds);
}
-void sqtt_CmdSetStencilCompareMask(
- VkCommandBuffer commandBuffer,
- VkStencilFaceFlags faceMask,
- uint32_t compareMask)
+void
+sqtt_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask,
+ uint32_t compareMask)
{
- API_MARKER(SetStencilCompareMask, commandBuffer, faceMask, compareMask);
+ API_MARKER(SetStencilCompareMask, commandBuffer, faceMask, compareMask);
}
-void sqtt_CmdSetStencilWriteMask(
- VkCommandBuffer commandBuffer,
- VkStencilFaceFlags faceMask,
- uint32_t writeMask)
+void
+sqtt_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask,
+ uint32_t writeMask)
{
- API_MARKER(SetStencilWriteMask, commandBuffer, faceMask, writeMask);
+ API_MARKER(SetStencilWriteMask, commandBuffer, faceMask, writeMask);
}
-void sqtt_CmdSetStencilReference(
- VkCommandBuffer commandBuffer,
- VkStencilFaceFlags faceMask,
- uint32_t reference)
+void
+sqtt_CmdSetStencilReference(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask,
+ uint32_t reference)
{
- API_MARKER(SetStencilReference, commandBuffer, faceMask, reference);
+ API_MARKER(SetStencilReference, commandBuffer, faceMask, reference);
}
/* VK_EXT_debug_marker */
-void sqtt_CmdDebugMarkerBeginEXT(
- VkCommandBuffer commandBuffer,
- const VkDebugMarkerMarkerInfoEXT* pMarkerInfo)
+void
+sqtt_CmdDebugMarkerBeginEXT(VkCommandBuffer commandBuffer,
+ const VkDebugMarkerMarkerInfoEXT *pMarkerInfo)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- radv_write_user_event_marker(cmd_buffer, UserEventPush,
- pMarkerInfo->pMarkerName);
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ radv_write_user_event_marker(cmd_buffer, UserEventPush, pMarkerInfo->pMarkerName);
}
-void sqtt_CmdDebugMarkerEndEXT(
- VkCommandBuffer commandBuffer)
+void
+sqtt_CmdDebugMarkerEndEXT(VkCommandBuffer commandBuffer)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
}
-void sqtt_CmdDebugMarkerInsertEXT(
- VkCommandBuffer commandBuffer,
- const VkDebugMarkerMarkerInfoEXT* pMarkerInfo)
+void
+sqtt_CmdDebugMarkerInsertEXT(VkCommandBuffer commandBuffer,
+ const VkDebugMarkerMarkerInfoEXT *pMarkerInfo)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- radv_write_user_event_marker(cmd_buffer, UserEventTrigger,
- pMarkerInfo->pMarkerName);
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ radv_write_user_event_marker(cmd_buffer, UserEventTrigger, pMarkerInfo->pMarkerName);
}
-VkResult sqtt_DebugMarkerSetObjectNameEXT(
- VkDevice device,
- const VkDebugMarkerObjectNameInfoEXT* pNameInfo)
+VkResult
+sqtt_DebugMarkerSetObjectNameEXT(VkDevice device, const VkDebugMarkerObjectNameInfoEXT *pNameInfo)
{
- /* no-op */
- return VK_SUCCESS;
+ /* no-op */
+ return VK_SUCCESS;
}
-VkResult sqtt_DebugMarkerSetObjectTagEXT(
- VkDevice device,
- const VkDebugMarkerObjectTagInfoEXT* pTagInfo)
+VkResult
+sqtt_DebugMarkerSetObjectTagEXT(VkDevice device, const VkDebugMarkerObjectTagInfoEXT *pTagInfo)
{
- /* no-op */
- return VK_SUCCESS;
+ /* no-op */
+ return VK_SUCCESS;
}
/* Pipelines */
static enum rgp_hardware_stages
-radv_mesa_to_rgp_shader_stage(struct radv_pipeline *pipeline,
- gl_shader_stage stage)
-{
- struct radv_shader_variant *shader = pipeline->shaders[stage];
-
- switch (stage) {
- case MESA_SHADER_VERTEX:
- if (shader->info.vs.as_ls)
- return RGP_HW_STAGE_LS;
- else if (shader->info.vs.as_es)
- return RGP_HW_STAGE_ES;
- else if (shader->info.is_ngg)
- return RGP_HW_STAGE_GS;
- else
- return RGP_HW_STAGE_VS;
- case MESA_SHADER_TESS_CTRL:
- return RGP_HW_STAGE_HS;
- case MESA_SHADER_TESS_EVAL:
- if (shader->info.tes.as_es)
- return RGP_HW_STAGE_ES;
- else if (shader->info.is_ngg)
- return RGP_HW_STAGE_GS;
- else
- return RGP_HW_STAGE_VS;
- case MESA_SHADER_GEOMETRY:
- return RGP_HW_STAGE_GS;
- case MESA_SHADER_FRAGMENT:
- return RGP_HW_STAGE_PS;
- case MESA_SHADER_COMPUTE:
- return RGP_HW_STAGE_CS;
- default:
- unreachable("invalid mesa shader stage");
- }
+radv_mesa_to_rgp_shader_stage(struct radv_pipeline *pipeline, gl_shader_stage stage)
+{
+ struct radv_shader_variant *shader = pipeline->shaders[stage];
+
+ switch (stage) {
+ case MESA_SHADER_VERTEX:
+ if (shader->info.vs.as_ls)
+ return RGP_HW_STAGE_LS;
+ else if (shader->info.vs.as_es)
+ return RGP_HW_STAGE_ES;
+ else if (shader->info.is_ngg)
+ return RGP_HW_STAGE_GS;
+ else
+ return RGP_HW_STAGE_VS;
+ case MESA_SHADER_TESS_CTRL:
+ return RGP_HW_STAGE_HS;
+ case MESA_SHADER_TESS_EVAL:
+ if (shader->info.tes.as_es)
+ return RGP_HW_STAGE_ES;
+ else if (shader->info.is_ngg)
+ return RGP_HW_STAGE_GS;
+ else
+ return RGP_HW_STAGE_VS;
+ case MESA_SHADER_GEOMETRY:
+ return RGP_HW_STAGE_GS;
+ case MESA_SHADER_FRAGMENT:
+ return RGP_HW_STAGE_PS;
+ case MESA_SHADER_COMPUTE:
+ return RGP_HW_STAGE_CS;
+ default:
+ unreachable("invalid mesa shader stage");
+ }
}
static VkResult
-radv_add_code_object(struct radv_device *device,
- struct radv_pipeline *pipeline)
-{
- struct ac_thread_trace_data *thread_trace_data = &device->thread_trace;
- struct rgp_code_object *code_object = &thread_trace_data->rgp_code_object;
- struct rgp_code_object_record *record;
-
- record = malloc(sizeof(struct rgp_code_object_record));
- if (!record)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
-
- record->shader_stages_mask = 0;
- record->num_shaders_combined = 0;
- record->pipeline_hash[0] = pipeline->pipeline_hash;
- record->pipeline_hash[1] = pipeline->pipeline_hash;
-
- for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
- struct radv_shader_variant *shader = pipeline->shaders[i];
- uint8_t *code;
- uint64_t va;
-
- if (!shader)
- continue;
-
- code = malloc(shader->code_size);
- if (!code) {
- free(record);
- return VK_ERROR_OUT_OF_HOST_MEMORY;
- }
- memcpy(code, shader->code_ptr, shader->code_size);
-
- va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
-
- record->shader_data[i].hash[0] = (uint64_t)(uintptr_t)shader;
- record->shader_data[i].hash[1] = (uint64_t)(uintptr_t)shader >> 32;
- record->shader_data[i].code_size = shader->code_size;
- record->shader_data[i].code = code;
- record->shader_data[i].vgpr_count = shader->config.num_vgprs;
- record->shader_data[i].sgpr_count = shader->config.num_sgprs;
- record->shader_data[i].base_address = va & 0xffffffffffff;
- record->shader_data[i].elf_symbol_offset = 0;
- record->shader_data[i].hw_stage = radv_mesa_to_rgp_shader_stage(pipeline, i);
- record->shader_data[i].is_combined = false;
-
- record->shader_stages_mask |= (1 << i);
- record->num_shaders_combined++;
- }
-
- simple_mtx_lock(&code_object->lock);
- list_addtail(&record->list, &code_object->record);
- code_object->record_count++;
- simple_mtx_unlock(&code_object->lock);
-
- return VK_SUCCESS;
+radv_add_code_object(struct radv_device *device, struct radv_pipeline *pipeline)
+{
+ struct ac_thread_trace_data *thread_trace_data = &device->thread_trace;
+ struct rgp_code_object *code_object = &thread_trace_data->rgp_code_object;
+ struct rgp_code_object_record *record;
+
+ record = malloc(sizeof(struct rgp_code_object_record));
+ if (!record)
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+ record->shader_stages_mask = 0;
+ record->num_shaders_combined = 0;
+ record->pipeline_hash[0] = pipeline->pipeline_hash;
+ record->pipeline_hash[1] = pipeline->pipeline_hash;
+
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+ struct radv_shader_variant *shader = pipeline->shaders[i];
+ uint8_t *code;
+ uint64_t va;
+
+ if (!shader)
+ continue;
+
+ code = malloc(shader->code_size);
+ if (!code) {
+ free(record);
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+ }
+ memcpy(code, shader->code_ptr, shader->code_size);
+
+ va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
+
+ record->shader_data[i].hash[0] = (uint64_t)(uintptr_t)shader;
+ record->shader_data[i].hash[1] = (uint64_t)(uintptr_t)shader >> 32;
+ record->shader_data[i].code_size = shader->code_size;
+ record->shader_data[i].code = code;
+ record->shader_data[i].vgpr_count = shader->config.num_vgprs;
+ record->shader_data[i].sgpr_count = shader->config.num_sgprs;
+ record->shader_data[i].base_address = va & 0xffffffffffff;
+ record->shader_data[i].elf_symbol_offset = 0;
+ record->shader_data[i].hw_stage = radv_mesa_to_rgp_shader_stage(pipeline, i);
+ record->shader_data[i].is_combined = false;
+
+ record->shader_stages_mask |= (1 << i);
+ record->num_shaders_combined++;
+ }
+
+ simple_mtx_lock(&code_object->lock);
+ list_addtail(&record->list, &code_object->record);
+ code_object->record_count++;
+ simple_mtx_unlock(&code_object->lock);
+
+ return VK_SUCCESS;
}
static VkResult
-radv_register_pipeline(struct radv_device *device,
- struct radv_pipeline *pipeline)
+radv_register_pipeline(struct radv_device *device, struct radv_pipeline *pipeline)
{
- bool result;
- uint64_t base_va = ~0;
+ bool result;
+ uint64_t base_va = ~0;
- result = ac_sqtt_add_pso_correlation(&device->thread_trace, pipeline->pipeline_hash);
- if (!result)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
+ result = ac_sqtt_add_pso_correlation(&device->thread_trace, pipeline->pipeline_hash);
+ if (!result)
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
- /* Find the lowest shader BO VA. */
- for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
- struct radv_shader_variant *shader = pipeline->shaders[i];
- uint64_t va;
+ /* Find the lowest shader BO VA. */
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+ struct radv_shader_variant *shader = pipeline->shaders[i];
+ uint64_t va;
- if (!shader)
- continue;
+ if (!shader)
+ continue;
- va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
- base_va = MIN2(base_va, va);
- }
+ va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
+ base_va = MIN2(base_va, va);
+ }
- result = ac_sqtt_add_code_object_loader_event(&device->thread_trace,
- pipeline->pipeline_hash,
- base_va);
- if (!result)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
+ result =
+ ac_sqtt_add_code_object_loader_event(&device->thread_trace, pipeline->pipeline_hash, base_va);
+ if (!result)
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
- result = radv_add_code_object(device, pipeline);
- if (result != VK_SUCCESS)
- return result;
+ result = radv_add_code_object(device, pipeline);
+ if (result != VK_SUCCESS)
+ return result;
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
static void
-radv_unregister_pipeline(struct radv_device *device,
- struct radv_pipeline *pipeline)
-{
- struct ac_thread_trace_data *thread_trace_data = &device->thread_trace;
- struct rgp_pso_correlation *pso_correlation = &thread_trace_data->rgp_pso_correlation;
- struct rgp_loader_events *loader_events = &thread_trace_data->rgp_loader_events;
- struct rgp_code_object *code_object = &thread_trace_data->rgp_code_object;
-
- /* Destroy the PSO correlation record. */
- simple_mtx_lock(&pso_correlation->lock);
- list_for_each_entry_safe(struct rgp_pso_correlation_record, record,
- &pso_correlation->record, list) {
- if (record->pipeline_hash[0] == pipeline->pipeline_hash) {
- pso_correlation->record_count--;
- list_del(&record->list);
- free(record);
- break;
- }
- }
- simple_mtx_unlock(&pso_correlation->lock);
-
- /* Destroy the code object loader record. */
- simple_mtx_lock(&loader_events->lock);
- list_for_each_entry_safe(struct rgp_loader_events_record, record,
- &loader_events->record, list) {
- if (record->code_object_hash[0] == pipeline->pipeline_hash) {
- loader_events->record_count--;
- list_del(&record->list);
- free(record);
- break;
- }
- }
- simple_mtx_unlock(&loader_events->lock);
-
- /* Destroy the code object record. */
- simple_mtx_lock(&code_object->lock);
- list_for_each_entry_safe(struct rgp_code_object_record, record,
- &code_object->record, list) {
- if (record->pipeline_hash[0] == pipeline->pipeline_hash) {
- uint32_t mask = record->shader_stages_mask;
- int i;
-
- /* Free the disassembly. */
- while (mask) {
- i = u_bit_scan(&mask);
- free(record->shader_data[i].code);
- }
-
- code_object->record_count--;
- list_del(&record->list);
- free(record);
- break;
- }
- }
- simple_mtx_unlock(&code_object->lock);
-}
-
-VkResult sqtt_CreateGraphicsPipelines(
- VkDevice _device,
- VkPipelineCache pipelineCache,
- uint32_t count,
- const VkGraphicsPipelineCreateInfo* pCreateInfos,
- const VkAllocationCallbacks* pAllocator,
- VkPipeline* pPipelines)
-{
- RADV_FROM_HANDLE(radv_device, device, _device);
- VkResult result;
-
- result = radv_CreateGraphicsPipelines(_device, pipelineCache, count,
- pCreateInfos, pAllocator,
- pPipelines);
- if (result != VK_SUCCESS)
- return result;
-
- if (radv_sqtt_dump_pipeline()) {
- for (unsigned i = 0; i < count; i++) {
- RADV_FROM_HANDLE(radv_pipeline, pipeline, pPipelines[i]);
-
- if (!pipeline)
- continue;
-
- result = radv_register_pipeline(device, pipeline);
- if (result != VK_SUCCESS)
- goto fail;
- }
- }
-
- return VK_SUCCESS;
+radv_unregister_pipeline(struct radv_device *device, struct radv_pipeline *pipeline)
+{
+ struct ac_thread_trace_data *thread_trace_data = &device->thread_trace;
+ struct rgp_pso_correlation *pso_correlation = &thread_trace_data->rgp_pso_correlation;
+ struct rgp_loader_events *loader_events = &thread_trace_data->rgp_loader_events;
+ struct rgp_code_object *code_object = &thread_trace_data->rgp_code_object;
+
+ /* Destroy the PSO correlation record. */
+ simple_mtx_lock(&pso_correlation->lock);
+ list_for_each_entry_safe(struct rgp_pso_correlation_record, record, &pso_correlation->record,
+ list)
+ {
+ if (record->pipeline_hash[0] == pipeline->pipeline_hash) {
+ pso_correlation->record_count--;
+ list_del(&record->list);
+ free(record);
+ break;
+ }
+ }
+ simple_mtx_unlock(&pso_correlation->lock);
+
+ /* Destroy the code object loader record. */
+ simple_mtx_lock(&loader_events->lock);
+ list_for_each_entry_safe(struct rgp_loader_events_record, record, &loader_events->record, list)
+ {
+ if (record->code_object_hash[0] == pipeline->pipeline_hash) {
+ loader_events->record_count--;
+ list_del(&record->list);
+ free(record);
+ break;
+ }
+ }
+ simple_mtx_unlock(&loader_events->lock);
+
+ /* Destroy the code object record. */
+ simple_mtx_lock(&code_object->lock);
+ list_for_each_entry_safe(struct rgp_code_object_record, record, &code_object->record, list)
+ {
+ if (record->pipeline_hash[0] == pipeline->pipeline_hash) {
+ uint32_t mask = record->shader_stages_mask;
+ int i;
+
+ /* Free the disassembly. */
+ while (mask) {
+ i = u_bit_scan(&mask);
+ free(record->shader_data[i].code);
+ }
+
+ code_object->record_count--;
+ list_del(&record->list);
+ free(record);
+ break;
+ }
+ }
+ simple_mtx_unlock(&code_object->lock);
+}
+
+VkResult
+sqtt_CreateGraphicsPipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count,
+ const VkGraphicsPipelineCreateInfo *pCreateInfos,
+ const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ VkResult result;
+
+ result = radv_CreateGraphicsPipelines(_device, pipelineCache, count, pCreateInfos, pAllocator,
+ pPipelines);
+ if (result != VK_SUCCESS)
+ return result;
+
+ if (radv_sqtt_dump_pipeline()) {
+ for (unsigned i = 0; i < count; i++) {
+ RADV_FROM_HANDLE(radv_pipeline, pipeline, pPipelines[i]);
+
+ if (!pipeline)
+ continue;
+
+ result = radv_register_pipeline(device, pipeline);
+ if (result != VK_SUCCESS)
+ goto fail;
+ }
+ }
+
+ return VK_SUCCESS;
fail:
- for (unsigned i = 0; i < count; i++) {
- sqtt_DestroyPipeline(_device, pPipelines[i], pAllocator);
- pPipelines[i] = VK_NULL_HANDLE;
- }
- return result;
+ for (unsigned i = 0; i < count; i++) {
+ sqtt_DestroyPipeline(_device, pPipelines[i], pAllocator);
+ pPipelines[i] = VK_NULL_HANDLE;
+ }
+ return result;
}
-VkResult sqtt_CreateComputePipelines(
- VkDevice _device,
- VkPipelineCache pipelineCache,
- uint32_t count,
- const VkComputePipelineCreateInfo* pCreateInfos,
- const VkAllocationCallbacks* pAllocator,
- VkPipeline* pPipelines)
+VkResult
+sqtt_CreateComputePipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count,
+ const VkComputePipelineCreateInfo *pCreateInfos,
+ const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- VkResult result;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ VkResult result;
- result = radv_CreateComputePipelines(_device, pipelineCache, count,
- pCreateInfos, pAllocator,
- pPipelines);
- if (result != VK_SUCCESS)
- return result;
+ result = radv_CreateComputePipelines(_device, pipelineCache, count, pCreateInfos, pAllocator,
+ pPipelines);
+ if (result != VK_SUCCESS)
+ return result;
- if (radv_sqtt_dump_pipeline()) {
- for (unsigned i = 0; i < count; i++) {
- RADV_FROM_HANDLE(radv_pipeline, pipeline, pPipelines[i]);
+ if (radv_sqtt_dump_pipeline()) {
+ for (unsigned i = 0; i < count; i++) {
+ RADV_FROM_HANDLE(radv_pipeline, pipeline, pPipelines[i]);
- if (!pipeline)
- continue;
+ if (!pipeline)
+ continue;
- result = radv_register_pipeline(device, pipeline);
- if (result != VK_SUCCESS)
- goto fail;
- }
- }
+ result = radv_register_pipeline(device, pipeline);
+ if (result != VK_SUCCESS)
+ goto fail;
+ }
+ }
- return VK_SUCCESS;
+ return VK_SUCCESS;
fail:
- for (unsigned i = 0; i < count; i++) {
- sqtt_DestroyPipeline(_device, pPipelines[i], pAllocator);
- pPipelines[i] = VK_NULL_HANDLE;
- }
- return result;
+ for (unsigned i = 0; i < count; i++) {
+ sqtt_DestroyPipeline(_device, pPipelines[i], pAllocator);
+ pPipelines[i] = VK_NULL_HANDLE;
+ }
+ return result;
}
-void sqtt_DestroyPipeline(
- VkDevice _device,
- VkPipeline _pipeline,
- const VkAllocationCallbacks* pAllocator)
+void
+sqtt_DestroyPipeline(VkDevice _device, VkPipeline _pipeline,
+ const VkAllocationCallbacks *pAllocator)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
- if (!_pipeline)
- return;
+ if (!_pipeline)
+ return;
- if (radv_sqtt_dump_pipeline())
- radv_unregister_pipeline(device, pipeline);
+ if (radv_sqtt_dump_pipeline())
+ radv_unregister_pipeline(device, pipeline);
- radv_DestroyPipeline(_device, _pipeline, pAllocator);
+ radv_DestroyPipeline(_device, _pipeline, pAllocator);
}
#undef API_MARKER
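
The reformatted definitions in the + lines above consistently place the return type on its own line and pack the parameter list up to the column limit, with compound literals broken out one designator per line. The fragment below is a hypothetical sketch written in that layout, not code from the RADV tree; the struct and function names are invented purely for illustration.

#include <stdint.h>

struct example_extent {
   uint32_t width;
   uint32_t height;
   uint32_t layers;
};

static struct example_extent
example_make_extent(uint32_t width, uint32_t height, uint32_t layers)
{
   /* Designated initializers are laid out one field per line, as in the reformatted code above. */
   return (struct example_extent){
      .width = width,
      .height = height,
      .layers = layers,
   };
}

int
main(void)
{
   struct example_extent e = example_make_extent(256, 256, 1);
   return e.layers == 1 ? 0 : 1;
}
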
diff --git a/src/amd/vulkan/radv_android.c b/src/amd/vulkan/radv_android.c
index 76ab39ca480..f8064c011d9 100644
--- a/src/amd/vulkan/radv_android.c
+++ b/src/amd/vulkan/radv_android.c
@@ -22,12 +22,12 @@
*/
#ifdef ANDROID
+#include <libsync.h>
#include <hardware/gralloc.h>
#include <hardware/hardware.h>
#include <hardware/hwvulkan.h>
#include <vulkan/vk_android_native_buffer.h>
#include <vulkan/vk_icd.h>
-#include <libsync.h>
#if ANDROID_API_LEVEL >= 26
#include <hardware/gralloc1.h>
@@ -42,433 +42,421 @@
#ifdef ANDROID
-static int radv_hal_open(const struct hw_module_t* mod, const char* id, struct hw_device_t** dev);
+static int radv_hal_open(const struct hw_module_t *mod, const char *id, struct hw_device_t **dev);
static int radv_hal_close(struct hw_device_t *dev);
static void UNUSED
static_asserts(void)
{
- STATIC_ASSERT(HWVULKAN_DISPATCH_MAGIC == ICD_LOADER_MAGIC);
+ STATIC_ASSERT(HWVULKAN_DISPATCH_MAGIC == ICD_LOADER_MAGIC);
}
PUBLIC struct hwvulkan_module_t HAL_MODULE_INFO_SYM = {
- .common = {
- .tag = HARDWARE_MODULE_TAG,
- .module_api_version = HWVULKAN_MODULE_API_VERSION_0_1,
- .hal_api_version = HARDWARE_MAKE_API_VERSION(1, 0),
- .id = HWVULKAN_HARDWARE_MODULE_ID,
- .name = "AMD Vulkan HAL",
- .author = "Google",
- .methods = &(hw_module_methods_t) {
- .open = radv_hal_open,
- },
- },
+ .common =
+ {
+ .tag = HARDWARE_MODULE_TAG,
+ .module_api_version = HWVULKAN_MODULE_API_VERSION_0_1,
+ .hal_api_version = HARDWARE_MAKE_API_VERSION(1, 0),
+ .id = HWVULKAN_HARDWARE_MODULE_ID,
+ .name = "AMD Vulkan HAL",
+ .author = "Google",
+ .methods =
+ &(hw_module_methods_t){
+ .open = radv_hal_open,
+ },
+ },
};
/* If any bits in test_mask are set, then unset them and return true. */
static inline bool
unmask32(uint32_t *inout_mask, uint32_t test_mask)
{
- uint32_t orig_mask = *inout_mask;
- *inout_mask &= ~test_mask;
- return *inout_mask != orig_mask;
+ uint32_t orig_mask = *inout_mask;
+ *inout_mask &= ~test_mask;
+ return *inout_mask != orig_mask;
}
static int
-radv_hal_open(const struct hw_module_t* mod, const char* id,
- struct hw_device_t** dev)
+radv_hal_open(const struct hw_module_t *mod, const char *id, struct hw_device_t **dev)
{
- assert(mod == &HAL_MODULE_INFO_SYM.common);
- assert(strcmp(id, HWVULKAN_DEVICE_0) == 0);
-
- hwvulkan_device_t *hal_dev = malloc(sizeof(*hal_dev));
- if (!hal_dev)
- return -1;
-
- *hal_dev = (hwvulkan_device_t) {
- .common = {
- .tag = HARDWARE_DEVICE_TAG,
- .version = HWVULKAN_DEVICE_API_VERSION_0_1,
- .module = &HAL_MODULE_INFO_SYM.common,
- .close = radv_hal_close,
- },
- .EnumerateInstanceExtensionProperties = radv_EnumerateInstanceExtensionProperties,
- .CreateInstance = radv_CreateInstance,
- .GetInstanceProcAddr = radv_GetInstanceProcAddr,
- };
-
- *dev = &hal_dev->common;
- return 0;
+ assert(mod == &HAL_MODULE_INFO_SYM.common);
+ assert(strcmp(id, HWVULKAN_DEVICE_0) == 0);
+
+ hwvulkan_device_t *hal_dev = malloc(sizeof(*hal_dev));
+ if (!hal_dev)
+ return -1;
+
+ *hal_dev = (hwvulkan_device_t){
+ .common =
+ {
+ .tag = HARDWARE_DEVICE_TAG,
+ .version = HWVULKAN_DEVICE_API_VERSION_0_1,
+ .module = &HAL_MODULE_INFO_SYM.common,
+ .close = radv_hal_close,
+ },
+ .EnumerateInstanceExtensionProperties = radv_EnumerateInstanceExtensionProperties,
+ .CreateInstance = radv_CreateInstance,
+ .GetInstanceProcAddr = radv_GetInstanceProcAddr,
+ };
+
+ *dev = &hal_dev->common;
+ return 0;
}
static int
radv_hal_close(struct hw_device_t *dev)
{
- /* hwvulkan.h claims that hw_device_t::close() is never called. */
- return -1;
+ /* hwvulkan.h claims that hw_device_t::close() is never called. */
+ return -1;
}
VkResult
-radv_image_from_gralloc(VkDevice device_h,
- const VkImageCreateInfo *base_info,
- const VkNativeBufferANDROID *gralloc_info,
- const VkAllocationCallbacks *alloc,
- VkImage *out_image_h)
+radv_image_from_gralloc(VkDevice device_h, const VkImageCreateInfo *base_info,
+ const VkNativeBufferANDROID *gralloc_info,
+ const VkAllocationCallbacks *alloc, VkImage *out_image_h)
{
- RADV_FROM_HANDLE(radv_device, device, device_h);
- VkImage image_h = VK_NULL_HANDLE;
- struct radv_image *image = NULL;
- VkResult result;
-
- if (gralloc_info->handle->numFds != 1) {
- return vk_errorf(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE,
- "VkNativeBufferANDROID::handle::numFds is %d, "
- "expected 1", gralloc_info->handle->numFds);
- }
-
- /* Do not close the gralloc handle's dma_buf. The lifetime of the dma_buf
- * must exceed that of the gralloc handle, and we do not own the gralloc
- * handle.
- */
- int dma_buf = gralloc_info->handle->data[0];
-
- VkDeviceMemory memory_h;
-
- const VkImportMemoryFdInfoKHR import_info = {
- .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
- .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
- .fd = os_dupfd_cloexec(dma_buf),
- };
-
- /* Find the first VRAM memory type, or GART for PRIME images. */
- int memory_type_index = -1;
- for (int i = 0; i < device->physical_device->memory_properties.memoryTypeCount; ++i) {
- bool is_local = !!(device->physical_device->memory_properties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
- if (is_local) {
- memory_type_index = i;
- break;
- }
- }
-
- /* fallback */
- if (memory_type_index == -1)
- memory_type_index = 0;
-
- result = radv_AllocateMemory(device_h,
- &(VkMemoryAllocateInfo) {
- .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
- .pNext = &import_info,
- /* Max buffer size, unused for imports */
- .allocationSize = 0x7FFFFFFF,
- .memoryTypeIndex = memory_type_index,
- },
- alloc,
- &memory_h);
- if (result != VK_SUCCESS)
- return result;
-
- struct radeon_bo_metadata md;
- device->ws->buffer_get_metadata(device->ws, radv_device_memory_from_handle(memory_h)->bo, &md);
-
- VkImageCreateInfo updated_base_info = *base_info;
-
- VkExternalMemoryImageCreateInfo external_memory_info = {
- .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
- .pNext = updated_base_info.pNext,
- .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
- };
-
- updated_base_info.pNext = &external_memory_info;
-
- result = radv_image_create(device_h,
- &(struct radv_image_create_info) {
- .vk_info = &updated_base_info,
- .no_metadata_planes = true,
- .bo_metadata = &md,
- },
- alloc,
- &image_h);
-
- if (result != VK_SUCCESS)
- goto fail_create_image;
-
- image = radv_image_from_handle(image_h);
-
- radv_image_override_offset_stride(device, image, 0, gralloc_info->stride);
-
- radv_BindImageMemory(device_h, image_h, memory_h, 0);
-
- image->owned_memory = memory_h;
- /* Don't clobber the out-parameter until success is certain. */
- *out_image_h = image_h;
-
- return VK_SUCCESS;
+ RADV_FROM_HANDLE(radv_device, device, device_h);
+ VkImage image_h = VK_NULL_HANDLE;
+ struct radv_image *image = NULL;
+ VkResult result;
+
+ if (gralloc_info->handle->numFds != 1) {
+ return vk_errorf(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE,
+ "VkNativeBufferANDROID::handle::numFds is %d, "
+ "expected 1",
+ gralloc_info->handle->numFds);
+ }
+
+ /* Do not close the gralloc handle's dma_buf. The lifetime of the dma_buf
+ * must exceed that of the gralloc handle, and we do not own the gralloc
+ * handle.
+ */
+ int dma_buf = gralloc_info->handle->data[0];
+
+ VkDeviceMemory memory_h;
+
+ const VkImportMemoryFdInfoKHR import_info = {
+ .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
+ .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
+ .fd = os_dupfd_cloexec(dma_buf),
+ };
+
+ /* Find the first VRAM memory type, or GART for PRIME images. */
+ int memory_type_index = -1;
+ for (int i = 0; i < device->physical_device->memory_properties.memoryTypeCount; ++i) {
+ bool is_local = !!(device->physical_device->memory_properties.memoryTypes[i].propertyFlags &
+ VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
+ if (is_local) {
+ memory_type_index = i;
+ break;
+ }
+ }
+
+ /* fallback */
+ if (memory_type_index == -1)
+ memory_type_index = 0;
+
+ result = radv_AllocateMemory(device_h,
+ &(VkMemoryAllocateInfo){
+ .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+ .pNext = &import_info,
+ /* Max buffer size, unused for imports */
+ .allocationSize = 0x7FFFFFFF,
+ .memoryTypeIndex = memory_type_index,
+ },
+ alloc, &memory_h);
+ if (result != VK_SUCCESS)
+ return result;
+
+ struct radeon_bo_metadata md;
+ device->ws->buffer_get_metadata(device->ws, radv_device_memory_from_handle(memory_h)->bo, &md);
+
+ VkImageCreateInfo updated_base_info = *base_info;
+
+ VkExternalMemoryImageCreateInfo external_memory_info = {
+ .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
+ .pNext = updated_base_info.pNext,
+ .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
+ };
+
+ updated_base_info.pNext = &external_memory_info;
+
+ result = radv_image_create(device_h,
+ &(struct radv_image_create_info){
+ .vk_info = &updated_base_info,
+ .no_metadata_planes = true,
+ .bo_metadata = &md,
+ },
+ alloc, &image_h);
+
+ if (result != VK_SUCCESS)
+ goto fail_create_image;
+
+ image = radv_image_from_handle(image_h);
+
+ radv_image_override_offset_stride(device, image, 0, gralloc_info->stride);
+
+ radv_BindImageMemory(device_h, image_h, memory_h, 0);
+
+ image->owned_memory = memory_h;
+ /* Don't clobber the out-parameter until success is certain. */
+ *out_image_h = image_h;
+
+ return VK_SUCCESS;
fail_create_image:
- radv_FreeMemory(device_h, memory_h, alloc);
- return result;
+ radv_FreeMemory(device_h, memory_h, alloc);
+ return result;
}
-VkResult radv_GetSwapchainGrallocUsageANDROID(
- VkDevice device_h,
- VkFormat format,
- VkImageUsageFlags imageUsage,
- int* grallocUsage)
+VkResult
+radv_GetSwapchainGrallocUsageANDROID(VkDevice device_h, VkFormat format,
+ VkImageUsageFlags imageUsage, int *grallocUsage)
{
- RADV_FROM_HANDLE(radv_device, device, device_h);
- struct radv_physical_device *phys_dev = device->physical_device;
- VkPhysicalDevice phys_dev_h = radv_physical_device_to_handle(phys_dev);
- VkResult result;
-
- *grallocUsage = 0;
-
- /* WARNING: Android Nougat's libvulkan.so hardcodes the VkImageUsageFlags
- * returned to applications via VkSurfaceCapabilitiesKHR::supportedUsageFlags.
- * The relevant code in libvulkan/swapchain.cpp contains this fun comment:
- *
- * TODO(jessehall): I think these are right, but haven't thought hard
- * about it. Do we need to query the driver for support of any of
- * these?
- *
- * Any disagreement between this function and the hardcoded
- * VkSurfaceCapabilitiesKHR:supportedUsageFlags causes tests
- * dEQP-VK.wsi.android.swapchain.*.image_usage to fail.
- */
-
- const VkPhysicalDeviceImageFormatInfo2 image_format_info = {
- .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
- .format = format,
- .type = VK_IMAGE_TYPE_2D,
- .tiling = VK_IMAGE_TILING_OPTIMAL,
- .usage = imageUsage,
- };
-
- VkImageFormatProperties2 image_format_props = {
- .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
- };
-
- /* Check that requested format and usage are supported. */
- result = radv_GetPhysicalDeviceImageFormatProperties2(phys_dev_h,
- &image_format_info, &image_format_props);
- if (result != VK_SUCCESS) {
- return vk_errorf(device->instance, result,
- "radv_GetPhysicalDeviceImageFormatProperties2 failed "
- "inside %s", __func__);
- }
-
- if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_DST_BIT |
- VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT))
- *grallocUsage |= GRALLOC_USAGE_HW_RENDER;
-
- if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
- VK_IMAGE_USAGE_SAMPLED_BIT |
- VK_IMAGE_USAGE_STORAGE_BIT |
- VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT))
- *grallocUsage |= GRALLOC_USAGE_HW_TEXTURE;
-
- /* All VkImageUsageFlags not explicitly checked here are unsupported for
- * gralloc swapchains.
- */
- if (imageUsage != 0) {
- return vk_errorf(device->instance, VK_ERROR_FORMAT_NOT_SUPPORTED,
- "unsupported VkImageUsageFlags(0x%x) for gralloc "
- "swapchain", imageUsage);
- }
-
- /*
- * FINISHME: Advertise all display-supported formats. Mostly
- * DRM_FORMAT_ARGB2101010 and DRM_FORMAT_ABGR2101010, but need to check
- * what we need for 30-bit colors.
- */
- if (format == VK_FORMAT_B8G8R8A8_UNORM ||
- format == VK_FORMAT_B5G6R5_UNORM_PACK16) {
- *grallocUsage |= GRALLOC_USAGE_HW_FB |
- GRALLOC_USAGE_HW_COMPOSER |
- GRALLOC_USAGE_EXTERNAL_DISP;
- }
-
- if (*grallocUsage == 0)
- return VK_ERROR_FORMAT_NOT_SUPPORTED;
-
- return VK_SUCCESS;
+ RADV_FROM_HANDLE(radv_device, device, device_h);
+ struct radv_physical_device *phys_dev = device->physical_device;
+ VkPhysicalDevice phys_dev_h = radv_physical_device_to_handle(phys_dev);
+ VkResult result;
+
+ *grallocUsage = 0;
+
+ /* WARNING: Android Nougat's libvulkan.so hardcodes the VkImageUsageFlags
+ * returned to applications via VkSurfaceCapabilitiesKHR::supportedUsageFlags.
+ * The relevant code in libvulkan/swapchain.cpp contains this fun comment:
+ *
+ * TODO(jessehall): I think these are right, but haven't thought hard
+ * about it. Do we need to query the driver for support of any of
+ * these?
+ *
+ * Any disagreement between this function and the hardcoded
+ * VkSurfaceCapabilitiesKHR:supportedUsageFlags causes tests
+ * dEQP-VK.wsi.android.swapchain.*.image_usage to fail.
+ */
+
+ const VkPhysicalDeviceImageFormatInfo2 image_format_info = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
+ .format = format,
+ .type = VK_IMAGE_TYPE_2D,
+ .tiling = VK_IMAGE_TILING_OPTIMAL,
+ .usage = imageUsage,
+ };
+
+ VkImageFormatProperties2 image_format_props = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
+ };
+
+ /* Check that requested format and usage are supported. */
+ result = radv_GetPhysicalDeviceImageFormatProperties2(phys_dev_h, &image_format_info,
+ &image_format_props);
+ if (result != VK_SUCCESS) {
+ return vk_errorf(device->instance, result,
+ "radv_GetPhysicalDeviceImageFormatProperties2 failed "
+ "inside %s",
+ __func__);
+ }
+
+ if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT))
+ *grallocUsage |= GRALLOC_USAGE_HW_RENDER;
+
+ if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT |
+ VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT))
+ *grallocUsage |= GRALLOC_USAGE_HW_TEXTURE;
+
+ /* All VkImageUsageFlags not explicitly checked here are unsupported for
+ * gralloc swapchains.
+ */
+ if (imageUsage != 0) {
+ return vk_errorf(device->instance, VK_ERROR_FORMAT_NOT_SUPPORTED,
+ "unsupported VkImageUsageFlags(0x%x) for gralloc "
+ "swapchain",
+ imageUsage);
+ }
+
+ /*
+ * FINISHME: Advertise all display-supported formats. Mostly
+ * DRM_FORMAT_ARGB2101010 and DRM_FORMAT_ABGR2101010, but need to check
+ * what we need for 30-bit colors.
+ */
+ if (format == VK_FORMAT_B8G8R8A8_UNORM || format == VK_FORMAT_B5G6R5_UNORM_PACK16) {
+ *grallocUsage |=
+ GRALLOC_USAGE_HW_FB | GRALLOC_USAGE_HW_COMPOSER | GRALLOC_USAGE_EXTERNAL_DISP;
+ }
+
+ if (*grallocUsage == 0)
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+ return VK_SUCCESS;
}
-VkResult radv_GetSwapchainGrallocUsage2ANDROID(
- VkDevice device_h,
- VkFormat format,
- VkImageUsageFlags imageUsage,
- VkSwapchainImageUsageFlagsANDROID swapchainImageUsage,
- uint64_t* grallocConsumerUsage,
- uint64_t* grallocProducerUsage)
+VkResult
+radv_GetSwapchainGrallocUsage2ANDROID(VkDevice device_h, VkFormat format,
+ VkImageUsageFlags imageUsage,
+ VkSwapchainImageUsageFlagsANDROID swapchainImageUsage,
+ uint64_t *grallocConsumerUsage,
+ uint64_t *grallocProducerUsage)
{
- /* Before level 26 (Android 8.0/Oreo) the loader uses
- * vkGetSwapchainGrallocUsageANDROID. */
+ /* Before level 26 (Android 8.0/Oreo) the loader uses
+ * vkGetSwapchainGrallocUsageANDROID. */
#if ANDROID_API_LEVEL >= 26
- RADV_FROM_HANDLE(radv_device, device, device_h);
- struct radv_physical_device *phys_dev = device->physical_device;
- VkPhysicalDevice phys_dev_h = radv_physical_device_to_handle(phys_dev);
- VkResult result;
-
- *grallocConsumerUsage = 0;
- *grallocProducerUsage = 0;
-
- if (swapchainImageUsage & VK_SWAPCHAIN_IMAGE_USAGE_SHARED_BIT_ANDROID)
- return vk_errorf(device->instance, VK_ERROR_FORMAT_NOT_SUPPORTED,
- "The Vulkan loader tried to query shared presentable image support");
-
- const VkPhysicalDeviceImageFormatInfo2 image_format_info = {
- .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
- .format = format,
- .type = VK_IMAGE_TYPE_2D,
- .tiling = VK_IMAGE_TILING_OPTIMAL,
- .usage = imageUsage,
- };
-
- VkImageFormatProperties2 image_format_props = {
- .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
- };
-
- /* Check that requested format and usage are supported. */
- result = radv_GetPhysicalDeviceImageFormatProperties2(phys_dev_h,
- &image_format_info, &image_format_props);
- if (result != VK_SUCCESS) {
- return vk_errorf(device->instance, result,
- "radv_GetPhysicalDeviceImageFormatProperties2 failed "
- "inside %s", __func__);
- }
-
- if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_DST_BIT |
- VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)) {
- *grallocProducerUsage |= GRALLOC1_PRODUCER_USAGE_GPU_RENDER_TARGET;
- *grallocConsumerUsage |= GRALLOC1_CONSUMER_USAGE_CLIENT_TARGET;
- }
-
- if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
- VK_IMAGE_USAGE_SAMPLED_BIT |
- VK_IMAGE_USAGE_STORAGE_BIT |
- VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) {
- *grallocConsumerUsage |= GRALLOC1_CONSUMER_USAGE_GPU_TEXTURE;
- }
-
- if (imageUsage != 0) {
- return vk_errorf(device->instance, VK_ERROR_FORMAT_NOT_SUPPORTED,
- "unsupported VkImageUsageFlags(0x%x) for gralloc "
- "swapchain", imageUsage);
- }
-
- /*
- * FINISHME: Advertise all display-supported formats. Mostly
- * DRM_FORMAT_ARGB2101010 and DRM_FORMAT_ABGR2101010, but need to check
- * what we need for 30-bit colors.
- */
- if (format == VK_FORMAT_B8G8R8A8_UNORM ||
- format == VK_FORMAT_B5G6R5_UNORM_PACK16) {
- *grallocProducerUsage |= GRALLOC1_PRODUCER_USAGE_GPU_RENDER_TARGET;
- *grallocConsumerUsage |= GRALLOC1_CONSUMER_USAGE_HWCOMPOSER;
- }
-
- if (!*grallocProducerUsage && !*grallocConsumerUsage)
- return VK_ERROR_FORMAT_NOT_SUPPORTED;
-
- return VK_SUCCESS;
+ RADV_FROM_HANDLE(radv_device, device, device_h);
+ struct radv_physical_device *phys_dev = device->physical_device;
+ VkPhysicalDevice phys_dev_h = radv_physical_device_to_handle(phys_dev);
+ VkResult result;
+
+ *grallocConsumerUsage = 0;
+ *grallocProducerUsage = 0;
+
+ if (swapchainImageUsage & VK_SWAPCHAIN_IMAGE_USAGE_SHARED_BIT_ANDROID)
+ return vk_errorf(device->instance, VK_ERROR_FORMAT_NOT_SUPPORTED,
+ "The Vulkan loader tried to query shared presentable image support");
+
+ const VkPhysicalDeviceImageFormatInfo2 image_format_info = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
+ .format = format,
+ .type = VK_IMAGE_TYPE_2D,
+ .tiling = VK_IMAGE_TILING_OPTIMAL,
+ .usage = imageUsage,
+ };
+
+ VkImageFormatProperties2 image_format_props = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
+ };
+
+ /* Check that requested format and usage are supported. */
+ result = radv_GetPhysicalDeviceImageFormatProperties2(phys_dev_h, &image_format_info,
+ &image_format_props);
+ if (result != VK_SUCCESS) {
+ return vk_errorf(device->instance, result,
+ "radv_GetPhysicalDeviceImageFormatProperties2 failed "
+ "inside %s",
+ __func__);
+ }
+
+ if (unmask32(&imageUsage,
+ VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)) {
+ *grallocProducerUsage |= GRALLOC1_PRODUCER_USAGE_GPU_RENDER_TARGET;
+ *grallocConsumerUsage |= GRALLOC1_CONSUMER_USAGE_CLIENT_TARGET;
+ }
+
+ if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT |
+ VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) {
+ *grallocConsumerUsage |= GRALLOC1_CONSUMER_USAGE_GPU_TEXTURE;
+ }
+
+ if (imageUsage != 0) {
+ return vk_errorf(device->instance, VK_ERROR_FORMAT_NOT_SUPPORTED,
+ "unsupported VkImageUsageFlags(0x%x) for gralloc "
+ "swapchain",
+ imageUsage);
+ }
+
+ /*
+ * FINISHME: Advertise all display-supported formats. Mostly
+ * DRM_FORMAT_ARGB2101010 and DRM_FORMAT_ABGR2101010, but need to check
+ * what we need for 30-bit colors.
+ */
+ if (format == VK_FORMAT_B8G8R8A8_UNORM || format == VK_FORMAT_B5G6R5_UNORM_PACK16) {
+ *grallocProducerUsage |= GRALLOC1_PRODUCER_USAGE_GPU_RENDER_TARGET;
+ *grallocConsumerUsage |= GRALLOC1_CONSUMER_USAGE_HWCOMPOSER;
+ }
+
+ if (!*grallocProducerUsage && !*grallocConsumerUsage)
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+ return VK_SUCCESS;
#else
- *grallocConsumerUsage = 0;
- *grallocProducerUsage = 0;
- return VK_ERROR_FORMAT_NOT_SUPPORTED;
+ *grallocConsumerUsage = 0;
+ *grallocProducerUsage = 0;
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
#endif
}
VkResult
-radv_AcquireImageANDROID(
- VkDevice device,
- VkImage image_h,
- int nativeFenceFd,
- VkSemaphore semaphore,
- VkFence fence)
+radv_AcquireImageANDROID(VkDevice device, VkImage image_h, int nativeFenceFd, VkSemaphore semaphore,
+ VkFence fence)
{
- VkResult semaphore_result = VK_SUCCESS, fence_result = VK_SUCCESS;
-
- if (semaphore != VK_NULL_HANDLE) {
- int semaphore_fd = nativeFenceFd >= 0 ? os_dupfd_cloexec(nativeFenceFd) : nativeFenceFd;
- semaphore_result = radv_ImportSemaphoreFdKHR(device,
- &(VkImportSemaphoreFdInfoKHR) {
- .sType = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR,
- .flags = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT,
- .fd = semaphore_fd,
- .semaphore = semaphore,
- .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT,
- });
- }
-
- if (fence != VK_NULL_HANDLE) {
- int fence_fd = nativeFenceFd >= 0 ? os_dupfd_cloexec(nativeFenceFd) : nativeFenceFd;
- fence_result = radv_ImportFenceFdKHR(device,
- &(VkImportFenceFdInfoKHR) {
- .sType = VK_STRUCTURE_TYPE_IMPORT_FENCE_FD_INFO_KHR,
- .flags = VK_FENCE_IMPORT_TEMPORARY_BIT,
- .fd = fence_fd,
- .fence = fence,
- .handleType = VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT,
- });
- }
-
- close(nativeFenceFd);
-
- if (semaphore_result != VK_SUCCESS)
- return semaphore_result;
- return fence_result;
+ VkResult semaphore_result = VK_SUCCESS, fence_result = VK_SUCCESS;
+
+ if (semaphore != VK_NULL_HANDLE) {
+ int semaphore_fd = nativeFenceFd >= 0 ? os_dupfd_cloexec(nativeFenceFd) : nativeFenceFd;
+ semaphore_result = radv_ImportSemaphoreFdKHR(
+ device, &(VkImportSemaphoreFdInfoKHR){
+ .sType = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR,
+ .flags = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT,
+ .fd = semaphore_fd,
+ .semaphore = semaphore,
+ .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT,
+ });
+ }
+
+ if (fence != VK_NULL_HANDLE) {
+ int fence_fd = nativeFenceFd >= 0 ? os_dupfd_cloexec(nativeFenceFd) : nativeFenceFd;
+ fence_result =
+ radv_ImportFenceFdKHR(device, &(VkImportFenceFdInfoKHR){
+ .sType = VK_STRUCTURE_TYPE_IMPORT_FENCE_FD_INFO_KHR,
+ .flags = VK_FENCE_IMPORT_TEMPORARY_BIT,
+ .fd = fence_fd,
+ .fence = fence,
+ .handleType = VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT,
+ });
+ }
+
+ close(nativeFenceFd);
+
+ if (semaphore_result != VK_SUCCESS)
+ return semaphore_result;
+ return fence_result;
}
VkResult
-radv_QueueSignalReleaseImageANDROID(
- VkQueue _queue,
- uint32_t waitSemaphoreCount,
- const VkSemaphore* pWaitSemaphores,
- VkImage image,
- int* pNativeFenceFd)
+radv_QueueSignalReleaseImageANDROID(VkQueue _queue, uint32_t waitSemaphoreCount,
+ const VkSemaphore *pWaitSemaphores, VkImage image,
+ int *pNativeFenceFd)
{
- RADV_FROM_HANDLE(radv_queue, queue, _queue);
- VkResult result = VK_SUCCESS;
-
- if (waitSemaphoreCount == 0) {
- if (pNativeFenceFd)
- *pNativeFenceFd = -1;
- return VK_SUCCESS;
- }
-
- int fd = -1;
-
- for (uint32_t i = 0; i < waitSemaphoreCount; ++i) {
- int tmp_fd;
- result = radv_GetSemaphoreFdKHR(radv_device_to_handle(queue->device),
- &(VkSemaphoreGetFdInfoKHR) {
- .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
- .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT,
- .semaphore = pWaitSemaphores[i],
- }, &tmp_fd);
- if (result != VK_SUCCESS) {
- if (fd >= 0)
- close (fd);
- return result;
- }
-
- if (fd < 0)
- fd = tmp_fd;
- else if (tmp_fd >= 0) {
- sync_accumulate("radv", &fd, tmp_fd);
- close(tmp_fd);
- }
- }
-
- if (pNativeFenceFd) {
- *pNativeFenceFd = fd;
- } else if (fd >= 0) {
- close(fd);
- /* We still need to do the exports, to reset the semaphores, but
- * otherwise we don't wait on them. */
- }
- return VK_SUCCESS;
+ RADV_FROM_HANDLE(radv_queue, queue, _queue);
+ VkResult result = VK_SUCCESS;
+
+ if (waitSemaphoreCount == 0) {
+ if (pNativeFenceFd)
+ *pNativeFenceFd = -1;
+ return VK_SUCCESS;
+ }
+
+ int fd = -1;
+
+ for (uint32_t i = 0; i < waitSemaphoreCount; ++i) {
+ int tmp_fd;
+ result =
+ radv_GetSemaphoreFdKHR(radv_device_to_handle(queue->device),
+ &(VkSemaphoreGetFdInfoKHR){
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
+ .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT,
+ .semaphore = pWaitSemaphores[i],
+ },
+ &tmp_fd);
+ if (result != VK_SUCCESS) {
+ if (fd >= 0)
+ close(fd);
+ return result;
+ }
+
+ if (fd < 0)
+ fd = tmp_fd;
+ else if (tmp_fd >= 0) {
+ sync_accumulate("radv", &fd, tmp_fd);
+ close(tmp_fd);
+ }
+ }
+
+ if (pNativeFenceFd) {
+ *pNativeFenceFd = fd;
+ } else if (fd >= 0) {
+ close(fd);
+ /* We still need to do the exports, to reset the semaphores, but
+ * otherwise we don't wait on them. */
+ }
+ return VK_SUCCESS;
}
#endif
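
radv_GetSwapchainGrallocUsageANDROID and its level-26 variant above lean on the small unmask32() helper: each call strips the usage bits the driver knows how to translate into gralloc flags, so any bit still set afterwards marks an unsupported usage and the function bails out. The standalone program below walks through that pattern with made-up flag values; it is an illustration of the idiom, not driver code.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Same logic as the driver's unmask32() above: clear test_mask from *inout_mask and report
 * whether anything was actually cleared. */
static bool
unmask32(uint32_t *inout_mask, uint32_t test_mask)
{
   uint32_t orig_mask = *inout_mask;
   *inout_mask &= ~test_mask;
   return *inout_mask != orig_mask;
}

int
main(void)
{
   /* Toy flag values standing in for VkImageUsageFlags bits. */
   const uint32_t TRANSFER_DST = 0x1u;
   const uint32_t COLOR_ATTACHMENT = 0x2u;
   const uint32_t UNKNOWN_USAGE = 0x80u;

   uint32_t usage = TRANSFER_DST | COLOR_ATTACHMENT | UNKNOWN_USAGE;

   /* The recognized bits are consumed and would map to a gralloc usage flag... */
   assert(unmask32(&usage, TRANSFER_DST | COLOR_ATTACHMENT));

   /* ...and whatever is left over is treated as unsupported, which is why the driver returns
    * VK_ERROR_FORMAT_NOT_SUPPORTED when imageUsage != 0 after the unmask32() calls. */
   assert(usage == UNKNOWN_USAGE);
   return 0;
}
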
@@ -482,29 +470,29 @@ enum {
static inline VkFormat
vk_format_from_android(unsigned android_format, unsigned android_usage)
{
- switch (android_format) {
- case AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM:
- case AHARDWAREBUFFER_FORMAT_R8G8B8X8_UNORM:
- return VK_FORMAT_R8G8B8A8_UNORM;
- case AHARDWAREBUFFER_FORMAT_R8G8B8_UNORM:
- return VK_FORMAT_R8G8B8_UNORM;
- case AHARDWAREBUFFER_FORMAT_R5G6B5_UNORM:
- return VK_FORMAT_R5G6B5_UNORM_PACK16;
- case AHARDWAREBUFFER_FORMAT_R16G16B16A16_FLOAT:
- return VK_FORMAT_R16G16B16A16_SFLOAT;
- case AHARDWAREBUFFER_FORMAT_R10G10B10A2_UNORM:
- return VK_FORMAT_A2B10G10R10_UNORM_PACK32;
- case AHARDWAREBUFFER_FORMAT_Y8Cb8Cr8_420:
- return VK_FORMAT_G8_B8R8_2PLANE_420_UNORM;
- case AHARDWAREBUFFER_FORMAT_IMPLEMENTATION_DEFINED:
- if (android_usage & AHARDWAREBUFFER_USAGE_CAMERA_MASK)
- return VK_FORMAT_G8_B8R8_2PLANE_420_UNORM;
- else
- return VK_FORMAT_R8G8B8_UNORM;
- case AHARDWAREBUFFER_FORMAT_BLOB:
- default:
- return VK_FORMAT_UNDEFINED;
- }
+ switch (android_format) {
+ case AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM:
+ case AHARDWAREBUFFER_FORMAT_R8G8B8X8_UNORM:
+ return VK_FORMAT_R8G8B8A8_UNORM;
+ case AHARDWAREBUFFER_FORMAT_R8G8B8_UNORM:
+ return VK_FORMAT_R8G8B8_UNORM;
+ case AHARDWAREBUFFER_FORMAT_R5G6B5_UNORM:
+ return VK_FORMAT_R5G6B5_UNORM_PACK16;
+ case AHARDWAREBUFFER_FORMAT_R16G16B16A16_FLOAT:
+ return VK_FORMAT_R16G16B16A16_SFLOAT;
+ case AHARDWAREBUFFER_FORMAT_R10G10B10A2_UNORM:
+ return VK_FORMAT_A2B10G10R10_UNORM_PACK32;
+ case AHARDWAREBUFFER_FORMAT_Y8Cb8Cr8_420:
+ return VK_FORMAT_G8_B8R8_2PLANE_420_UNORM;
+ case AHARDWAREBUFFER_FORMAT_IMPLEMENTATION_DEFINED:
+ if (android_usage & AHARDWAREBUFFER_USAGE_CAMERA_MASK)
+ return VK_FORMAT_G8_B8R8_2PLANE_420_UNORM;
+ else
+ return VK_FORMAT_R8G8B8_UNORM;
+ case AHARDWAREBUFFER_FORMAT_BLOB:
+ default:
+ return VK_FORMAT_UNDEFINED;
+ }
}
static inline unsigned
@@ -529,8 +517,7 @@ android_format_from_vk(unsigned vk_format)
}
uint64_t
-radv_ahb_usage_from_vk_usage(const VkImageCreateFlags vk_create,
- const VkImageUsageFlags vk_usage)
+radv_ahb_usage_from_vk_usage(const VkImageCreateFlags vk_create, const VkImageUsageFlags vk_usage)
{
uint64_t ahb_usage = 0;
if (vk_usage & VK_IMAGE_USAGE_SAMPLED_BIT)
@@ -555,145 +542,137 @@ radv_ahb_usage_from_vk_usage(const VkImageCreateFlags vk_create,
}
static VkResult
-get_ahb_buffer_format_properties(
- VkDevice device_h,
- const struct AHardwareBuffer *buffer,
- VkAndroidHardwareBufferFormatPropertiesANDROID *pProperties)
+get_ahb_buffer_format_properties(VkDevice device_h, const struct AHardwareBuffer *buffer,
+ VkAndroidHardwareBufferFormatPropertiesANDROID *pProperties)
{
- RADV_FROM_HANDLE(radv_device, device, device_h);
-
- /* Get a description of buffer contents. */
- AHardwareBuffer_Desc desc;
- AHardwareBuffer_describe(buffer, &desc);
-
- /* Verify description. */
- const uint64_t gpu_usage =
- AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE |
- AHARDWAREBUFFER_USAGE_GPU_COLOR_OUTPUT |
- AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER;
-
- /* "Buffer must be a valid Android hardware buffer object with at least
- * one of the AHARDWAREBUFFER_USAGE_GPU_* usage flags."
- */
- if (!(desc.usage & (gpu_usage)))
- return VK_ERROR_INVALID_EXTERNAL_HANDLE;
-
- /* Fill properties fields based on description. */
- VkAndroidHardwareBufferFormatPropertiesANDROID *p = pProperties;
-
- p->format = vk_format_from_android(desc.format, desc.usage);
- p->externalFormat = (uint64_t) (uintptr_t) p->format;
-
- VkFormatProperties format_properties;
- radv_GetPhysicalDeviceFormatProperties(
- radv_physical_device_to_handle(device->physical_device),
- p->format, &format_properties);
-
- if (desc.usage & AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER)
- p->formatFeatures = format_properties.linearTilingFeatures;
- else
- p->formatFeatures = format_properties.optimalTilingFeatures;
-
- /* "Images can be created with an external format even if the Android hardware
- * buffer has a format which has an equivalent Vulkan format to enable
- * consistent handling of images from sources that might use either category
- * of format. However, all images created with an external format are subject
- * to the valid usage requirements associated with external formats, even if
- * the Android hardware buffer’s format has a Vulkan equivalent."
- *
- * "The formatFeatures member *must* include
- * VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT and at least one of
- * VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT or
- * VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT"
- */
- assert(p->formatFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT);
-
- p->formatFeatures |= VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT;
-
- /* "Implementations may not always be able to determine the color model,
- * numerical range, or chroma offsets of the image contents, so the values
- * in VkAndroidHardwareBufferFormatPropertiesANDROID are only suggestions.
- * Applications should treat these values as sensible defaults to use in
- * the absence of more reliable information obtained through some other
- * means."
- */
- p->samplerYcbcrConversionComponents.r = VK_COMPONENT_SWIZZLE_IDENTITY;
- p->samplerYcbcrConversionComponents.g = VK_COMPONENT_SWIZZLE_IDENTITY;
- p->samplerYcbcrConversionComponents.b = VK_COMPONENT_SWIZZLE_IDENTITY;
- p->samplerYcbcrConversionComponents.a = VK_COMPONENT_SWIZZLE_IDENTITY;
-
- p->suggestedYcbcrModel = VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601;
- p->suggestedYcbcrRange = VK_SAMPLER_YCBCR_RANGE_ITU_FULL;
-
- p->suggestedXChromaOffset = VK_CHROMA_LOCATION_MIDPOINT;
- p->suggestedYChromaOffset = VK_CHROMA_LOCATION_MIDPOINT;
-
- return VK_SUCCESS;
+ RADV_FROM_HANDLE(radv_device, device, device_h);
+
+ /* Get a description of buffer contents. */
+ AHardwareBuffer_Desc desc;
+ AHardwareBuffer_describe(buffer, &desc);
+
+ /* Verify description. */
+ const uint64_t gpu_usage = AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE |
+ AHARDWAREBUFFER_USAGE_GPU_COLOR_OUTPUT |
+ AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER;
+
+ /* "Buffer must be a valid Android hardware buffer object with at least
+ * one of the AHARDWAREBUFFER_USAGE_GPU_* usage flags."
+ */
+ if (!(desc.usage & (gpu_usage)))
+ return VK_ERROR_INVALID_EXTERNAL_HANDLE;
+
+ /* Fill properties fields based on description. */
+ VkAndroidHardwareBufferFormatPropertiesANDROID *p = pProperties;
+
+ p->format = vk_format_from_android(desc.format, desc.usage);
+ p->externalFormat = (uint64_t)(uintptr_t)p->format;
+
+ VkFormatProperties format_properties;
+ radv_GetPhysicalDeviceFormatProperties(radv_physical_device_to_handle(device->physical_device),
+ p->format, &format_properties);
+
+ if (desc.usage & AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER)
+ p->formatFeatures = format_properties.linearTilingFeatures;
+ else
+ p->formatFeatures = format_properties.optimalTilingFeatures;
+
+ /* "Images can be created with an external format even if the Android hardware
+ * buffer has a format which has an equivalent Vulkan format to enable
+ * consistent handling of images from sources that might use either category
+ * of format. However, all images created with an external format are subject
+ * to the valid usage requirements associated with external formats, even if
+ * the Android hardware buffer’s format has a Vulkan equivalent."
+ *
+ * "The formatFeatures member *must* include
+ * VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT and at least one of
+ * VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT or
+ * VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT"
+ */
+ assert(p->formatFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT);
+
+ p->formatFeatures |= VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT;
+
+ /* "Implementations may not always be able to determine the color model,
+ * numerical range, or chroma offsets of the image contents, so the values
+ * in VkAndroidHardwareBufferFormatPropertiesANDROID are only suggestions.
+ * Applications should treat these values as sensible defaults to use in
+ * the absence of more reliable information obtained through some other
+ * means."
+ */
+ p->samplerYcbcrConversionComponents.r = VK_COMPONENT_SWIZZLE_IDENTITY;
+ p->samplerYcbcrConversionComponents.g = VK_COMPONENT_SWIZZLE_IDENTITY;
+ p->samplerYcbcrConversionComponents.b = VK_COMPONENT_SWIZZLE_IDENTITY;
+ p->samplerYcbcrConversionComponents.a = VK_COMPONENT_SWIZZLE_IDENTITY;
+
+ p->suggestedYcbcrModel = VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601;
+ p->suggestedYcbcrRange = VK_SAMPLER_YCBCR_RANGE_ITU_FULL;
+
+ p->suggestedXChromaOffset = VK_CHROMA_LOCATION_MIDPOINT;
+ p->suggestedYChromaOffset = VK_CHROMA_LOCATION_MIDPOINT;
+
+ return VK_SUCCESS;
}
VkResult
-radv_GetAndroidHardwareBufferPropertiesANDROID(
- VkDevice device_h,
- const struct AHardwareBuffer *buffer,
- VkAndroidHardwareBufferPropertiesANDROID *pProperties)
+radv_GetAndroidHardwareBufferPropertiesANDROID(VkDevice device_h,
+ const struct AHardwareBuffer *buffer,
+ VkAndroidHardwareBufferPropertiesANDROID *pProperties)
{
- RADV_FROM_HANDLE(radv_device, dev, device_h);
- struct radv_physical_device *pdevice = dev->physical_device;
-
- VkAndroidHardwareBufferFormatPropertiesANDROID *format_prop =
- vk_find_struct(pProperties->pNext,
- ANDROID_HARDWARE_BUFFER_FORMAT_PROPERTIES_ANDROID);
-
- /* Fill format properties of an Android hardware buffer. */
- if (format_prop)
- get_ahb_buffer_format_properties(device_h, buffer, format_prop);
-
- /* NOTE - We support buffers with only one handle but do not error on
- * multiple handle case. Reason is that we want to support YUV formats
- * where we have many logical planes but they all point to the same
- * buffer, like is the case with VK_FORMAT_G8_B8R8_2PLANE_420_UNORM.
- */
- const native_handle_t *handle =
- AHardwareBuffer_getNativeHandle(buffer);
- int dma_buf = (handle && handle->numFds) ? handle->data[0] : -1;
- if (dma_buf < 0)
- return VK_ERROR_INVALID_EXTERNAL_HANDLE;
-
- /* All memory types. */
- uint32_t memory_types = (1u << pdevice->memory_properties.memoryTypeCount) - 1;
-
- pProperties->allocationSize = lseek(dma_buf, 0, SEEK_END);
- pProperties->memoryTypeBits = memory_types;
-
- return VK_SUCCESS;
+ RADV_FROM_HANDLE(radv_device, dev, device_h);
+ struct radv_physical_device *pdevice = dev->physical_device;
+
+ VkAndroidHardwareBufferFormatPropertiesANDROID *format_prop =
+ vk_find_struct(pProperties->pNext, ANDROID_HARDWARE_BUFFER_FORMAT_PROPERTIES_ANDROID);
+
+ /* Fill format properties of an Android hardware buffer. */
+ if (format_prop)
+ get_ahb_buffer_format_properties(device_h, buffer, format_prop);
+
+ /* NOTE - We support buffers with only one handle but do not error on
+ * multiple handle case. Reason is that we want to support YUV formats
+ * where we have many logical planes but they all point to the same
+ * buffer, like is the case with VK_FORMAT_G8_B8R8_2PLANE_420_UNORM.
+ */
+ const native_handle_t *handle = AHardwareBuffer_getNativeHandle(buffer);
+ int dma_buf = (handle && handle->numFds) ? handle->data[0] : -1;
+ if (dma_buf < 0)
+ return VK_ERROR_INVALID_EXTERNAL_HANDLE;
+
+ /* All memory types. */
+ uint32_t memory_types = (1u << pdevice->memory_properties.memoryTypeCount) - 1;
+
+ pProperties->allocationSize = lseek(dma_buf, 0, SEEK_END);
+ pProperties->memoryTypeBits = memory_types;
+
+ return VK_SUCCESS;
}
VkResult
-radv_GetMemoryAndroidHardwareBufferANDROID(
- VkDevice device_h,
- const VkMemoryGetAndroidHardwareBufferInfoANDROID *pInfo,
- struct AHardwareBuffer **pBuffer)
+radv_GetMemoryAndroidHardwareBufferANDROID(VkDevice device_h,
+ const VkMemoryGetAndroidHardwareBufferInfoANDROID *pInfo,
+ struct AHardwareBuffer **pBuffer)
{
- RADV_FROM_HANDLE(radv_device_memory, mem, pInfo->memory);
-
- /* This should always be set due to the export handle types being set on
- * allocation. */
- assert(mem->android_hardware_buffer);
-
- /* Some quotes from Vulkan spec:
- *
- * "If the device memory was created by importing an Android hardware
- * buffer, vkGetMemoryAndroidHardwareBufferANDROID must return that same
- * Android hardware buffer object."
- *
- * "VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID must
- * have been included in VkExportMemoryAllocateInfo::handleTypes when
- * memory was created."
- */
- *pBuffer = mem->android_hardware_buffer;
- /* Increase refcount. */
- AHardwareBuffer_acquire(mem->android_hardware_buffer);
- return VK_SUCCESS;
+ RADV_FROM_HANDLE(radv_device_memory, mem, pInfo->memory);
+
+ /* This should always be set due to the export handle types being set on
+ * allocation. */
+ assert(mem->android_hardware_buffer);
+
+ /* Some quotes from Vulkan spec:
+ *
+ * "If the device memory was created by importing an Android hardware
+ * buffer, vkGetMemoryAndroidHardwareBufferANDROID must return that same
+ * Android hardware buffer object."
+ *
+ * "VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID must
+ * have been included in VkExportMemoryAllocateInfo::handleTypes when
+ * memory was created."
+ */
+ *pBuffer = mem->android_hardware_buffer;
+ /* Increase refcount. */
+ AHardwareBuffer_acquire(mem->android_hardware_buffer);
+ return VK_SUCCESS;
}
#endif
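
radv_GetAndroidHardwareBufferPropertiesANDROID above fills allocationSize by seeking to the end of the imported dma-buf fd. The small standalone program below shows the same lseek(fd, 0, SEEK_END) idiom against an ordinary file path passed on the command line; it is a sketch of the idiom only, not driver code, and assumes a POSIX environment.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(int argc, char **argv)
{
   if (argc < 2) {
      fprintf(stderr, "usage: %s <file>\n", argv[0]);
      return 1;
   }

   int fd = open(argv[1], O_RDONLY);
   if (fd < 0)
      return 1;

   /* Seeking to the end yields the offset of the end of the object, i.e. its size in bytes.
    * The driver applies the same idiom to the imported dma-buf fd to report allocationSize. */
   off_t size = lseek(fd, 0, SEEK_END);
   printf("%lld bytes\n", (long long)size);

   close(fd);
   return 0;
}
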
@@ -702,172 +681,162 @@ VkFormat
radv_select_android_external_format(const void *next, VkFormat default_format)
{
#if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER
- const VkExternalFormatANDROID *android_format =
- vk_find_struct_const(next, EXTERNAL_FORMAT_ANDROID);
+ const VkExternalFormatANDROID *android_format =
+ vk_find_struct_const(next, EXTERNAL_FORMAT_ANDROID);
- if (android_format && android_format->externalFormat) {
- return (VkFormat)android_format->externalFormat;
- }
+ if (android_format && android_format->externalFormat) {
+ return (VkFormat)android_format->externalFormat;
+ }
#endif
- return default_format;
+ return default_format;
}
-
VkResult
-radv_import_ahb_memory(struct radv_device *device,
- struct radv_device_memory *mem,
- unsigned priority,
- const VkImportAndroidHardwareBufferInfoANDROID *info)
+radv_import_ahb_memory(struct radv_device *device, struct radv_device_memory *mem,
+ unsigned priority, const VkImportAndroidHardwareBufferInfoANDROID *info)
{
#if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER
- /* Import from AHardwareBuffer to radv_device_memory. */
- const native_handle_t *handle =
- AHardwareBuffer_getNativeHandle(info->buffer);
-
- /* NOTE - We support buffers with only one handle but do not error on
- * multiple handle case. Reason is that we want to support YUV formats
- * where we have many logical planes but they all point to the same
- * buffer, like is the case with VK_FORMAT_G8_B8R8_2PLANE_420_UNORM.
- */
- int dma_buf = (handle && handle->numFds) ? handle->data[0] : -1;
- if (dma_buf < 0)
- return VK_ERROR_INVALID_EXTERNAL_HANDLE;
-
- uint64_t alloc_size = 0;
- mem->bo = device->ws->buffer_from_fd(device->ws, dma_buf,
- priority, &alloc_size);
- if (!mem->bo)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
-
- if (mem->image) {
- struct radeon_bo_metadata metadata;
- device->ws->buffer_get_metadata(device->ws, mem->bo, &metadata);
-
- struct radv_image_create_info create_info = {
- .no_metadata_planes = true,
- .bo_metadata = &metadata
- };
-
- VkResult result = radv_image_create_layout(device, create_info, NULL, mem->image);
- if (result != VK_SUCCESS) {
- device->ws->buffer_destroy(device->ws, mem->bo);
- mem->bo = NULL;
- return result;
- }
-
- if (alloc_size < mem->image->size) {
- device->ws->buffer_destroy(device->ws, mem->bo);
- mem->bo = NULL;
- return VK_ERROR_INVALID_EXTERNAL_HANDLE;
- }
- } else if (mem->buffer) {
- if (alloc_size < mem->buffer->size) {
- device->ws->buffer_destroy(device->ws, mem->bo);
- mem->bo = NULL;
- return VK_ERROR_INVALID_EXTERNAL_HANDLE;
- }
- }
-
- /* "If the vkAllocateMemory command succeeds, the implementation must
- * acquire a reference to the imported hardware buffer, which it must
- * release when the device memory object is freed. If the command fails,
- * the implementation must not retain a reference."
- */
- AHardwareBuffer_acquire(info->buffer);
- mem->android_hardware_buffer = info->buffer;
-
- return VK_SUCCESS;
+ /* Import from AHardwareBuffer to radv_device_memory. */
+ const native_handle_t *handle = AHardwareBuffer_getNativeHandle(info->buffer);
+
+ /* NOTE - We support buffers with only one handle but do not error on
+ * multiple handle case. Reason is that we want to support YUV formats
+ * where we have many logical planes but they all point to the same
+ * buffer, like is the case with VK_FORMAT_G8_B8R8_2PLANE_420_UNORM.
+ */
+ int dma_buf = (handle && handle->numFds) ? handle->data[0] : -1;
+ if (dma_buf < 0)
+ return VK_ERROR_INVALID_EXTERNAL_HANDLE;
+
+ uint64_t alloc_size = 0;
+ mem->bo = device->ws->buffer_from_fd(device->ws, dma_buf, priority, &alloc_size);
+ if (!mem->bo)
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+ if (mem->image) {
+ struct radeon_bo_metadata metadata;
+ device->ws->buffer_get_metadata(device->ws, mem->bo, &metadata);
+
+ struct radv_image_create_info create_info = {.no_metadata_planes = true,
+ .bo_metadata = &metadata};
+
+ VkResult result = radv_image_create_layout(device, create_info, NULL, mem->image);
+ if (result != VK_SUCCESS) {
+ device->ws->buffer_destroy(device->ws, mem->bo);
+ mem->bo = NULL;
+ return result;
+ }
+
+ if (alloc_size < mem->image->size) {
+ device->ws->buffer_destroy(device->ws, mem->bo);
+ mem->bo = NULL;
+ return VK_ERROR_INVALID_EXTERNAL_HANDLE;
+ }
+ } else if (mem->buffer) {
+ if (alloc_size < mem->buffer->size) {
+ device->ws->buffer_destroy(device->ws, mem->bo);
+ mem->bo = NULL;
+ return VK_ERROR_INVALID_EXTERNAL_HANDLE;
+ }
+ }
+
+ /* "If the vkAllocateMemory command succeeds, the implementation must
+ * acquire a reference to the imported hardware buffer, which it must
+ * release when the device memory object is freed. If the command fails,
+ * the implementation must not retain a reference."
+ */
+ AHardwareBuffer_acquire(info->buffer);
+ mem->android_hardware_buffer = info->buffer;
+
+ return VK_SUCCESS;
#else /* RADV_SUPPORT_ANDROID_HARDWARE_BUFFER */
- return VK_ERROR_EXTENSION_NOT_PRESENT;
+ return VK_ERROR_EXTENSION_NOT_PRESENT;
#endif
}
VkResult
-radv_create_ahb_memory(struct radv_device *device,
- struct radv_device_memory *mem,
- unsigned priority,
- const VkMemoryAllocateInfo *pAllocateInfo)
+radv_create_ahb_memory(struct radv_device *device, struct radv_device_memory *mem,
+ unsigned priority, const VkMemoryAllocateInfo *pAllocateInfo)
{
#if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER
- const VkMemoryDedicatedAllocateInfo *dedicated_info =
- vk_find_struct_const(pAllocateInfo->pNext,
- MEMORY_DEDICATED_ALLOCATE_INFO);
-
- uint32_t w = 0;
- uint32_t h = 1;
- uint32_t layers = 1;
- uint32_t format = 0;
- uint64_t usage = 0;
-
- /* If caller passed dedicated information. */
- if (dedicated_info && dedicated_info->image) {
- RADV_FROM_HANDLE(radv_image, image, dedicated_info->image);
- w = image->info.width;
- h = image->info.height;
- layers = image->info.array_size;
- format = android_format_from_vk(image->vk_format);
- usage = radv_ahb_usage_from_vk_usage(image->flags, image->usage);
- } else if (dedicated_info && dedicated_info->buffer) {
- RADV_FROM_HANDLE(radv_buffer, buffer, dedicated_info->buffer);
- w = buffer->size;
- format = AHARDWAREBUFFER_FORMAT_BLOB;
- usage = AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN |
- AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN;
- } else {
- w = pAllocateInfo->allocationSize;
- format = AHARDWAREBUFFER_FORMAT_BLOB;
- usage = AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN |
- AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN;
- }
-
- struct AHardwareBuffer *android_hardware_buffer = NULL;
- struct AHardwareBuffer_Desc desc = {
- .width = w,
- .height = h,
- .layers = layers,
- .format = format,
- .usage = usage,
- };
-
- if (AHardwareBuffer_allocate(&desc, &android_hardware_buffer) != 0)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
-
- mem->android_hardware_buffer = android_hardware_buffer;
-
- const struct VkImportAndroidHardwareBufferInfoANDROID import_info = {
- .buffer = mem->android_hardware_buffer,
- };
-
- VkResult result = radv_import_ahb_memory(device, mem, priority, &import_info);
- if (result != VK_SUCCESS)
- AHardwareBuffer_release(mem->android_hardware_buffer);
- return result;
+ const VkMemoryDedicatedAllocateInfo *dedicated_info =
+ vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO);
+
+ uint32_t w = 0;
+ uint32_t h = 1;
+ uint32_t layers = 1;
+ uint32_t format = 0;
+ uint64_t usage = 0;
+
+ /* If caller passed dedicated information. */
+ if (dedicated_info && dedicated_info->image) {
+ RADV_FROM_HANDLE(radv_image, image, dedicated_info->image);
+ w = image->info.width;
+ h = image->info.height;
+ layers = image->info.array_size;
+ format = android_format_from_vk(image->vk_format);
+ usage = radv_ahb_usage_from_vk_usage(image->flags, image->usage);
+ } else if (dedicated_info && dedicated_info->buffer) {
+ RADV_FROM_HANDLE(radv_buffer, buffer, dedicated_info->buffer);
+ w = buffer->size;
+ format = AHARDWAREBUFFER_FORMAT_BLOB;
+ usage = AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN | AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN;
+ } else {
+ w = pAllocateInfo->allocationSize;
+ format = AHARDWAREBUFFER_FORMAT_BLOB;
+ usage = AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN | AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN;
+ }
+
+ struct AHardwareBuffer *android_hardware_buffer = NULL;
+ struct AHardwareBuffer_Desc desc = {
+ .width = w,
+ .height = h,
+ .layers = layers,
+ .format = format,
+ .usage = usage,
+ };
+
+ if (AHardwareBuffer_allocate(&desc, &android_hardware_buffer) != 0)
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+ mem->android_hardware_buffer = android_hardware_buffer;
+
+ const struct VkImportAndroidHardwareBufferInfoANDROID import_info = {
+ .buffer = mem->android_hardware_buffer,
+ };
+
+ VkResult result = radv_import_ahb_memory(device, mem, priority, &import_info);
+ if (result != VK_SUCCESS)
+ AHardwareBuffer_release(mem->android_hardware_buffer);
+ return result;
#else /* RADV_SUPPORT_ANDROID_HARDWARE_BUFFER */
- return VK_ERROR_EXTENSION_NOT_PRESENT;
+ return VK_ERROR_EXTENSION_NOT_PRESENT;
#endif
}
-bool radv_android_gralloc_supports_format(VkFormat format, VkImageUsageFlagBits usage) {
+bool
+radv_android_gralloc_supports_format(VkFormat format, VkImageUsageFlagBits usage)
+{
#if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER
- /* Ideally we check Gralloc for what it supports and then merge that with the radv
- format support, but there is no easy gralloc query besides just creating an image.
- That seems a bit on the expensive side, so just hardcode for now. */
- /* TODO: Add multi-plane formats after confirming everything works between radeonsi
- and radv. */
- switch(format) {
- case VK_FORMAT_R8G8B8A8_UNORM:
- case VK_FORMAT_R5G6B5_UNORM_PACK16:
- return true;
- case VK_FORMAT_R8_UNORM:
- case VK_FORMAT_R8G8_UNORM:
- return !(usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);
- default:
- return false;
- }
+ /* Ideally we check Gralloc for what it supports and then merge that with the radv
+ format support, but there is no easy gralloc query besides just creating an image.
+ That seems a bit on the expensive side, so just hardcode for now. */
+ /* TODO: Add multi-plane formats after confirming everything works between radeonsi
+ and radv. */
+ switch (format) {
+ case VK_FORMAT_R8G8B8A8_UNORM:
+ case VK_FORMAT_R5G6B5_UNORM_PACK16:
+ return true;
+ case VK_FORMAT_R8_UNORM:
+ case VK_FORMAT_R8G8_UNORM:
+ return !(usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);
+ default:
+ return false;
+ }
#else
- (void)format;
- (void)usage;
- return false;
+ (void)format;
+ (void)usage;
+ return false;
#endif
}
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index f1751cbfc17..4c015b98d57 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -25,777 +25,736 @@
* IN THE SOFTWARE.
*/
+#include "radv_cs.h"
+#include "radv_debug.h"
+#include "radv_meta.h"
#include "radv_private.h"
#include "radv_radeon_winsys.h"
#include "radv_shader.h"
-#include "radv_cs.h"
#include "sid.h"
#include "vk_format.h"
#include "vk_util.h"
-#include "radv_debug.h"
-#include "radv_meta.h"
#include "ac_debug.h"
enum {
- RADV_PREFETCH_VBO_DESCRIPTORS = (1 << 0),
- RADV_PREFETCH_VS = (1 << 1),
- RADV_PREFETCH_TCS = (1 << 2),
- RADV_PREFETCH_TES = (1 << 3),
- RADV_PREFETCH_GS = (1 << 4),
- RADV_PREFETCH_PS = (1 << 5),
- RADV_PREFETCH_SHADERS = (RADV_PREFETCH_VS |
- RADV_PREFETCH_TCS |
- RADV_PREFETCH_TES |
- RADV_PREFETCH_GS |
- RADV_PREFETCH_PS)
+ RADV_PREFETCH_VBO_DESCRIPTORS = (1 << 0),
+ RADV_PREFETCH_VS = (1 << 1),
+ RADV_PREFETCH_TCS = (1 << 2),
+ RADV_PREFETCH_TES = (1 << 3),
+ RADV_PREFETCH_GS = (1 << 4),
+ RADV_PREFETCH_PS = (1 << 5),
+ RADV_PREFETCH_SHADERS = (RADV_PREFETCH_VS | RADV_PREFETCH_TCS | RADV_PREFETCH_TES |
+ RADV_PREFETCH_GS | RADV_PREFETCH_PS)
};
static void radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- VkImageLayout src_layout,
- bool src_render_loop,
- VkImageLayout dst_layout,
- bool dst_render_loop,
- uint32_t src_family,
- uint32_t dst_family,
- const VkImageSubresourceRange *range,
- struct radv_sample_locations_state *sample_locs);
+ struct radv_image *image, VkImageLayout src_layout,
+ bool src_render_loop, VkImageLayout dst_layout,
+ bool dst_render_loop, uint32_t src_family,
+ uint32_t dst_family, const VkImageSubresourceRange *range,
+ struct radv_sample_locations_state *sample_locs);
const struct radv_dynamic_state default_dynamic_state = {
- .viewport = {
- .count = 0,
- },
- .scissor = {
- .count = 0,
- },
- .line_width = 1.0f,
- .depth_bias = {
- .bias = 0.0f,
- .clamp = 0.0f,
- .slope = 0.0f,
- },
- .blend_constants = { 0.0f, 0.0f, 0.0f, 0.0f },
- .depth_bounds = {
- .min = 0.0f,
- .max = 1.0f,
- },
- .stencil_compare_mask = {
- .front = ~0u,
- .back = ~0u,
- },
- .stencil_write_mask = {
- .front = ~0u,
- .back = ~0u,
- },
- .stencil_reference = {
- .front = 0u,
- .back = 0u,
- },
- .line_stipple = {
- .factor = 0u,
- .pattern = 0u,
- },
- .cull_mode = 0u,
- .front_face = 0u,
- .primitive_topology = 0u,
- .fragment_shading_rate = {
- .size = { 1u, 1u },
- .combiner_ops = { VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR,
- VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR
- },
- },
+ .viewport =
+ {
+ .count = 0,
+ },
+ .scissor =
+ {
+ .count = 0,
+ },
+ .line_width = 1.0f,
+ .depth_bias =
+ {
+ .bias = 0.0f,
+ .clamp = 0.0f,
+ .slope = 0.0f,
+ },
+ .blend_constants = {0.0f, 0.0f, 0.0f, 0.0f},
+ .depth_bounds =
+ {
+ .min = 0.0f,
+ .max = 1.0f,
+ },
+ .stencil_compare_mask =
+ {
+ .front = ~0u,
+ .back = ~0u,
+ },
+ .stencil_write_mask =
+ {
+ .front = ~0u,
+ .back = ~0u,
+ },
+ .stencil_reference =
+ {
+ .front = 0u,
+ .back = 0u,
+ },
+ .line_stipple =
+ {
+ .factor = 0u,
+ .pattern = 0u,
+ },
+ .cull_mode = 0u,
+ .front_face = 0u,
+ .primitive_topology = 0u,
+ .fragment_shading_rate =
+ {
+ .size = {1u, 1u},
+ .combiner_ops = {VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR,
+ VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR},
+ },
};
static void
-radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_dynamic_state *src)
-{
- struct radv_dynamic_state *dest = &cmd_buffer->state.dynamic;
- uint64_t copy_mask = src->mask;
- uint64_t dest_mask = 0;
-
- dest->discard_rectangle.count = src->discard_rectangle.count;
- dest->sample_location.count = src->sample_location.count;
-
- if (copy_mask & RADV_DYNAMIC_VIEWPORT) {
- if (dest->viewport.count != src->viewport.count) {
- dest->viewport.count = src->viewport.count;
- dest_mask |= RADV_DYNAMIC_VIEWPORT;
- }
-
- if (memcmp(&dest->viewport.viewports, &src->viewport.viewports,
- src->viewport.count * sizeof(VkViewport))) {
- typed_memcpy(dest->viewport.viewports,
- src->viewport.viewports,
- src->viewport.count);
- dest_mask |= RADV_DYNAMIC_VIEWPORT;
- }
- }
-
- if (copy_mask & RADV_DYNAMIC_SCISSOR) {
- if (dest->scissor.count != src->scissor.count) {
- dest->scissor.count = src->scissor.count;
- dest_mask |= RADV_DYNAMIC_SCISSOR;
- }
-
- if (memcmp(&dest->scissor.scissors, &src->scissor.scissors,
- src->scissor.count * sizeof(VkRect2D))) {
- typed_memcpy(dest->scissor.scissors,
- src->scissor.scissors, src->scissor.count);
- dest_mask |= RADV_DYNAMIC_SCISSOR;
- }
- }
-
- if (copy_mask & RADV_DYNAMIC_LINE_WIDTH) {
- if (dest->line_width != src->line_width) {
- dest->line_width = src->line_width;
- dest_mask |= RADV_DYNAMIC_LINE_WIDTH;
- }
- }
-
- if (copy_mask & RADV_DYNAMIC_DEPTH_BIAS) {
- if (memcmp(&dest->depth_bias, &src->depth_bias,
- sizeof(src->depth_bias))) {
- dest->depth_bias = src->depth_bias;
- dest_mask |= RADV_DYNAMIC_DEPTH_BIAS;
- }
- }
-
- if (copy_mask & RADV_DYNAMIC_BLEND_CONSTANTS) {
- if (memcmp(&dest->blend_constants, &src->blend_constants,
- sizeof(src->blend_constants))) {
- typed_memcpy(dest->blend_constants,
- src->blend_constants, 4);
- dest_mask |= RADV_DYNAMIC_BLEND_CONSTANTS;
- }
- }
-
- if (copy_mask & RADV_DYNAMIC_DEPTH_BOUNDS) {
- if (memcmp(&dest->depth_bounds, &src->depth_bounds,
- sizeof(src->depth_bounds))) {
- dest->depth_bounds = src->depth_bounds;
- dest_mask |= RADV_DYNAMIC_DEPTH_BOUNDS;
- }
- }
-
- if (copy_mask & RADV_DYNAMIC_STENCIL_COMPARE_MASK) {
- if (memcmp(&dest->stencil_compare_mask,
- &src->stencil_compare_mask,
- sizeof(src->stencil_compare_mask))) {
- dest->stencil_compare_mask = src->stencil_compare_mask;
- dest_mask |= RADV_DYNAMIC_STENCIL_COMPARE_MASK;
- }
- }
-
- if (copy_mask & RADV_DYNAMIC_STENCIL_WRITE_MASK) {
- if (memcmp(&dest->stencil_write_mask, &src->stencil_write_mask,
- sizeof(src->stencil_write_mask))) {
- dest->stencil_write_mask = src->stencil_write_mask;
- dest_mask |= RADV_DYNAMIC_STENCIL_WRITE_MASK;
- }
- }
-
- if (copy_mask & RADV_DYNAMIC_STENCIL_REFERENCE) {
- if (memcmp(&dest->stencil_reference, &src->stencil_reference,
- sizeof(src->stencil_reference))) {
- dest->stencil_reference = src->stencil_reference;
- dest_mask |= RADV_DYNAMIC_STENCIL_REFERENCE;
- }
- }
-
- if (copy_mask & RADV_DYNAMIC_DISCARD_RECTANGLE) {
- if (memcmp(&dest->discard_rectangle.rectangles, &src->discard_rectangle.rectangles,
- src->discard_rectangle.count * sizeof(VkRect2D))) {
- typed_memcpy(dest->discard_rectangle.rectangles,
- src->discard_rectangle.rectangles,
- src->discard_rectangle.count);
- dest_mask |= RADV_DYNAMIC_DISCARD_RECTANGLE;
- }
- }
-
- if (copy_mask & RADV_DYNAMIC_SAMPLE_LOCATIONS) {
- if (dest->sample_location.per_pixel != src->sample_location.per_pixel ||
- dest->sample_location.grid_size.width != src->sample_location.grid_size.width ||
- dest->sample_location.grid_size.height != src->sample_location.grid_size.height ||
- memcmp(&dest->sample_location.locations,
- &src->sample_location.locations,
- src->sample_location.count * sizeof(VkSampleLocationEXT))) {
- dest->sample_location.per_pixel = src->sample_location.per_pixel;
- dest->sample_location.grid_size = src->sample_location.grid_size;
- typed_memcpy(dest->sample_location.locations,
- src->sample_location.locations,
- src->sample_location.count);
- dest_mask |= RADV_DYNAMIC_SAMPLE_LOCATIONS;
- }
- }
-
- if (copy_mask & RADV_DYNAMIC_LINE_STIPPLE) {
- if (memcmp(&dest->line_stipple, &src->line_stipple,
- sizeof(src->line_stipple))) {
- dest->line_stipple = src->line_stipple;
- dest_mask |= RADV_DYNAMIC_LINE_STIPPLE;
- }
- }
-
- if (copy_mask & RADV_DYNAMIC_CULL_MODE) {
- if (dest->cull_mode != src->cull_mode) {
- dest->cull_mode = src->cull_mode;
- dest_mask |= RADV_DYNAMIC_CULL_MODE;
- }
- }
-
- if (copy_mask & RADV_DYNAMIC_FRONT_FACE) {
- if (dest->front_face != src->front_face) {
- dest->front_face = src->front_face;
- dest_mask |= RADV_DYNAMIC_FRONT_FACE;
- }
- }
-
- if (copy_mask & RADV_DYNAMIC_PRIMITIVE_TOPOLOGY) {
- if (dest->primitive_topology != src->primitive_topology) {
- dest->primitive_topology = src->primitive_topology;
- dest_mask |= RADV_DYNAMIC_PRIMITIVE_TOPOLOGY;
- }
- }
-
- if (copy_mask & RADV_DYNAMIC_DEPTH_TEST_ENABLE) {
- if (dest->depth_test_enable != src->depth_test_enable) {
- dest->depth_test_enable = src->depth_test_enable;
- dest_mask |= RADV_DYNAMIC_DEPTH_TEST_ENABLE;
- }
- }
-
- if (copy_mask & RADV_DYNAMIC_DEPTH_WRITE_ENABLE) {
- if (dest->depth_write_enable != src->depth_write_enable) {
- dest->depth_write_enable = src->depth_write_enable;
- dest_mask |= RADV_DYNAMIC_DEPTH_WRITE_ENABLE;
- }
- }
-
- if (copy_mask & RADV_DYNAMIC_DEPTH_COMPARE_OP) {
- if (dest->depth_compare_op != src->depth_compare_op) {
- dest->depth_compare_op = src->depth_compare_op;
- dest_mask |= RADV_DYNAMIC_DEPTH_COMPARE_OP;
- }
- }
-
- if (copy_mask & RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE) {
- if (dest->depth_bounds_test_enable != src->depth_bounds_test_enable) {
- dest->depth_bounds_test_enable = src->depth_bounds_test_enable;
- dest_mask |= RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE;
- }
- }
-
- if (copy_mask & RADV_DYNAMIC_STENCIL_TEST_ENABLE) {
- if (dest->stencil_test_enable != src->stencil_test_enable) {
- dest->stencil_test_enable = src->stencil_test_enable;
- dest_mask |= RADV_DYNAMIC_STENCIL_TEST_ENABLE;
- }
- }
-
- if (copy_mask & RADV_DYNAMIC_STENCIL_OP) {
- if (memcmp(&dest->stencil_op, &src->stencil_op,
- sizeof(src->stencil_op))) {
- dest->stencil_op = src->stencil_op;
- dest_mask |= RADV_DYNAMIC_STENCIL_OP;
- }
- }
-
- if (copy_mask & RADV_DYNAMIC_FRAGMENT_SHADING_RATE) {
- if (memcmp(&dest->fragment_shading_rate,
- &src->fragment_shading_rate,
- sizeof(src->fragment_shading_rate))) {
- dest->fragment_shading_rate = src->fragment_shading_rate;
- dest_mask |= RADV_DYNAMIC_FRAGMENT_SHADING_RATE;
- }
- }
-
- cmd_buffer->state.dirty |= dest_mask;
+radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer, const struct radv_dynamic_state *src)
+{
+ struct radv_dynamic_state *dest = &cmd_buffer->state.dynamic;
+ uint64_t copy_mask = src->mask;
+ uint64_t dest_mask = 0;
+
+ dest->discard_rectangle.count = src->discard_rectangle.count;
+ dest->sample_location.count = src->sample_location.count;
+
+ if (copy_mask & RADV_DYNAMIC_VIEWPORT) {
+ if (dest->viewport.count != src->viewport.count) {
+ dest->viewport.count = src->viewport.count;
+ dest_mask |= RADV_DYNAMIC_VIEWPORT;
+ }
+
+ if (memcmp(&dest->viewport.viewports, &src->viewport.viewports,
+ src->viewport.count * sizeof(VkViewport))) {
+ typed_memcpy(dest->viewport.viewports, src->viewport.viewports, src->viewport.count);
+ dest_mask |= RADV_DYNAMIC_VIEWPORT;
+ }
+ }
+
+ if (copy_mask & RADV_DYNAMIC_SCISSOR) {
+ if (dest->scissor.count != src->scissor.count) {
+ dest->scissor.count = src->scissor.count;
+ dest_mask |= RADV_DYNAMIC_SCISSOR;
+ }
+
+ if (memcmp(&dest->scissor.scissors, &src->scissor.scissors,
+ src->scissor.count * sizeof(VkRect2D))) {
+ typed_memcpy(dest->scissor.scissors, src->scissor.scissors, src->scissor.count);
+ dest_mask |= RADV_DYNAMIC_SCISSOR;
+ }
+ }
+
+ if (copy_mask & RADV_DYNAMIC_LINE_WIDTH) {
+ if (dest->line_width != src->line_width) {
+ dest->line_width = src->line_width;
+ dest_mask |= RADV_DYNAMIC_LINE_WIDTH;
+ }
+ }
+
+ if (copy_mask & RADV_DYNAMIC_DEPTH_BIAS) {
+ if (memcmp(&dest->depth_bias, &src->depth_bias, sizeof(src->depth_bias))) {
+ dest->depth_bias = src->depth_bias;
+ dest_mask |= RADV_DYNAMIC_DEPTH_BIAS;
+ }
+ }
+
+ if (copy_mask & RADV_DYNAMIC_BLEND_CONSTANTS) {
+ if (memcmp(&dest->blend_constants, &src->blend_constants, sizeof(src->blend_constants))) {
+ typed_memcpy(dest->blend_constants, src->blend_constants, 4);
+ dest_mask |= RADV_DYNAMIC_BLEND_CONSTANTS;
+ }
+ }
+
+ if (copy_mask & RADV_DYNAMIC_DEPTH_BOUNDS) {
+ if (memcmp(&dest->depth_bounds, &src->depth_bounds, sizeof(src->depth_bounds))) {
+ dest->depth_bounds = src->depth_bounds;
+ dest_mask |= RADV_DYNAMIC_DEPTH_BOUNDS;
+ }
+ }
+
+ if (copy_mask & RADV_DYNAMIC_STENCIL_COMPARE_MASK) {
+ if (memcmp(&dest->stencil_compare_mask, &src->stencil_compare_mask,
+ sizeof(src->stencil_compare_mask))) {
+ dest->stencil_compare_mask = src->stencil_compare_mask;
+ dest_mask |= RADV_DYNAMIC_STENCIL_COMPARE_MASK;
+ }
+ }
+
+ if (copy_mask & RADV_DYNAMIC_STENCIL_WRITE_MASK) {
+ if (memcmp(&dest->stencil_write_mask, &src->stencil_write_mask,
+ sizeof(src->stencil_write_mask))) {
+ dest->stencil_write_mask = src->stencil_write_mask;
+ dest_mask |= RADV_DYNAMIC_STENCIL_WRITE_MASK;
+ }
+ }
+
+ if (copy_mask & RADV_DYNAMIC_STENCIL_REFERENCE) {
+ if (memcmp(&dest->stencil_reference, &src->stencil_reference,
+ sizeof(src->stencil_reference))) {
+ dest->stencil_reference = src->stencil_reference;
+ dest_mask |= RADV_DYNAMIC_STENCIL_REFERENCE;
+ }
+ }
+
+ if (copy_mask & RADV_DYNAMIC_DISCARD_RECTANGLE) {
+ if (memcmp(&dest->discard_rectangle.rectangles, &src->discard_rectangle.rectangles,
+ src->discard_rectangle.count * sizeof(VkRect2D))) {
+ typed_memcpy(dest->discard_rectangle.rectangles, src->discard_rectangle.rectangles,
+ src->discard_rectangle.count);
+ dest_mask |= RADV_DYNAMIC_DISCARD_RECTANGLE;
+ }
+ }
+
+ if (copy_mask & RADV_DYNAMIC_SAMPLE_LOCATIONS) {
+ if (dest->sample_location.per_pixel != src->sample_location.per_pixel ||
+ dest->sample_location.grid_size.width != src->sample_location.grid_size.width ||
+ dest->sample_location.grid_size.height != src->sample_location.grid_size.height ||
+ memcmp(&dest->sample_location.locations, &src->sample_location.locations,
+ src->sample_location.count * sizeof(VkSampleLocationEXT))) {
+ dest->sample_location.per_pixel = src->sample_location.per_pixel;
+ dest->sample_location.grid_size = src->sample_location.grid_size;
+ typed_memcpy(dest->sample_location.locations, src->sample_location.locations,
+ src->sample_location.count);
+ dest_mask |= RADV_DYNAMIC_SAMPLE_LOCATIONS;
+ }
+ }
+
+ if (copy_mask & RADV_DYNAMIC_LINE_STIPPLE) {
+ if (memcmp(&dest->line_stipple, &src->line_stipple, sizeof(src->line_stipple))) {
+ dest->line_stipple = src->line_stipple;
+ dest_mask |= RADV_DYNAMIC_LINE_STIPPLE;
+ }
+ }
+
+ if (copy_mask & RADV_DYNAMIC_CULL_MODE) {
+ if (dest->cull_mode != src->cull_mode) {
+ dest->cull_mode = src->cull_mode;
+ dest_mask |= RADV_DYNAMIC_CULL_MODE;
+ }
+ }
+
+ if (copy_mask & RADV_DYNAMIC_FRONT_FACE) {
+ if (dest->front_face != src->front_face) {
+ dest->front_face = src->front_face;
+ dest_mask |= RADV_DYNAMIC_FRONT_FACE;
+ }
+ }
+
+ if (copy_mask & RADV_DYNAMIC_PRIMITIVE_TOPOLOGY) {
+ if (dest->primitive_topology != src->primitive_topology) {
+ dest->primitive_topology = src->primitive_topology;
+ dest_mask |= RADV_DYNAMIC_PRIMITIVE_TOPOLOGY;
+ }
+ }
+
+ if (copy_mask & RADV_DYNAMIC_DEPTH_TEST_ENABLE) {
+ if (dest->depth_test_enable != src->depth_test_enable) {
+ dest->depth_test_enable = src->depth_test_enable;
+ dest_mask |= RADV_DYNAMIC_DEPTH_TEST_ENABLE;
+ }
+ }
+
+ if (copy_mask & RADV_DYNAMIC_DEPTH_WRITE_ENABLE) {
+ if (dest->depth_write_enable != src->depth_write_enable) {
+ dest->depth_write_enable = src->depth_write_enable;
+ dest_mask |= RADV_DYNAMIC_DEPTH_WRITE_ENABLE;
+ }
+ }
+
+ if (copy_mask & RADV_DYNAMIC_DEPTH_COMPARE_OP) {
+ if (dest->depth_compare_op != src->depth_compare_op) {
+ dest->depth_compare_op = src->depth_compare_op;
+ dest_mask |= RADV_DYNAMIC_DEPTH_COMPARE_OP;
+ }
+ }
+
+ if (copy_mask & RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE) {
+ if (dest->depth_bounds_test_enable != src->depth_bounds_test_enable) {
+ dest->depth_bounds_test_enable = src->depth_bounds_test_enable;
+ dest_mask |= RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE;
+ }
+ }
+
+ if (copy_mask & RADV_DYNAMIC_STENCIL_TEST_ENABLE) {
+ if (dest->stencil_test_enable != src->stencil_test_enable) {
+ dest->stencil_test_enable = src->stencil_test_enable;
+ dest_mask |= RADV_DYNAMIC_STENCIL_TEST_ENABLE;
+ }
+ }
+
+ if (copy_mask & RADV_DYNAMIC_STENCIL_OP) {
+ if (memcmp(&dest->stencil_op, &src->stencil_op, sizeof(src->stencil_op))) {
+ dest->stencil_op = src->stencil_op;
+ dest_mask |= RADV_DYNAMIC_STENCIL_OP;
+ }
+ }
+
+ if (copy_mask & RADV_DYNAMIC_FRAGMENT_SHADING_RATE) {
+ if (memcmp(&dest->fragment_shading_rate, &src->fragment_shading_rate,
+ sizeof(src->fragment_shading_rate))) {
+ dest->fragment_shading_rate = src->fragment_shading_rate;
+ dest_mask |= RADV_DYNAMIC_FRAGMENT_SHADING_RATE;
+ }
+ }
+
+ cmd_buffer->state.dirty |= dest_mask;
}
static void
-radv_bind_streamout_state(struct radv_cmd_buffer *cmd_buffer,
- struct radv_pipeline *pipeline)
+radv_bind_streamout_state(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pipeline)
{
- struct radv_streamout_state *so = &cmd_buffer->state.streamout;
- struct radv_shader_info *info;
+ struct radv_streamout_state *so = &cmd_buffer->state.streamout;
+ struct radv_shader_info *info;
- if (!pipeline->streamout_shader ||
- cmd_buffer->device->physical_device->use_ngg_streamout)
- return;
+ if (!pipeline->streamout_shader || cmd_buffer->device->physical_device->use_ngg_streamout)
+ return;
- info = &pipeline->streamout_shader->info;
- for (int i = 0; i < MAX_SO_BUFFERS; i++)
- so->stride_in_dw[i] = info->so.strides[i];
+ info = &pipeline->streamout_shader->info;
+ for (int i = 0; i < MAX_SO_BUFFERS; i++)
+ so->stride_in_dw[i] = info->so.strides[i];
- so->enabled_stream_buffers_mask = info->so.enabled_stream_buffers_mask;
+ so->enabled_stream_buffers_mask = info->so.enabled_stream_buffers_mask;
}
-bool radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer)
+bool
+radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer)
{
- return cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE &&
- cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7;
+ return cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE &&
+ cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7;
}
-enum ring_type radv_queue_family_to_ring(int f) {
- switch (f) {
- case RADV_QUEUE_GENERAL:
- return RING_GFX;
- case RADV_QUEUE_COMPUTE:
- return RING_COMPUTE;
- case RADV_QUEUE_TRANSFER:
- return RING_DMA;
- default:
- unreachable("Unknown queue family");
- }
+enum ring_type
+radv_queue_family_to_ring(int f)
+{
+ switch (f) {
+ case RADV_QUEUE_GENERAL:
+ return RING_GFX;
+ case RADV_QUEUE_COMPUTE:
+ return RING_COMPUTE;
+ case RADV_QUEUE_TRANSFER:
+ return RING_DMA;
+ default:
+ unreachable("Unknown queue family");
+ }
}
static void
radv_destroy_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
{
- list_del(&cmd_buffer->pool_link);
+ list_del(&cmd_buffer->pool_link);
- list_for_each_entry_safe(struct radv_cmd_buffer_upload, up,
- &cmd_buffer->upload.list, list) {
- cmd_buffer->device->ws->buffer_destroy(cmd_buffer->device->ws, up->upload_bo);
- list_del(&up->list);
- free(up);
- }
+ list_for_each_entry_safe(struct radv_cmd_buffer_upload, up, &cmd_buffer->upload.list, list)
+ {
+ cmd_buffer->device->ws->buffer_destroy(cmd_buffer->device->ws, up->upload_bo);
+ list_del(&up->list);
+ free(up);
+ }
- if (cmd_buffer->upload.upload_bo)
- cmd_buffer->device->ws->buffer_destroy(cmd_buffer->device->ws, cmd_buffer->upload.upload_bo);
+ if (cmd_buffer->upload.upload_bo)
+ cmd_buffer->device->ws->buffer_destroy(cmd_buffer->device->ws, cmd_buffer->upload.upload_bo);
- if (cmd_buffer->cs)
- cmd_buffer->device->ws->cs_destroy(cmd_buffer->cs);
+ if (cmd_buffer->cs)
+ cmd_buffer->device->ws->cs_destroy(cmd_buffer->cs);
- for (unsigned i = 0; i < MAX_BIND_POINTS; i++)
- free(cmd_buffer->descriptors[i].push_set.set.mapped_ptr);
+ for (unsigned i = 0; i < MAX_BIND_POINTS; i++)
+ free(cmd_buffer->descriptors[i].push_set.set.mapped_ptr);
- vk_object_base_finish(&cmd_buffer->base);
- vk_free(&cmd_buffer->pool->alloc, cmd_buffer);
+ vk_object_base_finish(&cmd_buffer->base);
+ vk_free(&cmd_buffer->pool->alloc, cmd_buffer);
}
-static VkResult radv_create_cmd_buffer(
- struct radv_device * device,
- struct radv_cmd_pool * pool,
- VkCommandBufferLevel level,
- VkCommandBuffer* pCommandBuffer)
+static VkResult
+radv_create_cmd_buffer(struct radv_device *device, struct radv_cmd_pool *pool,
+ VkCommandBufferLevel level, VkCommandBuffer *pCommandBuffer)
{
- struct radv_cmd_buffer *cmd_buffer;
- unsigned ring;
- cmd_buffer = vk_zalloc(&pool->alloc, sizeof(*cmd_buffer), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (cmd_buffer == NULL)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ struct radv_cmd_buffer *cmd_buffer;
+ unsigned ring;
+ cmd_buffer = vk_zalloc(&pool->alloc, sizeof(*cmd_buffer), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (cmd_buffer == NULL)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- vk_object_base_init(&device->vk, &cmd_buffer->base,
- VK_OBJECT_TYPE_COMMAND_BUFFER);
+ vk_object_base_init(&device->vk, &cmd_buffer->base, VK_OBJECT_TYPE_COMMAND_BUFFER);
- cmd_buffer->device = device;
- cmd_buffer->pool = pool;
- cmd_buffer->level = level;
+ cmd_buffer->device = device;
+ cmd_buffer->pool = pool;
+ cmd_buffer->level = level;
- list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers);
- cmd_buffer->queue_family_index = pool->queue_family_index;
+ list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers);
+ cmd_buffer->queue_family_index = pool->queue_family_index;
- ring = radv_queue_family_to_ring(cmd_buffer->queue_family_index);
+ ring = radv_queue_family_to_ring(cmd_buffer->queue_family_index);
- cmd_buffer->cs = device->ws->cs_create(device->ws, ring);
- if (!cmd_buffer->cs) {
- radv_destroy_cmd_buffer(cmd_buffer);
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- }
+ cmd_buffer->cs = device->ws->cs_create(device->ws, ring);
+ if (!cmd_buffer->cs) {
+ radv_destroy_cmd_buffer(cmd_buffer);
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
- *pCommandBuffer = radv_cmd_buffer_to_handle(cmd_buffer);
+ *pCommandBuffer = radv_cmd_buffer_to_handle(cmd_buffer);
- list_inithead(&cmd_buffer->upload.list);
+ list_inithead(&cmd_buffer->upload.list);
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
static VkResult
radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
{
- cmd_buffer->device->ws->cs_reset(cmd_buffer->cs);
-
- list_for_each_entry_safe(struct radv_cmd_buffer_upload, up,
- &cmd_buffer->upload.list, list) {
- cmd_buffer->device->ws->buffer_destroy(cmd_buffer->device->ws, up->upload_bo);
- list_del(&up->list);
- free(up);
- }
-
- cmd_buffer->push_constant_stages = 0;
- cmd_buffer->scratch_size_per_wave_needed = 0;
- cmd_buffer->scratch_waves_wanted = 0;
- cmd_buffer->compute_scratch_size_per_wave_needed = 0;
- cmd_buffer->compute_scratch_waves_wanted = 0;
- cmd_buffer->esgs_ring_size_needed = 0;
- cmd_buffer->gsvs_ring_size_needed = 0;
- cmd_buffer->tess_rings_needed = false;
- cmd_buffer->gds_needed = false;
- cmd_buffer->gds_oa_needed = false;
- cmd_buffer->sample_positions_needed = false;
-
- if (cmd_buffer->upload.upload_bo)
- radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs,
- cmd_buffer->upload.upload_bo);
- cmd_buffer->upload.offset = 0;
-
- cmd_buffer->record_result = VK_SUCCESS;
-
- memset(cmd_buffer->vertex_bindings, 0, sizeof(cmd_buffer->vertex_bindings));
-
- for (unsigned i = 0; i < MAX_BIND_POINTS; i++) {
- cmd_buffer->descriptors[i].dirty = 0;
- cmd_buffer->descriptors[i].valid = 0;
- cmd_buffer->descriptors[i].push_dirty = false;
- }
-
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9 &&
- cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) {
- unsigned num_db = cmd_buffer->device->physical_device->rad_info.max_render_backends;
- unsigned fence_offset, eop_bug_offset;
- void *fence_ptr;
-
- radv_cmd_buffer_upload_alloc(cmd_buffer, 8, &fence_offset,
- &fence_ptr);
- memset(fence_ptr, 0, 8);
-
- cmd_buffer->gfx9_fence_va =
- radv_buffer_get_va(cmd_buffer->upload.upload_bo);
- cmd_buffer->gfx9_fence_va += fence_offset;
-
- if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) {
- /* Allocate a buffer for the EOP bug on GFX9. */
- radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db,
- &eop_bug_offset, &fence_ptr);
- memset(fence_ptr, 0, 16 * num_db);
- cmd_buffer->gfx9_eop_bug_va =
- radv_buffer_get_va(cmd_buffer->upload.upload_bo);
- cmd_buffer->gfx9_eop_bug_va += eop_bug_offset;
- }
- }
-
- cmd_buffer->status = RADV_CMD_BUFFER_STATUS_INITIAL;
-
- return cmd_buffer->record_result;
+ cmd_buffer->device->ws->cs_reset(cmd_buffer->cs);
+
+ list_for_each_entry_safe(struct radv_cmd_buffer_upload, up, &cmd_buffer->upload.list, list)
+ {
+ cmd_buffer->device->ws->buffer_destroy(cmd_buffer->device->ws, up->upload_bo);
+ list_del(&up->list);
+ free(up);
+ }
+
+ cmd_buffer->push_constant_stages = 0;
+ cmd_buffer->scratch_size_per_wave_needed = 0;
+ cmd_buffer->scratch_waves_wanted = 0;
+ cmd_buffer->compute_scratch_size_per_wave_needed = 0;
+ cmd_buffer->compute_scratch_waves_wanted = 0;
+ cmd_buffer->esgs_ring_size_needed = 0;
+ cmd_buffer->gsvs_ring_size_needed = 0;
+ cmd_buffer->tess_rings_needed = false;
+ cmd_buffer->gds_needed = false;
+ cmd_buffer->gds_oa_needed = false;
+ cmd_buffer->sample_positions_needed = false;
+
+ if (cmd_buffer->upload.upload_bo)
+ radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, cmd_buffer->upload.upload_bo);
+ cmd_buffer->upload.offset = 0;
+
+ cmd_buffer->record_result = VK_SUCCESS;
+
+ memset(cmd_buffer->vertex_bindings, 0, sizeof(cmd_buffer->vertex_bindings));
+
+ for (unsigned i = 0; i < MAX_BIND_POINTS; i++) {
+ cmd_buffer->descriptors[i].dirty = 0;
+ cmd_buffer->descriptors[i].valid = 0;
+ cmd_buffer->descriptors[i].push_dirty = false;
+ }
+
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9 &&
+ cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) {
+ unsigned num_db = cmd_buffer->device->physical_device->rad_info.max_render_backends;
+ unsigned fence_offset, eop_bug_offset;
+ void *fence_ptr;
+
+ radv_cmd_buffer_upload_alloc(cmd_buffer, 8, &fence_offset, &fence_ptr);
+ memset(fence_ptr, 0, 8);
+
+ cmd_buffer->gfx9_fence_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
+ cmd_buffer->gfx9_fence_va += fence_offset;
+
+ if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) {
+ /* Allocate a buffer for the EOP bug on GFX9. */
+ radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db, &eop_bug_offset, &fence_ptr);
+ memset(fence_ptr, 0, 16 * num_db);
+ cmd_buffer->gfx9_eop_bug_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
+ cmd_buffer->gfx9_eop_bug_va += eop_bug_offset;
+ }
+ }
+
+ cmd_buffer->status = RADV_CMD_BUFFER_STATUS_INITIAL;
+
+ return cmd_buffer->record_result;
}
enum radeon_bo_domain
radv_cmdbuffer_domain(const struct radeon_info *info, uint32_t perftest)
{
- bool use_sam = (info->all_vram_visible && info->has_dedicated_vram &&
- !(perftest & RADV_PERFTEST_NO_SAM)) ||
- (perftest & RADV_PERFTEST_SAM);
- return use_sam ? RADEON_DOMAIN_VRAM : RADEON_DOMAIN_GTT;
+ bool use_sam =
+ (info->all_vram_visible && info->has_dedicated_vram && !(perftest & RADV_PERFTEST_NO_SAM)) ||
+ (perftest & RADV_PERFTEST_SAM);
+ return use_sam ? RADEON_DOMAIN_VRAM : RADEON_DOMAIN_GTT;
}
static bool
-radv_cmd_buffer_resize_upload_buf(struct radv_cmd_buffer *cmd_buffer,
- uint64_t min_needed)
-{
- uint64_t new_size;
- struct radeon_winsys_bo *bo;
- struct radv_cmd_buffer_upload *upload;
- struct radv_device *device = cmd_buffer->device;
-
- new_size = MAX2(min_needed, 16 * 1024);
- new_size = MAX2(new_size, 2 * cmd_buffer->upload.size);
-
- bo = device->ws->buffer_create(device->ws,
- new_size, 4096,
- radv_cmdbuffer_domain(&device->physical_device->rad_info,
- device->instance->perftest_flags),
- RADEON_FLAG_CPU_ACCESS|
- RADEON_FLAG_NO_INTERPROCESS_SHARING |
- RADEON_FLAG_32BIT |
- RADEON_FLAG_GTT_WC,
- RADV_BO_PRIORITY_UPLOAD_BUFFER);
-
- if (!bo) {
- cmd_buffer->record_result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
- return false;
- }
-
- radv_cs_add_buffer(device->ws, cmd_buffer->cs, bo);
- if (cmd_buffer->upload.upload_bo) {
- upload = malloc(sizeof(*upload));
-
- if (!upload) {
- cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
- device->ws->buffer_destroy(device->ws, bo);
- return false;
- }
-
- memcpy(upload, &cmd_buffer->upload, sizeof(*upload));
- list_add(&upload->list, &cmd_buffer->upload.list);
- }
-
- cmd_buffer->upload.upload_bo = bo;
- cmd_buffer->upload.size = new_size;
- cmd_buffer->upload.offset = 0;
- cmd_buffer->upload.map = device->ws->buffer_map(cmd_buffer->upload.upload_bo);
-
- if (!cmd_buffer->upload.map) {
- cmd_buffer->record_result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
- return false;
- }
-
- return true;
+radv_cmd_buffer_resize_upload_buf(struct radv_cmd_buffer *cmd_buffer, uint64_t min_needed)
+{
+ uint64_t new_size;
+ struct radeon_winsys_bo *bo;
+ struct radv_cmd_buffer_upload *upload;
+ struct radv_device *device = cmd_buffer->device;
+
+ new_size = MAX2(min_needed, 16 * 1024);
+ new_size = MAX2(new_size, 2 * cmd_buffer->upload.size);
+
+ bo = device->ws->buffer_create(
+ device->ws, new_size, 4096,
+ radv_cmdbuffer_domain(&device->physical_device->rad_info, device->instance->perftest_flags),
+ RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_32BIT |
+ RADEON_FLAG_GTT_WC,
+ RADV_BO_PRIORITY_UPLOAD_BUFFER);
+
+ if (!bo) {
+ cmd_buffer->record_result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
+ return false;
+ }
+
+ radv_cs_add_buffer(device->ws, cmd_buffer->cs, bo);
+ if (cmd_buffer->upload.upload_bo) {
+ upload = malloc(sizeof(*upload));
+
+ if (!upload) {
+ cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
+ device->ws->buffer_destroy(device->ws, bo);
+ return false;
+ }
+
+ memcpy(upload, &cmd_buffer->upload, sizeof(*upload));
+ list_add(&upload->list, &cmd_buffer->upload.list);
+ }
+
+ cmd_buffer->upload.upload_bo = bo;
+ cmd_buffer->upload.size = new_size;
+ cmd_buffer->upload.offset = 0;
+ cmd_buffer->upload.map = device->ws->buffer_map(cmd_buffer->upload.upload_bo);
+
+ if (!cmd_buffer->upload.map) {
+ cmd_buffer->record_result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
+ return false;
+ }
+
+ return true;
}
bool
-radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer,
- unsigned size, unsigned *out_offset, void **ptr)
+radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size,
+ unsigned *out_offset, void **ptr)
{
- assert(size % 4 == 0);
+ assert(size % 4 == 0);
- struct radeon_info *rad_info = &cmd_buffer->device->physical_device->rad_info;
+ struct radeon_info *rad_info = &cmd_buffer->device->physical_device->rad_info;
- /* Align to the scalar cache line size if it results in this allocation
- * being placed in less of them.
- */
- unsigned offset = cmd_buffer->upload.offset;
- unsigned line_size = rad_info->chip_class >= GFX10 ? 64 : 32;
- unsigned gap = align(offset, line_size) - offset;
- if ((size & (line_size - 1)) > gap)
- offset = align(offset, line_size);
+ /* Align to the scalar cache line size if it results in this allocation
+ * being placed in fewer of them.
+ */
+ unsigned offset = cmd_buffer->upload.offset;
+ unsigned line_size = rad_info->chip_class >= GFX10 ? 64 : 32;
+ unsigned gap = align(offset, line_size) - offset;
+ if ((size & (line_size - 1)) > gap)
+ offset = align(offset, line_size);
- if (offset + size > cmd_buffer->upload.size) {
- if (!radv_cmd_buffer_resize_upload_buf(cmd_buffer, size))
- return false;
- offset = 0;
- }
+ if (offset + size > cmd_buffer->upload.size) {
+ if (!radv_cmd_buffer_resize_upload_buf(cmd_buffer, size))
+ return false;
+ offset = 0;
+ }
- *out_offset = offset;
- *ptr = cmd_buffer->upload.map + offset;
+ *out_offset = offset;
+ *ptr = cmd_buffer->upload.map + offset;
- cmd_buffer->upload.offset = offset + size;
- return true;
+ cmd_buffer->upload.offset = offset + size;
+ return true;
}
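A stand-alone copy of the placement heuristic above (a sketch, not from this patch), assuming GFX10's 64-byte scalar cache lines, to make the two interesting cases concrete:

#include <assert.h>

static unsigned align_up(unsigned v, unsigned a)
{
   return (v + a - 1) & ~(a - 1);
}

/* Same logic as radv_cmd_buffer_upload_alloc(); `line_size` is assumed to be a
 * power of two (64 bytes on GFX10, 32 on older chips). */
static unsigned pick_upload_offset(unsigned offset, unsigned size, unsigned line_size)
{
   unsigned gap = align_up(offset, line_size) - offset;
   if ((size & (line_size - 1)) > gap)
      offset = align_up(offset, line_size);
   return offset;
}

int main(void)
{
   /* A 20-byte allocation starting at offset 40 still ends inside the first
    * 64-byte line, so it is left where it is. */
   assert(pick_upload_offset(40, 20, 64) == 40);
   /* A 60-byte allocation would straddle two lines from offset 40, so it is
    * bumped to the next boundary and occupies a single line instead. */
   assert(pick_upload_offset(40, 60, 64) == 64);
   return 0;
}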
bool
-radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer,
- unsigned size, const void *data, unsigned *out_offset)
+radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer, unsigned size, const void *data,
+ unsigned *out_offset)
{
- uint8_t *ptr;
+ uint8_t *ptr;
- if (!radv_cmd_buffer_upload_alloc(cmd_buffer, size, out_offset, (void **)&ptr))
- return false;
+ if (!radv_cmd_buffer_upload_alloc(cmd_buffer, size, out_offset, (void **)&ptr))
+ return false;
- if (ptr)
- memcpy(ptr, data, size);
+ if (ptr)
+ memcpy(ptr, data, size);
- return true;
+ return true;
}
static void
-radv_emit_write_data_packet(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
- unsigned count, const uint32_t *data)
+radv_emit_write_data_packet(struct radv_cmd_buffer *cmd_buffer, uint64_t va, unsigned count,
+ const uint32_t *data)
{
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
- radeon_check_space(cmd_buffer->device->ws, cs, 4 + count);
+ radeon_check_space(cmd_buffer->device->ws, cs, 4 + count);
- radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0));
- radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
- S_370_WR_CONFIRM(1) |
- S_370_ENGINE_SEL(V_370_ME));
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- radeon_emit_array(cs, data, count);
+ radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0));
+ radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ radeon_emit_array(cs, data, count);
}
-void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer)
+void
+radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_device *device = cmd_buffer->device;
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- uint64_t va;
+ struct radv_device *device = cmd_buffer->device;
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ uint64_t va;
- va = radv_buffer_get_va(device->trace_bo);
- if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY)
- va += 4;
+ va = radv_buffer_get_va(device->trace_bo);
+ if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY)
+ va += 4;
- ++cmd_buffer->state.trace_id;
- radv_emit_write_data_packet(cmd_buffer, va, 1,
- &cmd_buffer->state.trace_id);
+ ++cmd_buffer->state.trace_id;
+ radv_emit_write_data_packet(cmd_buffer, va, 1, &cmd_buffer->state.trace_id);
- radeon_check_space(cmd_buffer->device->ws, cs, 2);
+ radeon_check_space(cmd_buffer->device->ws, cs, 2);
- radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
- radeon_emit(cs, AC_ENCODE_TRACE_POINT(cmd_buffer->state.trace_id));
+ radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
+ radeon_emit(cs, AC_ENCODE_TRACE_POINT(cmd_buffer->state.trace_id));
}
static void
-radv_cmd_buffer_after_draw(struct radv_cmd_buffer *cmd_buffer,
- enum radv_cmd_flush_bits flags)
+radv_cmd_buffer_after_draw(struct radv_cmd_buffer *cmd_buffer, enum radv_cmd_flush_bits flags)
{
- if (unlikely(cmd_buffer->device->thread_trace.bo)) {
- radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_THREAD_TRACE_MARKER) | EVENT_INDEX(0));
- }
+ if (unlikely(cmd_buffer->device->thread_trace.bo)) {
+ radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_THREAD_TRACE_MARKER) | EVENT_INDEX(0));
+ }
- if (cmd_buffer->device->instance->debug_flags & RADV_DEBUG_SYNC_SHADERS) {
- enum rgp_flush_bits sqtt_flush_bits = 0;
- assert(flags & (RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_CS_PARTIAL_FLUSH));
+ if (cmd_buffer->device->instance->debug_flags & RADV_DEBUG_SYNC_SHADERS) {
+ enum rgp_flush_bits sqtt_flush_bits = 0;
+ assert(flags & (RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH));
- radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4);
+ radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4);
- /* Force wait for graphics or compute engines to be idle. */
- si_cs_emit_cache_flush(cmd_buffer->cs,
- cmd_buffer->device->physical_device->rad_info.chip_class,
- &cmd_buffer->gfx9_fence_idx,
- cmd_buffer->gfx9_fence_va,
- radv_cmd_buffer_uses_mec(cmd_buffer),
- flags, &sqtt_flush_bits, cmd_buffer->gfx9_eop_bug_va);
- }
+ /* Force wait for graphics or compute engines to be idle. */
+ si_cs_emit_cache_flush(cmd_buffer->cs,
+ cmd_buffer->device->physical_device->rad_info.chip_class,
+ &cmd_buffer->gfx9_fence_idx, cmd_buffer->gfx9_fence_va,
+ radv_cmd_buffer_uses_mec(cmd_buffer), flags, &sqtt_flush_bits,
+ cmd_buffer->gfx9_eop_bug_va);
+ }
- if (unlikely(cmd_buffer->device->trace_bo))
- radv_cmd_buffer_trace_emit(cmd_buffer);
+ if (unlikely(cmd_buffer->device->trace_bo))
+ radv_cmd_buffer_trace_emit(cmd_buffer);
}
static void
-radv_save_pipeline(struct radv_cmd_buffer *cmd_buffer,
- struct radv_pipeline *pipeline)
+radv_save_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pipeline)
{
- struct radv_device *device = cmd_buffer->device;
- enum ring_type ring;
- uint32_t data[2];
- uint64_t va;
+ struct radv_device *device = cmd_buffer->device;
+ enum ring_type ring;
+ uint32_t data[2];
+ uint64_t va;
- va = radv_buffer_get_va(device->trace_bo);
+ va = radv_buffer_get_va(device->trace_bo);
- ring = radv_queue_family_to_ring(cmd_buffer->queue_family_index);
+ ring = radv_queue_family_to_ring(cmd_buffer->queue_family_index);
- switch (ring) {
- case RING_GFX:
- va += 8;
- break;
- case RING_COMPUTE:
- va += 16;
- break;
- default:
- assert(!"invalid ring type");
- }
+ switch (ring) {
+ case RING_GFX:
+ va += 8;
+ break;
+ case RING_COMPUTE:
+ va += 16;
+ break;
+ default:
+ assert(!"invalid ring type");
+ }
- uint64_t pipeline_address = (uintptr_t)pipeline;
- data[0] = pipeline_address;
- data[1] = pipeline_address >> 32;
+ uint64_t pipeline_address = (uintptr_t)pipeline;
+ data[0] = pipeline_address;
+ data[1] = pipeline_address >> 32;
- radv_emit_write_data_packet(cmd_buffer, va, 2, data);
+ radv_emit_write_data_packet(cmd_buffer, va, 2, data);
}
static void
-radv_save_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer,
- uint64_t vb_ptr)
+radv_save_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer, uint64_t vb_ptr)
{
- struct radv_device *device = cmd_buffer->device;
- uint32_t data[2];
- uint64_t va;
+ struct radv_device *device = cmd_buffer->device;
+ uint32_t data[2];
+ uint64_t va;
- va = radv_buffer_get_va(device->trace_bo);
- va += 24;
+ va = radv_buffer_get_va(device->trace_bo);
+ va += 24;
- data[0] = vb_ptr;
- data[1] = vb_ptr >> 32;
+ data[0] = vb_ptr;
+ data[1] = vb_ptr >> 32;
- radv_emit_write_data_packet(cmd_buffer, va, 2, data);
+ radv_emit_write_data_packet(cmd_buffer, va, 2, data);
}
-void radv_set_descriptor_set(struct radv_cmd_buffer *cmd_buffer,
- VkPipelineBindPoint bind_point,
- struct radv_descriptor_set *set,
- unsigned idx)
+void
+radv_set_descriptor_set(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point,
+ struct radv_descriptor_set *set, unsigned idx)
{
- struct radv_descriptor_state *descriptors_state =
- radv_get_descriptors_state(cmd_buffer, bind_point);
+ struct radv_descriptor_state *descriptors_state =
+ radv_get_descriptors_state(cmd_buffer, bind_point);
- descriptors_state->sets[idx] = set;
+ descriptors_state->sets[idx] = set;
- descriptors_state->valid |= (1u << idx); /* active descriptors */
- descriptors_state->dirty |= (1u << idx);
+ descriptors_state->valid |= (1u << idx); /* active descriptors */
+ descriptors_state->dirty |= (1u << idx);
}
static void
-radv_save_descriptors(struct radv_cmd_buffer *cmd_buffer,
- VkPipelineBindPoint bind_point)
+radv_save_descriptors(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point)
{
- struct radv_descriptor_state *descriptors_state =
- radv_get_descriptors_state(cmd_buffer, bind_point);
- struct radv_device *device = cmd_buffer->device;
- uint32_t data[MAX_SETS * 2] = {0};
- uint64_t va;
- va = radv_buffer_get_va(device->trace_bo) + 32;
+ struct radv_descriptor_state *descriptors_state =
+ radv_get_descriptors_state(cmd_buffer, bind_point);
+ struct radv_device *device = cmd_buffer->device;
+ uint32_t data[MAX_SETS * 2] = {0};
+ uint64_t va;
+ va = radv_buffer_get_va(device->trace_bo) + 32;
- u_foreach_bit(i, descriptors_state->valid) {
- struct radv_descriptor_set *set = descriptors_state->sets[i];
- data[i * 2] = (uint64_t)(uintptr_t)set;
- data[i * 2 + 1] = (uint64_t)(uintptr_t)set >> 32;
- }
+ u_foreach_bit(i, descriptors_state->valid)
+ {
+ struct radv_descriptor_set *set = descriptors_state->sets[i];
+ data[i * 2] = (uint64_t)(uintptr_t)set;
+ data[i * 2 + 1] = (uint64_t)(uintptr_t)set >> 32;
+ }
- radv_emit_write_data_packet(cmd_buffer, va, MAX_SETS * 2, data);
+ radv_emit_write_data_packet(cmd_buffer, va, MAX_SETS * 2, data);
}
struct radv_userdata_info *
-radv_lookup_user_sgpr(struct radv_pipeline *pipeline,
- gl_shader_stage stage,
- int idx)
+radv_lookup_user_sgpr(struct radv_pipeline *pipeline, gl_shader_stage stage, int idx)
{
- struct radv_shader_variant *shader = radv_get_shader(pipeline, stage);
- return &shader->info.user_sgprs_locs.shader_data[idx];
+ struct radv_shader_variant *shader = radv_get_shader(pipeline, stage);
+ return &shader->info.user_sgprs_locs.shader_data[idx];
}
static void
-radv_emit_userdata_address(struct radv_cmd_buffer *cmd_buffer,
- struct radv_pipeline *pipeline,
- gl_shader_stage stage,
- int idx, uint64_t va)
+radv_emit_userdata_address(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pipeline,
+ gl_shader_stage stage, int idx, uint64_t va)
{
- struct radv_userdata_info *loc = radv_lookup_user_sgpr(pipeline, stage, idx);
- uint32_t base_reg = pipeline->user_data_0[stage];
- if (loc->sgpr_idx == -1)
- return;
+ struct radv_userdata_info *loc = radv_lookup_user_sgpr(pipeline, stage, idx);
+ uint32_t base_reg = pipeline->user_data_0[stage];
+ if (loc->sgpr_idx == -1)
+ return;
- assert(loc->num_sgprs == 1);
+ assert(loc->num_sgprs == 1);
- radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs,
- base_reg + loc->sgpr_idx * 4, va, false);
+ radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, va,
+ false);
}
static void
-radv_emit_descriptor_pointers(struct radv_cmd_buffer *cmd_buffer,
- struct radv_pipeline *pipeline,
- struct radv_descriptor_state *descriptors_state,
- gl_shader_stage stage)
+radv_emit_descriptor_pointers(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pipeline,
+ struct radv_descriptor_state *descriptors_state,
+ gl_shader_stage stage)
{
- struct radv_device *device = cmd_buffer->device;
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- uint32_t sh_base = pipeline->user_data_0[stage];
- struct radv_userdata_locations *locs =
- &pipeline->shaders[stage]->info.user_sgprs_locs;
- unsigned mask = locs->descriptor_sets_enabled;
+ struct radv_device *device = cmd_buffer->device;
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ uint32_t sh_base = pipeline->user_data_0[stage];
+ struct radv_userdata_locations *locs = &pipeline->shaders[stage]->info.user_sgprs_locs;
+ unsigned mask = locs->descriptor_sets_enabled;
- mask &= descriptors_state->dirty & descriptors_state->valid;
+ mask &= descriptors_state->dirty & descriptors_state->valid;
- while (mask) {
- int start, count;
+ while (mask) {
+ int start, count;
- u_bit_scan_consecutive_range(&mask, &start, &count);
+ u_bit_scan_consecutive_range(&mask, &start, &count);
- struct radv_userdata_info *loc = &locs->descriptor_sets[start];
- unsigned sh_offset = sh_base + loc->sgpr_idx * 4;
+ struct radv_userdata_info *loc = &locs->descriptor_sets[start];
+ unsigned sh_offset = sh_base + loc->sgpr_idx * 4;
- radv_emit_shader_pointer_head(cs, sh_offset, count, true);
- for (int i = 0; i < count; i++) {
- struct radv_descriptor_set *set =
- descriptors_state->sets[start + i];
+ radv_emit_shader_pointer_head(cs, sh_offset, count, true);
+ for (int i = 0; i < count; i++) {
+ struct radv_descriptor_set *set = descriptors_state->sets[start + i];
- radv_emit_shader_pointer_body(device, cs, set->header.va, true);
- }
- }
+ radv_emit_shader_pointer_body(device, cs, set->header.va, true);
+ }
+ }
}
/**
@@ -803,30 +762,30 @@ radv_emit_descriptor_pointers(struct radv_cmd_buffer *cmd_buffer,
* that will be emitted by PA_SC_AA_SAMPLE_LOCS_PIXEL_*).
*/
static void
-radv_convert_user_sample_locs(struct radv_sample_locations_state *state,
- uint32_t x, uint32_t y, VkOffset2D *sample_locs)
+radv_convert_user_sample_locs(struct radv_sample_locations_state *state, uint32_t x, uint32_t y,
+ VkOffset2D *sample_locs)
{
- uint32_t x_offset = x % state->grid_size.width;
- uint32_t y_offset = y % state->grid_size.height;
- uint32_t num_samples = (uint32_t)state->per_pixel;
- VkSampleLocationEXT *user_locs;
- uint32_t pixel_offset;
+ uint32_t x_offset = x % state->grid_size.width;
+ uint32_t y_offset = y % state->grid_size.height;
+ uint32_t num_samples = (uint32_t)state->per_pixel;
+ VkSampleLocationEXT *user_locs;
+ uint32_t pixel_offset;
- pixel_offset = (x_offset + y_offset * state->grid_size.width) * num_samples;
+ pixel_offset = (x_offset + y_offset * state->grid_size.width) * num_samples;
- assert(pixel_offset <= MAX_SAMPLE_LOCATIONS);
- user_locs = &state->locations[pixel_offset];
+ assert(pixel_offset <= MAX_SAMPLE_LOCATIONS);
+ user_locs = &state->locations[pixel_offset];
- for (uint32_t i = 0; i < num_samples; i++) {
- float shifted_pos_x = user_locs[i].x - 0.5;
- float shifted_pos_y = user_locs[i].y - 0.5;
+ for (uint32_t i = 0; i < num_samples; i++) {
+ float shifted_pos_x = user_locs[i].x - 0.5;
+ float shifted_pos_y = user_locs[i].y - 0.5;
- int32_t scaled_pos_x = floorf(shifted_pos_x * 16);
- int32_t scaled_pos_y = floorf(shifted_pos_y * 16);
+ int32_t scaled_pos_x = floorf(shifted_pos_x * 16);
+ int32_t scaled_pos_y = floorf(shifted_pos_y * 16);
- sample_locs[i].x = CLAMP(scaled_pos_x, -8, 7);
- sample_locs[i].y = CLAMP(scaled_pos_y, -8, 7);
- }
+ sample_locs[i].x = CLAMP(scaled_pos_x, -8, 7);
+ sample_locs[i].y = CLAMP(scaled_pos_y, -8, 7);
+ }
}
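A stand-alone sketch (not from this patch) of the per-coordinate conversion above: user sample locations in [0,1) are re-centred on the pixel centre and quantized to signed 1/16th-pixel offsets clamped to the [-8,7] range the hardware fields can hold:

#include <math.h>
#include <stdio.h>

/* Same per-coordinate math as radv_convert_user_sample_locs(). */
static int convert_sample_coord(float user_pos)
{
   int scaled = (int)floorf((user_pos - 0.5f) * 16);
   if (scaled < -8)
      scaled = -8;
   if (scaled > 7)
      scaled = 7;
   return scaled;
}

int main(void)
{
   printf("%d\n", convert_sample_coord(0.75f)); /* (0.75 - 0.5) * 16 = 4     */
   printf("%d\n", convert_sample_coord(0.0f));  /* already at the -8 minimum */
   printf("%d\n", convert_sample_coord(1.0f));  /* 8 clamps down to 7        */
   return 0;
}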
/**
@@ -835,20 +794,20 @@ radv_convert_user_sample_locs(struct radv_sample_locations_state *state,
*/
static void
radv_compute_sample_locs_pixel(uint32_t num_samples, VkOffset2D *sample_locs,
- uint32_t *sample_locs_pixel)
+ uint32_t *sample_locs_pixel)
{
- for (uint32_t i = 0; i < num_samples; i++) {
- uint32_t sample_reg_idx = i / 4;
- uint32_t sample_loc_idx = i % 4;
- int32_t pos_x = sample_locs[i].x;
- int32_t pos_y = sample_locs[i].y;
+ for (uint32_t i = 0; i < num_samples; i++) {
+ uint32_t sample_reg_idx = i / 4;
+ uint32_t sample_loc_idx = i % 4;
+ int32_t pos_x = sample_locs[i].x;
+ int32_t pos_y = sample_locs[i].y;
- uint32_t shift_x = 8 * sample_loc_idx;
- uint32_t shift_y = shift_x + 4;
+ uint32_t shift_x = 8 * sample_loc_idx;
+ uint32_t shift_y = shift_x + 4;
- sample_locs_pixel[sample_reg_idx] |= (pos_x & 0xf) << shift_x;
- sample_locs_pixel[sample_reg_idx] |= (pos_y & 0xf) << shift_y;
- }
+ sample_locs_pixel[sample_reg_idx] |= (pos_x & 0xf) << shift_x;
+ sample_locs_pixel[sample_reg_idx] |= (pos_y & 0xf) << shift_y;
+ }
}
/**
@@ -856,41 +815,38 @@ radv_compute_sample_locs_pixel(uint32_t num_samples, VkOffset2D *sample_locs,
* sample locations.
*/
static uint64_t
-radv_compute_centroid_priority(struct radv_cmd_buffer *cmd_buffer,
- VkOffset2D *sample_locs,
- uint32_t num_samples)
+radv_compute_centroid_priority(struct radv_cmd_buffer *cmd_buffer, VkOffset2D *sample_locs,
+ uint32_t num_samples)
{
- uint32_t *centroid_priorities = alloca(num_samples * sizeof(*centroid_priorities));
- uint32_t sample_mask = num_samples - 1;
- uint32_t *distances = alloca(num_samples * sizeof(*distances));
- uint64_t centroid_priority = 0;
+ uint32_t *centroid_priorities = alloca(num_samples * sizeof(*centroid_priorities));
+ uint32_t sample_mask = num_samples - 1;
+ uint32_t *distances = alloca(num_samples * sizeof(*distances));
+ uint64_t centroid_priority = 0;
- /* Compute the distances from center for each sample. */
- for (int i = 0; i < num_samples; i++) {
- distances[i] = (sample_locs[i].x * sample_locs[i].x) +
- (sample_locs[i].y * sample_locs[i].y);
- }
+ /* Compute the distances from center for each sample. */
+ for (int i = 0; i < num_samples; i++) {
+ distances[i] = (sample_locs[i].x * sample_locs[i].x) + (sample_locs[i].y * sample_locs[i].y);
+ }
- /* Compute the centroid priorities by looking at the distances array. */
- for (int i = 0; i < num_samples; i++) {
- uint32_t min_idx = 0;
+ /* Compute the centroid priorities by looking at the distances array. */
+ for (int i = 0; i < num_samples; i++) {
+ uint32_t min_idx = 0;
- for (int j = 1; j < num_samples; j++) {
- if (distances[j] < distances[min_idx])
- min_idx = j;
- }
+ for (int j = 1; j < num_samples; j++) {
+ if (distances[j] < distances[min_idx])
+ min_idx = j;
+ }
- centroid_priorities[i] = min_idx;
- distances[min_idx] = 0xffffffff;
- }
+ centroid_priorities[i] = min_idx;
+ distances[min_idx] = 0xffffffff;
+ }
- /* Compute the final centroid priority. */
- for (int i = 0; i < 8; i++) {
- centroid_priority |=
- centroid_priorities[i & sample_mask] << (i * 4);
- }
+ /* Compute the final centroid priority. */
+ for (int i = 0; i < 8; i++) {
+ centroid_priority |= centroid_priorities[i & sample_mask] << (i * 4);
+ }
- return centroid_priority << 32 | centroid_priority;
+ return centroid_priority << 32 | centroid_priority;
}
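A stand-alone sketch (not from this patch) of the nibble packing above, for a hypothetical 4-sample case whose closest-to-farthest ordering works out to samples 2, 0, 3, 1:

#include <assert.h>
#include <stdint.h>

int main(void)
{
   /* Hypothetical result of the distance sort: sample 2 is closest to the
    * pixel centre, then 0, then 3, then 1 (centroid_priorities[] contents). */
   const uint32_t priorities[4] = {2, 0, 3, 1};
   const uint32_t sample_mask = 4 - 1;
   uint64_t packed = 0;

   /* Eight 4-bit fields repeat the 4-entry order twice, as in the loop above. */
   for (int i = 0; i < 8; i++)
      packed |= (uint64_t)priorities[i & sample_mask] << (i * 4);
   assert(packed == 0x13021302u);

   /* PA_SC_CENTROID_PRIORITY_0/1 then receive the same pattern in both halves. */
   assert((packed << 32 | packed) == 0x1302130213021302ull);
   return 0;
}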
/**
@@ -899,972 +855,934 @@ radv_compute_centroid_priority(struct radv_cmd_buffer *cmd_buffer,
static void
radv_emit_sample_locations(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_sample_locations_state *sample_location =
- &cmd_buffer->state.dynamic.sample_location;
- uint32_t num_samples = (uint32_t)sample_location->per_pixel;
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- uint32_t sample_locs_pixel[4][2] = {0};
- VkOffset2D sample_locs[4][8]; /* 8 is the max. sample count supported */
- uint32_t max_sample_dist = 0;
- uint64_t centroid_priority;
-
- if (!cmd_buffer->state.dynamic.sample_location.count)
- return;
-
- /* Convert the user sample locations to hardware sample locations. */
- radv_convert_user_sample_locs(sample_location, 0, 0, sample_locs[0]);
- radv_convert_user_sample_locs(sample_location, 1, 0, sample_locs[1]);
- radv_convert_user_sample_locs(sample_location, 0, 1, sample_locs[2]);
- radv_convert_user_sample_locs(sample_location, 1, 1, sample_locs[3]);
-
- /* Compute the PA_SC_AA_SAMPLE_LOCS_PIXEL_* mask. */
- for (uint32_t i = 0; i < 4; i++) {
- radv_compute_sample_locs_pixel(num_samples, sample_locs[i],
- sample_locs_pixel[i]);
- }
-
- /* Compute the PA_SC_CENTROID_PRIORITY_* mask. */
- centroid_priority =
- radv_compute_centroid_priority(cmd_buffer, sample_locs[0],
- num_samples);
-
- /* Compute the maximum sample distance from the specified locations. */
- for (unsigned i = 0; i < 4; ++i) {
- for (uint32_t j = 0; j < num_samples; j++) {
- VkOffset2D offset = sample_locs[i][j];
- max_sample_dist = MAX2(max_sample_dist,
- MAX2(abs(offset.x), abs(offset.y)));
- }
- }
-
- /* Emit the specified user sample locations. */
- switch (num_samples) {
- case 2:
- case 4:
- radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_pixel[0][0]);
- radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_pixel[1][0]);
- radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_pixel[2][0]);
- radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_pixel[3][0]);
- break;
- case 8:
- radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_pixel[0][0]);
- radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_pixel[1][0]);
- radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_pixel[2][0]);
- radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_pixel[3][0]);
- radeon_set_context_reg(cs, R_028BFC_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1, sample_locs_pixel[0][1]);
- radeon_set_context_reg(cs, R_028C0C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1, sample_locs_pixel[1][1]);
- radeon_set_context_reg(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1, sample_locs_pixel[2][1]);
- radeon_set_context_reg(cs, R_028C2C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1, sample_locs_pixel[3][1]);
- break;
- default:
- unreachable("invalid number of samples");
- }
-
- /* Emit the maximum sample distance and the centroid priority. */
- radeon_set_context_reg_rmw(cs, R_028BE0_PA_SC_AA_CONFIG,
- S_028BE0_MAX_SAMPLE_DIST(max_sample_dist),
- ~C_028BE0_MAX_SAMPLE_DIST);
-
- radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);
- radeon_emit(cs, centroid_priority);
- radeon_emit(cs, centroid_priority >> 32);
-
- /* GFX9: Flush DFSM when the AA mode changes. */
- if (cmd_buffer->device->dfsm_allowed) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
- }
-
- cmd_buffer->state.context_roll_without_scissor_emitted = true;
+ struct radv_sample_locations_state *sample_location = &cmd_buffer->state.dynamic.sample_location;
+ uint32_t num_samples = (uint32_t)sample_location->per_pixel;
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ uint32_t sample_locs_pixel[4][2] = {0};
+ VkOffset2D sample_locs[4][8]; /* 8 is the max. sample count supported */
+ uint32_t max_sample_dist = 0;
+ uint64_t centroid_priority;
+
+ if (!cmd_buffer->state.dynamic.sample_location.count)
+ return;
+
+ /* Convert the user sample locations to hardware sample locations. */
+ radv_convert_user_sample_locs(sample_location, 0, 0, sample_locs[0]);
+ radv_convert_user_sample_locs(sample_location, 1, 0, sample_locs[1]);
+ radv_convert_user_sample_locs(sample_location, 0, 1, sample_locs[2]);
+ radv_convert_user_sample_locs(sample_location, 1, 1, sample_locs[3]);
+
+ /* Compute the PA_SC_AA_SAMPLE_LOCS_PIXEL_* mask. */
+ for (uint32_t i = 0; i < 4; i++) {
+ radv_compute_sample_locs_pixel(num_samples, sample_locs[i], sample_locs_pixel[i]);
+ }
+
+ /* Compute the PA_SC_CENTROID_PRIORITY_* mask. */
+ centroid_priority = radv_compute_centroid_priority(cmd_buffer, sample_locs[0], num_samples);
+
+ /* Compute the maximum sample distance from the specified locations. */
+ for (unsigned i = 0; i < 4; ++i) {
+ for (uint32_t j = 0; j < num_samples; j++) {
+ VkOffset2D offset = sample_locs[i][j];
+ max_sample_dist = MAX2(max_sample_dist, MAX2(abs(offset.x), abs(offset.y)));
+ }
+ }
+
+ /* Emit the specified user sample locations. */
+ switch (num_samples) {
+ case 2:
+ case 4:
+ radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0,
+ sample_locs_pixel[0][0]);
+ radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0,
+ sample_locs_pixel[1][0]);
+ radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0,
+ sample_locs_pixel[2][0]);
+ radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0,
+ sample_locs_pixel[3][0]);
+ break;
+ case 8:
+ radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0,
+ sample_locs_pixel[0][0]);
+ radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0,
+ sample_locs_pixel[1][0]);
+ radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0,
+ sample_locs_pixel[2][0]);
+ radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0,
+ sample_locs_pixel[3][0]);
+ radeon_set_context_reg(cs, R_028BFC_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1,
+ sample_locs_pixel[0][1]);
+ radeon_set_context_reg(cs, R_028C0C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1,
+ sample_locs_pixel[1][1]);
+ radeon_set_context_reg(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1,
+ sample_locs_pixel[2][1]);
+ radeon_set_context_reg(cs, R_028C2C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1,
+ sample_locs_pixel[3][1]);
+ break;
+ default:
+ unreachable("invalid number of samples");
+ }
+
+ /* Emit the maximum sample distance and the centroid priority. */
+ radeon_set_context_reg_rmw(cs, R_028BE0_PA_SC_AA_CONFIG,
+ S_028BE0_MAX_SAMPLE_DIST(max_sample_dist), ~C_028BE0_MAX_SAMPLE_DIST);
+
+ radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);
+ radeon_emit(cs, centroid_priority);
+ radeon_emit(cs, centroid_priority >> 32);
+
+ /* GFX9: Flush DFSM when the AA mode changes. */
+ if (cmd_buffer->device->dfsm_allowed) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
+ }
+
+ cmd_buffer->state.context_roll_without_scissor_emitted = true;
}
static void
-radv_emit_inline_push_consts(struct radv_cmd_buffer *cmd_buffer,
- struct radv_pipeline *pipeline,
- gl_shader_stage stage,
- int idx, int count, uint32_t *values)
+radv_emit_inline_push_consts(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pipeline,
+ gl_shader_stage stage, int idx, int count, uint32_t *values)
{
- struct radv_userdata_info *loc = radv_lookup_user_sgpr(pipeline, stage, idx);
- uint32_t base_reg = pipeline->user_data_0[stage];
- if (loc->sgpr_idx == -1)
- return;
+ struct radv_userdata_info *loc = radv_lookup_user_sgpr(pipeline, stage, idx);
+ uint32_t base_reg = pipeline->user_data_0[stage];
+ if (loc->sgpr_idx == -1)
+ return;
- assert(loc->num_sgprs == count);
+ assert(loc->num_sgprs == count);
- radeon_set_sh_reg_seq(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, count);
- radeon_emit_array(cmd_buffer->cs, values, count);
+ radeon_set_sh_reg_seq(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, count);
+ radeon_emit_array(cmd_buffer->cs, values, count);
}
static void
-radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer,
- struct radv_pipeline *pipeline)
+radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pipeline)
{
- int num_samples = pipeline->graphics.ms.num_samples;
- struct radv_pipeline *old_pipeline = cmd_buffer->state.emitted_pipeline;
+ int num_samples = pipeline->graphics.ms.num_samples;
+ struct radv_pipeline *old_pipeline = cmd_buffer->state.emitted_pipeline;
- if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.ps.needs_sample_positions)
- cmd_buffer->sample_positions_needed = true;
+ if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.ps.needs_sample_positions)
+ cmd_buffer->sample_positions_needed = true;
- if (old_pipeline && num_samples == old_pipeline->graphics.ms.num_samples)
- return;
+ if (old_pipeline && num_samples == old_pipeline->graphics.ms.num_samples)
+ return;
- radv_emit_default_sample_locations(cmd_buffer->cs, num_samples);
+ radv_emit_default_sample_locations(cmd_buffer->cs, num_samples);
- cmd_buffer->state.context_roll_without_scissor_emitted = true;
+ cmd_buffer->state.context_roll_without_scissor_emitted = true;
}
static void
-radv_update_binning_state(struct radv_cmd_buffer *cmd_buffer,
- struct radv_pipeline *pipeline)
+radv_update_binning_state(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pipeline)
{
- const struct radv_pipeline *old_pipeline = cmd_buffer->state.emitted_pipeline;
-
+ const struct radv_pipeline *old_pipeline = cmd_buffer->state.emitted_pipeline;
- if (pipeline->device->physical_device->rad_info.chip_class < GFX9)
- return;
+ if (pipeline->device->physical_device->rad_info.chip_class < GFX9)
+ return;
- if (old_pipeline &&
- old_pipeline->graphics.binning.pa_sc_binner_cntl_0 == pipeline->graphics.binning.pa_sc_binner_cntl_0 &&
- old_pipeline->graphics.binning.db_dfsm_control == pipeline->graphics.binning.db_dfsm_control)
- return;
+ if (old_pipeline &&
+ old_pipeline->graphics.binning.pa_sc_binner_cntl_0 ==
+ pipeline->graphics.binning.pa_sc_binner_cntl_0 &&
+ old_pipeline->graphics.binning.db_dfsm_control == pipeline->graphics.binning.db_dfsm_control)
+ return;
- bool binning_flush = false;
- if (cmd_buffer->device->physical_device->rad_info.family == CHIP_VEGA12 ||
- cmd_buffer->device->physical_device->rad_info.family == CHIP_VEGA20 ||
- cmd_buffer->device->physical_device->rad_info.family == CHIP_RAVEN2 ||
- cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
- binning_flush = !old_pipeline ||
- G_028C44_BINNING_MODE(old_pipeline->graphics.binning.pa_sc_binner_cntl_0) !=
- G_028C44_BINNING_MODE(pipeline->graphics.binning.pa_sc_binner_cntl_0);
- }
+ bool binning_flush = false;
+ if (cmd_buffer->device->physical_device->rad_info.family == CHIP_VEGA12 ||
+ cmd_buffer->device->physical_device->rad_info.family == CHIP_VEGA20 ||
+ cmd_buffer->device->physical_device->rad_info.family == CHIP_RAVEN2 ||
+ cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
+ binning_flush = !old_pipeline ||
+ G_028C44_BINNING_MODE(old_pipeline->graphics.binning.pa_sc_binner_cntl_0) !=
+ G_028C44_BINNING_MODE(pipeline->graphics.binning.pa_sc_binner_cntl_0);
+ }
- radeon_set_context_reg(cmd_buffer->cs, R_028C44_PA_SC_BINNER_CNTL_0,
- pipeline->graphics.binning.pa_sc_binner_cntl_0 |
- S_028C44_FLUSH_ON_BINNING_TRANSITION(!!binning_flush));
+ radeon_set_context_reg(cmd_buffer->cs, R_028C44_PA_SC_BINNER_CNTL_0,
+ pipeline->graphics.binning.pa_sc_binner_cntl_0 |
+ S_028C44_FLUSH_ON_BINNING_TRANSITION(!!binning_flush));
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
- radeon_set_context_reg(cmd_buffer->cs, R_028038_DB_DFSM_CONTROL,
- pipeline->graphics.binning.db_dfsm_control);
- } else {
- radeon_set_context_reg(cmd_buffer->cs, R_028060_DB_DFSM_CONTROL,
- pipeline->graphics.binning.db_dfsm_control);
- }
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
+ radeon_set_context_reg(cmd_buffer->cs, R_028038_DB_DFSM_CONTROL,
+ pipeline->graphics.binning.db_dfsm_control);
+ } else {
+ radeon_set_context_reg(cmd_buffer->cs, R_028060_DB_DFSM_CONTROL,
+ pipeline->graphics.binning.db_dfsm_control);
+ }
- cmd_buffer->state.context_roll_without_scissor_emitted = true;
+ cmd_buffer->state.context_roll_without_scissor_emitted = true;
}
-
static void
-radv_emit_shader_prefetch(struct radv_cmd_buffer *cmd_buffer,
- struct radv_shader_variant *shader)
+radv_emit_shader_prefetch(struct radv_cmd_buffer *cmd_buffer, struct radv_shader_variant *shader)
{
- uint64_t va;
+ uint64_t va;
- if (!shader)
- return;
+ if (!shader)
+ return;
- va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
+ va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
- si_cp_dma_prefetch(cmd_buffer, va, shader->code_size);
+ si_cp_dma_prefetch(cmd_buffer, va, shader->code_size);
}
static void
-radv_emit_prefetch_L2(struct radv_cmd_buffer *cmd_buffer,
- struct radv_pipeline *pipeline,
- bool vertex_stage_only)
+radv_emit_prefetch_L2(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pipeline,
+ bool vertex_stage_only)
{
- struct radv_cmd_state *state = &cmd_buffer->state;
- uint32_t mask = state->prefetch_L2_mask;
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ uint32_t mask = state->prefetch_L2_mask;
- if (vertex_stage_only) {
- /* Fast prefetch path for starting draws as soon as possible.
- */
- mask = state->prefetch_L2_mask & (RADV_PREFETCH_VS |
- RADV_PREFETCH_VBO_DESCRIPTORS);
- }
+ if (vertex_stage_only) {
+ /* Fast prefetch path for starting draws as soon as possible.
+ */
+ mask = state->prefetch_L2_mask & (RADV_PREFETCH_VS | RADV_PREFETCH_VBO_DESCRIPTORS);
+ }
- if (mask & RADV_PREFETCH_VS)
- radv_emit_shader_prefetch(cmd_buffer,
- pipeline->shaders[MESA_SHADER_VERTEX]);
+ if (mask & RADV_PREFETCH_VS)
+ radv_emit_shader_prefetch(cmd_buffer, pipeline->shaders[MESA_SHADER_VERTEX]);
- if (mask & RADV_PREFETCH_VBO_DESCRIPTORS)
- si_cp_dma_prefetch(cmd_buffer, state->vb_va, state->vb_size);
+ if (mask & RADV_PREFETCH_VBO_DESCRIPTORS)
+ si_cp_dma_prefetch(cmd_buffer, state->vb_va, state->vb_size);
- if (mask & RADV_PREFETCH_TCS)
- radv_emit_shader_prefetch(cmd_buffer,
- pipeline->shaders[MESA_SHADER_TESS_CTRL]);
+ if (mask & RADV_PREFETCH_TCS)
+ radv_emit_shader_prefetch(cmd_buffer, pipeline->shaders[MESA_SHADER_TESS_CTRL]);
- if (mask & RADV_PREFETCH_TES)
- radv_emit_shader_prefetch(cmd_buffer,
- pipeline->shaders[MESA_SHADER_TESS_EVAL]);
+ if (mask & RADV_PREFETCH_TES)
+ radv_emit_shader_prefetch(cmd_buffer, pipeline->shaders[MESA_SHADER_TESS_EVAL]);
- if (mask & RADV_PREFETCH_GS) {
- radv_emit_shader_prefetch(cmd_buffer,
- pipeline->shaders[MESA_SHADER_GEOMETRY]);
- if (radv_pipeline_has_gs_copy_shader(pipeline))
- radv_emit_shader_prefetch(cmd_buffer, pipeline->gs_copy_shader);
- }
+ if (mask & RADV_PREFETCH_GS) {
+ radv_emit_shader_prefetch(cmd_buffer, pipeline->shaders[MESA_SHADER_GEOMETRY]);
+ if (radv_pipeline_has_gs_copy_shader(pipeline))
+ radv_emit_shader_prefetch(cmd_buffer, pipeline->gs_copy_shader);
+ }
- if (mask & RADV_PREFETCH_PS)
- radv_emit_shader_prefetch(cmd_buffer,
- pipeline->shaders[MESA_SHADER_FRAGMENT]);
+ if (mask & RADV_PREFETCH_PS)
+ radv_emit_shader_prefetch(cmd_buffer, pipeline->shaders[MESA_SHADER_FRAGMENT]);
- state->prefetch_L2_mask &= ~mask;
+ state->prefetch_L2_mask &= ~mask;
}
static void
radv_emit_rbplus_state(struct radv_cmd_buffer *cmd_buffer)
{
- if (!cmd_buffer->device->physical_device->rad_info.rbplus_allowed)
- return;
-
- struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
- const struct radv_subpass *subpass = cmd_buffer->state.subpass;
-
- unsigned sx_ps_downconvert = 0;
- unsigned sx_blend_opt_epsilon = 0;
- unsigned sx_blend_opt_control = 0;
-
- if (!cmd_buffer->state.attachments || !subpass)
- return;
-
- for (unsigned i = 0; i < subpass->color_count; ++i) {
- if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED) {
- /* We don't set the DISABLE bits, because the HW can't have holes,
- * so the SPI color format is set to 32-bit 1-component. */
- sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4);
- continue;
- }
-
- int idx = subpass->color_attachments[i].attachment;
- struct radv_color_buffer_info *cb = &cmd_buffer->state.attachments[idx].cb;
-
- unsigned format = G_028C70_FORMAT(cb->cb_color_info);
- unsigned swap = G_028C70_COMP_SWAP(cb->cb_color_info);
- uint32_t spi_format = (pipeline->graphics.col_format >> (i * 4)) & 0xf;
- uint32_t colormask = (pipeline->graphics.cb_target_mask >> (i * 4)) & 0xf;
-
- bool has_alpha, has_rgb;
-
- /* Set if RGB and A are present. */
- has_alpha = !G_028C74_FORCE_DST_ALPHA_1(cb->cb_color_attrib);
-
- if (format == V_028C70_COLOR_8 ||
- format == V_028C70_COLOR_16 ||
- format == V_028C70_COLOR_32)
- has_rgb = !has_alpha;
- else
- has_rgb = true;
-
- /* Check the colormask and export format. */
- if (!(colormask & 0x7))
- has_rgb = false;
- if (!(colormask & 0x8))
- has_alpha = false;
-
- if (spi_format == V_028714_SPI_SHADER_ZERO) {
- has_rgb = false;
- has_alpha = false;
- }
-
- /* The HW doesn't quite blend correctly with rgb9e5 if we disable the alpha
- * optimization, even though it has no alpha. */
- if (has_rgb && format == V_028C70_COLOR_5_9_9_9)
- has_alpha = true;
-
- /* Disable value checking for disabled channels. */
- if (!has_rgb)
- sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4);
- if (!has_alpha)
- sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4);
-
- /* Enable down-conversion for 32bpp and smaller formats. */
- switch (format) {
- case V_028C70_COLOR_8:
- case V_028C70_COLOR_8_8:
- case V_028C70_COLOR_8_8_8_8:
- /* For 1 and 2-channel formats, use the superset thereof. */
- if (spi_format == V_028714_SPI_SHADER_FP16_ABGR ||
- spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
- spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
- sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4);
- sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT << (i * 4);
- }
- break;
-
- case V_028C70_COLOR_5_6_5:
- if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
- sx_ps_downconvert |= V_028754_SX_RT_EXPORT_5_6_5 << (i * 4);
- sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT << (i * 4);
- }
- break;
-
- case V_028C70_COLOR_1_5_5_5:
- if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
- sx_ps_downconvert |= V_028754_SX_RT_EXPORT_1_5_5_5 << (i * 4);
- sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT << (i * 4);
- }
- break;
-
- case V_028C70_COLOR_4_4_4_4:
- if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
- sx_ps_downconvert |= V_028754_SX_RT_EXPORT_4_4_4_4 << (i * 4);
- sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT << (i * 4);
- }
- break;
-
- case V_028C70_COLOR_32:
- if (swap == V_028C70_SWAP_STD &&
- spi_format == V_028714_SPI_SHADER_32_R)
- sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4);
- else if (swap == V_028C70_SWAP_ALT_REV &&
- spi_format == V_028714_SPI_SHADER_32_AR)
- sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A << (i * 4);
- break;
-
- case V_028C70_COLOR_16:
- case V_028C70_COLOR_16_16:
- /* For 1-channel formats, use the superset thereof. */
- if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR ||
- spi_format == V_028714_SPI_SHADER_SNORM16_ABGR ||
- spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
- spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
- if (swap == V_028C70_SWAP_STD ||
- swap == V_028C70_SWAP_STD_REV)
- sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4);
- else
- sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_AR << (i * 4);
- }
- break;
-
- case V_028C70_COLOR_10_11_11:
- if (spi_format == V_028714_SPI_SHADER_FP16_ABGR)
- sx_ps_downconvert |= V_028754_SX_RT_EXPORT_10_11_11 << (i * 4);
- break;
-
- case V_028C70_COLOR_2_10_10_10:
- if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
- sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4);
- sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4);
- }
- break;
- case V_028C70_COLOR_5_9_9_9:
- if (spi_format == V_028714_SPI_SHADER_FP16_ABGR)
- sx_ps_downconvert |= V_028754_SX_RT_EXPORT_9_9_9_E5 << (i * 4);
- break;
- }
- }
-
- /* Do not set the DISABLE bits for the unused attachments, as that
- * breaks dual source blending in SkQP and does not seem to improve
- * performance. */
-
- if (sx_ps_downconvert == cmd_buffer->state.last_sx_ps_downconvert &&
- sx_blend_opt_epsilon == cmd_buffer->state.last_sx_blend_opt_epsilon &&
- sx_blend_opt_control == cmd_buffer->state.last_sx_blend_opt_control)
- return;
-
- radeon_set_context_reg_seq(cmd_buffer->cs, R_028754_SX_PS_DOWNCONVERT, 3);
- radeon_emit(cmd_buffer->cs, sx_ps_downconvert);
- radeon_emit(cmd_buffer->cs, sx_blend_opt_epsilon);
- radeon_emit(cmd_buffer->cs, sx_blend_opt_control);
-
- cmd_buffer->state.context_roll_without_scissor_emitted = true;
-
- cmd_buffer->state.last_sx_ps_downconvert = sx_ps_downconvert;
- cmd_buffer->state.last_sx_blend_opt_epsilon = sx_blend_opt_epsilon;
- cmd_buffer->state.last_sx_blend_opt_control = sx_blend_opt_control;
+ if (!cmd_buffer->device->physical_device->rad_info.rbplus_allowed)
+ return;
+
+ struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
+ const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+
+ unsigned sx_ps_downconvert = 0;
+ unsigned sx_blend_opt_epsilon = 0;
+ unsigned sx_blend_opt_control = 0;
+
+ if (!cmd_buffer->state.attachments || !subpass)
+ return;
+
+ for (unsigned i = 0; i < subpass->color_count; ++i) {
+ if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED) {
+ /* We don't set the DISABLE bits, because the HW can't have holes,
+ * so the SPI color format is set to 32-bit 1-component. */
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4);
+ continue;
+ }
+
+ int idx = subpass->color_attachments[i].attachment;
+ struct radv_color_buffer_info *cb = &cmd_buffer->state.attachments[idx].cb;
+
+ unsigned format = G_028C70_FORMAT(cb->cb_color_info);
+ unsigned swap = G_028C70_COMP_SWAP(cb->cb_color_info);
+ uint32_t spi_format = (pipeline->graphics.col_format >> (i * 4)) & 0xf;
+ uint32_t colormask = (pipeline->graphics.cb_target_mask >> (i * 4)) & 0xf;
+
+ bool has_alpha, has_rgb;
+
+ /* Set if RGB and A are present. */
+ has_alpha = !G_028C74_FORCE_DST_ALPHA_1(cb->cb_color_attrib);
+
+ if (format == V_028C70_COLOR_8 || format == V_028C70_COLOR_16 || format == V_028C70_COLOR_32)
+ has_rgb = !has_alpha;
+ else
+ has_rgb = true;
+
+ /* Check the colormask and export format. */
+ if (!(colormask & 0x7))
+ has_rgb = false;
+ if (!(colormask & 0x8))
+ has_alpha = false;
+
+ if (spi_format == V_028714_SPI_SHADER_ZERO) {
+ has_rgb = false;
+ has_alpha = false;
+ }
+
+ /* The HW doesn't quite blend correctly with rgb9e5 if we disable the alpha
+ * optimization, even though it has no alpha. */
+ if (has_rgb && format == V_028C70_COLOR_5_9_9_9)
+ has_alpha = true;
+
+ /* Disable value checking for disabled channels. */
+ if (!has_rgb)
+ sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4);
+ if (!has_alpha)
+ sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4);
+
+ /* Enable down-conversion for 32bpp and smaller formats. */
+ switch (format) {
+ case V_028C70_COLOR_8:
+ case V_028C70_COLOR_8_8:
+ case V_028C70_COLOR_8_8_8_8:
+ /* For 1 and 2-channel formats, use the superset thereof. */
+ if (spi_format == V_028714_SPI_SHADER_FP16_ABGR ||
+ spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
+ spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4);
+ sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT << (i * 4);
+ }
+ break;
+
+ case V_028C70_COLOR_5_6_5:
+ if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_5_6_5 << (i * 4);
+ sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT << (i * 4);
+ }
+ break;
+
+ case V_028C70_COLOR_1_5_5_5:
+ if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_1_5_5_5 << (i * 4);
+ sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT << (i * 4);
+ }
+ break;
+
+ case V_028C70_COLOR_4_4_4_4:
+ if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_4_4_4_4 << (i * 4);
+ sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT << (i * 4);
+ }
+ break;
+
+ case V_028C70_COLOR_32:
+ if (swap == V_028C70_SWAP_STD && spi_format == V_028714_SPI_SHADER_32_R)
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4);
+ else if (swap == V_028C70_SWAP_ALT_REV && spi_format == V_028714_SPI_SHADER_32_AR)
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A << (i * 4);
+ break;
+
+ case V_028C70_COLOR_16:
+ case V_028C70_COLOR_16_16:
+ /* For 1-channel formats, use the superset thereof. */
+ if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR ||
+ spi_format == V_028714_SPI_SHADER_SNORM16_ABGR ||
+ spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
+ spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
+ if (swap == V_028C70_SWAP_STD || swap == V_028C70_SWAP_STD_REV)
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4);
+ else
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_AR << (i * 4);
+ }
+ break;
+
+ case V_028C70_COLOR_10_11_11:
+ if (spi_format == V_028714_SPI_SHADER_FP16_ABGR)
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_10_11_11 << (i * 4);
+ break;
+
+ case V_028C70_COLOR_2_10_10_10:
+ if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4);
+ sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4);
+ }
+ break;
+ case V_028C70_COLOR_5_9_9_9:
+ if (spi_format == V_028714_SPI_SHADER_FP16_ABGR)
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_9_9_9_E5 << (i * 4);
+ break;
+ }
+ }
+
+ /* Do not set the DISABLE bits for the unused attachments, as that
+ * breaks dual source blending in SkQP and does not seem to improve
+ * performance. */
+
+ if (sx_ps_downconvert == cmd_buffer->state.last_sx_ps_downconvert &&
+ sx_blend_opt_epsilon == cmd_buffer->state.last_sx_blend_opt_epsilon &&
+ sx_blend_opt_control == cmd_buffer->state.last_sx_blend_opt_control)
+ return;
+
+ radeon_set_context_reg_seq(cmd_buffer->cs, R_028754_SX_PS_DOWNCONVERT, 3);
+ radeon_emit(cmd_buffer->cs, sx_ps_downconvert);
+ radeon_emit(cmd_buffer->cs, sx_blend_opt_epsilon);
+ radeon_emit(cmd_buffer->cs, sx_blend_opt_control);
+
+ cmd_buffer->state.context_roll_without_scissor_emitted = true;
+
+ cmd_buffer->state.last_sx_ps_downconvert = sx_ps_downconvert;
+ cmd_buffer->state.last_sx_blend_opt_epsilon = sx_blend_opt_epsilon;
+ cmd_buffer->state.last_sx_blend_opt_control = sx_blend_opt_control;
}
static void
radv_emit_batch_break_on_new_ps(struct radv_cmd_buffer *cmd_buffer)
{
- if (!cmd_buffer->device->pbb_allowed)
- return;
+ if (!cmd_buffer->device->pbb_allowed)
+ return;
- struct radv_binning_settings settings =
- radv_get_binning_settings(cmd_buffer->device->physical_device);
- bool break_for_new_ps =
- (!cmd_buffer->state.emitted_pipeline ||
- cmd_buffer->state.emitted_pipeline->shaders[MESA_SHADER_FRAGMENT] !=
- cmd_buffer->state.pipeline->shaders[MESA_SHADER_FRAGMENT]) &&
- (settings.context_states_per_bin > 1 ||
- settings.persistent_states_per_bin > 1);
- bool break_for_new_cb_target_mask =
- (!cmd_buffer->state.emitted_pipeline ||
- cmd_buffer->state.emitted_pipeline->graphics.cb_target_mask !=
- cmd_buffer->state.pipeline->graphics.cb_target_mask) &&
- settings.context_states_per_bin > 1;
+ struct radv_binning_settings settings =
+ radv_get_binning_settings(cmd_buffer->device->physical_device);
+ bool break_for_new_ps =
+ (!cmd_buffer->state.emitted_pipeline ||
+ cmd_buffer->state.emitted_pipeline->shaders[MESA_SHADER_FRAGMENT] !=
+ cmd_buffer->state.pipeline->shaders[MESA_SHADER_FRAGMENT]) &&
+ (settings.context_states_per_bin > 1 || settings.persistent_states_per_bin > 1);
+ bool break_for_new_cb_target_mask =
+ (!cmd_buffer->state.emitted_pipeline ||
+ cmd_buffer->state.emitted_pipeline->graphics.cb_target_mask !=
+ cmd_buffer->state.pipeline->graphics.cb_target_mask) &&
+ settings.context_states_per_bin > 1;
- if (!break_for_new_ps && !break_for_new_cb_target_mask)
- return;
+ if (!break_for_new_ps && !break_for_new_cb_target_mask)
+ return;
- radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
+ radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
}
static void
radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
+ struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
- if (!pipeline || cmd_buffer->state.emitted_pipeline == pipeline)
- return;
+ if (!pipeline || cmd_buffer->state.emitted_pipeline == pipeline)
+ return;
- radv_update_multisample_state(cmd_buffer, pipeline);
- radv_update_binning_state(cmd_buffer, pipeline);
+ radv_update_multisample_state(cmd_buffer, pipeline);
+ radv_update_binning_state(cmd_buffer, pipeline);
- cmd_buffer->scratch_size_per_wave_needed = MAX2(cmd_buffer->scratch_size_per_wave_needed,
- pipeline->scratch_bytes_per_wave);
- cmd_buffer->scratch_waves_wanted = MAX2(cmd_buffer->scratch_waves_wanted,
- pipeline->max_waves);
+ cmd_buffer->scratch_size_per_wave_needed =
+ MAX2(cmd_buffer->scratch_size_per_wave_needed, pipeline->scratch_bytes_per_wave);
+ cmd_buffer->scratch_waves_wanted = MAX2(cmd_buffer->scratch_waves_wanted, pipeline->max_waves);
- if (!cmd_buffer->state.emitted_pipeline ||
- cmd_buffer->state.emitted_pipeline->graphics.can_use_guardband !=
- pipeline->graphics.can_use_guardband)
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_SCISSOR;
+ if (!cmd_buffer->state.emitted_pipeline ||
+ cmd_buffer->state.emitted_pipeline->graphics.can_use_guardband !=
+ pipeline->graphics.can_use_guardband)
+ cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_SCISSOR;
- if (!cmd_buffer->state.emitted_pipeline ||
- cmd_buffer->state.emitted_pipeline->graphics.pa_su_sc_mode_cntl !=
- pipeline->graphics.pa_su_sc_mode_cntl)
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_CULL_MODE |
- RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE;
+ if (!cmd_buffer->state.emitted_pipeline ||
+ cmd_buffer->state.emitted_pipeline->graphics.pa_su_sc_mode_cntl !=
+ pipeline->graphics.pa_su_sc_mode_cntl)
+ cmd_buffer->state.dirty |=
+ RADV_CMD_DIRTY_DYNAMIC_CULL_MODE | RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE;
- if (!cmd_buffer->state.emitted_pipeline)
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY;
+ if (!cmd_buffer->state.emitted_pipeline)
+ cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY;
- if (!cmd_buffer->state.emitted_pipeline ||
- cmd_buffer->state.emitted_pipeline->graphics.db_depth_control !=
- pipeline->graphics.db_depth_control)
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE |
- RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE |
- RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP |
- RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE |
- RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE |
- RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP;
+ if (!cmd_buffer->state.emitted_pipeline ||
+ cmd_buffer->state.emitted_pipeline->graphics.db_depth_control !=
+ pipeline->graphics.db_depth_control)
+ cmd_buffer->state.dirty |=
+ RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE | RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE |
+ RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP | RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE |
+ RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE | RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP;
- if (!cmd_buffer->state.emitted_pipeline)
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP;
+ if (!cmd_buffer->state.emitted_pipeline)
+ cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP;
- radeon_emit_array(cmd_buffer->cs, pipeline->cs.buf, pipeline->cs.cdw);
+ radeon_emit_array(cmd_buffer->cs, pipeline->cs.buf, pipeline->cs.cdw);
- if (!cmd_buffer->state.emitted_pipeline ||
- cmd_buffer->state.emitted_pipeline->ctx_cs.cdw != pipeline->ctx_cs.cdw ||
- cmd_buffer->state.emitted_pipeline->ctx_cs_hash != pipeline->ctx_cs_hash ||
- memcmp(cmd_buffer->state.emitted_pipeline->ctx_cs.buf,
- pipeline->ctx_cs.buf, pipeline->ctx_cs.cdw * 4)) {
- radeon_emit_array(cmd_buffer->cs, pipeline->ctx_cs.buf, pipeline->ctx_cs.cdw);
- cmd_buffer->state.context_roll_without_scissor_emitted = true;
- }
+ if (!cmd_buffer->state.emitted_pipeline ||
+ cmd_buffer->state.emitted_pipeline->ctx_cs.cdw != pipeline->ctx_cs.cdw ||
+ cmd_buffer->state.emitted_pipeline->ctx_cs_hash != pipeline->ctx_cs_hash ||
+ memcmp(cmd_buffer->state.emitted_pipeline->ctx_cs.buf, pipeline->ctx_cs.buf,
+ pipeline->ctx_cs.cdw * 4)) {
+ radeon_emit_array(cmd_buffer->cs, pipeline->ctx_cs.buf, pipeline->ctx_cs.cdw);
+ cmd_buffer->state.context_roll_without_scissor_emitted = true;
+ }
- radv_emit_batch_break_on_new_ps(cmd_buffer);
+ radv_emit_batch_break_on_new_ps(cmd_buffer);
- for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
- if (!pipeline->shaders[i])
- continue;
+ for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
+ if (!pipeline->shaders[i])
+ continue;
- radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs,
- pipeline->shaders[i]->bo);
- }
+ radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, pipeline->shaders[i]->bo);
+ }
- if (radv_pipeline_has_gs_copy_shader(pipeline))
- radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs,
- pipeline->gs_copy_shader->bo);
+ if (radv_pipeline_has_gs_copy_shader(pipeline))
+ radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, pipeline->gs_copy_shader->bo);
- if (unlikely(cmd_buffer->device->trace_bo))
- radv_save_pipeline(cmd_buffer, pipeline);
+ if (unlikely(cmd_buffer->device->trace_bo))
+ radv_save_pipeline(cmd_buffer, pipeline);
- cmd_buffer->state.emitted_pipeline = pipeline;
+ cmd_buffer->state.emitted_pipeline = pipeline;
- cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_PIPELINE;
+ cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_PIPELINE;
}
static void
radv_emit_viewport(struct radv_cmd_buffer *cmd_buffer)
{
- si_write_viewport(cmd_buffer->cs, 0, cmd_buffer->state.dynamic.viewport.count,
- cmd_buffer->state.dynamic.viewport.viewports);
+ si_write_viewport(cmd_buffer->cs, 0, cmd_buffer->state.dynamic.viewport.count,
+ cmd_buffer->state.dynamic.viewport.viewports);
}
static void
radv_emit_scissor(struct radv_cmd_buffer *cmd_buffer)
{
- uint32_t count = cmd_buffer->state.dynamic.scissor.count;
+ uint32_t count = cmd_buffer->state.dynamic.scissor.count;
- si_write_scissors(cmd_buffer->cs, 0, count,
- cmd_buffer->state.dynamic.scissor.scissors,
- cmd_buffer->state.dynamic.viewport.viewports,
- cmd_buffer->state.emitted_pipeline->graphics.can_use_guardband);
+ si_write_scissors(cmd_buffer->cs, 0, count, cmd_buffer->state.dynamic.scissor.scissors,
+ cmd_buffer->state.dynamic.viewport.viewports,
+ cmd_buffer->state.emitted_pipeline->graphics.can_use_guardband);
- cmd_buffer->state.context_roll_without_scissor_emitted = false;
+ cmd_buffer->state.context_roll_without_scissor_emitted = false;
}
static void
radv_emit_discard_rectangle(struct radv_cmd_buffer *cmd_buffer)
{
- if (!cmd_buffer->state.dynamic.discard_rectangle.count)
- return;
+ if (!cmd_buffer->state.dynamic.discard_rectangle.count)
+ return;
- radeon_set_context_reg_seq(cmd_buffer->cs, R_028210_PA_SC_CLIPRECT_0_TL,
- cmd_buffer->state.dynamic.discard_rectangle.count * 2);
- for (unsigned i = 0; i < cmd_buffer->state.dynamic.discard_rectangle.count; ++i) {
- VkRect2D rect = cmd_buffer->state.dynamic.discard_rectangle.rectangles[i];
- radeon_emit(cmd_buffer->cs, S_028210_TL_X(rect.offset.x) | S_028210_TL_Y(rect.offset.y));
- radeon_emit(cmd_buffer->cs, S_028214_BR_X(rect.offset.x + rect.extent.width) |
- S_028214_BR_Y(rect.offset.y + rect.extent.height));
- }
+ radeon_set_context_reg_seq(cmd_buffer->cs, R_028210_PA_SC_CLIPRECT_0_TL,
+ cmd_buffer->state.dynamic.discard_rectangle.count * 2);
+ for (unsigned i = 0; i < cmd_buffer->state.dynamic.discard_rectangle.count; ++i) {
+ VkRect2D rect = cmd_buffer->state.dynamic.discard_rectangle.rectangles[i];
+ radeon_emit(cmd_buffer->cs, S_028210_TL_X(rect.offset.x) | S_028210_TL_Y(rect.offset.y));
+ radeon_emit(cmd_buffer->cs, S_028214_BR_X(rect.offset.x + rect.extent.width) |
+ S_028214_BR_Y(rect.offset.y + rect.extent.height));
+ }
}
static void
radv_emit_line_width(struct radv_cmd_buffer *cmd_buffer)
{
- unsigned width = cmd_buffer->state.dynamic.line_width * 8;
+ unsigned width = cmd_buffer->state.dynamic.line_width * 8;
- radeon_set_context_reg(cmd_buffer->cs, R_028A08_PA_SU_LINE_CNTL,
- S_028A08_WIDTH(CLAMP(width, 0, 0xFFFF)));
+ radeon_set_context_reg(cmd_buffer->cs, R_028A08_PA_SU_LINE_CNTL,
+ S_028A08_WIDTH(CLAMP(width, 0, 0xFFFF)));
}
static void
radv_emit_blend_constants(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
+ struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
- radeon_set_context_reg_seq(cmd_buffer->cs, R_028414_CB_BLEND_RED, 4);
- radeon_emit_array(cmd_buffer->cs, (uint32_t *)d->blend_constants, 4);
+ radeon_set_context_reg_seq(cmd_buffer->cs, R_028414_CB_BLEND_RED, 4);
+ radeon_emit_array(cmd_buffer->cs, (uint32_t *)d->blend_constants, 4);
}
static void
radv_emit_stencil(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
+ struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
- radeon_set_context_reg_seq(cmd_buffer->cs,
- R_028430_DB_STENCILREFMASK, 2);
- radeon_emit(cmd_buffer->cs,
- S_028430_STENCILTESTVAL(d->stencil_reference.front) |
- S_028430_STENCILMASK(d->stencil_compare_mask.front) |
- S_028430_STENCILWRITEMASK(d->stencil_write_mask.front) |
- S_028430_STENCILOPVAL(1));
- radeon_emit(cmd_buffer->cs,
- S_028434_STENCILTESTVAL_BF(d->stencil_reference.back) |
- S_028434_STENCILMASK_BF(d->stencil_compare_mask.back) |
- S_028434_STENCILWRITEMASK_BF(d->stencil_write_mask.back) |
- S_028434_STENCILOPVAL_BF(1));
+ radeon_set_context_reg_seq(cmd_buffer->cs, R_028430_DB_STENCILREFMASK, 2);
+ radeon_emit(cmd_buffer->cs, S_028430_STENCILTESTVAL(d->stencil_reference.front) |
+ S_028430_STENCILMASK(d->stencil_compare_mask.front) |
+ S_028430_STENCILWRITEMASK(d->stencil_write_mask.front) |
+ S_028430_STENCILOPVAL(1));
+ radeon_emit(cmd_buffer->cs, S_028434_STENCILTESTVAL_BF(d->stencil_reference.back) |
+ S_028434_STENCILMASK_BF(d->stencil_compare_mask.back) |
+ S_028434_STENCILWRITEMASK_BF(d->stencil_write_mask.back) |
+ S_028434_STENCILOPVAL_BF(1));
}
static void
radv_emit_depth_bounds(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
+ struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
- radeon_set_context_reg(cmd_buffer->cs, R_028020_DB_DEPTH_BOUNDS_MIN,
- fui(d->depth_bounds.min));
- radeon_set_context_reg(cmd_buffer->cs, R_028024_DB_DEPTH_BOUNDS_MAX,
- fui(d->depth_bounds.max));
+ radeon_set_context_reg(cmd_buffer->cs, R_028020_DB_DEPTH_BOUNDS_MIN, fui(d->depth_bounds.min));
+ radeon_set_context_reg(cmd_buffer->cs, R_028024_DB_DEPTH_BOUNDS_MAX, fui(d->depth_bounds.max));
}
static void
radv_emit_depth_bias(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
- unsigned slope = fui(d->depth_bias.slope * 16.0f);
+ struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
+ unsigned slope = fui(d->depth_bias.slope * 16.0f);
- radeon_set_context_reg_seq(cmd_buffer->cs,
- R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 5);
- radeon_emit(cmd_buffer->cs, fui(d->depth_bias.clamp)); /* CLAMP */
- radeon_emit(cmd_buffer->cs, slope); /* FRONT SCALE */
- radeon_emit(cmd_buffer->cs, fui(d->depth_bias.bias)); /* FRONT OFFSET */
- radeon_emit(cmd_buffer->cs, slope); /* BACK SCALE */
- radeon_emit(cmd_buffer->cs, fui(d->depth_bias.bias)); /* BACK OFFSET */
+ radeon_set_context_reg_seq(cmd_buffer->cs, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 5);
+ radeon_emit(cmd_buffer->cs, fui(d->depth_bias.clamp)); /* CLAMP */
+ radeon_emit(cmd_buffer->cs, slope); /* FRONT SCALE */
+ radeon_emit(cmd_buffer->cs, fui(d->depth_bias.bias)); /* FRONT OFFSET */
+ radeon_emit(cmd_buffer->cs, slope); /* BACK SCALE */
+ radeon_emit(cmd_buffer->cs, fui(d->depth_bias.bias)); /* BACK OFFSET */
}
static void
radv_emit_line_stipple(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
- uint32_t auto_reset_cntl = 1;
+ struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
+ uint32_t auto_reset_cntl = 1;
- if (d->primitive_topology == V_008958_DI_PT_LINESTRIP)
- auto_reset_cntl = 2;
+ if (d->primitive_topology == V_008958_DI_PT_LINESTRIP)
+ auto_reset_cntl = 2;
- radeon_set_context_reg(cmd_buffer->cs, R_028A0C_PA_SC_LINE_STIPPLE,
- S_028A0C_LINE_PATTERN(d->line_stipple.pattern) |
- S_028A0C_REPEAT_COUNT(d->line_stipple.factor - 1) |
- S_028A0C_AUTO_RESET_CNTL(auto_reset_cntl));
+ radeon_set_context_reg(cmd_buffer->cs, R_028A0C_PA_SC_LINE_STIPPLE,
+ S_028A0C_LINE_PATTERN(d->line_stipple.pattern) |
+ S_028A0C_REPEAT_COUNT(d->line_stipple.factor - 1) |
+ S_028A0C_AUTO_RESET_CNTL(auto_reset_cntl));
}
static void
radv_emit_culling(struct radv_cmd_buffer *cmd_buffer, uint32_t states)
{
- unsigned pa_su_sc_mode_cntl = cmd_buffer->state.pipeline->graphics.pa_su_sc_mode_cntl;
- struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
+ unsigned pa_su_sc_mode_cntl = cmd_buffer->state.pipeline->graphics.pa_su_sc_mode_cntl;
+ struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
- if (states & RADV_CMD_DIRTY_DYNAMIC_CULL_MODE) {
- pa_su_sc_mode_cntl &= C_028814_CULL_FRONT;
- pa_su_sc_mode_cntl |= S_028814_CULL_FRONT(!!(d->cull_mode & VK_CULL_MODE_FRONT_BIT));
+ if (states & RADV_CMD_DIRTY_DYNAMIC_CULL_MODE) {
+ pa_su_sc_mode_cntl &= C_028814_CULL_FRONT;
+ pa_su_sc_mode_cntl |= S_028814_CULL_FRONT(!!(d->cull_mode & VK_CULL_MODE_FRONT_BIT));
- pa_su_sc_mode_cntl &= C_028814_CULL_BACK;
- pa_su_sc_mode_cntl |= S_028814_CULL_BACK(!!(d->cull_mode & VK_CULL_MODE_BACK_BIT));
- }
+ pa_su_sc_mode_cntl &= C_028814_CULL_BACK;
+ pa_su_sc_mode_cntl |= S_028814_CULL_BACK(!!(d->cull_mode & VK_CULL_MODE_BACK_BIT));
+ }
- if (states & RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE) {
- pa_su_sc_mode_cntl &= C_028814_FACE;
- pa_su_sc_mode_cntl |= S_028814_FACE(d->front_face);
- }
+ if (states & RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE) {
+ pa_su_sc_mode_cntl &= C_028814_FACE;
+ pa_su_sc_mode_cntl |= S_028814_FACE(d->front_face);
+ }
- radeon_set_context_reg(cmd_buffer->cs, R_028814_PA_SU_SC_MODE_CNTL,
- pa_su_sc_mode_cntl);
+ radeon_set_context_reg(cmd_buffer->cs, R_028814_PA_SU_SC_MODE_CNTL, pa_su_sc_mode_cntl);
}
static void
radv_emit_primitive_topology(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
+ struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) {
- radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device,
- cmd_buffer->cs,
- R_030908_VGT_PRIMITIVE_TYPE, 1,
- d->primitive_topology);
- } else {
- radeon_set_config_reg(cmd_buffer->cs,
- R_008958_VGT_PRIMITIVE_TYPE,
- d->primitive_topology);
- }
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) {
+ radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device, cmd_buffer->cs,
+ R_030908_VGT_PRIMITIVE_TYPE, 1, d->primitive_topology);
+ } else {
+ radeon_set_config_reg(cmd_buffer->cs, R_008958_VGT_PRIMITIVE_TYPE, d->primitive_topology);
+ }
}
static void
radv_emit_depth_control(struct radv_cmd_buffer *cmd_buffer, uint32_t states)
{
- unsigned db_depth_control = cmd_buffer->state.pipeline->graphics.db_depth_control;
- struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
+ unsigned db_depth_control = cmd_buffer->state.pipeline->graphics.db_depth_control;
+ struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
- if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE) {
- db_depth_control &= C_028800_Z_ENABLE;
- db_depth_control |= S_028800_Z_ENABLE(d->depth_test_enable ? 1 : 0);
- }
+ if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE) {
+ db_depth_control &= C_028800_Z_ENABLE;
+ db_depth_control |= S_028800_Z_ENABLE(d->depth_test_enable ? 1 : 0);
+ }
- if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE) {
- db_depth_control &= C_028800_Z_WRITE_ENABLE;
- db_depth_control |= S_028800_Z_WRITE_ENABLE(d->depth_write_enable ? 1 : 0);
- }
+ if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE) {
+ db_depth_control &= C_028800_Z_WRITE_ENABLE;
+ db_depth_control |= S_028800_Z_WRITE_ENABLE(d->depth_write_enable ? 1 : 0);
+ }
- if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP) {
- db_depth_control &= C_028800_ZFUNC;
- db_depth_control |= S_028800_ZFUNC(d->depth_compare_op);
- }
+ if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP) {
+ db_depth_control &= C_028800_ZFUNC;
+ db_depth_control |= S_028800_ZFUNC(d->depth_compare_op);
+ }
- if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE) {
- db_depth_control &= C_028800_DEPTH_BOUNDS_ENABLE;
- db_depth_control |= S_028800_DEPTH_BOUNDS_ENABLE(d->depth_bounds_test_enable ? 1 : 0);
- }
+ if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE) {
+ db_depth_control &= C_028800_DEPTH_BOUNDS_ENABLE;
+ db_depth_control |= S_028800_DEPTH_BOUNDS_ENABLE(d->depth_bounds_test_enable ? 1 : 0);
+ }
- if (states & RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE) {
- db_depth_control &= C_028800_STENCIL_ENABLE;
- db_depth_control |= S_028800_STENCIL_ENABLE(d->stencil_test_enable ? 1 : 0);
+ if (states & RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE) {
+ db_depth_control &= C_028800_STENCIL_ENABLE;
+ db_depth_control |= S_028800_STENCIL_ENABLE(d->stencil_test_enable ? 1 : 0);
- db_depth_control &= C_028800_BACKFACE_ENABLE;
- db_depth_control |= S_028800_BACKFACE_ENABLE(d->stencil_test_enable ? 1 : 0);
- }
+ db_depth_control &= C_028800_BACKFACE_ENABLE;
+ db_depth_control |= S_028800_BACKFACE_ENABLE(d->stencil_test_enable ? 1 : 0);
+ }
- if (states & RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP) {
- db_depth_control &= C_028800_STENCILFUNC;
- db_depth_control |= S_028800_STENCILFUNC(d->stencil_op.front.compare_op);
+ if (states & RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP) {
+ db_depth_control &= C_028800_STENCILFUNC;
+ db_depth_control |= S_028800_STENCILFUNC(d->stencil_op.front.compare_op);
- db_depth_control &= C_028800_STENCILFUNC_BF;
- db_depth_control |= S_028800_STENCILFUNC_BF(d->stencil_op.back.compare_op);
- }
+ db_depth_control &= C_028800_STENCILFUNC_BF;
+ db_depth_control |= S_028800_STENCILFUNC_BF(d->stencil_op.back.compare_op);
+ }
- radeon_set_context_reg(cmd_buffer->cs, R_028800_DB_DEPTH_CONTROL,
- db_depth_control);
+ radeon_set_context_reg(cmd_buffer->cs, R_028800_DB_DEPTH_CONTROL, db_depth_control);
}
static void
radv_emit_stencil_control(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
+ struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
- radeon_set_context_reg(cmd_buffer->cs, R_02842C_DB_STENCIL_CONTROL,
- S_02842C_STENCILFAIL(si_translate_stencil_op(d->stencil_op.front.fail_op)) |
- S_02842C_STENCILZPASS(si_translate_stencil_op(d->stencil_op.front.pass_op)) |
- S_02842C_STENCILZFAIL(si_translate_stencil_op(d->stencil_op.front.depth_fail_op)) |
- S_02842C_STENCILFAIL_BF(si_translate_stencil_op(d->stencil_op.back.fail_op)) |
- S_02842C_STENCILZPASS_BF(si_translate_stencil_op(d->stencil_op.back.pass_op)) |
- S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(d->stencil_op.back.depth_fail_op)));
+ radeon_set_context_reg(
+ cmd_buffer->cs, R_02842C_DB_STENCIL_CONTROL,
+ S_02842C_STENCILFAIL(si_translate_stencil_op(d->stencil_op.front.fail_op)) |
+ S_02842C_STENCILZPASS(si_translate_stencil_op(d->stencil_op.front.pass_op)) |
+ S_02842C_STENCILZFAIL(si_translate_stencil_op(d->stencil_op.front.depth_fail_op)) |
+ S_02842C_STENCILFAIL_BF(si_translate_stencil_op(d->stencil_op.back.fail_op)) |
+ S_02842C_STENCILZPASS_BF(si_translate_stencil_op(d->stencil_op.back.pass_op)) |
+ S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(d->stencil_op.back.depth_fail_op)));
}
static void
radv_emit_fragment_shading_rate(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
- struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
- uint32_t rate_x = MIN2(2, d->fragment_shading_rate.size.width) - 1;
- uint32_t rate_y = MIN2(2, d->fragment_shading_rate.size.height) - 1;
- uint32_t pa_cl_vrs_cntl = pipeline->graphics.vrs.pa_cl_vrs_cntl;
+ struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
+ struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
+ uint32_t rate_x = MIN2(2, d->fragment_shading_rate.size.width) - 1;
+ uint32_t rate_y = MIN2(2, d->fragment_shading_rate.size.height) - 1;
+ uint32_t pa_cl_vrs_cntl = pipeline->graphics.vrs.pa_cl_vrs_cntl;
- /* Emit per-draw VRS rate which is the first combiner. */
- radeon_set_uconfig_reg(cmd_buffer->cs, R_03098C_GE_VRS_RATE,
- S_03098C_RATE_X(rate_x) |
- S_03098C_RATE_Y(rate_y));
+ /* Emit per-draw VRS rate which is the first combiner. */
+ radeon_set_uconfig_reg(cmd_buffer->cs, R_03098C_GE_VRS_RATE,
+ S_03098C_RATE_X(rate_x) | S_03098C_RATE_Y(rate_y));
- /* VERTEX_RATE_COMBINER_MODE controls the combiner mode between the
- * draw rate and the vertex rate.
- */
- pa_cl_vrs_cntl |= S_028848_VERTEX_RATE_COMBINER_MODE(d->fragment_shading_rate.combiner_ops[0]);
+ /* VERTEX_RATE_COMBINER_MODE controls the combiner mode between the
+ * draw rate and the vertex rate.
+ */
+ pa_cl_vrs_cntl |= S_028848_VERTEX_RATE_COMBINER_MODE(d->fragment_shading_rate.combiner_ops[0]);
- radeon_set_context_reg(cmd_buffer->cs, R_028848_PA_CL_VRS_CNTL, pa_cl_vrs_cntl);
+ radeon_set_context_reg(cmd_buffer->cs, R_028848_PA_CL_VRS_CNTL, pa_cl_vrs_cntl);
}
static void
-radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer,
- int index,
- struct radv_color_buffer_info *cb,
- struct radv_image_view *iview,
- VkImageLayout layout,
- bool in_render_loop,
- bool disable_dcc)
-{
- bool is_vi = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX8;
- uint32_t cb_color_info = cb->cb_color_info;
- struct radv_image *image = iview->image;
-
- if (!radv_layout_dcc_compressed(cmd_buffer->device, image, layout, in_render_loop,
- radv_image_queue_family_mask(image,
- cmd_buffer->queue_family_index,
- cmd_buffer->queue_family_index)) ||
- disable_dcc) {
- cb_color_info &= C_028C70_DCC_ENABLE;
- }
-
- if (!radv_layout_fmask_compressed(cmd_buffer->device, image, layout,
- radv_image_queue_family_mask(image,
- cmd_buffer->queue_family_index,
- cmd_buffer->queue_family_index))) {
- cb_color_info &= C_028C70_COMPRESSION;
- }
-
- if (radv_image_is_tc_compat_cmask(image) &&
- (radv_is_fmask_decompress_pipeline(cmd_buffer) ||
- radv_is_dcc_decompress_pipeline(cmd_buffer))) {
- /* If this bit is set, the FMASK decompression operation
- * doesn't occur (DCC_COMPRESS also implies FMASK_DECOMPRESS).
- */
- cb_color_info &= C_028C70_FMASK_COMPRESS_1FRAG_ONLY;
- }
-
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
- radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11);
- radeon_emit(cmd_buffer->cs, cb->cb_color_base);
- radeon_emit(cmd_buffer->cs, 0);
- radeon_emit(cmd_buffer->cs, 0);
- radeon_emit(cmd_buffer->cs, cb->cb_color_view);
- radeon_emit(cmd_buffer->cs, cb_color_info);
- radeon_emit(cmd_buffer->cs, cb->cb_color_attrib);
- radeon_emit(cmd_buffer->cs, cb->cb_dcc_control);
- radeon_emit(cmd_buffer->cs, cb->cb_color_cmask);
- radeon_emit(cmd_buffer->cs, 0);
- radeon_emit(cmd_buffer->cs, cb->cb_color_fmask);
- radeon_emit(cmd_buffer->cs, 0);
-
- radeon_set_context_reg_seq(cmd_buffer->cs, R_028C94_CB_COLOR0_DCC_BASE + index * 0x3c, 1);
- radeon_emit(cmd_buffer->cs, cb->cb_dcc_base);
-
- radeon_set_context_reg(cmd_buffer->cs, R_028E40_CB_COLOR0_BASE_EXT + index * 4,
- cb->cb_color_base >> 32);
- radeon_set_context_reg(cmd_buffer->cs, R_028E60_CB_COLOR0_CMASK_BASE_EXT + index * 4,
- cb->cb_color_cmask >> 32);
- radeon_set_context_reg(cmd_buffer->cs, R_028E80_CB_COLOR0_FMASK_BASE_EXT + index * 4,
- cb->cb_color_fmask >> 32);
- radeon_set_context_reg(cmd_buffer->cs, R_028EA0_CB_COLOR0_DCC_BASE_EXT + index * 4,
- cb->cb_dcc_base >> 32);
- radeon_set_context_reg(cmd_buffer->cs, R_028EC0_CB_COLOR0_ATTRIB2 + index * 4,
- cb->cb_color_attrib2);
- radeon_set_context_reg(cmd_buffer->cs, R_028EE0_CB_COLOR0_ATTRIB3 + index * 4,
- cb->cb_color_attrib3);
- } else if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) {
- radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11);
- radeon_emit(cmd_buffer->cs, cb->cb_color_base);
- radeon_emit(cmd_buffer->cs, S_028C64_BASE_256B(cb->cb_color_base >> 32));
- radeon_emit(cmd_buffer->cs, cb->cb_color_attrib2);
- radeon_emit(cmd_buffer->cs, cb->cb_color_view);
- radeon_emit(cmd_buffer->cs, cb_color_info);
- radeon_emit(cmd_buffer->cs, cb->cb_color_attrib);
- radeon_emit(cmd_buffer->cs, cb->cb_dcc_control);
- radeon_emit(cmd_buffer->cs, cb->cb_color_cmask);
- radeon_emit(cmd_buffer->cs, S_028C80_BASE_256B(cb->cb_color_cmask >> 32));
- radeon_emit(cmd_buffer->cs, cb->cb_color_fmask);
- radeon_emit(cmd_buffer->cs, S_028C88_BASE_256B(cb->cb_color_fmask >> 32));
-
- radeon_set_context_reg_seq(cmd_buffer->cs, R_028C94_CB_COLOR0_DCC_BASE + index * 0x3c, 2);
- radeon_emit(cmd_buffer->cs, cb->cb_dcc_base);
- radeon_emit(cmd_buffer->cs, S_028C98_BASE_256B(cb->cb_dcc_base >> 32));
-
- radeon_set_context_reg(cmd_buffer->cs, R_0287A0_CB_MRT0_EPITCH + index * 4,
- cb->cb_mrt_epitch);
- } else {
- radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11);
- radeon_emit(cmd_buffer->cs, cb->cb_color_base);
- radeon_emit(cmd_buffer->cs, cb->cb_color_pitch);
- radeon_emit(cmd_buffer->cs, cb->cb_color_slice);
- radeon_emit(cmd_buffer->cs, cb->cb_color_view);
- radeon_emit(cmd_buffer->cs, cb_color_info);
- radeon_emit(cmd_buffer->cs, cb->cb_color_attrib);
- radeon_emit(cmd_buffer->cs, cb->cb_dcc_control);
- radeon_emit(cmd_buffer->cs, cb->cb_color_cmask);
- radeon_emit(cmd_buffer->cs, cb->cb_color_cmask_slice);
- radeon_emit(cmd_buffer->cs, cb->cb_color_fmask);
- radeon_emit(cmd_buffer->cs, cb->cb_color_fmask_slice);
-
- if (is_vi) { /* DCC BASE */
- radeon_set_context_reg(cmd_buffer->cs, R_028C94_CB_COLOR0_DCC_BASE + index * 0x3c, cb->cb_dcc_base);
- }
- }
-
- if (radv_dcc_enabled(image, iview->base_mip)) {
- /* Drawing with DCC enabled also compresses colorbuffers. */
- VkImageSubresourceRange range = {
- .aspectMask = iview->aspect_mask,
- .baseMipLevel = iview->base_mip,
- .levelCount = iview->level_count,
- .baseArrayLayer = iview->base_layer,
- .layerCount = iview->layer_count,
- };
-
- radv_update_dcc_metadata(cmd_buffer, image, &range, true);
- }
+radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer, int index,
+ struct radv_color_buffer_info *cb, struct radv_image_view *iview,
+ VkImageLayout layout, bool in_render_loop, bool disable_dcc)
+{
+ bool is_vi = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX8;
+ uint32_t cb_color_info = cb->cb_color_info;
+ struct radv_image *image = iview->image;
+
+ if (!radv_layout_dcc_compressed(
+ cmd_buffer->device, image, layout, in_render_loop,
+ radv_image_queue_family_mask(image, cmd_buffer->queue_family_index,
+ cmd_buffer->queue_family_index)) ||
+ disable_dcc) {
+ cb_color_info &= C_028C70_DCC_ENABLE;
+ }
+
+ if (!radv_layout_fmask_compressed(
+ cmd_buffer->device, image, layout,
+ radv_image_queue_family_mask(image, cmd_buffer->queue_family_index,
+ cmd_buffer->queue_family_index))) {
+ cb_color_info &= C_028C70_COMPRESSION;
+ }
+
+ if (radv_image_is_tc_compat_cmask(image) && (radv_is_fmask_decompress_pipeline(cmd_buffer) ||
+ radv_is_dcc_decompress_pipeline(cmd_buffer))) {
+ /* If this bit is set, the FMASK decompression operation
+ * doesn't occur (DCC_COMPRESS also implies FMASK_DECOMPRESS).
+ */
+ cb_color_info &= C_028C70_FMASK_COMPRESS_1FRAG_ONLY;
+ }
+
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
+ radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11);
+ radeon_emit(cmd_buffer->cs, cb->cb_color_base);
+ radeon_emit(cmd_buffer->cs, 0);
+ radeon_emit(cmd_buffer->cs, 0);
+ radeon_emit(cmd_buffer->cs, cb->cb_color_view);
+ radeon_emit(cmd_buffer->cs, cb_color_info);
+ radeon_emit(cmd_buffer->cs, cb->cb_color_attrib);
+ radeon_emit(cmd_buffer->cs, cb->cb_dcc_control);
+ radeon_emit(cmd_buffer->cs, cb->cb_color_cmask);
+ radeon_emit(cmd_buffer->cs, 0);
+ radeon_emit(cmd_buffer->cs, cb->cb_color_fmask);
+ radeon_emit(cmd_buffer->cs, 0);
+
+ radeon_set_context_reg_seq(cmd_buffer->cs, R_028C94_CB_COLOR0_DCC_BASE + index * 0x3c, 1);
+ radeon_emit(cmd_buffer->cs, cb->cb_dcc_base);
+
+ radeon_set_context_reg(cmd_buffer->cs, R_028E40_CB_COLOR0_BASE_EXT + index * 4,
+ cb->cb_color_base >> 32);
+ radeon_set_context_reg(cmd_buffer->cs, R_028E60_CB_COLOR0_CMASK_BASE_EXT + index * 4,
+ cb->cb_color_cmask >> 32);
+ radeon_set_context_reg(cmd_buffer->cs, R_028E80_CB_COLOR0_FMASK_BASE_EXT + index * 4,
+ cb->cb_color_fmask >> 32);
+ radeon_set_context_reg(cmd_buffer->cs, R_028EA0_CB_COLOR0_DCC_BASE_EXT + index * 4,
+ cb->cb_dcc_base >> 32);
+ radeon_set_context_reg(cmd_buffer->cs, R_028EC0_CB_COLOR0_ATTRIB2 + index * 4,
+ cb->cb_color_attrib2);
+ radeon_set_context_reg(cmd_buffer->cs, R_028EE0_CB_COLOR0_ATTRIB3 + index * 4,
+ cb->cb_color_attrib3);
+ } else if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) {
+ radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11);
+ radeon_emit(cmd_buffer->cs, cb->cb_color_base);
+ radeon_emit(cmd_buffer->cs, S_028C64_BASE_256B(cb->cb_color_base >> 32));
+ radeon_emit(cmd_buffer->cs, cb->cb_color_attrib2);
+ radeon_emit(cmd_buffer->cs, cb->cb_color_view);
+ radeon_emit(cmd_buffer->cs, cb_color_info);
+ radeon_emit(cmd_buffer->cs, cb->cb_color_attrib);
+ radeon_emit(cmd_buffer->cs, cb->cb_dcc_control);
+ radeon_emit(cmd_buffer->cs, cb->cb_color_cmask);
+ radeon_emit(cmd_buffer->cs, S_028C80_BASE_256B(cb->cb_color_cmask >> 32));
+ radeon_emit(cmd_buffer->cs, cb->cb_color_fmask);
+ radeon_emit(cmd_buffer->cs, S_028C88_BASE_256B(cb->cb_color_fmask >> 32));
+
+ radeon_set_context_reg_seq(cmd_buffer->cs, R_028C94_CB_COLOR0_DCC_BASE + index * 0x3c, 2);
+ radeon_emit(cmd_buffer->cs, cb->cb_dcc_base);
+ radeon_emit(cmd_buffer->cs, S_028C98_BASE_256B(cb->cb_dcc_base >> 32));
+
+ radeon_set_context_reg(cmd_buffer->cs, R_0287A0_CB_MRT0_EPITCH + index * 4,
+ cb->cb_mrt_epitch);
+ } else {
+ radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11);
+ radeon_emit(cmd_buffer->cs, cb->cb_color_base);
+ radeon_emit(cmd_buffer->cs, cb->cb_color_pitch);
+ radeon_emit(cmd_buffer->cs, cb->cb_color_slice);
+ radeon_emit(cmd_buffer->cs, cb->cb_color_view);
+ radeon_emit(cmd_buffer->cs, cb_color_info);
+ radeon_emit(cmd_buffer->cs, cb->cb_color_attrib);
+ radeon_emit(cmd_buffer->cs, cb->cb_dcc_control);
+ radeon_emit(cmd_buffer->cs, cb->cb_color_cmask);
+ radeon_emit(cmd_buffer->cs, cb->cb_color_cmask_slice);
+ radeon_emit(cmd_buffer->cs, cb->cb_color_fmask);
+ radeon_emit(cmd_buffer->cs, cb->cb_color_fmask_slice);
+
+ if (is_vi) { /* DCC BASE */
+ radeon_set_context_reg(cmd_buffer->cs, R_028C94_CB_COLOR0_DCC_BASE + index * 0x3c,
+ cb->cb_dcc_base);
+ }
+ }
+
+ if (radv_dcc_enabled(image, iview->base_mip)) {
+ /* Drawing with DCC enabled also compresses colorbuffers. */
+ VkImageSubresourceRange range = {
+ .aspectMask = iview->aspect_mask,
+ .baseMipLevel = iview->base_mip,
+ .levelCount = iview->level_count,
+ .baseArrayLayer = iview->base_layer,
+ .layerCount = iview->layer_count,
+ };
+
+ radv_update_dcc_metadata(cmd_buffer, image, &range, true);
+ }
}
static void
-radv_update_zrange_precision(struct radv_cmd_buffer *cmd_buffer,
- struct radv_ds_buffer_info *ds,
- const struct radv_image_view *iview,
- VkImageLayout layout,
- bool in_render_loop, bool requires_cond_exec)
-{
- const struct radv_image *image = iview->image;
- uint32_t db_z_info = ds->db_z_info;
- uint32_t db_z_info_reg;
-
- if (!cmd_buffer->device->physical_device->rad_info.has_tc_compat_zrange_bug ||
- !radv_image_is_tc_compat_htile(image))
- return;
-
- if (!radv_layout_is_htile_compressed(cmd_buffer->device, image, layout, in_render_loop,
- radv_image_queue_family_mask(image,
- cmd_buffer->queue_family_index,
- cmd_buffer->queue_family_index))) {
- db_z_info &= C_028040_TILE_SURFACE_ENABLE;
- }
-
- db_z_info &= C_028040_ZRANGE_PRECISION;
-
- if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) {
- db_z_info_reg = R_028038_DB_Z_INFO;
- } else {
- db_z_info_reg = R_028040_DB_Z_INFO;
- }
-
- /* When we don't know the last fast clear value we need to emit a
- * conditional packet that will eventually skip the following
- * SET_CONTEXT_REG packet.
- */
- if (requires_cond_exec) {
- uint64_t va = radv_get_tc_compat_zrange_va(image, iview->base_mip);
-
- radeon_emit(cmd_buffer->cs, PKT3(PKT3_COND_EXEC, 3, 0));
- radeon_emit(cmd_buffer->cs, va);
- radeon_emit(cmd_buffer->cs, va >> 32);
- radeon_emit(cmd_buffer->cs, 0);
- radeon_emit(cmd_buffer->cs, 3); /* SET_CONTEXT_REG size */
- }
-
- radeon_set_context_reg(cmd_buffer->cs, db_z_info_reg, db_z_info);
+radv_update_zrange_precision(struct radv_cmd_buffer *cmd_buffer, struct radv_ds_buffer_info *ds,
+ const struct radv_image_view *iview, VkImageLayout layout,
+ bool in_render_loop, bool requires_cond_exec)
+{
+ const struct radv_image *image = iview->image;
+ uint32_t db_z_info = ds->db_z_info;
+ uint32_t db_z_info_reg;
+
+ if (!cmd_buffer->device->physical_device->rad_info.has_tc_compat_zrange_bug ||
+ !radv_image_is_tc_compat_htile(image))
+ return;
+
+ if (!radv_layout_is_htile_compressed(
+ cmd_buffer->device, image, layout, in_render_loop,
+ radv_image_queue_family_mask(image, cmd_buffer->queue_family_index,
+ cmd_buffer->queue_family_index))) {
+ db_z_info &= C_028040_TILE_SURFACE_ENABLE;
+ }
+
+ db_z_info &= C_028040_ZRANGE_PRECISION;
+
+ if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) {
+ db_z_info_reg = R_028038_DB_Z_INFO;
+ } else {
+ db_z_info_reg = R_028040_DB_Z_INFO;
+ }
+
+ /* When we don't know the last fast clear value we need to emit a
+ * conditional packet that will eventually skip the following
+ * SET_CONTEXT_REG packet.
+ */
+ if (requires_cond_exec) {
+ uint64_t va = radv_get_tc_compat_zrange_va(image, iview->base_mip);
+
+ radeon_emit(cmd_buffer->cs, PKT3(PKT3_COND_EXEC, 3, 0));
+ radeon_emit(cmd_buffer->cs, va);
+ radeon_emit(cmd_buffer->cs, va >> 32);
+ radeon_emit(cmd_buffer->cs, 0);
+ radeon_emit(cmd_buffer->cs, 3); /* SET_CONTEXT_REG size */
+ }
+
+ radeon_set_context_reg(cmd_buffer->cs, db_z_info_reg, db_z_info);
}
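Note: the COND_EXEC above predicates the ZRANGE_PRECISION write on the metadata word written by radv_set_tc_compat_zrange_metadata() further down. A rough CPU-side model of that predication, under the assumption that COND_EXEC skips the guarded dwords when the memory word is zero; the 0xC0DE0001 header value and the process() helper are made up for the sketch:

#include <stdint.h>
#include <stdio.h>

static void
process(const uint32_t *cs, unsigned num_dw, const uint32_t *predicate)
{
   for (unsigned i = 0; i < num_dw;) {
      if (cs[i] == 0xC0DE0001u) {          /* stand-in for a COND_EXEC header */
         unsigned exec_count = cs[i + 1];  /* dwords covered by the predicate */
         i += 2;
         if (*predicate == 0)
            i += exec_count;               /* predicate is zero: skip them */
      } else {
         printf("executing dword 0x%08x\n", (unsigned)cs[i]);
         i++;
      }
   }
}

int
main(void)
{
   uint32_t tc_compat_zrange = 0; /* e.g. the last depth clear was not 0.0 */
   const uint32_t cs[] = {
      0xC0DE0001u, 3,                        /* guard the next 3 dwords */
      0x11111111u, 0x22222222u, 0x33333333u, /* the SET_CONTEXT_REG write */
      0x44444444u,                           /* always executed */
   };
   process(cs, 6, &tc_compat_zrange);
   return 0;
}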
static void
-radv_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer,
- struct radv_ds_buffer_info *ds,
- struct radv_image_view *iview,
- VkImageLayout layout,
- bool in_render_loop)
-{
- const struct radv_image *image = iview->image;
- uint32_t db_z_info = ds->db_z_info;
- uint32_t db_stencil_info = ds->db_stencil_info;
-
- if (!radv_layout_is_htile_compressed(cmd_buffer->device, image, layout, in_render_loop,
- radv_image_queue_family_mask(image,
- cmd_buffer->queue_family_index,
- cmd_buffer->queue_family_index))) {
- db_z_info &= C_028040_TILE_SURFACE_ENABLE;
- db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
- }
-
- radeon_set_context_reg(cmd_buffer->cs, R_028008_DB_DEPTH_VIEW, ds->db_depth_view);
- radeon_set_context_reg(cmd_buffer->cs, R_028ABC_DB_HTILE_SURFACE, ds->db_htile_surface);
-
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
- radeon_set_context_reg(cmd_buffer->cs, R_028014_DB_HTILE_DATA_BASE, ds->db_htile_data_base);
- radeon_set_context_reg(cmd_buffer->cs, R_02801C_DB_DEPTH_SIZE_XY, ds->db_depth_size);
-
- radeon_set_context_reg_seq(cmd_buffer->cs, R_02803C_DB_DEPTH_INFO, 7);
- radeon_emit(cmd_buffer->cs, S_02803C_RESOURCE_LEVEL(1));
- radeon_emit(cmd_buffer->cs, db_z_info);
- radeon_emit(cmd_buffer->cs, db_stencil_info);
- radeon_emit(cmd_buffer->cs, ds->db_z_read_base);
- radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base);
- radeon_emit(cmd_buffer->cs, ds->db_z_read_base);
- radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base);
-
- radeon_set_context_reg_seq(cmd_buffer->cs, R_028068_DB_Z_READ_BASE_HI, 5);
- radeon_emit(cmd_buffer->cs, ds->db_z_read_base >> 32);
- radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base >> 32);
- radeon_emit(cmd_buffer->cs, ds->db_z_read_base >> 32);
- radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base >> 32);
- radeon_emit(cmd_buffer->cs, ds->db_htile_data_base >> 32);
- } else if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) {
- radeon_set_context_reg_seq(cmd_buffer->cs, R_028014_DB_HTILE_DATA_BASE, 3);
- radeon_emit(cmd_buffer->cs, ds->db_htile_data_base);
- radeon_emit(cmd_buffer->cs, S_028018_BASE_HI(ds->db_htile_data_base >> 32));
- radeon_emit(cmd_buffer->cs, ds->db_depth_size);
-
- radeon_set_context_reg_seq(cmd_buffer->cs, R_028038_DB_Z_INFO, 10);
- radeon_emit(cmd_buffer->cs, db_z_info); /* DB_Z_INFO */
- radeon_emit(cmd_buffer->cs, db_stencil_info); /* DB_STENCIL_INFO */
- radeon_emit(cmd_buffer->cs, ds->db_z_read_base); /* DB_Z_READ_BASE */
- radeon_emit(cmd_buffer->cs, S_028044_BASE_HI(ds->db_z_read_base >> 32)); /* DB_Z_READ_BASE_HI */
- radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base); /* DB_STENCIL_READ_BASE */
- radeon_emit(cmd_buffer->cs, S_02804C_BASE_HI(ds->db_stencil_read_base >> 32)); /* DB_STENCIL_READ_BASE_HI */
- radeon_emit(cmd_buffer->cs, ds->db_z_write_base); /* DB_Z_WRITE_BASE */
- radeon_emit(cmd_buffer->cs, S_028054_BASE_HI(ds->db_z_write_base >> 32)); /* DB_Z_WRITE_BASE_HI */
- radeon_emit(cmd_buffer->cs, ds->db_stencil_write_base); /* DB_STENCIL_WRITE_BASE */
- radeon_emit(cmd_buffer->cs, S_02805C_BASE_HI(ds->db_stencil_write_base >> 32)); /* DB_STENCIL_WRITE_BASE_HI */
-
- radeon_set_context_reg_seq(cmd_buffer->cs, R_028068_DB_Z_INFO2, 2);
- radeon_emit(cmd_buffer->cs, ds->db_z_info2);
- radeon_emit(cmd_buffer->cs, ds->db_stencil_info2);
- } else {
- radeon_set_context_reg(cmd_buffer->cs, R_028014_DB_HTILE_DATA_BASE, ds->db_htile_data_base);
-
- radeon_set_context_reg_seq(cmd_buffer->cs, R_02803C_DB_DEPTH_INFO, 9);
- radeon_emit(cmd_buffer->cs, ds->db_depth_info); /* R_02803C_DB_DEPTH_INFO */
- radeon_emit(cmd_buffer->cs, db_z_info); /* R_028040_DB_Z_INFO */
- radeon_emit(cmd_buffer->cs, db_stencil_info); /* R_028044_DB_STENCIL_INFO */
- radeon_emit(cmd_buffer->cs, ds->db_z_read_base); /* R_028048_DB_Z_READ_BASE */
- radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base); /* R_02804C_DB_STENCIL_READ_BASE */
- radeon_emit(cmd_buffer->cs, ds->db_z_write_base); /* R_028050_DB_Z_WRITE_BASE */
- radeon_emit(cmd_buffer->cs, ds->db_stencil_write_base); /* R_028054_DB_STENCIL_WRITE_BASE */
- radeon_emit(cmd_buffer->cs, ds->db_depth_size); /* R_028058_DB_DEPTH_SIZE */
- radeon_emit(cmd_buffer->cs, ds->db_depth_slice); /* R_02805C_DB_DEPTH_SLICE */
-
- }
-
- /* Update the ZRANGE_PRECISION value for the TC-compat bug. */
- radv_update_zrange_precision(cmd_buffer, ds, iview, layout,
- in_render_loop, true);
-
- radeon_set_context_reg(cmd_buffer->cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
- ds->pa_su_poly_offset_db_fmt_cntl);
+radv_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer, struct radv_ds_buffer_info *ds,
+ struct radv_image_view *iview, VkImageLayout layout, bool in_render_loop)
+{
+ const struct radv_image *image = iview->image;
+ uint32_t db_z_info = ds->db_z_info;
+ uint32_t db_stencil_info = ds->db_stencil_info;
+
+ if (!radv_layout_is_htile_compressed(
+ cmd_buffer->device, image, layout, in_render_loop,
+ radv_image_queue_family_mask(image, cmd_buffer->queue_family_index,
+ cmd_buffer->queue_family_index))) {
+ db_z_info &= C_028040_TILE_SURFACE_ENABLE;
+ db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
+ }
+
+ radeon_set_context_reg(cmd_buffer->cs, R_028008_DB_DEPTH_VIEW, ds->db_depth_view);
+ radeon_set_context_reg(cmd_buffer->cs, R_028ABC_DB_HTILE_SURFACE, ds->db_htile_surface);
+
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
+ radeon_set_context_reg(cmd_buffer->cs, R_028014_DB_HTILE_DATA_BASE, ds->db_htile_data_base);
+ radeon_set_context_reg(cmd_buffer->cs, R_02801C_DB_DEPTH_SIZE_XY, ds->db_depth_size);
+
+ radeon_set_context_reg_seq(cmd_buffer->cs, R_02803C_DB_DEPTH_INFO, 7);
+ radeon_emit(cmd_buffer->cs, S_02803C_RESOURCE_LEVEL(1));
+ radeon_emit(cmd_buffer->cs, db_z_info);
+ radeon_emit(cmd_buffer->cs, db_stencil_info);
+ radeon_emit(cmd_buffer->cs, ds->db_z_read_base);
+ radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base);
+ radeon_emit(cmd_buffer->cs, ds->db_z_read_base);
+ radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base);
+
+ radeon_set_context_reg_seq(cmd_buffer->cs, R_028068_DB_Z_READ_BASE_HI, 5);
+ radeon_emit(cmd_buffer->cs, ds->db_z_read_base >> 32);
+ radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base >> 32);
+ radeon_emit(cmd_buffer->cs, ds->db_z_read_base >> 32);
+ radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base >> 32);
+ radeon_emit(cmd_buffer->cs, ds->db_htile_data_base >> 32);
+ } else if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) {
+ radeon_set_context_reg_seq(cmd_buffer->cs, R_028014_DB_HTILE_DATA_BASE, 3);
+ radeon_emit(cmd_buffer->cs, ds->db_htile_data_base);
+ radeon_emit(cmd_buffer->cs, S_028018_BASE_HI(ds->db_htile_data_base >> 32));
+ radeon_emit(cmd_buffer->cs, ds->db_depth_size);
+
+ radeon_set_context_reg_seq(cmd_buffer->cs, R_028038_DB_Z_INFO, 10);
+ radeon_emit(cmd_buffer->cs, db_z_info); /* DB_Z_INFO */
+ radeon_emit(cmd_buffer->cs, db_stencil_info); /* DB_STENCIL_INFO */
+ radeon_emit(cmd_buffer->cs, ds->db_z_read_base); /* DB_Z_READ_BASE */
+ radeon_emit(cmd_buffer->cs,
+ S_028044_BASE_HI(ds->db_z_read_base >> 32)); /* DB_Z_READ_BASE_HI */
+ radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base); /* DB_STENCIL_READ_BASE */
+ radeon_emit(cmd_buffer->cs,
+ S_02804C_BASE_HI(ds->db_stencil_read_base >> 32)); /* DB_STENCIL_READ_BASE_HI */
+ radeon_emit(cmd_buffer->cs, ds->db_z_write_base); /* DB_Z_WRITE_BASE */
+ radeon_emit(cmd_buffer->cs,
+ S_028054_BASE_HI(ds->db_z_write_base >> 32)); /* DB_Z_WRITE_BASE_HI */
+ radeon_emit(cmd_buffer->cs, ds->db_stencil_write_base); /* DB_STENCIL_WRITE_BASE */
+ radeon_emit(cmd_buffer->cs,
+ S_02805C_BASE_HI(ds->db_stencil_write_base >> 32)); /* DB_STENCIL_WRITE_BASE_HI */
+
+ radeon_set_context_reg_seq(cmd_buffer->cs, R_028068_DB_Z_INFO2, 2);
+ radeon_emit(cmd_buffer->cs, ds->db_z_info2);
+ radeon_emit(cmd_buffer->cs, ds->db_stencil_info2);
+ } else {
+ radeon_set_context_reg(cmd_buffer->cs, R_028014_DB_HTILE_DATA_BASE, ds->db_htile_data_base);
+
+ radeon_set_context_reg_seq(cmd_buffer->cs, R_02803C_DB_DEPTH_INFO, 9);
+ radeon_emit(cmd_buffer->cs, ds->db_depth_info); /* R_02803C_DB_DEPTH_INFO */
+ radeon_emit(cmd_buffer->cs, db_z_info); /* R_028040_DB_Z_INFO */
+ radeon_emit(cmd_buffer->cs, db_stencil_info); /* R_028044_DB_STENCIL_INFO */
+ radeon_emit(cmd_buffer->cs, ds->db_z_read_base); /* R_028048_DB_Z_READ_BASE */
+ radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base); /* R_02804C_DB_STENCIL_READ_BASE */
+ radeon_emit(cmd_buffer->cs, ds->db_z_write_base); /* R_028050_DB_Z_WRITE_BASE */
+ radeon_emit(cmd_buffer->cs, ds->db_stencil_write_base); /* R_028054_DB_STENCIL_WRITE_BASE */
+ radeon_emit(cmd_buffer->cs, ds->db_depth_size); /* R_028058_DB_DEPTH_SIZE */
+ radeon_emit(cmd_buffer->cs, ds->db_depth_slice); /* R_02805C_DB_DEPTH_SLICE */
+ }
+
+ /* Update the ZRANGE_PRECISION value for the TC-compat bug. */
+ radv_update_zrange_precision(cmd_buffer, ds, iview, layout, in_render_loop, true);
+
+ radeon_set_context_reg(cmd_buffer->cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
+ ds->pa_su_poly_offset_db_fmt_cntl);
}
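Note: radeon_set_context_reg_seq() as used above announces a write to `count` consecutive context registers starting at the given offset, and every following radeon_emit() supplies one value in order. A small standalone sketch of what such a sequence expands to (a model, not the real packet builder):

#include <stdint.h>

struct reg_write {
   uint32_t reg;   /* byte offset of the register */
   uint32_t value; /* dword written to it */
};

/* Expand one set_context_reg_seq(reg, count) followed by `count` emitted
 * values into the individual register writes it stands for. */
static void
expand_reg_seq(uint32_t reg, const uint32_t *values, unsigned count,
               struct reg_write *out)
{
   for (unsigned i = 0; i < count; i++) {
      out[i].reg = reg + 4 * i; /* consecutive dword-sized registers */
      out[i].value = values[i];
   }
}

int
main(void)
{
   const uint32_t vals[2] = {0x1, 0x2};
   struct reg_write out[2];
   expand_reg_seq(0x028038, vals, 2, out); /* the GFX9 sequence above starts at R_028038_DB_Z_INFO */
   return out[1].reg == 0x02803C ? 0 : 1;
}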
/**
@@ -1873,157 +1791,143 @@ radv_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer,
*/
static void
radv_update_bound_fast_clear_ds(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_image_view *iview,
- VkClearDepthStencilValue ds_clear_value,
- VkImageAspectFlags aspects)
-{
- const struct radv_subpass *subpass = cmd_buffer->state.subpass;
- const struct radv_image *image = iview->image;
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- uint32_t att_idx;
-
- if (!cmd_buffer->state.attachments || !subpass)
- return;
-
- if (!subpass->depth_stencil_attachment)
- return;
-
- att_idx = subpass->depth_stencil_attachment->attachment;
- if (cmd_buffer->state.attachments[att_idx].iview->image != image)
- return;
-
- if (aspects == (VK_IMAGE_ASPECT_DEPTH_BIT |
- VK_IMAGE_ASPECT_STENCIL_BIT)) {
- radeon_set_context_reg_seq(cs, R_028028_DB_STENCIL_CLEAR, 2);
- radeon_emit(cs, ds_clear_value.stencil);
- radeon_emit(cs, fui(ds_clear_value.depth));
- } else if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
- radeon_set_context_reg_seq(cs, R_02802C_DB_DEPTH_CLEAR, 1);
- radeon_emit(cs, fui(ds_clear_value.depth));
- } else {
- assert(aspects == VK_IMAGE_ASPECT_STENCIL_BIT);
- radeon_set_context_reg_seq(cs, R_028028_DB_STENCIL_CLEAR, 1);
- radeon_emit(cs, ds_clear_value.stencil);
- }
-
- /* Update the ZRANGE_PRECISION value for the TC-compat bug. This is
- * only needed when clearing Z to 0.0.
- */
- if ((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
- ds_clear_value.depth == 0.0) {
- VkImageLayout layout = subpass->depth_stencil_attachment->layout;
- bool in_render_loop = subpass->depth_stencil_attachment->in_render_loop;
-
- radv_update_zrange_precision(cmd_buffer, &cmd_buffer->state.attachments[att_idx].ds,
- iview, layout, in_render_loop, false);
- }
-
- cmd_buffer->state.context_roll_without_scissor_emitted = true;
+ const struct radv_image_view *iview,
+ VkClearDepthStencilValue ds_clear_value, VkImageAspectFlags aspects)
+{
+ const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+ const struct radv_image *image = iview->image;
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ uint32_t att_idx;
+
+ if (!cmd_buffer->state.attachments || !subpass)
+ return;
+
+ if (!subpass->depth_stencil_attachment)
+ return;
+
+ att_idx = subpass->depth_stencil_attachment->attachment;
+ if (cmd_buffer->state.attachments[att_idx].iview->image != image)
+ return;
+
+ if (aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
+ radeon_set_context_reg_seq(cs, R_028028_DB_STENCIL_CLEAR, 2);
+ radeon_emit(cs, ds_clear_value.stencil);
+ radeon_emit(cs, fui(ds_clear_value.depth));
+ } else if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
+ radeon_set_context_reg_seq(cs, R_02802C_DB_DEPTH_CLEAR, 1);
+ radeon_emit(cs, fui(ds_clear_value.depth));
+ } else {
+ assert(aspects == VK_IMAGE_ASPECT_STENCIL_BIT);
+ radeon_set_context_reg_seq(cs, R_028028_DB_STENCIL_CLEAR, 1);
+ radeon_emit(cs, ds_clear_value.stencil);
+ }
+
+ /* Update the ZRANGE_PRECISION value for the TC-compat bug. This is
+ * only needed when clearing Z to 0.0.
+ */
+ if ((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && ds_clear_value.depth == 0.0) {
+ VkImageLayout layout = subpass->depth_stencil_attachment->layout;
+ bool in_render_loop = subpass->depth_stencil_attachment->in_render_loop;
+
+ radv_update_zrange_precision(cmd_buffer, &cmd_buffer->state.attachments[att_idx].ds, iview,
+ layout, in_render_loop, false);
+ }
+
+ cmd_buffer->state.context_roll_without_scissor_emitted = true;
}
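Note: fui() above simply reinterprets the float depth clear value as its raw IEEE-754 bit pattern, since DB_DEPTH_CLEAR stores the bits directly. A minimal standalone equivalent (the helper name is mine, not Mesa's):

#include <stdint.h>
#include <string.h>

static uint32_t
float_to_bits(float f)
{
   uint32_t u;
   memcpy(&u, &f, sizeof(u)); /* well-defined type pun */
   return u;
}

int
main(void)
{
   return float_to_bits(1.0f) == 0x3f800000u ? 0 : 1; /* 0.0f maps to 0 */
}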
/**
* Set the clear depth/stencil values to the image's metadata.
*/
static void
-radv_set_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *range,
- VkClearDepthStencilValue ds_clear_value,
- VkImageAspectFlags aspects)
-{
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- uint32_t level_count = radv_get_levelCount(image, range);
-
- if (aspects == (VK_IMAGE_ASPECT_DEPTH_BIT |
- VK_IMAGE_ASPECT_STENCIL_BIT)) {
- uint64_t va = radv_get_ds_clear_value_va(image, range->baseMipLevel);
-
- /* Use the fastest way when both aspects are used. */
- radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + 2 * level_count, cmd_buffer->state.predicating));
- radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
- S_370_WR_CONFIRM(1) |
- S_370_ENGINE_SEL(V_370_PFP));
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
-
- for (uint32_t l = 0; l < level_count; l++) {
- radeon_emit(cs, ds_clear_value.stencil);
- radeon_emit(cs, fui(ds_clear_value.depth));
- }
- } else {
- /* Otherwise we need one WRITE_DATA packet per level. */
- for (uint32_t l = 0; l < level_count; l++) {
- uint64_t va = radv_get_ds_clear_value_va(image, range->baseMipLevel + l);
- unsigned value;
-
- if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
- value = fui(ds_clear_value.depth);
- va += 4;
- } else {
- assert(aspects == VK_IMAGE_ASPECT_STENCIL_BIT);
- value = ds_clear_value.stencil;
- }
-
- radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, cmd_buffer->state.predicating));
- radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
- S_370_WR_CONFIRM(1) |
- S_370_ENGINE_SEL(V_370_PFP));
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- radeon_emit(cs, value);
- }
- }
+radv_set_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *range,
+ VkClearDepthStencilValue ds_clear_value, VkImageAspectFlags aspects)
+{
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ uint32_t level_count = radv_get_levelCount(image, range);
+
+ if (aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
+ uint64_t va = radv_get_ds_clear_value_va(image, range->baseMipLevel);
+
+ /* Use the fastest way when both aspects are used. */
+ radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + 2 * level_count, cmd_buffer->state.predicating));
+ radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+
+ for (uint32_t l = 0; l < level_count; l++) {
+ radeon_emit(cs, ds_clear_value.stencil);
+ radeon_emit(cs, fui(ds_clear_value.depth));
+ }
+ } else {
+ /* Otherwise we need one WRITE_DATA packet per level. */
+ for (uint32_t l = 0; l < level_count; l++) {
+ uint64_t va = radv_get_ds_clear_value_va(image, range->baseMipLevel + l);
+ unsigned value;
+
+ if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
+ value = fui(ds_clear_value.depth);
+ va += 4;
+ } else {
+ assert(aspects == VK_IMAGE_ASPECT_STENCIL_BIT);
+ value = ds_clear_value.stencil;
+ }
+
+ radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, cmd_buffer->state.predicating));
+ radeon_emit(cs,
+ S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ radeon_emit(cs, value);
+ }
+ }
}
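Note: the `2 + 2 * level_count` passed to PKT3() above sizes the WRITE_DATA payload: one control dword, two address dwords, and two data dwords (stencil + depth bits) per mip level, under the usual PM4 type-3 convention that the header's count field is the payload length minus one. A quick standalone check of that arithmetic:

#include <assert.h>

int
main(void)
{
   for (unsigned level_count = 1; level_count <= 13; level_count++) {
      unsigned payload = 1 /* control */ + 2 /* 64-bit address */ + 2 * level_count;
      assert(payload - 1 == 2 + 2 * level_count); /* the PKT3() count argument */
   }
   return 0;
}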
/**
* Update the TC-compat metadata value for this image.
*/
static void
-radv_set_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *range,
- uint32_t value)
+radv_set_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *range, uint32_t value)
{
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
- if (!cmd_buffer->device->physical_device->rad_info.has_tc_compat_zrange_bug)
- return;
+ if (!cmd_buffer->device->physical_device->rad_info.has_tc_compat_zrange_bug)
+ return;
- uint64_t va = radv_get_tc_compat_zrange_va(image, range->baseMipLevel);
- uint32_t level_count = radv_get_levelCount(image, range);
+ uint64_t va = radv_get_tc_compat_zrange_va(image, range->baseMipLevel);
+ uint32_t level_count = radv_get_levelCount(image, range);
- radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + level_count, cmd_buffer->state.predicating));
- radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
- S_370_WR_CONFIRM(1) |
- S_370_ENGINE_SEL(V_370_PFP));
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
+ radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + level_count, cmd_buffer->state.predicating));
+ radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
- for (uint32_t l = 0; l < level_count; l++)
- radeon_emit(cs, value);
+ for (uint32_t l = 0; l < level_count; l++)
+ radeon_emit(cs, value);
}
static void
radv_update_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_image_view *iview,
- VkClearDepthStencilValue ds_clear_value)
+ const struct radv_image_view *iview,
+ VkClearDepthStencilValue ds_clear_value)
{
- VkImageSubresourceRange range = {
- .aspectMask = iview->aspect_mask,
- .baseMipLevel = iview->base_mip,
- .levelCount = iview->level_count,
- .baseArrayLayer = iview->base_layer,
- .layerCount = iview->layer_count,
- };
- uint32_t cond_val;
+ VkImageSubresourceRange range = {
+ .aspectMask = iview->aspect_mask,
+ .baseMipLevel = iview->base_mip,
+ .levelCount = iview->level_count,
+ .baseArrayLayer = iview->base_layer,
+ .layerCount = iview->layer_count,
+ };
+ uint32_t cond_val;
- /* Conditionally set DB_Z_INFO.ZRANGE_PRECISION to 0 when the last
- * depth clear value is 0.0f.
- */
- cond_val = ds_clear_value.depth == 0.0f ? UINT_MAX : 0;
+ /* Conditionally set DB_Z_INFO.ZRANGE_PRECISION to 0 when the last
+ * depth clear value is 0.0f.
+ */
+ cond_val = ds_clear_value.depth == 0.0f ? UINT_MAX : 0;
- radv_set_tc_compat_zrange_metadata(cmd_buffer, iview->image, &range,
- cond_val);
+ radv_set_tc_compat_zrange_metadata(cmd_buffer, iview->image, &range, cond_val);
}
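Note: the predicate word written above is the other half of the COND_EXEC in radv_update_zrange_precision(): it is non-zero exactly when the last depth clear was 0.0f, the only case where the ZRANGE_PRECISION override may be applied. Restated as a standalone helper (the name is mine):

#include <limits.h>
#include <stdint.h>

static uint32_t
tc_compat_zrange_cond_val(float last_clear_depth)
{
   /* Non-zero -> the guarded SET_CONTEXT_REG is executed. */
   return last_clear_depth == 0.0f ? UINT_MAX : 0;
}

int
main(void)
{
   return tc_compat_zrange_cond_val(0.0f) != 0 &&
          tc_compat_zrange_cond_val(1.0f) == 0 ? 0 : 1;
}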
/**
@@ -2031,79 +1935,72 @@ radv_update_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer,
*/
void
radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_image_view *iview,
- VkClearDepthStencilValue ds_clear_value,
- VkImageAspectFlags aspects)
+ const struct radv_image_view *iview,
+ VkClearDepthStencilValue ds_clear_value, VkImageAspectFlags aspects)
{
- VkImageSubresourceRange range = {
- .aspectMask = iview->aspect_mask,
- .baseMipLevel = iview->base_mip,
- .levelCount = iview->level_count,
- .baseArrayLayer = iview->base_layer,
- .layerCount = iview->layer_count,
- };
- struct radv_image *image = iview->image;
+ VkImageSubresourceRange range = {
+ .aspectMask = iview->aspect_mask,
+ .baseMipLevel = iview->base_mip,
+ .levelCount = iview->level_count,
+ .baseArrayLayer = iview->base_layer,
+ .layerCount = iview->layer_count,
+ };
+ struct radv_image *image = iview->image;
- assert(radv_htile_enabled(image, range.baseMipLevel));
+ assert(radv_htile_enabled(image, range.baseMipLevel));
- radv_set_ds_clear_metadata(cmd_buffer, iview->image, &range,
- ds_clear_value, aspects);
+ radv_set_ds_clear_metadata(cmd_buffer, iview->image, &range, ds_clear_value, aspects);
- if (radv_image_is_tc_compat_htile(image) &&
- (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) {
- radv_update_tc_compat_zrange_metadata(cmd_buffer, iview,
- ds_clear_value);
- }
+ if (radv_image_is_tc_compat_htile(image) && (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) {
+ radv_update_tc_compat_zrange_metadata(cmd_buffer, iview, ds_clear_value);
+ }
- radv_update_bound_fast_clear_ds(cmd_buffer, iview, ds_clear_value,
- aspects);
+ radv_update_bound_fast_clear_ds(cmd_buffer, iview, ds_clear_value, aspects);
}
/**
* Load the clear depth/stencil values from the image's metadata.
*/
static void
-radv_load_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_image_view *iview)
-{
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- const struct radv_image *image = iview->image;
- VkImageAspectFlags aspects = vk_format_aspects(image->vk_format);
- uint64_t va = radv_get_ds_clear_value_va(image, iview->base_mip);
- unsigned reg_offset = 0, reg_count = 0;
-
- assert(radv_image_has_htile(image));
-
- if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
- ++reg_count;
- } else {
- ++reg_offset;
- va += 4;
- }
- if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
- ++reg_count;
-
- uint32_t reg = R_028028_DB_STENCIL_CLEAR + 4 * reg_offset;
-
- if (cmd_buffer->device->physical_device->rad_info.has_load_ctx_reg_pkt) {
- radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG_INDEX, 3, 0));
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2);
- radeon_emit(cs, reg_count);
- } else {
- radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
- COPY_DATA_DST_SEL(COPY_DATA_REG) |
- (reg_count == 2 ? COPY_DATA_COUNT_SEL : 0));
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- radeon_emit(cs, reg >> 2);
- radeon_emit(cs, 0);
-
- radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
- radeon_emit(cs, 0);
- }
+radv_load_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview)
+{
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ const struct radv_image *image = iview->image;
+ VkImageAspectFlags aspects = vk_format_aspects(image->vk_format);
+ uint64_t va = radv_get_ds_clear_value_va(image, iview->base_mip);
+ unsigned reg_offset = 0, reg_count = 0;
+
+ assert(radv_image_has_htile(image));
+
+ if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
+ ++reg_count;
+ } else {
+ ++reg_offset;
+ va += 4;
+ }
+ if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
+ ++reg_count;
+
+ uint32_t reg = R_028028_DB_STENCIL_CLEAR + 4 * reg_offset;
+
+ if (cmd_buffer->device->physical_device->rad_info.has_load_ctx_reg_pkt) {
+ radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG_INDEX, 3, 0));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2);
+ radeon_emit(cs, reg_count);
+ } else {
+ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG) |
+ (reg_count == 2 ? COPY_DATA_COUNT_SEL : 0));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ radeon_emit(cs, reg >> 2);
+ radeon_emit(cs, 0);
+
+ radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
+ radeon_emit(cs, 0);
+ }
}
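Note: the shifts above convert register byte offsets into the dword indices the load/copy packets expect: LOAD_CONTEXT_REG_INDEX takes the index relative to the start of the context-register range (SI_CONTEXT_REG_OFFSET, 0x28000 in Mesa), while COPY_DATA takes the absolute register dword index. A standalone check, assuming that 0x28000 base:

#include <assert.h>
#include <stdint.h>

#define CONTEXT_REG_BASE 0x28000u /* assumed value of SI_CONTEXT_REG_OFFSET */

int
main(void)
{
   uint32_t reg = 0x028028u; /* R_028028_DB_STENCIL_CLEAR */
   assert(((reg - CONTEXT_REG_BASE) >> 2) == 0x0A);  /* relative dword index */
   assert((reg >> 2) == 0xA00Au);                    /* absolute dword index */
   return 0;
}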
/*
@@ -2112,126 +2009,115 @@ radv_load_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
* cmask eliminate is required.
*/
void
-radv_update_fce_metadata(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *range, bool value)
+radv_update_fce_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *range, bool value)
{
- if (!image->fce_pred_offset)
- return;
+ if (!image->fce_pred_offset)
+ return;
- uint64_t pred_val = value;
- uint64_t va = radv_image_get_fce_pred_va(image, range->baseMipLevel);
- uint32_t level_count = radv_get_levelCount(image, range);
- uint32_t count = 2 * level_count;
+ uint64_t pred_val = value;
+ uint64_t va = radv_image_get_fce_pred_va(image, range->baseMipLevel);
+ uint32_t level_count = radv_get_levelCount(image, range);
+ uint32_t count = 2 * level_count;
- radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0));
- radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEM) |
- S_370_WR_CONFIRM(1) |
- S_370_ENGINE_SEL(V_370_PFP));
- radeon_emit(cmd_buffer->cs, va);
- radeon_emit(cmd_buffer->cs, va >> 32);
+ radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0));
+ radeon_emit(cmd_buffer->cs,
+ S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP));
+ radeon_emit(cmd_buffer->cs, va);
+ radeon_emit(cmd_buffer->cs, va >> 32);
- for (uint32_t l = 0; l < level_count; l++) {
- radeon_emit(cmd_buffer->cs, pred_val);
- radeon_emit(cmd_buffer->cs, pred_val >> 32);
- }
+ for (uint32_t l = 0; l < level_count; l++) {
+ radeon_emit(cmd_buffer->cs, pred_val);
+ radeon_emit(cmd_buffer->cs, pred_val >> 32);
+ }
}
/**
* Update the DCC predicate to reflect the compression state.
*/
void
-radv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *range, bool value)
+radv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *range, bool value)
{
- if (image->dcc_pred_offset == 0)
- return;
+ if (image->dcc_pred_offset == 0)
+ return;
- uint64_t pred_val = value;
- uint64_t va = radv_image_get_dcc_pred_va(image, range->baseMipLevel);
- uint32_t level_count = radv_get_levelCount(image, range);
- uint32_t count = 2 * level_count;
+ uint64_t pred_val = value;
+ uint64_t va = radv_image_get_dcc_pred_va(image, range->baseMipLevel);
+ uint32_t level_count = radv_get_levelCount(image, range);
+ uint32_t count = 2 * level_count;
- assert(radv_dcc_enabled(image, range->baseMipLevel));
+ assert(radv_dcc_enabled(image, range->baseMipLevel));
- radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0));
- radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEM) |
- S_370_WR_CONFIRM(1) |
- S_370_ENGINE_SEL(V_370_PFP));
- radeon_emit(cmd_buffer->cs, va);
- radeon_emit(cmd_buffer->cs, va >> 32);
+ radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0));
+ radeon_emit(cmd_buffer->cs,
+ S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP));
+ radeon_emit(cmd_buffer->cs, va);
+ radeon_emit(cmd_buffer->cs, va >> 32);
- for (uint32_t l = 0; l < level_count; l++) {
- radeon_emit(cmd_buffer->cs, pred_val);
- radeon_emit(cmd_buffer->cs, pred_val >> 32);
- }
+ for (uint32_t l = 0; l < level_count; l++) {
+ radeon_emit(cmd_buffer->cs, pred_val);
+ radeon_emit(cmd_buffer->cs, pred_val >> 32);
+ }
}
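Note: the 64-bit predicate value (like every 64-bit GPU address in this file) is emitted as two dwords, low half first. A trivial standalone round-trip of that split:

#include <assert.h>
#include <stdint.h>

int
main(void)
{
   uint64_t value = 0x0000800112345678ull;     /* hypothetical example */
   uint32_t lo = (uint32_t)value;              /* radeon_emit(cs, value) */
   uint32_t hi = (uint32_t)(value >> 32);      /* radeon_emit(cs, value >> 32) */
   assert((((uint64_t)hi << 32) | lo) == value);
   return 0;
}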
/**
* Update the fast clear color values if the image is bound as a color buffer.
*/
static void
-radv_update_bound_fast_clear_color(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- int cb_idx,
- uint32_t color_values[2])
+radv_update_bound_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ int cb_idx, uint32_t color_values[2])
{
- const struct radv_subpass *subpass = cmd_buffer->state.subpass;
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- uint32_t att_idx;
+ const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ uint32_t att_idx;
- if (!cmd_buffer->state.attachments || !subpass)
- return;
+ if (!cmd_buffer->state.attachments || !subpass)
+ return;
- att_idx = subpass->color_attachments[cb_idx].attachment;
- if (att_idx == VK_ATTACHMENT_UNUSED)
- return;
+ att_idx = subpass->color_attachments[cb_idx].attachment;
+ if (att_idx == VK_ATTACHMENT_UNUSED)
+ return;
- if (cmd_buffer->state.attachments[att_idx].iview->image != image)
- return;
+ if (cmd_buffer->state.attachments[att_idx].iview->image != image)
+ return;
- radeon_set_context_reg_seq(cs, R_028C8C_CB_COLOR0_CLEAR_WORD0 + cb_idx * 0x3c, 2);
- radeon_emit(cs, color_values[0]);
- radeon_emit(cs, color_values[1]);
+ radeon_set_context_reg_seq(cs, R_028C8C_CB_COLOR0_CLEAR_WORD0 + cb_idx * 0x3c, 2);
+ radeon_emit(cs, color_values[0]);
+ radeon_emit(cs, color_values[1]);
- cmd_buffer->state.context_roll_without_scissor_emitted = true;
+ cmd_buffer->state.context_roll_without_scissor_emitted = true;
}
/**
* Set the clear color values to the image's metadata.
*/
static void
-radv_set_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *range,
- uint32_t color_values[2])
-{
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- uint32_t level_count = radv_get_levelCount(image, range);
- uint32_t count = 2 * level_count;
-
- assert(radv_image_has_cmask(image) ||
- radv_dcc_enabled(image, range->baseMipLevel));
-
- if (radv_image_has_clear_value(image)) {
- uint64_t va = radv_image_get_fast_clear_va(image, range->baseMipLevel);
-
- radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + count, cmd_buffer->state.predicating));
- radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
- S_370_WR_CONFIRM(1) |
- S_370_ENGINE_SEL(V_370_PFP));
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
-
- for (uint32_t l = 0; l < level_count; l++) {
- radeon_emit(cs, color_values[0]);
- radeon_emit(cs, color_values[1]);
- }
- } else {
- /* Some default value we can set in the update. */
- assert(color_values[0] == 0 && color_values[1] == 0);
- }
+radv_set_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *range, uint32_t color_values[2])
+{
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ uint32_t level_count = radv_get_levelCount(image, range);
+ uint32_t count = 2 * level_count;
+
+ assert(radv_image_has_cmask(image) || radv_dcc_enabled(image, range->baseMipLevel));
+
+ if (radv_image_has_clear_value(image)) {
+ uint64_t va = radv_image_get_fast_clear_va(image, range->baseMipLevel);
+
+ radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + count, cmd_buffer->state.predicating));
+ radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+
+ for (uint32_t l = 0; l < level_count; l++) {
+ radeon_emit(cs, color_values[0]);
+ radeon_emit(cs, color_values[1]);
+ }
+ } else {
+ /* Some default value we can set in the update. */
+ assert(color_values[0] == 0 && color_values[1] == 0);
+ }
}
/**
@@ -2239,72 +2125,65 @@ radv_set_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
*/
void
radv_update_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_image_view *iview,
- int cb_idx,
- uint32_t color_values[2])
+ const struct radv_image_view *iview, int cb_idx,
+ uint32_t color_values[2])
{
- struct radv_image *image = iview->image;
- VkImageSubresourceRange range = {
- .aspectMask = iview->aspect_mask,
- .baseMipLevel = iview->base_mip,
- .levelCount = iview->level_count,
- .baseArrayLayer = iview->base_layer,
- .layerCount = iview->layer_count,
- };
+ struct radv_image *image = iview->image;
+ VkImageSubresourceRange range = {
+ .aspectMask = iview->aspect_mask,
+ .baseMipLevel = iview->base_mip,
+ .levelCount = iview->level_count,
+ .baseArrayLayer = iview->base_layer,
+ .layerCount = iview->layer_count,
+ };
- assert(radv_image_has_cmask(image) ||
- radv_dcc_enabled(image, iview->base_mip));
+ assert(radv_image_has_cmask(image) || radv_dcc_enabled(image, iview->base_mip));
- radv_set_color_clear_metadata(cmd_buffer, image, &range, color_values);
+ radv_set_color_clear_metadata(cmd_buffer, image, &range, color_values);
- radv_update_bound_fast_clear_color(cmd_buffer, image, cb_idx,
- color_values);
+ radv_update_bound_fast_clear_color(cmd_buffer, image, cb_idx, color_values);
}
/**
* Load the clear color values from the image's metadata.
*/
static void
-radv_load_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image_view *iview,
- int cb_idx)
-{
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- struct radv_image *image = iview->image;
-
- if (!radv_image_has_cmask(image) &&
- !radv_dcc_enabled(image, iview->base_mip))
- return;
-
- if (!radv_image_has_clear_value(image)) {
- uint32_t color_values[2] = {0, 0};
- radv_update_bound_fast_clear_color(cmd_buffer, image, cb_idx,
- color_values);
- return;
- }
-
- uint64_t va = radv_image_get_fast_clear_va(image, iview->base_mip);
- uint32_t reg = R_028C8C_CB_COLOR0_CLEAR_WORD0 + cb_idx * 0x3c;
-
- if (cmd_buffer->device->physical_device->rad_info.has_load_ctx_reg_pkt) {
- radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG_INDEX, 3, cmd_buffer->state.predicating));
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2);
- radeon_emit(cs, 2);
- } else {
- radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, cmd_buffer->state.predicating));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
- COPY_DATA_DST_SEL(COPY_DATA_REG) |
- COPY_DATA_COUNT_SEL);
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- radeon_emit(cs, reg >> 2);
- radeon_emit(cs, 0);
-
- radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, cmd_buffer->state.predicating));
- radeon_emit(cs, 0);
- }
+radv_load_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *iview,
+ int cb_idx)
+{
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ struct radv_image *image = iview->image;
+
+ if (!radv_image_has_cmask(image) && !radv_dcc_enabled(image, iview->base_mip))
+ return;
+
+ if (!radv_image_has_clear_value(image)) {
+ uint32_t color_values[2] = {0, 0};
+ radv_update_bound_fast_clear_color(cmd_buffer, image, cb_idx, color_values);
+ return;
+ }
+
+ uint64_t va = radv_image_get_fast_clear_va(image, iview->base_mip);
+ uint32_t reg = R_028C8C_CB_COLOR0_CLEAR_WORD0 + cb_idx * 0x3c;
+
+ if (cmd_buffer->device->physical_device->rad_info.has_load_ctx_reg_pkt) {
+ radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG_INDEX, 3, cmd_buffer->state.predicating));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2);
+ radeon_emit(cs, 2);
+ } else {
+ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, cmd_buffer->state.predicating));
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG) |
+ COPY_DATA_COUNT_SEL);
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ radeon_emit(cs, reg >> 2);
+ radeon_emit(cs, 0);
+
+ radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, cmd_buffer->state.predicating));
+ radeon_emit(cs, 0);
+ }
}
/* GFX9+ metadata cache flushing workaround. metadata cache coherency is
@@ -2316,36 +2195,35 @@ radv_load_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
static void
radv_emit_fb_mip_change_flush(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_framebuffer *framebuffer = cmd_buffer->state.framebuffer;
- const struct radv_subpass *subpass = cmd_buffer->state.subpass;
- bool color_mip_changed = false;
+ struct radv_framebuffer *framebuffer = cmd_buffer->state.framebuffer;
+ const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+ bool color_mip_changed = false;
- /* Entire workaround is not applicable before GFX9 */
- if (cmd_buffer->device->physical_device->rad_info.chip_class < GFX9)
- return;
+ /* Entire workaround is not applicable before GFX9 */
+ if (cmd_buffer->device->physical_device->rad_info.chip_class < GFX9)
+ return;
- if (!framebuffer)
- return;
+ if (!framebuffer)
+ return;
- for (int i = 0; i < subpass->color_count; ++i) {
- int idx = subpass->color_attachments[i].attachment;
- if (idx == VK_ATTACHMENT_UNUSED)
- continue;
+ for (int i = 0; i < subpass->color_count; ++i) {
+ int idx = subpass->color_attachments[i].attachment;
+ if (idx == VK_ATTACHMENT_UNUSED)
+ continue;
- struct radv_image_view *iview = cmd_buffer->state.attachments[idx].iview;
+ struct radv_image_view *iview = cmd_buffer->state.attachments[idx].iview;
- if ((radv_image_has_CB_metadata(iview->image) ||
- radv_image_has_dcc(iview->image)) &&
- cmd_buffer->state.cb_mip[i] != iview->base_mip)
- color_mip_changed = true;
+ if ((radv_image_has_CB_metadata(iview->image) || radv_image_has_dcc(iview->image)) &&
+ cmd_buffer->state.cb_mip[i] != iview->base_mip)
+ color_mip_changed = true;
- cmd_buffer->state.cb_mip[i] = iview->base_mip;
- }
+ cmd_buffer->state.cb_mip[i] = iview->base_mip;
+ }
- if (color_mip_changed) {
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
- RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
- }
+ if (color_mip_changed) {
+ cmd_buffer->state.flush_bits |=
+ RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
+ }
}
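Note: the workaround above never flushes on the spot; it only ORs the required flags into cmd_buffer->state.flush_bits, and the actual cache flush is emitted later. A minimal model of that deferred-flush pattern (flag values are stand-ins, not the real RADV_CMD_FLAG_* bits):

#include <stdint.h>

enum {
   FLAG_FLUSH_AND_INV_CB      = 1u << 0, /* stand-in bits */
   FLAG_FLUSH_AND_INV_CB_META = 1u << 1,
};

struct cmd_state {
   uint32_t flush_bits; /* accumulated, consumed by a later flush */
};

static void
request_cb_meta_flush(struct cmd_state *state)
{
   state->flush_bits |= FLAG_FLUSH_AND_INV_CB | FLAG_FLUSH_AND_INV_CB_META;
}

int
main(void)
{
   struct cmd_state state = {0};
   request_cb_meta_flush(&state);
   return state.flush_bits == 0x3 ? 0 : 1;
}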
/* This function does the flushes for mip changes if the levels are not zero for
@@ -2355,2924 +2233,2739 @@ radv_emit_fb_mip_change_flush(struct radv_cmd_buffer *cmd_buffer)
static void
radv_emit_mip_change_flush_default(struct radv_cmd_buffer *cmd_buffer)
{
- /* Entire workaround is not applicable before GFX9 */
- if (cmd_buffer->device->physical_device->rad_info.chip_class < GFX9)
- return;
+ /* Entire workaround is not applicable before GFX9 */
+ if (cmd_buffer->device->physical_device->rad_info.chip_class < GFX9)
+ return;
- bool need_color_mip_flush = false;
- for (unsigned i = 0; i < 8; ++i) {
- if (cmd_buffer->state.cb_mip[i]) {
- need_color_mip_flush = true;
- break;
- }
- }
+ bool need_color_mip_flush = false;
+ for (unsigned i = 0; i < 8; ++i) {
+ if (cmd_buffer->state.cb_mip[i]) {
+ need_color_mip_flush = true;
+ break;
+ }
+ }
- if (need_color_mip_flush) {
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
- RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
- }
+ if (need_color_mip_flush) {
+ cmd_buffer->state.flush_bits |=
+ RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
+ }
- memset(cmd_buffer->state.cb_mip, 0, sizeof(cmd_buffer->state.cb_mip));
+ memset(cmd_buffer->state.cb_mip, 0, sizeof(cmd_buffer->state.cb_mip));
}
static void
radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer)
{
- int i;
- struct radv_framebuffer *framebuffer = cmd_buffer->state.framebuffer;
- const struct radv_subpass *subpass = cmd_buffer->state.subpass;
-
- /* this may happen for inherited secondary recording */
- if (!framebuffer)
- return;
-
- for (i = 0; i < 8; ++i) {
- if (i >= subpass->color_count || subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED) {
- radeon_set_context_reg(cmd_buffer->cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
- S_028C70_FORMAT(V_028C70_COLOR_INVALID));
- continue;
- }
-
- int idx = subpass->color_attachments[i].attachment;
- struct radv_image_view *iview = cmd_buffer->state.attachments[idx].iview;
- VkImageLayout layout = subpass->color_attachments[i].layout;
- bool in_render_loop = subpass->color_attachments[i].in_render_loop;
-
- radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, iview->bo);
-
- assert(iview->aspect_mask & (VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_PLANE_0_BIT |
- VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT));
- radv_emit_fb_color_state(cmd_buffer, i, &cmd_buffer->state.attachments[idx].cb, iview, layout,
- in_render_loop, cmd_buffer->state.attachments[idx].disable_dcc);
-
- radv_load_color_clear_metadata(cmd_buffer, iview, i);
- }
-
- if (subpass->depth_stencil_attachment) {
- int idx = subpass->depth_stencil_attachment->attachment;
- VkImageLayout layout = subpass->depth_stencil_attachment->layout;
- bool in_render_loop = subpass->depth_stencil_attachment->in_render_loop;
- struct radv_image_view *iview = cmd_buffer->state.attachments[idx].iview;
- radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, cmd_buffer->state.attachments[idx].iview->bo);
-
- radv_emit_fb_ds_state(cmd_buffer, &cmd_buffer->state.attachments[idx].ds, iview, layout, in_render_loop);
-
- if (radv_layout_is_htile_compressed(cmd_buffer->device, iview->image, layout, in_render_loop,
- radv_image_queue_family_mask(iview->image,
- cmd_buffer->queue_family_index,
- cmd_buffer->queue_family_index))) {
- /* Only load the depth/stencil fast clear values when
- * compressed rendering is enabled.
- */
- radv_load_ds_clear_metadata(cmd_buffer, iview);
- }
- } else {
- if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9)
- radeon_set_context_reg_seq(cmd_buffer->cs, R_028038_DB_Z_INFO, 2);
- else
- radeon_set_context_reg_seq(cmd_buffer->cs, R_028040_DB_Z_INFO, 2);
-
- radeon_emit(cmd_buffer->cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* DB_Z_INFO */
- radeon_emit(cmd_buffer->cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* DB_STENCIL_INFO */
- }
- radeon_set_context_reg(cmd_buffer->cs, R_028208_PA_SC_WINDOW_SCISSOR_BR,
- S_028208_BR_X(framebuffer->width) |
- S_028208_BR_Y(framebuffer->height));
-
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX8) {
- bool disable_constant_encode =
- cmd_buffer->device->physical_device->rad_info.has_dcc_constant_encode;
- enum chip_class chip_class =
- cmd_buffer->device->physical_device->rad_info.chip_class;
- uint8_t watermark = chip_class >= GFX10 ? 6 : 4;
-
- radeon_set_context_reg(cmd_buffer->cs, R_028424_CB_DCC_CONTROL,
- S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(chip_class <= GFX9) |
- S_028424_OVERWRITE_COMBINER_WATERMARK(watermark) |
- S_028424_DISABLE_CONSTANT_ENCODE_REG(disable_constant_encode));
- }
-
- if (cmd_buffer->device->dfsm_allowed) {
- radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
- }
-
- cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_FRAMEBUFFER;
+ int i;
+ struct radv_framebuffer *framebuffer = cmd_buffer->state.framebuffer;
+ const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+
+ /* this may happen for inherited secondary recording */
+ if (!framebuffer)
+ return;
+
+ for (i = 0; i < 8; ++i) {
+ if (i >= subpass->color_count ||
+ subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED) {
+ radeon_set_context_reg(cmd_buffer->cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
+ S_028C70_FORMAT(V_028C70_COLOR_INVALID));
+ continue;
+ }
+
+ int idx = subpass->color_attachments[i].attachment;
+ struct radv_image_view *iview = cmd_buffer->state.attachments[idx].iview;
+ VkImageLayout layout = subpass->color_attachments[i].layout;
+ bool in_render_loop = subpass->color_attachments[i].in_render_loop;
+
+ radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, iview->bo);
+
+ assert(iview->aspect_mask & (VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_PLANE_0_BIT |
+ VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT));
+ radv_emit_fb_color_state(cmd_buffer, i, &cmd_buffer->state.attachments[idx].cb, iview, layout,
+ in_render_loop, cmd_buffer->state.attachments[idx].disable_dcc);
+
+ radv_load_color_clear_metadata(cmd_buffer, iview, i);
+ }
+
+ if (subpass->depth_stencil_attachment) {
+ int idx = subpass->depth_stencil_attachment->attachment;
+ VkImageLayout layout = subpass->depth_stencil_attachment->layout;
+ bool in_render_loop = subpass->depth_stencil_attachment->in_render_loop;
+ struct radv_image_view *iview = cmd_buffer->state.attachments[idx].iview;
+ radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs,
+ cmd_buffer->state.attachments[idx].iview->bo);
+
+ radv_emit_fb_ds_state(cmd_buffer, &cmd_buffer->state.attachments[idx].ds, iview, layout,
+ in_render_loop);
+
+ if (radv_layout_is_htile_compressed(
+ cmd_buffer->device, iview->image, layout, in_render_loop,
+ radv_image_queue_family_mask(iview->image, cmd_buffer->queue_family_index,
+ cmd_buffer->queue_family_index))) {
+ /* Only load the depth/stencil fast clear values when
+ * compressed rendering is enabled.
+ */
+ radv_load_ds_clear_metadata(cmd_buffer, iview);
+ }
+ } else {
+ if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9)
+ radeon_set_context_reg_seq(cmd_buffer->cs, R_028038_DB_Z_INFO, 2);
+ else
+ radeon_set_context_reg_seq(cmd_buffer->cs, R_028040_DB_Z_INFO, 2);
+
+ radeon_emit(cmd_buffer->cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* DB_Z_INFO */
+ radeon_emit(cmd_buffer->cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* DB_STENCIL_INFO */
+ }
+ radeon_set_context_reg(cmd_buffer->cs, R_028208_PA_SC_WINDOW_SCISSOR_BR,
+ S_028208_BR_X(framebuffer->width) | S_028208_BR_Y(framebuffer->height));
+
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX8) {
+ bool disable_constant_encode =
+ cmd_buffer->device->physical_device->rad_info.has_dcc_constant_encode;
+ enum chip_class chip_class = cmd_buffer->device->physical_device->rad_info.chip_class;
+ uint8_t watermark = chip_class >= GFX10 ? 6 : 4;
+
+ radeon_set_context_reg(cmd_buffer->cs, R_028424_CB_DCC_CONTROL,
+ S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(chip_class <= GFX9) |
+ S_028424_OVERWRITE_COMBINER_WATERMARK(watermark) |
+ S_028424_DISABLE_CONSTANT_ENCODE_REG(disable_constant_encode));
+ }
+
+ if (cmd_buffer->device->dfsm_allowed) {
+ radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
+ }
+
+ cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_FRAMEBUFFER;
}
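Note: the PA_SC_WINDOW_SCISSOR_BR write above packs the framebuffer width and height into one register through the generated S_028208_BR_X/BR_Y field macros. A rough sketch only, assuming the X field sits in the low half and the Y field starts at bit 16; the real masks and shifts come from the generated headers:

#include <stdint.h>

static uint32_t
pack_scissor_br(uint32_t br_x, uint32_t br_y)
{
   return (br_x & 0xffffu) | ((br_y & 0xffffu) << 16); /* assumed field layout */
}

int
main(void)
{
   return pack_scissor_br(1920, 1080) == ((1080u << 16) | 1920u) ? 0 : 1;
}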
static void
radv_emit_index_buffer(struct radv_cmd_buffer *cmd_buffer, bool indirect)
{
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- struct radv_cmd_state *state = &cmd_buffer->state;
-
- if (state->index_type != state->last_index_type) {
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
- radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device,
- cs, R_03090C_VGT_INDEX_TYPE,
- 2, state->index_type);
- } else {
- radeon_emit(cs, PKT3(PKT3_INDEX_TYPE, 0, 0));
- radeon_emit(cs, state->index_type);
- }
-
- state->last_index_type = state->index_type;
- }
-
- /* For the direct indexed draws we use DRAW_INDEX_2, which includes
- * the index_va and max_index_count already. */
- if (!indirect)
- return;
-
- radeon_emit(cs, PKT3(PKT3_INDEX_BASE, 1, 0));
- radeon_emit(cs, state->index_va);
- radeon_emit(cs, state->index_va >> 32);
-
- radeon_emit(cs, PKT3(PKT3_INDEX_BUFFER_SIZE, 0, 0));
- radeon_emit(cs, state->max_index_count);
-
- cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_INDEX_BUFFER;
-}
-
-void radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer)
-{
- bool has_perfect_queries = cmd_buffer->state.perfect_occlusion_queries_enabled;
- struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
- uint32_t pa_sc_mode_cntl_1 =
- pipeline ? pipeline->graphics.ms.pa_sc_mode_cntl_1 : 0;
- uint32_t db_count_control;
-
- if(!cmd_buffer->state.active_occlusion_queries) {
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) {
- if (G_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(pa_sc_mode_cntl_1) &&
- pipeline->graphics.disable_out_of_order_rast_for_occlusion &&
- has_perfect_queries) {
- /* Re-enable out-of-order rasterization if the
- * bound pipeline supports it and if it's has
- * been disabled before starting any perfect
- * occlusion queries.
- */
- radeon_set_context_reg(cmd_buffer->cs,
- R_028A4C_PA_SC_MODE_CNTL_1,
- pa_sc_mode_cntl_1);
- }
- }
- db_count_control = S_028004_ZPASS_INCREMENT_DISABLE(1);
- } else {
- const struct radv_subpass *subpass = cmd_buffer->state.subpass;
- uint32_t sample_rate = subpass ? util_logbase2(subpass->max_sample_count) : 0;
- bool gfx10_perfect = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10 && has_perfect_queries;
-
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) {
- /* Always enable PERFECT_ZPASS_COUNTS due to issues with partially
- * covered tiles, discards, and early depth testing. For more details,
- * see https://gitlab.freedesktop.org/mesa/mesa/-/issues/3218 */
- db_count_control =
- S_028004_PERFECT_ZPASS_COUNTS(1) |
- S_028004_DISABLE_CONSERVATIVE_ZPASS_COUNTS(gfx10_perfect) |
- S_028004_SAMPLE_RATE(sample_rate) |
- S_028004_ZPASS_ENABLE(1) |
- S_028004_SLICE_EVEN_ENABLE(1) |
- S_028004_SLICE_ODD_ENABLE(1);
-
- if (G_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(pa_sc_mode_cntl_1) &&
- pipeline->graphics.disable_out_of_order_rast_for_occlusion &&
- has_perfect_queries) {
- /* If the bound pipeline has enabled
- * out-of-order rasterization, we should
- * disable it before starting any perfect
- * occlusion queries.
- */
- pa_sc_mode_cntl_1 &= C_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE;
-
- radeon_set_context_reg(cmd_buffer->cs,
- R_028A4C_PA_SC_MODE_CNTL_1,
- pa_sc_mode_cntl_1);
- }
- } else {
- db_count_control = S_028004_PERFECT_ZPASS_COUNTS(1) |
- S_028004_SAMPLE_RATE(sample_rate);
- }
- }
-
- radeon_set_context_reg(cmd_buffer->cs, R_028004_DB_COUNT_CONTROL, db_count_control);
-
- cmd_buffer->state.context_roll_without_scissor_emitted = true;
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ struct radv_cmd_state *state = &cmd_buffer->state;
+
+ if (state->index_type != state->last_index_type) {
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
+ radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device, cs,
+ R_03090C_VGT_INDEX_TYPE, 2, state->index_type);
+ } else {
+ radeon_emit(cs, PKT3(PKT3_INDEX_TYPE, 0, 0));
+ radeon_emit(cs, state->index_type);
+ }
+
+ state->last_index_type = state->index_type;
+ }
+
+ /* For the direct indexed draws we use DRAW_INDEX_2, which includes
+ * the index_va and max_index_count already. */
+ if (!indirect)
+ return;
+
+ radeon_emit(cs, PKT3(PKT3_INDEX_BASE, 1, 0));
+ radeon_emit(cs, state->index_va);
+ radeon_emit(cs, state->index_va >> 32);
+
+ radeon_emit(cs, PKT3(PKT3_INDEX_BUFFER_SIZE, 0, 0));
+ radeon_emit(cs, state->max_index_count);
+
+ cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_INDEX_BUFFER;
+}
+
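Note: radv_emit_index_buffer() above re-emits VGT_INDEX_TYPE only when the type actually changed since the last emit, a common redundant-state filter. A standalone model of that check (types and names are mine):

#include <stdbool.h>
#include <stdint.h>

struct index_state {
   uint32_t index_type;
   uint32_t last_index_type; /* what the GPU currently has programmed */
};

static bool
index_type_needs_emit(struct index_state *state)
{
   if (state->index_type == state->last_index_type)
      return false;                         /* skip the redundant packet */
   state->last_index_type = state->index_type;
   return true;                             /* caller emits VGT_INDEX_TYPE */
}

int
main(void)
{
   struct index_state state = {.index_type = 1, .last_index_type = ~0u};
   return index_type_needs_emit(&state) && !index_type_needs_emit(&state) ? 0 : 1;
}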
+void
+radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer)
+{
+ bool has_perfect_queries = cmd_buffer->state.perfect_occlusion_queries_enabled;
+ struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
+ uint32_t pa_sc_mode_cntl_1 = pipeline ? pipeline->graphics.ms.pa_sc_mode_cntl_1 : 0;
+ uint32_t db_count_control;
+
+ if (!cmd_buffer->state.active_occlusion_queries) {
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) {
+ if (G_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(pa_sc_mode_cntl_1) &&
+ pipeline->graphics.disable_out_of_order_rast_for_occlusion && has_perfect_queries) {
+ /* Re-enable out-of-order rasterization if the
+             * bound pipeline supports it and if it has
+ * been disabled before starting any perfect
+ * occlusion queries.
+ */
+ radeon_set_context_reg(cmd_buffer->cs, R_028A4C_PA_SC_MODE_CNTL_1, pa_sc_mode_cntl_1);
+ }
+ }
+ db_count_control = S_028004_ZPASS_INCREMENT_DISABLE(1);
+ } else {
+ const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+ uint32_t sample_rate = subpass ? util_logbase2(subpass->max_sample_count) : 0;
+ bool gfx10_perfect =
+ cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10 && has_perfect_queries;
+
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) {
+ /* Always enable PERFECT_ZPASS_COUNTS due to issues with partially
+ * covered tiles, discards, and early depth testing. For more details,
+ * see https://gitlab.freedesktop.org/mesa/mesa/-/issues/3218 */
+ db_count_control = S_028004_PERFECT_ZPASS_COUNTS(1) |
+ S_028004_DISABLE_CONSERVATIVE_ZPASS_COUNTS(gfx10_perfect) |
+ S_028004_SAMPLE_RATE(sample_rate) | S_028004_ZPASS_ENABLE(1) |
+ S_028004_SLICE_EVEN_ENABLE(1) | S_028004_SLICE_ODD_ENABLE(1);
+
+ if (G_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(pa_sc_mode_cntl_1) &&
+ pipeline->graphics.disable_out_of_order_rast_for_occlusion && has_perfect_queries) {
+ /* If the bound pipeline has enabled
+ * out-of-order rasterization, we should
+ * disable it before starting any perfect
+ * occlusion queries.
+ */
+ pa_sc_mode_cntl_1 &= C_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE;
+
+ radeon_set_context_reg(cmd_buffer->cs, R_028A4C_PA_SC_MODE_CNTL_1, pa_sc_mode_cntl_1);
+ }
+ } else {
+ db_count_control = S_028004_PERFECT_ZPASS_COUNTS(1) | S_028004_SAMPLE_RATE(sample_rate);
+ }
+ }
+
+ radeon_set_context_reg(cmd_buffer->cs, R_028004_DB_COUNT_CONTROL, db_count_control);
+
+ cmd_buffer->state.context_roll_without_scissor_emitted = true;
}
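Note: SAMPLE_RATE above is the base-2 log of the subpass sample count (util_logbase2). For the power-of-two counts Vulkan allows, a standalone equivalent:

#include <assert.h>

static unsigned
log2_samples(unsigned samples)
{
   unsigned result = 0;
   while (samples > 1) {
      samples >>= 1;
      result++;
   }
   return result;
}

int
main(void)
{
   assert(log2_samples(1) == 0);
   assert(log2_samples(4) == 2);
   assert(log2_samples(8) == 3);
   return 0;
}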
static void
radv_cmd_buffer_flush_dynamic_state(struct radv_cmd_buffer *cmd_buffer)
{
- uint32_t states = cmd_buffer->state.dirty & cmd_buffer->state.emitted_pipeline->graphics.needed_dynamic_state;
+ uint32_t states =
+ cmd_buffer->state.dirty & cmd_buffer->state.emitted_pipeline->graphics.needed_dynamic_state;
- if (states & (RADV_CMD_DIRTY_DYNAMIC_VIEWPORT))
- radv_emit_viewport(cmd_buffer);
+ if (states & (RADV_CMD_DIRTY_DYNAMIC_VIEWPORT))
+ radv_emit_viewport(cmd_buffer);
- if (states & (RADV_CMD_DIRTY_DYNAMIC_SCISSOR | RADV_CMD_DIRTY_DYNAMIC_VIEWPORT) &&
- !cmd_buffer->device->physical_device->rad_info.has_gfx9_scissor_bug)
- radv_emit_scissor(cmd_buffer);
+ if (states & (RADV_CMD_DIRTY_DYNAMIC_SCISSOR | RADV_CMD_DIRTY_DYNAMIC_VIEWPORT) &&
+ !cmd_buffer->device->physical_device->rad_info.has_gfx9_scissor_bug)
+ radv_emit_scissor(cmd_buffer);
- if (states & RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH)
- radv_emit_line_width(cmd_buffer);
+ if (states & RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH)
+ radv_emit_line_width(cmd_buffer);
- if (states & RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS)
- radv_emit_blend_constants(cmd_buffer);
+ if (states & RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS)
+ radv_emit_blend_constants(cmd_buffer);
- if (states & (RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE |
- RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK |
- RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK))
- radv_emit_stencil(cmd_buffer);
+ if (states &
+ (RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE | RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK |
+ RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK))
+ radv_emit_stencil(cmd_buffer);
- if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS)
- radv_emit_depth_bounds(cmd_buffer);
+ if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS)
+ radv_emit_depth_bounds(cmd_buffer);
- if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)
- radv_emit_depth_bias(cmd_buffer);
+ if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)
+ radv_emit_depth_bias(cmd_buffer);
- if (states & RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE)
- radv_emit_discard_rectangle(cmd_buffer);
+ if (states & RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE)
+ radv_emit_discard_rectangle(cmd_buffer);
- if (states & RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS)
- radv_emit_sample_locations(cmd_buffer);
+ if (states & RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS)
+ radv_emit_sample_locations(cmd_buffer);
- if (states & RADV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE)
- radv_emit_line_stipple(cmd_buffer);
+ if (states & RADV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE)
+ radv_emit_line_stipple(cmd_buffer);
- if (states & (RADV_CMD_DIRTY_DYNAMIC_CULL_MODE |
- RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE))
- radv_emit_culling(cmd_buffer, states);
+ if (states & (RADV_CMD_DIRTY_DYNAMIC_CULL_MODE | RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE))
+ radv_emit_culling(cmd_buffer, states);
- if (states & RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY)
- radv_emit_primitive_topology(cmd_buffer);
+ if (states & RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY)
+ radv_emit_primitive_topology(cmd_buffer);
- if (states & (RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE |
- RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE |
- RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP |
- RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE |
- RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE |
- RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP))
- radv_emit_depth_control(cmd_buffer, states);
+ if (states &
+ (RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE | RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE |
+ RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP | RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE |
+ RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE | RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP))
+ radv_emit_depth_control(cmd_buffer, states);
- if (states & RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP)
- radv_emit_stencil_control(cmd_buffer);
+ if (states & RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP)
+ radv_emit_stencil_control(cmd_buffer);
- if (states & RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE)
- radv_emit_fragment_shading_rate(cmd_buffer);
+ if (states & RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE)
+ radv_emit_fragment_shading_rate(cmd_buffer);
- cmd_buffer->state.dirty &= ~states;
+ cmd_buffer->state.dirty &= ~states;
}
static void
-radv_flush_push_descriptors(struct radv_cmd_buffer *cmd_buffer,
- VkPipelineBindPoint bind_point)
+radv_flush_push_descriptors(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point)
{
- struct radv_descriptor_state *descriptors_state =
- radv_get_descriptors_state(cmd_buffer, bind_point);
- struct radv_descriptor_set *set =
- (struct radv_descriptor_set *)&descriptors_state->push_set.set;
- unsigned bo_offset;
+ struct radv_descriptor_state *descriptors_state =
+ radv_get_descriptors_state(cmd_buffer, bind_point);
+ struct radv_descriptor_set *set = (struct radv_descriptor_set *)&descriptors_state->push_set.set;
+ unsigned bo_offset;
- if (!radv_cmd_buffer_upload_data(cmd_buffer, set->header.size,
- set->header.mapped_ptr,
- &bo_offset))
- return;
+ if (!radv_cmd_buffer_upload_data(cmd_buffer, set->header.size, set->header.mapped_ptr,
+ &bo_offset))
+ return;
- set->header.va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
- set->header.va += bo_offset;
+ set->header.va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
+ set->header.va += bo_offset;
}
static void
radv_flush_indirect_descriptor_sets(struct radv_cmd_buffer *cmd_buffer,
- VkPipelineBindPoint bind_point)
+ VkPipelineBindPoint bind_point)
{
- struct radv_descriptor_state *descriptors_state =
- radv_get_descriptors_state(cmd_buffer, bind_point);
- uint32_t size = MAX_SETS * 4;
- uint32_t offset;
- void *ptr;
+ struct radv_descriptor_state *descriptors_state =
+ radv_get_descriptors_state(cmd_buffer, bind_point);
+ uint32_t size = MAX_SETS * 4;
+ uint32_t offset;
+ void *ptr;
- if (!radv_cmd_buffer_upload_alloc(cmd_buffer, size, &offset, &ptr))
- return;
+ if (!radv_cmd_buffer_upload_alloc(cmd_buffer, size, &offset, &ptr))
+ return;
- for (unsigned i = 0; i < MAX_SETS; i++) {
- uint32_t *uptr = ((uint32_t *)ptr) + i;
- uint64_t set_va = 0;
- struct radv_descriptor_set *set = descriptors_state->sets[i];
- if (descriptors_state->valid & (1u << i))
- set_va = set->header.va;
- uptr[0] = set_va & 0xffffffff;
- }
+ for (unsigned i = 0; i < MAX_SETS; i++) {
+ uint32_t *uptr = ((uint32_t *)ptr) + i;
+ uint64_t set_va = 0;
+ struct radv_descriptor_set *set = descriptors_state->sets[i];
+ if (descriptors_state->valid & (1u << i))
+ set_va = set->header.va;
+ uptr[0] = set_va & 0xffffffff;
+ }
- uint64_t va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
- va += offset;
+ uint64_t va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
+ va += offset;
- if (cmd_buffer->state.pipeline) {
- if (cmd_buffer->state.pipeline->shaders[MESA_SHADER_VERTEX])
- radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_VERTEX,
- AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
+ if (cmd_buffer->state.pipeline) {
+ if (cmd_buffer->state.pipeline->shaders[MESA_SHADER_VERTEX])
+ radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_VERTEX,
+ AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
- if (cmd_buffer->state.pipeline->shaders[MESA_SHADER_FRAGMENT])
- radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_FRAGMENT,
- AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
+ if (cmd_buffer->state.pipeline->shaders[MESA_SHADER_FRAGMENT])
+ radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_FRAGMENT,
+ AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
- if (radv_pipeline_has_gs(cmd_buffer->state.pipeline))
- radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_GEOMETRY,
- AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
+ if (radv_pipeline_has_gs(cmd_buffer->state.pipeline))
+ radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_GEOMETRY,
+ AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
- if (radv_pipeline_has_tess(cmd_buffer->state.pipeline))
- radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_TESS_CTRL,
- AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
+ if (radv_pipeline_has_tess(cmd_buffer->state.pipeline))
+ radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_TESS_CTRL,
+ AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
- if (radv_pipeline_has_tess(cmd_buffer->state.pipeline))
- radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_TESS_EVAL,
- AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
- }
+ if (radv_pipeline_has_tess(cmd_buffer->state.pipeline))
+ radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_TESS_EVAL,
+ AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
+ }
- if (cmd_buffer->state.compute_pipeline)
- radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.compute_pipeline, MESA_SHADER_COMPUTE,
- AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
+ if (cmd_buffer->state.compute_pipeline)
+ radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.compute_pipeline,
+ MESA_SHADER_COMPUTE, AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
}
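The indirect descriptor table uploaded above is a flat array with one 32-bit entry per descriptor set, holding the low 32 bits of each bound set's VA and 0 for unbound sets. A minimal standalone sketch of that packing, not part of this change; NUM_SETS, the mask and the addresses are illustrative stand-ins for MAX_SETS and real buffer VAs:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define NUM_SETS 4 /* illustrative stand-in for MAX_SETS */

int main(void)
{
   uint64_t set_va[NUM_SETS] = {0x100000000ull, 0x100010000ull, 0, 0x100020000ull};
   uint32_t valid_mask = 0xbu; /* sets 0, 1 and 3 are bound */
   uint32_t table[NUM_SETS];

   for (unsigned i = 0; i < NUM_SETS; i++)
      table[i] = (valid_mask & (1u << i)) ? (uint32_t)(set_va[i] & 0xffffffff) : 0;

   for (unsigned i = 0; i < NUM_SETS; i++)
      printf("set %u -> 0x%08" PRIx32 "\n", i, table[i]);
   return 0;
}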
static void
-radv_flush_descriptors(struct radv_cmd_buffer *cmd_buffer,
- VkShaderStageFlags stages)
+radv_flush_descriptors(struct radv_cmd_buffer *cmd_buffer, VkShaderStageFlags stages)
{
- VkPipelineBindPoint bind_point = stages & VK_SHADER_STAGE_COMPUTE_BIT ?
- VK_PIPELINE_BIND_POINT_COMPUTE :
- VK_PIPELINE_BIND_POINT_GRAPHICS;
- struct radv_descriptor_state *descriptors_state =
- radv_get_descriptors_state(cmd_buffer, bind_point);
- struct radv_cmd_state *state = &cmd_buffer->state;
- bool flush_indirect_descriptors;
+ VkPipelineBindPoint bind_point = stages & VK_SHADER_STAGE_COMPUTE_BIT
+ ? VK_PIPELINE_BIND_POINT_COMPUTE
+ : VK_PIPELINE_BIND_POINT_GRAPHICS;
+ struct radv_descriptor_state *descriptors_state =
+ radv_get_descriptors_state(cmd_buffer, bind_point);
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ bool flush_indirect_descriptors;
- if (!descriptors_state->dirty)
- return;
+ if (!descriptors_state->dirty)
+ return;
- if (descriptors_state->push_dirty)
- radv_flush_push_descriptors(cmd_buffer, bind_point);
+ if (descriptors_state->push_dirty)
+ radv_flush_push_descriptors(cmd_buffer, bind_point);
- flush_indirect_descriptors =
- (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS &&
- state->pipeline && state->pipeline->need_indirect_descriptor_sets) ||
- (bind_point == VK_PIPELINE_BIND_POINT_COMPUTE &&
- state->compute_pipeline && state->compute_pipeline->need_indirect_descriptor_sets);
+ flush_indirect_descriptors =
+ (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS && state->pipeline &&
+ state->pipeline->need_indirect_descriptor_sets) ||
+ (bind_point == VK_PIPELINE_BIND_POINT_COMPUTE && state->compute_pipeline &&
+ state->compute_pipeline->need_indirect_descriptor_sets);
- if (flush_indirect_descriptors)
- radv_flush_indirect_descriptor_sets(cmd_buffer, bind_point);
+ if (flush_indirect_descriptors)
+ radv_flush_indirect_descriptor_sets(cmd_buffer, bind_point);
- ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
- cmd_buffer->cs,
- MAX_SETS * MESA_SHADER_STAGES * 4);
+ ASSERTED unsigned cdw_max =
+ radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, MAX_SETS * MESA_SHADER_STAGES * 4);
- if (cmd_buffer->state.pipeline) {
- radv_foreach_stage(stage, stages) {
- if (!cmd_buffer->state.pipeline->shaders[stage])
- continue;
+ if (cmd_buffer->state.pipeline) {
+ radv_foreach_stage(stage, stages)
+ {
+ if (!cmd_buffer->state.pipeline->shaders[stage])
+ continue;
- radv_emit_descriptor_pointers(cmd_buffer,
- cmd_buffer->state.pipeline,
- descriptors_state, stage);
- }
- }
+ radv_emit_descriptor_pointers(cmd_buffer, cmd_buffer->state.pipeline, descriptors_state,
+ stage);
+ }
+ }
- if (cmd_buffer->state.compute_pipeline &&
- (stages & VK_SHADER_STAGE_COMPUTE_BIT)) {
- radv_emit_descriptor_pointers(cmd_buffer,
- cmd_buffer->state.compute_pipeline,
- descriptors_state,
- MESA_SHADER_COMPUTE);
- }
+ if (cmd_buffer->state.compute_pipeline && (stages & VK_SHADER_STAGE_COMPUTE_BIT)) {
+ radv_emit_descriptor_pointers(cmd_buffer, cmd_buffer->state.compute_pipeline,
+ descriptors_state, MESA_SHADER_COMPUTE);
+ }
- descriptors_state->dirty = 0;
- descriptors_state->push_dirty = false;
+ descriptors_state->dirty = 0;
+ descriptors_state->push_dirty = false;
- assert(cmd_buffer->cs->cdw <= cdw_max);
+ assert(cmd_buffer->cs->cdw <= cdw_max);
- if (unlikely(cmd_buffer->device->trace_bo))
- radv_save_descriptors(cmd_buffer, bind_point);
+ if (unlikely(cmd_buffer->device->trace_bo))
+ radv_save_descriptors(cmd_buffer, bind_point);
}
static void
-radv_flush_constants(struct radv_cmd_buffer *cmd_buffer,
- VkShaderStageFlags stages)
-{
- struct radv_pipeline *pipeline = stages & VK_SHADER_STAGE_COMPUTE_BIT
- ? cmd_buffer->state.compute_pipeline
- : cmd_buffer->state.pipeline;
- VkPipelineBindPoint bind_point = stages & VK_SHADER_STAGE_COMPUTE_BIT ?
- VK_PIPELINE_BIND_POINT_COMPUTE :
- VK_PIPELINE_BIND_POINT_GRAPHICS;
- struct radv_descriptor_state *descriptors_state =
- radv_get_descriptors_state(cmd_buffer, bind_point);
- struct radv_pipeline_layout *layout = pipeline->layout;
- struct radv_shader_variant *shader, *prev_shader;
- bool need_push_constants = false;
- unsigned offset;
- void *ptr;
- uint64_t va;
-
- stages &= cmd_buffer->push_constant_stages;
- if (!stages ||
- (!layout->push_constant_size && !layout->dynamic_offset_count))
- return;
-
- radv_foreach_stage(stage, stages) {
- shader = radv_get_shader(pipeline, stage);
- if (!shader)
- continue;
-
- need_push_constants |= shader->info.loads_push_constants;
- need_push_constants |= shader->info.loads_dynamic_offsets;
-
- uint8_t base = shader->info.base_inline_push_consts;
- uint8_t count = shader->info.num_inline_push_consts;
-
- radv_emit_inline_push_consts(cmd_buffer, pipeline, stage,
- AC_UD_INLINE_PUSH_CONSTANTS,
- count,
- (uint32_t *)&cmd_buffer->push_constants[base * 4]);
- }
-
- if (need_push_constants) {
- if (!radv_cmd_buffer_upload_alloc(cmd_buffer, layout->push_constant_size +
- 16 * layout->dynamic_offset_count, &offset, &ptr))
- return;
-
- memcpy(ptr, cmd_buffer->push_constants, layout->push_constant_size);
- memcpy((char*)ptr + layout->push_constant_size,
- descriptors_state->dynamic_buffers,
- 16 * layout->dynamic_offset_count);
-
- va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
- va += offset;
-
- ASSERTED unsigned cdw_max =
- radeon_check_space(cmd_buffer->device->ws,
- cmd_buffer->cs, MESA_SHADER_STAGES * 4);
-
- prev_shader = NULL;
- radv_foreach_stage(stage, stages) {
- shader = radv_get_shader(pipeline, stage);
-
- /* Avoid redundantly emitting the address for merged stages. */
- if (shader && shader != prev_shader) {
- radv_emit_userdata_address(cmd_buffer, pipeline, stage,
- AC_UD_PUSH_CONSTANTS, va);
-
- prev_shader = shader;
- }
- }
- assert(cmd_buffer->cs->cdw <= cdw_max);
- }
-
- cmd_buffer->push_constant_stages &= ~stages;
+radv_flush_constants(struct radv_cmd_buffer *cmd_buffer, VkShaderStageFlags stages)
+{
+ struct radv_pipeline *pipeline = stages & VK_SHADER_STAGE_COMPUTE_BIT
+ ? cmd_buffer->state.compute_pipeline
+ : cmd_buffer->state.pipeline;
+ VkPipelineBindPoint bind_point = stages & VK_SHADER_STAGE_COMPUTE_BIT
+ ? VK_PIPELINE_BIND_POINT_COMPUTE
+ : VK_PIPELINE_BIND_POINT_GRAPHICS;
+ struct radv_descriptor_state *descriptors_state =
+ radv_get_descriptors_state(cmd_buffer, bind_point);
+ struct radv_pipeline_layout *layout = pipeline->layout;
+ struct radv_shader_variant *shader, *prev_shader;
+ bool need_push_constants = false;
+ unsigned offset;
+ void *ptr;
+ uint64_t va;
+
+ stages &= cmd_buffer->push_constant_stages;
+ if (!stages || (!layout->push_constant_size && !layout->dynamic_offset_count))
+ return;
+
+ radv_foreach_stage(stage, stages)
+ {
+ shader = radv_get_shader(pipeline, stage);
+ if (!shader)
+ continue;
+
+ need_push_constants |= shader->info.loads_push_constants;
+ need_push_constants |= shader->info.loads_dynamic_offsets;
+
+ uint8_t base = shader->info.base_inline_push_consts;
+ uint8_t count = shader->info.num_inline_push_consts;
+
+ radv_emit_inline_push_consts(cmd_buffer, pipeline, stage, AC_UD_INLINE_PUSH_CONSTANTS, count,
+ (uint32_t *)&cmd_buffer->push_constants[base * 4]);
+ }
+
+ if (need_push_constants) {
+ if (!radv_cmd_buffer_upload_alloc(
+ cmd_buffer, layout->push_constant_size + 16 * layout->dynamic_offset_count, &offset,
+ &ptr))
+ return;
+
+ memcpy(ptr, cmd_buffer->push_constants, layout->push_constant_size);
+ memcpy((char *)ptr + layout->push_constant_size, descriptors_state->dynamic_buffers,
+ 16 * layout->dynamic_offset_count);
+
+ va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
+ va += offset;
+
+ ASSERTED unsigned cdw_max =
+ radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, MESA_SHADER_STAGES * 4);
+
+ prev_shader = NULL;
+ radv_foreach_stage(stage, stages)
+ {
+ shader = radv_get_shader(pipeline, stage);
+
+ /* Avoid redundantly emitting the address for merged stages. */
+ if (shader && shader != prev_shader) {
+ radv_emit_userdata_address(cmd_buffer, pipeline, stage, AC_UD_PUSH_CONSTANTS, va);
+
+ prev_shader = shader;
+ }
+ }
+ assert(cmd_buffer->cs->cdw <= cdw_max);
+ }
+
+ cmd_buffer->push_constant_stages &= ~stages;
}
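The upload performed above packs the raw push constants first, followed by 16 bytes per dynamic buffer descriptor. A standalone sketch of that layout (not radv code; the sizes are illustrative assumptions, not values from any pipeline layout):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
   const unsigned push_constant_size = 64;  /* illustrative layout->push_constant_size */
   const unsigned dynamic_offset_count = 2; /* illustrative layout->dynamic_offset_count */
   uint8_t push_constants[64] = {0};
   uint8_t dynamic_buffers[2 * 16] = {0};
   uint8_t upload[64 + 2 * 16];

   /* Push constants first, then one 16-byte descriptor per dynamic offset. */
   memcpy(upload, push_constants, push_constant_size);
   memcpy(upload + push_constant_size, dynamic_buffers, 16 * dynamic_offset_count);

   printf("upload size = %zu bytes\n", sizeof(upload));
   return 0;
}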
static void
-radv_flush_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer,
- bool pipeline_is_dirty)
-{
- if ((pipeline_is_dirty ||
- (cmd_buffer->state.dirty & RADV_CMD_DIRTY_VERTEX_BUFFER)) &&
- cmd_buffer->state.pipeline->num_vertex_bindings &&
- radv_get_shader(cmd_buffer->state.pipeline, MESA_SHADER_VERTEX)->info.vs.has_vertex_buffers) {
- unsigned vb_offset;
- void *vb_ptr;
- uint32_t i = 0;
- uint32_t count = cmd_buffer->state.pipeline->num_vertex_bindings;
- uint64_t va;
-
- /* allocate some descriptor state for vertex buffers */
- if (!radv_cmd_buffer_upload_alloc(cmd_buffer, count * 16,
- &vb_offset, &vb_ptr))
- return;
-
- for (i = 0; i < count; i++) {
- uint32_t *desc = &((uint32_t *)vb_ptr)[i * 4];
- uint32_t offset;
- struct radv_buffer *buffer = cmd_buffer->vertex_bindings[i].buffer;
- unsigned num_records;
- unsigned stride;
-
- if (!buffer) {
- memset(desc, 0, 4 * 4);
- continue;
- }
-
- va = radv_buffer_get_va(buffer->bo);
-
- offset = cmd_buffer->vertex_bindings[i].offset;
- va += offset + buffer->offset;
-
- if (cmd_buffer->vertex_bindings[i].size) {
- num_records = cmd_buffer->vertex_bindings[i].size;
- } else {
- num_records = buffer->size - offset;
- }
-
- if (cmd_buffer->state.pipeline->graphics.uses_dynamic_stride) {
- stride = cmd_buffer->vertex_bindings[i].stride;
- } else {
- stride = cmd_buffer->state.pipeline->binding_stride[i];
- }
-
- if (cmd_buffer->device->physical_device->rad_info.chip_class != GFX8 && stride)
- num_records = DIV_ROUND_UP(num_records, stride);
-
- uint32_t rsrc_word3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
- S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
- S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
- S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
-
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
- /* OOB_SELECT chooses the out-of-bounds check:
- * - 1: index >= NUM_RECORDS (Structured)
- * - 3: offset >= NUM_RECORDS (Raw)
- */
- int oob_select = stride ? V_008F0C_OOB_SELECT_STRUCTURED : V_008F0C_OOB_SELECT_RAW;
-
- rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_UINT) |
- S_008F0C_OOB_SELECT(oob_select) |
- S_008F0C_RESOURCE_LEVEL(1);
- } else {
- rsrc_word3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_UINT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
- }
-
- desc[0] = va;
- desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride);
- desc[2] = num_records;
- desc[3] = rsrc_word3;
- }
-
- va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
- va += vb_offset;
-
- radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_VERTEX,
- AC_UD_VS_VERTEX_BUFFERS, va);
-
- cmd_buffer->state.vb_va = va;
- cmd_buffer->state.vb_size = count * 16;
- cmd_buffer->state.prefetch_L2_mask |= RADV_PREFETCH_VBO_DESCRIPTORS;
-
- if (unlikely(cmd_buffer->device->trace_bo))
- radv_save_vertex_descriptors(cmd_buffer, (uintptr_t)vb_ptr);
- }
- cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_VERTEX_BUFFER;
+radv_flush_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer, bool pipeline_is_dirty)
+{
+ if ((pipeline_is_dirty || (cmd_buffer->state.dirty & RADV_CMD_DIRTY_VERTEX_BUFFER)) &&
+ cmd_buffer->state.pipeline->num_vertex_bindings &&
+ radv_get_shader(cmd_buffer->state.pipeline, MESA_SHADER_VERTEX)->info.vs.has_vertex_buffers) {
+ unsigned vb_offset;
+ void *vb_ptr;
+ uint32_t i = 0;
+ uint32_t count = cmd_buffer->state.pipeline->num_vertex_bindings;
+ uint64_t va;
+
+ /* allocate some descriptor state for vertex buffers */
+ if (!radv_cmd_buffer_upload_alloc(cmd_buffer, count * 16, &vb_offset, &vb_ptr))
+ return;
+
+ for (i = 0; i < count; i++) {
+ uint32_t *desc = &((uint32_t *)vb_ptr)[i * 4];
+ uint32_t offset;
+ struct radv_buffer *buffer = cmd_buffer->vertex_bindings[i].buffer;
+ unsigned num_records;
+ unsigned stride;
+
+ if (!buffer) {
+ memset(desc, 0, 4 * 4);
+ continue;
+ }
+
+ va = radv_buffer_get_va(buffer->bo);
+
+ offset = cmd_buffer->vertex_bindings[i].offset;
+ va += offset + buffer->offset;
+
+ if (cmd_buffer->vertex_bindings[i].size) {
+ num_records = cmd_buffer->vertex_bindings[i].size;
+ } else {
+ num_records = buffer->size - offset;
+ }
+
+ if (cmd_buffer->state.pipeline->graphics.uses_dynamic_stride) {
+ stride = cmd_buffer->vertex_bindings[i].stride;
+ } else {
+ stride = cmd_buffer->state.pipeline->binding_stride[i];
+ }
+
+ if (cmd_buffer->device->physical_device->rad_info.chip_class != GFX8 && stride)
+ num_records = DIV_ROUND_UP(num_records, stride);
+
+ uint32_t rsrc_word3 =
+ S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
+ /* OOB_SELECT chooses the out-of-bounds check:
+ * - 1: index >= NUM_RECORDS (Structured)
+ * - 3: offset >= NUM_RECORDS (Raw)
+ */
+ int oob_select = stride ? V_008F0C_OOB_SELECT_STRUCTURED : V_008F0C_OOB_SELECT_RAW;
+
+ rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_UINT) |
+ S_008F0C_OOB_SELECT(oob_select) | S_008F0C_RESOURCE_LEVEL(1);
+ } else {
+ rsrc_word3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_UINT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+ }
+
+ desc[0] = va;
+ desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride);
+ desc[2] = num_records;
+ desc[3] = rsrc_word3;
+ }
+
+ va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
+ va += vb_offset;
+
+ radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_VERTEX,
+ AC_UD_VS_VERTEX_BUFFERS, va);
+
+ cmd_buffer->state.vb_va = va;
+ cmd_buffer->state.vb_size = count * 16;
+ cmd_buffer->state.prefetch_L2_mask |= RADV_PREFETCH_VBO_DESCRIPTORS;
+
+ if (unlikely(cmd_buffer->device->trace_bo))
+ radv_save_vertex_descriptors(cmd_buffer, (uintptr_t)vb_ptr);
+ }
+ cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_VERTEX_BUFFER;
}
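As the OOB_SELECT comment above notes, when a non-zero stride is used (and not on GFX8) NUM_RECORDS is expressed in records rather than bytes, and on GFX10 the structured out-of-bounds check then compares the index against that record count. A tiny standalone sketch of the byte-to-record conversion, with made-up sizes:

#include <stdio.h>

#define DIV_ROUND_UP(a, b) (((a) + (b) - 1) / (b))

int main(void)
{
   unsigned bytes_left = 1000; /* illustrative: buffer->size - offset */
   unsigned stride = 24;       /* illustrative: bytes per vertex */
   unsigned num_records = DIV_ROUND_UP(bytes_left, stride);

   /* 42 records; the last one is only partially backed by the buffer. */
   printf("num_records = %u\n", num_records);
   return 0;
}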
static void
radv_emit_streamout_buffers(struct radv_cmd_buffer *cmd_buffer, uint64_t va)
{
- struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
- struct radv_userdata_info *loc;
- uint32_t base_reg;
+ struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
+ struct radv_userdata_info *loc;
+ uint32_t base_reg;
- for (unsigned stage = 0; stage < MESA_SHADER_STAGES; ++stage) {
- if (!radv_get_shader(pipeline, stage))
- continue;
+ for (unsigned stage = 0; stage < MESA_SHADER_STAGES; ++stage) {
+ if (!radv_get_shader(pipeline, stage))
+ continue;
- loc = radv_lookup_user_sgpr(pipeline, stage,
- AC_UD_STREAMOUT_BUFFERS);
- if (loc->sgpr_idx == -1)
- continue;
+ loc = radv_lookup_user_sgpr(pipeline, stage, AC_UD_STREAMOUT_BUFFERS);
+ if (loc->sgpr_idx == -1)
+ continue;
- base_reg = pipeline->user_data_0[stage];
+ base_reg = pipeline->user_data_0[stage];
- radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs,
- base_reg + loc->sgpr_idx * 4, va, false);
- }
+ radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, va,
+ false);
+ }
- if (radv_pipeline_has_gs_copy_shader(pipeline)) {
- loc = &pipeline->gs_copy_shader->info.user_sgprs_locs.shader_data[AC_UD_STREAMOUT_BUFFERS];
- if (loc->sgpr_idx != -1) {
- base_reg = R_00B130_SPI_SHADER_USER_DATA_VS_0;
+ if (radv_pipeline_has_gs_copy_shader(pipeline)) {
+ loc = &pipeline->gs_copy_shader->info.user_sgprs_locs.shader_data[AC_UD_STREAMOUT_BUFFERS];
+ if (loc->sgpr_idx != -1) {
+ base_reg = R_00B130_SPI_SHADER_USER_DATA_VS_0;
- radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs,
- base_reg + loc->sgpr_idx * 4, va, false);
- }
- }
+ radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + loc->sgpr_idx * 4,
+ va, false);
+ }
+ }
}
static void
radv_flush_streamout_descriptors(struct radv_cmd_buffer *cmd_buffer)
{
- if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_STREAMOUT_BUFFER) {
- struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings;
- struct radv_streamout_state *so = &cmd_buffer->state.streamout;
- unsigned so_offset;
- void *so_ptr;
- uint64_t va;
-
- /* Allocate some descriptor state for streamout buffers. */
- if (!radv_cmd_buffer_upload_alloc(cmd_buffer,
- MAX_SO_BUFFERS * 16,
- &so_offset, &so_ptr))
- return;
-
- for (uint32_t i = 0; i < MAX_SO_BUFFERS; i++) {
- struct radv_buffer *buffer = sb[i].buffer;
- uint32_t *desc = &((uint32_t *)so_ptr)[i * 4];
-
- if (!(so->enabled_mask & (1 << i)))
- continue;
-
- va = radv_buffer_get_va(buffer->bo) + buffer->offset;
-
- va += sb[i].offset;
-
- /* Set the descriptor.
- *
- * On GFX8, the format must be non-INVALID, otherwise
- * the buffer will be considered not bound and store
- * instructions will be no-ops.
- */
- uint32_t size = 0xffffffff;
-
- /* Compute the correct buffer size for NGG streamout
- * because it's used to determine the max emit per
- * buffer.
- */
- if (cmd_buffer->device->physical_device->use_ngg_streamout)
- size = buffer->size - sb[i].offset;
-
- uint32_t rsrc_word3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
- S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
- S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
- S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
-
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
- rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
- S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
- S_008F0C_RESOURCE_LEVEL(1);
- } else {
- rsrc_word3 |= S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
- }
-
- desc[0] = va;
- desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32);
- desc[2] = size;
- desc[3] = rsrc_word3;
- }
-
- va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
- va += so_offset;
-
- radv_emit_streamout_buffers(cmd_buffer, va);
- }
-
- cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_STREAMOUT_BUFFER;
+ if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_STREAMOUT_BUFFER) {
+ struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings;
+ struct radv_streamout_state *so = &cmd_buffer->state.streamout;
+ unsigned so_offset;
+ void *so_ptr;
+ uint64_t va;
+
+ /* Allocate some descriptor state for streamout buffers. */
+ if (!radv_cmd_buffer_upload_alloc(cmd_buffer, MAX_SO_BUFFERS * 16, &so_offset, &so_ptr))
+ return;
+
+ for (uint32_t i = 0; i < MAX_SO_BUFFERS; i++) {
+ struct radv_buffer *buffer = sb[i].buffer;
+ uint32_t *desc = &((uint32_t *)so_ptr)[i * 4];
+
+ if (!(so->enabled_mask & (1 << i)))
+ continue;
+
+ va = radv_buffer_get_va(buffer->bo) + buffer->offset;
+
+ va += sb[i].offset;
+
+ /* Set the descriptor.
+ *
+ * On GFX8, the format must be non-INVALID, otherwise
+ * the buffer will be considered not bound and store
+ * instructions will be no-ops.
+ */
+ uint32_t size = 0xffffffff;
+
+ /* Compute the correct buffer size for NGG streamout
+ * because it's used to determine the max emit per
+ * buffer.
+ */
+ if (cmd_buffer->device->physical_device->use_ngg_streamout)
+ size = buffer->size - sb[i].offset;
+
+ uint32_t rsrc_word3 =
+ S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
+ rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+ S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
+ } else {
+ rsrc_word3 |= S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+ }
+
+ desc[0] = va;
+ desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32);
+ desc[2] = size;
+ desc[3] = rsrc_word3;
+ }
+
+ va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
+ va += so_offset;
+
+ radv_emit_streamout_buffers(cmd_buffer, va);
+ }
+
+ cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_STREAMOUT_BUFFER;
}
static void
radv_flush_ngg_gs_state(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
- struct radv_userdata_info *loc;
- uint32_t ngg_gs_state = 0;
- uint32_t base_reg;
+ struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
+ struct radv_userdata_info *loc;
+ uint32_t ngg_gs_state = 0;
+ uint32_t base_reg;
- if (!radv_pipeline_has_gs(pipeline) ||
- !radv_pipeline_has_ngg(pipeline))
- return;
+ if (!radv_pipeline_has_gs(pipeline) || !radv_pipeline_has_ngg(pipeline))
+ return;
- /* By default NGG GS queries are disabled but they are enabled if the
- * command buffer has active GDS queries or if it's a secondary command
- * buffer that inherits the number of generated primitives.
- */
- if (cmd_buffer->state.active_pipeline_gds_queries ||
- (cmd_buffer->state.inherited_pipeline_statistics & VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT))
- ngg_gs_state = 1;
+ /* By default NGG GS queries are disabled but they are enabled if the
+ * command buffer has active GDS queries or if it's a secondary command
+ * buffer that inherits the number of generated primitives.
+ */
+ if (cmd_buffer->state.active_pipeline_gds_queries ||
+ (cmd_buffer->state.inherited_pipeline_statistics &
+ VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT))
+ ngg_gs_state = 1;
- loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_GEOMETRY,
- AC_UD_NGG_GS_STATE);
- base_reg = pipeline->user_data_0[MESA_SHADER_GEOMETRY];
- assert(loc->sgpr_idx != -1);
+ loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_GEOMETRY, AC_UD_NGG_GS_STATE);
+ base_reg = pipeline->user_data_0[MESA_SHADER_GEOMETRY];
+ assert(loc->sgpr_idx != -1);
- radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4,
- ngg_gs_state);
+ radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, ngg_gs_state);
}
static void
radv_upload_graphics_shader_descriptors(struct radv_cmd_buffer *cmd_buffer, bool pipeline_is_dirty)
{
- radv_flush_vertex_descriptors(cmd_buffer, pipeline_is_dirty);
- radv_flush_streamout_descriptors(cmd_buffer);
- radv_flush_descriptors(cmd_buffer, VK_SHADER_STAGE_ALL_GRAPHICS);
- radv_flush_constants(cmd_buffer, VK_SHADER_STAGE_ALL_GRAPHICS);
- radv_flush_ngg_gs_state(cmd_buffer);
+ radv_flush_vertex_descriptors(cmd_buffer, pipeline_is_dirty);
+ radv_flush_streamout_descriptors(cmd_buffer);
+ radv_flush_descriptors(cmd_buffer, VK_SHADER_STAGE_ALL_GRAPHICS);
+ radv_flush_constants(cmd_buffer, VK_SHADER_STAGE_ALL_GRAPHICS);
+ radv_flush_ngg_gs_state(cmd_buffer);
}
struct radv_draw_info {
- /**
- * Number of vertices.
- */
- uint32_t count;
-
- /**
- * First instance id.
- */
- uint32_t first_instance;
-
- /**
- * Number of instances.
- */
- uint32_t instance_count;
-
- /**
- * First index (indexed draws only).
- */
- uint32_t first_index;
-
- /**
- * Whether it's an indexed draw.
- */
- bool indexed;
-
- /**
- * Indirect draw parameters resource.
- */
- struct radv_buffer *indirect;
- uint64_t indirect_offset;
- uint32_t stride;
-
- /**
- * Draw count parameters resource.
- */
- struct radv_buffer *count_buffer;
- uint64_t count_buffer_offset;
-
- /**
- * Stream output parameters resource.
- */
- struct radv_buffer *strmout_buffer;
- uint64_t strmout_buffer_offset;
+ /**
+ * Number of vertices.
+ */
+ uint32_t count;
+
+ /**
+ * First instance id.
+ */
+ uint32_t first_instance;
+
+ /**
+ * Number of instances.
+ */
+ uint32_t instance_count;
+
+ /**
+ * First index (indexed draws only).
+ */
+ uint32_t first_index;
+
+ /**
+ * Whether it's an indexed draw.
+ */
+ bool indexed;
+
+ /**
+ * Indirect draw parameters resource.
+ */
+ struct radv_buffer *indirect;
+ uint64_t indirect_offset;
+ uint32_t stride;
+
+ /**
+ * Draw count parameters resource.
+ */
+ struct radv_buffer *count_buffer;
+ uint64_t count_buffer_offset;
+
+ /**
+ * Stream output parameters resource.
+ */
+ struct radv_buffer *strmout_buffer;
+ uint64_t strmout_buffer_offset;
};
static uint32_t
radv_get_primitive_reset_index(struct radv_cmd_buffer *cmd_buffer)
{
- switch (cmd_buffer->state.index_type) {
- case V_028A7C_VGT_INDEX_8:
- return 0xffu;
- case V_028A7C_VGT_INDEX_16:
- return 0xffffu;
- case V_028A7C_VGT_INDEX_32:
- return 0xffffffffu;
- default:
- unreachable("invalid index type");
- }
+ switch (cmd_buffer->state.index_type) {
+ case V_028A7C_VGT_INDEX_8:
+ return 0xffu;
+ case V_028A7C_VGT_INDEX_16:
+ return 0xffffu;
+ case V_028A7C_VGT_INDEX_32:
+ return 0xffffffffu;
+ default:
+ unreachable("invalid index type");
+ }
}
static void
-si_emit_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer,
- bool instanced_draw, bool indirect_draw,
- bool count_from_stream_output,
- uint32_t draw_vertex_count)
-{
- struct radeon_info *info = &cmd_buffer->device->physical_device->rad_info;
- struct radv_cmd_state *state = &cmd_buffer->state;
- unsigned topology = state->dynamic.primitive_topology;
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- unsigned ia_multi_vgt_param;
-
- ia_multi_vgt_param =
- si_get_ia_multi_vgt_param(cmd_buffer, instanced_draw,
- indirect_draw,
- count_from_stream_output,
- draw_vertex_count,
- topology);
-
- if (state->last_ia_multi_vgt_param != ia_multi_vgt_param) {
- if (info->chip_class == GFX9) {
- radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device,
- cs,
- R_030960_IA_MULTI_VGT_PARAM,
- 4, ia_multi_vgt_param);
- } else if (info->chip_class >= GFX7) {
- radeon_set_context_reg_idx(cs,
- R_028AA8_IA_MULTI_VGT_PARAM,
- 1, ia_multi_vgt_param);
- } else {
- radeon_set_context_reg(cs, R_028AA8_IA_MULTI_VGT_PARAM,
- ia_multi_vgt_param);
- }
- state->last_ia_multi_vgt_param = ia_multi_vgt_param;
- }
+si_emit_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_draw,
+ bool indirect_draw, bool count_from_stream_output,
+ uint32_t draw_vertex_count)
+{
+ struct radeon_info *info = &cmd_buffer->device->physical_device->rad_info;
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ unsigned topology = state->dynamic.primitive_topology;
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ unsigned ia_multi_vgt_param;
+
+ ia_multi_vgt_param =
+ si_get_ia_multi_vgt_param(cmd_buffer, instanced_draw, indirect_draw, count_from_stream_output,
+ draw_vertex_count, topology);
+
+ if (state->last_ia_multi_vgt_param != ia_multi_vgt_param) {
+ if (info->chip_class == GFX9) {
+ radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device, cs,
+ R_030960_IA_MULTI_VGT_PARAM, 4, ia_multi_vgt_param);
+ } else if (info->chip_class >= GFX7) {
+ radeon_set_context_reg_idx(cs, R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param);
+ } else {
+ radeon_set_context_reg(cs, R_028AA8_IA_MULTI_VGT_PARAM, ia_multi_vgt_param);
+ }
+ state->last_ia_multi_vgt_param = ia_multi_vgt_param;
+ }
}
static void
-radv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_draw_info *draw_info)
-{
- struct radeon_info *info = &cmd_buffer->device->physical_device->rad_info;
- struct radv_cmd_state *state = &cmd_buffer->state;
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- int32_t primitive_reset_en;
-
- /* Draw state. */
- if (info->chip_class < GFX10) {
- si_emit_ia_multi_vgt_param(cmd_buffer, draw_info->instance_count > 1,
- draw_info->indirect,
- !!draw_info->strmout_buffer,
- draw_info->indirect ? 0 : draw_info->count);
- }
-
- /* Primitive restart. */
- primitive_reset_en =
- draw_info->indexed && state->pipeline->graphics.prim_restart_enable;
-
- if (primitive_reset_en != state->last_primitive_reset_en) {
- state->last_primitive_reset_en = primitive_reset_en;
- if (info->chip_class >= GFX9) {
- radeon_set_uconfig_reg(cs,
- R_03092C_VGT_MULTI_PRIM_IB_RESET_EN,
- primitive_reset_en);
- } else {
- radeon_set_context_reg(cs,
- R_028A94_VGT_MULTI_PRIM_IB_RESET_EN,
- primitive_reset_en);
- }
- }
-
- if (primitive_reset_en) {
- uint32_t primitive_reset_index =
- radv_get_primitive_reset_index(cmd_buffer);
-
- if (primitive_reset_index != state->last_primitive_reset_index) {
- radeon_set_context_reg(cs,
- R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX,
- primitive_reset_index);
- state->last_primitive_reset_index = primitive_reset_index;
- }
- }
-
- if (draw_info->strmout_buffer) {
- uint64_t va = radv_buffer_get_va(draw_info->strmout_buffer->bo);
-
- va += draw_info->strmout_buffer->offset +
- draw_info->strmout_buffer_offset;
-
- radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE,
- draw_info->stride);
-
- radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
- COPY_DATA_DST_SEL(COPY_DATA_REG) |
- COPY_DATA_WR_CONFIRM);
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- radeon_emit(cs, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2);
- radeon_emit(cs, 0); /* unused */
-
- radv_cs_add_buffer(cmd_buffer->device->ws, cs, draw_info->strmout_buffer->bo);
- }
-}
-
-static void radv_stage_flush(struct radv_cmd_buffer *cmd_buffer,
- VkPipelineStageFlags src_stage_mask)
-{
- if (src_stage_mask & (VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
- VK_PIPELINE_STAGE_TRANSFER_BIT |
- VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT |
- VK_PIPELINE_STAGE_ALL_COMMANDS_BIT)) {
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH;
- }
-
- if (src_stage_mask & (VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
- VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
- VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
- VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
- VK_PIPELINE_STAGE_TRANSFER_BIT |
- VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT |
- VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT |
- VK_PIPELINE_STAGE_ALL_COMMANDS_BIT)) {
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH;
- } else if (src_stage_mask & (VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
- VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
- VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
- VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT |
- VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT |
- VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT |
- VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT)) {
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VS_PARTIAL_FLUSH;
- }
+radv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *draw_info)
+{
+ struct radeon_info *info = &cmd_buffer->device->physical_device->rad_info;
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ int32_t primitive_reset_en;
+
+ /* Draw state. */
+ if (info->chip_class < GFX10) {
+ si_emit_ia_multi_vgt_param(cmd_buffer, draw_info->instance_count > 1, draw_info->indirect,
+ !!draw_info->strmout_buffer,
+ draw_info->indirect ? 0 : draw_info->count);
+ }
+
+ /* Primitive restart. */
+ primitive_reset_en = draw_info->indexed && state->pipeline->graphics.prim_restart_enable;
+
+ if (primitive_reset_en != state->last_primitive_reset_en) {
+ state->last_primitive_reset_en = primitive_reset_en;
+ if (info->chip_class >= GFX9) {
+ radeon_set_uconfig_reg(cs, R_03092C_VGT_MULTI_PRIM_IB_RESET_EN, primitive_reset_en);
+ } else {
+ radeon_set_context_reg(cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, primitive_reset_en);
+ }
+ }
+
+ if (primitive_reset_en) {
+ uint32_t primitive_reset_index = radv_get_primitive_reset_index(cmd_buffer);
+
+ if (primitive_reset_index != state->last_primitive_reset_index) {
+ radeon_set_context_reg(cs, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, primitive_reset_index);
+ state->last_primitive_reset_index = primitive_reset_index;
+ }
+ }
+
+ if (draw_info->strmout_buffer) {
+ uint64_t va = radv_buffer_get_va(draw_info->strmout_buffer->bo);
+
+ va += draw_info->strmout_buffer->offset + draw_info->strmout_buffer_offset;
+
+ radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, draw_info->stride);
+
+ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG) |
+ COPY_DATA_WR_CONFIRM);
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ radeon_emit(cs, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2);
+ radeon_emit(cs, 0); /* unused */
+
+ radv_cs_add_buffer(cmd_buffer->device->ws, cs, draw_info->strmout_buffer->bo);
+ }
+}
+
+static void
+radv_stage_flush(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags src_stage_mask)
+{
+ if (src_stage_mask &
+ (VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT |
+ VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT)) {
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH;
+ }
+
+ if (src_stage_mask &
+ (VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
+ VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
+ VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT |
+ VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT)) {
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH;
+ } else if (src_stage_mask &
+ (VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
+ VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
+ VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT |
+ VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT |
+ VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT |
+ VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT)) {
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VS_PARTIAL_FLUSH;
+ }
}
/* Determine if the image is affected by the pipe misaligned metadata issue
 * which requires invalidating L2.
 * which requires invalidating L2.
*/
static bool
-radv_image_is_pipe_misaligned(const struct radv_device *device,
- const struct radv_image *image)
-{
- struct radeon_info *rad_info = &device->physical_device->rad_info;
- unsigned log2_samples = util_logbase2(image->info.samples);
-
- assert(rad_info->chip_class >= GFX10);
-
- for (unsigned i = 0; i < image->plane_count; ++i) {
- VkFormat fmt = vk_format_get_plane_format(image->vk_format, i);
- unsigned log2_bpp = util_logbase2(vk_format_get_blocksize(fmt));
- unsigned log2_bpp_and_samples;
-
- if (rad_info->chip_class >= GFX10_3) {
- log2_bpp_and_samples = log2_bpp + log2_samples;
- } else {
- if (vk_format_has_depth(image->vk_format) &&
- image->info.array_size >= 8) {
- log2_bpp = 2;
- }
-
- log2_bpp_and_samples = MIN2(6, log2_bpp + log2_samples);
- }
-
- unsigned num_pipes = G_0098F8_NUM_PIPES(rad_info->gb_addr_config);
- int overlap = MAX2(0, log2_bpp_and_samples + num_pipes - 8);
-
- if (vk_format_has_depth(image->vk_format)) {
- if (radv_image_is_tc_compat_htile(image) && overlap) {
- return true;
- }
- } else {
- unsigned max_compressed_frags = G_0098F8_MAX_COMPRESSED_FRAGS(rad_info->gb_addr_config);
- int log2_samples_frag_diff = MAX2(0, log2_samples - max_compressed_frags);
- int samples_overlap = MIN2(log2_samples, overlap);
-
-			/* TODO: This shouldn't be necessary if the image has DCC but
-			 * it's not readable by shaders.
-			 */
- if ((radv_image_has_dcc(image) ||
- radv_image_is_tc_compat_cmask(image)) &&
- (samples_overlap > log2_samples_frag_diff)) {
- return true;
- }
- }
- }
-
- return false;
+radv_image_is_pipe_misaligned(const struct radv_device *device, const struct radv_image *image)
+{
+ struct radeon_info *rad_info = &device->physical_device->rad_info;
+ unsigned log2_samples = util_logbase2(image->info.samples);
+
+ assert(rad_info->chip_class >= GFX10);
+
+ for (unsigned i = 0; i < image->plane_count; ++i) {
+ VkFormat fmt = vk_format_get_plane_format(image->vk_format, i);
+ unsigned log2_bpp = util_logbase2(vk_format_get_blocksize(fmt));
+ unsigned log2_bpp_and_samples;
+
+ if (rad_info->chip_class >= GFX10_3) {
+ log2_bpp_and_samples = log2_bpp + log2_samples;
+ } else {
+ if (vk_format_has_depth(image->vk_format) && image->info.array_size >= 8) {
+ log2_bpp = 2;
+ }
+
+ log2_bpp_and_samples = MIN2(6, log2_bpp + log2_samples);
+ }
+
+ unsigned num_pipes = G_0098F8_NUM_PIPES(rad_info->gb_addr_config);
+ int overlap = MAX2(0, log2_bpp_and_samples + num_pipes - 8);
+
+ if (vk_format_has_depth(image->vk_format)) {
+ if (radv_image_is_tc_compat_htile(image) && overlap) {
+ return true;
+ }
+ } else {
+ unsigned max_compressed_frags = G_0098F8_MAX_COMPRESSED_FRAGS(rad_info->gb_addr_config);
+ int log2_samples_frag_diff = MAX2(0, log2_samples - max_compressed_frags);
+ int samples_overlap = MIN2(log2_samples, overlap);
+
+         /* TODO: This shouldn't be necessary if the image has DCC but
+          * it's not readable by shaders.
+          */
+ if ((radv_image_has_dcc(image) || radv_image_is_tc_compat_cmask(image)) &&
+ (samples_overlap > log2_samples_frag_diff)) {
+ return true;
+ }
+ }
+ }
+
+ return false;
}
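The heart of the check above is the "overlap" term: roughly log2(bpp) + log2(samples) + the NUM_PIPES field compared against 8. A standalone worked example with illustrative values (not taken from any real GB_ADDR_CONFIG):

#include <stdio.h>

#define MIN2(a, b) ((a) < (b) ? (a) : (b))
#define MAX2(a, b) ((a) > (b) ? (a) : (b))

int main(void)
{
   unsigned log2_bpp = 2;     /* 4 bytes per pixel */
   unsigned log2_samples = 2; /* 4x MSAA */
   unsigned num_pipes = 4;    /* NUM_PIPES field, i.e. log2 of the pipe count */
   unsigned log2_bpp_and_samples = MIN2(6, log2_bpp + log2_samples);
   int overlap = MAX2(0, (int)(log2_bpp_and_samples + num_pipes) - 8);

   /* overlap == 0 here, so this plane would not be flagged as pipe misaligned. */
   printf("overlap = %d\n", overlap);
   return 0;
}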
static bool
radv_image_is_l2_coherent(const struct radv_device *device, const struct radv_image *image)
{
- if (device->physical_device->rad_info.chip_class >= GFX10) {
- return !device->physical_device->rad_info.tcc_rb_non_coherent &&
- (image && !radv_image_is_pipe_misaligned(device, image));
- } else if (device->physical_device->rad_info.chip_class == GFX9 && image) {
- if (image->info.samples == 1 &&
- (image->usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
- VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
- !vk_format_has_stencil(image->vk_format)) {
- /* Single-sample color and single-sample depth
- * (not stencil) are coherent with shaders on
- * GFX9.
- */
- return true;
- }
- }
-
- return false;
+ if (device->physical_device->rad_info.chip_class >= GFX10) {
+ return !device->physical_device->rad_info.tcc_rb_non_coherent &&
+ (image && !radv_image_is_pipe_misaligned(device, image));
+ } else if (device->physical_device->rad_info.chip_class == GFX9 && image) {
+ if (image->info.samples == 1 &&
+ (image->usage &
+ (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
+ !vk_format_has_stencil(image->vk_format)) {
+ /* Single-sample color and single-sample depth
+ * (not stencil) are coherent with shaders on
+ * GFX9.
+ */
+ return true;
+ }
+ }
+
+ return false;
}
enum radv_cmd_flush_bits
-radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer,
- VkAccessFlags src_flags,
- const struct radv_image *image)
-{
- bool has_CB_meta = true, has_DB_meta = true;
- bool image_is_coherent = radv_image_is_l2_coherent(cmd_buffer->device, image);
- enum radv_cmd_flush_bits flush_bits = 0;
-
- if (image) {
- if (!radv_image_has_CB_metadata(image))
- has_CB_meta = false;
- if (!radv_image_has_htile(image))
- has_DB_meta = false;
- }
-
- u_foreach_bit(b, src_flags) {
- switch ((VkAccessFlagBits)(1 << b)) {
- case VK_ACCESS_SHADER_WRITE_BIT:
- /* since the STORAGE bit isn't set we know that this is a meta operation.
- * on the dst flush side we skip CB/DB flushes without the STORAGE bit, so
- * set it here. */
- if (image && !(image->usage & VK_IMAGE_USAGE_STORAGE_BIT)) {
- if (vk_format_is_depth_or_stencil(image->vk_format)) {
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB;
- } else {
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
- }
- }
- if (!image_is_coherent)
- flush_bits |= RADV_CMD_FLAG_WB_L2;
- break;
- case VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT:
- case VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
- if (!image_is_coherent)
- flush_bits |= RADV_CMD_FLAG_WB_L2;
- break;
- case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT:
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
- if (has_CB_meta)
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
- break;
- case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT:
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB;
- if (has_DB_meta)
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
- break;
- case VK_ACCESS_TRANSFER_WRITE_BIT:
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
- RADV_CMD_FLAG_FLUSH_AND_INV_DB;
-
- if (!image_is_coherent)
- flush_bits |= RADV_CMD_FLAG_INV_L2;
- if (has_CB_meta)
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
- if (has_DB_meta)
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
- break;
- case VK_ACCESS_MEMORY_WRITE_BIT:
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
- RADV_CMD_FLAG_FLUSH_AND_INV_DB;
-
- if (!image_is_coherent)
- flush_bits |= RADV_CMD_FLAG_INV_L2;
- if (has_CB_meta)
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
- if (has_DB_meta)
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
- break;
- default:
- break;
- }
- }
- return flush_bits;
+radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags src_flags,
+ const struct radv_image *image)
+{
+ bool has_CB_meta = true, has_DB_meta = true;
+ bool image_is_coherent = radv_image_is_l2_coherent(cmd_buffer->device, image);
+ enum radv_cmd_flush_bits flush_bits = 0;
+
+ if (image) {
+ if (!radv_image_has_CB_metadata(image))
+ has_CB_meta = false;
+ if (!radv_image_has_htile(image))
+ has_DB_meta = false;
+ }
+
+ u_foreach_bit(b, src_flags)
+ {
+ switch ((VkAccessFlagBits)(1 << b)) {
+ case VK_ACCESS_SHADER_WRITE_BIT:
+ /* since the STORAGE bit isn't set we know that this is a meta operation.
+ * on the dst flush side we skip CB/DB flushes without the STORAGE bit, so
+ * set it here. */
+ if (image && !(image->usage & VK_IMAGE_USAGE_STORAGE_BIT)) {
+ if (vk_format_is_depth_or_stencil(image->vk_format)) {
+ flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB;
+ } else {
+ flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
+ }
+ }
+ if (!image_is_coherent)
+ flush_bits |= RADV_CMD_FLAG_WB_L2;
+ break;
+ case VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT:
+ case VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
+ if (!image_is_coherent)
+ flush_bits |= RADV_CMD_FLAG_WB_L2;
+ break;
+ case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT:
+ flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
+ if (has_CB_meta)
+ flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
+ break;
+ case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT:
+ flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB;
+ if (has_DB_meta)
+ flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
+ break;
+ case VK_ACCESS_TRANSFER_WRITE_BIT:
+ flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB;
+
+ if (!image_is_coherent)
+ flush_bits |= RADV_CMD_FLAG_INV_L2;
+ if (has_CB_meta)
+ flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
+ if (has_DB_meta)
+ flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
+ break;
+ case VK_ACCESS_MEMORY_WRITE_BIT:
+ flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB;
+
+ if (!image_is_coherent)
+ flush_bits |= RADV_CMD_FLAG_INV_L2;
+ if (has_CB_meta)
+ flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
+ if (has_DB_meta)
+ flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
+ break;
+ default:
+ break;
+ }
+ }
+ return flush_bits;
}
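The u_foreach_bit loop above visits every set bit of the access mask exactly once. A standalone sketch of the same lowest-set-bit idiom (not Mesa's actual macro definition):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
   uint32_t flags = (1u << 6) | (1u << 12) | (1u << 16); /* illustrative access mask */

   for (uint32_t mask = flags; mask; mask &= mask - 1) {
      int b = __builtin_ctz(mask); /* index of the lowest set bit (GCC/Clang builtin) */
      printf("handle access bit %d\n", b);
   }
   return 0;
}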
enum radv_cmd_flush_bits
-radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer,
- VkAccessFlags dst_flags,
+radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags dst_flags,
const struct radv_image *image)
{
- bool has_CB_meta = true, has_DB_meta = true;
- enum radv_cmd_flush_bits flush_bits = 0;
- bool flush_CB = true, flush_DB = true;
- bool image_is_coherent = radv_image_is_l2_coherent(cmd_buffer->device, image);
-
- if (image) {
- if (!(image->usage & VK_IMAGE_USAGE_STORAGE_BIT)) {
- flush_CB = false;
- flush_DB = false;
- }
-
- if (!radv_image_has_CB_metadata(image))
- has_CB_meta = false;
- if (!radv_image_has_htile(image))
- has_DB_meta = false;
- }
-
- u_foreach_bit(b, dst_flags) {
- switch ((VkAccessFlagBits)(1 << b)) {
- case VK_ACCESS_INDIRECT_COMMAND_READ_BIT:
- case VK_ACCESS_INDEX_READ_BIT:
- case VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
- break;
- case VK_ACCESS_UNIFORM_READ_BIT:
- flush_bits |= RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_SCACHE;
- break;
- case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
- case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT:
- case VK_ACCESS_TRANSFER_READ_BIT:
- case VK_ACCESS_TRANSFER_WRITE_BIT:
- flush_bits |= RADV_CMD_FLAG_INV_VCACHE;
-
- if (has_CB_meta || has_DB_meta)
- flush_bits |= RADV_CMD_FLAG_INV_L2_METADATA;
- if (!image_is_coherent)
- flush_bits |= RADV_CMD_FLAG_INV_L2;
- break;
- case VK_ACCESS_SHADER_READ_BIT:
- flush_bits |= RADV_CMD_FLAG_INV_VCACHE;
- /* Unlike LLVM, ACO uses SMEM for SSBOs and we have to
- * invalidate the scalar cache. */
- if (!cmd_buffer->device->physical_device->use_llvm && !image)
- flush_bits |= RADV_CMD_FLAG_INV_SCACHE;
-
- if (has_CB_meta || has_DB_meta)
- flush_bits |= RADV_CMD_FLAG_INV_L2_METADATA;
- if (!image_is_coherent)
- flush_bits |= RADV_CMD_FLAG_INV_L2;
- break;
- case VK_ACCESS_SHADER_WRITE_BIT:
- break;
- case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT:
- case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT:
- if (flush_CB)
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
- if (has_CB_meta)
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
- break;
- case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT:
- case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT:
- if (flush_DB)
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB;
- if (has_DB_meta)
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
- break;
- case VK_ACCESS_MEMORY_READ_BIT:
- case VK_ACCESS_MEMORY_WRITE_BIT:
- flush_bits |= RADV_CMD_FLAG_INV_VCACHE |
- RADV_CMD_FLAG_INV_SCACHE;
- if (!image_is_coherent)
- flush_bits |= RADV_CMD_FLAG_INV_L2;
- if (flush_CB)
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
- if (has_CB_meta)
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
- if (flush_DB)
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB;
- if (has_DB_meta)
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
- break;
- default:
- break;
- }
- }
- return flush_bits;
-}
-
-void radv_subpass_barrier(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_subpass_barrier *barrier)
-{
- struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
- if (fb && !fb->imageless) {
- for (int i = 0; i < fb->attachment_count; ++i) {
- cmd_buffer->state.flush_bits |= radv_src_access_flush(cmd_buffer, barrier->src_access_mask,
- fb->attachments[i]->image);
- }
- } else {
- cmd_buffer->state.flush_bits |= radv_src_access_flush(cmd_buffer, barrier->src_access_mask,
- NULL);
- }
-
- radv_stage_flush(cmd_buffer, barrier->src_stage_mask);
-
- if (fb && !fb->imageless) {
- for (int i = 0; i < fb->attachment_count; ++i) {
- cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, barrier->dst_access_mask,
- fb->attachments[i]->image);
- }
- } else {
- cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, barrier->dst_access_mask,
- NULL);
- }
+ bool has_CB_meta = true, has_DB_meta = true;
+ enum radv_cmd_flush_bits flush_bits = 0;
+ bool flush_CB = true, flush_DB = true;
+ bool image_is_coherent = radv_image_is_l2_coherent(cmd_buffer->device, image);
+
+ if (image) {
+ if (!(image->usage & VK_IMAGE_USAGE_STORAGE_BIT)) {
+ flush_CB = false;
+ flush_DB = false;
+ }
+
+ if (!radv_image_has_CB_metadata(image))
+ has_CB_meta = false;
+ if (!radv_image_has_htile(image))
+ has_DB_meta = false;
+ }
+
+ u_foreach_bit(b, dst_flags)
+ {
+ switch ((VkAccessFlagBits)(1 << b)) {
+ case VK_ACCESS_INDIRECT_COMMAND_READ_BIT:
+ case VK_ACCESS_INDEX_READ_BIT:
+ case VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
+ break;
+ case VK_ACCESS_UNIFORM_READ_BIT:
+ flush_bits |= RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_SCACHE;
+ break;
+ case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
+ case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT:
+ case VK_ACCESS_TRANSFER_READ_BIT:
+ case VK_ACCESS_TRANSFER_WRITE_BIT:
+ flush_bits |= RADV_CMD_FLAG_INV_VCACHE;
+
+ if (has_CB_meta || has_DB_meta)
+ flush_bits |= RADV_CMD_FLAG_INV_L2_METADATA;
+ if (!image_is_coherent)
+ flush_bits |= RADV_CMD_FLAG_INV_L2;
+ break;
+ case VK_ACCESS_SHADER_READ_BIT:
+ flush_bits |= RADV_CMD_FLAG_INV_VCACHE;
+ /* Unlike LLVM, ACO uses SMEM for SSBOs and we have to
+ * invalidate the scalar cache. */
+ if (!cmd_buffer->device->physical_device->use_llvm && !image)
+ flush_bits |= RADV_CMD_FLAG_INV_SCACHE;
+
+ if (has_CB_meta || has_DB_meta)
+ flush_bits |= RADV_CMD_FLAG_INV_L2_METADATA;
+ if (!image_is_coherent)
+ flush_bits |= RADV_CMD_FLAG_INV_L2;
+ break;
+ case VK_ACCESS_SHADER_WRITE_BIT:
+ break;
+ case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT:
+ case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT:
+ if (flush_CB)
+ flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
+ if (has_CB_meta)
+ flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
+ break;
+ case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT:
+ case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT:
+ if (flush_DB)
+ flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB;
+ if (has_DB_meta)
+ flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
+ break;
+ case VK_ACCESS_MEMORY_READ_BIT:
+ case VK_ACCESS_MEMORY_WRITE_BIT:
+ flush_bits |= RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_SCACHE;
+ if (!image_is_coherent)
+ flush_bits |= RADV_CMD_FLAG_INV_L2;
+ if (flush_CB)
+ flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
+ if (has_CB_meta)
+ flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
+ if (flush_DB)
+ flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB;
+ if (has_DB_meta)
+ flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
+ break;
+ default:
+ break;
+ }
+ }
+ return flush_bits;
+}
+
+void
+radv_subpass_barrier(struct radv_cmd_buffer *cmd_buffer, const struct radv_subpass_barrier *barrier)
+{
+ struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
+ if (fb && !fb->imageless) {
+ for (int i = 0; i < fb->attachment_count; ++i) {
+ cmd_buffer->state.flush_bits |=
+ radv_src_access_flush(cmd_buffer, barrier->src_access_mask, fb->attachments[i]->image);
+ }
+ } else {
+ cmd_buffer->state.flush_bits |=
+ radv_src_access_flush(cmd_buffer, barrier->src_access_mask, NULL);
+ }
+
+ radv_stage_flush(cmd_buffer, barrier->src_stage_mask);
+
+ if (fb && !fb->imageless) {
+ for (int i = 0; i < fb->attachment_count; ++i) {
+ cmd_buffer->state.flush_bits |=
+ radv_dst_access_flush(cmd_buffer, barrier->dst_access_mask, fb->attachments[i]->image);
+ }
+ } else {
+ cmd_buffer->state.flush_bits |=
+ radv_dst_access_flush(cmd_buffer, barrier->dst_access_mask, NULL);
+ }
}
uint32_t
radv_get_subpass_id(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_cmd_state *state = &cmd_buffer->state;
- uint32_t subpass_id = state->subpass - state->pass->subpasses;
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ uint32_t subpass_id = state->subpass - state->pass->subpasses;
- /* The id of this subpass shouldn't exceed the number of subpasses in
- * this render pass minus 1.
- */
- assert(subpass_id < state->pass->subpass_count);
- return subpass_id;
+ /* The id of this subpass shouldn't exceed the number of subpasses in
+ * this render pass minus 1.
+ */
+ assert(subpass_id < state->pass->subpass_count);
+ return subpass_id;
}
static struct radv_sample_locations_state *
-radv_get_attachment_sample_locations(struct radv_cmd_buffer *cmd_buffer,
- uint32_t att_idx,
- bool begin_subpass)
-{
- struct radv_cmd_state *state = &cmd_buffer->state;
- uint32_t subpass_id = radv_get_subpass_id(cmd_buffer);
- struct radv_image_view *view = state->attachments[att_idx].iview;
-
- if (view->image->info.samples == 1)
- return NULL;
-
- if (state->pass->attachments[att_idx].first_subpass_idx == subpass_id) {
- /* Return the initial sample locations if this is the initial
- * layout transition of the given subpass attachemnt.
- */
- if (state->attachments[att_idx].sample_location.count > 0)
- return &state->attachments[att_idx].sample_location;
- } else {
- /* Otherwise return the subpass sample locations if defined. */
- if (state->subpass_sample_locs) {
- /* Because the driver sets the current subpass before
- * initial layout transitions, we should use the sample
- * locations from the previous subpass to avoid an
- * off-by-one problem. Otherwise, use the sample
- * locations for the current subpass for final layout
- * transitions.
- */
- if (begin_subpass)
- subpass_id--;
-
- for (uint32_t i = 0; i < state->num_subpass_sample_locs; i++) {
- if (state->subpass_sample_locs[i].subpass_idx == subpass_id)
- return &state->subpass_sample_locs[i].sample_location;
- }
- }
- }
-
- return NULL;
-}
-
-static void radv_handle_subpass_image_transition(struct radv_cmd_buffer *cmd_buffer,
- struct radv_subpass_attachment att,
- bool begin_subpass)
-{
- unsigned idx = att.attachment;
- struct radv_image_view *view = cmd_buffer->state.attachments[idx].iview;
- struct radv_sample_locations_state *sample_locs;
- VkImageSubresourceRange range;
- range.aspectMask = view->aspect_mask;
- range.baseMipLevel = view->base_mip;
- range.levelCount = 1;
- range.baseArrayLayer = view->base_layer;
- range.layerCount = cmd_buffer->state.framebuffer->layers;
-
- if (cmd_buffer->state.subpass->view_mask) {
- /* If the current subpass uses multiview, the driver might have
- * performed a fast color/depth clear to the whole image
- * (including all layers). To make sure the driver will
- * decompress the image correctly (if needed), we have to
- * account for the "real" number of layers. If the view mask is
- * sparse, this will decompress more layers than needed.
- */
- range.layerCount = util_last_bit(cmd_buffer->state.subpass->view_mask);
- }
-
- /* Get the subpass sample locations for the given attachment, if NULL
- * is returned the driver will use the default HW locations.
- */
- sample_locs = radv_get_attachment_sample_locations(cmd_buffer, idx,
- begin_subpass);
-
- /* Determine if the subpass uses separate depth/stencil layouts. */
- bool uses_separate_depth_stencil_layouts = false;
- if ((cmd_buffer->state.attachments[idx].current_layout !=
- cmd_buffer->state.attachments[idx].current_stencil_layout) ||
- (att.layout != att.stencil_layout)) {
- uses_separate_depth_stencil_layouts = true;
- }
-
- /* For separate layouts, perform depth and stencil transitions
- * separately.
- */
- if (uses_separate_depth_stencil_layouts &&
- (range.aspectMask == (VK_IMAGE_ASPECT_DEPTH_BIT |
- VK_IMAGE_ASPECT_STENCIL_BIT))) {
- /* Depth-only transitions. */
- range.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
- radv_handle_image_transition(cmd_buffer,
- view->image,
- cmd_buffer->state.attachments[idx].current_layout,
- cmd_buffer->state.attachments[idx].current_in_render_loop,
- att.layout, att.in_render_loop,
- 0, 0, &range, sample_locs);
-
- /* Stencil-only transitions. */
- range.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT;
- radv_handle_image_transition(cmd_buffer,
- view->image,
- cmd_buffer->state.attachments[idx].current_stencil_layout,
- cmd_buffer->state.attachments[idx].current_in_render_loop,
- att.stencil_layout, att.in_render_loop,
- 0, 0, &range, sample_locs);
- } else {
- radv_handle_image_transition(cmd_buffer,
- view->image,
- cmd_buffer->state.attachments[idx].current_layout,
- cmd_buffer->state.attachments[idx].current_in_render_loop,
- att.layout, att.in_render_loop,
- 0, 0, &range, sample_locs);
- }
-
- cmd_buffer->state.attachments[idx].current_layout = att.layout;
- cmd_buffer->state.attachments[idx].current_stencil_layout = att.stencil_layout;
- cmd_buffer->state.attachments[idx].current_in_render_loop = att.in_render_loop;
-
+radv_get_attachment_sample_locations(struct radv_cmd_buffer *cmd_buffer, uint32_t att_idx,
+ bool begin_subpass)
+{
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ uint32_t subpass_id = radv_get_subpass_id(cmd_buffer);
+ struct radv_image_view *view = state->attachments[att_idx].iview;
+
+ if (view->image->info.samples == 1)
+ return NULL;
+
+ if (state->pass->attachments[att_idx].first_subpass_idx == subpass_id) {
+ /* Return the initial sample locations if this is the initial
+       * layout transition of the given subpass attachment.
+ */
+ if (state->attachments[att_idx].sample_location.count > 0)
+ return &state->attachments[att_idx].sample_location;
+ } else {
+ /* Otherwise return the subpass sample locations if defined. */
+ if (state->subpass_sample_locs) {
+ /* Because the driver sets the current subpass before
+ * initial layout transitions, we should use the sample
+ * locations from the previous subpass to avoid an
+ * off-by-one problem. Otherwise, use the sample
+ * locations for the current subpass for final layout
+ * transitions.
+ */
+ if (begin_subpass)
+ subpass_id--;
+
+ for (uint32_t i = 0; i < state->num_subpass_sample_locs; i++) {
+ if (state->subpass_sample_locs[i].subpass_idx == subpass_id)
+ return &state->subpass_sample_locs[i].sample_location;
+ }
+ }
+ }
+
+ return NULL;
+}
+static void
+radv_handle_subpass_image_transition(struct radv_cmd_buffer *cmd_buffer,
+ struct radv_subpass_attachment att, bool begin_subpass)
+{
+ unsigned idx = att.attachment;
+ struct radv_image_view *view = cmd_buffer->state.attachments[idx].iview;
+ struct radv_sample_locations_state *sample_locs;
+ VkImageSubresourceRange range;
+ range.aspectMask = view->aspect_mask;
+ range.baseMipLevel = view->base_mip;
+ range.levelCount = 1;
+ range.baseArrayLayer = view->base_layer;
+ range.layerCount = cmd_buffer->state.framebuffer->layers;
+
+ if (cmd_buffer->state.subpass->view_mask) {
+ /* If the current subpass uses multiview, the driver might have
+ * performed a fast color/depth clear to the whole image
+ * (including all layers). To make sure the driver will
+ * decompress the image correctly (if needed), we have to
+ * account for the "real" number of layers. If the view mask is
+ * sparse, this will decompress more layers than needed.
+ */
+ range.layerCount = util_last_bit(cmd_buffer->state.subpass->view_mask);
+ }
+
+   /* Get the subpass sample locations for the given attachment. If NULL
+    * is returned, the driver will use the default HW locations.
+    */
+ sample_locs = radv_get_attachment_sample_locations(cmd_buffer, idx, begin_subpass);
+
+ /* Determine if the subpass uses separate depth/stencil layouts. */
+ bool uses_separate_depth_stencil_layouts = false;
+ if ((cmd_buffer->state.attachments[idx].current_layout !=
+ cmd_buffer->state.attachments[idx].current_stencil_layout) ||
+ (att.layout != att.stencil_layout)) {
+ uses_separate_depth_stencil_layouts = true;
+ }
+
+ /* For separate layouts, perform depth and stencil transitions
+ * separately.
+ */
+ if (uses_separate_depth_stencil_layouts &&
+ (range.aspectMask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) {
+ /* Depth-only transitions. */
+ range.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
+ radv_handle_image_transition(cmd_buffer, view->image,
+ cmd_buffer->state.attachments[idx].current_layout,
+ cmd_buffer->state.attachments[idx].current_in_render_loop,
+ att.layout, att.in_render_loop, 0, 0, &range, sample_locs);
+
+ /* Stencil-only transitions. */
+ range.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT;
+ radv_handle_image_transition(
+ cmd_buffer, view->image, cmd_buffer->state.attachments[idx].current_stencil_layout,
+ cmd_buffer->state.attachments[idx].current_in_render_loop, att.stencil_layout,
+ att.in_render_loop, 0, 0, &range, sample_locs);
+ } else {
+ radv_handle_image_transition(cmd_buffer, view->image,
+ cmd_buffer->state.attachments[idx].current_layout,
+ cmd_buffer->state.attachments[idx].current_in_render_loop,
+ att.layout, att.in_render_loop, 0, 0, &range, sample_locs);
+ }
+
+ cmd_buffer->state.attachments[idx].current_layout = att.layout;
+ cmd_buffer->state.attachments[idx].current_stencil_layout = att.stencil_layout;
+ cmd_buffer->state.attachments[idx].current_in_render_loop = att.in_render_loop;
}
void
-radv_cmd_buffer_set_subpass(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_subpass *subpass)
+radv_cmd_buffer_set_subpass(struct radv_cmd_buffer *cmd_buffer, const struct radv_subpass *subpass)
{
- cmd_buffer->state.subpass = subpass;
+ cmd_buffer->state.subpass = subpass;
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FRAMEBUFFER;
+ cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FRAMEBUFFER;
}
static VkResult
radv_cmd_state_setup_sample_locations(struct radv_cmd_buffer *cmd_buffer,
- struct radv_render_pass *pass,
- const VkRenderPassBeginInfo *info)
-{
- const struct VkRenderPassSampleLocationsBeginInfoEXT *sample_locs =
- vk_find_struct_const(info->pNext,
- RENDER_PASS_SAMPLE_LOCATIONS_BEGIN_INFO_EXT);
- struct radv_cmd_state *state = &cmd_buffer->state;
-
- if (!sample_locs) {
- state->subpass_sample_locs = NULL;
- return VK_SUCCESS;
- }
-
- for (uint32_t i = 0; i < sample_locs->attachmentInitialSampleLocationsCount; i++) {
- const VkAttachmentSampleLocationsEXT *att_sample_locs =
- &sample_locs->pAttachmentInitialSampleLocations[i];
- uint32_t att_idx = att_sample_locs->attachmentIndex;
- struct radv_image *image = cmd_buffer->state.attachments[att_idx].iview->image;
-
- assert(vk_format_is_depth_or_stencil(image->vk_format));
-
- /* From the Vulkan spec 1.1.108:
- *
- * "If the image referenced by the framebuffer attachment at
- * index attachmentIndex was not created with
- * VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT
- * then the values specified in sampleLocationsInfo are
- * ignored."
- */
- if (!(image->flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT))
- continue;
-
- const VkSampleLocationsInfoEXT *sample_locs_info =
- &att_sample_locs->sampleLocationsInfo;
-
- state->attachments[att_idx].sample_location.per_pixel =
- sample_locs_info->sampleLocationsPerPixel;
- state->attachments[att_idx].sample_location.grid_size =
- sample_locs_info->sampleLocationGridSize;
- state->attachments[att_idx].sample_location.count =
- sample_locs_info->sampleLocationsCount;
- typed_memcpy(&state->attachments[att_idx].sample_location.locations[0],
- sample_locs_info->pSampleLocations,
- sample_locs_info->sampleLocationsCount);
- }
-
- state->subpass_sample_locs = vk_alloc(&cmd_buffer->pool->alloc,
- sample_locs->postSubpassSampleLocationsCount *
- sizeof(state->subpass_sample_locs[0]),
- 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (state->subpass_sample_locs == NULL) {
- cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
- return cmd_buffer->record_result;
- }
-
- state->num_subpass_sample_locs = sample_locs->postSubpassSampleLocationsCount;
-
- for (uint32_t i = 0; i < sample_locs->postSubpassSampleLocationsCount; i++) {
- const VkSubpassSampleLocationsEXT *subpass_sample_locs_info =
- &sample_locs->pPostSubpassSampleLocations[i];
- const VkSampleLocationsInfoEXT *sample_locs_info =
- &subpass_sample_locs_info->sampleLocationsInfo;
-
- state->subpass_sample_locs[i].subpass_idx =
- subpass_sample_locs_info->subpassIndex;
- state->subpass_sample_locs[i].sample_location.per_pixel =
- sample_locs_info->sampleLocationsPerPixel;
- state->subpass_sample_locs[i].sample_location.grid_size =
- sample_locs_info->sampleLocationGridSize;
- state->subpass_sample_locs[i].sample_location.count =
- sample_locs_info->sampleLocationsCount;
- typed_memcpy(&state->subpass_sample_locs[i].sample_location.locations[0],
- sample_locs_info->pSampleLocations,
- sample_locs_info->sampleLocationsCount);
- }
-
- return VK_SUCCESS;
+ struct radv_render_pass *pass,
+ const VkRenderPassBeginInfo *info)
+{
+ const struct VkRenderPassSampleLocationsBeginInfoEXT *sample_locs =
+ vk_find_struct_const(info->pNext, RENDER_PASS_SAMPLE_LOCATIONS_BEGIN_INFO_EXT);
+ struct radv_cmd_state *state = &cmd_buffer->state;
+
+ if (!sample_locs) {
+ state->subpass_sample_locs = NULL;
+ return VK_SUCCESS;
+ }
+
+ for (uint32_t i = 0; i < sample_locs->attachmentInitialSampleLocationsCount; i++) {
+ const VkAttachmentSampleLocationsEXT *att_sample_locs =
+ &sample_locs->pAttachmentInitialSampleLocations[i];
+ uint32_t att_idx = att_sample_locs->attachmentIndex;
+ struct radv_image *image = cmd_buffer->state.attachments[att_idx].iview->image;
+
+ assert(vk_format_is_depth_or_stencil(image->vk_format));
+
+ /* From the Vulkan spec 1.1.108:
+ *
+ * "If the image referenced by the framebuffer attachment at
+ * index attachmentIndex was not created with
+ * VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT
+ * then the values specified in sampleLocationsInfo are
+ * ignored."
+ */
+ if (!(image->flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT))
+ continue;
+
+ const VkSampleLocationsInfoEXT *sample_locs_info = &att_sample_locs->sampleLocationsInfo;
+
+ state->attachments[att_idx].sample_location.per_pixel =
+ sample_locs_info->sampleLocationsPerPixel;
+ state->attachments[att_idx].sample_location.grid_size =
+ sample_locs_info->sampleLocationGridSize;
+ state->attachments[att_idx].sample_location.count = sample_locs_info->sampleLocationsCount;
+ typed_memcpy(&state->attachments[att_idx].sample_location.locations[0],
+ sample_locs_info->pSampleLocations, sample_locs_info->sampleLocationsCount);
+ }
+
+ state->subpass_sample_locs =
+ vk_alloc(&cmd_buffer->pool->alloc,
+ sample_locs->postSubpassSampleLocationsCount * sizeof(state->subpass_sample_locs[0]),
+ 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (state->subpass_sample_locs == NULL) {
+ cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
+ return cmd_buffer->record_result;
+ }
+
+ state->num_subpass_sample_locs = sample_locs->postSubpassSampleLocationsCount;
+
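+   /* Record the sample locations that apply after each listed subpass. */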
+ for (uint32_t i = 0; i < sample_locs->postSubpassSampleLocationsCount; i++) {
+ const VkSubpassSampleLocationsEXT *subpass_sample_locs_info =
+ &sample_locs->pPostSubpassSampleLocations[i];
+ const VkSampleLocationsInfoEXT *sample_locs_info =
+ &subpass_sample_locs_info->sampleLocationsInfo;
+
+ state->subpass_sample_locs[i].subpass_idx = subpass_sample_locs_info->subpassIndex;
+ state->subpass_sample_locs[i].sample_location.per_pixel =
+ sample_locs_info->sampleLocationsPerPixel;
+ state->subpass_sample_locs[i].sample_location.grid_size =
+ sample_locs_info->sampleLocationGridSize;
+ state->subpass_sample_locs[i].sample_location.count = sample_locs_info->sampleLocationsCount;
+ typed_memcpy(&state->subpass_sample_locs[i].sample_location.locations[0],
+ sample_locs_info->pSampleLocations, sample_locs_info->sampleLocationsCount);
+ }
+
+ return VK_SUCCESS;
}
static VkResult
-radv_cmd_state_setup_attachments(struct radv_cmd_buffer *cmd_buffer,
- struct radv_render_pass *pass,
- const VkRenderPassBeginInfo *info,
- const struct radv_extra_render_pass_begin_info *extra)
-{
- struct radv_cmd_state *state = &cmd_buffer->state;
- const struct VkRenderPassAttachmentBeginInfo *attachment_info = NULL;
-
- if (info) {
- attachment_info = vk_find_struct_const(info->pNext,
- RENDER_PASS_ATTACHMENT_BEGIN_INFO);
- }
-
-
- if (pass->attachment_count == 0) {
- state->attachments = NULL;
- return VK_SUCCESS;
- }
-
- state->attachments = vk_alloc(&cmd_buffer->pool->alloc,
- pass->attachment_count *
- sizeof(state->attachments[0]),
- 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (state->attachments == NULL) {
- cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
- return cmd_buffer->record_result;
- }
-
- for (uint32_t i = 0; i < pass->attachment_count; ++i) {
- struct radv_render_pass_attachment *att = &pass->attachments[i];
- VkImageAspectFlags att_aspects = vk_format_aspects(att->format);
- VkImageAspectFlags clear_aspects = 0;
-
- if (att_aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
- /* color attachment */
- if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
- clear_aspects |= VK_IMAGE_ASPECT_COLOR_BIT;
- }
- } else {
- /* depthstencil attachment */
- if ((att_aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
- att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
- clear_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
- if ((att_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
- att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_DONT_CARE)
- clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
- }
- if ((att_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
- att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
- clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
- }
- }
-
- state->attachments[i].pending_clear_aspects = clear_aspects;
- state->attachments[i].cleared_views = 0;
- if (clear_aspects && info) {
- assert(info->clearValueCount > i);
- state->attachments[i].clear_value = info->pClearValues[i];
- }
-
- state->attachments[i].current_layout = att->initial_layout;
- state->attachments[i].current_in_render_loop = false;
- state->attachments[i].current_stencil_layout = att->stencil_initial_layout;
- state->attachments[i].disable_dcc = extra && extra->disable_dcc;
- state->attachments[i].sample_location.count = 0;
-
- struct radv_image_view *iview;
- if (attachment_info && attachment_info->attachmentCount > i) {
- iview = radv_image_view_from_handle(attachment_info->pAttachments[i]);
- } else {
- iview = state->framebuffer->attachments[i];
- }
-
- state->attachments[i].iview = iview;
- if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
- radv_initialise_ds_surface(cmd_buffer->device, &state->attachments[i].ds, iview);
- } else {
- radv_initialise_color_surface(cmd_buffer->device, &state->attachments[i].cb, iview);
- }
- }
-
- return VK_SUCCESS;
-}
-
-VkResult radv_AllocateCommandBuffers(
- VkDevice _device,
- const VkCommandBufferAllocateInfo *pAllocateInfo,
- VkCommandBuffer *pCommandBuffers)
-{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_cmd_pool, pool, pAllocateInfo->commandPool);
-
- VkResult result = VK_SUCCESS;
- uint32_t i;
-
- for (i = 0; i < pAllocateInfo->commandBufferCount; i++) {
-
- if (!list_is_empty(&pool->free_cmd_buffers)) {
- struct radv_cmd_buffer *cmd_buffer = list_first_entry(&pool->free_cmd_buffers, struct radv_cmd_buffer, pool_link);
-
- list_del(&cmd_buffer->pool_link);
- list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers);
-
- result = radv_reset_cmd_buffer(cmd_buffer);
- cmd_buffer->level = pAllocateInfo->level;
- vk_object_base_reset(&cmd_buffer->base);
-
- pCommandBuffers[i] = radv_cmd_buffer_to_handle(cmd_buffer);
- } else {
- result = radv_create_cmd_buffer(device, pool, pAllocateInfo->level,
- &pCommandBuffers[i]);
- }
- if (result != VK_SUCCESS)
- break;
- }
-
- if (result != VK_SUCCESS) {
- radv_FreeCommandBuffers(_device, pAllocateInfo->commandPool,
- i, pCommandBuffers);
-
- /* From the Vulkan 1.0.66 spec:
- *
- * "vkAllocateCommandBuffers can be used to create multiple
- * command buffers. If the creation of any of those command
- * buffers fails, the implementation must destroy all
- * successfully created command buffer objects from this
- * command, set all entries of the pCommandBuffers array to
- * NULL and return the error."
- */
- memset(pCommandBuffers, 0,
- sizeof(*pCommandBuffers) * pAllocateInfo->commandBufferCount);
- }
-
- return result;
-}
-
-void radv_FreeCommandBuffers(
- VkDevice device,
- VkCommandPool commandPool,
- uint32_t commandBufferCount,
- const VkCommandBuffer *pCommandBuffers)
-{
- for (uint32_t i = 0; i < commandBufferCount; i++) {
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, pCommandBuffers[i]);
-
- if (cmd_buffer) {
- if (cmd_buffer->pool) {
- list_del(&cmd_buffer->pool_link);
- list_addtail(&cmd_buffer->pool_link, &cmd_buffer->pool->free_cmd_buffers);
- } else
- radv_destroy_cmd_buffer(cmd_buffer);
-
- }
- }
-}
-
-VkResult radv_ResetCommandBuffer(
- VkCommandBuffer commandBuffer,
- VkCommandBufferResetFlags flags)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- return radv_reset_cmd_buffer(cmd_buffer);
-}
-
-VkResult radv_BeginCommandBuffer(
- VkCommandBuffer commandBuffer,
- const VkCommandBufferBeginInfo *pBeginInfo)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- VkResult result = VK_SUCCESS;
-
- if (cmd_buffer->status != RADV_CMD_BUFFER_STATUS_INITIAL) {
- /* If the command buffer has already been resetted with
- * vkResetCommandBuffer, no need to do it again.
- */
- result = radv_reset_cmd_buffer(cmd_buffer);
- if (result != VK_SUCCESS)
- return result;
- }
-
- memset(&cmd_buffer->state, 0, sizeof(cmd_buffer->state));
- cmd_buffer->state.last_primitive_reset_en = -1;
- cmd_buffer->state.last_index_type = -1;
- cmd_buffer->state.last_num_instances = -1;
- cmd_buffer->state.last_vertex_offset = -1;
- cmd_buffer->state.last_first_instance = -1;
- cmd_buffer->state.last_drawid = -1;
- cmd_buffer->state.predication_type = -1;
- cmd_buffer->state.last_sx_ps_downconvert = -1;
- cmd_buffer->state.last_sx_blend_opt_epsilon = -1;
- cmd_buffer->state.last_sx_blend_opt_control = -1;
- cmd_buffer->usage_flags = pBeginInfo->flags;
-
- if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY &&
- (pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT)) {
- assert(pBeginInfo->pInheritanceInfo);
- cmd_buffer->state.framebuffer = radv_framebuffer_from_handle(pBeginInfo->pInheritanceInfo->framebuffer);
- cmd_buffer->state.pass = radv_render_pass_from_handle(pBeginInfo->pInheritanceInfo->renderPass);
-
- struct radv_subpass *subpass =
- &cmd_buffer->state.pass->subpasses[pBeginInfo->pInheritanceInfo->subpass];
-
- if (cmd_buffer->state.framebuffer) {
- result = radv_cmd_state_setup_attachments(cmd_buffer, cmd_buffer->state.pass, NULL, NULL);
- if (result != VK_SUCCESS)
- return result;
- }
-
- cmd_buffer->state.inherited_pipeline_statistics =
- pBeginInfo->pInheritanceInfo->pipelineStatistics;
-
- radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
- }
-
- if (unlikely(cmd_buffer->device->trace_bo))
- radv_cmd_buffer_trace_emit(cmd_buffer);
-
- radv_describe_begin_cmd_buffer(cmd_buffer);
-
- cmd_buffer->status = RADV_CMD_BUFFER_STATUS_RECORDING;
-
- return result;
-}
-
-void radv_CmdBindVertexBuffers(
- VkCommandBuffer commandBuffer,
- uint32_t firstBinding,
- uint32_t bindingCount,
- const VkBuffer* pBuffers,
- const VkDeviceSize* pOffsets)
-{
- radv_CmdBindVertexBuffers2EXT(commandBuffer, firstBinding,
- bindingCount, pBuffers, pOffsets,
- NULL, NULL);
-}
-
-void radv_CmdBindVertexBuffers2EXT(
- VkCommandBuffer commandBuffer,
- uint32_t firstBinding,
- uint32_t bindingCount,
- const VkBuffer* pBuffers,
- const VkDeviceSize* pOffsets,
- const VkDeviceSize* pSizes,
- const VkDeviceSize* pStrides)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_vertex_binding *vb = cmd_buffer->vertex_bindings;
- bool changed = false;
-
- /* We have to defer setting up vertex buffer since we need the buffer
- * stride from the pipeline. */
-
- assert(firstBinding + bindingCount <= MAX_VBS);
- for (uint32_t i = 0; i < bindingCount; i++) {
- RADV_FROM_HANDLE(radv_buffer, buffer, pBuffers[i]);
- uint32_t idx = firstBinding + i;
- VkDeviceSize size = pSizes ? pSizes[i] : 0;
- VkDeviceSize stride = pStrides ? pStrides[i] : 0;
-
- /* pSizes and pStrides are optional. */
- if (!changed &&
- (vb[idx].buffer != buffer ||
- vb[idx].offset != pOffsets[i] ||
- vb[idx].size != size ||
- vb[idx].stride != stride)) {
- changed = true;
- }
-
- vb[idx].buffer = buffer;
- vb[idx].offset = pOffsets[i];
- vb[idx].size = size;
- vb[idx].stride = stride;
-
- if (buffer) {
- radv_cs_add_buffer(cmd_buffer->device->ws,
- cmd_buffer->cs, vb[idx].buffer->bo);
- }
- }
-
- if (!changed) {
- /* No state changes. */
- return;
- }
-
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_VERTEX_BUFFER;
+radv_cmd_state_setup_attachments(struct radv_cmd_buffer *cmd_buffer, struct radv_render_pass *pass,
+ const VkRenderPassBeginInfo *info,
+ const struct radv_extra_render_pass_begin_info *extra)
+{
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ const struct VkRenderPassAttachmentBeginInfo *attachment_info = NULL;
+
+ if (info) {
+ attachment_info = vk_find_struct_const(info->pNext, RENDER_PASS_ATTACHMENT_BEGIN_INFO);
+ }
+
+ if (pass->attachment_count == 0) {
+ state->attachments = NULL;
+ return VK_SUCCESS;
+ }
+
+ state->attachments =
+ vk_alloc(&cmd_buffer->pool->alloc, pass->attachment_count * sizeof(state->attachments[0]), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (state->attachments == NULL) {
+ cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
+ return cmd_buffer->record_result;
+ }
+
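+   /* For each attachment, determine which aspects need to be cleared on load
+    * and record the initial layouts, clear values and surface descriptors. */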
+ for (uint32_t i = 0; i < pass->attachment_count; ++i) {
+ struct radv_render_pass_attachment *att = &pass->attachments[i];
+ VkImageAspectFlags att_aspects = vk_format_aspects(att->format);
+ VkImageAspectFlags clear_aspects = 0;
+
+ if (att_aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
+ /* color attachment */
+ if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
+ clear_aspects |= VK_IMAGE_ASPECT_COLOR_BIT;
+ }
+ } else {
+         /* depth/stencil attachment */
+ if ((att_aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
+ att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
+ clear_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
+ if ((att_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
+ att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_DONT_CARE)
+ clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
+ }
+ if ((att_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
+ att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
+ clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
+ }
+ }
+
+ state->attachments[i].pending_clear_aspects = clear_aspects;
+ state->attachments[i].cleared_views = 0;
+ if (clear_aspects && info) {
+ assert(info->clearValueCount > i);
+ state->attachments[i].clear_value = info->pClearValues[i];
+ }
+
+ state->attachments[i].current_layout = att->initial_layout;
+ state->attachments[i].current_in_render_loop = false;
+ state->attachments[i].current_stencil_layout = att->stencil_initial_layout;
+ state->attachments[i].disable_dcc = extra && extra->disable_dcc;
+ state->attachments[i].sample_location.count = 0;
+
+ struct radv_image_view *iview;
+ if (attachment_info && attachment_info->attachmentCount > i) {
+ iview = radv_image_view_from_handle(attachment_info->pAttachments[i]);
+ } else {
+ iview = state->framebuffer->attachments[i];
+ }
+
+ state->attachments[i].iview = iview;
+ if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
+ radv_initialise_ds_surface(cmd_buffer->device, &state->attachments[i].ds, iview);
+ } else {
+ radv_initialise_color_surface(cmd_buffer->device, &state->attachments[i].cb, iview);
+ }
+ }
+
+ return VK_SUCCESS;
+}
+
+VkResult
+radv_AllocateCommandBuffers(VkDevice _device, const VkCommandBufferAllocateInfo *pAllocateInfo,
+ VkCommandBuffer *pCommandBuffers)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_cmd_pool, pool, pAllocateInfo->commandPool);
+
+ VkResult result = VK_SUCCESS;
+ uint32_t i;
+
+ for (i = 0; i < pAllocateInfo->commandBufferCount; i++) {
+
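+      /* Reuse a command buffer from the pool's free list when possible;
+       * otherwise allocate a new one. */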
+ if (!list_is_empty(&pool->free_cmd_buffers)) {
+ struct radv_cmd_buffer *cmd_buffer =
+ list_first_entry(&pool->free_cmd_buffers, struct radv_cmd_buffer, pool_link);
+
+ list_del(&cmd_buffer->pool_link);
+ list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers);
+
+ result = radv_reset_cmd_buffer(cmd_buffer);
+ cmd_buffer->level = pAllocateInfo->level;
+ vk_object_base_reset(&cmd_buffer->base);
+
+ pCommandBuffers[i] = radv_cmd_buffer_to_handle(cmd_buffer);
+ } else {
+ result = radv_create_cmd_buffer(device, pool, pAllocateInfo->level, &pCommandBuffers[i]);
+ }
+ if (result != VK_SUCCESS)
+ break;
+ }
+
+ if (result != VK_SUCCESS) {
+ radv_FreeCommandBuffers(_device, pAllocateInfo->commandPool, i, pCommandBuffers);
+
+ /* From the Vulkan 1.0.66 spec:
+ *
+ * "vkAllocateCommandBuffers can be used to create multiple
+ * command buffers. If the creation of any of those command
+ * buffers fails, the implementation must destroy all
+ * successfully created command buffer objects from this
+ * command, set all entries of the pCommandBuffers array to
+ * NULL and return the error."
+ */
+ memset(pCommandBuffers, 0, sizeof(*pCommandBuffers) * pAllocateInfo->commandBufferCount);
+ }
+
+ return result;
+}
+
+void
+radv_FreeCommandBuffers(VkDevice device, VkCommandPool commandPool, uint32_t commandBufferCount,
+ const VkCommandBuffer *pCommandBuffers)
+{
+ for (uint32_t i = 0; i < commandBufferCount; i++) {
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, pCommandBuffers[i]);
+
+ if (cmd_buffer) {
+ if (cmd_buffer->pool) {
+ list_del(&cmd_buffer->pool_link);
+ list_addtail(&cmd_buffer->pool_link, &cmd_buffer->pool->free_cmd_buffers);
+ } else
+ radv_destroy_cmd_buffer(cmd_buffer);
+ }
+ }
+}
+
+VkResult
+radv_ResetCommandBuffer(VkCommandBuffer commandBuffer, VkCommandBufferResetFlags flags)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ return radv_reset_cmd_buffer(cmd_buffer);
+}
+
+VkResult
+radv_BeginCommandBuffer(VkCommandBuffer commandBuffer, const VkCommandBufferBeginInfo *pBeginInfo)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ VkResult result = VK_SUCCESS;
+
+ if (cmd_buffer->status != RADV_CMD_BUFFER_STATUS_INITIAL) {
+      /* If the command buffer has already been reset with
+       * vkResetCommandBuffer, there is no need to do it again.
+ */
+ result = radv_reset_cmd_buffer(cmd_buffer);
+ if (result != VK_SUCCESS)
+ return result;
+ }
+
+ memset(&cmd_buffer->state, 0, sizeof(cmd_buffer->state));
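+   /* Mark the tracked draw state as unknown (-1) so that it is re-emitted the
+    * first time it is needed. */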
+ cmd_buffer->state.last_primitive_reset_en = -1;
+ cmd_buffer->state.last_index_type = -1;
+ cmd_buffer->state.last_num_instances = -1;
+ cmd_buffer->state.last_vertex_offset = -1;
+ cmd_buffer->state.last_first_instance = -1;
+ cmd_buffer->state.last_drawid = -1;
+ cmd_buffer->state.predication_type = -1;
+ cmd_buffer->state.last_sx_ps_downconvert = -1;
+ cmd_buffer->state.last_sx_blend_opt_epsilon = -1;
+ cmd_buffer->state.last_sx_blend_opt_control = -1;
+ cmd_buffer->usage_flags = pBeginInfo->flags;
+
+ if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY &&
+ (pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT)) {
+ assert(pBeginInfo->pInheritanceInfo);
+ cmd_buffer->state.framebuffer =
+ radv_framebuffer_from_handle(pBeginInfo->pInheritanceInfo->framebuffer);
+ cmd_buffer->state.pass =
+ radv_render_pass_from_handle(pBeginInfo->pInheritanceInfo->renderPass);
+
+ struct radv_subpass *subpass =
+ &cmd_buffer->state.pass->subpasses[pBeginInfo->pInheritanceInfo->subpass];
+
+ if (cmd_buffer->state.framebuffer) {
+ result = radv_cmd_state_setup_attachments(cmd_buffer, cmd_buffer->state.pass, NULL, NULL);
+ if (result != VK_SUCCESS)
+ return result;
+ }
+
+ cmd_buffer->state.inherited_pipeline_statistics =
+ pBeginInfo->pInheritanceInfo->pipelineStatistics;
+
+ radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
+ }
+
+ if (unlikely(cmd_buffer->device->trace_bo))
+ radv_cmd_buffer_trace_emit(cmd_buffer);
+
+ radv_describe_begin_cmd_buffer(cmd_buffer);
+
+ cmd_buffer->status = RADV_CMD_BUFFER_STATUS_RECORDING;
+
+ return result;
+}
+
+void
+radv_CmdBindVertexBuffers(VkCommandBuffer commandBuffer, uint32_t firstBinding,
+ uint32_t bindingCount, const VkBuffer *pBuffers,
+ const VkDeviceSize *pOffsets)
+{
+ radv_CmdBindVertexBuffers2EXT(commandBuffer, firstBinding, bindingCount, pBuffers, pOffsets,
+ NULL, NULL);
+}
+
+void
+radv_CmdBindVertexBuffers2EXT(VkCommandBuffer commandBuffer, uint32_t firstBinding,
+ uint32_t bindingCount, const VkBuffer *pBuffers,
+ const VkDeviceSize *pOffsets, const VkDeviceSize *pSizes,
+ const VkDeviceSize *pStrides)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_vertex_binding *vb = cmd_buffer->vertex_bindings;
+ bool changed = false;
+
+   /* We have to defer setting up the vertex buffers since we need the buffer
+    * strides from the pipeline. */
+
+ assert(firstBinding + bindingCount <= MAX_VBS);
+ for (uint32_t i = 0; i < bindingCount; i++) {
+ RADV_FROM_HANDLE(radv_buffer, buffer, pBuffers[i]);
+ uint32_t idx = firstBinding + i;
+ VkDeviceSize size = pSizes ? pSizes[i] : 0;
+ VkDeviceSize stride = pStrides ? pStrides[i] : 0;
+
+ /* pSizes and pStrides are optional. */
+ if (!changed && (vb[idx].buffer != buffer || vb[idx].offset != pOffsets[i] ||
+ vb[idx].size != size || vb[idx].stride != stride)) {
+ changed = true;
+ }
+
+ vb[idx].buffer = buffer;
+ vb[idx].offset = pOffsets[i];
+ vb[idx].size = size;
+ vb[idx].stride = stride;
+
+ if (buffer) {
+ radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, vb[idx].buffer->bo);
+ }
+ }
+
+ if (!changed) {
+ /* No state changes. */
+ return;
+ }
+
+ cmd_buffer->state.dirty |= RADV_CMD_DIRTY_VERTEX_BUFFER;
}
static uint32_t
vk_to_index_type(VkIndexType type)
{
- switch (type) {
- case VK_INDEX_TYPE_UINT8_EXT:
- return V_028A7C_VGT_INDEX_8;
- case VK_INDEX_TYPE_UINT16:
- return V_028A7C_VGT_INDEX_16;
- case VK_INDEX_TYPE_UINT32:
- return V_028A7C_VGT_INDEX_32;
- default:
- unreachable("invalid index type");
- }
+ switch (type) {
+ case VK_INDEX_TYPE_UINT8_EXT:
+ return V_028A7C_VGT_INDEX_8;
+ case VK_INDEX_TYPE_UINT16:
+ return V_028A7C_VGT_INDEX_16;
+ case VK_INDEX_TYPE_UINT32:
+ return V_028A7C_VGT_INDEX_32;
+ default:
+ unreachable("invalid index type");
+ }
}
static uint32_t
radv_get_vgt_index_size(uint32_t type)
{
- switch (type) {
- case V_028A7C_VGT_INDEX_8:
- return 1;
- case V_028A7C_VGT_INDEX_16:
- return 2;
- case V_028A7C_VGT_INDEX_32:
- return 4;
- default:
- unreachable("invalid index type");
- }
+ switch (type) {
+ case V_028A7C_VGT_INDEX_8:
+ return 1;
+ case V_028A7C_VGT_INDEX_16:
+ return 2;
+ case V_028A7C_VGT_INDEX_32:
+ return 4;
+ default:
+ unreachable("invalid index type");
+ }
}
-void radv_CmdBindIndexBuffer(
- VkCommandBuffer commandBuffer,
- VkBuffer buffer,
- VkDeviceSize offset,
- VkIndexType indexType)
+void
+radv_CmdBindIndexBuffer(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
+ VkIndexType indexType)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_buffer, index_buffer, buffer);
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_buffer, index_buffer, buffer);
- if (cmd_buffer->state.index_buffer == index_buffer &&
- cmd_buffer->state.index_offset == offset &&
- cmd_buffer->state.index_type == indexType) {
- /* No state changes. */
- return;
- }
+ if (cmd_buffer->state.index_buffer == index_buffer && cmd_buffer->state.index_offset == offset &&
+ cmd_buffer->state.index_type == indexType) {
+ /* No state changes. */
+ return;
+ }
- cmd_buffer->state.index_buffer = index_buffer;
- cmd_buffer->state.index_offset = offset;
- cmd_buffer->state.index_type = vk_to_index_type(indexType);
- cmd_buffer->state.index_va = radv_buffer_get_va(index_buffer->bo);
- cmd_buffer->state.index_va += index_buffer->offset + offset;
+ cmd_buffer->state.index_buffer = index_buffer;
+ cmd_buffer->state.index_offset = offset;
+ cmd_buffer->state.index_type = vk_to_index_type(indexType);
+ cmd_buffer->state.index_va = radv_buffer_get_va(index_buffer->bo);
+ cmd_buffer->state.index_va += index_buffer->offset + offset;
- int index_size = radv_get_vgt_index_size(vk_to_index_type(indexType));
- cmd_buffer->state.max_index_count = (index_buffer->size - offset) / index_size;
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_INDEX_BUFFER;
- radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, index_buffer->bo);
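+   /* max_index_count is the number of indices that fit between the bound
+    * offset and the end of the buffer. */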
+ int index_size = radv_get_vgt_index_size(vk_to_index_type(indexType));
+ cmd_buffer->state.max_index_count = (index_buffer->size - offset) / index_size;
+ cmd_buffer->state.dirty |= RADV_CMD_DIRTY_INDEX_BUFFER;
+ radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, index_buffer->bo);
}
-
static void
-radv_bind_descriptor_set(struct radv_cmd_buffer *cmd_buffer,
- VkPipelineBindPoint bind_point,
- struct radv_descriptor_set *set, unsigned idx)
-{
- struct radeon_winsys *ws = cmd_buffer->device->ws;
-
- radv_set_descriptor_set(cmd_buffer, bind_point, set, idx);
-
- assert(set);
-
- if (!cmd_buffer->device->use_global_bo_list) {
- for (unsigned j = 0; j < set->header.buffer_count; ++j)
- if (set->descriptors[j])
- radv_cs_add_buffer(ws, cmd_buffer->cs, set->descriptors[j]);
- }
-
- if(set->header.bo)
- radv_cs_add_buffer(ws, cmd_buffer->cs, set->header.bo);
-}
-
-void radv_CmdBindDescriptorSets(
- VkCommandBuffer commandBuffer,
- VkPipelineBindPoint pipelineBindPoint,
- VkPipelineLayout _layout,
- uint32_t firstSet,
- uint32_t descriptorSetCount,
- const VkDescriptorSet* pDescriptorSets,
- uint32_t dynamicOffsetCount,
- const uint32_t* pDynamicOffsets)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout);
- unsigned dyn_idx = 0;
-
- const bool no_dynamic_bounds = cmd_buffer->device->instance->debug_flags & RADV_DEBUG_NO_DYNAMIC_BOUNDS;
- struct radv_descriptor_state *descriptors_state =
- radv_get_descriptors_state(cmd_buffer, pipelineBindPoint);
-
- for (unsigned i = 0; i < descriptorSetCount; ++i) {
- unsigned set_idx = i + firstSet;
- RADV_FROM_HANDLE(radv_descriptor_set, set, pDescriptorSets[i]);
-
- /* If the set is already bound we only need to update the
- * (potentially changed) dynamic offsets. */
- if (descriptors_state->sets[set_idx] != set ||
- !(descriptors_state->valid & (1u << set_idx))) {
- radv_bind_descriptor_set(cmd_buffer, pipelineBindPoint, set, set_idx);
- }
-
- for(unsigned j = 0; j < layout->set[set_idx].dynamic_offset_count; ++j, ++dyn_idx) {
- unsigned idx = j + layout->set[i + firstSet].dynamic_offset_start;
- uint32_t *dst = descriptors_state->dynamic_buffers + idx * 4;
- assert(dyn_idx < dynamicOffsetCount);
-
- struct radv_descriptor_range *range = set->header.dynamic_descriptors + j;
-
- if (!range->va) {
- memset(dst, 0, 4 * 4);
- } else {
- uint64_t va = range->va + pDynamicOffsets[dyn_idx];
- dst[0] = va;
- dst[1] = S_008F04_BASE_ADDRESS_HI(va >> 32);
- dst[2] = no_dynamic_bounds ? 0xffffffffu : range->size;
- dst[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
- S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
- S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
- S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
-
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
- dst[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
- S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
- S_008F0C_RESOURCE_LEVEL(1);
- } else {
- dst[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
- }
- }
-
- cmd_buffer->push_constant_stages |= layout->set[set_idx].dynamic_offset_stages;
- }
- }
-}
-
-static bool radv_init_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer,
- struct radv_descriptor_set *set,
- struct radv_descriptor_set_layout *layout,
- VkPipelineBindPoint bind_point)
-{
- struct radv_descriptor_state *descriptors_state =
- radv_get_descriptors_state(cmd_buffer, bind_point);
- set->header.size = layout->size;
- set->header.layout = layout;
-
- if (descriptors_state->push_set.capacity < set->header.size) {
- size_t new_size = MAX2(set->header.size, 1024);
- new_size = MAX2(new_size, 2 * descriptors_state->push_set.capacity);
- new_size = MIN2(new_size, 96 * MAX_PUSH_DESCRIPTORS);
-
- free(set->header.mapped_ptr);
- set->header.mapped_ptr = malloc(new_size);
-
- if (!set->header.mapped_ptr) {
- descriptors_state->push_set.capacity = 0;
- cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
- return false;
- }
-
- descriptors_state->push_set.capacity = new_size;
- }
-
- return true;
-}
-
-void radv_meta_push_descriptor_set(
- struct radv_cmd_buffer* cmd_buffer,
- VkPipelineBindPoint pipelineBindPoint,
- VkPipelineLayout _layout,
- uint32_t set,
- uint32_t descriptorWriteCount,
- const VkWriteDescriptorSet* pDescriptorWrites)
-{
- RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout);
- struct radv_descriptor_set *push_set =
- (struct radv_descriptor_set *)&cmd_buffer->meta_push_descriptors;
- unsigned bo_offset;
-
- assert(set == 0);
- assert(layout->set[set].layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR);
-
- push_set->header.size = layout->set[set].layout->size;
- push_set->header.layout = layout->set[set].layout;
-
- if (!radv_cmd_buffer_upload_alloc(cmd_buffer, push_set->header.size,
- &bo_offset, (void**) &push_set->header.mapped_ptr))
- return;
-
- push_set->header.va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
- push_set->header.va += bo_offset;
-
- radv_update_descriptor_sets(cmd_buffer->device, cmd_buffer,
- radv_descriptor_set_to_handle(push_set),
- descriptorWriteCount, pDescriptorWrites, 0, NULL);
-
- radv_set_descriptor_set(cmd_buffer, pipelineBindPoint, push_set, set);
-}
-
-void radv_CmdPushDescriptorSetKHR(
- VkCommandBuffer commandBuffer,
- VkPipelineBindPoint pipelineBindPoint,
- VkPipelineLayout _layout,
- uint32_t set,
- uint32_t descriptorWriteCount,
- const VkWriteDescriptorSet* pDescriptorWrites)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout);
- struct radv_descriptor_state *descriptors_state =
- radv_get_descriptors_state(cmd_buffer, pipelineBindPoint);
- struct radv_descriptor_set *push_set =
- (struct radv_descriptor_set *)&descriptors_state->push_set.set;
-
- assert(layout->set[set].layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR);
-
- if (!radv_init_push_descriptor_set(cmd_buffer, push_set,
- layout->set[set].layout,
- pipelineBindPoint))
- return;
+radv_bind_descriptor_set(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point,
+ struct radv_descriptor_set *set, unsigned idx)
+{
+ struct radeon_winsys *ws = cmd_buffer->device->ws;
+
+ radv_set_descriptor_set(cmd_buffer, bind_point, set, idx);
- /* Check that there are no inline uniform block updates when calling vkCmdPushDescriptorSetKHR()
- * because it is invalid, according to Vulkan spec.
- */
- for (int i = 0; i < descriptorWriteCount; i++) {
- ASSERTED const VkWriteDescriptorSet *writeset = &pDescriptorWrites[i];
- assert(writeset->descriptorType != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT);
- }
+ assert(set);
- radv_update_descriptor_sets(cmd_buffer->device, cmd_buffer,
- radv_descriptor_set_to_handle(push_set),
- descriptorWriteCount, pDescriptorWrites, 0, NULL);
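+   /* Without the global BO list, each buffer referenced by the set has to be
+    * added to the command stream's buffer list explicitly. */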
+ if (!cmd_buffer->device->use_global_bo_list) {
+ for (unsigned j = 0; j < set->header.buffer_count; ++j)
+ if (set->descriptors[j])
+ radv_cs_add_buffer(ws, cmd_buffer->cs, set->descriptors[j]);
+ }
- radv_set_descriptor_set(cmd_buffer, pipelineBindPoint, push_set, set);
- descriptors_state->push_dirty = true;
+ if (set->header.bo)
+ radv_cs_add_buffer(ws, cmd_buffer->cs, set->header.bo);
+}
+
+void
+radv_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint,
+ VkPipelineLayout _layout, uint32_t firstSet, uint32_t descriptorSetCount,
+ const VkDescriptorSet *pDescriptorSets, uint32_t dynamicOffsetCount,
+ const uint32_t *pDynamicOffsets)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout);
+ unsigned dyn_idx = 0;
+
+ const bool no_dynamic_bounds =
+ cmd_buffer->device->instance->debug_flags & RADV_DEBUG_NO_DYNAMIC_BOUNDS;
+ struct radv_descriptor_state *descriptors_state =
+ radv_get_descriptors_state(cmd_buffer, pipelineBindPoint);
+
+ for (unsigned i = 0; i < descriptorSetCount; ++i) {
+ unsigned set_idx = i + firstSet;
+ RADV_FROM_HANDLE(radv_descriptor_set, set, pDescriptorSets[i]);
+
+      /* If the set is already bound, we only need to update the
+       * (potentially changed) dynamic offsets. */
+ if (descriptors_state->sets[set_idx] != set ||
+ !(descriptors_state->valid & (1u << set_idx))) {
+ radv_bind_descriptor_set(cmd_buffer, pipelineBindPoint, set, set_idx);
+ }
+
+ for (unsigned j = 0; j < layout->set[set_idx].dynamic_offset_count; ++j, ++dyn_idx) {
+ unsigned idx = j + layout->set[i + firstSet].dynamic_offset_start;
+ uint32_t *dst = descriptors_state->dynamic_buffers + idx * 4;
+ assert(dyn_idx < dynamicOffsetCount);
+
+ struct radv_descriptor_range *range = set->header.dynamic_descriptors + j;
+
+ if (!range->va) {
+ memset(dst, 0, 4 * 4);
+ } else {
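+            /* Build a 4-dword buffer descriptor: dst[0..1] hold the address,
+             * dst[2] the range (unbounded with RADV_DEBUG_NO_DYNAMIC_BOUNDS) and
+             * dst[3] the dst swizzle and format. */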
+ uint64_t va = range->va + pDynamicOffsets[dyn_idx];
+ dst[0] = va;
+ dst[1] = S_008F04_BASE_ADDRESS_HI(va >> 32);
+ dst[2] = no_dynamic_bounds ? 0xffffffffu : range->size;
+ dst[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
+ dst[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+ S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
+ } else {
+ dst[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+ }
+ }
+
+ cmd_buffer->push_constant_stages |= layout->set[set_idx].dynamic_offset_stages;
+ }
+ }
}
-void radv_CmdPushDescriptorSetWithTemplateKHR(
- VkCommandBuffer commandBuffer,
- VkDescriptorUpdateTemplate descriptorUpdateTemplate,
- VkPipelineLayout _layout,
- uint32_t set,
- const void* pData)
+static bool
+radv_init_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer, struct radv_descriptor_set *set,
+ struct radv_descriptor_set_layout *layout,
+ VkPipelineBindPoint bind_point)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout);
- RADV_FROM_HANDLE(radv_descriptor_update_template, templ, descriptorUpdateTemplate);
- struct radv_descriptor_state *descriptors_state =
- radv_get_descriptors_state(cmd_buffer, templ->bind_point);
- struct radv_descriptor_set *push_set =
- (struct radv_descriptor_set *)&descriptors_state->push_set.set;
+ struct radv_descriptor_state *descriptors_state =
+ radv_get_descriptors_state(cmd_buffer, bind_point);
+ set->header.size = layout->size;
+ set->header.layout = layout;
+
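+   /* Grow the host-side storage for the push set geometrically, with an upper
+    * bound of 96 * MAX_PUSH_DESCRIPTORS bytes. */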
+ if (descriptors_state->push_set.capacity < set->header.size) {
+ size_t new_size = MAX2(set->header.size, 1024);
+ new_size = MAX2(new_size, 2 * descriptors_state->push_set.capacity);
+ new_size = MIN2(new_size, 96 * MAX_PUSH_DESCRIPTORS);
- assert(layout->set[set].layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR);
+ free(set->header.mapped_ptr);
+ set->header.mapped_ptr = malloc(new_size);
- if (!radv_init_push_descriptor_set(cmd_buffer, push_set,
- layout->set[set].layout,
- templ->bind_point))
- return;
+ if (!set->header.mapped_ptr) {
+ descriptors_state->push_set.capacity = 0;
+ cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
+ return false;
+ }
- radv_update_descriptor_set_with_template(cmd_buffer->device, cmd_buffer, push_set,
- descriptorUpdateTemplate, pData);
+ descriptors_state->push_set.capacity = new_size;
+ }
- radv_set_descriptor_set(cmd_buffer, templ->bind_point, push_set, set);
- descriptors_state->push_dirty = true;
+ return true;
}
-void radv_CmdPushConstants(VkCommandBuffer commandBuffer,
- VkPipelineLayout layout,
- VkShaderStageFlags stageFlags,
- uint32_t offset,
- uint32_t size,
- const void* pValues)
+void
+radv_meta_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer,
+ VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout _layout,
+ uint32_t set, uint32_t descriptorWriteCount,
+ const VkWriteDescriptorSet *pDescriptorWrites)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- memcpy(cmd_buffer->push_constants + offset, pValues, size);
- cmd_buffer->push_constant_stages |= stageFlags;
+ RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout);
+ struct radv_descriptor_set *push_set =
+ (struct radv_descriptor_set *)&cmd_buffer->meta_push_descriptors;
+ unsigned bo_offset;
+
+ assert(set == 0);
+ assert(layout->set[set].layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR);
+
+ push_set->header.size = layout->set[set].layout->size;
+ push_set->header.layout = layout->set[set].layout;
+
+ if (!radv_cmd_buffer_upload_alloc(cmd_buffer, push_set->header.size, &bo_offset,
+ (void **)&push_set->header.mapped_ptr))
+ return;
+
+ push_set->header.va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
+ push_set->header.va += bo_offset;
+
+ radv_update_descriptor_sets(cmd_buffer->device, cmd_buffer,
+ radv_descriptor_set_to_handle(push_set), descriptorWriteCount,
+ pDescriptorWrites, 0, NULL);
+
+ radv_set_descriptor_set(cmd_buffer, pipelineBindPoint, push_set, set);
}
-VkResult radv_EndCommandBuffer(
- VkCommandBuffer commandBuffer)
+void
+radv_CmdPushDescriptorSetKHR(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint,
+ VkPipelineLayout _layout, uint32_t set, uint32_t descriptorWriteCount,
+ const VkWriteDescriptorSet *pDescriptorWrites)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout);
+ struct radv_descriptor_state *descriptors_state =
+ radv_get_descriptors_state(cmd_buffer, pipelineBindPoint);
+ struct radv_descriptor_set *push_set =
+ (struct radv_descriptor_set *)&descriptors_state->push_set.set;
- radv_emit_mip_change_flush_default(cmd_buffer);
+ assert(layout->set[set].layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR);
- if (cmd_buffer->queue_family_index != RADV_QUEUE_TRANSFER) {
- if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX6)
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_WB_L2;
+ if (!radv_init_push_descriptor_set(cmd_buffer, push_set, layout->set[set].layout,
+ pipelineBindPoint))
+ return;
- /* Make sure to sync all pending active queries at the end of
- * command buffer.
- */
- cmd_buffer->state.flush_bits |= cmd_buffer->active_query_flush_bits;
+ /* Check that there are no inline uniform block updates when calling vkCmdPushDescriptorSetKHR()
+    * because it is invalid according to the Vulkan spec.
+ */
+ for (int i = 0; i < descriptorWriteCount; i++) {
+ ASSERTED const VkWriteDescriptorSet *writeset = &pDescriptorWrites[i];
+ assert(writeset->descriptorType != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT);
+ }
- /* Since NGG streamout uses GDS, we need to make GDS idle when
- * we leave the IB, otherwise another process might overwrite
- * it while our shaders are busy.
- */
- if (cmd_buffer->gds_needed)
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH;
+ radv_update_descriptor_sets(cmd_buffer->device, cmd_buffer,
+ radv_descriptor_set_to_handle(push_set), descriptorWriteCount,
+ pDescriptorWrites, 0, NULL);
- si_emit_cache_flush(cmd_buffer);
- }
+ radv_set_descriptor_set(cmd_buffer, pipelineBindPoint, push_set, set);
+ descriptors_state->push_dirty = true;
+}
- /* Make sure CP DMA is idle at the end of IBs because the kernel
- * doesn't wait for it.
- */
- si_cp_dma_wait_for_idle(cmd_buffer);
+void
+radv_CmdPushDescriptorSetWithTemplateKHR(VkCommandBuffer commandBuffer,
+ VkDescriptorUpdateTemplate descriptorUpdateTemplate,
+ VkPipelineLayout _layout, uint32_t set, const void *pData)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout);
+ RADV_FROM_HANDLE(radv_descriptor_update_template, templ, descriptorUpdateTemplate);
+ struct radv_descriptor_state *descriptors_state =
+ radv_get_descriptors_state(cmd_buffer, templ->bind_point);
+ struct radv_descriptor_set *push_set =
+ (struct radv_descriptor_set *)&descriptors_state->push_set.set;
- radv_describe_end_cmd_buffer(cmd_buffer);
+ assert(layout->set[set].layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR);
- vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments);
- vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.subpass_sample_locs);
+ if (!radv_init_push_descriptor_set(cmd_buffer, push_set, layout->set[set].layout,
+ templ->bind_point))
+ return;
- VkResult result = cmd_buffer->device->ws->cs_finalize(cmd_buffer->cs);
- if (result != VK_SUCCESS)
- return vk_error(cmd_buffer->device->instance, result);
+ radv_update_descriptor_set_with_template(cmd_buffer->device, cmd_buffer, push_set,
+ descriptorUpdateTemplate, pData);
- cmd_buffer->status = RADV_CMD_BUFFER_STATUS_EXECUTABLE;
+ radv_set_descriptor_set(cmd_buffer, templ->bind_point, push_set, set);
+ descriptors_state->push_dirty = true;
+}
- return cmd_buffer->record_result;
+void
+radv_CmdPushConstants(VkCommandBuffer commandBuffer, VkPipelineLayout layout,
+ VkShaderStageFlags stageFlags, uint32_t offset, uint32_t size,
+ const void *pValues)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ memcpy(cmd_buffer->push_constants + offset, pValues, size);
+ cmd_buffer->push_constant_stages |= stageFlags;
}
-static void
-radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer)
+VkResult
+radv_EndCommandBuffer(VkCommandBuffer commandBuffer)
{
- struct radv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+
+ radv_emit_mip_change_flush_default(cmd_buffer);
+
+ if (cmd_buffer->queue_family_index != RADV_QUEUE_TRANSFER) {
+ if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX6)
+ cmd_buffer->state.flush_bits |=
+ RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_WB_L2;
+
+ /* Make sure to sync all pending active queries at the end of
+       * the command buffer.
+ */
+ cmd_buffer->state.flush_bits |= cmd_buffer->active_query_flush_bits;
+
+ /* Since NGG streamout uses GDS, we need to make GDS idle when
+ * we leave the IB, otherwise another process might overwrite
+ * it while our shaders are busy.
+ */
+ if (cmd_buffer->gds_needed)
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH;
+
+ si_emit_cache_flush(cmd_buffer);
+ }
+
+ /* Make sure CP DMA is idle at the end of IBs because the kernel
+ * doesn't wait for it.
+ */
+ si_cp_dma_wait_for_idle(cmd_buffer);
+
+ radv_describe_end_cmd_buffer(cmd_buffer);
- if (!pipeline || pipeline == cmd_buffer->state.emitted_compute_pipeline)
- return;
-
- assert(!pipeline->ctx_cs.cdw);
-
- cmd_buffer->state.emitted_compute_pipeline = pipeline;
+ vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments);
+ vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.subpass_sample_locs);
- radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, pipeline->cs.cdw);
- radeon_emit_array(cmd_buffer->cs, pipeline->cs.buf, pipeline->cs.cdw);
-
- cmd_buffer->compute_scratch_size_per_wave_needed = MAX2(cmd_buffer->compute_scratch_size_per_wave_needed,
- pipeline->scratch_bytes_per_wave);
- cmd_buffer->compute_scratch_waves_wanted = MAX2(cmd_buffer->compute_scratch_waves_wanted,
- pipeline->max_waves);
-
- radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs,
- pipeline->shaders[MESA_SHADER_COMPUTE]->bo);
-
- if (unlikely(cmd_buffer->device->trace_bo))
- radv_save_pipeline(cmd_buffer, pipeline);
-}
-
-static void radv_mark_descriptor_sets_dirty(struct radv_cmd_buffer *cmd_buffer,
- VkPipelineBindPoint bind_point)
-{
- struct radv_descriptor_state *descriptors_state =
- radv_get_descriptors_state(cmd_buffer, bind_point);
+ VkResult result = cmd_buffer->device->ws->cs_finalize(cmd_buffer->cs);
+ if (result != VK_SUCCESS)
+ return vk_error(cmd_buffer->device->instance, result);
- descriptors_state->dirty |= descriptors_state->valid;
-}
+ cmd_buffer->status = RADV_CMD_BUFFER_STATUS_EXECUTABLE;
-void radv_CmdBindPipeline(
- VkCommandBuffer commandBuffer,
- VkPipelineBindPoint pipelineBindPoint,
- VkPipeline _pipeline)
+ return cmd_buffer->record_result;
+}
+
+static void
+radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
+ struct radv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
- switch (pipelineBindPoint) {
- case VK_PIPELINE_BIND_POINT_COMPUTE:
- if (cmd_buffer->state.compute_pipeline == pipeline)
- return;
- radv_mark_descriptor_sets_dirty(cmd_buffer, pipelineBindPoint);
+ if (!pipeline || pipeline == cmd_buffer->state.emitted_compute_pipeline)
+ return;
- cmd_buffer->state.compute_pipeline = pipeline;
- cmd_buffer->push_constant_stages |= VK_SHADER_STAGE_COMPUTE_BIT;
- break;
- case VK_PIPELINE_BIND_POINT_GRAPHICS:
- if (cmd_buffer->state.pipeline == pipeline)
- return;
- radv_mark_descriptor_sets_dirty(cmd_buffer, pipelineBindPoint);
+ assert(!pipeline->ctx_cs.cdw);
- bool vtx_emit_count_changed = !pipeline ||
- !cmd_buffer->state.pipeline ||
- cmd_buffer->state.pipeline->graphics.vtx_emit_num !=
- pipeline->graphics.vtx_emit_num ||
- cmd_buffer->state.pipeline->graphics.vtx_base_sgpr !=
- pipeline->graphics.vtx_base_sgpr;
- cmd_buffer->state.pipeline = pipeline;
- if (!pipeline)
- break;
+ cmd_buffer->state.emitted_compute_pipeline = pipeline;
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_PIPELINE;
- cmd_buffer->push_constant_stages |= pipeline->active_stages;
+ radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, pipeline->cs.cdw);
+ radeon_emit_array(cmd_buffer->cs, pipeline->cs.buf, pipeline->cs.cdw);
- /* the new vertex shader might not have the same user regs */
- if (vtx_emit_count_changed) {
- cmd_buffer->state.last_first_instance = -1;
- cmd_buffer->state.last_vertex_offset = -1;
- cmd_buffer->state.last_drawid = -1;
- }
+ cmd_buffer->compute_scratch_size_per_wave_needed =
+ MAX2(cmd_buffer->compute_scratch_size_per_wave_needed, pipeline->scratch_bytes_per_wave);
+ cmd_buffer->compute_scratch_waves_wanted =
+ MAX2(cmd_buffer->compute_scratch_waves_wanted, pipeline->max_waves);
- /* Prefetch all pipeline shaders at first draw time. */
- cmd_buffer->state.prefetch_L2_mask |= RADV_PREFETCH_SHADERS;
+ radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs,
+ pipeline->shaders[MESA_SHADER_COMPUTE]->bo);
- if ((cmd_buffer->device->physical_device->rad_info.chip_class == GFX10 ||
- cmd_buffer->device->physical_device->rad_info.family == CHIP_SIENNA_CICHLID) &&
- cmd_buffer->state.emitted_pipeline &&
- radv_pipeline_has_ngg(cmd_buffer->state.emitted_pipeline) &&
- !radv_pipeline_has_ngg(cmd_buffer->state.pipeline)) {
- /* Transitioning from NGG to legacy GS requires
- * VGT_FLUSH on GFX10 and Sienna Cichlid. VGT_FLUSH
- * is also emitted at the beginning of IBs when legacy
- * GS ring pointers are set.
- */
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VGT_FLUSH;
- }
+ if (unlikely(cmd_buffer->device->trace_bo))
+ radv_save_pipeline(cmd_buffer, pipeline);
+}
- radv_bind_dynamic_state(cmd_buffer, &pipeline->dynamic_state);
- radv_bind_streamout_state(cmd_buffer, pipeline);
+static void
+radv_mark_descriptor_sets_dirty(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point)
+{
+ struct radv_descriptor_state *descriptors_state =
+ radv_get_descriptors_state(cmd_buffer, bind_point);
- if (pipeline->graphics.esgs_ring_size > cmd_buffer->esgs_ring_size_needed)
- cmd_buffer->esgs_ring_size_needed = pipeline->graphics.esgs_ring_size;
- if (pipeline->graphics.gsvs_ring_size > cmd_buffer->gsvs_ring_size_needed)
- cmd_buffer->gsvs_ring_size_needed = pipeline->graphics.gsvs_ring_size;
+ descriptors_state->dirty |= descriptors_state->valid;
+}
- if (radv_pipeline_has_tess(pipeline))
- cmd_buffer->tess_rings_needed = true;
- break;
- default:
- assert(!"invalid bind point");
- break;
- }
+void
+radv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint,
+ VkPipeline _pipeline)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
+
+ switch (pipelineBindPoint) {
+ case VK_PIPELINE_BIND_POINT_COMPUTE:
+ if (cmd_buffer->state.compute_pipeline == pipeline)
+ return;
+ radv_mark_descriptor_sets_dirty(cmd_buffer, pipelineBindPoint);
+
+ cmd_buffer->state.compute_pipeline = pipeline;
+ cmd_buffer->push_constant_stages |= VK_SHADER_STAGE_COMPUTE_BIT;
+ break;
+ case VK_PIPELINE_BIND_POINT_GRAPHICS:
+ if (cmd_buffer->state.pipeline == pipeline)
+ return;
+ radv_mark_descriptor_sets_dirty(cmd_buffer, pipelineBindPoint);
+
+ bool vtx_emit_count_changed =
+ !pipeline || !cmd_buffer->state.pipeline ||
+ cmd_buffer->state.pipeline->graphics.vtx_emit_num != pipeline->graphics.vtx_emit_num ||
+ cmd_buffer->state.pipeline->graphics.vtx_base_sgpr != pipeline->graphics.vtx_base_sgpr;
+ cmd_buffer->state.pipeline = pipeline;
+ if (!pipeline)
+ break;
+
+ cmd_buffer->state.dirty |= RADV_CMD_DIRTY_PIPELINE;
+ cmd_buffer->push_constant_stages |= pipeline->active_stages;
+
+ /* the new vertex shader might not have the same user regs */
+ if (vtx_emit_count_changed) {
+ cmd_buffer->state.last_first_instance = -1;
+ cmd_buffer->state.last_vertex_offset = -1;
+ cmd_buffer->state.last_drawid = -1;
+ }
+
+ /* Prefetch all pipeline shaders at first draw time. */
+ cmd_buffer->state.prefetch_L2_mask |= RADV_PREFETCH_SHADERS;
+
+ if ((cmd_buffer->device->physical_device->rad_info.chip_class == GFX10 ||
+ cmd_buffer->device->physical_device->rad_info.family == CHIP_SIENNA_CICHLID) &&
+ cmd_buffer->state.emitted_pipeline &&
+ radv_pipeline_has_ngg(cmd_buffer->state.emitted_pipeline) &&
+ !radv_pipeline_has_ngg(cmd_buffer->state.pipeline)) {
+ /* Transitioning from NGG to legacy GS requires
+ * VGT_FLUSH on GFX10 and Sienna Cichlid. VGT_FLUSH
+ * is also emitted at the beginning of IBs when legacy
+ * GS ring pointers are set.
+ */
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VGT_FLUSH;
+ }
+
+ radv_bind_dynamic_state(cmd_buffer, &pipeline->dynamic_state);
+ radv_bind_streamout_state(cmd_buffer, pipeline);
+
+ if (pipeline->graphics.esgs_ring_size > cmd_buffer->esgs_ring_size_needed)
+ cmd_buffer->esgs_ring_size_needed = pipeline->graphics.esgs_ring_size;
+ if (pipeline->graphics.gsvs_ring_size > cmd_buffer->gsvs_ring_size_needed)
+ cmd_buffer->gsvs_ring_size_needed = pipeline->graphics.gsvs_ring_size;
+
+ if (radv_pipeline_has_tess(pipeline))
+ cmd_buffer->tess_rings_needed = true;
+ break;
+ default:
+ assert(!"invalid bind point");
+ break;
+ }
}
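
Note: radv_CmdBindPipeline tracks the compute and graphics bind points independently and returns early when the same pipeline is re-bound. A small API-level sketch of the two paths this entry point services (illustrative only; handle names are placeholders):

#include <vulkan/vulkan.h>

/* Bind a graphics and a compute pipeline to the same command buffer;
 * each bind point keeps its own state in the driver. */
static void bind_both(VkCommandBuffer cmd, VkPipeline gfx, VkPipeline comp)
{
   vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, gfx);
   vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, comp);
   /* Re-binding the same handle is a no-op (early return above). */
   vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, gfx);
}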
-void radv_CmdSetViewport(
- VkCommandBuffer commandBuffer,
- uint32_t firstViewport,
- uint32_t viewportCount,
- const VkViewport* pViewports)
+void
+radv_CmdSetViewport(VkCommandBuffer commandBuffer, uint32_t firstViewport, uint32_t viewportCount,
+ const VkViewport *pViewports)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_cmd_state *state = &cmd_buffer->state;
- ASSERTED const uint32_t total_count = firstViewport + viewportCount;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ ASSERTED const uint32_t total_count = firstViewport + viewportCount;
- assert(firstViewport < MAX_VIEWPORTS);
- assert(total_count >= 1 && total_count <= MAX_VIEWPORTS);
+ assert(firstViewport < MAX_VIEWPORTS);
+ assert(total_count >= 1 && total_count <= MAX_VIEWPORTS);
- if (total_count <= state->dynamic.viewport.count &&
- !memcmp(state->dynamic.viewport.viewports + firstViewport,
- pViewports, viewportCount * sizeof(*pViewports))) {
- return;
- }
+ if (total_count <= state->dynamic.viewport.count &&
+ !memcmp(state->dynamic.viewport.viewports + firstViewport, pViewports,
+ viewportCount * sizeof(*pViewports))) {
+ return;
+ }
- if (state->dynamic.viewport.count < total_count)
- state->dynamic.viewport.count = total_count;
+ if (state->dynamic.viewport.count < total_count)
+ state->dynamic.viewport.count = total_count;
- memcpy(state->dynamic.viewport.viewports + firstViewport, pViewports,
- viewportCount * sizeof(*pViewports));
+ memcpy(state->dynamic.viewport.viewports + firstViewport, pViewports,
+ viewportCount * sizeof(*pViewports));
- state->dirty |= RADV_CMD_DIRTY_DYNAMIC_VIEWPORT;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_VIEWPORT;
}
-void radv_CmdSetScissor(
- VkCommandBuffer commandBuffer,
- uint32_t firstScissor,
- uint32_t scissorCount,
- const VkRect2D* pScissors)
+void
+radv_CmdSetScissor(VkCommandBuffer commandBuffer, uint32_t firstScissor, uint32_t scissorCount,
+ const VkRect2D *pScissors)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_cmd_state *state = &cmd_buffer->state;
- ASSERTED const uint32_t total_count = firstScissor + scissorCount;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ ASSERTED const uint32_t total_count = firstScissor + scissorCount;
- assert(firstScissor < MAX_SCISSORS);
- assert(total_count >= 1 && total_count <= MAX_SCISSORS);
+ assert(firstScissor < MAX_SCISSORS);
+ assert(total_count >= 1 && total_count <= MAX_SCISSORS);
- if (total_count <= state->dynamic.scissor.count &&
- !memcmp(state->dynamic.scissor.scissors + firstScissor, pScissors,
- scissorCount * sizeof(*pScissors))) {
- return;
- }
+ if (total_count <= state->dynamic.scissor.count &&
+ !memcmp(state->dynamic.scissor.scissors + firstScissor, pScissors,
+ scissorCount * sizeof(*pScissors))) {
+ return;
+ }
- if (state->dynamic.scissor.count < total_count)
- state->dynamic.scissor.count = total_count;
+ if (state->dynamic.scissor.count < total_count)
+ state->dynamic.scissor.count = total_count;
- memcpy(state->dynamic.scissor.scissors + firstScissor, pScissors,
- scissorCount * sizeof(*pScissors));
+ memcpy(state->dynamic.scissor.scissors + firstScissor, pScissors,
+ scissorCount * sizeof(*pScissors));
- state->dirty |= RADV_CMD_DIRTY_DYNAMIC_SCISSOR;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_SCISSOR;
}
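
Note: every dynamic-state setter in this file follows the same pattern: compare the incoming values against the cached state, return early if nothing changed, otherwise copy the values and set the matching RADV_CMD_DIRTY_DYNAMIC_* bit so the registers get re-emitted at draw time. A hedged application-side sketch of the viewport/scissor pair (values are arbitrary examples, not from this change):

#include <vulkan/vulkan.h>

/* Set one dynamic viewport and scissor covering an 800x600 render area. */
static void set_viewport_scissor(VkCommandBuffer cmd)
{
   VkViewport vp = {
      .x = 0.0f, .y = 0.0f,
      .width = 800.0f, .height = 600.0f,
      .minDepth = 0.0f, .maxDepth = 1.0f,
   };
   VkRect2D sc = {
      .offset = {0, 0},
      .extent = {800, 600},
   };
   vkCmdSetViewport(cmd, 0, 1, &vp);
   vkCmdSetScissor(cmd, 0, 1, &sc);
   /* Repeating the same values hits the memcmp early-out above and
    * dirties nothing. */
   vkCmdSetViewport(cmd, 0, 1, &vp);
}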
-void radv_CmdSetLineWidth(
- VkCommandBuffer commandBuffer,
- float lineWidth)
+void
+radv_CmdSetLineWidth(VkCommandBuffer commandBuffer, float lineWidth)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- if (cmd_buffer->state.dynamic.line_width == lineWidth)
- return;
+ if (cmd_buffer->state.dynamic.line_width == lineWidth)
+ return;
- cmd_buffer->state.dynamic.line_width = lineWidth;
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH;
+ cmd_buffer->state.dynamic.line_width = lineWidth;
+ cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH;
}
-void radv_CmdSetDepthBias(
- VkCommandBuffer commandBuffer,
- float depthBiasConstantFactor,
- float depthBiasClamp,
- float depthBiasSlopeFactor)
+void
+radv_CmdSetDepthBias(VkCommandBuffer commandBuffer, float depthBiasConstantFactor,
+ float depthBiasClamp, float depthBiasSlopeFactor)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_cmd_state *state = &cmd_buffer->state;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
- if (state->dynamic.depth_bias.bias == depthBiasConstantFactor &&
- state->dynamic.depth_bias.clamp == depthBiasClamp &&
- state->dynamic.depth_bias.slope == depthBiasSlopeFactor) {
- return;
- }
+ if (state->dynamic.depth_bias.bias == depthBiasConstantFactor &&
+ state->dynamic.depth_bias.clamp == depthBiasClamp &&
+ state->dynamic.depth_bias.slope == depthBiasSlopeFactor) {
+ return;
+ }
- state->dynamic.depth_bias.bias = depthBiasConstantFactor;
- state->dynamic.depth_bias.clamp = depthBiasClamp;
- state->dynamic.depth_bias.slope = depthBiasSlopeFactor;
+ state->dynamic.depth_bias.bias = depthBiasConstantFactor;
+ state->dynamic.depth_bias.clamp = depthBiasClamp;
+ state->dynamic.depth_bias.slope = depthBiasSlopeFactor;
- state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS;
}
-void radv_CmdSetBlendConstants(
- VkCommandBuffer commandBuffer,
- const float blendConstants[4])
+void
+radv_CmdSetBlendConstants(VkCommandBuffer commandBuffer, const float blendConstants[4])
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_cmd_state *state = &cmd_buffer->state;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
- if (!memcmp(state->dynamic.blend_constants, blendConstants, sizeof(float) * 4))
- return;
+ if (!memcmp(state->dynamic.blend_constants, blendConstants, sizeof(float) * 4))
+ return;
- memcpy(state->dynamic.blend_constants, blendConstants, sizeof(float) * 4);
+ memcpy(state->dynamic.blend_constants, blendConstants, sizeof(float) * 4);
- state->dirty |= RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS;
}
-void radv_CmdSetDepthBounds(
- VkCommandBuffer commandBuffer,
- float minDepthBounds,
- float maxDepthBounds)
+void
+radv_CmdSetDepthBounds(VkCommandBuffer commandBuffer, float minDepthBounds, float maxDepthBounds)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_cmd_state *state = &cmd_buffer->state;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
- if (state->dynamic.depth_bounds.min == minDepthBounds &&
- state->dynamic.depth_bounds.max == maxDepthBounds) {
- return;
- }
+ if (state->dynamic.depth_bounds.min == minDepthBounds &&
+ state->dynamic.depth_bounds.max == maxDepthBounds) {
+ return;
+ }
- state->dynamic.depth_bounds.min = minDepthBounds;
- state->dynamic.depth_bounds.max = maxDepthBounds;
+ state->dynamic.depth_bounds.min = minDepthBounds;
+ state->dynamic.depth_bounds.max = maxDepthBounds;
- state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS;
}
-void radv_CmdSetStencilCompareMask(
- VkCommandBuffer commandBuffer,
- VkStencilFaceFlags faceMask,
- uint32_t compareMask)
+void
+radv_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask,
+ uint32_t compareMask)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_cmd_state *state = &cmd_buffer->state;
- bool front_same = state->dynamic.stencil_compare_mask.front == compareMask;
- bool back_same = state->dynamic.stencil_compare_mask.back == compareMask;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ bool front_same = state->dynamic.stencil_compare_mask.front == compareMask;
+ bool back_same = state->dynamic.stencil_compare_mask.back == compareMask;
- if ((!(faceMask & VK_STENCIL_FACE_FRONT_BIT) || front_same) &&
- (!(faceMask & VK_STENCIL_FACE_BACK_BIT) || back_same)) {
- return;
- }
+ if ((!(faceMask & VK_STENCIL_FACE_FRONT_BIT) || front_same) &&
+ (!(faceMask & VK_STENCIL_FACE_BACK_BIT) || back_same)) {
+ return;
+ }
- if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
- state->dynamic.stencil_compare_mask.front = compareMask;
- if (faceMask & VK_STENCIL_FACE_BACK_BIT)
- state->dynamic.stencil_compare_mask.back = compareMask;
+ if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
+ state->dynamic.stencil_compare_mask.front = compareMask;
+ if (faceMask & VK_STENCIL_FACE_BACK_BIT)
+ state->dynamic.stencil_compare_mask.back = compareMask;
- state->dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK;
}
-void radv_CmdSetStencilWriteMask(
- VkCommandBuffer commandBuffer,
- VkStencilFaceFlags faceMask,
- uint32_t writeMask)
+void
+radv_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask,
+ uint32_t writeMask)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_cmd_state *state = &cmd_buffer->state;
- bool front_same = state->dynamic.stencil_write_mask.front == writeMask;
- bool back_same = state->dynamic.stencil_write_mask.back == writeMask;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ bool front_same = state->dynamic.stencil_write_mask.front == writeMask;
+ bool back_same = state->dynamic.stencil_write_mask.back == writeMask;
- if ((!(faceMask & VK_STENCIL_FACE_FRONT_BIT) || front_same) &&
- (!(faceMask & VK_STENCIL_FACE_BACK_BIT) || back_same)) {
- return;
- }
+ if ((!(faceMask & VK_STENCIL_FACE_FRONT_BIT) || front_same) &&
+ (!(faceMask & VK_STENCIL_FACE_BACK_BIT) || back_same)) {
+ return;
+ }
- if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
- state->dynamic.stencil_write_mask.front = writeMask;
- if (faceMask & VK_STENCIL_FACE_BACK_BIT)
- state->dynamic.stencil_write_mask.back = writeMask;
+ if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
+ state->dynamic.stencil_write_mask.front = writeMask;
+ if (faceMask & VK_STENCIL_FACE_BACK_BIT)
+ state->dynamic.stencil_write_mask.back = writeMask;
- state->dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK;
}
-void radv_CmdSetStencilReference(
- VkCommandBuffer commandBuffer,
- VkStencilFaceFlags faceMask,
- uint32_t reference)
+void
+radv_CmdSetStencilReference(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask,
+ uint32_t reference)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_cmd_state *state = &cmd_buffer->state;
- bool front_same = state->dynamic.stencil_reference.front == reference;
- bool back_same = state->dynamic.stencil_reference.back == reference;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ bool front_same = state->dynamic.stencil_reference.front == reference;
+ bool back_same = state->dynamic.stencil_reference.back == reference;
- if ((!(faceMask & VK_STENCIL_FACE_FRONT_BIT) || front_same) &&
- (!(faceMask & VK_STENCIL_FACE_BACK_BIT) || back_same)) {
- return;
- }
+ if ((!(faceMask & VK_STENCIL_FACE_FRONT_BIT) || front_same) &&
+ (!(faceMask & VK_STENCIL_FACE_BACK_BIT) || back_same)) {
+ return;
+ }
- if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
- cmd_buffer->state.dynamic.stencil_reference.front = reference;
- if (faceMask & VK_STENCIL_FACE_BACK_BIT)
- cmd_buffer->state.dynamic.stencil_reference.back = reference;
+ if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
+ cmd_buffer->state.dynamic.stencil_reference.front = reference;
+ if (faceMask & VK_STENCIL_FACE_BACK_BIT)
+ cmd_buffer->state.dynamic.stencil_reference.back = reference;
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE;
+ cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE;
}
-void radv_CmdSetDiscardRectangleEXT(
- VkCommandBuffer commandBuffer,
- uint32_t firstDiscardRectangle,
- uint32_t discardRectangleCount,
- const VkRect2D* pDiscardRectangles)
+void
+radv_CmdSetDiscardRectangleEXT(VkCommandBuffer commandBuffer, uint32_t firstDiscardRectangle,
+ uint32_t discardRectangleCount, const VkRect2D *pDiscardRectangles)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_cmd_state *state = &cmd_buffer->state;
- ASSERTED const uint32_t total_count = firstDiscardRectangle + discardRectangleCount;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ ASSERTED const uint32_t total_count = firstDiscardRectangle + discardRectangleCount;
- assert(firstDiscardRectangle < MAX_DISCARD_RECTANGLES);
- assert(total_count >= 1 && total_count <= MAX_DISCARD_RECTANGLES);
+ assert(firstDiscardRectangle < MAX_DISCARD_RECTANGLES);
+ assert(total_count >= 1 && total_count <= MAX_DISCARD_RECTANGLES);
- if (!memcmp(state->dynamic.discard_rectangle.rectangles + firstDiscardRectangle,
- pDiscardRectangles, discardRectangleCount * sizeof(*pDiscardRectangles))) {
- return;
- }
+ if (!memcmp(state->dynamic.discard_rectangle.rectangles + firstDiscardRectangle,
+ pDiscardRectangles, discardRectangleCount * sizeof(*pDiscardRectangles))) {
+ return;
+ }
- typed_memcpy(&state->dynamic.discard_rectangle.rectangles[firstDiscardRectangle],
- pDiscardRectangles, discardRectangleCount);
+ typed_memcpy(&state->dynamic.discard_rectangle.rectangles[firstDiscardRectangle],
+ pDiscardRectangles, discardRectangleCount);
- state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE;
}
-void radv_CmdSetSampleLocationsEXT(
- VkCommandBuffer commandBuffer,
- const VkSampleLocationsInfoEXT* pSampleLocationsInfo)
+void
+radv_CmdSetSampleLocationsEXT(VkCommandBuffer commandBuffer,
+ const VkSampleLocationsInfoEXT *pSampleLocationsInfo)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_cmd_state *state = &cmd_buffer->state;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
- assert(pSampleLocationsInfo->sampleLocationsCount <= MAX_SAMPLE_LOCATIONS);
+ assert(pSampleLocationsInfo->sampleLocationsCount <= MAX_SAMPLE_LOCATIONS);
- state->dynamic.sample_location.per_pixel = pSampleLocationsInfo->sampleLocationsPerPixel;
- state->dynamic.sample_location.grid_size = pSampleLocationsInfo->sampleLocationGridSize;
- state->dynamic.sample_location.count = pSampleLocationsInfo->sampleLocationsCount;
- typed_memcpy(&state->dynamic.sample_location.locations[0],
- pSampleLocationsInfo->pSampleLocations,
- pSampleLocationsInfo->sampleLocationsCount);
+ state->dynamic.sample_location.per_pixel = pSampleLocationsInfo->sampleLocationsPerPixel;
+ state->dynamic.sample_location.grid_size = pSampleLocationsInfo->sampleLocationGridSize;
+ state->dynamic.sample_location.count = pSampleLocationsInfo->sampleLocationsCount;
+ typed_memcpy(&state->dynamic.sample_location.locations[0],
+ pSampleLocationsInfo->pSampleLocations, pSampleLocationsInfo->sampleLocationsCount);
- state->dirty |= RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS;
}
-void radv_CmdSetLineStippleEXT(
- VkCommandBuffer commandBuffer,
- uint32_t lineStippleFactor,
- uint16_t lineStipplePattern)
+void
+radv_CmdSetLineStippleEXT(VkCommandBuffer commandBuffer, uint32_t lineStippleFactor,
+ uint16_t lineStipplePattern)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_cmd_state *state = &cmd_buffer->state;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
- if (state->dynamic.line_stipple.factor == lineStippleFactor &&
- state->dynamic.line_stipple.pattern == lineStipplePattern)
- return;
+ if (state->dynamic.line_stipple.factor == lineStippleFactor &&
+ state->dynamic.line_stipple.pattern == lineStipplePattern)
+ return;
- state->dynamic.line_stipple.factor = lineStippleFactor;
- state->dynamic.line_stipple.pattern = lineStipplePattern;
+ state->dynamic.line_stipple.factor = lineStippleFactor;
+ state->dynamic.line_stipple.pattern = lineStipplePattern;
- state->dirty |= RADV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE;
}
-void radv_CmdSetCullModeEXT(
- VkCommandBuffer commandBuffer,
- VkCullModeFlags cullMode)
+void
+radv_CmdSetCullModeEXT(VkCommandBuffer commandBuffer, VkCullModeFlags cullMode)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_cmd_state *state = &cmd_buffer->state;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
- if (state->dynamic.cull_mode == cullMode)
- return;
+ if (state->dynamic.cull_mode == cullMode)
+ return;
- state->dynamic.cull_mode = cullMode;
+ state->dynamic.cull_mode = cullMode;
- state->dirty |= RADV_CMD_DIRTY_DYNAMIC_CULL_MODE;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_CULL_MODE;
}
-void radv_CmdSetFrontFaceEXT(
- VkCommandBuffer commandBuffer,
- VkFrontFace frontFace)
+void
+radv_CmdSetFrontFaceEXT(VkCommandBuffer commandBuffer, VkFrontFace frontFace)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_cmd_state *state = &cmd_buffer->state;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
- if (state->dynamic.front_face == frontFace)
- return;
+ if (state->dynamic.front_face == frontFace)
+ return;
- state->dynamic.front_face = frontFace;
+ state->dynamic.front_face = frontFace;
- state->dirty |= RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE;
}
-void radv_CmdSetPrimitiveTopologyEXT(
- VkCommandBuffer commandBuffer,
- VkPrimitiveTopology primitiveTopology)
+void
+radv_CmdSetPrimitiveTopologyEXT(VkCommandBuffer commandBuffer,
+ VkPrimitiveTopology primitiveTopology)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_cmd_state *state = &cmd_buffer->state;
- unsigned primitive_topology = si_translate_prim(primitiveTopology);
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ unsigned primitive_topology = si_translate_prim(primitiveTopology);
- if (state->dynamic.primitive_topology == primitive_topology)
- return;
+ if (state->dynamic.primitive_topology == primitive_topology)
+ return;
- state->dynamic.primitive_topology = primitive_topology;
+ state->dynamic.primitive_topology = primitive_topology;
- state->dirty |= RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY;
}
-void radv_CmdSetViewportWithCountEXT(
- VkCommandBuffer commandBuffer,
- uint32_t viewportCount,
- const VkViewport* pViewports)
+void
+radv_CmdSetViewportWithCountEXT(VkCommandBuffer commandBuffer, uint32_t viewportCount,
+ const VkViewport *pViewports)
{
- radv_CmdSetViewport(commandBuffer, 0, viewportCount, pViewports);
+ radv_CmdSetViewport(commandBuffer, 0, viewportCount, pViewports);
}
-void radv_CmdSetScissorWithCountEXT(
- VkCommandBuffer commandBuffer,
- uint32_t scissorCount,
- const VkRect2D* pScissors)
+void
+radv_CmdSetScissorWithCountEXT(VkCommandBuffer commandBuffer, uint32_t scissorCount,
+ const VkRect2D *pScissors)
{
- radv_CmdSetScissor(commandBuffer, 0, scissorCount, pScissors);
+ radv_CmdSetScissor(commandBuffer, 0, scissorCount, pScissors);
}
-void radv_CmdSetDepthTestEnableEXT(
- VkCommandBuffer commandBuffer,
- VkBool32 depthTestEnable)
+void
+radv_CmdSetDepthTestEnableEXT(VkCommandBuffer commandBuffer, VkBool32 depthTestEnable)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_cmd_state *state = &cmd_buffer->state;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
- if (state->dynamic.depth_test_enable == depthTestEnable)
- return;
+ if (state->dynamic.depth_test_enable == depthTestEnable)
+ return;
- state->dynamic.depth_test_enable = depthTestEnable;
+ state->dynamic.depth_test_enable = depthTestEnable;
- state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE;
}
-void radv_CmdSetDepthWriteEnableEXT(
- VkCommandBuffer commandBuffer,
- VkBool32 depthWriteEnable)
+void
+radv_CmdSetDepthWriteEnableEXT(VkCommandBuffer commandBuffer, VkBool32 depthWriteEnable)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_cmd_state *state = &cmd_buffer->state;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
- if (state->dynamic.depth_write_enable == depthWriteEnable)
- return;
+ if (state->dynamic.depth_write_enable == depthWriteEnable)
+ return;
- state->dynamic.depth_write_enable = depthWriteEnable;
+ state->dynamic.depth_write_enable = depthWriteEnable;
- state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE;
}
-void radv_CmdSetDepthCompareOpEXT(
- VkCommandBuffer commandBuffer,
- VkCompareOp depthCompareOp)
+void
+radv_CmdSetDepthCompareOpEXT(VkCommandBuffer commandBuffer, VkCompareOp depthCompareOp)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_cmd_state *state = &cmd_buffer->state;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
- if (state->dynamic.depth_compare_op == depthCompareOp)
- return;
+ if (state->dynamic.depth_compare_op == depthCompareOp)
+ return;
- state->dynamic.depth_compare_op = depthCompareOp;
+ state->dynamic.depth_compare_op = depthCompareOp;
- state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP;
}
-void radv_CmdSetDepthBoundsTestEnableEXT(
- VkCommandBuffer commandBuffer,
- VkBool32 depthBoundsTestEnable)
+void
+radv_CmdSetDepthBoundsTestEnableEXT(VkCommandBuffer commandBuffer, VkBool32 depthBoundsTestEnable)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_cmd_state *state = &cmd_buffer->state;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
- if (state->dynamic.depth_bounds_test_enable == depthBoundsTestEnable)
- return;
+ if (state->dynamic.depth_bounds_test_enable == depthBoundsTestEnable)
+ return;
- state->dynamic.depth_bounds_test_enable = depthBoundsTestEnable;
+ state->dynamic.depth_bounds_test_enable = depthBoundsTestEnable;
- state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE;
}
-void radv_CmdSetStencilTestEnableEXT(
- VkCommandBuffer commandBuffer,
- VkBool32 stencilTestEnable)
+void
+radv_CmdSetStencilTestEnableEXT(VkCommandBuffer commandBuffer, VkBool32 stencilTestEnable)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_cmd_state *state = &cmd_buffer->state;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
- if (state->dynamic.stencil_test_enable == stencilTestEnable)
- return;
+ if (state->dynamic.stencil_test_enable == stencilTestEnable)
+ return;
- state->dynamic.stencil_test_enable = stencilTestEnable;
+ state->dynamic.stencil_test_enable = stencilTestEnable;
- state->dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE;
}
-void radv_CmdSetStencilOpEXT(
- VkCommandBuffer commandBuffer,
- VkStencilFaceFlags faceMask,
- VkStencilOp failOp,
- VkStencilOp passOp,
- VkStencilOp depthFailOp,
- VkCompareOp compareOp)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_cmd_state *state = &cmd_buffer->state;
- bool front_same =
- state->dynamic.stencil_op.front.fail_op == failOp &&
- state->dynamic.stencil_op.front.pass_op == passOp &&
- state->dynamic.stencil_op.front.depth_fail_op == depthFailOp &&
- state->dynamic.stencil_op.front.compare_op == compareOp;
- bool back_same =
- state->dynamic.stencil_op.back.fail_op == failOp &&
- state->dynamic.stencil_op.back.pass_op == passOp &&
- state->dynamic.stencil_op.back.depth_fail_op == depthFailOp &&
- state->dynamic.stencil_op.back.compare_op == compareOp;
-
- if ((!(faceMask & VK_STENCIL_FACE_FRONT_BIT) || front_same) &&
- (!(faceMask & VK_STENCIL_FACE_BACK_BIT) || back_same))
- return;
-
- if (faceMask & VK_STENCIL_FACE_FRONT_BIT) {
- state->dynamic.stencil_op.front.fail_op = failOp;
- state->dynamic.stencil_op.front.pass_op = passOp;
- state->dynamic.stencil_op.front.depth_fail_op = depthFailOp;
- state->dynamic.stencil_op.front.compare_op = compareOp;
- }
-
- if (faceMask & VK_STENCIL_FACE_BACK_BIT) {
- state->dynamic.stencil_op.back.fail_op = failOp;
- state->dynamic.stencil_op.back.pass_op = passOp;
- state->dynamic.stencil_op.back.depth_fail_op = depthFailOp;
- state->dynamic.stencil_op.back.compare_op = compareOp;
- }
-
- state->dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP;
+void
+radv_CmdSetStencilOpEXT(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask,
+ VkStencilOp failOp, VkStencilOp passOp, VkStencilOp depthFailOp,
+ VkCompareOp compareOp)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ bool front_same = state->dynamic.stencil_op.front.fail_op == failOp &&
+ state->dynamic.stencil_op.front.pass_op == passOp &&
+ state->dynamic.stencil_op.front.depth_fail_op == depthFailOp &&
+ state->dynamic.stencil_op.front.compare_op == compareOp;
+ bool back_same = state->dynamic.stencil_op.back.fail_op == failOp &&
+ state->dynamic.stencil_op.back.pass_op == passOp &&
+ state->dynamic.stencil_op.back.depth_fail_op == depthFailOp &&
+ state->dynamic.stencil_op.back.compare_op == compareOp;
+
+ if ((!(faceMask & VK_STENCIL_FACE_FRONT_BIT) || front_same) &&
+ (!(faceMask & VK_STENCIL_FACE_BACK_BIT) || back_same))
+ return;
+
+ if (faceMask & VK_STENCIL_FACE_FRONT_BIT) {
+ state->dynamic.stencil_op.front.fail_op = failOp;
+ state->dynamic.stencil_op.front.pass_op = passOp;
+ state->dynamic.stencil_op.front.depth_fail_op = depthFailOp;
+ state->dynamic.stencil_op.front.compare_op = compareOp;
+ }
+
+ if (faceMask & VK_STENCIL_FACE_BACK_BIT) {
+ state->dynamic.stencil_op.back.fail_op = failOp;
+ state->dynamic.stencil_op.back.pass_op = passOp;
+ state->dynamic.stencil_op.back.depth_fail_op = depthFailOp;
+ state->dynamic.stencil_op.back.compare_op = compareOp;
+ }
+
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP;
}
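
Note: the *EXT setters above come from VK_EXT_extended_dynamic_state; they only cache values and set dirty bits, the actual registers are emitted later at draw time. A sketch of how an application might drive a few of them, assuming the bound pipeline declared the corresponding VK_DYNAMIC_STATE_*_EXT states and the extension entry points have been resolved (e.g. via vkGetDeviceProcAddr); illustrative only:

#include <vulkan/vulkan.h>

/* Flip a handful of extended dynamic states between draws. */
static void set_extended_dynamic_state(VkCommandBuffer cmd)
{
   vkCmdSetCullModeEXT(cmd, VK_CULL_MODE_BACK_BIT);
   vkCmdSetFrontFaceEXT(cmd, VK_FRONT_FACE_COUNTER_CLOCKWISE);
   vkCmdSetDepthTestEnableEXT(cmd, VK_TRUE);
   vkCmdSetDepthCompareOpEXT(cmd, VK_COMPARE_OP_LESS_OR_EQUAL);
   vkCmdSetStencilTestEnableEXT(cmd, VK_TRUE);
   vkCmdSetStencilOpEXT(cmd, VK_STENCIL_FACE_FRONT_AND_BACK,
                        VK_STENCIL_OP_KEEP,    /* failOp */
                        VK_STENCIL_OP_REPLACE, /* passOp */
                        VK_STENCIL_OP_KEEP,    /* depthFailOp */
                        VK_COMPARE_OP_ALWAYS); /* compareOp */
}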
-void radv_CmdSetFragmentShadingRateKHR(
- VkCommandBuffer commandBuffer,
- const VkExtent2D* pFragmentSize,
- const VkFragmentShadingRateCombinerOpKHR combinerOps[2])
+void
+radv_CmdSetFragmentShadingRateKHR(VkCommandBuffer commandBuffer, const VkExtent2D *pFragmentSize,
+ const VkFragmentShadingRateCombinerOpKHR combinerOps[2])
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_cmd_state *state = &cmd_buffer->state;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
- if (state->dynamic.fragment_shading_rate.size.width == pFragmentSize->width &&
- state->dynamic.fragment_shading_rate.size.height == pFragmentSize->height &&
- state->dynamic.fragment_shading_rate.combiner_ops[0] == combinerOps[0] &&
- state->dynamic.fragment_shading_rate.combiner_ops[1] == combinerOps[1])
- return;
-
- state->dynamic.fragment_shading_rate.size = *pFragmentSize;
- for (unsigned i = 0; i < 2; i++)
- state->dynamic.fragment_shading_rate.combiner_ops[i] = combinerOps[i];
-
- state->dirty |= RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE;
-}
-
-void radv_CmdExecuteCommands(
- VkCommandBuffer commandBuffer,
- uint32_t commandBufferCount,
- const VkCommandBuffer* pCmdBuffers)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, primary, commandBuffer);
-
- assert(commandBufferCount > 0);
-
- radv_emit_mip_change_flush_default(primary);
-
- /* Emit pending flushes on primary prior to executing secondary */
- si_emit_cache_flush(primary);
-
- for (uint32_t i = 0; i < commandBufferCount; i++) {
- RADV_FROM_HANDLE(radv_cmd_buffer, secondary, pCmdBuffers[i]);
-
- primary->scratch_size_per_wave_needed = MAX2(primary->scratch_size_per_wave_needed,
- secondary->scratch_size_per_wave_needed);
- primary->scratch_waves_wanted = MAX2(primary->scratch_waves_wanted,
- secondary->scratch_waves_wanted);
- primary->compute_scratch_size_per_wave_needed = MAX2(primary->compute_scratch_size_per_wave_needed,
- secondary->compute_scratch_size_per_wave_needed);
- primary->compute_scratch_waves_wanted = MAX2(primary->compute_scratch_waves_wanted,
- secondary->compute_scratch_waves_wanted);
-
- if (secondary->esgs_ring_size_needed > primary->esgs_ring_size_needed)
- primary->esgs_ring_size_needed = secondary->esgs_ring_size_needed;
- if (secondary->gsvs_ring_size_needed > primary->gsvs_ring_size_needed)
- primary->gsvs_ring_size_needed = secondary->gsvs_ring_size_needed;
- if (secondary->tess_rings_needed)
- primary->tess_rings_needed = true;
- if (secondary->sample_positions_needed)
- primary->sample_positions_needed = true;
- if (secondary->gds_needed)
- primary->gds_needed = true;
-
- if (!secondary->state.framebuffer &&
- (primary->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER)) {
- /* Emit the framebuffer state from primary if secondary
- * has been recorded without a framebuffer, otherwise
- * fast color/depth clears can't work.
- */
- radv_emit_fb_mip_change_flush(primary);
- radv_emit_framebuffer_state(primary);
- }
-
- primary->device->ws->cs_execute_secondary(primary->cs, secondary->cs);
-
-
- /* When the secondary command buffer is compute only we don't
- * need to re-emit the current graphics pipeline.
- */
- if (secondary->state.emitted_pipeline) {
- primary->state.emitted_pipeline =
- secondary->state.emitted_pipeline;
- }
-
- /* When the secondary command buffer is graphics only we don't
- * need to re-emit the current compute pipeline.
- */
- if (secondary->state.emitted_compute_pipeline) {
- primary->state.emitted_compute_pipeline =
- secondary->state.emitted_compute_pipeline;
- }
-
- /* Only re-emit the draw packets when needed. */
- if (secondary->state.last_primitive_reset_en != -1) {
- primary->state.last_primitive_reset_en =
- secondary->state.last_primitive_reset_en;
- }
-
- if (secondary->state.last_primitive_reset_index) {
- primary->state.last_primitive_reset_index =
- secondary->state.last_primitive_reset_index;
- }
-
- if (secondary->state.last_ia_multi_vgt_param) {
- primary->state.last_ia_multi_vgt_param =
- secondary->state.last_ia_multi_vgt_param;
- }
-
- primary->state.last_first_instance = secondary->state.last_first_instance;
- primary->state.last_num_instances = secondary->state.last_num_instances;
- primary->state.last_drawid = secondary->state.last_drawid;
- primary->state.last_vertex_offset = secondary->state.last_vertex_offset;
- primary->state.last_sx_ps_downconvert = secondary->state.last_sx_ps_downconvert;
- primary->state.last_sx_blend_opt_epsilon = secondary->state.last_sx_blend_opt_epsilon;
- primary->state.last_sx_blend_opt_control = secondary->state.last_sx_blend_opt_control;
-
- if (secondary->state.last_index_type != -1) {
- primary->state.last_index_type =
- secondary->state.last_index_type;
- }
- }
-
- /* After executing commands from secondary buffers we have to dirty
- * some states.
- */
- primary->state.dirty |= RADV_CMD_DIRTY_PIPELINE |
- RADV_CMD_DIRTY_INDEX_BUFFER |
- RADV_CMD_DIRTY_DYNAMIC_ALL;
- radv_mark_descriptor_sets_dirty(primary, VK_PIPELINE_BIND_POINT_GRAPHICS);
- radv_mark_descriptor_sets_dirty(primary, VK_PIPELINE_BIND_POINT_COMPUTE);
-}
-
-VkResult radv_CreateCommandPool(
- VkDevice _device,
- const VkCommandPoolCreateInfo* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkCommandPool* pCmdPool)
-{
- RADV_FROM_HANDLE(radv_device, device, _device);
- struct radv_cmd_pool *pool;
-
- pool = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*pool), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (pool == NULL)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
-
- vk_object_base_init(&device->vk, &pool->base,
- VK_OBJECT_TYPE_COMMAND_POOL);
-
- if (pAllocator)
- pool->alloc = *pAllocator;
- else
- pool->alloc = device->vk.alloc;
-
- list_inithead(&pool->cmd_buffers);
- list_inithead(&pool->free_cmd_buffers);
-
- pool->queue_family_index = pCreateInfo->queueFamilyIndex;
+ if (state->dynamic.fragment_shading_rate.size.width == pFragmentSize->width &&
+ state->dynamic.fragment_shading_rate.size.height == pFragmentSize->height &&
+ state->dynamic.fragment_shading_rate.combiner_ops[0] == combinerOps[0] &&
+ state->dynamic.fragment_shading_rate.combiner_ops[1] == combinerOps[1])
+ return;
- *pCmdPool = radv_cmd_pool_to_handle(pool);
+ state->dynamic.fragment_shading_rate.size = *pFragmentSize;
+ for (unsigned i = 0; i < 2; i++)
+ state->dynamic.fragment_shading_rate.combiner_ops[i] = combinerOps[i];
- return VK_SUCCESS;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE;
+}
+void
+radv_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCount,
+ const VkCommandBuffer *pCmdBuffers)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, primary, commandBuffer);
+
+ assert(commandBufferCount > 0);
+
+ radv_emit_mip_change_flush_default(primary);
+
+ /* Emit pending flushes on primary prior to executing secondary */
+ si_emit_cache_flush(primary);
+
+ for (uint32_t i = 0; i < commandBufferCount; i++) {
+ RADV_FROM_HANDLE(radv_cmd_buffer, secondary, pCmdBuffers[i]);
+
+ primary->scratch_size_per_wave_needed =
+ MAX2(primary->scratch_size_per_wave_needed, secondary->scratch_size_per_wave_needed);
+ primary->scratch_waves_wanted =
+ MAX2(primary->scratch_waves_wanted, secondary->scratch_waves_wanted);
+ primary->compute_scratch_size_per_wave_needed =
+ MAX2(primary->compute_scratch_size_per_wave_needed,
+ secondary->compute_scratch_size_per_wave_needed);
+ primary->compute_scratch_waves_wanted =
+ MAX2(primary->compute_scratch_waves_wanted, secondary->compute_scratch_waves_wanted);
+
+ if (secondary->esgs_ring_size_needed > primary->esgs_ring_size_needed)
+ primary->esgs_ring_size_needed = secondary->esgs_ring_size_needed;
+ if (secondary->gsvs_ring_size_needed > primary->gsvs_ring_size_needed)
+ primary->gsvs_ring_size_needed = secondary->gsvs_ring_size_needed;
+ if (secondary->tess_rings_needed)
+ primary->tess_rings_needed = true;
+ if (secondary->sample_positions_needed)
+ primary->sample_positions_needed = true;
+ if (secondary->gds_needed)
+ primary->gds_needed = true;
+
+ if (!secondary->state.framebuffer && (primary->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER)) {
+ /* Emit the framebuffer state from primary if secondary
+ * has been recorded without a framebuffer, otherwise
+ * fast color/depth clears can't work.
+ */
+ radv_emit_fb_mip_change_flush(primary);
+ radv_emit_framebuffer_state(primary);
+ }
+
+ primary->device->ws->cs_execute_secondary(primary->cs, secondary->cs);
+
+ /* When the secondary command buffer is compute only we don't
+ * need to re-emit the current graphics pipeline.
+ */
+ if (secondary->state.emitted_pipeline) {
+ primary->state.emitted_pipeline = secondary->state.emitted_pipeline;
+ }
+
+ /* When the secondary command buffer is graphics only we don't
+ * need to re-emit the current compute pipeline.
+ */
+ if (secondary->state.emitted_compute_pipeline) {
+ primary->state.emitted_compute_pipeline = secondary->state.emitted_compute_pipeline;
+ }
+
+ /* Only re-emit the draw packets when needed. */
+ if (secondary->state.last_primitive_reset_en != -1) {
+ primary->state.last_primitive_reset_en = secondary->state.last_primitive_reset_en;
+ }
+
+ if (secondary->state.last_primitive_reset_index) {
+ primary->state.last_primitive_reset_index = secondary->state.last_primitive_reset_index;
+ }
+
+ if (secondary->state.last_ia_multi_vgt_param) {
+ primary->state.last_ia_multi_vgt_param = secondary->state.last_ia_multi_vgt_param;
+ }
+
+ primary->state.last_first_instance = secondary->state.last_first_instance;
+ primary->state.last_num_instances = secondary->state.last_num_instances;
+ primary->state.last_drawid = secondary->state.last_drawid;
+ primary->state.last_vertex_offset = secondary->state.last_vertex_offset;
+ primary->state.last_sx_ps_downconvert = secondary->state.last_sx_ps_downconvert;
+ primary->state.last_sx_blend_opt_epsilon = secondary->state.last_sx_blend_opt_epsilon;
+ primary->state.last_sx_blend_opt_control = secondary->state.last_sx_blend_opt_control;
+
+ if (secondary->state.last_index_type != -1) {
+ primary->state.last_index_type = secondary->state.last_index_type;
+ }
+ }
+
+ /* After executing commands from secondary buffers we have to dirty
+ * some states.
+ */
+ primary->state.dirty |=
+ RADV_CMD_DIRTY_PIPELINE | RADV_CMD_DIRTY_INDEX_BUFFER | RADV_CMD_DIRTY_DYNAMIC_ALL;
+ radv_mark_descriptor_sets_dirty(primary, VK_PIPELINE_BIND_POINT_GRAPHICS);
+ radv_mark_descriptor_sets_dirty(primary, VK_PIPELINE_BIND_POINT_COMPUTE);
+}
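
Note: after replaying secondaries, the primary conservatively re-dirties the pipeline, index buffer and all dynamic state, while several last_* values are inherited from the last secondary so redundant draw-state packets can still be elided. A sketch of the calling pattern, assuming a pool and primary created elsewhere and a secondary used outside a render pass (illustrative only; not part of this change):

#include <vulkan/vulkan.h>

/* Record a secondary command buffer and replay it from a primary one. */
static VkResult record_and_execute(VkDevice dev, VkCommandPool pool,
                                   VkCommandBuffer primary)
{
   VkCommandBufferAllocateInfo alloc = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
      .commandPool = pool,
      .level = VK_COMMAND_BUFFER_LEVEL_SECONDARY,
      .commandBufferCount = 1,
   };
   VkCommandBuffer secondary;
   VkResult res = vkAllocateCommandBuffers(dev, &alloc, &secondary);
   if (res != VK_SUCCESS)
      return res;

   VkCommandBufferInheritanceInfo inherit = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO,
   };
   VkCommandBufferBeginInfo begin = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
      .pInheritanceInfo = &inherit,
   };
   vkBeginCommandBuffer(secondary, &begin);
   /* ... record compute or transfer work here ... */
   vkEndCommandBuffer(secondary);

   vkCmdExecuteCommands(primary, 1, &secondary);
   return VK_SUCCESS;
}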
+
+VkResult
+radv_CreateCommandPool(VkDevice _device, const VkCommandPoolCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkCommandPool *pCmdPool)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ struct radv_cmd_pool *pool;
+
+ pool =
+ vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*pool), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (pool == NULL)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ vk_object_base_init(&device->vk, &pool->base, VK_OBJECT_TYPE_COMMAND_POOL);
+
+ if (pAllocator)
+ pool->alloc = *pAllocator;
+ else
+ pool->alloc = device->vk.alloc;
+
+ list_inithead(&pool->cmd_buffers);
+ list_inithead(&pool->free_cmd_buffers);
+
+ pool->queue_family_index = pCreateInfo->queueFamilyIndex;
+
+ *pCmdPool = radv_cmd_pool_to_handle(pool);
+
+ return VK_SUCCESS;
}
-void radv_DestroyCommandPool(
- VkDevice _device,
- VkCommandPool commandPool,
- const VkAllocationCallbacks* pAllocator)
+void
+radv_DestroyCommandPool(VkDevice _device, VkCommandPool commandPool,
+ const VkAllocationCallbacks *pAllocator)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_cmd_pool, pool, commandPool);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_cmd_pool, pool, commandPool);
- if (!pool)
- return;
+ if (!pool)
+ return;
- list_for_each_entry_safe(struct radv_cmd_buffer, cmd_buffer,
- &pool->cmd_buffers, pool_link) {
- radv_destroy_cmd_buffer(cmd_buffer);
- }
+ list_for_each_entry_safe(struct radv_cmd_buffer, cmd_buffer, &pool->cmd_buffers, pool_link)
+ {
+ radv_destroy_cmd_buffer(cmd_buffer);
+ }
- list_for_each_entry_safe(struct radv_cmd_buffer, cmd_buffer,
- &pool->free_cmd_buffers, pool_link) {
- radv_destroy_cmd_buffer(cmd_buffer);
- }
+ list_for_each_entry_safe(struct radv_cmd_buffer, cmd_buffer, &pool->free_cmd_buffers, pool_link)
+ {
+ radv_destroy_cmd_buffer(cmd_buffer);
+ }
- vk_object_base_finish(&pool->base);
- vk_free2(&device->vk.alloc, pAllocator, pool);
+ vk_object_base_finish(&pool->base);
+ vk_free2(&device->vk.alloc, pAllocator, pool);
}
-VkResult radv_ResetCommandPool(
- VkDevice device,
- VkCommandPool commandPool,
- VkCommandPoolResetFlags flags)
+VkResult
+radv_ResetCommandPool(VkDevice device, VkCommandPool commandPool, VkCommandPoolResetFlags flags)
{
- RADV_FROM_HANDLE(radv_cmd_pool, pool, commandPool);
- VkResult result;
+ RADV_FROM_HANDLE(radv_cmd_pool, pool, commandPool);
+ VkResult result;
- list_for_each_entry(struct radv_cmd_buffer, cmd_buffer,
- &pool->cmd_buffers, pool_link) {
- result = radv_reset_cmd_buffer(cmd_buffer);
- if (result != VK_SUCCESS)
- return result;
- }
+ list_for_each_entry(struct radv_cmd_buffer, cmd_buffer, &pool->cmd_buffers, pool_link)
+ {
+ result = radv_reset_cmd_buffer(cmd_buffer);
+ if (result != VK_SUCCESS)
+ return result;
+ }
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
-void radv_TrimCommandPool(
- VkDevice device,
- VkCommandPool commandPool,
- VkCommandPoolTrimFlags flags)
+void
+radv_TrimCommandPool(VkDevice device, VkCommandPool commandPool, VkCommandPoolTrimFlags flags)
{
- RADV_FROM_HANDLE(radv_cmd_pool, pool, commandPool);
+ RADV_FROM_HANDLE(radv_cmd_pool, pool, commandPool);
- if (!pool)
- return;
+ if (!pool)
+ return;
- list_for_each_entry_safe(struct radv_cmd_buffer, cmd_buffer,
- &pool->free_cmd_buffers, pool_link) {
- radv_destroy_cmd_buffer(cmd_buffer);
- }
+ list_for_each_entry_safe(struct radv_cmd_buffer, cmd_buffer, &pool->free_cmd_buffers, pool_link)
+ {
+ radv_destroy_cmd_buffer(cmd_buffer);
+ }
}
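
Note: the pool entry points above operate on two lists, cmd_buffers (live) and free_cmd_buffers (recyclable): reset walks the live list, trim destroys only the free list, destroy tears down both. A lifecycle sketch with placeholder names (illustrative only; vkTrimCommandPool is the Vulkan 1.1 core form):

#include <vulkan/vulkan.h>

/* Typical command-pool lifecycle: one pool per queue family per thread. */
static VkResult pool_lifecycle(VkDevice dev, uint32_t queue_family)
{
   VkCommandPoolCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
      .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
      .queueFamilyIndex = queue_family,
   };
   VkCommandPool pool;
   VkResult res = vkCreateCommandPool(dev, &info, NULL, &pool);
   if (res != VK_SUCCESS)
      return res;

   /* ... allocate and record command buffers from `pool` ... */

   vkResetCommandPool(dev, pool, 0);      /* resets every live buffer  */
   vkTrimCommandPool(dev, pool, 0);       /* destroys freed buffers    */
   vkDestroyCommandPool(dev, pool, NULL); /* tears down both lists     */
   return VK_SUCCESS;
}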
static void
-radv_cmd_buffer_begin_subpass(struct radv_cmd_buffer *cmd_buffer,
- uint32_t subpass_id)
+radv_cmd_buffer_begin_subpass(struct radv_cmd_buffer *cmd_buffer, uint32_t subpass_id)
{
- struct radv_cmd_state *state = &cmd_buffer->state;
- struct radv_subpass *subpass = &state->pass->subpasses[subpass_id];
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ struct radv_subpass *subpass = &state->pass->subpasses[subpass_id];
- ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
- cmd_buffer->cs, 4096);
+ ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4096);
- radv_subpass_barrier(cmd_buffer, &subpass->start_barrier);
+ radv_subpass_barrier(cmd_buffer, &subpass->start_barrier);
- radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
+ radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
- radv_describe_barrier_start(cmd_buffer, RGP_BARRIER_EXTERNAL_RENDER_PASS_SYNC);
+ radv_describe_barrier_start(cmd_buffer, RGP_BARRIER_EXTERNAL_RENDER_PASS_SYNC);
- for (uint32_t i = 0; i < subpass->attachment_count; ++i) {
- const uint32_t a = subpass->attachments[i].attachment;
- if (a == VK_ATTACHMENT_UNUSED)
- continue;
+ for (uint32_t i = 0; i < subpass->attachment_count; ++i) {
+ const uint32_t a = subpass->attachments[i].attachment;
+ if (a == VK_ATTACHMENT_UNUSED)
+ continue;
- radv_handle_subpass_image_transition(cmd_buffer,
- subpass->attachments[i],
- true);
- }
+ radv_handle_subpass_image_transition(cmd_buffer, subpass->attachments[i], true);
+ }
- radv_describe_barrier_end(cmd_buffer);
+ radv_describe_barrier_end(cmd_buffer);
- radv_cmd_buffer_clear_subpass(cmd_buffer);
+ radv_cmd_buffer_clear_subpass(cmd_buffer);
- assert(cmd_buffer->cs->cdw <= cdw_max);
+ assert(cmd_buffer->cs->cdw <= cdw_max);
}
static void
radv_cmd_buffer_end_subpass(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_cmd_state *state = &cmd_buffer->state;
- const struct radv_subpass *subpass = state->subpass;
- uint32_t subpass_id = radv_get_subpass_id(cmd_buffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ const struct radv_subpass *subpass = state->subpass;
+ uint32_t subpass_id = radv_get_subpass_id(cmd_buffer);
- radv_cmd_buffer_resolve_subpass(cmd_buffer);
+ radv_cmd_buffer_resolve_subpass(cmd_buffer);
- radv_describe_barrier_start(cmd_buffer, RGP_BARRIER_EXTERNAL_RENDER_PASS_SYNC);
+ radv_describe_barrier_start(cmd_buffer, RGP_BARRIER_EXTERNAL_RENDER_PASS_SYNC);
- for (uint32_t i = 0; i < subpass->attachment_count; ++i) {
- const uint32_t a = subpass->attachments[i].attachment;
- if (a == VK_ATTACHMENT_UNUSED)
- continue;
+ for (uint32_t i = 0; i < subpass->attachment_count; ++i) {
+ const uint32_t a = subpass->attachments[i].attachment;
+ if (a == VK_ATTACHMENT_UNUSED)
+ continue;
- if (state->pass->attachments[a].last_subpass_idx != subpass_id)
- continue;
+ if (state->pass->attachments[a].last_subpass_idx != subpass_id)
+ continue;
- VkImageLayout layout = state->pass->attachments[a].final_layout;
- VkImageLayout stencil_layout = state->pass->attachments[a].stencil_final_layout;
- struct radv_subpass_attachment att = { a, layout, stencil_layout };
- radv_handle_subpass_image_transition(cmd_buffer, att, false);
- }
+ VkImageLayout layout = state->pass->attachments[a].final_layout;
+ VkImageLayout stencil_layout = state->pass->attachments[a].stencil_final_layout;
+ struct radv_subpass_attachment att = {a, layout, stencil_layout};
+ radv_handle_subpass_image_transition(cmd_buffer, att, false);
+ }
- radv_describe_barrier_end(cmd_buffer);
+ radv_describe_barrier_end(cmd_buffer);
}
void
radv_cmd_buffer_begin_render_pass(struct radv_cmd_buffer *cmd_buffer,
- const VkRenderPassBeginInfo *pRenderPassBegin,
- const struct radv_extra_render_pass_begin_info *extra_info)
+ const VkRenderPassBeginInfo *pRenderPassBegin,
+ const struct radv_extra_render_pass_begin_info *extra_info)
{
- RADV_FROM_HANDLE(radv_render_pass, pass, pRenderPassBegin->renderPass);
- RADV_FROM_HANDLE(radv_framebuffer, framebuffer, pRenderPassBegin->framebuffer);
- VkResult result;
+ RADV_FROM_HANDLE(radv_render_pass, pass, pRenderPassBegin->renderPass);
+ RADV_FROM_HANDLE(radv_framebuffer, framebuffer, pRenderPassBegin->framebuffer);
+ VkResult result;
- cmd_buffer->state.framebuffer = framebuffer;
- cmd_buffer->state.pass = pass;
- cmd_buffer->state.render_area = pRenderPassBegin->renderArea;
+ cmd_buffer->state.framebuffer = framebuffer;
+ cmd_buffer->state.pass = pass;
+ cmd_buffer->state.render_area = pRenderPassBegin->renderArea;
- result = radv_cmd_state_setup_attachments(cmd_buffer, pass, pRenderPassBegin, extra_info);
- if (result != VK_SUCCESS)
- return;
+ result = radv_cmd_state_setup_attachments(cmd_buffer, pass, pRenderPassBegin, extra_info);
+ if (result != VK_SUCCESS)
+ return;
- result = radv_cmd_state_setup_sample_locations(cmd_buffer, pass, pRenderPassBegin);
- if (result != VK_SUCCESS)
- return;
+ result = radv_cmd_state_setup_sample_locations(cmd_buffer, pass, pRenderPassBegin);
+ if (result != VK_SUCCESS)
+ return;
}
-void radv_CmdBeginRenderPass2(
- VkCommandBuffer commandBuffer,
- const VkRenderPassBeginInfo* pRenderPassBeginInfo,
- const VkSubpassBeginInfo* pSubpassBeginInfo)
+void
+radv_CmdBeginRenderPass2(VkCommandBuffer commandBuffer,
+ const VkRenderPassBeginInfo *pRenderPassBeginInfo,
+ const VkSubpassBeginInfo *pSubpassBeginInfo)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- radv_cmd_buffer_begin_render_pass(cmd_buffer, pRenderPassBeginInfo, NULL);
+ radv_cmd_buffer_begin_render_pass(cmd_buffer, pRenderPassBeginInfo, NULL);
- radv_cmd_buffer_begin_subpass(cmd_buffer, 0);
+ radv_cmd_buffer_begin_subpass(cmd_buffer, 0);
}
-void radv_CmdNextSubpass2(
- VkCommandBuffer commandBuffer,
- const VkSubpassBeginInfo* pSubpassBeginInfo,
- const VkSubpassEndInfo* pSubpassEndInfo)
+void
+radv_CmdNextSubpass2(VkCommandBuffer commandBuffer, const VkSubpassBeginInfo *pSubpassBeginInfo,
+ const VkSubpassEndInfo *pSubpassEndInfo)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- uint32_t prev_subpass = radv_get_subpass_id(cmd_buffer);
- radv_cmd_buffer_end_subpass(cmd_buffer);
- radv_cmd_buffer_begin_subpass(cmd_buffer, prev_subpass + 1);
+ uint32_t prev_subpass = radv_get_subpass_id(cmd_buffer);
+ radv_cmd_buffer_end_subpass(cmd_buffer);
+ radv_cmd_buffer_begin_subpass(cmd_buffer, prev_subpass + 1);
}
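
Note: radv_cmd_buffer_begin_render_pass records the framebuffer, pass and render area and sets up attachment/sample-location state; radv_cmd_buffer_begin_subpass then emits the start barrier, image transitions and clears, and radv_cmd_buffer_end_subpass handles resolves plus final-layout transitions before the next subpass begins. A sketch of the corresponding *2 API calls (Vulkan 1.2 core), assuming a render pass with two subpasses and one cleared color attachment created elsewhere; illustrative only:

#include <vulkan/vulkan.h>

/* Drive a two-subpass render pass with the *2 entry points. */
static void run_render_pass(VkCommandBuffer cmd, VkRenderPass pass,
                            VkFramebuffer fb, VkExtent2D extent)
{
   VkClearValue clear = {.color = {.float32 = {0.0f, 0.0f, 0.0f, 1.0f}}};
   VkRenderPassBeginInfo rp_begin = {
      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
      .renderPass = pass,
      .framebuffer = fb,
      .renderArea = {.offset = {0, 0}, .extent = extent},
      .clearValueCount = 1,
      .pClearValues = &clear,
   };
   VkSubpassBeginInfo sp_begin = {
      .sType = VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO,
      .contents = VK_SUBPASS_CONTENTS_INLINE,
   };
   VkSubpassEndInfo sp_end = {.sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO};

   vkCmdBeginRenderPass2(cmd, &rp_begin, &sp_begin);
   /* ... draws for subpass 0 ... */
   vkCmdNextSubpass2(cmd, &sp_begin, &sp_end);
   /* ... draws for subpass 1 ... */
   vkCmdEndRenderPass2(cmd, &sp_end);
}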
-static void radv_emit_view_index(struct radv_cmd_buffer *cmd_buffer, unsigned index)
-{
- struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
- for (unsigned stage = 0; stage < MESA_SHADER_STAGES; ++stage) {
- if (!radv_get_shader(pipeline, stage))
- continue;
-
- struct radv_userdata_info *loc = radv_lookup_user_sgpr(pipeline, stage, AC_UD_VIEW_INDEX);
- if (loc->sgpr_idx == -1)
- continue;
- uint32_t base_reg = pipeline->user_data_0[stage];
- radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, index);
-
- }
- if (radv_pipeline_has_gs_copy_shader(pipeline)) {
- struct radv_userdata_info *loc = &pipeline->gs_copy_shader->info.user_sgprs_locs.shader_data[AC_UD_VIEW_INDEX];
- if (loc->sgpr_idx != -1) {
- uint32_t base_reg = R_00B130_SPI_SHADER_USER_DATA_VS_0;
- radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, index);
- }
- }
+static void
+radv_emit_view_index(struct radv_cmd_buffer *cmd_buffer, unsigned index)
+{
+ struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
+ for (unsigned stage = 0; stage < MESA_SHADER_STAGES; ++stage) {
+ if (!radv_get_shader(pipeline, stage))
+ continue;
+
+ struct radv_userdata_info *loc = radv_lookup_user_sgpr(pipeline, stage, AC_UD_VIEW_INDEX);
+ if (loc->sgpr_idx == -1)
+ continue;
+ uint32_t base_reg = pipeline->user_data_0[stage];
+ radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, index);
+ }
+ if (radv_pipeline_has_gs_copy_shader(pipeline)) {
+ struct radv_userdata_info *loc =
+ &pipeline->gs_copy_shader->info.user_sgprs_locs.shader_data[AC_UD_VIEW_INDEX];
+ if (loc->sgpr_idx != -1) {
+ uint32_t base_reg = R_00B130_SPI_SHADER_USER_DATA_VS_0;
+ radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, index);
+ }
+ }
}
static void
-radv_cs_emit_draw_packet(struct radv_cmd_buffer *cmd_buffer,
- uint32_t vertex_count,
- uint32_t use_opaque)
+radv_cs_emit_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint32_t vertex_count,
+ uint32_t use_opaque)
{
- radeon_emit(cmd_buffer->cs, PKT3(PKT3_DRAW_INDEX_AUTO, 1, cmd_buffer->state.predicating));
- radeon_emit(cmd_buffer->cs, vertex_count);
- radeon_emit(cmd_buffer->cs, V_0287F0_DI_SRC_SEL_AUTO_INDEX | use_opaque);
+ radeon_emit(cmd_buffer->cs, PKT3(PKT3_DRAW_INDEX_AUTO, 1, cmd_buffer->state.predicating));
+ radeon_emit(cmd_buffer->cs, vertex_count);
+ radeon_emit(cmd_buffer->cs, V_0287F0_DI_SRC_SEL_AUTO_INDEX | use_opaque);
}
/**
@@ -5283,216 +4976,194 @@ radv_cs_emit_draw_packet(struct radv_cmd_buffer *cmd_buffer,
* Hardware uses this information to return 0 for out-of-bounds reads.
*/
static void
-radv_cs_emit_draw_indexed_packet(struct radv_cmd_buffer *cmd_buffer,
- uint64_t index_va,
- uint32_t max_index_count,
- uint32_t index_count)
+radv_cs_emit_draw_indexed_packet(struct radv_cmd_buffer *cmd_buffer, uint64_t index_va,
+ uint32_t max_index_count, uint32_t index_count)
{
- radeon_emit(cmd_buffer->cs, PKT3(PKT3_DRAW_INDEX_2, 4, cmd_buffer->state.predicating));
- radeon_emit(cmd_buffer->cs, max_index_count);
- radeon_emit(cmd_buffer->cs, index_va);
- radeon_emit(cmd_buffer->cs, index_va >> 32);
- radeon_emit(cmd_buffer->cs, index_count);
- radeon_emit(cmd_buffer->cs, V_0287F0_DI_SRC_SEL_DMA);
+ radeon_emit(cmd_buffer->cs, PKT3(PKT3_DRAW_INDEX_2, 4, cmd_buffer->state.predicating));
+ radeon_emit(cmd_buffer->cs, max_index_count);
+ radeon_emit(cmd_buffer->cs, index_va);
+ radeon_emit(cmd_buffer->cs, index_va >> 32);
+ radeon_emit(cmd_buffer->cs, index_count);
+ radeon_emit(cmd_buffer->cs, V_0287F0_DI_SRC_SEL_DMA);
}
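
Note: DRAW_INDEX_2 takes the index-buffer GPU address, the number of in-bounds indices (max_index_count, which the hardware uses to return 0 for out-of-bounds reads) and the count actually drawn; on the API side those values come from the current index-buffer binding plus the vkCmdDrawIndexed parameters. A minimal sketch (illustrative only; buffer contents assumed to be filled elsewhere):

#include <vulkan/vulkan.h>

/* Bind a 16-bit index buffer and issue one indexed draw; the driver turns
 * this into a PKT3_DRAW_INDEX_2 whose index_va/max_index_count are derived
 * from the binding and firstIndex. */
static void draw_indexed(VkCommandBuffer cmd, VkBuffer index_buf,
                         uint32_t index_count)
{
   vkCmdBindIndexBuffer(cmd, index_buf, 0 /* offset */, VK_INDEX_TYPE_UINT16);
   vkCmdDrawIndexed(cmd, index_count,
                    1,  /* instanceCount */
                    0,  /* firstIndex */
                    0,  /* vertexOffset */
                    0); /* firstInstance */
}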
/* MUST inline this function to avoid massive perf loss in drawoverhead */
ALWAYS_INLINE static void
-radv_cs_emit_indirect_draw_packet(struct radv_cmd_buffer *cmd_buffer,
- bool indexed,
- uint32_t draw_count,
- uint64_t count_va,
- uint32_t stride)
-{
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- const unsigned di_src_sel = indexed ? V_0287F0_DI_SRC_SEL_DMA : V_0287F0_DI_SRC_SEL_AUTO_INDEX;
- bool draw_id_enable = cmd_buffer->state.pipeline->graphics.uses_drawid;
- uint32_t base_reg = cmd_buffer->state.pipeline->graphics.vtx_base_sgpr;
- uint32_t vertex_offset_reg, start_instance_reg = 0, draw_id_reg = 0;
- bool predicating = cmd_buffer->state.predicating;
- assert(base_reg);
-
- /* just reset draw state for vertex data */
- cmd_buffer->state.last_first_instance = -1;
- cmd_buffer->state.last_num_instances = -1;
- cmd_buffer->state.last_drawid = -1;
- cmd_buffer->state.last_vertex_offset = -1;
-
- vertex_offset_reg = (base_reg - SI_SH_REG_OFFSET) >> 2;
- if (cmd_buffer->state.pipeline->graphics.uses_baseinstance)
- start_instance_reg = ((base_reg + (draw_id_enable ? 8 : 4)) - SI_SH_REG_OFFSET) >> 2;
- if (draw_id_enable)
- draw_id_reg = ((base_reg + 4) - SI_SH_REG_OFFSET) >> 2;
-
- if (draw_count == 1 && !count_va && !draw_id_enable) {
- radeon_emit(cs, PKT3(indexed ? PKT3_DRAW_INDEX_INDIRECT :
- PKT3_DRAW_INDIRECT, 3, predicating));
- radeon_emit(cs, 0);
- radeon_emit(cs, vertex_offset_reg);
- radeon_emit(cs, start_instance_reg);
- radeon_emit(cs, di_src_sel);
- } else {
- radeon_emit(cs, PKT3(indexed ? PKT3_DRAW_INDEX_INDIRECT_MULTI :
- PKT3_DRAW_INDIRECT_MULTI,
- 8, predicating));
- radeon_emit(cs, 0);
- radeon_emit(cs, vertex_offset_reg);
- radeon_emit(cs, start_instance_reg);
- radeon_emit(cs, draw_id_reg |
- S_2C3_DRAW_INDEX_ENABLE(draw_id_enable) |
- S_2C3_COUNT_INDIRECT_ENABLE(!!count_va));
- radeon_emit(cs, draw_count); /* count */
- radeon_emit(cs, count_va); /* count_addr */
- radeon_emit(cs, count_va >> 32);
- radeon_emit(cs, stride); /* stride */
- radeon_emit(cs, di_src_sel);
- }
+radv_cs_emit_indirect_draw_packet(struct radv_cmd_buffer *cmd_buffer, bool indexed,
+ uint32_t draw_count, uint64_t count_va, uint32_t stride)
+{
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ const unsigned di_src_sel = indexed ? V_0287F0_DI_SRC_SEL_DMA : V_0287F0_DI_SRC_SEL_AUTO_INDEX;
+ bool draw_id_enable = cmd_buffer->state.pipeline->graphics.uses_drawid;
+ uint32_t base_reg = cmd_buffer->state.pipeline->graphics.vtx_base_sgpr;
+ uint32_t vertex_offset_reg, start_instance_reg = 0, draw_id_reg = 0;
+ bool predicating = cmd_buffer->state.predicating;
+ assert(base_reg);
+
+ /* just reset draw state for vertex data */
+ cmd_buffer->state.last_first_instance = -1;
+ cmd_buffer->state.last_num_instances = -1;
+ cmd_buffer->state.last_drawid = -1;
+ cmd_buffer->state.last_vertex_offset = -1;
+
+ vertex_offset_reg = (base_reg - SI_SH_REG_OFFSET) >> 2;
+ if (cmd_buffer->state.pipeline->graphics.uses_baseinstance)
+ start_instance_reg = ((base_reg + (draw_id_enable ? 8 : 4)) - SI_SH_REG_OFFSET) >> 2;
+ if (draw_id_enable)
+ draw_id_reg = ((base_reg + 4) - SI_SH_REG_OFFSET) >> 2;
+
+ if (draw_count == 1 && !count_va && !draw_id_enable) {
+ radeon_emit(cs,
+ PKT3(indexed ? PKT3_DRAW_INDEX_INDIRECT : PKT3_DRAW_INDIRECT, 3, predicating));
+ radeon_emit(cs, 0);
+ radeon_emit(cs, vertex_offset_reg);
+ radeon_emit(cs, start_instance_reg);
+ radeon_emit(cs, di_src_sel);
+ } else {
+ radeon_emit(cs, PKT3(indexed ? PKT3_DRAW_INDEX_INDIRECT_MULTI : PKT3_DRAW_INDIRECT_MULTI, 8,
+ predicating));
+ radeon_emit(cs, 0);
+ radeon_emit(cs, vertex_offset_reg);
+ radeon_emit(cs, start_instance_reg);
+ radeon_emit(cs, draw_id_reg | S_2C3_DRAW_INDEX_ENABLE(draw_id_enable) |
+ S_2C3_COUNT_INDIRECT_ENABLE(!!count_va));
+ radeon_emit(cs, draw_count); /* count */
+ radeon_emit(cs, count_va); /* count_addr */
+ radeon_emit(cs, count_va >> 32);
+ radeon_emit(cs, stride); /* stride */
+ radeon_emit(cs, di_src_sel);
+ }
}
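/* A standalone sketch of how the user-SGPR register indices above are derived:
 * SET_SH_REG-style packets address SH registers as dword indices relative to
 * SI_SH_REG_OFFSET. The constant value and base register below are assumed,
 * illustrative stand-ins for the generated register headers.
 */
#include <stdint.h>
#include <stdio.h>

#define SI_SH_REG_OFFSET 0x0000B000u /* assumed value, see the sid.h headers */

int
main(void)
{
   uint32_t base_reg = 0x0000B130u; /* e.g. SPI_SHADER_USER_DATA_VS_0, illustrative */

   uint32_t vertex_offset_reg = (base_reg - SI_SH_REG_OFFSET) >> 2;        /* first user SGPR */
   uint32_t draw_id_reg = ((base_reg + 4) - SI_SH_REG_OFFSET) >> 2;        /* next dword */
   uint32_t start_instance_reg = ((base_reg + 8) - SI_SH_REG_OFFSET) >> 2; /* one further */

   printf("vertex_offset=%u draw_id=%u start_instance=%u\n", vertex_offset_reg, draw_id_reg,
          start_instance_reg);
   return 0;
}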
static inline void
radv_emit_userdata_vertex_internal(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_draw_info *info,
- const uint32_t vertex_offset)
-{
- struct radv_cmd_state *state = &cmd_buffer->state;
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- const bool uses_baseinstance = state->pipeline->graphics.uses_baseinstance;
- const bool uses_drawid = state->pipeline->graphics.uses_drawid;
- radeon_set_sh_reg_seq(cs, state->pipeline->graphics.vtx_base_sgpr,
- state->pipeline->graphics.vtx_emit_num);
-
- radeon_emit(cs, vertex_offset);
- state->last_vertex_offset = vertex_offset;
- if (uses_drawid) {
- radeon_emit(cs, 0);
- state->last_drawid = 0;
- }
- if (uses_baseinstance) {
- radeon_emit(cs, info->first_instance);
- state->last_first_instance = info->first_instance;
- }
+ const struct radv_draw_info *info, const uint32_t vertex_offset)
+{
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ const bool uses_baseinstance = state->pipeline->graphics.uses_baseinstance;
+ const bool uses_drawid = state->pipeline->graphics.uses_drawid;
+ radeon_set_sh_reg_seq(cs, state->pipeline->graphics.vtx_base_sgpr,
+ state->pipeline->graphics.vtx_emit_num);
+
+ radeon_emit(cs, vertex_offset);
+ state->last_vertex_offset = vertex_offset;
+ if (uses_drawid) {
+ radeon_emit(cs, 0);
+ state->last_drawid = 0;
+ }
+ if (uses_baseinstance) {
+ radeon_emit(cs, info->first_instance);
+ state->last_first_instance = info->first_instance;
+ }
}
static inline void
-radv_emit_userdata_vertex(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_draw_info *info,
- const uint32_t vertex_offset)
-{
- const struct radv_cmd_state *state = &cmd_buffer->state;
- const bool uses_baseinstance = state->pipeline->graphics.uses_baseinstance;
- const bool uses_drawid = state->pipeline->graphics.uses_drawid;
-
- /* this looks very dumb, but it allows the compiler to optimize better and yields
- * ~3-4% perf increase in drawoverhead
- */
- if (vertex_offset != state->last_vertex_offset) {
- radv_emit_userdata_vertex_internal(cmd_buffer, info, vertex_offset);
- } else if (uses_drawid && 0 != state->last_drawid) {
- radv_emit_userdata_vertex_internal(cmd_buffer, info, vertex_offset);
- } else if (uses_baseinstance && info->first_instance != state->last_first_instance) {
- radv_emit_userdata_vertex_internal(cmd_buffer, info, vertex_offset);
- }
+radv_emit_userdata_vertex(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info,
+ const uint32_t vertex_offset)
+{
+ const struct radv_cmd_state *state = &cmd_buffer->state;
+ const bool uses_baseinstance = state->pipeline->graphics.uses_baseinstance;
+ const bool uses_drawid = state->pipeline->graphics.uses_drawid;
+
+ /* this looks very dumb, but it allows the compiler to optimize better and yields
+ * ~3-4% perf increase in drawoverhead
+ */
+ if (vertex_offset != state->last_vertex_offset) {
+ radv_emit_userdata_vertex_internal(cmd_buffer, info, vertex_offset);
+ } else if (uses_drawid && 0 != state->last_drawid) {
+ radv_emit_userdata_vertex_internal(cmd_buffer, info, vertex_offset);
+ } else if (uses_baseinstance && info->first_instance != state->last_first_instance) {
+ radv_emit_userdata_vertex_internal(cmd_buffer, info, vertex_offset);
+ }
}
ALWAYS_INLINE static void
radv_emit_draw_packets_indexed(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_draw_info *info,
- uint32_t count,
- uint32_t first_index)
-{
- const struct radv_cmd_state *state = &cmd_buffer->state;
- const int index_size = radv_get_vgt_index_size(state->index_type);
- uint64_t index_va;
-
- uint32_t remaining_indexes = cmd_buffer->state.max_index_count;
- remaining_indexes = MAX2(remaining_indexes, info->first_index) - info->first_index;
-
- /* Skip draw calls with 0-sized index buffers if the GPU can't handle them */
- if (!remaining_indexes &&
- cmd_buffer->device->physical_device->rad_info.has_zero_index_buffer_bug)
- return;
-
- index_va = state->index_va;
- index_va += first_index * index_size;
-
- if (!state->subpass->view_mask) {
- radv_cs_emit_draw_indexed_packet(cmd_buffer,
- index_va,
- remaining_indexes,
- count);
- } else {
- u_foreach_bit(i, state->subpass->view_mask) {
- radv_emit_view_index(cmd_buffer, i);
-
- radv_cs_emit_draw_indexed_packet(cmd_buffer,
- index_va,
- remaining_indexes,
- count);
- }
- }
+ const struct radv_draw_info *info, uint32_t count,
+ uint32_t first_index)
+{
+ const struct radv_cmd_state *state = &cmd_buffer->state;
+ const int index_size = radv_get_vgt_index_size(state->index_type);
+ uint64_t index_va;
+
+ uint32_t remaining_indexes = cmd_buffer->state.max_index_count;
+ remaining_indexes = MAX2(remaining_indexes, info->first_index) - info->first_index;
+
+ /* Skip draw calls with 0-sized index buffers if the GPU can't handle them */
+ if (!remaining_indexes &&
+ cmd_buffer->device->physical_device->rad_info.has_zero_index_buffer_bug)
+ return;
+
+ index_va = state->index_va;
+ index_va += first_index * index_size;
+
+ if (!state->subpass->view_mask) {
+ radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, count);
+ } else {
+ u_foreach_bit(i, state->subpass->view_mask)
+ {
+ radv_emit_view_index(cmd_buffer, i);
+
+ radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, count);
+ }
+ }
}
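/* A small worked example of the clamping done above for the indexed path:
 * remaining_indexes saturates to 0 when first_index points past the end of
 * the bound index buffer, and the index VA is advanced by first_index
 * elements. All numbers below are illustrative.
 */
#include <stdint.h>
#include <stdio.h>

#define MAX2(a, b) ((a) > (b) ? (a) : (b))

int
main(void)
{
   uint32_t max_index_count = 100;      /* indexes available in the bound buffer */
   uint32_t first_index = 120;          /* deliberately past the end */
   int index_size = 2;                  /* e.g. 16-bit indexes */
   uint64_t index_buffer_va = 0x100000; /* hypothetical GPU address */

   uint32_t remaining = MAX2(max_index_count, first_index) - first_index; /* 0, no underflow */
   uint64_t index_va = index_buffer_va + (uint64_t)first_index * index_size;

   printf("remaining=%u index_va=0x%llx\n", remaining, (unsigned long long)index_va);
   return 0;
}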
ALWAYS_INLINE static void
-radv_emit_direct_draw_packets(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_draw_info *info,
- uint32_t count,
- uint32_t use_opaque)
-{
- const struct radv_cmd_state *state = &cmd_buffer->state;
- if (!state->subpass->view_mask) {
- radv_cs_emit_draw_packet(cmd_buffer,
- count,
- use_opaque);
- } else {
- u_foreach_bit(i, state->subpass->view_mask) {
- radv_emit_view_index(cmd_buffer, i);
-
- radv_cs_emit_draw_packet(cmd_buffer,
- count,
- use_opaque);
- }
- }
+radv_emit_direct_draw_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info,
+ uint32_t count, uint32_t use_opaque)
+{
+ const struct radv_cmd_state *state = &cmd_buffer->state;
+ if (!state->subpass->view_mask) {
+ radv_cs_emit_draw_packet(cmd_buffer, count, use_opaque);
+ } else {
+ u_foreach_bit(i, state->subpass->view_mask)
+ {
+ radv_emit_view_index(cmd_buffer, i);
+
+ radv_cs_emit_draw_packet(cmd_buffer, count, use_opaque);
+ }
+ }
}
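/* A minimal sketch of the multiview replay pattern used above: when the
 * subpass view mask is non-zero, the same draw is emitted once per set bit,
 * with the view index loaded into a user SGPR first. u_foreach_bit expands
 * to roughly this loop; emit_draw_for_view() is a hypothetical stand-in for
 * the real packet emission.
 */
#include <stdint.h>
#include <stdio.h>

static void
emit_draw_for_view(unsigned view)
{
   printf("emit draw for view %u\n", view);
}

int
main(void)
{
   uint32_t view_mask = 0x5; /* views 0 and 2, illustrative */

   if (!view_mask) {
      emit_draw_for_view(0);
   } else {
      for (uint32_t tmp = view_mask; tmp; tmp &= tmp - 1) {
         unsigned view = __builtin_ctz(tmp); /* index of the lowest set bit */
         emit_draw_for_view(view);
      }
   }
   return 0;
}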
static void
radv_emit_indirect_draw_packets(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_draw_info *info)
-{
- const struct radv_cmd_state *state = &cmd_buffer->state;
- struct radeon_winsys *ws = cmd_buffer->device->ws;
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- const uint64_t va = radv_buffer_get_va(info->indirect->bo) + info->indirect->offset + info->indirect_offset;
- const uint64_t count_va = info->count_buffer ? radv_buffer_get_va(info->count_buffer->bo) + info->count_buffer->offset +
- info->count_buffer_offset : 0;
-
- radv_cs_add_buffer(ws, cs, info->indirect->bo);
-
- radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0));
- radeon_emit(cs, 1);
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
-
- if (info->count_buffer) {
- radv_cs_add_buffer(ws, cs, info->count_buffer->bo);
- }
-
- if (!state->subpass->view_mask) {
- radv_cs_emit_indirect_draw_packet(cmd_buffer,
- info->indexed,
- info->count,
- count_va,
- info->stride);
- } else {
- u_foreach_bit(i, state->subpass->view_mask) {
- radv_emit_view_index(cmd_buffer, i);
-
- radv_cs_emit_indirect_draw_packet(cmd_buffer,
- info->indexed,
- info->count,
- count_va,
- info->stride);
- }
- }
+ const struct radv_draw_info *info)
+{
+ const struct radv_cmd_state *state = &cmd_buffer->state;
+ struct radeon_winsys *ws = cmd_buffer->device->ws;
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ const uint64_t va =
+ radv_buffer_get_va(info->indirect->bo) + info->indirect->offset + info->indirect_offset;
+ const uint64_t count_va = info->count_buffer
+ ? radv_buffer_get_va(info->count_buffer->bo) +
+ info->count_buffer->offset + info->count_buffer_offset
+ : 0;
+
+ radv_cs_add_buffer(ws, cs, info->indirect->bo);
+
+ radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0));
+ radeon_emit(cs, 1);
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+
+ if (info->count_buffer) {
+ radv_cs_add_buffer(ws, cs, info->count_buffer->bo);
+ }
+
+ if (!state->subpass->view_mask) {
+ radv_cs_emit_indirect_draw_packet(cmd_buffer, info->indexed, info->count, count_va,
+ info->stride);
+ } else {
+ u_foreach_bit(i, state->subpass->view_mask)
+ {
+ radv_emit_view_index(cmd_buffer, i);
+
+ radv_cs_emit_indirect_draw_packet(cmd_buffer, info->indexed, info->count, count_va,
+ info->stride);
+ }
+ }
}
/*
@@ -5511,671 +5182,602 @@ radv_emit_indirect_draw_packets(struct radv_cmd_buffer *cmd_buffer,
* return true on affected HW if radv_emit_all_graphics_states sets
* any context registers.
*/
-static bool radv_need_late_scissor_emission(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_draw_info *info)
+static bool
+radv_need_late_scissor_emission(struct radv_cmd_buffer *cmd_buffer,
+ const struct radv_draw_info *info)
{
- struct radv_cmd_state *state = &cmd_buffer->state;
+ struct radv_cmd_state *state = &cmd_buffer->state;
- if (!cmd_buffer->device->physical_device->rad_info.has_gfx9_scissor_bug)
- return false;
+ if (!cmd_buffer->device->physical_device->rad_info.has_gfx9_scissor_bug)
+ return false;
- if (cmd_buffer->state.context_roll_without_scissor_emitted || info->strmout_buffer)
- return true;
+ if (cmd_buffer->state.context_roll_without_scissor_emitted || info->strmout_buffer)
+ return true;
- uint64_t used_states = cmd_buffer->state.pipeline->graphics.needed_dynamic_state | ~RADV_CMD_DIRTY_DYNAMIC_ALL;
+ uint64_t used_states =
+ cmd_buffer->state.pipeline->graphics.needed_dynamic_state | ~RADV_CMD_DIRTY_DYNAMIC_ALL;
- /* Index, vertex and streamout buffers don't change context regs, and
- * pipeline is already handled.
- */
- used_states &= ~(RADV_CMD_DIRTY_INDEX_BUFFER |
- RADV_CMD_DIRTY_VERTEX_BUFFER |
- RADV_CMD_DIRTY_STREAMOUT_BUFFER |
- RADV_CMD_DIRTY_PIPELINE);
+ /* Index, vertex and streamout buffers don't change context regs, and
+ * pipeline is already handled.
+ */
+ used_states &= ~(RADV_CMD_DIRTY_INDEX_BUFFER | RADV_CMD_DIRTY_VERTEX_BUFFER |
+ RADV_CMD_DIRTY_STREAMOUT_BUFFER | RADV_CMD_DIRTY_PIPELINE);
- if (cmd_buffer->state.dirty & used_states)
- return true;
+ if (cmd_buffer->state.dirty & used_states)
+ return true;
- uint32_t primitive_reset_index =
- radv_get_primitive_reset_index(cmd_buffer);
+ uint32_t primitive_reset_index = radv_get_primitive_reset_index(cmd_buffer);
- if (info->indexed && state->pipeline->graphics.prim_restart_enable &&
- primitive_reset_index != state->last_primitive_reset_index)
- return true;
+ if (info->indexed && state->pipeline->graphics.prim_restart_enable &&
+ primitive_reset_index != state->last_primitive_reset_index)
+ return true;
- return false;
+ return false;
}
static void
-radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_draw_info *info)
+radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info)
{
- bool late_scissor_emission;
+ bool late_scissor_emission;
- if ((cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER) ||
- cmd_buffer->state.emitted_pipeline != cmd_buffer->state.pipeline)
- radv_emit_rbplus_state(cmd_buffer);
+ if ((cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER) ||
+ cmd_buffer->state.emitted_pipeline != cmd_buffer->state.pipeline)
+ radv_emit_rbplus_state(cmd_buffer);
- if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE)
- radv_emit_graphics_pipeline(cmd_buffer);
+ if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE)
+ radv_emit_graphics_pipeline(cmd_buffer);
- /* This should be before the cmd_buffer->state.dirty is cleared
- * (excluding RADV_CMD_DIRTY_PIPELINE) and after
- * cmd_buffer->state.context_roll_without_scissor_emitted is set. */
- late_scissor_emission =
- radv_need_late_scissor_emission(cmd_buffer, info);
+ /* This should be before the cmd_buffer->state.dirty is cleared
+ * (excluding RADV_CMD_DIRTY_PIPELINE) and after
+ * cmd_buffer->state.context_roll_without_scissor_emitted is set. */
+ late_scissor_emission = radv_need_late_scissor_emission(cmd_buffer, info);
- if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER)
- radv_emit_framebuffer_state(cmd_buffer);
+ if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER)
+ radv_emit_framebuffer_state(cmd_buffer);
- if (info->indexed) {
- if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_INDEX_BUFFER)
- radv_emit_index_buffer(cmd_buffer, info->indirect);
- } else {
- /* On GFX7 and later, non-indexed draws overwrite VGT_INDEX_TYPE,
- * so the state must be re-emitted before the next indexed
- * draw.
- */
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) {
- cmd_buffer->state.last_index_type = -1;
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_INDEX_BUFFER;
- }
- }
+ if (info->indexed) {
+ if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_INDEX_BUFFER)
+ radv_emit_index_buffer(cmd_buffer, info->indirect);
+ } else {
+ /* On GFX7 and later, non-indexed draws overwrite VGT_INDEX_TYPE,
+ * so the state must be re-emitted before the next indexed
+ * draw.
+ */
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) {
+ cmd_buffer->state.last_index_type = -1;
+ cmd_buffer->state.dirty |= RADV_CMD_DIRTY_INDEX_BUFFER;
+ }
+ }
- radv_cmd_buffer_flush_dynamic_state(cmd_buffer);
+ radv_cmd_buffer_flush_dynamic_state(cmd_buffer);
- radv_emit_draw_registers(cmd_buffer, info);
+ radv_emit_draw_registers(cmd_buffer, info);
- if (late_scissor_emission)
- radv_emit_scissor(cmd_buffer);
+ if (late_scissor_emission)
+ radv_emit_scissor(cmd_buffer);
}
/* MUST inline this function to avoid massive perf loss in drawoverhead */
ALWAYS_INLINE static bool
-radv_before_draw(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_draw_info *info,
- uint32_t vertex_offset)
-{
- const bool has_prefetch =
- cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7;
- const bool pipeline_is_dirty =
- (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE) &&
- cmd_buffer->state.pipeline != cmd_buffer->state.emitted_pipeline;
-
- ASSERTED const unsigned cdw_max =
- radeon_check_space(cmd_buffer->device->ws,
- cmd_buffer->cs, 4096);
-
- if (likely(!info->indirect)) {
- /* GFX6-GFX7 treat instance_count==0 as instance_count==1. There is
- * no workaround for indirect draws, but we can at least skip
- * direct draws.
- */
- if (unlikely(!info->instance_count))
- return false;
-
- /* Handle count == 0. */
- if (unlikely(!info->count && !info->strmout_buffer))
- return false;
- }
-
- /* Need to apply this workaround early as it can set flush flags. */
- if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER)
- radv_emit_fb_mip_change_flush(cmd_buffer);
-
- /* Use optimal packet order based on whether we need to sync the
- * pipeline.
- */
- if (cmd_buffer->state.flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
- RADV_CMD_FLAG_FLUSH_AND_INV_DB |
- RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_CS_PARTIAL_FLUSH)) {
- /* If we have to wait for idle, set all states first, so that
- * all SET packets are processed in parallel with previous draw
- * calls. Then upload descriptors, set shader pointers, and
- * draw, and prefetch at the end. This ensures that the time
- * the CUs are idle is very short. (there are only SET_SH
- * packets between the wait and the draw)
- */
- radv_emit_all_graphics_states(cmd_buffer, info);
- si_emit_cache_flush(cmd_buffer);
- /* <-- CUs are idle here --> */
-
- radv_upload_graphics_shader_descriptors(cmd_buffer, pipeline_is_dirty);
- } else {
- /* If we don't wait for idle, start prefetches first, then set
- * states, and draw at the end.
- */
- si_emit_cache_flush(cmd_buffer);
-
- if (has_prefetch && cmd_buffer->state.prefetch_L2_mask) {
- /* Only prefetch the vertex shader and VBO descriptors
- * in order to start the draw as soon as possible.
- */
- radv_emit_prefetch_L2(cmd_buffer,
- cmd_buffer->state.pipeline, true);
- }
-
- radv_upload_graphics_shader_descriptors(cmd_buffer, pipeline_is_dirty);
-
- radv_emit_all_graphics_states(cmd_buffer, info);
- }
-
- radv_describe_draw(cmd_buffer);
- if (likely(!info->indirect)) {
- struct radv_cmd_state *state = &cmd_buffer->state;
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- assert(state->pipeline->graphics.vtx_base_sgpr);
- if (state->last_num_instances != info->instance_count) {
- radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, false));
- radeon_emit(cs, info->instance_count);
- state->last_num_instances = info->instance_count;
- }
- radv_emit_userdata_vertex(cmd_buffer, info, vertex_offset);
- }
- assert(cmd_buffer->cs->cdw <= cdw_max);
-
- return true;
+radv_before_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info,
+ uint32_t vertex_offset)
+{
+ const bool has_prefetch = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7;
+ const bool pipeline_is_dirty = (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE) &&
+ cmd_buffer->state.pipeline != cmd_buffer->state.emitted_pipeline;
+
+ ASSERTED const unsigned cdw_max =
+ radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4096);
+
+ if (likely(!info->indirect)) {
+ /* GFX6-GFX7 treat instance_count==0 as instance_count==1. There is
+ * no workaround for indirect draws, but we can at least skip
+ * direct draws.
+ */
+ if (unlikely(!info->instance_count))
+ return false;
+
+ /* Handle count == 0. */
+ if (unlikely(!info->count && !info->strmout_buffer))
+ return false;
+ }
+
+ /* Need to apply this workaround early as it can set flush flags. */
+ if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER)
+ radv_emit_fb_mip_change_flush(cmd_buffer);
+
+ /* Use optimal packet order based on whether we need to sync the
+ * pipeline.
+ */
+ if (cmd_buffer->state.flush_bits &
+ (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB |
+ RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH)) {
+ /* If we have to wait for idle, set all states first, so that
+ * all SET packets are processed in parallel with previous draw
+ * calls. Then upload descriptors, set shader pointers, and
+ * draw, and prefetch at the end. This ensures that the time
+ * the CUs are idle is very short. (there are only SET_SH
+ * packets between the wait and the draw)
+ */
+ radv_emit_all_graphics_states(cmd_buffer, info);
+ si_emit_cache_flush(cmd_buffer);
+ /* <-- CUs are idle here --> */
+
+ radv_upload_graphics_shader_descriptors(cmd_buffer, pipeline_is_dirty);
+ } else {
+ /* If we don't wait for idle, start prefetches first, then set
+ * states, and draw at the end.
+ */
+ si_emit_cache_flush(cmd_buffer);
+
+ if (has_prefetch && cmd_buffer->state.prefetch_L2_mask) {
+ /* Only prefetch the vertex shader and VBO descriptors
+ * in order to start the draw as soon as possible.
+ */
+ radv_emit_prefetch_L2(cmd_buffer, cmd_buffer->state.pipeline, true);
+ }
+
+ radv_upload_graphics_shader_descriptors(cmd_buffer, pipeline_is_dirty);
+
+ radv_emit_all_graphics_states(cmd_buffer, info);
+ }
+
+ radv_describe_draw(cmd_buffer);
+ if (likely(!info->indirect)) {
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ assert(state->pipeline->graphics.vtx_base_sgpr);
+ if (state->last_num_instances != info->instance_count) {
+ radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, false));
+ radeon_emit(cs, info->instance_count);
+ state->last_num_instances = info->instance_count;
+ }
+ radv_emit_userdata_vertex(cmd_buffer, info, vertex_offset);
+ }
+ assert(cmd_buffer->cs->cdw <= cdw_max);
+
+ return true;
}
static void
radv_after_draw(struct radv_cmd_buffer *cmd_buffer)
{
- const struct radeon_info *rad_info =
- &cmd_buffer->device->physical_device->rad_info;
- bool has_prefetch =
- cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7;
- /* Start prefetches after the draw has been started. Both will
- * run in parallel, but starting the draw first is more
- * important.
- */
- if (has_prefetch && cmd_buffer->state.prefetch_L2_mask) {
- radv_emit_prefetch_L2(cmd_buffer,
- cmd_buffer->state.pipeline, false);
- }
-
- /* Workaround for a VGT hang when streamout is enabled.
- * It must be done after drawing.
- */
- if (cmd_buffer->state.streamout.streamout_enabled &&
- (rad_info->family == CHIP_HAWAII ||
- rad_info->family == CHIP_TONGA ||
- rad_info->family == CHIP_FIJI)) {
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VGT_STREAMOUT_SYNC;
- }
-
- radv_cmd_buffer_after_draw(cmd_buffer, RADV_CMD_FLAG_PS_PARTIAL_FLUSH);
-}
-
-void radv_CmdDraw(
- VkCommandBuffer commandBuffer,
- uint32_t vertexCount,
- uint32_t instanceCount,
- uint32_t firstVertex,
- uint32_t firstInstance)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_draw_info info;
-
- info.count = vertexCount;
- info.instance_count = instanceCount;
- info.first_instance = firstInstance;
- info.strmout_buffer = NULL;
- info.indirect = NULL;
- info.indexed = false;
-
- if (!radv_before_draw(cmd_buffer, &info, firstVertex))
- return;
- radv_emit_direct_draw_packets(cmd_buffer, &info,
- vertexCount, 0);
- radv_after_draw(cmd_buffer);
-}
-
-void radv_CmdDrawIndexed(
- VkCommandBuffer commandBuffer,
- uint32_t indexCount,
- uint32_t instanceCount,
- uint32_t firstIndex,
- int32_t vertexOffset,
- uint32_t firstInstance)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_draw_info info;
-
- info.indexed = true;
- info.count = indexCount;
- info.instance_count = instanceCount;
- info.first_index = firstIndex;
- info.first_instance = firstInstance;
- info.strmout_buffer = NULL;
- info.indirect = NULL;
-
- if (!radv_before_draw(cmd_buffer, &info, vertexOffset))
- return;
- radv_emit_draw_packets_indexed(cmd_buffer, &info,
- indexCount, firstIndex);
- radv_after_draw(cmd_buffer);
-}
-
-void radv_CmdDrawIndirect(
- VkCommandBuffer commandBuffer,
- VkBuffer _buffer,
- VkDeviceSize offset,
- uint32_t drawCount,
- uint32_t stride)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
- struct radv_draw_info info;
-
- info.count = drawCount;
- info.indirect = buffer;
- info.indirect_offset = offset;
- info.stride = stride;
- info.strmout_buffer = NULL;
- info.count_buffer = NULL;
- info.indexed = false;
-
- if (!radv_before_draw(cmd_buffer, &info, 0))
- return;
- radv_emit_indirect_draw_packets(cmd_buffer, &info);
- radv_after_draw(cmd_buffer);
-}
-
-void radv_CmdDrawIndexedIndirect(
- VkCommandBuffer commandBuffer,
- VkBuffer _buffer,
- VkDeviceSize offset,
- uint32_t drawCount,
- uint32_t stride)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
- struct radv_draw_info info;
-
- info.indexed = true;
- info.count = drawCount;
- info.indirect = buffer;
- info.indirect_offset = offset;
- info.stride = stride;
- info.count_buffer = NULL;
- info.strmout_buffer = NULL;
-
- if (!radv_before_draw(cmd_buffer, &info, 0))
- return;
- radv_emit_indirect_draw_packets(cmd_buffer, &info);
- radv_after_draw(cmd_buffer);
-}
-
-void radv_CmdDrawIndirectCount(
- VkCommandBuffer commandBuffer,
- VkBuffer _buffer,
- VkDeviceSize offset,
- VkBuffer _countBuffer,
- VkDeviceSize countBufferOffset,
- uint32_t maxDrawCount,
- uint32_t stride)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
- RADV_FROM_HANDLE(radv_buffer, count_buffer, _countBuffer);
- struct radv_draw_info info;
-
- info.count = maxDrawCount;
- info.indirect = buffer;
- info.indirect_offset = offset;
- info.count_buffer = count_buffer;
- info.count_buffer_offset = countBufferOffset;
- info.stride = stride;
- info.strmout_buffer = NULL;
- info.indexed = false;
-
- if (!radv_before_draw(cmd_buffer, &info, 0))
- return;
- radv_emit_indirect_draw_packets(cmd_buffer, &info);
- radv_after_draw(cmd_buffer);
-}
-
-void radv_CmdDrawIndexedIndirectCount(
- VkCommandBuffer commandBuffer,
- VkBuffer _buffer,
- VkDeviceSize offset,
- VkBuffer _countBuffer,
- VkDeviceSize countBufferOffset,
- uint32_t maxDrawCount,
- uint32_t stride)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
- RADV_FROM_HANDLE(radv_buffer, count_buffer, _countBuffer);
- struct radv_draw_info info;
-
- info.indexed = true;
- info.count = maxDrawCount;
- info.indirect = buffer;
- info.indirect_offset = offset;
- info.count_buffer = count_buffer;
- info.count_buffer_offset = countBufferOffset;
- info.stride = stride;
- info.strmout_buffer = NULL;
-
- if (!radv_before_draw(cmd_buffer, &info, 0))
- return;
- radv_emit_indirect_draw_packets(cmd_buffer, &info);
- radv_after_draw(cmd_buffer);
+ const struct radeon_info *rad_info = &cmd_buffer->device->physical_device->rad_info;
+ bool has_prefetch = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7;
+ /* Start prefetches after the draw has been started. Both will
+ * run in parallel, but starting the draw first is more
+ * important.
+ */
+ if (has_prefetch && cmd_buffer->state.prefetch_L2_mask) {
+ radv_emit_prefetch_L2(cmd_buffer, cmd_buffer->state.pipeline, false);
+ }
+
+ /* Workaround for a VGT hang when streamout is enabled.
+ * It must be done after drawing.
+ */
+ if (cmd_buffer->state.streamout.streamout_enabled &&
+ (rad_info->family == CHIP_HAWAII || rad_info->family == CHIP_TONGA ||
+ rad_info->family == CHIP_FIJI)) {
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VGT_STREAMOUT_SYNC;
+ }
+
+ radv_cmd_buffer_after_draw(cmd_buffer, RADV_CMD_FLAG_PS_PARTIAL_FLUSH);
}
-struct radv_dispatch_info {
- /**
- * Determine the layout of the grid (in block units) to be used.
- */
- uint32_t blocks[3];
-
- /**
-   * A starting offset for the grid. Even if unaligned is set, the offset
-   * must still be aligned to the block size.
- */
- uint32_t offsets[3];
- /**
- * Whether it's an unaligned compute dispatch.
- */
- bool unaligned;
-
- /**
- * Indirect compute parameters resource.
- */
- struct radv_buffer *indirect;
- uint64_t indirect_offset;
-};
+void
+radv_CmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount,
+ uint32_t firstVertex, uint32_t firstInstance)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_draw_info info;
-static void
-radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_dispatch_info *info)
-{
- struct radv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
- struct radv_shader_variant *compute_shader = pipeline->shaders[MESA_SHADER_COMPUTE];
- unsigned dispatch_initiator = cmd_buffer->device->dispatch_initiator;
- struct radeon_winsys *ws = cmd_buffer->device->ws;
- bool predicating = cmd_buffer->state.predicating;
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- struct radv_userdata_info *loc;
-
- radv_describe_dispatch(cmd_buffer, info->blocks[0], info->blocks[1],
- info->blocks[2]);
-
- loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_COMPUTE,
- AC_UD_CS_GRID_SIZE);
-
- ASSERTED unsigned cdw_max = radeon_check_space(ws, cs, 25);
-
- if (compute_shader->info.wave_size == 32) {
- assert(cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10);
- dispatch_initiator |= S_00B800_CS_W32_EN(1);
- }
-
- if (info->indirect) {
- uint64_t va = radv_buffer_get_va(info->indirect->bo);
-
- va += info->indirect->offset + info->indirect_offset;
-
- radv_cs_add_buffer(ws, cs, info->indirect->bo);
-
- if (loc->sgpr_idx != -1) {
- for (unsigned i = 0; i < 3; ++i) {
- radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
- COPY_DATA_DST_SEL(COPY_DATA_REG));
- radeon_emit(cs, (va + 4 * i));
- radeon_emit(cs, (va + 4 * i) >> 32);
- radeon_emit(cs, ((R_00B900_COMPUTE_USER_DATA_0
- + loc->sgpr_idx * 4) >> 2) + i);
- radeon_emit(cs, 0);
- }
- }
-
- if (radv_cmd_buffer_uses_mec(cmd_buffer)) {
- radeon_emit(cs, PKT3(PKT3_DISPATCH_INDIRECT, 2, predicating) |
- PKT3_SHADER_TYPE_S(1));
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- radeon_emit(cs, dispatch_initiator);
- } else {
- radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0) |
- PKT3_SHADER_TYPE_S(1));
- radeon_emit(cs, 1);
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
-
- radeon_emit(cs, PKT3(PKT3_DISPATCH_INDIRECT, 1, predicating) |
- PKT3_SHADER_TYPE_S(1));
- radeon_emit(cs, 0);
- radeon_emit(cs, dispatch_initiator);
- }
- } else {
- unsigned blocks[3] = { info->blocks[0], info->blocks[1], info->blocks[2] };
- unsigned offsets[3] = { info->offsets[0], info->offsets[1], info->offsets[2] };
-
- if (info->unaligned) {
- unsigned *cs_block_size = compute_shader->info.cs.block_size;
- unsigned remainder[3];
-
- /* If aligned, these should be an entire block size,
- * not 0.
- */
- remainder[0] = blocks[0] + cs_block_size[0] -
- align_u32_npot(blocks[0], cs_block_size[0]);
- remainder[1] = blocks[1] + cs_block_size[1] -
- align_u32_npot(blocks[1], cs_block_size[1]);
- remainder[2] = blocks[2] + cs_block_size[2] -
- align_u32_npot(blocks[2], cs_block_size[2]);
-
- blocks[0] = round_up_u32(blocks[0], cs_block_size[0]);
- blocks[1] = round_up_u32(blocks[1], cs_block_size[1]);
- blocks[2] = round_up_u32(blocks[2], cs_block_size[2]);
-
- for(unsigned i = 0; i < 3; ++i) {
- assert(offsets[i] % cs_block_size[i] == 0);
- offsets[i] /= cs_block_size[i];
- }
-
- radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
- radeon_emit(cs,
- S_00B81C_NUM_THREAD_FULL(cs_block_size[0]) |
- S_00B81C_NUM_THREAD_PARTIAL(remainder[0]));
- radeon_emit(cs,
- S_00B81C_NUM_THREAD_FULL(cs_block_size[1]) |
- S_00B81C_NUM_THREAD_PARTIAL(remainder[1]));
- radeon_emit(cs,
- S_00B81C_NUM_THREAD_FULL(cs_block_size[2]) |
- S_00B81C_NUM_THREAD_PARTIAL(remainder[2]));
-
- dispatch_initiator |= S_00B800_PARTIAL_TG_EN(1);
- }
-
- if (loc->sgpr_idx != -1) {
- assert(loc->num_sgprs == 3);
-
- radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0 +
- loc->sgpr_idx * 4, 3);
- radeon_emit(cs, blocks[0]);
- radeon_emit(cs, blocks[1]);
- radeon_emit(cs, blocks[2]);
- }
-
- if (offsets[0] || offsets[1] || offsets[2]) {
- radeon_set_sh_reg_seq(cs, R_00B810_COMPUTE_START_X, 3);
- radeon_emit(cs, offsets[0]);
- radeon_emit(cs, offsets[1]);
- radeon_emit(cs, offsets[2]);
-
- /* The blocks in the packet are not counts but end values. */
- for (unsigned i = 0; i < 3; ++i)
- blocks[i] += offsets[i];
- } else {
- dispatch_initiator |= S_00B800_FORCE_START_AT_000(1);
- }
-
- radeon_emit(cs, PKT3(PKT3_DISPATCH_DIRECT, 3, predicating) |
- PKT3_SHADER_TYPE_S(1));
- radeon_emit(cs, blocks[0]);
- radeon_emit(cs, blocks[1]);
- radeon_emit(cs, blocks[2]);
- radeon_emit(cs, dispatch_initiator);
- }
-
- assert(cmd_buffer->cs->cdw <= cdw_max);
+ info.count = vertexCount;
+ info.instance_count = instanceCount;
+ info.first_instance = firstInstance;
+ info.strmout_buffer = NULL;
+ info.indirect = NULL;
+ info.indexed = false;
+
+ if (!radv_before_draw(cmd_buffer, &info, firstVertex))
+ return;
+ radv_emit_direct_draw_packets(cmd_buffer, &info, vertexCount, 0);
+ radv_after_draw(cmd_buffer);
}
-static void
-radv_upload_compute_shader_descriptors(struct radv_cmd_buffer *cmd_buffer)
+void
+radv_CmdDrawIndexed(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount,
+ uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance)
{
- radv_flush_descriptors(cmd_buffer, VK_SHADER_STAGE_COMPUTE_BIT);
- radv_flush_constants(cmd_buffer, VK_SHADER_STAGE_COMPUTE_BIT);
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_draw_info info;
+
+ info.indexed = true;
+ info.count = indexCount;
+ info.instance_count = instanceCount;
+ info.first_index = firstIndex;
+ info.first_instance = firstInstance;
+ info.strmout_buffer = NULL;
+ info.indirect = NULL;
+
+ if (!radv_before_draw(cmd_buffer, &info, vertexOffset))
+ return;
+ radv_emit_draw_packets_indexed(cmd_buffer, &info, indexCount, firstIndex);
+ radv_after_draw(cmd_buffer);
}
-static void
-radv_dispatch(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_dispatch_info *info)
-{
- struct radv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
- bool has_prefetch =
- cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7;
- bool pipeline_is_dirty = pipeline &&
- pipeline != cmd_buffer->state.emitted_compute_pipeline;
-
- if (cmd_buffer->state.flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
- RADV_CMD_FLAG_FLUSH_AND_INV_DB |
- RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_CS_PARTIAL_FLUSH)) {
- /* If we have to wait for idle, set all states first, so that
- * all SET packets are processed in parallel with previous draw
- * calls. Then upload descriptors, set shader pointers, and
- * dispatch, and prefetch at the end. This ensures that the
- * time the CUs are idle is very short. (there are only SET_SH
- * packets between the wait and the draw)
- */
- radv_emit_compute_pipeline(cmd_buffer);
- si_emit_cache_flush(cmd_buffer);
- /* <-- CUs are idle here --> */
+void
+radv_CmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset,
+ uint32_t drawCount, uint32_t stride)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
+ struct radv_draw_info info;
- radv_upload_compute_shader_descriptors(cmd_buffer);
+ info.count = drawCount;
+ info.indirect = buffer;
+ info.indirect_offset = offset;
+ info.stride = stride;
+ info.strmout_buffer = NULL;
+ info.count_buffer = NULL;
+ info.indexed = false;
- radv_emit_dispatch_packets(cmd_buffer, info);
- /* <-- CUs are busy here --> */
+ if (!radv_before_draw(cmd_buffer, &info, 0))
+ return;
+ radv_emit_indirect_draw_packets(cmd_buffer, &info);
+ radv_after_draw(cmd_buffer);
+}
- /* Start prefetches after the dispatch has been started. Both
- * will run in parallel, but starting the dispatch first is
- * more important.
- */
- if (has_prefetch && pipeline_is_dirty) {
- radv_emit_shader_prefetch(cmd_buffer,
- pipeline->shaders[MESA_SHADER_COMPUTE]);
- }
- } else {
- /* If we don't wait for idle, start prefetches first, then set
- * states, and dispatch at the end.
- */
- si_emit_cache_flush(cmd_buffer);
+void
+radv_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset,
+ uint32_t drawCount, uint32_t stride)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
+ struct radv_draw_info info;
- if (has_prefetch && pipeline_is_dirty) {
- radv_emit_shader_prefetch(cmd_buffer,
- pipeline->shaders[MESA_SHADER_COMPUTE]);
- }
+ info.indexed = true;
+ info.count = drawCount;
+ info.indirect = buffer;
+ info.indirect_offset = offset;
+ info.stride = stride;
+ info.count_buffer = NULL;
+ info.strmout_buffer = NULL;
- radv_upload_compute_shader_descriptors(cmd_buffer);
+ if (!radv_before_draw(cmd_buffer, &info, 0))
+ return;
+ radv_emit_indirect_draw_packets(cmd_buffer, &info);
+ radv_after_draw(cmd_buffer);
+}
- radv_emit_compute_pipeline(cmd_buffer);
- radv_emit_dispatch_packets(cmd_buffer, info);
- }
+void
+radv_CmdDrawIndirectCount(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset,
+ VkBuffer _countBuffer, VkDeviceSize countBufferOffset,
+ uint32_t maxDrawCount, uint32_t stride)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
+ RADV_FROM_HANDLE(radv_buffer, count_buffer, _countBuffer);
+ struct radv_draw_info info;
+
+ info.count = maxDrawCount;
+ info.indirect = buffer;
+ info.indirect_offset = offset;
+ info.count_buffer = count_buffer;
+ info.count_buffer_offset = countBufferOffset;
+ info.stride = stride;
+ info.strmout_buffer = NULL;
+ info.indexed = false;
+
+ if (!radv_before_draw(cmd_buffer, &info, 0))
+ return;
+ radv_emit_indirect_draw_packets(cmd_buffer, &info);
+ radv_after_draw(cmd_buffer);
+}
- radv_cmd_buffer_after_draw(cmd_buffer, RADV_CMD_FLAG_CS_PARTIAL_FLUSH);
+void
+radv_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer, VkBuffer _buffer,
+ VkDeviceSize offset, VkBuffer _countBuffer,
+ VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
+ uint32_t stride)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
+ RADV_FROM_HANDLE(radv_buffer, count_buffer, _countBuffer);
+ struct radv_draw_info info;
+
+ info.indexed = true;
+ info.count = maxDrawCount;
+ info.indirect = buffer;
+ info.indirect_offset = offset;
+ info.count_buffer = count_buffer;
+ info.count_buffer_offset = countBufferOffset;
+ info.stride = stride;
+ info.strmout_buffer = NULL;
+
+ if (!radv_before_draw(cmd_buffer, &info, 0))
+ return;
+ radv_emit_indirect_draw_packets(cmd_buffer, &info);
+ radv_after_draw(cmd_buffer);
}
-void radv_CmdDispatchBase(
- VkCommandBuffer commandBuffer,
- uint32_t base_x,
- uint32_t base_y,
- uint32_t base_z,
- uint32_t x,
- uint32_t y,
- uint32_t z)
+struct radv_dispatch_info {
+ /**
+ * Determine the layout of the grid (in block units) to be used.
+ */
+ uint32_t blocks[3];
+
+ /**
+    * A starting offset for the grid. Even if unaligned is set, the offset
+    * must still be aligned to the block size.
+ */
+ uint32_t offsets[3];
+ /**
+ * Whether it's an unaligned compute dispatch.
+ */
+ bool unaligned;
+
+ /**
+ * Indirect compute parameters resource.
+ */
+ struct radv_buffer *indirect;
+ uint64_t indirect_offset;
+};
+
+static void
+radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer,
+ const struct radv_dispatch_info *info)
+{
+ struct radv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
+ struct radv_shader_variant *compute_shader = pipeline->shaders[MESA_SHADER_COMPUTE];
+ unsigned dispatch_initiator = cmd_buffer->device->dispatch_initiator;
+ struct radeon_winsys *ws = cmd_buffer->device->ws;
+ bool predicating = cmd_buffer->state.predicating;
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ struct radv_userdata_info *loc;
+
+ radv_describe_dispatch(cmd_buffer, info->blocks[0], info->blocks[1], info->blocks[2]);
+
+ loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE);
+
+ ASSERTED unsigned cdw_max = radeon_check_space(ws, cs, 25);
+
+ if (compute_shader->info.wave_size == 32) {
+ assert(cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10);
+ dispatch_initiator |= S_00B800_CS_W32_EN(1);
+ }
+
+ if (info->indirect) {
+ uint64_t va = radv_buffer_get_va(info->indirect->bo);
+
+ va += info->indirect->offset + info->indirect_offset;
+
+ radv_cs_add_buffer(ws, cs, info->indirect->bo);
+
+ if (loc->sgpr_idx != -1) {
+ for (unsigned i = 0; i < 3; ++i) {
+ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+ radeon_emit(cs,
+ COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG));
+ radeon_emit(cs, (va + 4 * i));
+ radeon_emit(cs, (va + 4 * i) >> 32);
+ radeon_emit(cs, ((R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4) >> 2) + i);
+ radeon_emit(cs, 0);
+ }
+ }
+
+ if (radv_cmd_buffer_uses_mec(cmd_buffer)) {
+ radeon_emit(cs, PKT3(PKT3_DISPATCH_INDIRECT, 2, predicating) | PKT3_SHADER_TYPE_S(1));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ radeon_emit(cs, dispatch_initiator);
+ } else {
+ radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0) | PKT3_SHADER_TYPE_S(1));
+ radeon_emit(cs, 1);
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+
+ radeon_emit(cs, PKT3(PKT3_DISPATCH_INDIRECT, 1, predicating) | PKT3_SHADER_TYPE_S(1));
+ radeon_emit(cs, 0);
+ radeon_emit(cs, dispatch_initiator);
+ }
+ } else {
+ unsigned blocks[3] = {info->blocks[0], info->blocks[1], info->blocks[2]};
+ unsigned offsets[3] = {info->offsets[0], info->offsets[1], info->offsets[2]};
+
+ if (info->unaligned) {
+ unsigned *cs_block_size = compute_shader->info.cs.block_size;
+ unsigned remainder[3];
+
+ /* If aligned, these should be an entire block size,
+ * not 0.
+ */
+ remainder[0] = blocks[0] + cs_block_size[0] - align_u32_npot(blocks[0], cs_block_size[0]);
+ remainder[1] = blocks[1] + cs_block_size[1] - align_u32_npot(blocks[1], cs_block_size[1]);
+ remainder[2] = blocks[2] + cs_block_size[2] - align_u32_npot(blocks[2], cs_block_size[2]);
+
+ blocks[0] = round_up_u32(blocks[0], cs_block_size[0]);
+ blocks[1] = round_up_u32(blocks[1], cs_block_size[1]);
+ blocks[2] = round_up_u32(blocks[2], cs_block_size[2]);
+
+ for (unsigned i = 0; i < 3; ++i) {
+ assert(offsets[i] % cs_block_size[i] == 0);
+ offsets[i] /= cs_block_size[i];
+ }
+
+ radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
+ radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(cs_block_size[0]) |
+ S_00B81C_NUM_THREAD_PARTIAL(remainder[0]));
+ radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(cs_block_size[1]) |
+ S_00B81C_NUM_THREAD_PARTIAL(remainder[1]));
+ radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(cs_block_size[2]) |
+ S_00B81C_NUM_THREAD_PARTIAL(remainder[2]));
+
+ dispatch_initiator |= S_00B800_PARTIAL_TG_EN(1);
+ }
+
+ if (loc->sgpr_idx != -1) {
+ assert(loc->num_sgprs == 3);
+
+ radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4, 3);
+ radeon_emit(cs, blocks[0]);
+ radeon_emit(cs, blocks[1]);
+ radeon_emit(cs, blocks[2]);
+ }
+
+ if (offsets[0] || offsets[1] || offsets[2]) {
+ radeon_set_sh_reg_seq(cs, R_00B810_COMPUTE_START_X, 3);
+ radeon_emit(cs, offsets[0]);
+ radeon_emit(cs, offsets[1]);
+ radeon_emit(cs, offsets[2]);
+
+ /* The blocks in the packet are not counts but end values. */
+ for (unsigned i = 0; i < 3; ++i)
+ blocks[i] += offsets[i];
+ } else {
+ dispatch_initiator |= S_00B800_FORCE_START_AT_000(1);
+ }
+
+ radeon_emit(cs, PKT3(PKT3_DISPATCH_DIRECT, 3, predicating) | PKT3_SHADER_TYPE_S(1));
+ radeon_emit(cs, blocks[0]);
+ radeon_emit(cs, blocks[1]);
+ radeon_emit(cs, blocks[2]);
+ radeon_emit(cs, dispatch_initiator);
+ }
+
+ assert(cmd_buffer->cs->cdw <= cdw_max);
+}
+
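/* A worked example of the unaligned-dispatch arithmetic above, with the
 * helpers written out inline. remainder is the number of valid threads in
 * the last, partial thread group (a full block size when already aligned),
 * and the block count is rounded up to a whole number of groups. Values
 * are illustrative.
 */
#include <stdio.h>

int
main(void)
{
   unsigned blocks = 70;     /* threads requested in one dimension */
   unsigned block_size = 64; /* workgroup size in that dimension */

   unsigned aligned = ((blocks + block_size - 1) / block_size) * block_size; /* align_u32_npot */
   unsigned remainder = blocks + block_size - aligned; /* 70 + 64 - 128 = 6 valid threads */
   unsigned groups = (blocks + block_size - 1) / block_size; /* round_up_u32 -> 2 groups */

   printf("groups=%u, last group has %u valid threads\n", groups, remainder);
   return 0;
}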
+static void
+radv_upload_compute_shader_descriptors(struct radv_cmd_buffer *cmd_buffer)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_dispatch_info info = {0};
+ radv_flush_descriptors(cmd_buffer, VK_SHADER_STAGE_COMPUTE_BIT);
+ radv_flush_constants(cmd_buffer, VK_SHADER_STAGE_COMPUTE_BIT);
+}
+
+static void
+radv_dispatch(struct radv_cmd_buffer *cmd_buffer, const struct radv_dispatch_info *info)
+{
+ struct radv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
+ bool has_prefetch = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7;
+ bool pipeline_is_dirty = pipeline && pipeline != cmd_buffer->state.emitted_compute_pipeline;
+
+ if (cmd_buffer->state.flush_bits &
+ (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB |
+ RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH)) {
+ /* If we have to wait for idle, set all states first, so that
+ * all SET packets are processed in parallel with previous draw
+ * calls. Then upload descriptors, set shader pointers, and
+ * dispatch, and prefetch at the end. This ensures that the
+ * time the CUs are idle is very short. (there are only SET_SH
+ * packets between the wait and the draw)
+ */
+ radv_emit_compute_pipeline(cmd_buffer);
+ si_emit_cache_flush(cmd_buffer);
+ /* <-- CUs are idle here --> */
+
+ radv_upload_compute_shader_descriptors(cmd_buffer);
+
+ radv_emit_dispatch_packets(cmd_buffer, info);
+ /* <-- CUs are busy here --> */
+
+ /* Start prefetches after the dispatch has been started. Both
+ * will run in parallel, but starting the dispatch first is
+ * more important.
+ */
+ if (has_prefetch && pipeline_is_dirty) {
+ radv_emit_shader_prefetch(cmd_buffer, pipeline->shaders[MESA_SHADER_COMPUTE]);
+ }
+ } else {
+ /* If we don't wait for idle, start prefetches first, then set
+ * states, and dispatch at the end.
+ */
+ si_emit_cache_flush(cmd_buffer);
+
+ if (has_prefetch && pipeline_is_dirty) {
+ radv_emit_shader_prefetch(cmd_buffer, pipeline->shaders[MESA_SHADER_COMPUTE]);
+ }
+
+ radv_upload_compute_shader_descriptors(cmd_buffer);
+
+ radv_emit_compute_pipeline(cmd_buffer);
+ radv_emit_dispatch_packets(cmd_buffer, info);
+ }
+
+ radv_cmd_buffer_after_draw(cmd_buffer, RADV_CMD_FLAG_CS_PARTIAL_FLUSH);
+}
- info.blocks[0] = x;
- info.blocks[1] = y;
- info.blocks[2] = z;
+void
+radv_CmdDispatchBase(VkCommandBuffer commandBuffer, uint32_t base_x, uint32_t base_y,
+ uint32_t base_z, uint32_t x, uint32_t y, uint32_t z)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_dispatch_info info = {0};
- info.offsets[0] = base_x;
- info.offsets[1] = base_y;
- info.offsets[2] = base_z;
- radv_dispatch(cmd_buffer, &info);
+ info.blocks[0] = x;
+ info.blocks[1] = y;
+ info.blocks[2] = z;
+
+ info.offsets[0] = base_x;
+ info.offsets[1] = base_y;
+ info.offsets[2] = base_z;
+ radv_dispatch(cmd_buffer, &info);
}
-void radv_CmdDispatch(
- VkCommandBuffer commandBuffer,
- uint32_t x,
- uint32_t y,
- uint32_t z)
+void
+radv_CmdDispatch(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z)
{
- radv_CmdDispatchBase(commandBuffer, 0, 0, 0, x, y, z);
+ radv_CmdDispatchBase(commandBuffer, 0, 0, 0, x, y, z);
}
-void radv_CmdDispatchIndirect(
- VkCommandBuffer commandBuffer,
- VkBuffer _buffer,
- VkDeviceSize offset)
+void
+radv_CmdDispatchIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
- struct radv_dispatch_info info = {0};
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
+ struct radv_dispatch_info info = {0};
- info.indirect = buffer;
- info.indirect_offset = offset;
+ info.indirect = buffer;
+ info.indirect_offset = offset;
- radv_dispatch(cmd_buffer, &info);
+ radv_dispatch(cmd_buffer, &info);
}
-void radv_unaligned_dispatch(
- struct radv_cmd_buffer *cmd_buffer,
- uint32_t x,
- uint32_t y,
- uint32_t z)
+void
+radv_unaligned_dispatch(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t y, uint32_t z)
{
- struct radv_dispatch_info info = {0};
+ struct radv_dispatch_info info = {0};
- info.blocks[0] = x;
- info.blocks[1] = y;
- info.blocks[2] = z;
- info.unaligned = 1;
+ info.blocks[0] = x;
+ info.blocks[1] = y;
+ info.blocks[2] = z;
+ info.unaligned = 1;
- radv_dispatch(cmd_buffer, &info);
+ radv_dispatch(cmd_buffer, &info);
}
void
radv_cmd_buffer_end_render_pass(struct radv_cmd_buffer *cmd_buffer)
{
- vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments);
- vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.subpass_sample_locs);
+ vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments);
+ vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.subpass_sample_locs);
- cmd_buffer->state.pass = NULL;
- cmd_buffer->state.subpass = NULL;
- cmd_buffer->state.attachments = NULL;
- cmd_buffer->state.framebuffer = NULL;
- cmd_buffer->state.subpass_sample_locs = NULL;
+ cmd_buffer->state.pass = NULL;
+ cmd_buffer->state.subpass = NULL;
+ cmd_buffer->state.attachments = NULL;
+ cmd_buffer->state.framebuffer = NULL;
+ cmd_buffer->state.subpass_sample_locs = NULL;
}
-void radv_CmdEndRenderPass2(
- VkCommandBuffer commandBuffer,
- const VkSubpassEndInfo* pSubpassEndInfo)
+void
+radv_CmdEndRenderPass2(VkCommandBuffer commandBuffer, const VkSubpassEndInfo *pSubpassEndInfo)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- radv_subpass_barrier(cmd_buffer, &cmd_buffer->state.pass->end_barrier);
+ radv_subpass_barrier(cmd_buffer, &cmd_buffer->state.pass->end_barrier);
- radv_cmd_buffer_end_subpass(cmd_buffer);
+ radv_cmd_buffer_end_subpass(cmd_buffer);
- radv_cmd_buffer_end_render_pass(cmd_buffer);
+ radv_cmd_buffer_end_render_pass(cmd_buffer);
}
/*
@@ -6185,1194 +5787,1073 @@ void radv_CmdEndRenderPass2(
* 0xfffffff0: Clear depth to 1.0
* 0x00000000: Clear depth to 0.0
*/
-static void radv_initialize_htile(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *range)
-{
- VkImageAspectFlags aspects = VK_IMAGE_ASPECT_DEPTH_BIT;
- struct radv_cmd_state *state = &cmd_buffer->state;
- uint32_t htile_value = radv_get_htile_initial_value(cmd_buffer->device, image);
- VkClearDepthStencilValue value = {0};
- struct radv_barrier_data barrier = {0};
-
- barrier.layout_transitions.init_mask_ram = 1;
- radv_describe_layout_transition(cmd_buffer, &barrier);
-
-  /* When transitioning from the LAYOUT_UNDEFINED layout, not everyone is
-   * consistent in considering previous rendering work for WAW hazards. */
- state->flush_bits |= radv_src_access_flush(cmd_buffer, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, image);
-
- state->flush_bits |= radv_clear_htile(cmd_buffer, image, range, htile_value);
-
- if (vk_format_has_stencil(image->vk_format))
- aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
+static void
+radv_initialize_htile(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *range)
+{
+ VkImageAspectFlags aspects = VK_IMAGE_ASPECT_DEPTH_BIT;
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ uint32_t htile_value = radv_get_htile_initial_value(cmd_buffer->device, image);
+ VkClearDepthStencilValue value = {0};
+ struct radv_barrier_data barrier = {0};
+
+ barrier.layout_transitions.init_mask_ram = 1;
+ radv_describe_layout_transition(cmd_buffer, &barrier);
+
+   /* When transitioning from the LAYOUT_UNDEFINED layout, not everyone is
+    * consistent in considering previous rendering work for WAW hazards. */
+ state->flush_bits |=
+ radv_src_access_flush(cmd_buffer, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, image);
+
+ state->flush_bits |= radv_clear_htile(cmd_buffer, image, range, htile_value);
+
+ if (vk_format_has_stencil(image->vk_format))
+ aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
+
+ radv_set_ds_clear_metadata(cmd_buffer, image, range, value, aspects);
+
+ if (radv_image_is_tc_compat_htile(image)) {
+      /* Initialize the TC-compat metadata value to 0 because by
+       * default DB_Z_INFO.RANGE_PRECISION is set to 1, and we only
+       * have to conditionally update its value when performing
+ * a fast depth clear.
+ */
+ radv_set_tc_compat_zrange_metadata(cmd_buffer, image, range, 0);
+ }
+}
+
+static void
+radv_handle_depth_image_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ VkImageLayout src_layout, bool src_render_loop,
+ VkImageLayout dst_layout, bool dst_render_loop,
+ unsigned src_queue_mask, unsigned dst_queue_mask,
+ const VkImageSubresourceRange *range,
+ struct radv_sample_locations_state *sample_locs)
+{
+ struct radv_device *device = cmd_buffer->device;
+
+ if (!radv_htile_enabled(image, range->baseMipLevel))
+ return;
+
+ if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
+ radv_initialize_htile(cmd_buffer, image, range);
+ } else if (!radv_layout_is_htile_compressed(device, image, src_layout, src_render_loop,
+ src_queue_mask) &&
+ radv_layout_is_htile_compressed(device, image, dst_layout, dst_render_loop,
+ dst_queue_mask)) {
+ radv_initialize_htile(cmd_buffer, image, range);
+ } else if (radv_layout_is_htile_compressed(device, image, src_layout, src_render_loop,
+ src_queue_mask) &&
+ !radv_layout_is_htile_compressed(device, image, dst_layout, dst_render_loop,
+ dst_queue_mask)) {
+ cmd_buffer->state.flush_bits |=
+ RADV_CMD_FLAG_FLUSH_AND_INV_DB | RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
+
+ radv_decompress_depth_stencil(cmd_buffer, image, range, sample_locs);
+
+ cmd_buffer->state.flush_bits |=
+ RADV_CMD_FLAG_FLUSH_AND_INV_DB | RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
+ }
+}
+
+static uint32_t
+radv_init_cmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *range)
+{
+ static const uint32_t cmask_clear_values[4] = {0xffffffff, 0xdddddddd, 0xeeeeeeee, 0xffffffff};
+ uint32_t log2_samples = util_logbase2(image->info.samples);
+ uint32_t value = cmask_clear_values[log2_samples];
+ struct radv_barrier_data barrier = {0};
+
+ barrier.layout_transitions.init_mask_ram = 1;
+ radv_describe_layout_transition(cmd_buffer, &barrier);
+
+ return radv_clear_cmask(cmd_buffer, image, range, value);
+}
+
+uint32_t
+radv_init_fmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *range)
+{
+ static const uint32_t fmask_clear_values[4] = {0x00000000, 0x02020202, 0xE4E4E4E4, 0x76543210};
+ uint32_t log2_samples = util_logbase2(image->info.samples);
+ uint32_t value = fmask_clear_values[log2_samples];
+ struct radv_barrier_data barrier = {0};
+
+ barrier.layout_transitions.init_mask_ram = 1;
+ radv_describe_layout_transition(cmd_buffer, &barrier);
+
+ return radv_clear_fmask(cmd_buffer, image, range, value);
+}
+
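/* A minimal sketch of the sample-count lookup used for the CMASK/FMASK init
 * values above: the table is indexed by log2 of the sample count. The log2
 * helper here stands in for util_logbase2(); the table is copied from the
 * code above.
 */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
   static const uint32_t fmask_clear_values[4] = {0x00000000, 0x02020202, 0xE4E4E4E4, 0x76543210};
   unsigned samples = 8;                                /* illustrative */
   unsigned log2_samples = 31 - __builtin_clz(samples); /* util_logbase2 equivalent */

   printf("FMASK clear value for %u samples: 0x%08x\n", samples, fmask_clear_values[log2_samples]);
   return 0;
}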
+uint32_t
+radv_init_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *range, uint32_t value)
+{
+ struct radv_barrier_data barrier = {0};
+ uint32_t flush_bits = 0;
+ unsigned size = 0;
+
+ barrier.layout_transitions.init_mask_ram = 1;
+ radv_describe_layout_transition(cmd_buffer, &barrier);
+
+ flush_bits |= radv_clear_dcc(cmd_buffer, image, range, value);
+
+ if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX8) {
+ /* When DCC is enabled with mipmaps, some levels might not
+ * support fast clears and we have to initialize them as "fully
+ * expanded".
+ */
+ /* Compute the size of all fast clearable DCC levels. */
+ for (unsigned i = 0; i < image->planes[0].surface.num_dcc_levels; i++) {
+ struct legacy_surf_level *surf_level = &image->planes[0].surface.u.legacy.level[i];
+ unsigned dcc_fast_clear_size =
+ surf_level->dcc_slice_fast_clear_size * image->info.array_size;
+
+ if (!dcc_fast_clear_size)
+ break;
- radv_set_ds_clear_metadata(cmd_buffer, image, range, value, aspects);
-
- if (radv_image_is_tc_compat_htile(image)) {
- /* Initialize the TC-compat metada value to 0 because by
- * default DB_Z_INFO.RANGE_PRECISION is set to 1, and we only
- * need have to conditionally update its value when performing
- * a fast depth clear.
- */
- radv_set_tc_compat_zrange_metadata(cmd_buffer, image, range, 0);
- }
-}
-
-static void radv_handle_depth_image_transition(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- VkImageLayout src_layout,
- bool src_render_loop,
- VkImageLayout dst_layout,
- bool dst_render_loop,
- unsigned src_queue_mask,
- unsigned dst_queue_mask,
- const VkImageSubresourceRange *range,
- struct radv_sample_locations_state *sample_locs)
-{
- struct radv_device *device = cmd_buffer->device;
-
- if (!radv_htile_enabled(image, range->baseMipLevel))
- return;
-
- if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
- radv_initialize_htile(cmd_buffer, image, range);
- } else if (!radv_layout_is_htile_compressed(device, image, src_layout, src_render_loop, src_queue_mask) &&
- radv_layout_is_htile_compressed(device, image, dst_layout, dst_render_loop, dst_queue_mask)) {
- radv_initialize_htile(cmd_buffer, image, range);
- } else if (radv_layout_is_htile_compressed(device, image, src_layout, src_render_loop, src_queue_mask) &&
- !radv_layout_is_htile_compressed(device, image, dst_layout, dst_render_loop, dst_queue_mask)) {
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB |
- RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
-
- radv_decompress_depth_stencil(cmd_buffer, image, range,
- sample_locs);
-
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB |
- RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
- }
-}
-
-static uint32_t radv_init_cmask(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *range)
-{
- static const uint32_t cmask_clear_values[4] = {
- 0xffffffff,
- 0xdddddddd,
- 0xeeeeeeee,
- 0xffffffff
- };
- uint32_t log2_samples = util_logbase2(image->info.samples);
- uint32_t value = cmask_clear_values[log2_samples];
- struct radv_barrier_data barrier = {0};
-
- barrier.layout_transitions.init_mask_ram = 1;
- radv_describe_layout_transition(cmd_buffer, &barrier);
-
- return radv_clear_cmask(cmd_buffer, image, range, value);
-}
-
-uint32_t radv_init_fmask(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *range)
-{
- static const uint32_t fmask_clear_values[4] = {
- 0x00000000,
- 0x02020202,
- 0xE4E4E4E4,
- 0x76543210
- };
- uint32_t log2_samples = util_logbase2(image->info.samples);
- uint32_t value = fmask_clear_values[log2_samples];
- struct radv_barrier_data barrier = {0};
-
- barrier.layout_transitions.init_mask_ram = 1;
- radv_describe_layout_transition(cmd_buffer, &barrier);
-
- return radv_clear_fmask(cmd_buffer, image, range, value);
-}
-
-uint32_t radv_init_dcc(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *range,
- uint32_t value)
-{
- struct radv_barrier_data barrier = {0};
- uint32_t flush_bits = 0;
- unsigned size = 0;
-
- barrier.layout_transitions.init_mask_ram = 1;
- radv_describe_layout_transition(cmd_buffer, &barrier);
-
- flush_bits |= radv_clear_dcc(cmd_buffer, image, range, value);
-
- if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX8) {
- /* When DCC is enabled with mipmaps, some levels might not
- * support fast clears and we have to initialize them as "fully
- * expanded".
- */
- /* Compute the size of all fast clearable DCC levels. */
- for (unsigned i = 0; i < image->planes[0].surface.num_dcc_levels; i++) {
- struct legacy_surf_level *surf_level =
- &image->planes[0].surface.u.legacy.level[i];
- unsigned dcc_fast_clear_size =
- surf_level->dcc_slice_fast_clear_size * image->info.array_size;
-
- if (!dcc_fast_clear_size)
- break;
-
- size = surf_level->dcc_offset + dcc_fast_clear_size;
- }
-
- /* Initialize the mipmap levels without DCC. */
- if (size != image->planes[0].surface.dcc_size) {
- flush_bits |= radv_fill_buffer(cmd_buffer, image, image->bo,
- image->offset + image->planes[0].surface.dcc_offset + size,
- image->planes[0].surface.dcc_size - size,
- 0xffffffff);
- }
- }
-
- return flush_bits;
+ size = surf_level->dcc_offset + dcc_fast_clear_size;
+ }
+
+ /* Initialize the mipmap levels without DCC. */
+ if (size != image->planes[0].surface.dcc_size) {
+ flush_bits |= radv_fill_buffer(cmd_buffer, image, image->bo,
+ image->offset + image->planes[0].surface.dcc_offset + size,
+ image->planes[0].surface.dcc_size - size, 0xffffffff);
+ }
+ }
+
+ return flush_bits;
}
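
As a side note on the GFX8 path above: the loop walks the mip levels until one reports no fast-clearable DCC, and everything past that offset is filled with 0xffffffff ("fully expanded"). Below is a minimal standalone sketch of that sizing logic; the struct and the sample values are hypothetical stand-ins for the real surface layout, not part of this change.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for one mip level of a legacy (GFX8) DCC layout;
 * the real driver reads these fields from the surface description. */
struct dcc_level {
   uint64_t dcc_offset;                /* byte offset of this level's DCC */
   uint64_t dcc_slice_fast_clear_size; /* 0 if the level cannot be fast cleared */
};

/* Bytes covered by fast-clearable levels; everything from this offset up to
 * the total DCC size would be initialized to 0xffffffff ("fully expanded"). */
static uint64_t
fast_clearable_dcc_bytes(const struct dcc_level *levels, unsigned num_levels,
                         unsigned array_size)
{
   uint64_t size = 0;

   for (unsigned i = 0; i < num_levels; i++) {
      uint64_t level_size = levels[i].dcc_slice_fast_clear_size * array_size;
      if (!level_size)
         break; /* first level without fast-clear support ends the walk */
      size = levels[i].dcc_offset + level_size;
   }
   return size;
}

int
main(void)
{
   const struct dcc_level levels[3] = {{0, 4096}, {4096, 1024}, {5120, 0}};
   const uint64_t dcc_size = 6144; /* hypothetical total DCC size */
   uint64_t size = fast_clearable_dcc_bytes(levels, 3, 1);

   printf("fast-clearable: %llu bytes, fill the remaining %llu with 0xffffffff\n",
          (unsigned long long)size, (unsigned long long)(dcc_size - size));
   return 0;
}
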
/**
* Initialize DCC/FMASK/CMASK metadata for a color image.
*/
-static void radv_init_color_image_metadata(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- VkImageLayout src_layout,
- bool src_render_loop,
- VkImageLayout dst_layout,
- bool dst_render_loop,
- unsigned src_queue_mask,
- unsigned dst_queue_mask,
- const VkImageSubresourceRange *range)
-{
- uint32_t flush_bits = 0;
-
- /* Transitioning from LAYOUT_UNDEFINED layout not everyone is
- * consistent in considering previous rendering work for WAW hazards.
- */
- cmd_buffer->state.flush_bits |=
- radv_src_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, image);
-
- if (radv_image_has_cmask(image)) {
- flush_bits |= radv_init_cmask(cmd_buffer, image, range);
- }
-
- if (radv_image_has_fmask(image)) {
- flush_bits |= radv_init_fmask(cmd_buffer, image, range);
- }
-
- if (radv_dcc_enabled(image, range->baseMipLevel)) {
- uint32_t value = 0xffffffffu; /* Fully expanded mode. */
-
- if (radv_layout_dcc_compressed(cmd_buffer->device, image, dst_layout,
- dst_render_loop,
- dst_queue_mask)) {
- value = 0u;
- }
-
- flush_bits |= radv_init_dcc(cmd_buffer, image, range, value);
- }
-
- if (radv_image_has_cmask(image) ||
- radv_dcc_enabled(image, range->baseMipLevel)) {
- radv_update_fce_metadata(cmd_buffer, image, range, false);
-
- uint32_t color_values[2] = {0};
- radv_set_color_clear_metadata(cmd_buffer, image, range,
- color_values);
- }
-
- cmd_buffer->state.flush_bits |= flush_bits;
-}
-
-static void radv_retile_transition(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- VkImageLayout src_layout,
- VkImageLayout dst_layout,
- unsigned dst_queue_mask)
-{
- if (src_layout != VK_IMAGE_LAYOUT_PRESENT_SRC_KHR &&
- (dst_layout == VK_IMAGE_LAYOUT_PRESENT_SRC_KHR ||
- (dst_queue_mask & (1u << RADV_QUEUE_FOREIGN))))
- radv_retile_dcc(cmd_buffer, image);
+static void
+radv_init_color_image_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ VkImageLayout src_layout, bool src_render_loop,
+ VkImageLayout dst_layout, bool dst_render_loop,
+ unsigned src_queue_mask, unsigned dst_queue_mask,
+ const VkImageSubresourceRange *range)
+{
+ uint32_t flush_bits = 0;
+
+   /* When transitioning from the LAYOUT_UNDEFINED layout, not everyone is
+    * consistent in considering previous rendering work for WAW hazards.
+ */
+ cmd_buffer->state.flush_bits |=
+ radv_src_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, image);
+
+ if (radv_image_has_cmask(image)) {
+ flush_bits |= radv_init_cmask(cmd_buffer, image, range);
+ }
+
+ if (radv_image_has_fmask(image)) {
+ flush_bits |= radv_init_fmask(cmd_buffer, image, range);
+ }
+
+ if (radv_dcc_enabled(image, range->baseMipLevel)) {
+ uint32_t value = 0xffffffffu; /* Fully expanded mode. */
+
+ if (radv_layout_dcc_compressed(cmd_buffer->device, image, dst_layout, dst_render_loop,
+ dst_queue_mask)) {
+ value = 0u;
+ }
+
+ flush_bits |= radv_init_dcc(cmd_buffer, image, range, value);
+ }
+
+ if (radv_image_has_cmask(image) || radv_dcc_enabled(image, range->baseMipLevel)) {
+ radv_update_fce_metadata(cmd_buffer, image, range, false);
+
+ uint32_t color_values[2] = {0};
+ radv_set_color_clear_metadata(cmd_buffer, image, range, color_values);
+ }
+
+ cmd_buffer->state.flush_bits |= flush_bits;
+}
+
+static void
+radv_retile_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ VkImageLayout src_layout, VkImageLayout dst_layout, unsigned dst_queue_mask)
+{
+ if (src_layout != VK_IMAGE_LAYOUT_PRESENT_SRC_KHR &&
+ (dst_layout == VK_IMAGE_LAYOUT_PRESENT_SRC_KHR ||
+ (dst_queue_mask & (1u << RADV_QUEUE_FOREIGN))))
+ radv_retile_dcc(cmd_buffer, image);
}
/**
* Handle color image transitions for DCC/FMASK/CMASK.
*/
-static void radv_handle_color_image_transition(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- VkImageLayout src_layout,
- bool src_render_loop,
- VkImageLayout dst_layout,
- bool dst_render_loop,
- unsigned src_queue_mask,
- unsigned dst_queue_mask,
- const VkImageSubresourceRange *range)
-{
- bool dcc_decompressed = false, fast_clear_flushed = false;
-
- if (!radv_image_has_cmask(image) &&
- !radv_image_has_fmask(image) &&
- !radv_dcc_enabled(image, range->baseMipLevel))
- return;
-
- if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
- radv_init_color_image_metadata(cmd_buffer, image,
- src_layout, src_render_loop,
- dst_layout, dst_render_loop,
- src_queue_mask, dst_queue_mask,
- range);
-
- if (image->retile_map)
- radv_retile_transition(cmd_buffer, image, src_layout, dst_layout, dst_queue_mask);
- return;
- }
-
- if (radv_dcc_enabled(image, range->baseMipLevel)) {
- if (src_layout == VK_IMAGE_LAYOUT_PREINITIALIZED) {
- cmd_buffer->state.flush_bits |=
- radv_init_dcc(cmd_buffer, image, range, 0xffffffffu);
- } else if (radv_layout_dcc_compressed(cmd_buffer->device, image, src_layout, src_render_loop, src_queue_mask) &&
- !radv_layout_dcc_compressed(cmd_buffer->device, image, dst_layout, dst_render_loop, dst_queue_mask)) {
- radv_decompress_dcc(cmd_buffer, image, range);
- dcc_decompressed = true;
- } else if (radv_layout_can_fast_clear(cmd_buffer->device, image, src_layout,
- src_render_loop, src_queue_mask) &&
- !radv_layout_can_fast_clear(cmd_buffer->device, image, dst_layout,
- dst_render_loop, dst_queue_mask)) {
- radv_fast_clear_flush_image_inplace(cmd_buffer, image, range);
- fast_clear_flushed = true;
- }
-
- if (image->retile_map)
- radv_retile_transition(cmd_buffer, image, src_layout, dst_layout, dst_queue_mask);
- } else if (radv_image_has_cmask(image) || radv_image_has_fmask(image)) {
- if (radv_layout_can_fast_clear(cmd_buffer->device, image, src_layout,
- src_render_loop, src_queue_mask) &&
- !radv_layout_can_fast_clear(cmd_buffer->device, image, dst_layout,
- dst_render_loop, dst_queue_mask)) {
- radv_fast_clear_flush_image_inplace(cmd_buffer, image, range);
- fast_clear_flushed = true;
- }
- }
-
- /* MSAA color decompress. */
- if (radv_image_has_fmask(image) &&
- (image->usage & (VK_IMAGE_USAGE_STORAGE_BIT |
- VK_IMAGE_USAGE_TRANSFER_DST_BIT)) &&
- radv_layout_fmask_compressed(cmd_buffer->device, image,
- src_layout, src_queue_mask) &&
- !radv_layout_fmask_compressed(cmd_buffer->device, image,
- dst_layout, dst_queue_mask)) {
- if (radv_dcc_enabled(image, range->baseMipLevel) &&
- !radv_image_use_dcc_image_stores(cmd_buffer->device, image) &&
- !dcc_decompressed) {
- /* A DCC decompress is required before expanding FMASK
- * when DCC stores aren't supported to avoid being in
- * a state where DCC is compressed and the main
- * surface is uncompressed.
- */
- radv_decompress_dcc(cmd_buffer, image, range);
- } else if (!fast_clear_flushed) {
- /* A FMASK decompress is required before expanding
- * FMASK.
- */
- radv_fast_clear_flush_image_inplace(cmd_buffer, image, range);
- }
-
- struct radv_barrier_data barrier = {0};
- barrier.layout_transitions.fmask_color_expand = 1;
- radv_describe_layout_transition(cmd_buffer, &barrier);
-
- radv_expand_fmask_image_inplace(cmd_buffer, image, range);
- }
+static void
+radv_handle_color_image_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ VkImageLayout src_layout, bool src_render_loop,
+ VkImageLayout dst_layout, bool dst_render_loop,
+ unsigned src_queue_mask, unsigned dst_queue_mask,
+ const VkImageSubresourceRange *range)
+{
+ bool dcc_decompressed = false, fast_clear_flushed = false;
+
+ if (!radv_image_has_cmask(image) && !radv_image_has_fmask(image) &&
+ !radv_dcc_enabled(image, range->baseMipLevel))
+ return;
+
+ if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
+ radv_init_color_image_metadata(cmd_buffer, image, src_layout, src_render_loop, dst_layout,
+ dst_render_loop, src_queue_mask, dst_queue_mask, range);
+
+ if (image->retile_map)
+ radv_retile_transition(cmd_buffer, image, src_layout, dst_layout, dst_queue_mask);
+ return;
+ }
+
+ if (radv_dcc_enabled(image, range->baseMipLevel)) {
+ if (src_layout == VK_IMAGE_LAYOUT_PREINITIALIZED) {
+ cmd_buffer->state.flush_bits |= radv_init_dcc(cmd_buffer, image, range, 0xffffffffu);
+ } else if (radv_layout_dcc_compressed(cmd_buffer->device, image, src_layout, src_render_loop,
+ src_queue_mask) &&
+ !radv_layout_dcc_compressed(cmd_buffer->device, image, dst_layout, dst_render_loop,
+ dst_queue_mask)) {
+ radv_decompress_dcc(cmd_buffer, image, range);
+ dcc_decompressed = true;
+ } else if (radv_layout_can_fast_clear(cmd_buffer->device, image, src_layout, src_render_loop,
+ src_queue_mask) &&
+ !radv_layout_can_fast_clear(cmd_buffer->device, image, dst_layout, dst_render_loop,
+ dst_queue_mask)) {
+ radv_fast_clear_flush_image_inplace(cmd_buffer, image, range);
+ fast_clear_flushed = true;
+ }
+
+ if (image->retile_map)
+ radv_retile_transition(cmd_buffer, image, src_layout, dst_layout, dst_queue_mask);
+ } else if (radv_image_has_cmask(image) || radv_image_has_fmask(image)) {
+ if (radv_layout_can_fast_clear(cmd_buffer->device, image, src_layout, src_render_loop,
+ src_queue_mask) &&
+ !radv_layout_can_fast_clear(cmd_buffer->device, image, dst_layout, dst_render_loop,
+ dst_queue_mask)) {
+ radv_fast_clear_flush_image_inplace(cmd_buffer, image, range);
+ fast_clear_flushed = true;
+ }
+ }
+
+ /* MSAA color decompress. */
+ if (radv_image_has_fmask(image) &&
+ (image->usage & (VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT)) &&
+ radv_layout_fmask_compressed(cmd_buffer->device, image, src_layout, src_queue_mask) &&
+ !radv_layout_fmask_compressed(cmd_buffer->device, image, dst_layout, dst_queue_mask)) {
+ if (radv_dcc_enabled(image, range->baseMipLevel) &&
+ !radv_image_use_dcc_image_stores(cmd_buffer->device, image) && !dcc_decompressed) {
+ /* A DCC decompress is required before expanding FMASK
+ * when DCC stores aren't supported to avoid being in
+ * a state where DCC is compressed and the main
+ * surface is uncompressed.
+ */
+ radv_decompress_dcc(cmd_buffer, image, range);
+ } else if (!fast_clear_flushed) {
+ /* A FMASK decompress is required before expanding
+ * FMASK.
+ */
+ radv_fast_clear_flush_image_inplace(cmd_buffer, image, range);
+ }
+
+ struct radv_barrier_data barrier = {0};
+ barrier.layout_transitions.fmask_color_expand = 1;
+ radv_describe_layout_transition(cmd_buffer, &barrier);
+
+ radv_expand_fmask_image_inplace(cmd_buffer, image, range);
+ }
}
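
For reference, the UNDEFINED branch above is typically reached by an application-side barrier like the one sketched below. This is a hedged, minimal example using core Vulkan 1.0 only; `cmd` and `image` are assumed to be valid handles created elsewhere.

#include <stddef.h>
#include <vulkan/vulkan.h>

/* Move a color image whose contents are irrelevant out of UNDEFINED; this is
 * the transition that the driver services by (re)initializing the color
 * metadata (CMASK/FMASK/DCC) for the selected subresource range. */
static void
record_undefined_to_color_barrier(VkCommandBuffer cmd, VkImage image)
{
   VkImageMemoryBarrier barrier = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
      .srcAccessMask = 0,
      .dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
      .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
      .newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
      .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .image = image,
      .subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1},
   };

   vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                        VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 0 /* dependencyFlags */,
                        0, NULL, 0, NULL, 1, &barrier);
}
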
-static void radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- VkImageLayout src_layout,
- bool src_render_loop,
- VkImageLayout dst_layout,
- bool dst_render_loop,
- uint32_t src_family,
- uint32_t dst_family,
- const VkImageSubresourceRange *range,
- struct radv_sample_locations_state *sample_locs)
-{
- if (image->exclusive && src_family != dst_family) {
- /* This is an acquire or a release operation and there will be
- * a corresponding release/acquire. Do the transition in the
- * most flexible queue. */
-
- assert(src_family == cmd_buffer->queue_family_index ||
- dst_family == cmd_buffer->queue_family_index);
-
- if (src_family == VK_QUEUE_FAMILY_EXTERNAL ||
- src_family == VK_QUEUE_FAMILY_FOREIGN_EXT)
- return;
-
- if (cmd_buffer->queue_family_index == RADV_QUEUE_TRANSFER)
- return;
-
- if (cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE &&
- (src_family == RADV_QUEUE_GENERAL ||
- dst_family == RADV_QUEUE_GENERAL))
- return;
- }
-
- if (src_layout == dst_layout && src_render_loop == dst_render_loop)
- return;
-
- unsigned src_queue_mask =
- radv_image_queue_family_mask(image, src_family,
- cmd_buffer->queue_family_index);
- unsigned dst_queue_mask =
- radv_image_queue_family_mask(image, dst_family,
- cmd_buffer->queue_family_index);
-
- if (vk_format_has_depth(image->vk_format)) {
- radv_handle_depth_image_transition(cmd_buffer, image,
- src_layout, src_render_loop,
- dst_layout, dst_render_loop,
- src_queue_mask, dst_queue_mask,
- range, sample_locs);
- } else {
- radv_handle_color_image_transition(cmd_buffer, image,
- src_layout, src_render_loop,
- dst_layout, dst_render_loop,
- src_queue_mask, dst_queue_mask,
- range);
- }
+static void
+radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ VkImageLayout src_layout, bool src_render_loop,
+ VkImageLayout dst_layout, bool dst_render_loop, uint32_t src_family,
+ uint32_t dst_family, const VkImageSubresourceRange *range,
+ struct radv_sample_locations_state *sample_locs)
+{
+ if (image->exclusive && src_family != dst_family) {
+ /* This is an acquire or a release operation and there will be
+ * a corresponding release/acquire. Do the transition in the
+ * most flexible queue. */
+
+ assert(src_family == cmd_buffer->queue_family_index ||
+ dst_family == cmd_buffer->queue_family_index);
+
+ if (src_family == VK_QUEUE_FAMILY_EXTERNAL || src_family == VK_QUEUE_FAMILY_FOREIGN_EXT)
+ return;
+
+ if (cmd_buffer->queue_family_index == RADV_QUEUE_TRANSFER)
+ return;
+
+ if (cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE &&
+ (src_family == RADV_QUEUE_GENERAL || dst_family == RADV_QUEUE_GENERAL))
+ return;
+ }
+
+ if (src_layout == dst_layout && src_render_loop == dst_render_loop)
+ return;
+
+ unsigned src_queue_mask =
+ radv_image_queue_family_mask(image, src_family, cmd_buffer->queue_family_index);
+ unsigned dst_queue_mask =
+ radv_image_queue_family_mask(image, dst_family, cmd_buffer->queue_family_index);
+
+ if (vk_format_has_depth(image->vk_format)) {
+ radv_handle_depth_image_transition(cmd_buffer, image, src_layout, src_render_loop, dst_layout,
+ dst_render_loop, src_queue_mask, dst_queue_mask, range,
+ sample_locs);
+ } else {
+ radv_handle_color_image_transition(cmd_buffer, image, src_layout, src_render_loop, dst_layout,
+ dst_render_loop, src_queue_mask, dst_queue_mask, range);
+ }
}
struct radv_barrier_info {
- enum rgp_barrier_reason reason;
- uint32_t eventCount;
- const VkEvent *pEvents;
- VkPipelineStageFlags srcStageMask;
- VkPipelineStageFlags dstStageMask;
+ enum rgp_barrier_reason reason;
+ uint32_t eventCount;
+ const VkEvent *pEvents;
+ VkPipelineStageFlags srcStageMask;
+ VkPipelineStageFlags dstStageMask;
};
static void
-radv_barrier(struct radv_cmd_buffer *cmd_buffer,
- uint32_t memoryBarrierCount,
- const VkMemoryBarrier *pMemoryBarriers,
- uint32_t bufferMemoryBarrierCount,
- const VkBufferMemoryBarrier *pBufferMemoryBarriers,
- uint32_t imageMemoryBarrierCount,
- const VkImageMemoryBarrier *pImageMemoryBarriers,
- const struct radv_barrier_info *info)
-{
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- enum radv_cmd_flush_bits src_flush_bits = 0;
- enum radv_cmd_flush_bits dst_flush_bits = 0;
-
- radv_describe_barrier_start(cmd_buffer, info->reason);
-
- for (unsigned i = 0; i < info->eventCount; ++i) {
- RADV_FROM_HANDLE(radv_event, event, info->pEvents[i]);
- uint64_t va = radv_buffer_get_va(event->bo);
-
- radv_cs_add_buffer(cmd_buffer->device->ws, cs, event->bo);
-
- ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 7);
-
- radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, va, 1, 0xffffffff);
- assert(cmd_buffer->cs->cdw <= cdw_max);
- }
-
- for (uint32_t i = 0; i < memoryBarrierCount; i++) {
- src_flush_bits |= radv_src_access_flush(cmd_buffer, pMemoryBarriers[i].srcAccessMask,
- NULL);
- dst_flush_bits |= radv_dst_access_flush(cmd_buffer, pMemoryBarriers[i].dstAccessMask,
- NULL);
- }
-
- for (uint32_t i = 0; i < bufferMemoryBarrierCount; i++) {
- src_flush_bits |= radv_src_access_flush(cmd_buffer, pBufferMemoryBarriers[i].srcAccessMask,
- NULL);
- dst_flush_bits |= radv_dst_access_flush(cmd_buffer, pBufferMemoryBarriers[i].dstAccessMask,
- NULL);
- }
-
- for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) {
- RADV_FROM_HANDLE(radv_image, image, pImageMemoryBarriers[i].image);
-
- src_flush_bits |= radv_src_access_flush(cmd_buffer, pImageMemoryBarriers[i].srcAccessMask,
- image);
- dst_flush_bits |= radv_dst_access_flush(cmd_buffer, pImageMemoryBarriers[i].dstAccessMask,
- image);
- }
-
- /* The Vulkan spec 1.1.98 says:
- *
- * "An execution dependency with only
- * VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT in the destination stage mask
- * will only prevent that stage from executing in subsequently
- * submitted commands. As this stage does not perform any actual
- * execution, this is not observable - in effect, it does not delay
- * processing of subsequent commands. Similarly an execution dependency
- * with only VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT in the source stage mask
- * will effectively not wait for any prior commands to complete."
- */
- if (info->dstStageMask != VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)
- radv_stage_flush(cmd_buffer, info->srcStageMask);
- cmd_buffer->state.flush_bits |= src_flush_bits;
-
- for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) {
- RADV_FROM_HANDLE(radv_image, image, pImageMemoryBarriers[i].image);
-
- const struct VkSampleLocationsInfoEXT *sample_locs_info =
- vk_find_struct_const(pImageMemoryBarriers[i].pNext,
- SAMPLE_LOCATIONS_INFO_EXT);
- struct radv_sample_locations_state sample_locations = {0};
-
- if (sample_locs_info) {
- assert(image->flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT);
- sample_locations.per_pixel = sample_locs_info->sampleLocationsPerPixel;
- sample_locations.grid_size = sample_locs_info->sampleLocationGridSize;
- sample_locations.count = sample_locs_info->sampleLocationsCount;
- typed_memcpy(&sample_locations.locations[0],
- sample_locs_info->pSampleLocations,
- sample_locs_info->sampleLocationsCount);
- }
-
- radv_handle_image_transition(cmd_buffer, image,
- pImageMemoryBarriers[i].oldLayout,
- false, /* Outside of a renderpass we are never in a renderloop */
- pImageMemoryBarriers[i].newLayout,
- false, /* Outside of a renderpass we are never in a renderloop */
- pImageMemoryBarriers[i].srcQueueFamilyIndex,
- pImageMemoryBarriers[i].dstQueueFamilyIndex,
- &pImageMemoryBarriers[i].subresourceRange,
- sample_locs_info ? &sample_locations : NULL);
- }
-
- /* Make sure CP DMA is idle because the driver might have performed a
- * DMA operation for copying or filling buffers/images.
- */
- if (info->srcStageMask & (VK_PIPELINE_STAGE_TRANSFER_BIT |
- VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT))
- si_cp_dma_wait_for_idle(cmd_buffer);
-
- cmd_buffer->state.flush_bits |= dst_flush_bits;
-
- radv_describe_barrier_end(cmd_buffer);
-}
-
-void radv_CmdPipelineBarrier(
- VkCommandBuffer commandBuffer,
- VkPipelineStageFlags srcStageMask,
- VkPipelineStageFlags destStageMask,
- VkBool32 byRegion,
- uint32_t memoryBarrierCount,
- const VkMemoryBarrier* pMemoryBarriers,
- uint32_t bufferMemoryBarrierCount,
- const VkBufferMemoryBarrier* pBufferMemoryBarriers,
- uint32_t imageMemoryBarrierCount,
- const VkImageMemoryBarrier* pImageMemoryBarriers)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_barrier_info info;
-
- info.reason = RGP_BARRIER_EXTERNAL_CMD_PIPELINE_BARRIER;
- info.eventCount = 0;
- info.pEvents = NULL;
- info.srcStageMask = srcStageMask;
- info.dstStageMask = destStageMask;
-
- radv_barrier(cmd_buffer, memoryBarrierCount, pMemoryBarriers,
- bufferMemoryBarrierCount, pBufferMemoryBarriers,
- imageMemoryBarrierCount, pImageMemoryBarriers, &info);
-}
-
-
-static void write_event(struct radv_cmd_buffer *cmd_buffer,
- struct radv_event *event,
- VkPipelineStageFlags stageMask,
- unsigned value)
-{
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- uint64_t va = radv_buffer_get_va(event->bo);
-
- si_emit_cache_flush(cmd_buffer);
-
- radv_cs_add_buffer(cmd_buffer->device->ws, cs, event->bo);
-
- ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 28);
-
- /* Flags that only require a top-of-pipe event. */
- VkPipelineStageFlags top_of_pipe_flags =
- VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
-
- /* Flags that only require a post-index-fetch event. */
- VkPipelineStageFlags post_index_fetch_flags =
- top_of_pipe_flags |
- VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
- VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
-
- /* Flags that only require signaling post PS. */
- VkPipelineStageFlags post_ps_flags =
- post_index_fetch_flags |
- VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
- VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT |
- VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT |
- VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT |
- VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT |
- VK_PIPELINE_STAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR |
- VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
- VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
-
- /* Flags that only require signaling post CS. */
- VkPipelineStageFlags post_cs_flags =
- VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
-
- /* Make sure CP DMA is idle because the driver might have performed a
- * DMA operation for copying or filling buffers/images.
- */
- if (stageMask & (VK_PIPELINE_STAGE_TRANSFER_BIT |
- VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT))
- si_cp_dma_wait_for_idle(cmd_buffer);
-
- if (!(stageMask & ~top_of_pipe_flags)) {
- /* Just need to sync the PFP engine. */
- radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
- radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
- S_370_WR_CONFIRM(1) |
- S_370_ENGINE_SEL(V_370_PFP));
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- radeon_emit(cs, value);
- } else if (!(stageMask & ~post_index_fetch_flags)) {
- /* Sync ME because PFP reads index and indirect buffers. */
- radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
- radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
- S_370_WR_CONFIRM(1) |
- S_370_ENGINE_SEL(V_370_ME));
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- radeon_emit(cs, value);
- } else {
- unsigned event_type;
-
- if (!(stageMask & ~post_ps_flags)) {
- /* Sync previous fragment shaders. */
- event_type = V_028A90_PS_DONE;
- } else if (!(stageMask & ~post_cs_flags)) {
- /* Sync previous compute shaders. */
- event_type = V_028A90_CS_DONE;
- } else {
- /* Otherwise, sync all prior GPU work. */
- event_type = V_028A90_BOTTOM_OF_PIPE_TS;
- }
-
- si_cs_emit_write_event_eop(cs,
- cmd_buffer->device->physical_device->rad_info.chip_class,
- radv_cmd_buffer_uses_mec(cmd_buffer),
- event_type, 0, EOP_DST_SEL_MEM,
- EOP_DATA_SEL_VALUE_32BIT, va, value,
- cmd_buffer->gfx9_eop_bug_va);
- }
-
- assert(cmd_buffer->cs->cdw <= cdw_max);
-}
-
-void radv_CmdSetEvent(VkCommandBuffer commandBuffer,
- VkEvent _event,
- VkPipelineStageFlags stageMask)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_event, event, _event);
-
- write_event(cmd_buffer, event, stageMask, 1);
-}
-
-void radv_CmdResetEvent(VkCommandBuffer commandBuffer,
- VkEvent _event,
- VkPipelineStageFlags stageMask)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_event, event, _event);
-
- write_event(cmd_buffer, event, stageMask, 0);
-}
-
-void radv_CmdWaitEvents(VkCommandBuffer commandBuffer,
- uint32_t eventCount,
- const VkEvent* pEvents,
- VkPipelineStageFlags srcStageMask,
- VkPipelineStageFlags dstStageMask,
- uint32_t memoryBarrierCount,
- const VkMemoryBarrier* pMemoryBarriers,
- uint32_t bufferMemoryBarrierCount,
- const VkBufferMemoryBarrier* pBufferMemoryBarriers,
- uint32_t imageMemoryBarrierCount,
- const VkImageMemoryBarrier* pImageMemoryBarriers)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_barrier_info info;
-
- info.reason = RGP_BARRIER_EXTERNAL_CMD_WAIT_EVENTS;
- info.eventCount = eventCount;
- info.pEvents = pEvents;
- info.srcStageMask = 0;
-
- radv_barrier(cmd_buffer, memoryBarrierCount, pMemoryBarriers,
- bufferMemoryBarrierCount, pBufferMemoryBarriers,
- imageMemoryBarrierCount, pImageMemoryBarriers, &info);
-}
-
-
-void radv_CmdSetDeviceMask(VkCommandBuffer commandBuffer,
- uint32_t deviceMask)
+radv_barrier(struct radv_cmd_buffer *cmd_buffer, uint32_t memoryBarrierCount,
+ const VkMemoryBarrier *pMemoryBarriers, uint32_t bufferMemoryBarrierCount,
+ const VkBufferMemoryBarrier *pBufferMemoryBarriers, uint32_t imageMemoryBarrierCount,
+ const VkImageMemoryBarrier *pImageMemoryBarriers, const struct radv_barrier_info *info)
+{
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ enum radv_cmd_flush_bits src_flush_bits = 0;
+ enum radv_cmd_flush_bits dst_flush_bits = 0;
+
+ radv_describe_barrier_start(cmd_buffer, info->reason);
+
+ for (unsigned i = 0; i < info->eventCount; ++i) {
+ RADV_FROM_HANDLE(radv_event, event, info->pEvents[i]);
+ uint64_t va = radv_buffer_get_va(event->bo);
+
+ radv_cs_add_buffer(cmd_buffer->device->ws, cs, event->bo);
+
+ ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 7);
+
+ radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, va, 1, 0xffffffff);
+ assert(cmd_buffer->cs->cdw <= cdw_max);
+ }
+
+ for (uint32_t i = 0; i < memoryBarrierCount; i++) {
+ src_flush_bits |= radv_src_access_flush(cmd_buffer, pMemoryBarriers[i].srcAccessMask, NULL);
+ dst_flush_bits |= radv_dst_access_flush(cmd_buffer, pMemoryBarriers[i].dstAccessMask, NULL);
+ }
+
+ for (uint32_t i = 0; i < bufferMemoryBarrierCount; i++) {
+ src_flush_bits |=
+ radv_src_access_flush(cmd_buffer, pBufferMemoryBarriers[i].srcAccessMask, NULL);
+ dst_flush_bits |=
+ radv_dst_access_flush(cmd_buffer, pBufferMemoryBarriers[i].dstAccessMask, NULL);
+ }
+
+ for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) {
+ RADV_FROM_HANDLE(radv_image, image, pImageMemoryBarriers[i].image);
+
+ src_flush_bits |=
+ radv_src_access_flush(cmd_buffer, pImageMemoryBarriers[i].srcAccessMask, image);
+ dst_flush_bits |=
+ radv_dst_access_flush(cmd_buffer, pImageMemoryBarriers[i].dstAccessMask, image);
+ }
+
+ /* The Vulkan spec 1.1.98 says:
+ *
+ * "An execution dependency with only
+ * VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT in the destination stage mask
+ * will only prevent that stage from executing in subsequently
+ * submitted commands. As this stage does not perform any actual
+ * execution, this is not observable - in effect, it does not delay
+ * processing of subsequent commands. Similarly an execution dependency
+ * with only VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT in the source stage mask
+ * will effectively not wait for any prior commands to complete."
+ */
+ if (info->dstStageMask != VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)
+ radv_stage_flush(cmd_buffer, info->srcStageMask);
+ cmd_buffer->state.flush_bits |= src_flush_bits;
+
+ for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) {
+ RADV_FROM_HANDLE(radv_image, image, pImageMemoryBarriers[i].image);
+
+ const struct VkSampleLocationsInfoEXT *sample_locs_info =
+ vk_find_struct_const(pImageMemoryBarriers[i].pNext, SAMPLE_LOCATIONS_INFO_EXT);
+ struct radv_sample_locations_state sample_locations = {0};
+
+ if (sample_locs_info) {
+ assert(image->flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT);
+ sample_locations.per_pixel = sample_locs_info->sampleLocationsPerPixel;
+ sample_locations.grid_size = sample_locs_info->sampleLocationGridSize;
+ sample_locations.count = sample_locs_info->sampleLocationsCount;
+ typed_memcpy(&sample_locations.locations[0], sample_locs_info->pSampleLocations,
+ sample_locs_info->sampleLocationsCount);
+ }
+
+ radv_handle_image_transition(
+ cmd_buffer, image, pImageMemoryBarriers[i].oldLayout,
+ false, /* Outside of a renderpass we are never in a renderloop */
+ pImageMemoryBarriers[i].newLayout,
+ false, /* Outside of a renderpass we are never in a renderloop */
+ pImageMemoryBarriers[i].srcQueueFamilyIndex, pImageMemoryBarriers[i].dstQueueFamilyIndex,
+ &pImageMemoryBarriers[i].subresourceRange, sample_locs_info ? &sample_locations : NULL);
+ }
+
+ /* Make sure CP DMA is idle because the driver might have performed a
+ * DMA operation for copying or filling buffers/images.
+ */
+ if (info->srcStageMask & (VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT))
+ si_cp_dma_wait_for_idle(cmd_buffer);
+
+ cmd_buffer->state.flush_bits |= dst_flush_bits;
+
+ radv_describe_barrier_end(cmd_buffer);
+}
+
+void
+radv_CmdPipelineBarrier(VkCommandBuffer commandBuffer, VkPipelineStageFlags srcStageMask,
+ VkPipelineStageFlags destStageMask, VkBool32 byRegion,
+ uint32_t memoryBarrierCount, const VkMemoryBarrier *pMemoryBarriers,
+ uint32_t bufferMemoryBarrierCount,
+ const VkBufferMemoryBarrier *pBufferMemoryBarriers,
+ uint32_t imageMemoryBarrierCount,
+ const VkImageMemoryBarrier *pImageMemoryBarriers)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_barrier_info info;
+
+ info.reason = RGP_BARRIER_EXTERNAL_CMD_PIPELINE_BARRIER;
+ info.eventCount = 0;
+ info.pEvents = NULL;
+ info.srcStageMask = srcStageMask;
+ info.dstStageMask = destStageMask;
+
+ radv_barrier(cmd_buffer, memoryBarrierCount, pMemoryBarriers, bufferMemoryBarrierCount,
+ pBufferMemoryBarriers, imageMemoryBarrierCount, pImageMemoryBarriers, &info);
+}
+
+static void
+write_event(struct radv_cmd_buffer *cmd_buffer, struct radv_event *event,
+ VkPipelineStageFlags stageMask, unsigned value)
+{
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ uint64_t va = radv_buffer_get_va(event->bo);
+
+ si_emit_cache_flush(cmd_buffer);
+
+ radv_cs_add_buffer(cmd_buffer->device->ws, cs, event->bo);
+
+ ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 28);
+
+ /* Flags that only require a top-of-pipe event. */
+ VkPipelineStageFlags top_of_pipe_flags = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
+
+ /* Flags that only require a post-index-fetch event. */
+ VkPipelineStageFlags post_index_fetch_flags =
+ top_of_pipe_flags | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
+
+ /* Flags that only require signaling post PS. */
+ VkPipelineStageFlags post_ps_flags =
+ post_index_fetch_flags | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
+ VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT |
+ VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT |
+ VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT |
+ VK_PIPELINE_STAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR |
+ VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
+
+ /* Flags that only require signaling post CS. */
+ VkPipelineStageFlags post_cs_flags = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+
+ /* Make sure CP DMA is idle because the driver might have performed a
+ * DMA operation for copying or filling buffers/images.
+ */
+ if (stageMask & (VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT))
+ si_cp_dma_wait_for_idle(cmd_buffer);
+
+ if (!(stageMask & ~top_of_pipe_flags)) {
+ /* Just need to sync the PFP engine. */
+ radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
+ radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ radeon_emit(cs, value);
+ } else if (!(stageMask & ~post_index_fetch_flags)) {
+ /* Sync ME because PFP reads index and indirect buffers. */
+ radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
+ radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ radeon_emit(cs, value);
+ } else {
+ unsigned event_type;
+
+ if (!(stageMask & ~post_ps_flags)) {
+ /* Sync previous fragment shaders. */
+ event_type = V_028A90_PS_DONE;
+ } else if (!(stageMask & ~post_cs_flags)) {
+ /* Sync previous compute shaders. */
+ event_type = V_028A90_CS_DONE;
+ } else {
+ /* Otherwise, sync all prior GPU work. */
+ event_type = V_028A90_BOTTOM_OF_PIPE_TS;
+ }
+
+ si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.chip_class,
+ radv_cmd_buffer_uses_mec(cmd_buffer), event_type, 0,
+ EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, va, value,
+ cmd_buffer->gfx9_eop_bug_va);
+ }
+
+ assert(cmd_buffer->cs->cdw <= cdw_max);
+}
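
The event selection above relies on the usual "no bits outside the group" test, !(stageMask & ~group_flags). A tiny standalone illustration of that test follows; the flag values are made up for the example and merely stand in for VkPipelineStageFlagBits.

#include <assert.h>
#include <stdint.h>

/* True when mask has no bits outside group, i.e. !(mask & ~group). */
static int
only_contains(uint32_t mask, uint32_t group)
{
   return (mask & ~group) == 0;
}

int
main(void)
{
   /* Made-up flag values standing in for real pipeline stage bits. */
   const uint32_t TOP = 1u << 0, DRAW_INDIRECT = 1u << 1, VERTEX_INPUT = 1u << 2,
                  FRAGMENT_SHADER = 1u << 7;
   const uint32_t post_index_fetch = TOP | DRAW_INDIRECT | VERTEX_INPUT;

   assert(only_contains(TOP | VERTEX_INPUT, post_index_fetch)); /* an ME write suffices */
   assert(!only_contains(FRAGMENT_SHADER, post_index_fetch));   /* needs a later event */
   return 0;
}
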
+
+void
+radv_CmdSetEvent(VkCommandBuffer commandBuffer, VkEvent _event, VkPipelineStageFlags stageMask)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_event, event, _event);
+
+ write_event(cmd_buffer, event, stageMask, 1);
+}
+
+void
+radv_CmdResetEvent(VkCommandBuffer commandBuffer, VkEvent _event, VkPipelineStageFlags stageMask)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_event, event, _event);
+
+ write_event(cmd_buffer, event, stageMask, 0);
+}
+
+void
+radv_CmdWaitEvents(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent *pEvents,
+ VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask,
+ uint32_t memoryBarrierCount, const VkMemoryBarrier *pMemoryBarriers,
+ uint32_t bufferMemoryBarrierCount,
+ const VkBufferMemoryBarrier *pBufferMemoryBarriers,
+ uint32_t imageMemoryBarrierCount,
+ const VkImageMemoryBarrier *pImageMemoryBarriers)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_barrier_info info;
+
+ info.reason = RGP_BARRIER_EXTERNAL_CMD_WAIT_EVENTS;
+ info.eventCount = eventCount;
+ info.pEvents = pEvents;
+ info.srcStageMask = 0;
+
+ radv_barrier(cmd_buffer, memoryBarrierCount, pMemoryBarriers, bufferMemoryBarrierCount,
+ pBufferMemoryBarriers, imageMemoryBarrierCount, pImageMemoryBarriers, &info);
+}
+
+void
+radv_CmdSetDeviceMask(VkCommandBuffer commandBuffer, uint32_t deviceMask)
{
/* No-op */
}
/* VK_EXT_conditional_rendering */
-void radv_CmdBeginConditionalRenderingEXT(
- VkCommandBuffer commandBuffer,
- const VkConditionalRenderingBeginInfoEXT* pConditionalRenderingBegin)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_buffer, buffer, pConditionalRenderingBegin->buffer);
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- unsigned pred_op = PREDICATION_OP_BOOL32;
- bool draw_visible = true;
- uint64_t va;
-
- va = radv_buffer_get_va(buffer->bo) + pConditionalRenderingBegin->offset;
-
- /* By default, if the 32-bit value at offset in buffer memory is zero,
- * then the rendering commands are discarded, otherwise they are
- * executed as normal. If the inverted flag is set, all commands are
- * discarded if the value is non zero.
- */
- if (pConditionalRenderingBegin->flags &
- VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT) {
- draw_visible = false;
- }
-
- si_emit_cache_flush(cmd_buffer);
-
- if (cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL &&
- !cmd_buffer->device->physical_device->rad_info.has_32bit_predication) {
- uint64_t pred_value = 0, pred_va;
- unsigned pred_offset;
-
- /* From the Vulkan spec 1.1.107:
- *
- * "If the 32-bit value at offset in buffer memory is zero,
- * then the rendering commands are discarded, otherwise they
- * are executed as normal. If the value of the predicate in
- * buffer memory changes while conditional rendering is
- * active, the rendering commands may be discarded in an
- * implementation-dependent way. Some implementations may
- * latch the value of the predicate upon beginning conditional
- * rendering while others may read it before every rendering
- * command."
- *
- * But, the AMD hardware treats the predicate as a 64-bit
- * value which means we need a workaround in the driver.
- * Luckily, it's not required to support if the value changes
- * when predication is active.
- *
- * The workaround is as follows:
- * 1) allocate a 64-value in the upload BO and initialize it
- * to 0
- * 2) copy the 32-bit predicate value to the upload BO
- * 3) use the new allocated VA address for predication
- *
- * Based on the conditionalrender demo, it's faster to do the
- * COPY_DATA in ME (+ sync PFP) instead of PFP.
- */
- radv_cmd_buffer_upload_data(cmd_buffer, 8, &pred_value, &pred_offset);
-
- pred_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + pred_offset;
-
- radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
- COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
- COPY_DATA_WR_CONFIRM);
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- radeon_emit(cs, pred_va);
- radeon_emit(cs, pred_va >> 32);
-
- radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
- radeon_emit(cs, 0);
-
- va = pred_va;
- pred_op = PREDICATION_OP_BOOL64;
- }
-
- /* Enable predication for this command buffer. */
- si_emit_set_predication_state(cmd_buffer, draw_visible, pred_op, va);
- cmd_buffer->state.predicating = true;
-
- /* Store conditional rendering user info. */
- cmd_buffer->state.predication_type = draw_visible;
- cmd_buffer->state.predication_op = pred_op;
- cmd_buffer->state.predication_va = va;
-}
-
-void radv_CmdEndConditionalRenderingEXT(
- VkCommandBuffer commandBuffer)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
-
- /* Disable predication for this command buffer. */
- si_emit_set_predication_state(cmd_buffer, false, 0, 0);
- cmd_buffer->state.predicating = false;
-
- /* Reset conditional rendering user info. */
- cmd_buffer->state.predication_type = -1;
- cmd_buffer->state.predication_op = 0;
- cmd_buffer->state.predication_va = 0;
+void
+radv_CmdBeginConditionalRenderingEXT(
+ VkCommandBuffer commandBuffer,
+ const VkConditionalRenderingBeginInfoEXT *pConditionalRenderingBegin)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_buffer, buffer, pConditionalRenderingBegin->buffer);
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ unsigned pred_op = PREDICATION_OP_BOOL32;
+ bool draw_visible = true;
+ uint64_t va;
+
+ va = radv_buffer_get_va(buffer->bo) + pConditionalRenderingBegin->offset;
+
+ /* By default, if the 32-bit value at offset in buffer memory is zero,
+ * then the rendering commands are discarded, otherwise they are
+ * executed as normal. If the inverted flag is set, all commands are
+    * discarded if the value is non-zero.
+ */
+ if (pConditionalRenderingBegin->flags & VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT) {
+ draw_visible = false;
+ }
+
+ si_emit_cache_flush(cmd_buffer);
+
+ if (cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL &&
+ !cmd_buffer->device->physical_device->rad_info.has_32bit_predication) {
+ uint64_t pred_value = 0, pred_va;
+ unsigned pred_offset;
+
+ /* From the Vulkan spec 1.1.107:
+ *
+ * "If the 32-bit value at offset in buffer memory is zero,
+ * then the rendering commands are discarded, otherwise they
+ * are executed as normal. If the value of the predicate in
+ * buffer memory changes while conditional rendering is
+ * active, the rendering commands may be discarded in an
+ * implementation-dependent way. Some implementations may
+ * latch the value of the predicate upon beginning conditional
+ * rendering while others may read it before every rendering
+ * command."
+ *
+ * But, the AMD hardware treats the predicate as a 64-bit
+ * value which means we need a workaround in the driver.
+       * Luckily, it's not required to support the case where the value
+       * changes while predication is active.
+ *
+ * The workaround is as follows:
+       * 1) allocate a 64-bit value in the upload BO and initialize it
+       * to 0
+       * 2) copy the 32-bit predicate value to the upload BO
+       * 3) use the newly allocated VA address for predication
+ *
+ * Based on the conditionalrender demo, it's faster to do the
+ * COPY_DATA in ME (+ sync PFP) instead of PFP.
+ */
+ radv_cmd_buffer_upload_data(cmd_buffer, 8, &pred_value, &pred_offset);
+
+ pred_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + pred_offset;
+
+ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
+ COPY_DATA_WR_CONFIRM);
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ radeon_emit(cs, pred_va);
+ radeon_emit(cs, pred_va >> 32);
+
+ radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
+ radeon_emit(cs, 0);
+
+ va = pred_va;
+ pred_op = PREDICATION_OP_BOOL64;
+ }
+
+ /* Enable predication for this command buffer. */
+ si_emit_set_predication_state(cmd_buffer, draw_visible, pred_op, va);
+ cmd_buffer->state.predicating = true;
+
+ /* Store conditional rendering user info. */
+ cmd_buffer->state.predication_type = draw_visible;
+ cmd_buffer->state.predication_op = pred_op;
+ cmd_buffer->state.predication_va = va;
+}
+
+void
+radv_CmdEndConditionalRenderingEXT(VkCommandBuffer commandBuffer)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+
+ /* Disable predication for this command buffer. */
+ si_emit_set_predication_state(cmd_buffer, false, 0, 0);
+ cmd_buffer->state.predicating = false;
+
+ /* Reset conditional rendering user info. */
+ cmd_buffer->state.predication_type = -1;
+ cmd_buffer->state.predication_op = 0;
+ cmd_buffer->state.predication_va = 0;
}
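
For context, the predication code above services application calls like the sketch below. This is a hedged example: the handles are assumed valid, and since VK_EXT_conditional_rendering is an extension, its entry points are assumed to have been fetched with vkGetDeviceProcAddr.

#include <vulkan/vulkan.h>

/* Draws recorded inside the conditional rendering scope are discarded when
 * the 32-bit value at `offset` in `buffer` is zero (or the opposite, with the
 * inverted flag). */
static void
draw_if_visible(VkCommandBuffer cmd, VkBuffer buffer, VkDeviceSize offset,
                PFN_vkCmdBeginConditionalRenderingEXT begin_cr,
                PFN_vkCmdEndConditionalRenderingEXT end_cr)
{
   VkConditionalRenderingBeginInfoEXT info = {
      .sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT,
      .buffer = buffer,
      .offset = offset,
      .flags = 0, /* VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT would flip the test */
   };

   begin_cr(cmd, &info);
   vkCmdDraw(cmd, 3, 1, 0, 0);
   end_cr(cmd);
}
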
/* VK_EXT_transform_feedback */
-void radv_CmdBindTransformFeedbackBuffersEXT(
- VkCommandBuffer commandBuffer,
- uint32_t firstBinding,
- uint32_t bindingCount,
- const VkBuffer* pBuffers,
- const VkDeviceSize* pOffsets,
- const VkDeviceSize* pSizes)
+void
+radv_CmdBindTransformFeedbackBuffersEXT(VkCommandBuffer commandBuffer, uint32_t firstBinding,
+ uint32_t bindingCount, const VkBuffer *pBuffers,
+ const VkDeviceSize *pOffsets, const VkDeviceSize *pSizes)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings;
- uint8_t enabled_mask = 0;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings;
+ uint8_t enabled_mask = 0;
- assert(firstBinding + bindingCount <= MAX_SO_BUFFERS);
- for (uint32_t i = 0; i < bindingCount; i++) {
- uint32_t idx = firstBinding + i;
+ assert(firstBinding + bindingCount <= MAX_SO_BUFFERS);
+ for (uint32_t i = 0; i < bindingCount; i++) {
+ uint32_t idx = firstBinding + i;
- sb[idx].buffer = radv_buffer_from_handle(pBuffers[i]);
- sb[idx].offset = pOffsets[i];
+ sb[idx].buffer = radv_buffer_from_handle(pBuffers[i]);
+ sb[idx].offset = pOffsets[i];
- if (!pSizes || pSizes[i] == VK_WHOLE_SIZE) {
- sb[idx].size = sb[idx].buffer->size - sb[idx].offset;
- } else {
- sb[idx].size = pSizes[i];
- }
+ if (!pSizes || pSizes[i] == VK_WHOLE_SIZE) {
+ sb[idx].size = sb[idx].buffer->size - sb[idx].offset;
+ } else {
+ sb[idx].size = pSizes[i];
+ }
- radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs,
- sb[idx].buffer->bo);
+ radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, sb[idx].buffer->bo);
- enabled_mask |= 1 << idx;
- }
+ enabled_mask |= 1 << idx;
+ }
- cmd_buffer->state.streamout.enabled_mask |= enabled_mask;
+ cmd_buffer->state.streamout.enabled_mask |= enabled_mask;
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_BUFFER;
+ cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_BUFFER;
}
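
As a usage reference for the binding logic above (including the pSizes == NULL / VK_WHOLE_SIZE case), here is a minimal application-side sketch; the handles and the fetched VK_EXT_transform_feedback entry point are assumptions.

#include <stddef.h>
#include <vulkan/vulkan.h>

/* Bind one transform feedback buffer at binding 0; passing pSizes == NULL
 * lets the driver bind the whole buffer, matching the VK_WHOLE_SIZE handling
 * above. */
static void
bind_xfb_buffer(VkCommandBuffer cmd, VkBuffer buffer,
                PFN_vkCmdBindTransformFeedbackBuffersEXT bind_buffers)
{
   VkDeviceSize offset = 0;

   bind_buffers(cmd, 0 /* firstBinding */, 1 /* bindingCount */, &buffer, &offset,
                NULL /* pSizes: use the whole buffer */);
}
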
static void
radv_emit_streamout_enable(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_streamout_state *so = &cmd_buffer->state.streamout;
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ struct radv_streamout_state *so = &cmd_buffer->state.streamout;
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
- radeon_set_context_reg_seq(cs, R_028B94_VGT_STRMOUT_CONFIG, 2);
- radeon_emit(cs,
- S_028B94_STREAMOUT_0_EN(so->streamout_enabled) |
- S_028B94_RAST_STREAM(0) |
- S_028B94_STREAMOUT_1_EN(so->streamout_enabled) |
- S_028B94_STREAMOUT_2_EN(so->streamout_enabled) |
- S_028B94_STREAMOUT_3_EN(so->streamout_enabled));
- radeon_emit(cs, so->hw_enabled_mask &
- so->enabled_stream_buffers_mask);
+ radeon_set_context_reg_seq(cs, R_028B94_VGT_STRMOUT_CONFIG, 2);
+ radeon_emit(cs, S_028B94_STREAMOUT_0_EN(so->streamout_enabled) | S_028B94_RAST_STREAM(0) |
+ S_028B94_STREAMOUT_1_EN(so->streamout_enabled) |
+ S_028B94_STREAMOUT_2_EN(so->streamout_enabled) |
+ S_028B94_STREAMOUT_3_EN(so->streamout_enabled));
+ radeon_emit(cs, so->hw_enabled_mask & so->enabled_stream_buffers_mask);
- cmd_buffer->state.context_roll_without_scissor_emitted = true;
+ cmd_buffer->state.context_roll_without_scissor_emitted = true;
}
static void
radv_set_streamout_enable(struct radv_cmd_buffer *cmd_buffer, bool enable)
{
- struct radv_streamout_state *so = &cmd_buffer->state.streamout;
- bool old_streamout_enabled = so->streamout_enabled;
- uint32_t old_hw_enabled_mask = so->hw_enabled_mask;
+ struct radv_streamout_state *so = &cmd_buffer->state.streamout;
+ bool old_streamout_enabled = so->streamout_enabled;
+ uint32_t old_hw_enabled_mask = so->hw_enabled_mask;
- so->streamout_enabled = enable;
+ so->streamout_enabled = enable;
- so->hw_enabled_mask = so->enabled_mask |
- (so->enabled_mask << 4) |
- (so->enabled_mask << 8) |
- (so->enabled_mask << 12);
+ so->hw_enabled_mask = so->enabled_mask | (so->enabled_mask << 4) | (so->enabled_mask << 8) |
+ (so->enabled_mask << 12);
- if (!cmd_buffer->device->physical_device->use_ngg_streamout &&
- ((old_streamout_enabled != so->streamout_enabled) ||
- (old_hw_enabled_mask != so->hw_enabled_mask)))
- radv_emit_streamout_enable(cmd_buffer);
+ if (!cmd_buffer->device->physical_device->use_ngg_streamout &&
+ ((old_streamout_enabled != so->streamout_enabled) ||
+ (old_hw_enabled_mask != so->hw_enabled_mask)))
+ radv_emit_streamout_enable(cmd_buffer);
- if (cmd_buffer->device->physical_device->use_ngg_streamout) {
- cmd_buffer->gds_needed = true;
- cmd_buffer->gds_oa_needed = true;
- }
+ if (cmd_buffer->device->physical_device->use_ngg_streamout) {
+ cmd_buffer->gds_needed = true;
+ cmd_buffer->gds_oa_needed = true;
+ }
}
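
The hw_enabled_mask computation above replicates the 4-bit buffer-enable mask into one nibble per stream (four streams, four buffers each). A tiny standalone sketch of that bit math:

#include <assert.h>
#include <stdint.h>

/* Copy a 4-bit buffer-enable mask into all four per-stream nibbles. */
static uint32_t
replicate_streamout_mask(uint32_t enabled_mask)
{
   return enabled_mask | (enabled_mask << 4) | (enabled_mask << 8) | (enabled_mask << 12);
}

int
main(void)
{
   assert(replicate_streamout_mask(0x5) == 0x5555); /* buffers 0 and 2 -> every stream */
   assert(replicate_streamout_mask(0xf) == 0xffff);
   return 0;
}
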
-static void radv_flush_vgt_streamout(struct radv_cmd_buffer *cmd_buffer)
+static void
+radv_flush_vgt_streamout(struct radv_cmd_buffer *cmd_buffer)
{
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- unsigned reg_strmout_cntl;
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ unsigned reg_strmout_cntl;
- /* The register is at different places on different ASICs. */
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) {
- reg_strmout_cntl = R_0300FC_CP_STRMOUT_CNTL;
- radeon_set_uconfig_reg(cs, reg_strmout_cntl, 0);
- } else {
- reg_strmout_cntl = R_0084FC_CP_STRMOUT_CNTL;
- radeon_set_config_reg(cs, reg_strmout_cntl, 0);
- }
+ /* The register is at different places on different ASICs. */
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) {
+ reg_strmout_cntl = R_0300FC_CP_STRMOUT_CNTL;
+ radeon_set_uconfig_reg(cs, reg_strmout_cntl, 0);
+ } else {
+ reg_strmout_cntl = R_0084FC_CP_STRMOUT_CNTL;
+ radeon_set_config_reg(cs, reg_strmout_cntl, 0);
+ }
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0));
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0));
- radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
- radeon_emit(cs, WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */
- radeon_emit(cs, reg_strmout_cntl >> 2); /* register */
- radeon_emit(cs, 0);
- radeon_emit(cs, S_0084FC_OFFSET_UPDATE_DONE(1)); /* reference value */
- radeon_emit(cs, S_0084FC_OFFSET_UPDATE_DONE(1)); /* mask */
- radeon_emit(cs, 4); /* poll interval */
+ radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
+ radeon_emit(cs,
+ WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */
+ radeon_emit(cs, reg_strmout_cntl >> 2); /* register */
+ radeon_emit(cs, 0);
+ radeon_emit(cs, S_0084FC_OFFSET_UPDATE_DONE(1)); /* reference value */
+ radeon_emit(cs, S_0084FC_OFFSET_UPDATE_DONE(1)); /* mask */
+ radeon_emit(cs, 4); /* poll interval */
}
static void
-radv_emit_streamout_begin(struct radv_cmd_buffer *cmd_buffer,
- uint32_t firstCounterBuffer,
- uint32_t counterBufferCount,
- const VkBuffer *pCounterBuffers,
- const VkDeviceSize *pCounterBufferOffsets)
-
-{
- struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings;
- struct radv_streamout_state *so = &cmd_buffer->state.streamout;
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
-
- radv_flush_vgt_streamout(cmd_buffer);
-
- assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS);
- u_foreach_bit(i, so->enabled_mask) {
- int32_t counter_buffer_idx = i - firstCounterBuffer;
- if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount)
- counter_buffer_idx = -1;
-
- /* AMD GCN binds streamout buffers as shader resources.
- * VGT only counts primitives and tells the shader through
- * SGPRs what to do.
- */
- radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 2);
- radeon_emit(cs, sb[i].size >> 2); /* BUFFER_SIZE (in DW) */
- radeon_emit(cs, so->stride_in_dw[i]); /* VTX_STRIDE (in DW) */
-
- cmd_buffer->state.context_roll_without_scissor_emitted = true;
-
- if (counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]) {
- /* The array of counter buffers is optional. */
- RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]);
- uint64_t va = radv_buffer_get_va(buffer->bo);
- uint64_t counter_buffer_offset = 0;
-
- if (pCounterBufferOffsets)
- counter_buffer_offset = pCounterBufferOffsets[counter_buffer_idx];
-
- va += buffer->offset + counter_buffer_offset;
-
- /* Append */
- radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
- radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
- STRMOUT_DATA_TYPE(1) | /* offset in bytes */
- STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM)); /* control */
- radeon_emit(cs, 0); /* unused */
- radeon_emit(cs, 0); /* unused */
- radeon_emit(cs, va); /* src address lo */
- radeon_emit(cs, va >> 32); /* src address hi */
-
- radv_cs_add_buffer(cmd_buffer->device->ws, cs, buffer->bo);
- } else {
- /* Start from the beginning. */
- radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
- radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
- STRMOUT_DATA_TYPE(1) | /* offset in bytes */
- STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET)); /* control */
- radeon_emit(cs, 0); /* unused */
- radeon_emit(cs, 0); /* unused */
- radeon_emit(cs, 0); /* unused */
- radeon_emit(cs, 0); /* unused */
- }
- }
-
- radv_set_streamout_enable(cmd_buffer, true);
+radv_emit_streamout_begin(struct radv_cmd_buffer *cmd_buffer, uint32_t firstCounterBuffer,
+ uint32_t counterBufferCount, const VkBuffer *pCounterBuffers,
+ const VkDeviceSize *pCounterBufferOffsets)
+
+{
+ struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings;
+ struct radv_streamout_state *so = &cmd_buffer->state.streamout;
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+
+ radv_flush_vgt_streamout(cmd_buffer);
+
+ assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS);
+ u_foreach_bit(i, so->enabled_mask)
+ {
+ int32_t counter_buffer_idx = i - firstCounterBuffer;
+ if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount)
+ counter_buffer_idx = -1;
+
+ /* AMD GCN binds streamout buffers as shader resources.
+ * VGT only counts primitives and tells the shader through
+ * SGPRs what to do.
+ */
+ radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16 * i, 2);
+ radeon_emit(cs, sb[i].size >> 2); /* BUFFER_SIZE (in DW) */
+ radeon_emit(cs, so->stride_in_dw[i]); /* VTX_STRIDE (in DW) */
+
+ cmd_buffer->state.context_roll_without_scissor_emitted = true;
+
+ if (counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]) {
+ /* The array of counter buffers is optional. */
+ RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]);
+ uint64_t va = radv_buffer_get_va(buffer->bo);
+ uint64_t counter_buffer_offset = 0;
+
+ if (pCounterBufferOffsets)
+ counter_buffer_offset = pCounterBufferOffsets[counter_buffer_idx];
+
+ va += buffer->offset + counter_buffer_offset;
+
+ /* Append */
+ radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
+ radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) | STRMOUT_DATA_TYPE(1) | /* offset in bytes */
+ STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM)); /* control */
+ radeon_emit(cs, 0); /* unused */
+ radeon_emit(cs, 0); /* unused */
+ radeon_emit(cs, va); /* src address lo */
+ radeon_emit(cs, va >> 32); /* src address hi */
+
+ radv_cs_add_buffer(cmd_buffer->device->ws, cs, buffer->bo);
+ } else {
+ /* Start from the beginning. */
+ radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
+ radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) | STRMOUT_DATA_TYPE(1) | /* offset in bytes */
+ STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET)); /* control */
+ radeon_emit(cs, 0); /* unused */
+ radeon_emit(cs, 0); /* unused */
+ radeon_emit(cs, 0); /* unused */
+ radeon_emit(cs, 0); /* unused */
+ }
+ }
+
+ radv_set_streamout_enable(cmd_buffer, true);
}
static void
-gfx10_emit_streamout_begin(struct radv_cmd_buffer *cmd_buffer,
- uint32_t firstCounterBuffer,
- uint32_t counterBufferCount,
- const VkBuffer *pCounterBuffers,
- const VkDeviceSize *pCounterBufferOffsets)
-{
- struct radv_streamout_state *so = &cmd_buffer->state.streamout;
- unsigned last_target = util_last_bit(so->enabled_mask) - 1;
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
-
- assert(cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10);
- assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS);
-
- /* Sync because the next streamout operation will overwrite GDS and we
- * have to make sure it's idle.
- * TODO: Improve by tracking if there is a streamout operation in
- * flight.
- */
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VS_PARTIAL_FLUSH;
- si_emit_cache_flush(cmd_buffer);
-
- u_foreach_bit(i, so->enabled_mask) {
- int32_t counter_buffer_idx = i - firstCounterBuffer;
- if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount)
- counter_buffer_idx = -1;
-
- bool append = counter_buffer_idx >= 0 &&
- pCounterBuffers && pCounterBuffers[counter_buffer_idx];
- uint64_t va = 0;
-
- if (append) {
- RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]);
- uint64_t counter_buffer_offset = 0;
-
- if (pCounterBufferOffsets)
- counter_buffer_offset = pCounterBufferOffsets[counter_buffer_idx];
-
- va += radv_buffer_get_va(buffer->bo);
- va += buffer->offset + counter_buffer_offset;
-
- radv_cs_add_buffer(cmd_buffer->device->ws, cs, buffer->bo);
- }
-
- radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
- radeon_emit(cs, S_411_SRC_SEL(append ? V_411_SRC_ADDR_TC_L2 : V_411_DATA) |
- S_411_DST_SEL(V_411_GDS) |
- S_411_CP_SYNC(i == last_target));
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- radeon_emit(cs, 4 * i); /* destination in GDS */
- radeon_emit(cs, 0);
- radeon_emit(cs, S_415_BYTE_COUNT_GFX9(4) |
- S_415_DISABLE_WR_CONFIRM_GFX9(i != last_target));
- }
-
- radv_set_streamout_enable(cmd_buffer, true);
-}
-
-void radv_CmdBeginTransformFeedbackEXT(
- VkCommandBuffer commandBuffer,
- uint32_t firstCounterBuffer,
- uint32_t counterBufferCount,
- const VkBuffer* pCounterBuffers,
- const VkDeviceSize* pCounterBufferOffsets)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
-
- if (cmd_buffer->device->physical_device->use_ngg_streamout) {
- gfx10_emit_streamout_begin(cmd_buffer,
- firstCounterBuffer, counterBufferCount,
- pCounterBuffers, pCounterBufferOffsets);
- } else {
- radv_emit_streamout_begin(cmd_buffer,
- firstCounterBuffer, counterBufferCount,
- pCounterBuffers, pCounterBufferOffsets);
- }
+gfx10_emit_streamout_begin(struct radv_cmd_buffer *cmd_buffer, uint32_t firstCounterBuffer,
+ uint32_t counterBufferCount, const VkBuffer *pCounterBuffers,
+ const VkDeviceSize *pCounterBufferOffsets)
+{
+ struct radv_streamout_state *so = &cmd_buffer->state.streamout;
+ unsigned last_target = util_last_bit(so->enabled_mask) - 1;
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+
+ assert(cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10);
+ assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS);
+
+ /* Sync because the next streamout operation will overwrite GDS and we
+ * have to make sure it's idle.
+ * TODO: Improve by tracking if there is a streamout operation in
+ * flight.
+ */
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VS_PARTIAL_FLUSH;
+ si_emit_cache_flush(cmd_buffer);
+
+ u_foreach_bit(i, so->enabled_mask)
+ {
+ int32_t counter_buffer_idx = i - firstCounterBuffer;
+ if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount)
+ counter_buffer_idx = -1;
+
+ bool append =
+ counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx];
+ uint64_t va = 0;
+
+ if (append) {
+ RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]);
+ uint64_t counter_buffer_offset = 0;
+
+ if (pCounterBufferOffsets)
+ counter_buffer_offset = pCounterBufferOffsets[counter_buffer_idx];
+
+ va += radv_buffer_get_va(buffer->bo);
+ va += buffer->offset + counter_buffer_offset;
+
+ radv_cs_add_buffer(cmd_buffer->device->ws, cs, buffer->bo);
+ }
+
+ radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
+ radeon_emit(cs, S_411_SRC_SEL(append ? V_411_SRC_ADDR_TC_L2 : V_411_DATA) |
+ S_411_DST_SEL(V_411_GDS) | S_411_CP_SYNC(i == last_target));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ radeon_emit(cs, 4 * i); /* destination in GDS */
+ radeon_emit(cs, 0);
+ radeon_emit(cs, S_415_BYTE_COUNT_GFX9(4) | S_415_DISABLE_WR_CONFIRM_GFX9(i != last_target));
+ }
+
+ radv_set_streamout_enable(cmd_buffer, true);
+}
+
+void
+radv_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCounterBuffer,
+ uint32_t counterBufferCount, const VkBuffer *pCounterBuffers,
+ const VkDeviceSize *pCounterBufferOffsets)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+
+ if (cmd_buffer->device->physical_device->use_ngg_streamout) {
+ gfx10_emit_streamout_begin(cmd_buffer, firstCounterBuffer, counterBufferCount,
+ pCounterBuffers, pCounterBufferOffsets);
+ } else {
+ radv_emit_streamout_begin(cmd_buffer, firstCounterBuffer, counterBufferCount, pCounterBuffers,
+ pCounterBufferOffsets);
+ }
}
static void
-radv_emit_streamout_end(struct radv_cmd_buffer *cmd_buffer,
- uint32_t firstCounterBuffer,
- uint32_t counterBufferCount,
- const VkBuffer *pCounterBuffers,
- const VkDeviceSize *pCounterBufferOffsets)
+radv_emit_streamout_end(struct radv_cmd_buffer *cmd_buffer, uint32_t firstCounterBuffer,
+ uint32_t counterBufferCount, const VkBuffer *pCounterBuffers,
+ const VkDeviceSize *pCounterBufferOffsets)
{
- struct radv_streamout_state *so = &cmd_buffer->state.streamout;
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ struct radv_streamout_state *so = &cmd_buffer->state.streamout;
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
- radv_flush_vgt_streamout(cmd_buffer);
+ radv_flush_vgt_streamout(cmd_buffer);
- assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS);
- u_foreach_bit(i, so->enabled_mask) {
- int32_t counter_buffer_idx = i - firstCounterBuffer;
- if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount)
- counter_buffer_idx = -1;
+ assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS);
+ u_foreach_bit(i, so->enabled_mask)
+ {
+ int32_t counter_buffer_idx = i - firstCounterBuffer;
+ if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount)
+ counter_buffer_idx = -1;
- if (counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]) {
-			/* The array of counter buffers is optional. */
- RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]);
- uint64_t va = radv_buffer_get_va(buffer->bo);
- uint64_t counter_buffer_offset = 0;
+ if (counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]) {
+      /* The array of counter buffers is optional. */
+ RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]);
+ uint64_t va = radv_buffer_get_va(buffer->bo);
+ uint64_t counter_buffer_offset = 0;
- if (pCounterBufferOffsets)
- counter_buffer_offset = pCounterBufferOffsets[counter_buffer_idx];
+ if (pCounterBufferOffsets)
+ counter_buffer_offset = pCounterBufferOffsets[counter_buffer_idx];
- va += buffer->offset + counter_buffer_offset;
+ va += buffer->offset + counter_buffer_offset;
- radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
- radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
- STRMOUT_DATA_TYPE(1) | /* offset in bytes */
- STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) |
- STRMOUT_STORE_BUFFER_FILLED_SIZE); /* control */
- radeon_emit(cs, va); /* dst address lo */
- radeon_emit(cs, va >> 32); /* dst address hi */
- radeon_emit(cs, 0); /* unused */
- radeon_emit(cs, 0); /* unused */
+ radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
+ radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) | STRMOUT_DATA_TYPE(1) | /* offset in bytes */
+ STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) |
+ STRMOUT_STORE_BUFFER_FILLED_SIZE); /* control */
+ radeon_emit(cs, va); /* dst address lo */
+ radeon_emit(cs, va >> 32); /* dst address hi */
+ radeon_emit(cs, 0); /* unused */
+ radeon_emit(cs, 0); /* unused */
- radv_cs_add_buffer(cmd_buffer->device->ws, cs, buffer->bo);
- }
+ radv_cs_add_buffer(cmd_buffer->device->ws, cs, buffer->bo);
+ }
- /* Deactivate transform feedback by zeroing the buffer size.
- * The counters (primitives generated, primitives emitted) may
-		 * be enabled even if there is no buffer bound. This ensures
- * that the primitives-emitted query won't increment.
- */
- radeon_set_context_reg(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 0);
+ /* Deactivate transform feedback by zeroing the buffer size.
+ * The counters (primitives generated, primitives emitted) may
+       * be enabled even if there is no buffer bound. This ensures
+ * that the primitives-emitted query won't increment.
+ */
+ radeon_set_context_reg(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16 * i, 0);
- cmd_buffer->state.context_roll_without_scissor_emitted = true;
- }
+ cmd_buffer->state.context_roll_without_scissor_emitted = true;
+ }
- radv_set_streamout_enable(cmd_buffer, false);
+ radv_set_streamout_enable(cmd_buffer, false);
}
static void
-gfx10_emit_streamout_end(struct radv_cmd_buffer *cmd_buffer,
- uint32_t firstCounterBuffer,
- uint32_t counterBufferCount,
- const VkBuffer *pCounterBuffers,
- const VkDeviceSize *pCounterBufferOffsets)
-{
- struct radv_streamout_state *so = &cmd_buffer->state.streamout;
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
-
- assert(cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10);
- assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS);
-
- u_foreach_bit(i, so->enabled_mask) {
- int32_t counter_buffer_idx = i - firstCounterBuffer;
- if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount)
- counter_buffer_idx = -1;
-
- if (counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]) {
-			/* The array of counter buffers is optional. */
- RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]);
- uint64_t va = radv_buffer_get_va(buffer->bo);
- uint64_t counter_buffer_offset = 0;
-
- if (pCounterBufferOffsets)
- counter_buffer_offset = pCounterBufferOffsets[counter_buffer_idx];
-
- va += buffer->offset + counter_buffer_offset;
-
- si_cs_emit_write_event_eop(cs,
- cmd_buffer->device->physical_device->rad_info.chip_class,
- radv_cmd_buffer_uses_mec(cmd_buffer),
- V_028A90_PS_DONE, 0,
- EOP_DST_SEL_TC_L2,
- EOP_DATA_SEL_GDS,
- va, EOP_DATA_GDS(i, 1), 0);
-
- radv_cs_add_buffer(cmd_buffer->device->ws, cs, buffer->bo);
- }
- }
-
- radv_set_streamout_enable(cmd_buffer, false);
-}
-
-void radv_CmdEndTransformFeedbackEXT(
- VkCommandBuffer commandBuffer,
- uint32_t firstCounterBuffer,
- uint32_t counterBufferCount,
- const VkBuffer* pCounterBuffers,
- const VkDeviceSize* pCounterBufferOffsets)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
-
- if (cmd_buffer->device->physical_device->use_ngg_streamout) {
- gfx10_emit_streamout_end(cmd_buffer,
- firstCounterBuffer, counterBufferCount,
- pCounterBuffers, pCounterBufferOffsets);
- } else {
- radv_emit_streamout_end(cmd_buffer,
- firstCounterBuffer, counterBufferCount,
- pCounterBuffers, pCounterBufferOffsets);
- }
-}
-
-void radv_CmdDrawIndirectByteCountEXT(
- VkCommandBuffer commandBuffer,
- uint32_t instanceCount,
- uint32_t firstInstance,
- VkBuffer _counterBuffer,
- VkDeviceSize counterBufferOffset,
- uint32_t counterOffset,
- uint32_t vertexStride)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_buffer, counterBuffer, _counterBuffer);
- struct radv_draw_info info;
-
- info.count = 0;
- info.instance_count = instanceCount;
- info.first_instance = firstInstance;
- info.strmout_buffer = counterBuffer;
- info.strmout_buffer_offset = counterBufferOffset;
- info.stride = vertexStride;
- info.indexed = false;
- info.indirect = NULL;
-
- if (!radv_before_draw(cmd_buffer, &info, 0))
- return;
- radv_emit_direct_draw_packets(cmd_buffer, &info, 0, S_0287F0_USE_OPAQUE(1));
- radv_after_draw(cmd_buffer);
+gfx10_emit_streamout_end(struct radv_cmd_buffer *cmd_buffer, uint32_t firstCounterBuffer,
+ uint32_t counterBufferCount, const VkBuffer *pCounterBuffers,
+ const VkDeviceSize *pCounterBufferOffsets)
+{
+ struct radv_streamout_state *so = &cmd_buffer->state.streamout;
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+
+ assert(cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10);
+ assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS);
+
+ u_foreach_bit(i, so->enabled_mask)
+ {
+ int32_t counter_buffer_idx = i - firstCounterBuffer;
+ if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount)
+ counter_buffer_idx = -1;
+
+ if (counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]) {
+      /* The array of counter buffers is optional. */
+ RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]);
+ uint64_t va = radv_buffer_get_va(buffer->bo);
+ uint64_t counter_buffer_offset = 0;
+
+ if (pCounterBufferOffsets)
+ counter_buffer_offset = pCounterBufferOffsets[counter_buffer_idx];
+
+ va += buffer->offset + counter_buffer_offset;
+
+ si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.chip_class,
+ radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_PS_DONE, 0,
+ EOP_DST_SEL_TC_L2, EOP_DATA_SEL_GDS, va, EOP_DATA_GDS(i, 1), 0);
+
+ radv_cs_add_buffer(cmd_buffer->device->ws, cs, buffer->bo);
+ }
+ }
+
+ radv_set_streamout_enable(cmd_buffer, false);
+}
+
+void
+radv_CmdEndTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCounterBuffer,
+ uint32_t counterBufferCount, const VkBuffer *pCounterBuffers,
+ const VkDeviceSize *pCounterBufferOffsets)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+
+ if (cmd_buffer->device->physical_device->use_ngg_streamout) {
+ gfx10_emit_streamout_end(cmd_buffer, firstCounterBuffer, counterBufferCount, pCounterBuffers,
+ pCounterBufferOffsets);
+ } else {
+ radv_emit_streamout_end(cmd_buffer, firstCounterBuffer, counterBufferCount, pCounterBuffers,
+ pCounterBufferOffsets);
+ }
+}
+
+void
+radv_CmdDrawIndirectByteCountEXT(VkCommandBuffer commandBuffer, uint32_t instanceCount,
+ uint32_t firstInstance, VkBuffer _counterBuffer,
+ VkDeviceSize counterBufferOffset, uint32_t counterOffset,
+ uint32_t vertexStride)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_buffer, counterBuffer, _counterBuffer);
+ struct radv_draw_info info;
+
+ info.count = 0;
+ info.instance_count = instanceCount;
+ info.first_instance = firstInstance;
+ info.strmout_buffer = counterBuffer;
+ info.strmout_buffer_offset = counterBufferOffset;
+ info.stride = vertexStride;
+ info.indexed = false;
+ info.indirect = NULL;
+
+ if (!radv_before_draw(cmd_buffer, &info, 0))
+ return;
+ radv_emit_direct_draw_packets(cmd_buffer, &info, 0, S_0287F0_USE_OPAQUE(1));
+ radv_after_draw(cmd_buffer);
}
/* VK_AMD_buffer_marker */
-void radv_CmdWriteBufferMarkerAMD(
- VkCommandBuffer commandBuffer,
- VkPipelineStageFlagBits pipelineStage,
- VkBuffer dstBuffer,
- VkDeviceSize dstOffset,
- uint32_t marker)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_buffer, buffer, dstBuffer);
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- uint64_t va = radv_buffer_get_va(buffer->bo) + dstOffset;
-
- si_emit_cache_flush(cmd_buffer);
-
- ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 12);
-
- if (!(pipelineStage & ~VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)) {
- radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) |
- COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
- COPY_DATA_WR_CONFIRM);
- radeon_emit(cs, marker);
- radeon_emit(cs, 0);
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- } else {
- si_cs_emit_write_event_eop(cs,
- cmd_buffer->device->physical_device->rad_info.chip_class,
- radv_cmd_buffer_uses_mec(cmd_buffer),
- V_028A90_BOTTOM_OF_PIPE_TS, 0,
- EOP_DST_SEL_MEM,
- EOP_DATA_SEL_VALUE_32BIT,
- va, marker,
- cmd_buffer->gfx9_eop_bug_va);
- }
-
- assert(cmd_buffer->cs->cdw <= cdw_max);
+void
+radv_CmdWriteBufferMarkerAMD(VkCommandBuffer commandBuffer, VkPipelineStageFlagBits pipelineStage,
+ VkBuffer dstBuffer, VkDeviceSize dstOffset, uint32_t marker)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_buffer, buffer, dstBuffer);
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ uint64_t va = radv_buffer_get_va(buffer->bo) + dstOffset;
+
+ si_emit_cache_flush(cmd_buffer);
+
+ ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 12);
+
+ if (!(pipelineStage & ~VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)) {
+ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
+ COPY_DATA_WR_CONFIRM);
+ radeon_emit(cs, marker);
+ radeon_emit(cs, 0);
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ } else {
+ si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.chip_class,
+ radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_BOTTOM_OF_PIPE_TS,
+ 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, va, marker,
+ cmd_buffer->gfx9_eop_bug_va);
+ }
+
+ assert(cmd_buffer->cs->cdw <= cdw_max);
}
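
radv_CmdWriteBufferMarkerAMD above chooses its path with one mask test: when the requested stage contains no bit beyond top-of-pipe, the marker is written immediately via COPY_DATA; any later stage forces the bottom-of-pipe EOP event. A minimal sketch of that predicate, using a local stand-in constant so the snippet needs no Vulkan headers:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; illustrative only. */
#define TOP_OF_PIPE_BIT 0x00000001u

/* Mirrors the test in radv_CmdWriteBufferMarkerAMD: the cheap immediate write
 * is only taken when no stage later than top-of-pipe was requested. */
static bool
marker_can_use_copy_data(uint32_t pipelineStage)
{
   return !(pipelineStage & ~TOP_OF_PIPE_BIT);
}

int
main(void)
{
   printf("top-of-pipe only   -> %d\n", marker_can_use_copy_data(TOP_OF_PIPE_BIT));           /* 1 */
   printf("plus a later stage -> %d\n", marker_can_use_copy_data(TOP_OF_PIPE_BIT | 0x2000u)); /* 0 */
   return 0;
}
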
diff --git a/src/amd/vulkan/radv_constants.h b/src/amd/vulkan/radv_constants.h
index 0b0d6714d25..bceedac3da6 100644
--- a/src/amd/vulkan/radv_constants.h
+++ b/src/amd/vulkan/radv_constants.h
@@ -30,30 +30,30 @@
#define ATI_VENDOR_ID 0x1002
-#define MAX_VBS 32
-#define MAX_VERTEX_ATTRIBS 32
-#define MAX_RTS 8
-#define MAX_VIEWPORTS 16
-#define MAX_SCISSORS 16
-#define MAX_DISCARD_RECTANGLES 4
-#define MAX_SAMPLE_LOCATIONS 32
-#define MAX_PUSH_CONSTANTS_SIZE 128
-#define MAX_PUSH_DESCRIPTORS 32
-#define MAX_DYNAMIC_UNIFORM_BUFFERS 16
-#define MAX_DYNAMIC_STORAGE_BUFFERS 8
-#define MAX_DYNAMIC_BUFFERS (MAX_DYNAMIC_UNIFORM_BUFFERS + MAX_DYNAMIC_STORAGE_BUFFERS)
-#define MAX_SAMPLES_LOG2 4
-#define NUM_META_FS_KEYS 12
-#define RADV_MAX_DRM_DEVICES 8
-#define MAX_VIEWS 8
-#define MAX_SO_STREAMS 4
-#define MAX_SO_BUFFERS 4
-#define MAX_SO_OUTPUTS 64
-#define MAX_INLINE_UNIFORM_BLOCK_SIZE (4ull * 1024 * 1024)
+#define MAX_VBS 32
+#define MAX_VERTEX_ATTRIBS 32
+#define MAX_RTS 8
+#define MAX_VIEWPORTS 16
+#define MAX_SCISSORS 16
+#define MAX_DISCARD_RECTANGLES 4
+#define MAX_SAMPLE_LOCATIONS 32
+#define MAX_PUSH_CONSTANTS_SIZE 128
+#define MAX_PUSH_DESCRIPTORS 32
+#define MAX_DYNAMIC_UNIFORM_BUFFERS 16
+#define MAX_DYNAMIC_STORAGE_BUFFERS 8
+#define MAX_DYNAMIC_BUFFERS (MAX_DYNAMIC_UNIFORM_BUFFERS + MAX_DYNAMIC_STORAGE_BUFFERS)
+#define MAX_SAMPLES_LOG2 4
+#define NUM_META_FS_KEYS 12
+#define RADV_MAX_DRM_DEVICES 8
+#define MAX_VIEWS 8
+#define MAX_SO_STREAMS 4
+#define MAX_SO_BUFFERS 4
+#define MAX_SO_OUTPUTS 64
+#define MAX_INLINE_UNIFORM_BLOCK_SIZE (4ull * 1024 * 1024)
#define MAX_INLINE_UNIFORM_BLOCK_COUNT 64
-#define MAX_BIND_POINTS 2 /* compute + graphics */
+#define MAX_BIND_POINTS 2 /* compute + graphics */
-#define NUM_DEPTH_CLEAR_PIPELINES 3
+#define NUM_DEPTH_CLEAR_PIPELINES 3
#define NUM_DEPTH_DECOMPRESS_PIPELINES 3
/*
@@ -65,13 +65,13 @@
#define RADV_BUFFER_UPDATE_THRESHOLD 1024
/* descriptor index into scratch ring offsets */
-#define RING_SCRATCH 0
-#define RING_ESGS_VS 1
-#define RING_ESGS_GS 2
-#define RING_GSVS_VS 3
-#define RING_GSVS_GS 4
-#define RING_HS_TESS_FACTOR 5
-#define RING_HS_TESS_OFFCHIP 6
+#define RING_SCRATCH 0
+#define RING_ESGS_VS 1
+#define RING_ESGS_GS 2
+#define RING_GSVS_VS 3
+#define RING_GSVS_GS 4
+#define RING_HS_TESS_FACTOR 5
+#define RING_HS_TESS_OFFCHIP 6
#define RING_PS_SAMPLE_POSITIONS 7
/* max number of descriptor sets */
@@ -80,7 +80,7 @@
/* Make sure everything is addressable by a signed 32-bit int, and
* our largest descriptors are 96 bytes.
*/
-#define RADV_MAX_PER_SET_DESCRIPTORS ((1ull << 31 ) / 96)
+#define RADV_MAX_PER_SET_DESCRIPTORS ((1ull << 31) / 96)
/* Our buffer size fields allow only 2**32 - 1. We round that down to a multiple
* of 4 bytes so we can align buffer sizes up.
@@ -91,4 +91,3 @@
#define RADV_SUBGROUP_SIZE 64
#endif /* RADV_CONSTANTS_H */
-
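
A quick check of the comment above RADV_MAX_PER_SET_DESCRIPTORS: with 96-byte descriptors, (1ull << 31) / 96 truncates to 22369621, and 22369621 * 96 = 2147483616, which still sits below 2^31. A throwaway compile-time check of that arithmetic (assumes C11 static_assert; not part of the header):

#include <assert.h>

/* 2^31 / 96 = 22369621 descriptors per set, whose byte offsets stay within a signed 32-bit int. */
static_assert((1ull << 31) / 96 == 22369621ull, "derivation of RADV_MAX_PER_SET_DESCRIPTORS");
static_assert(22369621ull * 96 <= (1ull << 31), "largest per-set offset is addressable by int32");

int
main(void)
{
   return 0;
}
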
diff --git a/src/amd/vulkan/radv_cs.h b/src/amd/vulkan/radv_cs.h
index c6f9e4e4342..277c77b5967 100644
--- a/src/amd/vulkan/radv_cs.h
+++ b/src/amd/vulkan/radv_cs.h
@@ -25,166 +25,166 @@
#ifndef RADV_CS_H
#define RADV_CS_H
-#include <string.h>
-#include <stdint.h>
#include <assert.h>
+#include <stdint.h>
+#include <string.h>
#include "radv_private.h"
#include "sid.h"
-static inline unsigned radeon_check_space(struct radeon_winsys *ws,
- struct radeon_cmdbuf *cs,
- unsigned needed)
+static inline unsigned
+radeon_check_space(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, unsigned needed)
{
- if (cs->max_dw - cs->cdw < needed)
- ws->cs_grow(cs, needed);
- return cs->cdw + needed;
+ if (cs->max_dw - cs->cdw < needed)
+ ws->cs_grow(cs, needed);
+ return cs->cdw + needed;
}
-static inline void radeon_set_config_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
+static inline void
+radeon_set_config_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
{
- assert(reg >= SI_CONFIG_REG_OFFSET && reg < SI_CONFIG_REG_END);
- assert(cs->cdw + 2 + num <= cs->max_dw);
- assert(num);
- radeon_emit(cs, PKT3(PKT3_SET_CONFIG_REG, num, 0));
- radeon_emit(cs, (reg - SI_CONFIG_REG_OFFSET) >> 2);
+ assert(reg >= SI_CONFIG_REG_OFFSET && reg < SI_CONFIG_REG_END);
+ assert(cs->cdw + 2 + num <= cs->max_dw);
+ assert(num);
+ radeon_emit(cs, PKT3(PKT3_SET_CONFIG_REG, num, 0));
+ radeon_emit(cs, (reg - SI_CONFIG_REG_OFFSET) >> 2);
}
-static inline void radeon_set_config_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
+static inline void
+radeon_set_config_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
{
- radeon_set_config_reg_seq(cs, reg, 1);
- radeon_emit(cs, value);
+ radeon_set_config_reg_seq(cs, reg, 1);
+ radeon_emit(cs, value);
}
-static inline void radeon_set_context_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
+static inline void
+radeon_set_context_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
{
- assert(reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END);
- assert(cs->cdw + 2 + num <= cs->max_dw);
- assert(num);
- radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, num, 0));
- radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2);
+ assert(reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END);
+ assert(cs->cdw + 2 + num <= cs->max_dw);
+ assert(num);
+ radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, num, 0));
+ radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2);
}
-static inline void radeon_set_context_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
+static inline void
+radeon_set_context_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
{
- radeon_set_context_reg_seq(cs, reg, 1);
- radeon_emit(cs, value);
+ radeon_set_context_reg_seq(cs, reg, 1);
+ radeon_emit(cs, value);
}
-
-static inline void radeon_set_context_reg_idx(struct radeon_cmdbuf *cs,
- unsigned reg, unsigned idx,
- unsigned value)
+static inline void
+radeon_set_context_reg_idx(struct radeon_cmdbuf *cs, unsigned reg, unsigned idx, unsigned value)
{
- assert(reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END);
- assert(cs->cdw + 3 <= cs->max_dw);
- radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, 1, 0));
- radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2 | (idx << 28));
- radeon_emit(cs, value);
+ assert(reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END);
+ assert(cs->cdw + 3 <= cs->max_dw);
+ radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, 1, 0));
+ radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2 | (idx << 28));
+ radeon_emit(cs, value);
}
-static inline void radeon_set_context_reg_rmw(struct radeon_cmdbuf *cs,
- unsigned reg, unsigned value,
- unsigned mask)
+static inline void
+radeon_set_context_reg_rmw(struct radeon_cmdbuf *cs, unsigned reg, unsigned value, unsigned mask)
{
- assert(reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END);
- assert(cs->cdw + 4 <= cs->max_dw);
- radeon_emit(cs, PKT3(PKT3_CONTEXT_REG_RMW, 2, 0));
- radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2);
- radeon_emit(cs, mask);
- radeon_emit(cs, value);
+ assert(reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END);
+ assert(cs->cdw + 4 <= cs->max_dw);
+ radeon_emit(cs, PKT3(PKT3_CONTEXT_REG_RMW, 2, 0));
+ radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2);
+ radeon_emit(cs, mask);
+ radeon_emit(cs, value);
}
-static inline void radeon_set_sh_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
+static inline void
+radeon_set_sh_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
{
- assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END);
- assert(cs->cdw + 2 + num <= cs->max_dw);
- assert(num);
- radeon_emit(cs, PKT3(PKT3_SET_SH_REG, num, 0));
- radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2);
+ assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END);
+ assert(cs->cdw + 2 + num <= cs->max_dw);
+ assert(num);
+ radeon_emit(cs, PKT3(PKT3_SET_SH_REG, num, 0));
+ radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2);
}
-static inline void radeon_set_sh_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
+static inline void
+radeon_set_sh_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
{
- radeon_set_sh_reg_seq(cs, reg, 1);
- radeon_emit(cs, value);
+ radeon_set_sh_reg_seq(cs, reg, 1);
+ radeon_emit(cs, value);
}
-static inline void radeon_set_sh_reg_idx(const struct radv_physical_device *pdevice,
- struct radeon_cmdbuf *cs,
- unsigned reg, unsigned idx,
- unsigned value)
+static inline void
+radeon_set_sh_reg_idx(const struct radv_physical_device *pdevice, struct radeon_cmdbuf *cs,
+ unsigned reg, unsigned idx, unsigned value)
{
- assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END);
- assert(cs->cdw + 3 <= cs->max_dw);
- assert(idx);
+ assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END);
+ assert(cs->cdw + 3 <= cs->max_dw);
+ assert(idx);
- unsigned opcode = PKT3_SET_SH_REG_INDEX;
- if (pdevice->rad_info.chip_class < GFX10)
- opcode = PKT3_SET_SH_REG;
+ unsigned opcode = PKT3_SET_SH_REG_INDEX;
+ if (pdevice->rad_info.chip_class < GFX10)
+ opcode = PKT3_SET_SH_REG;
- radeon_emit(cs, PKT3(opcode, 1, 0));
- radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2 | (idx << 28));
- radeon_emit(cs, value);
+ radeon_emit(cs, PKT3(opcode, 1, 0));
+ radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2 | (idx << 28));
+ radeon_emit(cs, value);
}
-static inline void radeon_set_uconfig_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
+static inline void
+radeon_set_uconfig_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
{
- assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
- assert(cs->cdw + 2 + num <= cs->max_dw);
- assert(num);
- radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 0));
- radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2);
+ assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
+ assert(cs->cdw + 2 + num <= cs->max_dw);
+ assert(num);
+ radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 0));
+ radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2);
}
-static inline void radeon_set_uconfig_reg_seq_perfctr(struct radeon_cmdbuf *cs,
- unsigned reg, unsigned num)
+static inline void
+radeon_set_uconfig_reg_seq_perfctr(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
{
- assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
- assert(cs->cdw + 2 + num <= cs->max_dw);
- assert(num);
- radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 1));
- radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2);
+ assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
+ assert(cs->cdw + 2 + num <= cs->max_dw);
+ assert(num);
+ radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 1));
+ radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2);
}
-static inline void radeon_set_uconfig_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
+static inline void
+radeon_set_uconfig_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
{
- radeon_set_uconfig_reg_seq(cs, reg, 1);
- radeon_emit(cs, value);
+ radeon_set_uconfig_reg_seq(cs, reg, 1);
+ radeon_emit(cs, value);
}
-static inline void radeon_set_uconfig_reg_idx(const struct radv_physical_device *pdevice,
- struct radeon_cmdbuf *cs,
- unsigned reg, unsigned idx,
- unsigned value)
+static inline void
+radeon_set_uconfig_reg_idx(const struct radv_physical_device *pdevice, struct radeon_cmdbuf *cs,
+ unsigned reg, unsigned idx, unsigned value)
{
- assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
- assert(cs->cdw + 3 <= cs->max_dw);
- assert(idx);
-
- unsigned opcode = PKT3_SET_UCONFIG_REG_INDEX;
- if (pdevice->rad_info.chip_class < GFX9 ||
- (pdevice->rad_info.chip_class == GFX9 && pdevice->rad_info.me_fw_version < 26))
- opcode = PKT3_SET_UCONFIG_REG;
-
- radeon_emit(cs, PKT3(opcode, 1, 0));
- radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2 | (idx << 28));
- radeon_emit(cs, value);
+ assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
+ assert(cs->cdw + 3 <= cs->max_dw);
+ assert(idx);
+
+ unsigned opcode = PKT3_SET_UCONFIG_REG_INDEX;
+ if (pdevice->rad_info.chip_class < GFX9 ||
+ (pdevice->rad_info.chip_class == GFX9 && pdevice->rad_info.me_fw_version < 26))
+ opcode = PKT3_SET_UCONFIG_REG;
+
+ radeon_emit(cs, PKT3(opcode, 1, 0));
+ radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2 | (idx << 28));
+ radeon_emit(cs, value);
}
-static inline void radeon_set_privileged_config_reg(struct radeon_cmdbuf *cs,
- unsigned reg,
- unsigned value)
+static inline void
+radeon_set_privileged_config_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
{
- assert(reg < CIK_UCONFIG_REG_OFFSET);
- assert(cs->cdw + 6 <= cs->max_dw);
-
- radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) |
- COPY_DATA_DST_SEL(COPY_DATA_PERF));
- radeon_emit(cs, value);
- radeon_emit(cs, 0); /* unused */
- radeon_emit(cs, reg >> 2);
- radeon_emit(cs, 0); /* unused */
+ assert(reg < CIK_UCONFIG_REG_OFFSET);
+ assert(cs->cdw + 6 <= cs->max_dw);
+
+ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_PERF));
+ radeon_emit(cs, value);
+ radeon_emit(cs, 0); /* unused */
+ radeon_emit(cs, reg >> 2);
+ radeon_emit(cs, 0); /* unused */
}
#endif /* RADV_CS_H */
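
All of the register helpers above share one shape: a PKT3 header, the register's dword offset relative to its block base, then `num` payload dwords, which is why the asserts reserve `cs->cdw + 2 + num` space. The toy model below only demonstrates that dword accounting; the packet header and register offsets are fake placeholders, not the real sid.h encodings:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Toy stand-ins; the real values live in sid.h. */
#define TOY_PKT3(op, count) (((uint32_t)(op) << 8) | (uint32_t)(count))
#define TOY_SET_CONTEXT_REG 0x69
#define TOY_CONTEXT_REG_OFFSET 0x28000

struct toy_cmdbuf {
   uint32_t buf[64];
   unsigned cdw, max_dw;
};

static void
toy_emit(struct toy_cmdbuf *cs, uint32_t value)
{
   assert(cs->cdw < cs->max_dw);
   cs->buf[cs->cdw++] = value;
}

/* Same structure as radeon_set_context_reg_seq()/radeon_set_context_reg():
 * header + relative dword offset + num values. */
static void
toy_set_context_reg(struct toy_cmdbuf *cs, unsigned reg, uint32_t value)
{
   assert(cs->cdw + 2 + 1 <= cs->max_dw);
   toy_emit(cs, TOY_PKT3(TOY_SET_CONTEXT_REG, 1));
   toy_emit(cs, (reg - TOY_CONTEXT_REG_OFFSET) >> 2);
   toy_emit(cs, value);
}

int
main(void)
{
   struct toy_cmdbuf cs = {.cdw = 0, .max_dw = 64};
   toy_set_context_reg(&cs, TOY_CONTEXT_REG_OFFSET + 16, 0xdeadbeef);
   printf("emitted %u dwords\n", cs.cdw); /* 3 = header + offset + value */
   return 0;
}

The same accounting explains the `+ 3`, `+ 4` and `+ 6` asserts in the idx, rmw and privileged-config variants above.
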
diff --git a/src/amd/vulkan/radv_debug.c b/src/amd/vulkan/radv_debug.c
index 9ba853ee9f4..ca0c3389538 100644
--- a/src/amd/vulkan/radv_debug.c
+++ b/src/amd/vulkan/radv_debug.c
@@ -25,27 +25,27 @@
* IN THE SOFTWARE.
*/
-#include <stdlib.h>
#include <stdio.h>
+#include <stdlib.h>
#ifndef _WIN32
#include <sys/utsname.h>
#endif
#include <sys/stat.h>
#include "util/mesa-sha1.h"
-#include "sid.h"
#include "ac_debug.h"
#include "radv_debug.h"
#include "radv_shader.h"
+#include "sid.h"
#define TRACE_BO_SIZE 4096
-#define TMA_BO_SIZE 4096
+#define TMA_BO_SIZE 4096
-#define COLOR_RESET "\033[0m"
-#define COLOR_RED "\033[31m"
-#define COLOR_GREEN "\033[1;32m"
-#define COLOR_YELLOW "\033[1;33m"
-#define COLOR_CYAN "\033[1;36m"
+#define COLOR_RESET "\033[0m"
+#define COLOR_RED "\033[31m"
+#define COLOR_GREEN "\033[1;32m"
+#define COLOR_YELLOW "\033[1;33m"
+#define COLOR_CYAN "\033[1;36m"
#define RADV_DUMP_DIR "radv_dumps"
@@ -64,967 +64,934 @@
bool
radv_init_trace(struct radv_device *device)
{
- struct radeon_winsys *ws = device->ws;
- VkResult result;
-
- device->trace_bo = ws->buffer_create(ws, TRACE_BO_SIZE, 8,
- RADEON_DOMAIN_VRAM,
- RADEON_FLAG_CPU_ACCESS|
- RADEON_FLAG_NO_INTERPROCESS_SHARING |
- RADEON_FLAG_ZERO_VRAM,
- RADV_BO_PRIORITY_UPLOAD_BUFFER);
- if (!device->trace_bo)
- return false;
-
- result = ws->buffer_make_resident(ws, device->trace_bo, true);
- if (result != VK_SUCCESS)
- return false;
-
- device->trace_id_ptr = ws->buffer_map(device->trace_bo);
- if (!device->trace_id_ptr)
- return false;
-
- ac_vm_fault_occured(device->physical_device->rad_info.chip_class,
- &device->dmesg_timestamp, NULL);
-
- return true;
+ struct radeon_winsys *ws = device->ws;
+ VkResult result;
+
+ device->trace_bo = ws->buffer_create(
+ ws, TRACE_BO_SIZE, 8, RADEON_DOMAIN_VRAM,
+ RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM,
+ RADV_BO_PRIORITY_UPLOAD_BUFFER);
+ if (!device->trace_bo)
+ return false;
+
+ result = ws->buffer_make_resident(ws, device->trace_bo, true);
+ if (result != VK_SUCCESS)
+ return false;
+
+ device->trace_id_ptr = ws->buffer_map(device->trace_bo);
+ if (!device->trace_id_ptr)
+ return false;
+
+ ac_vm_fault_occured(device->physical_device->rad_info.chip_class, &device->dmesg_timestamp,
+ NULL);
+
+ return true;
}
void
radv_finish_trace(struct radv_device *device)
{
- struct radeon_winsys *ws = device->ws;
+ struct radeon_winsys *ws = device->ws;
- if (unlikely(device->trace_bo)) {
- ws->buffer_make_resident(ws, device->trace_bo, false);
- ws->buffer_destroy(ws, device->trace_bo);
- }
+ if (unlikely(device->trace_bo)) {
+ ws->buffer_make_resident(ws, device->trace_bo, false);
+ ws->buffer_destroy(ws, device->trace_bo);
+ }
}
static void
radv_dump_trace(struct radv_device *device, struct radeon_cmdbuf *cs, FILE *f)
{
- fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
- device->ws->cs_dump(cs, f, (const int*)device->trace_id_ptr, 2);
+ fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
+ device->ws->cs_dump(cs, f, (const int *)device->trace_id_ptr, 2);
}
static void
radv_dump_mmapped_reg(struct radv_device *device, FILE *f, unsigned offset)
{
- struct radeon_winsys *ws = device->ws;
- uint32_t value;
+ struct radeon_winsys *ws = device->ws;
+ uint32_t value;
- if (ws->read_registers(ws, offset, 1, &value))
- ac_dump_reg(f, device->physical_device->rad_info.chip_class,
- offset, value, ~0);
+ if (ws->read_registers(ws, offset, 1, &value))
+ ac_dump_reg(f, device->physical_device->rad_info.chip_class, offset, value, ~0);
}
static void
radv_dump_debug_registers(struct radv_device *device, FILE *f)
{
- struct radeon_info *info = &device->physical_device->rad_info;
-
- fprintf(f, "Memory-mapped registers:\n");
- radv_dump_mmapped_reg(device, f, R_008010_GRBM_STATUS);
-
- radv_dump_mmapped_reg(device, f, R_008008_GRBM_STATUS2);
- radv_dump_mmapped_reg(device, f, R_008014_GRBM_STATUS_SE0);
- radv_dump_mmapped_reg(device, f, R_008018_GRBM_STATUS_SE1);
- radv_dump_mmapped_reg(device, f, R_008038_GRBM_STATUS_SE2);
- radv_dump_mmapped_reg(device, f, R_00803C_GRBM_STATUS_SE3);
- radv_dump_mmapped_reg(device, f, R_00D034_SDMA0_STATUS_REG);
- radv_dump_mmapped_reg(device, f, R_00D834_SDMA1_STATUS_REG);
- if (info->chip_class <= GFX8) {
- radv_dump_mmapped_reg(device, f, R_000E50_SRBM_STATUS);
- radv_dump_mmapped_reg(device, f, R_000E4C_SRBM_STATUS2);
- radv_dump_mmapped_reg(device, f, R_000E54_SRBM_STATUS3);
- }
- radv_dump_mmapped_reg(device, f, R_008680_CP_STAT);
- radv_dump_mmapped_reg(device, f, R_008674_CP_STALLED_STAT1);
- radv_dump_mmapped_reg(device, f, R_008678_CP_STALLED_STAT2);
- radv_dump_mmapped_reg(device, f, R_008670_CP_STALLED_STAT3);
- radv_dump_mmapped_reg(device, f, R_008210_CP_CPC_STATUS);
- radv_dump_mmapped_reg(device, f, R_008214_CP_CPC_BUSY_STAT);
- radv_dump_mmapped_reg(device, f, R_008218_CP_CPC_STALLED_STAT1);
- radv_dump_mmapped_reg(device, f, R_00821C_CP_CPF_STATUS);
- radv_dump_mmapped_reg(device, f, R_008220_CP_CPF_BUSY_STAT);
- radv_dump_mmapped_reg(device, f, R_008224_CP_CPF_STALLED_STAT1);
- fprintf(f, "\n");
+ struct radeon_info *info = &device->physical_device->rad_info;
+
+ fprintf(f, "Memory-mapped registers:\n");
+ radv_dump_mmapped_reg(device, f, R_008010_GRBM_STATUS);
+
+ radv_dump_mmapped_reg(device, f, R_008008_GRBM_STATUS2);
+ radv_dump_mmapped_reg(device, f, R_008014_GRBM_STATUS_SE0);
+ radv_dump_mmapped_reg(device, f, R_008018_GRBM_STATUS_SE1);
+ radv_dump_mmapped_reg(device, f, R_008038_GRBM_STATUS_SE2);
+ radv_dump_mmapped_reg(device, f, R_00803C_GRBM_STATUS_SE3);
+ radv_dump_mmapped_reg(device, f, R_00D034_SDMA0_STATUS_REG);
+ radv_dump_mmapped_reg(device, f, R_00D834_SDMA1_STATUS_REG);
+ if (info->chip_class <= GFX8) {
+ radv_dump_mmapped_reg(device, f, R_000E50_SRBM_STATUS);
+ radv_dump_mmapped_reg(device, f, R_000E4C_SRBM_STATUS2);
+ radv_dump_mmapped_reg(device, f, R_000E54_SRBM_STATUS3);
+ }
+ radv_dump_mmapped_reg(device, f, R_008680_CP_STAT);
+ radv_dump_mmapped_reg(device, f, R_008674_CP_STALLED_STAT1);
+ radv_dump_mmapped_reg(device, f, R_008678_CP_STALLED_STAT2);
+ radv_dump_mmapped_reg(device, f, R_008670_CP_STALLED_STAT3);
+ radv_dump_mmapped_reg(device, f, R_008210_CP_CPC_STATUS);
+ radv_dump_mmapped_reg(device, f, R_008214_CP_CPC_BUSY_STAT);
+ radv_dump_mmapped_reg(device, f, R_008218_CP_CPC_STALLED_STAT1);
+ radv_dump_mmapped_reg(device, f, R_00821C_CP_CPF_STATUS);
+ radv_dump_mmapped_reg(device, f, R_008220_CP_CPF_BUSY_STAT);
+ radv_dump_mmapped_reg(device, f, R_008224_CP_CPF_STALLED_STAT1);
+ fprintf(f, "\n");
}
static void
-radv_dump_buffer_descriptor(enum chip_class chip_class, const uint32_t *desc,
- FILE *f)
+radv_dump_buffer_descriptor(enum chip_class chip_class, const uint32_t *desc, FILE *f)
{
- fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n");
- for (unsigned j = 0; j < 4; j++)
- ac_dump_reg(f, chip_class, R_008F00_SQ_BUF_RSRC_WORD0 + j * 4,
- desc[j], 0xffffffff);
+ fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n");
+ for (unsigned j = 0; j < 4; j++)
+ ac_dump_reg(f, chip_class, R_008F00_SQ_BUF_RSRC_WORD0 + j * 4, desc[j], 0xffffffff);
}
static void
-radv_dump_image_descriptor(enum chip_class chip_class, const uint32_t *desc,
- FILE *f)
+radv_dump_image_descriptor(enum chip_class chip_class, const uint32_t *desc, FILE *f)
{
- unsigned sq_img_rsrc_word0 = chip_class >= GFX10 ? R_00A000_SQ_IMG_RSRC_WORD0
- : R_008F10_SQ_IMG_RSRC_WORD0;
-
- fprintf(f, COLOR_CYAN " Image:" COLOR_RESET "\n");
- for (unsigned j = 0; j < 8; j++)
- ac_dump_reg(f, chip_class, sq_img_rsrc_word0 + j * 4,
- desc[j], 0xffffffff);
-
- fprintf(f, COLOR_CYAN " FMASK:" COLOR_RESET "\n");
- for (unsigned j = 0; j < 8; j++)
- ac_dump_reg(f, chip_class, sq_img_rsrc_word0 + j * 4,
- desc[8 + j], 0xffffffff);
+ unsigned sq_img_rsrc_word0 =
+ chip_class >= GFX10 ? R_00A000_SQ_IMG_RSRC_WORD0 : R_008F10_SQ_IMG_RSRC_WORD0;
+
+ fprintf(f, COLOR_CYAN " Image:" COLOR_RESET "\n");
+ for (unsigned j = 0; j < 8; j++)
+ ac_dump_reg(f, chip_class, sq_img_rsrc_word0 + j * 4, desc[j], 0xffffffff);
+
+ fprintf(f, COLOR_CYAN " FMASK:" COLOR_RESET "\n");
+ for (unsigned j = 0; j < 8; j++)
+ ac_dump_reg(f, chip_class, sq_img_rsrc_word0 + j * 4, desc[8 + j], 0xffffffff);
}
static void
-radv_dump_sampler_descriptor(enum chip_class chip_class, const uint32_t *desc,
- FILE *f)
+radv_dump_sampler_descriptor(enum chip_class chip_class, const uint32_t *desc, FILE *f)
{
- fprintf(f, COLOR_CYAN " Sampler state:" COLOR_RESET "\n");
- for (unsigned j = 0; j < 4; j++) {
- ac_dump_reg(f, chip_class, R_008F30_SQ_IMG_SAMP_WORD0 + j * 4,
- desc[j], 0xffffffff);
- }
+ fprintf(f, COLOR_CYAN " Sampler state:" COLOR_RESET "\n");
+ for (unsigned j = 0; j < 4; j++) {
+ ac_dump_reg(f, chip_class, R_008F30_SQ_IMG_SAMP_WORD0 + j * 4, desc[j], 0xffffffff);
+ }
}
static void
-radv_dump_combined_image_sampler_descriptor(enum chip_class chip_class,
- const uint32_t *desc, FILE *f)
+radv_dump_combined_image_sampler_descriptor(enum chip_class chip_class, const uint32_t *desc,
+ FILE *f)
{
- radv_dump_image_descriptor(chip_class, desc, f);
- radv_dump_sampler_descriptor(chip_class, desc + 16, f);
+ radv_dump_image_descriptor(chip_class, desc, f);
+ radv_dump_sampler_descriptor(chip_class, desc + 16, f);
}
static void
-radv_dump_descriptor_set(struct radv_device *device,
- struct radv_descriptor_set *set, unsigned id, FILE *f)
+radv_dump_descriptor_set(struct radv_device *device, struct radv_descriptor_set *set, unsigned id,
+ FILE *f)
{
- enum chip_class chip_class = device->physical_device->rad_info.chip_class;
- const struct radv_descriptor_set_layout *layout;
- int i;
-
- if (!set)
- return;
- layout = set->header.layout;
-
- for (i = 0; i < set->header.layout->binding_count; i++) {
- uint32_t *desc =
- set->header.mapped_ptr + layout->binding[i].offset / 4;
-
- switch (layout->binding[i].type) {
- case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
- case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
- case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
- case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
- radv_dump_buffer_descriptor(chip_class, desc, f);
- break;
- case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
- case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
- case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
- radv_dump_image_descriptor(chip_class, desc, f);
- break;
- case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
- radv_dump_combined_image_sampler_descriptor(chip_class, desc, f);
- break;
- case VK_DESCRIPTOR_TYPE_SAMPLER:
- radv_dump_sampler_descriptor(chip_class, desc, f);
- break;
- case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
- case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
- /* todo */
- break;
- default:
- assert(!"unknown descriptor type");
- break;
- }
- fprintf(f, "\n");
- }
- fprintf(f, "\n\n");
+ enum chip_class chip_class = device->physical_device->rad_info.chip_class;
+ const struct radv_descriptor_set_layout *layout;
+ int i;
+
+ if (!set)
+ return;
+ layout = set->header.layout;
+
+ for (i = 0; i < set->header.layout->binding_count; i++) {
+ uint32_t *desc = set->header.mapped_ptr + layout->binding[i].offset / 4;
+
+ switch (layout->binding[i].type) {
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ radv_dump_buffer_descriptor(chip_class, desc, f);
+ break;
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+ radv_dump_image_descriptor(chip_class, desc, f);
+ break;
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ radv_dump_combined_image_sampler_descriptor(chip_class, desc, f);
+ break;
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ radv_dump_sampler_descriptor(chip_class, desc, f);
+ break;
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+ /* todo */
+ break;
+ default:
+ assert(!"unknown descriptor type");
+ break;
+ }
+ fprintf(f, "\n");
+ }
+ fprintf(f, "\n\n");
}
static void
radv_dump_descriptors(struct radv_device *device, FILE *f)
{
- uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
- int i;
+ uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
+ int i;
- fprintf(f, "Descriptors:\n");
- for (i = 0; i < MAX_SETS; i++) {
- struct radv_descriptor_set *set =
- *(struct radv_descriptor_set **)(ptr + i + 4);
+ fprintf(f, "Descriptors:\n");
+ for (i = 0; i < MAX_SETS; i++) {
+ struct radv_descriptor_set *set = *(struct radv_descriptor_set **)(ptr + i + 4);
- radv_dump_descriptor_set(device, set, i, f);
- }
+ radv_dump_descriptor_set(device, set, i, f);
+ }
}
struct radv_shader_inst {
- char text[160]; /* one disasm line */
- unsigned offset; /* instruction offset */
- unsigned size; /* instruction size = 4 or 8 */
+ char text[160]; /* one disasm line */
+ unsigned offset; /* instruction offset */
+ unsigned size; /* instruction size = 4 or 8 */
};
/* Split a disassembly string into lines and add them to the array pointed
* to by "instructions". */
-static void si_add_split_disasm(const char *disasm,
- uint64_t start_addr,
- unsigned *num,
- struct radv_shader_inst *instructions)
+static void
+si_add_split_disasm(const char *disasm, uint64_t start_addr, unsigned *num,
+ struct radv_shader_inst *instructions)
{
- struct radv_shader_inst *last_inst = *num ? &instructions[*num - 1] : NULL;
- char *next;
-
- while ((next = strchr(disasm, '\n'))) {
- struct radv_shader_inst *inst = &instructions[*num];
- unsigned len = next - disasm;
-
- if (!memchr(disasm, ';', len)) {
- /* Ignore everything that is not an instruction. */
- disasm = next + 1;
- continue;
- }
-
- assert(len < ARRAY_SIZE(inst->text));
- memcpy(inst->text, disasm, len);
- inst->text[len] = 0;
- inst->offset = last_inst ? last_inst->offset + last_inst->size : 0;
-
- const char *semicolon = strchr(disasm, ';');
- assert(semicolon);
- /* More than 16 chars after ";" means the instruction is 8 bytes long. */
- inst->size = next - semicolon > 16 ? 8 : 4;
-
- snprintf(inst->text + len, ARRAY_SIZE(inst->text) - len,
- " [PC=0x%"PRIx64", off=%u, size=%u]",
- start_addr + inst->offset, inst->offset, inst->size);
-
- last_inst = inst;
- (*num)++;
- disasm = next + 1;
- }
+ struct radv_shader_inst *last_inst = *num ? &instructions[*num - 1] : NULL;
+ char *next;
+
+ while ((next = strchr(disasm, '\n'))) {
+ struct radv_shader_inst *inst = &instructions[*num];
+ unsigned len = next - disasm;
+
+ if (!memchr(disasm, ';', len)) {
+ /* Ignore everything that is not an instruction. */
+ disasm = next + 1;
+ continue;
+ }
+
+ assert(len < ARRAY_SIZE(inst->text));
+ memcpy(inst->text, disasm, len);
+ inst->text[len] = 0;
+ inst->offset = last_inst ? last_inst->offset + last_inst->size : 0;
+
+ const char *semicolon = strchr(disasm, ';');
+ assert(semicolon);
+ /* More than 16 chars after ";" means the instruction is 8 bytes long. */
+ inst->size = next - semicolon > 16 ? 8 : 4;
+
+ snprintf(inst->text + len, ARRAY_SIZE(inst->text) - len,
+ " [PC=0x%" PRIx64 ", off=%u, size=%u]", start_addr + inst->offset, inst->offset,
+ inst->size);
+
+ last_inst = inst;
+ (*num)++;
+ disasm = next + 1;
+ }
}
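
si_add_split_disasm() sizes each instruction purely from the printed encoding: more than 16 characters between the ';' and the end of the line means two encoding dwords, i.e. an 8-byte instruction. A tiny stand-alone replay of that heuristic on made-up disassembly lines:

#include <assert.h>
#include <string.h>

/* Illustrative only: same comparison as in si_add_split_disasm(), but on a
 * NUL-terminated line instead of a '\n'-terminated slice. */
static unsigned
guess_inst_size(const char *line)
{
   const char *semicolon = strchr(line, ';');
   assert(semicolon);
   return (line + strlen(line)) - semicolon > 16 ? 8 : 4;
}

int
main(void)
{
   assert(guess_inst_size("s_mov_b32 s0, s1           ; BE800301") == 4);
   assert(guess_inst_size("s_mov_b32 s0, 0x12345678   ; BE8003FF 12345678") == 8);
   return 0;
}
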
static void
-radv_dump_annotated_shader(struct radv_shader_variant *shader,
- gl_shader_stage stage, struct ac_wave_info *waves,
- unsigned num_waves, FILE *f)
+radv_dump_annotated_shader(struct radv_shader_variant *shader, gl_shader_stage stage,
+ struct ac_wave_info *waves, unsigned num_waves, FILE *f)
{
- uint64_t start_addr, end_addr;
- unsigned i;
-
- if (!shader)
- return;
-
- start_addr = radv_buffer_get_va(shader->bo) + shader->bo_offset;
- end_addr = start_addr + shader->code_size;
-
- /* See if any wave executes the shader. */
- for (i = 0; i < num_waves; i++) {
- if (start_addr <= waves[i].pc && waves[i].pc <= end_addr)
- break;
- }
-
- if (i == num_waves)
- return; /* the shader is not being executed */
-
- /* Remember the first found wave. The waves are sorted according to PC. */
- waves = &waves[i];
- num_waves -= i;
-
- /* Get the list of instructions.
- * Buffer size / 4 is the upper bound of the instruction count.
- */
- unsigned num_inst = 0;
- struct radv_shader_inst *instructions =
- calloc(shader->code_size / 4, sizeof(struct radv_shader_inst));
-
- si_add_split_disasm(shader->disasm_string,
- start_addr, &num_inst, instructions);
-
- fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n",
- radv_get_shader_name(&shader->info, stage));
-
- /* Print instructions with annotations. */
- for (i = 0; i < num_inst; i++) {
- struct radv_shader_inst *inst = &instructions[i];
-
- fprintf(f, "%s\n", inst->text);
-
- /* Print which waves execute the instruction right now. */
- while (num_waves && start_addr + inst->offset == waves->pc) {
- fprintf(f,
- " " COLOR_GREEN "^ SE%u SH%u CU%u "
- "SIMD%u WAVE%u EXEC=%016"PRIx64 " ",
- waves->se, waves->sh, waves->cu, waves->simd,
- waves->wave, waves->exec);
-
- if (inst->size == 4) {
- fprintf(f, "INST32=%08X" COLOR_RESET "\n",
- waves->inst_dw0);
- } else {
- fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n",
- waves->inst_dw0, waves->inst_dw1);
- }
-
- waves->matched = true;
- waves = &waves[1];
- num_waves--;
- }
- }
-
- fprintf(f, "\n\n");
- free(instructions);
+ uint64_t start_addr, end_addr;
+ unsigned i;
+
+ if (!shader)
+ return;
+
+ start_addr = radv_buffer_get_va(shader->bo) + shader->bo_offset;
+ end_addr = start_addr + shader->code_size;
+
+ /* See if any wave executes the shader. */
+ for (i = 0; i < num_waves; i++) {
+ if (start_addr <= waves[i].pc && waves[i].pc <= end_addr)
+ break;
+ }
+
+ if (i == num_waves)
+ return; /* the shader is not being executed */
+
+ /* Remember the first found wave. The waves are sorted according to PC. */
+ waves = &waves[i];
+ num_waves -= i;
+
+ /* Get the list of instructions.
+ * Buffer size / 4 is the upper bound of the instruction count.
+ */
+ unsigned num_inst = 0;
+ struct radv_shader_inst *instructions =
+ calloc(shader->code_size / 4, sizeof(struct radv_shader_inst));
+
+ si_add_split_disasm(shader->disasm_string, start_addr, &num_inst, instructions);
+
+ fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n",
+ radv_get_shader_name(&shader->info, stage));
+
+ /* Print instructions with annotations. */
+ for (i = 0; i < num_inst; i++) {
+ struct radv_shader_inst *inst = &instructions[i];
+
+ fprintf(f, "%s\n", inst->text);
+
+ /* Print which waves execute the instruction right now. */
+ while (num_waves && start_addr + inst->offset == waves->pc) {
+ fprintf(f,
+ " " COLOR_GREEN "^ SE%u SH%u CU%u "
+ "SIMD%u WAVE%u EXEC=%016" PRIx64 " ",
+ waves->se, waves->sh, waves->cu, waves->simd, waves->wave, waves->exec);
+
+ if (inst->size == 4) {
+ fprintf(f, "INST32=%08X" COLOR_RESET "\n", waves->inst_dw0);
+ } else {
+ fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n", waves->inst_dw0, waves->inst_dw1);
+ }
+
+ waves->matched = true;
+ waves = &waves[1];
+ num_waves--;
+ }
+ }
+
+ fprintf(f, "\n\n");
+ free(instructions);
}
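
radv_dump_annotated_shader() above leans on the waves being sorted by PC: it first skips to the earliest wave whose PC lands inside the shader's [start, end] code range, then walks waves and instructions in lockstep. A compact stand-alone illustration of that first step, with made-up PC values:

#include <stdint.h>
#include <stdio.h>

/* Waves are assumed sorted by pc; find the first one executing code inside
 * [start_addr, end_addr]. Returns num_waves if none matches. */
static unsigned
first_wave_in_shader(const uint64_t *pc, unsigned num_waves, uint64_t start_addr,
                     uint64_t end_addr)
{
   unsigned i;
   for (i = 0; i < num_waves; i++)
      if (start_addr <= pc[i] && pc[i] <= end_addr)
         break;
   return i;
}

int
main(void)
{
   const uint64_t pc[] = {0x1000, 0x20040, 0x20080, 0x90000};
   unsigned i = first_wave_in_shader(pc, 4, 0x20000, 0x20100);
   printf("first matching wave: %u\n", i); /* 1 */
   return 0;
}
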
static void
-radv_dump_annotated_shaders(struct radv_pipeline *pipeline,
- VkShaderStageFlagBits active_stages, FILE *f)
+radv_dump_annotated_shaders(struct radv_pipeline *pipeline, VkShaderStageFlagBits active_stages,
+ FILE *f)
{
- struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP];
- enum chip_class chip_class = pipeline->device->physical_device->rad_info.chip_class;
- unsigned num_waves = ac_get_wave_info(chip_class, waves);
-
- fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET
- "\n\n", num_waves);
-
- /* Dump annotated active graphics shaders. */
- unsigned stages = active_stages;
- while (stages) {
- int stage = u_bit_scan(&stages);
-
- radv_dump_annotated_shader(pipeline->shaders[stage],
- stage, waves, num_waves, f);
- }
-
- /* Print waves executing shaders that are not currently bound. */
- unsigned i;
- bool found = false;
- for (i = 0; i < num_waves; i++) {
- if (waves[i].matched)
- continue;
-
- if (!found) {
- fprintf(f, COLOR_CYAN
- "Waves not executing currently-bound shaders:"
- COLOR_RESET "\n");
- found = true;
- }
- fprintf(f, " SE%u SH%u CU%u SIMD%u WAVE%u EXEC=%016"PRIx64
- " INST=%08X %08X PC=%"PRIx64"\n",
- waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd,
- waves[i].wave, waves[i].exec, waves[i].inst_dw0,
- waves[i].inst_dw1, waves[i].pc);
- }
- if (found)
- fprintf(f, "\n\n");
+ struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP];
+ enum chip_class chip_class = pipeline->device->physical_device->rad_info.chip_class;
+ unsigned num_waves = ac_get_wave_info(chip_class, waves);
+
+ fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET "\n\n", num_waves);
+
+ /* Dump annotated active graphics shaders. */
+ unsigned stages = active_stages;
+ while (stages) {
+ int stage = u_bit_scan(&stages);
+
+ radv_dump_annotated_shader(pipeline->shaders[stage], stage, waves, num_waves, f);
+ }
+
+ /* Print waves executing shaders that are not currently bound. */
+ unsigned i;
+ bool found = false;
+ for (i = 0; i < num_waves; i++) {
+ if (waves[i].matched)
+ continue;
+
+ if (!found) {
+ fprintf(f, COLOR_CYAN "Waves not executing currently-bound shaders:" COLOR_RESET "\n");
+ found = true;
+ }
+ fprintf(f,
+ " SE%u SH%u CU%u SIMD%u WAVE%u EXEC=%016" PRIx64 " INST=%08X %08X PC=%" PRIx64
+ "\n",
+ waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd, waves[i].wave, waves[i].exec,
+ waves[i].inst_dw0, waves[i].inst_dw1, waves[i].pc);
+ }
+ if (found)
+ fprintf(f, "\n\n");
}
static void
-radv_dump_shader(struct radv_pipeline *pipeline,
- struct radv_shader_variant *shader, gl_shader_stage stage,
- FILE *f)
+radv_dump_shader(struct radv_pipeline *pipeline, struct radv_shader_variant *shader,
+ gl_shader_stage stage, FILE *f)
{
- if (!shader)
- return;
+ if (!shader)
+ return;
- fprintf(f, "%s:\n\n", radv_get_shader_name(&shader->info, stage));
+ fprintf(f, "%s:\n\n", radv_get_shader_name(&shader->info, stage));
- if (shader->spirv) {
- unsigned char sha1[21];
- char sha1buf[41];
+ if (shader->spirv) {
+ unsigned char sha1[21];
+ char sha1buf[41];
- _mesa_sha1_compute(shader->spirv, shader->spirv_size, sha1);
- _mesa_sha1_format(sha1buf, sha1);
+ _mesa_sha1_compute(shader->spirv, shader->spirv_size, sha1);
+ _mesa_sha1_format(sha1buf, sha1);
- fprintf(f, "SPIRV (sha1: %s):\n", sha1buf);
- radv_print_spirv(shader->spirv, shader->spirv_size, f);
- }
+ fprintf(f, "SPIRV (sha1: %s):\n", sha1buf);
+ radv_print_spirv(shader->spirv, shader->spirv_size, f);
+ }
- if (shader->nir_string) {
- fprintf(f, "NIR:\n%s\n", shader->nir_string);
- }
+ if (shader->nir_string) {
+ fprintf(f, "NIR:\n%s\n", shader->nir_string);
+ }
- fprintf(f, "%s IR:\n%s\n",
- pipeline->device->physical_device->use_llvm ? "LLVM" : "ACO",
- shader->ir_string);
- fprintf(f, "DISASM:\n%s\n", shader->disasm_string);
+ fprintf(f, "%s IR:\n%s\n", pipeline->device->physical_device->use_llvm ? "LLVM" : "ACO",
+ shader->ir_string);
+ fprintf(f, "DISASM:\n%s\n", shader->disasm_string);
- radv_dump_shader_stats(pipeline->device, pipeline, stage, f);
+ radv_dump_shader_stats(pipeline->device, pipeline, stage, f);
}
static void
-radv_dump_shaders(struct radv_pipeline *pipeline,
- VkShaderStageFlagBits active_stages, FILE *f)
+radv_dump_shaders(struct radv_pipeline *pipeline, VkShaderStageFlagBits active_stages, FILE *f)
{
- /* Dump active graphics shaders. */
- unsigned stages = active_stages;
- while (stages) {
- int stage = u_bit_scan(&stages);
+ /* Dump active graphics shaders. */
+ unsigned stages = active_stages;
+ while (stages) {
+ int stage = u_bit_scan(&stages);
- radv_dump_shader(pipeline, pipeline->shaders[stage], stage, f);
- }
+ radv_dump_shader(pipeline, pipeline->shaders[stage], stage, f);
+ }
}
static void
radv_dump_vertex_descriptors(struct radv_pipeline *pipeline, FILE *f)
{
- void *ptr = (uint64_t *)pipeline->device->trace_id_ptr;
- uint32_t count = pipeline->num_vertex_bindings;
- uint32_t *vb_ptr = &((uint32_t *)ptr)[3];
-
- if (!count)
- return;
-
- fprintf(f, "Num vertex bindings: %d\n", count);
- for (uint32_t i = 0; i < count; i++) {
- uint32_t *desc = &((uint32_t *)vb_ptr)[i * 4];
- uint64_t va = 0;
-
- va |= desc[0];
- va |= (uint64_t)G_008F04_BASE_ADDRESS_HI(desc[1]) << 32;
-
- fprintf(f, "VBO#%d:\n", i);
- fprintf(f, "\tVA: 0x%"PRIx64"\n", va);
- fprintf(f, "\tStride: %d\n", G_008F04_STRIDE(desc[1]));
- fprintf(f, "\tNum records: %d (0x%x)\n", desc[2], desc[2]);
- }
+ void *ptr = (uint64_t *)pipeline->device->trace_id_ptr;
+ uint32_t count = pipeline->num_vertex_bindings;
+ uint32_t *vb_ptr = &((uint32_t *)ptr)[3];
+
+ if (!count)
+ return;
+
+ fprintf(f, "Num vertex bindings: %d\n", count);
+ for (uint32_t i = 0; i < count; i++) {
+ uint32_t *desc = &((uint32_t *)vb_ptr)[i * 4];
+ uint64_t va = 0;
+
+ va |= desc[0];
+ va |= (uint64_t)G_008F04_BASE_ADDRESS_HI(desc[1]) << 32;
+
+ fprintf(f, "VBO#%d:\n", i);
+ fprintf(f, "\tVA: 0x%" PRIx64 "\n", va);
+ fprintf(f, "\tStride: %d\n", G_008F04_STRIDE(desc[1]));
+ fprintf(f, "\tNum records: %d (0x%x)\n", desc[2], desc[2]);
+ }
}
static struct radv_pipeline *
radv_get_saved_pipeline(struct radv_device *device, enum ring_type ring)
{
- uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
- int offset = ring == RING_GFX ? 1 : 2;
+ uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
+ int offset = ring == RING_GFX ? 1 : 2;
- return *(struct radv_pipeline **)(ptr + offset);
+ return *(struct radv_pipeline **)(ptr + offset);
}
static void
radv_dump_queue_state(struct radv_queue *queue, FILE *f)
{
- enum ring_type ring = radv_queue_family_to_ring(queue->queue_family_index);
- struct radv_pipeline *pipeline;
-
- fprintf(f, "RING_%s:\n", ring == RING_GFX ? "GFX" : "COMPUTE");
-
- pipeline = radv_get_saved_pipeline(queue->device, ring);
- if (pipeline) {
- radv_dump_shaders(pipeline, pipeline->active_stages, f);
- if (!(queue->device->instance->debug_flags & RADV_DEBUG_NO_UMR))
- radv_dump_annotated_shaders(pipeline, pipeline->active_stages, f);
- radv_dump_vertex_descriptors(pipeline, f);
- radv_dump_descriptors(queue->device, f);
- }
+ enum ring_type ring = radv_queue_family_to_ring(queue->queue_family_index);
+ struct radv_pipeline *pipeline;
+
+ fprintf(f, "RING_%s:\n", ring == RING_GFX ? "GFX" : "COMPUTE");
+
+ pipeline = radv_get_saved_pipeline(queue->device, ring);
+ if (pipeline) {
+ radv_dump_shaders(pipeline, pipeline->active_stages, f);
+ if (!(queue->device->instance->debug_flags & RADV_DEBUG_NO_UMR))
+ radv_dump_annotated_shaders(pipeline, pipeline->active_stages, f);
+ radv_dump_vertex_descriptors(pipeline, f);
+ radv_dump_descriptors(queue->device, f);
+ }
}
static void
radv_dump_cmd(const char *cmd, FILE *f)
{
#ifndef _WIN32
- char line[2048];
- FILE *p;
-
- p = popen(cmd, "r");
- if (p) {
- while (fgets(line, sizeof(line), p))
- fputs(line, f);
- fprintf(f, "\n");
- pclose(p);
- }
+ char line[2048];
+ FILE *p;
+
+ p = popen(cmd, "r");
+ if (p) {
+ while (fgets(line, sizeof(line), p))
+ fputs(line, f);
+ fprintf(f, "\n");
+ pclose(p);
+ }
#endif
}
static void
radv_dump_dmesg(FILE *f)
{
- fprintf(f, "\nLast 60 lines of dmesg:\n\n");
- radv_dump_cmd("dmesg | tail -n60", f);
+ fprintf(f, "\nLast 60 lines of dmesg:\n\n");
+ radv_dump_cmd("dmesg | tail -n60", f);
}
void
radv_dump_enabled_options(struct radv_device *device, FILE *f)
{
- uint64_t mask;
-
- if (device->instance->debug_flags) {
- fprintf(f, "Enabled debug options: ");
-
- mask = device->instance->debug_flags;
- while (mask) {
- int i = u_bit_scan64(&mask);
- fprintf(f, "%s, ", radv_get_debug_option_name(i));
- }
- fprintf(f, "\n");
- }
-
- if (device->instance->perftest_flags) {
- fprintf(f, "Enabled perftest options: ");
-
- mask = device->instance->perftest_flags;
- while (mask) {
- int i = u_bit_scan64(&mask);
- fprintf(f, "%s, ", radv_get_perftest_option_name(i));
- }
- fprintf(f, "\n");
- }
+ uint64_t mask;
+
+ if (device->instance->debug_flags) {
+ fprintf(f, "Enabled debug options: ");
+
+ mask = device->instance->debug_flags;
+ while (mask) {
+ int i = u_bit_scan64(&mask);
+ fprintf(f, "%s, ", radv_get_debug_option_name(i));
+ }
+ fprintf(f, "\n");
+ }
+
+ if (device->instance->perftest_flags) {
+ fprintf(f, "Enabled perftest options: ");
+
+ mask = device->instance->perftest_flags;
+ while (mask) {
+ int i = u_bit_scan64(&mask);
+ fprintf(f, "%s, ", radv_get_perftest_option_name(i));
+ }
+ fprintf(f, "\n");
+ }
}
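The two loops above walk a 64-bit flag mask one set bit at a time through u_bit_scan64(). A standalone sketch of the same pattern is below; find_lowest_set() is a local stand-in for the Mesa utility, and the printed "bit %d" is a placeholder where the driver looks up the option name.

#include <stdint.h>
#include <stdio.h>

/* Hedged sketch of the "peel one set bit per iteration" pattern used by
 * radv_dump_enabled_options(). find_lowest_set() stands in for u_bit_scan64()
 * and must only be called with a non-zero mask. */
static int
find_lowest_set(uint64_t *mask)
{
   int bit = __builtin_ctzll(*mask); /* index of the lowest set bit */
   *mask &= *mask - 1;               /* clear that bit */
   return bit;
}

static void
dump_flag_bits(uint64_t flags, FILE *f)
{
   uint64_t mask = flags;
   while (mask) {
      int i = find_lowest_set(&mask);
      fprintf(f, "bit %d, ", i); /* the driver prints the option name here */
   }
   fprintf(f, "\n");
}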
static void
radv_dump_app_info(struct radv_device *device, FILE *f)
{
- struct radv_instance *instance = device->instance;
-
- fprintf(f, "Application name: %s\n", instance->vk.app_info.app_name);
- fprintf(f, "Application version: %d\n", instance->vk.app_info.app_version);
- fprintf(f, "Engine name: %s\n", instance->vk.app_info.engine_name);
- fprintf(f, "Engine version: %d\n", instance->vk.app_info.engine_version);
- fprintf(f, "API version: %d.%d.%d\n",
- VK_VERSION_MAJOR(instance->vk.app_info.api_version),
- VK_VERSION_MINOR(instance->vk.app_info.api_version),
- VK_VERSION_PATCH(instance->vk.app_info.api_version));
-
- radv_dump_enabled_options(device, f);
+ struct radv_instance *instance = device->instance;
+
+ fprintf(f, "Application name: %s\n", instance->vk.app_info.app_name);
+ fprintf(f, "Application version: %d\n", instance->vk.app_info.app_version);
+ fprintf(f, "Engine name: %s\n", instance->vk.app_info.engine_name);
+ fprintf(f, "Engine version: %d\n", instance->vk.app_info.engine_version);
+ fprintf(f, "API version: %d.%d.%d\n", VK_VERSION_MAJOR(instance->vk.app_info.api_version),
+ VK_VERSION_MINOR(instance->vk.app_info.api_version),
+ VK_VERSION_PATCH(instance->vk.app_info.api_version));
+
+ radv_dump_enabled_options(device, f);
}
static void
radv_dump_device_name(struct radv_device *device, FILE *f)
{
- struct radeon_info *info = &device->physical_device->rad_info;
+ struct radeon_info *info = &device->physical_device->rad_info;
#ifndef _WIN32
- char kernel_version[128] = {0};
- struct utsname uname_data;
+ char kernel_version[128] = {0};
+ struct utsname uname_data;
#endif
- const char *chip_name;
+ const char *chip_name;
- chip_name = device->ws->get_chip_name(device->ws);
+ chip_name = device->ws->get_chip_name(device->ws);
#ifdef _WIN32
- fprintf(f, "Device name: %s (%s / DRM %i.%i.%i)\n\n",
- chip_name, device->physical_device->name,
- info->drm_major, info->drm_minor, info->drm_patchlevel);
+ fprintf(f, "Device name: %s (%s / DRM %i.%i.%i)\n\n", chip_name, device->physical_device->name,
+ info->drm_major, info->drm_minor, info->drm_patchlevel);
#else
- if (uname(&uname_data) == 0)
- snprintf(kernel_version, sizeof(kernel_version),
- " / %s", uname_data.release);
-
- fprintf(f, "Device name: %s (%s / DRM %i.%i.%i%s)\n\n",
- chip_name, device->physical_device->name,
- info->drm_major, info->drm_minor, info->drm_patchlevel,
- kernel_version);
+ if (uname(&uname_data) == 0)
+ snprintf(kernel_version, sizeof(kernel_version), " / %s", uname_data.release);
+
+ fprintf(f, "Device name: %s (%s / DRM %i.%i.%i%s)\n\n", chip_name, device->physical_device->name,
+ info->drm_major, info->drm_minor, info->drm_patchlevel, kernel_version);
#endif
}
static void
radv_dump_umr_ring(struct radv_queue *queue, FILE *f)
{
- enum ring_type ring = radv_queue_family_to_ring(queue->queue_family_index);
- struct radv_device *device = queue->device;
- char cmd[128];
+ enum ring_type ring = radv_queue_family_to_ring(queue->queue_family_index);
+ struct radv_device *device = queue->device;
+ char cmd[128];
- /* TODO: Dump compute ring. */
- if (ring != RING_GFX)
- return;
+ /* TODO: Dump compute ring. */
+ if (ring != RING_GFX)
+ return;
- sprintf(cmd, "umr -R %s 2>&1",
- device->physical_device->rad_info.chip_class >= GFX10 ? "gfx_0.0.0" : "gfx");
+ sprintf(cmd, "umr -R %s 2>&1",
+ device->physical_device->rad_info.chip_class >= GFX10 ? "gfx_0.0.0" : "gfx");
- fprintf(f, "\nUMR GFX ring:\n\n");
- radv_dump_cmd(cmd, f);
+ fprintf(f, "\nUMR GFX ring:\n\n");
+ radv_dump_cmd(cmd, f);
}
static void
radv_dump_umr_waves(struct radv_queue *queue, FILE *f)
{
- enum ring_type ring = radv_queue_family_to_ring(queue->queue_family_index);
- struct radv_device *device = queue->device;
- char cmd[128];
+ enum ring_type ring = radv_queue_family_to_ring(queue->queue_family_index);
+ struct radv_device *device = queue->device;
+ char cmd[128];
- /* TODO: Dump compute ring. */
- if (ring != RING_GFX)
- return;
+ /* TODO: Dump compute ring. */
+ if (ring != RING_GFX)
+ return;
- sprintf(cmd, "umr -O bits,halt_waves -wa %s 2>&1",
- device->physical_device->rad_info.chip_class >= GFX10 ? "gfx_0.0.0" : "gfx");
+ sprintf(cmd, "umr -O bits,halt_waves -wa %s 2>&1",
+ device->physical_device->rad_info.chip_class >= GFX10 ? "gfx_0.0.0" : "gfx");
- fprintf(f, "\nUMR GFX waves:\n\n");
- radv_dump_cmd(cmd, f);
+ fprintf(f, "\nUMR GFX waves:\n\n");
+ radv_dump_cmd(cmd, f);
}
static bool
radv_gpu_hang_occured(struct radv_queue *queue, enum ring_type ring)
{
- struct radeon_winsys *ws = queue->device->ws;
+ struct radeon_winsys *ws = queue->device->ws;
- if (!ws->ctx_wait_idle(queue->hw_ctx, ring, queue->queue_idx))
- return true;
+ if (!ws->ctx_wait_idle(queue->hw_ctx, ring, queue->queue_idx))
+ return true;
- return false;
+ return false;
}
void
radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs)
{
- struct radv_device *device = queue->device;
- char dump_dir[256], dump_path[512];
- enum ring_type ring;
- uint64_t addr;
- FILE *f;
-
- ring = radv_queue_family_to_ring(queue->queue_family_index);
-
- bool hang_occurred = radv_gpu_hang_occured(queue, ring);
- bool vm_fault_occurred = false;
- if (queue->device->instance->debug_flags & RADV_DEBUG_VM_FAULTS)
- vm_fault_occurred = ac_vm_fault_occured(device->physical_device->rad_info.chip_class,
- &device->dmesg_timestamp, &addr);
- if (!hang_occurred && !vm_fault_occurred)
- return;
-
- fprintf(stderr, "radv: GPU hang detected...\n");
-
- /* Create a directory into $HOME/radv_dumps_<pid>_<time> to save
- * various debugging info about that GPU hang.
- */
- struct tm *timep, result;
- time_t raw_time;
- char buf_time[128];
-
- time(&raw_time);
- timep = os_localtime(&raw_time, &result);
- strftime(buf_time, sizeof(buf_time), "%Y.%m.%d_%H.%M.%S", timep);
-
- snprintf(dump_dir, sizeof(dump_dir), "%s/"RADV_DUMP_DIR"_%d_%s",
- debug_get_option("HOME", "."), getpid(), buf_time);
- if (mkdir(dump_dir, 0774) && errno != EEXIST) {
- fprintf(stderr, "radv: can't create directory '%s' (%i).\n",
- dump_dir, errno);
- abort();
- }
-
- fprintf(stderr, "radv: GPU hang report will be saved to '%s'!\n", dump_dir);
-
- /* Dump trace file. */
- snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "trace.log");
- f = fopen(dump_path, "w+");
- if (f) {
- radv_dump_trace(queue->device, cs, f);
- fclose(f);
- }
-
- /* Dump pipeline state. */
- snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "pipeline.log");
- f = fopen(dump_path, "w+");
- if (f) {
- radv_dump_queue_state(queue, f);
- fclose(f);
- }
-
- if (!(device->instance->debug_flags & RADV_DEBUG_NO_UMR)) {
- /* Dump UMR ring. */
- snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "umr_ring.log");
- f = fopen(dump_path, "w+");
- if (f) {
- radv_dump_umr_ring(queue, f);
- fclose(f);
- }
-
- /* Dump UMR waves. */
- snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "umr_waves.log");
- f = fopen(dump_path, "w+");
- if (f) {
- radv_dump_umr_waves(queue, f);
- fclose(f);
- }
- }
-
- /* Dump debug registers. */
- snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "registers.log");
- f = fopen(dump_path, "w+");
- if (f) {
- radv_dump_debug_registers(device, f);
- fclose(f);
- }
-
- /* Dump BO ranges. */
- snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "bo_ranges.log");
- f = fopen(dump_path, "w+");
- if (f) {
- device->ws->dump_bo_ranges(device->ws, f);
- fclose(f);
- }
-
- /* Dump BO log. */
- snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "bo_history.log");
- f = fopen(dump_path, "w+");
- if (f) {
- device->ws->dump_bo_log(device->ws, f);
- fclose(f);
- }
-
- /* Dump VM fault info. */
- if (vm_fault_occurred) {
- snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "vm_fault.log");
- f = fopen(dump_path, "w+");
- if (f) {
- fprintf(f, "VM fault report.\n\n");
- fprintf(f, "Failing VM page: 0x%08"PRIx64"\n\n", addr);
- fclose(f);
- }
- }
-
- /* Dump app info. */
- snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "app_info.log");
- f = fopen(dump_path, "w+");
- if (f) {
- radv_dump_app_info(device, f);
- fclose(f);
- }
-
- /* Dump GPU info. */
- snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "gpu_info.log");
- f = fopen(dump_path, "w+");
- if (f) {
- radv_dump_device_name(device, f);
- ac_print_gpu_info(&device->physical_device->rad_info, f);
- fclose(f);
- }
-
- /* Dump dmesg. */
- snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "dmesg.log");
- f = fopen(dump_path, "w+");
- if (f) {
- radv_dump_dmesg(f);
- fclose(f);
- }
-
- fprintf(stderr, "radv: GPU hang report saved successfully!\n");
- abort();
+ struct radv_device *device = queue->device;
+ char dump_dir[256], dump_path[512];
+ enum ring_type ring;
+ uint64_t addr;
+ FILE *f;
+
+ ring = radv_queue_family_to_ring(queue->queue_family_index);
+
+ bool hang_occurred = radv_gpu_hang_occured(queue, ring);
+ bool vm_fault_occurred = false;
+ if (queue->device->instance->debug_flags & RADV_DEBUG_VM_FAULTS)
+ vm_fault_occurred = ac_vm_fault_occured(device->physical_device->rad_info.chip_class,
+ &device->dmesg_timestamp, &addr);
+ if (!hang_occurred && !vm_fault_occurred)
+ return;
+
+ fprintf(stderr, "radv: GPU hang detected...\n");
+
+ /* Create a directory into $HOME/radv_dumps_<pid>_<time> to save
+ * various debugging info about that GPU hang.
+ */
+ struct tm *timep, result;
+ time_t raw_time;
+ char buf_time[128];
+
+ time(&raw_time);
+ timep = os_localtime(&raw_time, &result);
+ strftime(buf_time, sizeof(buf_time), "%Y.%m.%d_%H.%M.%S", timep);
+
+ snprintf(dump_dir, sizeof(dump_dir), "%s/" RADV_DUMP_DIR "_%d_%s", debug_get_option("HOME", "."),
+ getpid(), buf_time);
+ if (mkdir(dump_dir, 0774) && errno != EEXIST) {
+ fprintf(stderr, "radv: can't create directory '%s' (%i).\n", dump_dir, errno);
+ abort();
+ }
+
+ fprintf(stderr, "radv: GPU hang report will be saved to '%s'!\n", dump_dir);
+
+ /* Dump trace file. */
+ snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "trace.log");
+ f = fopen(dump_path, "w+");
+ if (f) {
+ radv_dump_trace(queue->device, cs, f);
+ fclose(f);
+ }
+
+ /* Dump pipeline state. */
+ snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "pipeline.log");
+ f = fopen(dump_path, "w+");
+ if (f) {
+ radv_dump_queue_state(queue, f);
+ fclose(f);
+ }
+
+ if (!(device->instance->debug_flags & RADV_DEBUG_NO_UMR)) {
+ /* Dump UMR ring. */
+ snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "umr_ring.log");
+ f = fopen(dump_path, "w+");
+ if (f) {
+ radv_dump_umr_ring(queue, f);
+ fclose(f);
+ }
+
+ /* Dump UMR waves. */
+ snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "umr_waves.log");
+ f = fopen(dump_path, "w+");
+ if (f) {
+ radv_dump_umr_waves(queue, f);
+ fclose(f);
+ }
+ }
+
+ /* Dump debug registers. */
+ snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "registers.log");
+ f = fopen(dump_path, "w+");
+ if (f) {
+ radv_dump_debug_registers(device, f);
+ fclose(f);
+ }
+
+ /* Dump BO ranges. */
+ snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "bo_ranges.log");
+ f = fopen(dump_path, "w+");
+ if (f) {
+ device->ws->dump_bo_ranges(device->ws, f);
+ fclose(f);
+ }
+
+ /* Dump BO log. */
+ snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "bo_history.log");
+ f = fopen(dump_path, "w+");
+ if (f) {
+ device->ws->dump_bo_log(device->ws, f);
+ fclose(f);
+ }
+
+ /* Dump VM fault info. */
+ if (vm_fault_occurred) {
+ snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "vm_fault.log");
+ f = fopen(dump_path, "w+");
+ if (f) {
+ fprintf(f, "VM fault report.\n\n");
+ fprintf(f, "Failing VM page: 0x%08" PRIx64 "\n\n", addr);
+ fclose(f);
+ }
+ }
+
+ /* Dump app info. */
+ snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "app_info.log");
+ f = fopen(dump_path, "w+");
+ if (f) {
+ radv_dump_app_info(device, f);
+ fclose(f);
+ }
+
+ /* Dump GPU info. */
+ snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "gpu_info.log");
+ f = fopen(dump_path, "w+");
+ if (f) {
+ radv_dump_device_name(device, f);
+ ac_print_gpu_info(&device->physical_device->rad_info, f);
+ fclose(f);
+ }
+
+ /* Dump dmesg. */
+ snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "dmesg.log");
+ f = fopen(dump_path, "w+");
+ if (f) {
+ radv_dump_dmesg(f);
+ fclose(f);
+ }
+
+ fprintf(stderr, "radv: GPU hang report saved successfully!\n");
+ abort();
}
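radv_check_gpu_hangs() repeats the same snprintf/fopen/dump/fclose sequence for every report file it writes. As an illustration only (not something this patch introduces), the repeated step could be expressed as a helper taking a callback; dump_to_file() and dump_fn below are invented names for the sketch.

#include <stdio.h>

/* Hedged sketch: factor the repeated "open <dump_dir>/<name>, run one dump
 * callback, close" sequence from radv_check_gpu_hangs() into a helper.
 * dump_fn is a hypothetical callback type, not a RADV API. */
typedef void (*dump_fn)(void *ctx, FILE *f);

static void
dump_to_file(const char *dump_dir, const char *name, dump_fn fn, void *ctx)
{
   char path[512];
   FILE *f;

   snprintf(path, sizeof(path), "%s/%s", dump_dir, name);
   f = fopen(path, "w+");
   if (!f)
      return;

   fn(ctx, f);
   fclose(f);
}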
void
radv_print_spirv(const char *data, uint32_t size, FILE *fp)
{
#ifndef _WIN32
- char path[] = "/tmp/fileXXXXXX";
- char command[128];
- int fd;
+ char path[] = "/tmp/fileXXXXXX";
+ char command[128];
+ int fd;
- /* Dump the binary into a temporary file. */
- fd = mkstemp(path);
- if (fd < 0)
- return;
+ /* Dump the binary into a temporary file. */
+ fd = mkstemp(path);
+ if (fd < 0)
+ return;
- if (write(fd, data, size) == -1)
- goto fail;
+ if (write(fd, data, size) == -1)
+ goto fail;
- /* Disassemble using spirv-dis if installed. */
- sprintf(command, "spirv-dis %s", path);
- radv_dump_cmd(command, fp);
+ /* Disassemble using spirv-dis if installed. */
+ sprintf(command, "spirv-dis %s", path);
+ radv_dump_cmd(command, fp);
fail:
- close(fd);
- unlink(path);
+ close(fd);
+ unlink(path);
#endif
}
bool
radv_trap_handler_init(struct radv_device *device)
{
- struct radeon_winsys *ws = device->ws;
- VkResult result;
-
- /* Create the trap handler shader and upload it like other shaders. */
- device->trap_handler_shader = radv_create_trap_handler_shader(device);
- if (!device->trap_handler_shader) {
- fprintf(stderr, "radv: failed to create the trap handler shader.\n");
- return false;
- }
-
- result = ws->buffer_make_resident(ws, device->trap_handler_shader->bo, true);
- if (result != VK_SUCCESS)
- return false;
-
- device->tma_bo = ws->buffer_create(ws, TMA_BO_SIZE, 256,
- RADEON_DOMAIN_VRAM,
- RADEON_FLAG_CPU_ACCESS |
- RADEON_FLAG_NO_INTERPROCESS_SHARING |
- RADEON_FLAG_ZERO_VRAM |
- RADEON_FLAG_32BIT,
- RADV_BO_PRIORITY_SCRATCH);
- if (!device->tma_bo)
- return false;
-
- result = ws->buffer_make_resident(ws, device->tma_bo, true);
- if (result != VK_SUCCESS)
- return false;
-
- device->tma_ptr = ws->buffer_map(device->tma_bo);
- if (!device->tma_ptr)
- return false;
-
- /* Upload a buffer descriptor to store various info from the trap. */
- uint64_t tma_va = radv_buffer_get_va(device->tma_bo) + 16;
- uint32_t desc[4];
-
- desc[0] = tma_va;
- desc[1] = S_008F04_BASE_ADDRESS_HI(tma_va >> 32);
- desc[2] = TMA_BO_SIZE;
- desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
- S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
- S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
- S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
-
- memcpy(device->tma_ptr, desc, sizeof(desc));
-
- return true;
+ struct radeon_winsys *ws = device->ws;
+ VkResult result;
+
+ /* Create the trap handler shader and upload it like other shaders. */
+ device->trap_handler_shader = radv_create_trap_handler_shader(device);
+ if (!device->trap_handler_shader) {
+ fprintf(stderr, "radv: failed to create the trap handler shader.\n");
+ return false;
+ }
+
+ result = ws->buffer_make_resident(ws, device->trap_handler_shader->bo, true);
+ if (result != VK_SUCCESS)
+ return false;
+
+ device->tma_bo = ws->buffer_create(ws, TMA_BO_SIZE, 256, RADEON_DOMAIN_VRAM,
+ RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING |
+ RADEON_FLAG_ZERO_VRAM | RADEON_FLAG_32BIT,
+ RADV_BO_PRIORITY_SCRATCH);
+ if (!device->tma_bo)
+ return false;
+
+ result = ws->buffer_make_resident(ws, device->tma_bo, true);
+ if (result != VK_SUCCESS)
+ return false;
+
+ device->tma_ptr = ws->buffer_map(device->tma_bo);
+ if (!device->tma_ptr)
+ return false;
+
+ /* Upload a buffer descriptor to store various info from the trap. */
+ uint64_t tma_va = radv_buffer_get_va(device->tma_bo) + 16;
+ uint32_t desc[4];
+
+ desc[0] = tma_va;
+ desc[1] = S_008F04_BASE_ADDRESS_HI(tma_va >> 32);
+ desc[2] = TMA_BO_SIZE;
+ desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+
+ memcpy(device->tma_ptr, desc, sizeof(desc));
+
+ return true;
}
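The four-dword descriptor built at the end of radv_trap_handler_init() is an AMD buffer resource (V#): address in the first two dwords, size in the third, destination swizzles plus a 32-bit data format in the fourth. A self-contained sketch of that packing follows; the raw shifts and enum values are assumptions mirroring the S_008F04_ and S_008F0C_ field helpers, written out so the sketch stands alone.

#include <stdint.h>

/* Hedged sketch: pack a minimal buffer descriptor (V#) the way
 * radv_trap_handler_init() does. Field offsets and the SQ_SEL_X..W /
 * BUF_DATA_FORMAT_32 values below are assumptions, not taken from the
 * register headers directly. */
static void
pack_buffer_descriptor(uint32_t desc[4], uint64_t va, uint32_t size_bytes)
{
   const uint32_t sel_x = 4, sel_y = 5, sel_z = 6, sel_w = 7; /* assumed SQ_SEL_X..W */
   const uint32_t data_format_32 = 4;                         /* assumed BUF_DATA_FORMAT_32 */

   desc[0] = (uint32_t)va;                  /* low 32 address bits */
   desc[1] = (uint32_t)(va >> 32) & 0xffff; /* BASE_ADDRESS_HI field */
   desc[2] = size_bytes;                    /* NUM_RECORDS for a raw buffer */
   desc[3] = (sel_x << 0) | (sel_y << 3) | (sel_z << 6) | (sel_w << 9) |
             (data_format_32 << 15);        /* DST_SEL_XYZW + DATA_FORMAT */
}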
void
radv_trap_handler_finish(struct radv_device *device)
{
- struct radeon_winsys *ws = device->ws;
+ struct radeon_winsys *ws = device->ws;
- if (unlikely(device->trap_handler_shader)) {
- ws->buffer_make_resident(ws, device->trap_handler_shader->bo, false);
- radv_shader_variant_destroy(device, device->trap_handler_shader);
- }
+ if (unlikely(device->trap_handler_shader)) {
+ ws->buffer_make_resident(ws, device->trap_handler_shader->bo, false);
+ radv_shader_variant_destroy(device, device->trap_handler_shader);
+ }
- if (unlikely(device->tma_bo)) {
- ws->buffer_make_resident(ws, device->tma_bo, false);
- ws->buffer_destroy(ws, device->tma_bo);
- }
+ if (unlikely(device->tma_bo)) {
+ ws->buffer_make_resident(ws, device->tma_bo, false);
+ ws->buffer_destroy(ws, device->tma_bo);
+ }
}
static struct radv_shader_variant *
radv_get_faulty_shader(struct radv_device *device, uint64_t faulty_pc)
{
- struct radv_shader_variant *shader = NULL;
+ struct radv_shader_variant *shader = NULL;
- mtx_lock(&device->shader_slab_mutex);
+ mtx_lock(&device->shader_slab_mutex);
- list_for_each_entry(struct radv_shader_slab, slab, &device->shader_slabs, slabs) {
+ list_for_each_entry(struct radv_shader_slab, slab, &device->shader_slabs, slabs)
+ {
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
#endif
- list_for_each_entry(struct radv_shader_variant, s, &slab->shaders, slab_list) {
+ list_for_each_entry(struct radv_shader_variant, s, &slab->shaders, slab_list)
+ {
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
- uint64_t offset = align_u64(s->bo_offset + s->code_size, 256);
- uint64_t va = radv_buffer_get_va(s->bo);
-
- if (faulty_pc >= va + s->bo_offset && faulty_pc < va + offset) {
- mtx_unlock(&device->shader_slab_mutex);
- return s;
- }
- }
- }
- mtx_unlock(&device->shader_slab_mutex);
-
- return shader;
+ uint64_t offset = align_u64(s->bo_offset + s->code_size, 256);
+ uint64_t va = radv_buffer_get_va(s->bo);
+
+ if (faulty_pc >= va + s->bo_offset && faulty_pc < va + offset) {
+ mtx_unlock(&device->shader_slab_mutex);
+ return s;
+ }
+ }
+ }
+ mtx_unlock(&device->shader_slab_mutex);
+
+ return shader;
}
static void
radv_dump_faulty_shader(struct radv_device *device, uint64_t faulty_pc)
{
- struct radv_shader_variant *shader;
- uint64_t start_addr, end_addr;
- uint32_t instr_offset;
-
- shader = radv_get_faulty_shader(device, faulty_pc);
- if (!shader)
- return;
-
- start_addr = radv_buffer_get_va(shader->bo) + shader->bo_offset;
- end_addr = start_addr + shader->code_size;
- instr_offset = faulty_pc - start_addr;
-
- fprintf(stderr, "Faulty shader found "
- "VA=[0x%"PRIx64"-0x%"PRIx64"], instr_offset=%d\n",
- start_addr, end_addr, instr_offset);
-
- /* Get the list of instructions.
- * Buffer size / 4 is the upper bound of the instruction count.
- */
- unsigned num_inst = 0;
- struct radv_shader_inst *instructions =
- calloc(shader->code_size / 4, sizeof(struct radv_shader_inst));
-
- /* Split the disassembly string into instructions. */
- si_add_split_disasm(shader->disasm_string, start_addr, &num_inst, instructions);
-
- /* Print instructions with annotations. */
- for (unsigned i = 0; i < num_inst; i++) {
- struct radv_shader_inst *inst = &instructions[i];
-
- if (start_addr + inst->offset == faulty_pc) {
- fprintf(stderr, "\n!!! Faulty instruction below !!!\n");
- fprintf(stderr, "%s\n", inst->text);
- fprintf(stderr, "\n");
- } else {
- fprintf(stderr, "%s\n", inst->text);
- }
- }
-
- free(instructions);
+ struct radv_shader_variant *shader;
+ uint64_t start_addr, end_addr;
+ uint32_t instr_offset;
+
+ shader = radv_get_faulty_shader(device, faulty_pc);
+ if (!shader)
+ return;
+
+ start_addr = radv_buffer_get_va(shader->bo) + shader->bo_offset;
+ end_addr = start_addr + shader->code_size;
+ instr_offset = faulty_pc - start_addr;
+
+ fprintf(stderr,
+ "Faulty shader found "
+ "VA=[0x%" PRIx64 "-0x%" PRIx64 "], instr_offset=%d\n",
+ start_addr, end_addr, instr_offset);
+
+ /* Get the list of instructions.
+ * Buffer size / 4 is the upper bound of the instruction count.
+ */
+ unsigned num_inst = 0;
+ struct radv_shader_inst *instructions =
+ calloc(shader->code_size / 4, sizeof(struct radv_shader_inst));
+
+ /* Split the disassembly string into instructions. */
+ si_add_split_disasm(shader->disasm_string, start_addr, &num_inst, instructions);
+
+ /* Print instructions with annotations. */
+ for (unsigned i = 0; i < num_inst; i++) {
+ struct radv_shader_inst *inst = &instructions[i];
+
+ if (start_addr + inst->offset == faulty_pc) {
+ fprintf(stderr, "\n!!! Faulty instruction below !!!\n");
+ fprintf(stderr, "%s\n", inst->text);
+ fprintf(stderr, "\n");
+ } else {
+ fprintf(stderr, "%s\n", inst->text);
+ }
+ }
+
+ free(instructions);
}
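Taken together, radv_get_faulty_shader() and the dump above attribute a faulting PC to one shader by checking it against the shader's slab range and converting it to a byte offset. A compact sketch of that mapping, with names invented for the example and align256() restating align_u64(x, 256):

#include <stdbool.h>
#include <stdint.h>

/* Hedged sketch: the containment test used to attribute a faulting PC to a
 * shader. A shader occupies [slab_va + bo_offset,
 * slab_va + align(bo_offset + code_size, 256)) inside its slab BO. */
static uint64_t
align256(uint64_t x)
{
   return (x + 255) & ~255ull;
}

static bool
pc_in_shader(uint64_t slab_va, uint64_t bo_offset, uint32_t code_size,
             uint64_t faulty_pc, uint32_t *instr_offset)
{
   uint64_t start = slab_va + bo_offset;
   uint64_t end = slab_va + align256(bo_offset + code_size);

   if (faulty_pc < start || faulty_pc >= end)
      return false;

   *instr_offset = (uint32_t)(faulty_pc - start);
   return true;
}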
struct radv_sq_hw_reg {
- uint32_t status;
- uint32_t trap_sts;
- uint32_t hw_id;
- uint32_t ib_sts;
+ uint32_t status;
+ uint32_t trap_sts;
+ uint32_t hw_id;
+ uint32_t ib_sts;
};
static void
radv_dump_sq_hw_regs(struct radv_device *device)
{
- struct radv_sq_hw_reg *regs = (struct radv_sq_hw_reg *)&device->tma_ptr[6];
-
- fprintf(stderr, "\nHardware registers:\n");
- if (device->physical_device->rad_info.chip_class >= GFX10) {
- ac_dump_reg(stderr, device->physical_device->rad_info.chip_class,
- R_000408_SQ_WAVE_STATUS, regs->status, ~0);
- ac_dump_reg(stderr, device->physical_device->rad_info.chip_class,
- R_00040C_SQ_WAVE_TRAPSTS, regs->trap_sts, ~0);
- ac_dump_reg(stderr, device->physical_device->rad_info.chip_class,
- R_00045C_SQ_WAVE_HW_ID1, regs->hw_id, ~0);
- ac_dump_reg(stderr, device->physical_device->rad_info.chip_class,
- R_00041C_SQ_WAVE_IB_STS, regs->ib_sts, ~0);
- } else {
- ac_dump_reg(stderr, device->physical_device->rad_info.chip_class,
- R_000048_SQ_WAVE_STATUS, regs->status, ~0);
- ac_dump_reg(stderr, device->physical_device->rad_info.chip_class,
- R_00004C_SQ_WAVE_TRAPSTS, regs->trap_sts, ~0);
- ac_dump_reg(stderr, device->physical_device->rad_info.chip_class,
- R_000050_SQ_WAVE_HW_ID, regs->hw_id, ~0);
- ac_dump_reg(stderr, device->physical_device->rad_info.chip_class,
- R_00005C_SQ_WAVE_IB_STS, regs->ib_sts, ~0);
- }
- fprintf(stderr, "\n\n");
+ struct radv_sq_hw_reg *regs = (struct radv_sq_hw_reg *)&device->tma_ptr[6];
+
+ fprintf(stderr, "\nHardware registers:\n");
+ if (device->physical_device->rad_info.chip_class >= GFX10) {
+ ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_000408_SQ_WAVE_STATUS,
+ regs->status, ~0);
+ ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00040C_SQ_WAVE_TRAPSTS,
+ regs->trap_sts, ~0);
+ ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00045C_SQ_WAVE_HW_ID1,
+ regs->hw_id, ~0);
+ ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00041C_SQ_WAVE_IB_STS,
+ regs->ib_sts, ~0);
+ } else {
+ ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_000048_SQ_WAVE_STATUS,
+ regs->status, ~0);
+ ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00004C_SQ_WAVE_TRAPSTS,
+ regs->trap_sts, ~0);
+ ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_000050_SQ_WAVE_HW_ID,
+ regs->hw_id, ~0);
+ ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00005C_SQ_WAVE_IB_STS,
+ regs->ib_sts, ~0);
+ }
+ fprintf(stderr, "\n\n");
}
void
radv_check_trap_handler(struct radv_queue *queue)
{
- enum ring_type ring = radv_queue_family_to_ring(queue->queue_family_index);
- struct radv_device *device = queue->device;
- struct radeon_winsys *ws = device->ws;
+ enum ring_type ring = radv_queue_family_to_ring(queue->queue_family_index);
+ struct radv_device *device = queue->device;
+ struct radeon_winsys *ws = device->ws;
- /* Wait for the context to be idle in a finite time. */
- ws->ctx_wait_idle(queue->hw_ctx, ring, queue->queue_idx);
+ /* Wait for the context to be idle in a finite time. */
+ ws->ctx_wait_idle(queue->hw_ctx, ring, queue->queue_idx);
- /* Try to detect if the trap handler has been reached by the hw by
- * looking at ttmp0 which should be non-zero if a shader exception
- * happened.
- */
- if (!device->tma_ptr[4])
- return;
+ /* Try to detect if the trap handler has been reached by the hw by
+ * looking at ttmp0 which should be non-zero if a shader exception
+ * happened.
+ */
+ if (!device->tma_ptr[4])
+ return;
#if 0
fprintf(stderr, "tma_ptr:\n");
@@ -1032,27 +999,27 @@ radv_check_trap_handler(struct radv_queue *queue)
fprintf(stderr, "tma_ptr[%d]=0x%x\n", i, device->tma_ptr[i]);
#endif
- radv_dump_sq_hw_regs(device);
+ radv_dump_sq_hw_regs(device);
- uint32_t ttmp0 = device->tma_ptr[4];
- uint32_t ttmp1 = device->tma_ptr[5];
+ uint32_t ttmp0 = device->tma_ptr[4];
+ uint32_t ttmp1 = device->tma_ptr[5];
- /* According to the ISA docs, 3.10 Trap and Exception Registers:
- *
- * "{ttmp1, ttmp0} = {3'h0, pc_rewind[3:0], HT[0], trapID[7:0], PC[47:0]}"
- *
- * "When the trap handler is entered, the PC of the faulting
- * instruction is: (PC - PC_rewind * 4)."
- * */
- uint8_t trap_id = (ttmp1 >> 16) & 0xff;
- uint8_t ht = (ttmp1 >> 24) & 0x1;
- uint8_t pc_rewind = (ttmp1 >> 25) & 0xf;
- uint64_t pc = (ttmp0 | ((ttmp1 & 0x0000ffffull) << 32)) - (pc_rewind * 4);
+ /* According to the ISA docs, 3.10 Trap and Exception Registers:
+ *
+ * "{ttmp1, ttmp0} = {3'h0, pc_rewind[3:0], HT[0], trapID[7:0], PC[47:0]}"
+ *
+ * "When the trap handler is entered, the PC of the faulting
+ * instruction is: (PC - PC_rewind * 4)."
+ * */
+ uint8_t trap_id = (ttmp1 >> 16) & 0xff;
+ uint8_t ht = (ttmp1 >> 24) & 0x1;
+ uint8_t pc_rewind = (ttmp1 >> 25) & 0xf;
+ uint64_t pc = (ttmp0 | ((ttmp1 & 0x0000ffffull) << 32)) - (pc_rewind * 4);
- fprintf(stderr, "PC=0x%"PRIx64", trapID=%d, HT=%d, PC_rewind=%d\n",
- pc, trap_id, ht, pc_rewind);
+ fprintf(stderr, "PC=0x%" PRIx64 ", trapID=%d, HT=%d, PC_rewind=%d\n", pc, trap_id, ht,
+ pc_rewind);
- radv_dump_faulty_shader(device, pc);
+ radv_dump_faulty_shader(device, pc);
- abort();
+ abort();
}
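The bit slicing above follows the ISA comment quoted in the code: {ttmp1, ttmp0} packs pc_rewind, HT, trapID and a 48-bit PC. A standalone sketch of the same decode, handy for checking a captured ttmp pair by hand; the struct name is made up for the example.

#include <stdint.h>

/* Hedged sketch: decode the {ttmp1, ttmp0} pair exactly as
 * radv_check_trap_handler() does. trap_regs is a name invented here. */
struct trap_regs {
   uint8_t trap_id;
   uint8_t ht;
   uint8_t pc_rewind;
   uint64_t pc; /* PC of the faulting instruction */
};

static struct trap_regs
decode_ttmp(uint32_t ttmp0, uint32_t ttmp1)
{
   struct trap_regs r;

   r.trap_id = (ttmp1 >> 16) & 0xff;
   r.ht = (ttmp1 >> 24) & 0x1;
   r.pc_rewind = (ttmp1 >> 25) & 0xf;
   r.pc = (ttmp0 | ((ttmp1 & 0x0000ffffull) << 32)) - (uint64_t)r.pc_rewind * 4;
   return r;
}

For instance, ttmp1 = 0x02030000 with ttmp0 = 0x1000 decodes to trapID 3, HT 0, PC_rewind 1 and a faulting PC of 0xffc.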
diff --git a/src/amd/vulkan/radv_debug.h b/src/amd/vulkan/radv_debug.h
index 1569fcb108e..922b29af8e8 100644
--- a/src/amd/vulkan/radv_debug.h
+++ b/src/amd/vulkan/radv_debug.h
@@ -28,69 +28,65 @@
/* Please keep docs/envvars.rst up-to-date when you add/remove options. */
enum {
- RADV_DEBUG_NO_FAST_CLEARS = 1ull << 0,
- RADV_DEBUG_NO_DCC = 1ull << 1,
- RADV_DEBUG_DUMP_SHADERS = 1ull << 2,
- RADV_DEBUG_NO_CACHE = 1ull << 3,
- RADV_DEBUG_DUMP_SHADER_STATS = 1ull << 4,
- RADV_DEBUG_NO_HIZ = 1ull << 5,
- RADV_DEBUG_NO_COMPUTE_QUEUE = 1ull << 6,
- RADV_DEBUG_ALL_BOS = 1ull << 7,
- RADV_DEBUG_NO_IBS = 1ull << 8,
- RADV_DEBUG_DUMP_SPIRV = 1ull << 9,
- RADV_DEBUG_VM_FAULTS = 1ull << 10,
- RADV_DEBUG_ZERO_VRAM = 1ull << 11,
- RADV_DEBUG_SYNC_SHADERS = 1ull << 12,
- RADV_DEBUG_PREOPTIR = 1ull << 13,
- RADV_DEBUG_NO_DYNAMIC_BOUNDS = 1ull << 14,
- RADV_DEBUG_NO_OUT_OF_ORDER = 1ull << 15,
- RADV_DEBUG_INFO = 1ull << 16,
- RADV_DEBUG_ERRORS = 1ull << 17,
- RADV_DEBUG_STARTUP = 1ull << 18,
- RADV_DEBUG_CHECKIR = 1ull << 19,
- RADV_DEBUG_NOTHREADLLVM = 1ull << 20,
- RADV_DEBUG_NOBINNING = 1ull << 21,
- RADV_DEBUG_NO_NGG = 1ull << 22,
- RADV_DEBUG_DUMP_META_SHADERS = 1ull << 23,
- RADV_DEBUG_NO_MEMORY_CACHE = 1ull << 24,
- RADV_DEBUG_DISCARD_TO_DEMOTE = 1ull << 25,
- RADV_DEBUG_LLVM = 1ull << 26,
- RADV_DEBUG_FORCE_COMPRESS = 1ull << 27,
- RADV_DEBUG_HANG = 1ull << 28,
- RADV_DEBUG_IMG = 1ull << 29,
- RADV_DEBUG_NO_UMR = 1ull << 30,
- RADV_DEBUG_INVARIANT_GEOM = 1ull << 31,
- RADV_DEBUG_NO_DISPLAY_DCC = 1ull << 32,
- RADV_DEBUG_NO_TC_COMPAT_CMASK= 1ull << 33,
- RADV_DEBUG_NO_VRS_FLAT_SHADING = 1ull << 34,
+ RADV_DEBUG_NO_FAST_CLEARS = 1ull << 0,
+ RADV_DEBUG_NO_DCC = 1ull << 1,
+ RADV_DEBUG_DUMP_SHADERS = 1ull << 2,
+ RADV_DEBUG_NO_CACHE = 1ull << 3,
+ RADV_DEBUG_DUMP_SHADER_STATS = 1ull << 4,
+ RADV_DEBUG_NO_HIZ = 1ull << 5,
+ RADV_DEBUG_NO_COMPUTE_QUEUE = 1ull << 6,
+ RADV_DEBUG_ALL_BOS = 1ull << 7,
+ RADV_DEBUG_NO_IBS = 1ull << 8,
+ RADV_DEBUG_DUMP_SPIRV = 1ull << 9,
+ RADV_DEBUG_VM_FAULTS = 1ull << 10,
+ RADV_DEBUG_ZERO_VRAM = 1ull << 11,
+ RADV_DEBUG_SYNC_SHADERS = 1ull << 12,
+ RADV_DEBUG_PREOPTIR = 1ull << 13,
+ RADV_DEBUG_NO_DYNAMIC_BOUNDS = 1ull << 14,
+ RADV_DEBUG_NO_OUT_OF_ORDER = 1ull << 15,
+ RADV_DEBUG_INFO = 1ull << 16,
+ RADV_DEBUG_ERRORS = 1ull << 17,
+ RADV_DEBUG_STARTUP = 1ull << 18,
+ RADV_DEBUG_CHECKIR = 1ull << 19,
+ RADV_DEBUG_NOTHREADLLVM = 1ull << 20,
+ RADV_DEBUG_NOBINNING = 1ull << 21,
+ RADV_DEBUG_NO_NGG = 1ull << 22,
+ RADV_DEBUG_DUMP_META_SHADERS = 1ull << 23,
+ RADV_DEBUG_NO_MEMORY_CACHE = 1ull << 24,
+ RADV_DEBUG_DISCARD_TO_DEMOTE = 1ull << 25,
+ RADV_DEBUG_LLVM = 1ull << 26,
+ RADV_DEBUG_FORCE_COMPRESS = 1ull << 27,
+ RADV_DEBUG_HANG = 1ull << 28,
+ RADV_DEBUG_IMG = 1ull << 29,
+ RADV_DEBUG_NO_UMR = 1ull << 30,
+ RADV_DEBUG_INVARIANT_GEOM = 1ull << 31,
+ RADV_DEBUG_NO_DISPLAY_DCC = 1ull << 32,
+ RADV_DEBUG_NO_TC_COMPAT_CMASK = 1ull << 33,
+ RADV_DEBUG_NO_VRS_FLAT_SHADING = 1ull << 34,
};
enum {
- RADV_PERFTEST_LOCAL_BOS = 1u << 0,
- RADV_PERFTEST_DCC_MSAA = 1u << 1,
- RADV_PERFTEST_BO_LIST = 1u << 2,
- RADV_PERFTEST_TC_COMPAT_CMASK = 1u << 3,
- RADV_PERFTEST_CS_WAVE_32 = 1u << 4,
- RADV_PERFTEST_PS_WAVE_32 = 1u << 5,
- RADV_PERFTEST_GE_WAVE_32 = 1u << 6,
- RADV_PERFTEST_DFSM = 1u << 7,
- RADV_PERFTEST_NO_SAM = 1u << 8,
- RADV_PERFTEST_SAM = 1u << 9,
- RADV_PERFTEST_DCC_STORES = 1u << 10,
+ RADV_PERFTEST_LOCAL_BOS = 1u << 0,
+ RADV_PERFTEST_DCC_MSAA = 1u << 1,
+ RADV_PERFTEST_BO_LIST = 1u << 2,
+ RADV_PERFTEST_TC_COMPAT_CMASK = 1u << 3,
+ RADV_PERFTEST_CS_WAVE_32 = 1u << 4,
+ RADV_PERFTEST_PS_WAVE_32 = 1u << 5,
+ RADV_PERFTEST_GE_WAVE_32 = 1u << 6,
+ RADV_PERFTEST_DFSM = 1u << 7,
+ RADV_PERFTEST_NO_SAM = 1u << 8,
+ RADV_PERFTEST_SAM = 1u << 9,
+ RADV_PERFTEST_DCC_STORES = 1u << 10,
};
-bool
-radv_init_trace(struct radv_device *device);
+bool radv_init_trace(struct radv_device *device);
void radv_finish_trace(struct radv_device *device);
-void
-radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs);
+void radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs);
-void
-radv_print_spirv(const char *data, uint32_t size, FILE *fp);
+void radv_print_spirv(const char *data, uint32_t size, FILE *fp);
-void
-radv_dump_enabled_options(struct radv_device *device, FILE *f);
+void radv_dump_enabled_options(struct radv_device *device, FILE *f);
bool radv_trap_handler_init(struct radv_device *device);
void radv_trap_handler_finish(struct radv_device *device);
diff --git a/src/amd/vulkan/radv_descriptor_set.c b/src/amd/vulkan/radv_descriptor_set.c
index 0abe0422dbf..25b14939e8f 100644
--- a/src/amd/vulkan/radv_descriptor_set.c
+++ b/src/amd/vulkan/radv_descriptor_set.c
@@ -22,9 +22,9 @@
* IN THE SOFTWARE.
*/
#include <assert.h>
+#include <fcntl.h>
#include <stdbool.h>
#include <string.h>
-#include <fcntl.h>
#include "util/mesa-sha1.h"
#include "radv_private.h"
@@ -33,409 +33,413 @@
#include "vk_format.h"
#include "vk_util.h"
-
-static bool has_equal_immutable_samplers(const VkSampler *samplers, uint32_t count)
+static bool
+has_equal_immutable_samplers(const VkSampler *samplers, uint32_t count)
{
- if (!samplers)
- return false;
- for(uint32_t i = 1; i < count; ++i) {
- if (memcmp(radv_sampler_from_handle(samplers[0])->state,
- radv_sampler_from_handle(samplers[i])->state, 16)) {
- return false;
- }
- }
- return true;
+ if (!samplers)
+ return false;
+ for (uint32_t i = 1; i < count; ++i) {
+ if (memcmp(radv_sampler_from_handle(samplers[0])->state,
+ radv_sampler_from_handle(samplers[i])->state, 16)) {
+ return false;
+ }
+ }
+ return true;
}
-static bool radv_mutable_descriptor_type_size_alignment(const VkMutableDescriptorTypeListVALVE *list,
- uint64_t *out_size, uint64_t *out_align)
+static bool
+radv_mutable_descriptor_type_size_alignment(const VkMutableDescriptorTypeListVALVE *list,
+ uint64_t *out_size, uint64_t *out_align)
{
- uint32_t max_size = 0;
- uint32_t max_align = 0;
-
- for (uint32_t i = 0; i < list->descriptorTypeCount; i++) {
- uint32_t size = 0;
- uint32_t align = 0;
-
- switch (list->pDescriptorTypes[i]) {
- case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
- case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
- case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
- case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
- size = 16;
- align = 16;
- break;
- case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
- case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
- size = 64;
- align = 32;
- break;
- default:
- return false;
- }
-
- max_size = MAX2(max_size, size);
- max_align = MAX2(max_align, align);
- }
-
- *out_size = max_size;
- *out_align = max_align;
- return true;
+ uint32_t max_size = 0;
+ uint32_t max_align = 0;
+
+ for (uint32_t i = 0; i < list->descriptorTypeCount; i++) {
+ uint32_t size = 0;
+ uint32_t align = 0;
+
+ switch (list->pDescriptorTypes[i]) {
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ size = 16;
+ align = 16;
+ break;
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ size = 64;
+ align = 32;
+ break;
+ default:
+ return false;
+ }
+
+ max_size = MAX2(max_size, size);
+ max_align = MAX2(max_align, align);
+ }
+
+ *out_size = max_size;
+ *out_align = max_align;
+ return true;
}
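The switch above, and the larger ones in radv_CreateDescriptorSetLayout() and radv_GetDescriptorSetLayoutSupport() below, encode RADV's per-type descriptor footprints: 16 bytes at 16-byte alignment for buffers and lone samplers, 64/32 for image descriptors (main + fmask), 96/32 for combined image+sampler. A hedged standalone restatement of that table, with invented names, for reference while reading the sizing code that follows:

#include <stdbool.h>
#include <stdint.h>

/* Hedged sketch restating the descriptor footprints used throughout this
 * file; the enum and function are invented for the example. */
enum sketch_desc_kind { SKETCH_BUFFER, SKETCH_SAMPLER, SKETCH_IMAGE, SKETCH_COMBINED };

static bool
sketch_descriptor_footprint(enum sketch_desc_kind kind, uint32_t *size, uint32_t *align)
{
   switch (kind) {
   case SKETCH_BUFFER:
   case SKETCH_SAMPLER:
      *size = 16;  /* one 128-bit buffer resource or sampler state */
      *align = 16;
      return true;
   case SKETCH_IMAGE:
      *size = 64;  /* main descriptor + fmask descriptor */
      *align = 32;
      return true;
   case SKETCH_COMBINED:
      *size = 96;  /* main + fmask + sampler */
      *align = 32;
      return true;
   default:
      return false;
   }
}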
-VkResult radv_CreateDescriptorSetLayout(
- VkDevice _device,
- const VkDescriptorSetLayoutCreateInfo* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkDescriptorSetLayout* pSetLayout)
+VkResult
+radv_CreateDescriptorSetLayout(VkDevice _device, const VkDescriptorSetLayoutCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkDescriptorSetLayout *pSetLayout)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- struct radv_descriptor_set_layout *set_layout;
-
- assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO);
- const VkDescriptorSetLayoutBindingFlagsCreateInfo *variable_flags =
- vk_find_struct_const(pCreateInfo->pNext, DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO);
- const VkMutableDescriptorTypeCreateInfoVALVE *mutable_info =
- vk_find_struct_const(pCreateInfo->pNext, MUTABLE_DESCRIPTOR_TYPE_CREATE_INFO_VALVE);
-
- uint32_t num_bindings = 0;
- uint32_t immutable_sampler_count = 0;
- uint32_t ycbcr_sampler_count = 0;
- for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) {
- num_bindings = MAX2(num_bindings, pCreateInfo->pBindings[j].binding + 1);
- if ((pCreateInfo->pBindings[j].descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER ||
- pCreateInfo->pBindings[j].descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER) &&
- pCreateInfo->pBindings[j].pImmutableSamplers) {
- immutable_sampler_count += pCreateInfo->pBindings[j].descriptorCount;
-
- bool has_ycbcr_sampler = false;
- for (unsigned i = 0; i < pCreateInfo->pBindings[j].descriptorCount; ++i) {
- if (radv_sampler_from_handle(pCreateInfo->pBindings[j].pImmutableSamplers[i])->ycbcr_sampler)
- has_ycbcr_sampler = true;
- }
-
- if (has_ycbcr_sampler)
- ycbcr_sampler_count += pCreateInfo->pBindings[j].descriptorCount;
- }
- }
-
- uint32_t samplers_offset =
- offsetof(struct radv_descriptor_set_layout, binding[num_bindings]);
- size_t size = samplers_offset + immutable_sampler_count * 4 * sizeof(uint32_t);
- if (ycbcr_sampler_count > 0) {
- /* Store block of offsets first, followed by the conversion descriptors (padded to the struct alignment) */
- size += num_bindings * sizeof(uint32_t);
- size = ALIGN(size, alignof(struct radv_sampler_ycbcr_conversion));
- size += ycbcr_sampler_count * sizeof(struct radv_sampler_ycbcr_conversion);
- }
-
- set_layout = vk_zalloc2(&device->vk.alloc, pAllocator, size, 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (!set_layout)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
-
- vk_object_base_init(&device->vk, &set_layout->base,
- VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT);
-
- set_layout->flags = pCreateInfo->flags;
- set_layout->layout_size = size;
-
- /* We just allocate all the samplers at the end of the struct */
- uint32_t *samplers = (uint32_t*)&set_layout->binding[num_bindings];
- struct radv_sampler_ycbcr_conversion *ycbcr_samplers = NULL;
- uint32_t *ycbcr_sampler_offsets = NULL;
-
- if (ycbcr_sampler_count > 0) {
- ycbcr_sampler_offsets = samplers + 4 * immutable_sampler_count;
- set_layout->ycbcr_sampler_offsets_offset = (char*)ycbcr_sampler_offsets - (char*)set_layout;
-
- uintptr_t first_ycbcr_sampler_offset = (uintptr_t)ycbcr_sampler_offsets + sizeof(uint32_t) * num_bindings;
- first_ycbcr_sampler_offset = ALIGN(first_ycbcr_sampler_offset, alignof(struct radv_sampler_ycbcr_conversion));
- ycbcr_samplers = (struct radv_sampler_ycbcr_conversion *)first_ycbcr_sampler_offset;
- } else
- set_layout->ycbcr_sampler_offsets_offset = 0;
-
- VkDescriptorSetLayoutBinding *bindings = NULL;
- VkResult result = vk_create_sorted_bindings(pCreateInfo->pBindings,
- pCreateInfo->bindingCount,
- &bindings);
- if (result != VK_SUCCESS) {
- vk_object_base_finish(&set_layout->base);
- vk_free2(&device->vk.alloc, pAllocator, set_layout);
- return vk_error(device->instance, result);
- }
-
- set_layout->binding_count = num_bindings;
- set_layout->shader_stages = 0;
- set_layout->dynamic_shader_stages = 0;
- set_layout->has_immutable_samplers = false;
- set_layout->size = 0;
-
- uint32_t buffer_count = 0;
- uint32_t dynamic_offset_count = 0;
-
- for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) {
- const VkDescriptorSetLayoutBinding *binding = bindings + j;
- uint32_t b = binding->binding;
- uint32_t alignment = 0;
- unsigned binding_buffer_count = 0;
- uint32_t descriptor_count = binding->descriptorCount;
- bool has_ycbcr_sampler = false;
-
- /* main image + fmask */
- uint32_t max_sampled_image_descriptors = 2;
-
- if (binding->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER &&
- binding->pImmutableSamplers) {
- for (unsigned i = 0; i < binding->descriptorCount; ++i) {
- struct radv_sampler_ycbcr_conversion *conversion =
- radv_sampler_from_handle(binding->pImmutableSamplers[i])->ycbcr_sampler;
-
- if (conversion) {
- has_ycbcr_sampler = true;
- max_sampled_image_descriptors = MAX2(max_sampled_image_descriptors,
- vk_format_get_plane_count(conversion->format));
- }
- }
- }
-
- switch (binding->descriptorType) {
- case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
- case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
- assert(!(pCreateInfo->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
- set_layout->binding[b].dynamic_offset_count = 1;
- set_layout->dynamic_shader_stages |= binding->stageFlags;
- set_layout->binding[b].size = 0;
- binding_buffer_count = 1;
- alignment = 1;
- break;
- case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
- case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
- case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
- case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
- set_layout->binding[b].size = 16;
- binding_buffer_count = 1;
- alignment = 16;
- break;
- case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
- case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
- case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
- /* main descriptor + fmask descriptor */
- set_layout->binding[b].size = 64;
- binding_buffer_count = 1;
- alignment = 32;
- break;
- case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
- /* main descriptor + fmask descriptor + sampler */
- set_layout->binding[b].size = 96;
- binding_buffer_count = 1;
- alignment = 32;
- break;
- case VK_DESCRIPTOR_TYPE_SAMPLER:
- set_layout->binding[b].size = 16;
- alignment = 16;
- break;
- case VK_DESCRIPTOR_TYPE_MUTABLE_VALVE: {
- uint64_t mutable_size = 0, mutable_align = 0;
- radv_mutable_descriptor_type_size_alignment(&mutable_info->pMutableDescriptorTypeLists[j],
- &mutable_size, &mutable_align);
- assert(mutable_size && mutable_align);
- set_layout->binding[b].size = mutable_size;
- alignment = mutable_align;
- break;
- }
- case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
- alignment = 16;
- set_layout->binding[b].size = descriptor_count;
- descriptor_count = 1;
- break;
- default:
- break;
- }
-
- set_layout->size = align(set_layout->size, alignment);
- set_layout->binding[b].type = binding->descriptorType;
- set_layout->binding[b].array_size = descriptor_count;
- set_layout->binding[b].offset = set_layout->size;
- set_layout->binding[b].buffer_offset = buffer_count;
- set_layout->binding[b].dynamic_offset_offset = dynamic_offset_count;
-
- if (variable_flags && binding->binding < variable_flags->bindingCount &&
- (variable_flags->pBindingFlags[binding->binding] & VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT_EXT)) {
- assert(!binding->pImmutableSamplers); /* Terribly ill defined how many samplers are valid */
- assert(binding->binding == num_bindings - 1);
-
- set_layout->has_variable_descriptors = true;
- }
-
- if ((binding->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER ||
- binding->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER) &&
- binding->pImmutableSamplers) {
- set_layout->binding[b].immutable_samplers_offset = samplers_offset;
- set_layout->binding[b].immutable_samplers_equal =
- has_equal_immutable_samplers(binding->pImmutableSamplers, binding->descriptorCount);
- set_layout->has_immutable_samplers = true;
-
-
- for (uint32_t i = 0; i < binding->descriptorCount; i++)
- memcpy(samplers + 4 * i, &radv_sampler_from_handle(binding->pImmutableSamplers[i])->state, 16);
-
- /* Don't reserve space for the samplers if they're not accessed. */
- if (set_layout->binding[b].immutable_samplers_equal) {
- if (binding->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER &&
- max_sampled_image_descriptors <= 2)
- set_layout->binding[b].size -= 32;
- else if (binding->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER)
- set_layout->binding[b].size -= 16;
- }
- samplers += 4 * binding->descriptorCount;
- samplers_offset += 4 * sizeof(uint32_t) * binding->descriptorCount;
-
- if (has_ycbcr_sampler) {
- ycbcr_sampler_offsets[b] = (const char*)ycbcr_samplers - (const char*)set_layout;
- for (uint32_t i = 0; i < binding->descriptorCount; i++) {
- if (radv_sampler_from_handle(binding->pImmutableSamplers[i])->ycbcr_sampler)
- ycbcr_samplers[i] = *radv_sampler_from_handle(binding->pImmutableSamplers[i])->ycbcr_sampler;
- else
- ycbcr_samplers[i].format = VK_FORMAT_UNDEFINED;
- }
- ycbcr_samplers += binding->descriptorCount;
- }
- }
-
- set_layout->size += descriptor_count * set_layout->binding[b].size;
- buffer_count += descriptor_count * binding_buffer_count;
- dynamic_offset_count += descriptor_count *
- set_layout->binding[b].dynamic_offset_count;
- set_layout->shader_stages |= binding->stageFlags;
- }
-
- free(bindings);
-
- set_layout->buffer_count = buffer_count;
- set_layout->dynamic_offset_count = dynamic_offset_count;
-
- *pSetLayout = radv_descriptor_set_layout_to_handle(set_layout);
-
- return VK_SUCCESS;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ struct radv_descriptor_set_layout *set_layout;
+
+ assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO);
+ const VkDescriptorSetLayoutBindingFlagsCreateInfo *variable_flags =
+ vk_find_struct_const(pCreateInfo->pNext, DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO);
+ const VkMutableDescriptorTypeCreateInfoVALVE *mutable_info =
+ vk_find_struct_const(pCreateInfo->pNext, MUTABLE_DESCRIPTOR_TYPE_CREATE_INFO_VALVE);
+
+ uint32_t num_bindings = 0;
+ uint32_t immutable_sampler_count = 0;
+ uint32_t ycbcr_sampler_count = 0;
+ for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) {
+ num_bindings = MAX2(num_bindings, pCreateInfo->pBindings[j].binding + 1);
+ if ((pCreateInfo->pBindings[j].descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER ||
+ pCreateInfo->pBindings[j].descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER) &&
+ pCreateInfo->pBindings[j].pImmutableSamplers) {
+ immutable_sampler_count += pCreateInfo->pBindings[j].descriptorCount;
+
+ bool has_ycbcr_sampler = false;
+ for (unsigned i = 0; i < pCreateInfo->pBindings[j].descriptorCount; ++i) {
+ if (radv_sampler_from_handle(pCreateInfo->pBindings[j].pImmutableSamplers[i])
+ ->ycbcr_sampler)
+ has_ycbcr_sampler = true;
+ }
+
+ if (has_ycbcr_sampler)
+ ycbcr_sampler_count += pCreateInfo->pBindings[j].descriptorCount;
+ }
+ }
+
+ uint32_t samplers_offset = offsetof(struct radv_descriptor_set_layout, binding[num_bindings]);
+ size_t size = samplers_offset + immutable_sampler_count * 4 * sizeof(uint32_t);
+ if (ycbcr_sampler_count > 0) {
+ /* Store block of offsets first, followed by the conversion descriptors (padded to the struct
+ * alignment) */
+ size += num_bindings * sizeof(uint32_t);
+ size = ALIGN(size, alignof(struct radv_sampler_ycbcr_conversion));
+ size += ycbcr_sampler_count * sizeof(struct radv_sampler_ycbcr_conversion);
+ }
+
+ set_layout =
+ vk_zalloc2(&device->vk.alloc, pAllocator, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (!set_layout)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ vk_object_base_init(&device->vk, &set_layout->base, VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT);
+
+ set_layout->flags = pCreateInfo->flags;
+ set_layout->layout_size = size;
+
+ /* We just allocate all the samplers at the end of the struct */
+ uint32_t *samplers = (uint32_t *)&set_layout->binding[num_bindings];
+ struct radv_sampler_ycbcr_conversion *ycbcr_samplers = NULL;
+ uint32_t *ycbcr_sampler_offsets = NULL;
+
+ if (ycbcr_sampler_count > 0) {
+ ycbcr_sampler_offsets = samplers + 4 * immutable_sampler_count;
+ set_layout->ycbcr_sampler_offsets_offset = (char *)ycbcr_sampler_offsets - (char *)set_layout;
+
+ uintptr_t first_ycbcr_sampler_offset =
+ (uintptr_t)ycbcr_sampler_offsets + sizeof(uint32_t) * num_bindings;
+ first_ycbcr_sampler_offset =
+ ALIGN(first_ycbcr_sampler_offset, alignof(struct radv_sampler_ycbcr_conversion));
+ ycbcr_samplers = (struct radv_sampler_ycbcr_conversion *)first_ycbcr_sampler_offset;
+ } else
+ set_layout->ycbcr_sampler_offsets_offset = 0;
+
+ VkDescriptorSetLayoutBinding *bindings = NULL;
+ VkResult result =
+ vk_create_sorted_bindings(pCreateInfo->pBindings, pCreateInfo->bindingCount, &bindings);
+ if (result != VK_SUCCESS) {
+ vk_object_base_finish(&set_layout->base);
+ vk_free2(&device->vk.alloc, pAllocator, set_layout);
+ return vk_error(device->instance, result);
+ }
+
+ set_layout->binding_count = num_bindings;
+ set_layout->shader_stages = 0;
+ set_layout->dynamic_shader_stages = 0;
+ set_layout->has_immutable_samplers = false;
+ set_layout->size = 0;
+
+ uint32_t buffer_count = 0;
+ uint32_t dynamic_offset_count = 0;
+
+ for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) {
+ const VkDescriptorSetLayoutBinding *binding = bindings + j;
+ uint32_t b = binding->binding;
+ uint32_t alignment = 0;
+ unsigned binding_buffer_count = 0;
+ uint32_t descriptor_count = binding->descriptorCount;
+ bool has_ycbcr_sampler = false;
+
+ /* main image + fmask */
+ uint32_t max_sampled_image_descriptors = 2;
+
+ if (binding->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER &&
+ binding->pImmutableSamplers) {
+ for (unsigned i = 0; i < binding->descriptorCount; ++i) {
+ struct radv_sampler_ycbcr_conversion *conversion =
+ radv_sampler_from_handle(binding->pImmutableSamplers[i])->ycbcr_sampler;
+
+ if (conversion) {
+ has_ycbcr_sampler = true;
+ max_sampled_image_descriptors = MAX2(max_sampled_image_descriptors,
+ vk_format_get_plane_count(conversion->format));
+ }
+ }
+ }
+
+ switch (binding->descriptorType) {
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+ assert(!(pCreateInfo->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
+ set_layout->binding[b].dynamic_offset_count = 1;
+ set_layout->dynamic_shader_stages |= binding->stageFlags;
+ set_layout->binding[b].size = 0;
+ binding_buffer_count = 1;
+ alignment = 1;
+ break;
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ set_layout->binding[b].size = 16;
+ binding_buffer_count = 1;
+ alignment = 16;
+ break;
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+ /* main descriptor + fmask descriptor */
+ set_layout->binding[b].size = 64;
+ binding_buffer_count = 1;
+ alignment = 32;
+ break;
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ /* main descriptor + fmask descriptor + sampler */
+ set_layout->binding[b].size = 96;
+ binding_buffer_count = 1;
+ alignment = 32;
+ break;
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ set_layout->binding[b].size = 16;
+ alignment = 16;
+ break;
+ case VK_DESCRIPTOR_TYPE_MUTABLE_VALVE: {
+ uint64_t mutable_size = 0, mutable_align = 0;
+ radv_mutable_descriptor_type_size_alignment(&mutable_info->pMutableDescriptorTypeLists[j],
+ &mutable_size, &mutable_align);
+ assert(mutable_size && mutable_align);
+ set_layout->binding[b].size = mutable_size;
+ alignment = mutable_align;
+ break;
+ }
+ case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
+ alignment = 16;
+ set_layout->binding[b].size = descriptor_count;
+ descriptor_count = 1;
+ break;
+ default:
+ break;
+ }
+
+ set_layout->size = align(set_layout->size, alignment);
+ set_layout->binding[b].type = binding->descriptorType;
+ set_layout->binding[b].array_size = descriptor_count;
+ set_layout->binding[b].offset = set_layout->size;
+ set_layout->binding[b].buffer_offset = buffer_count;
+ set_layout->binding[b].dynamic_offset_offset = dynamic_offset_count;
+
+ if (variable_flags && binding->binding < variable_flags->bindingCount &&
+ (variable_flags->pBindingFlags[binding->binding] &
+ VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT_EXT)) {
+ assert(
+ !binding->pImmutableSamplers); /* Terribly ill defined how many samplers are valid */
+ assert(binding->binding == num_bindings - 1);
+
+ set_layout->has_variable_descriptors = true;
+ }
+
+ if ((binding->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER ||
+ binding->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER) &&
+ binding->pImmutableSamplers) {
+ set_layout->binding[b].immutable_samplers_offset = samplers_offset;
+ set_layout->binding[b].immutable_samplers_equal =
+ has_equal_immutable_samplers(binding->pImmutableSamplers, binding->descriptorCount);
+ set_layout->has_immutable_samplers = true;
+
+ for (uint32_t i = 0; i < binding->descriptorCount; i++)
+ memcpy(samplers + 4 * i,
+ &radv_sampler_from_handle(binding->pImmutableSamplers[i])->state, 16);
+
+ /* Don't reserve space for the samplers if they're not accessed. */
+ if (set_layout->binding[b].immutable_samplers_equal) {
+ if (binding->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER &&
+ max_sampled_image_descriptors <= 2)
+ set_layout->binding[b].size -= 32;
+ else if (binding->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER)
+ set_layout->binding[b].size -= 16;
+ }
+ samplers += 4 * binding->descriptorCount;
+ samplers_offset += 4 * sizeof(uint32_t) * binding->descriptorCount;
+
+ if (has_ycbcr_sampler) {
+ ycbcr_sampler_offsets[b] = (const char *)ycbcr_samplers - (const char *)set_layout;
+ for (uint32_t i = 0; i < binding->descriptorCount; i++) {
+ if (radv_sampler_from_handle(binding->pImmutableSamplers[i])->ycbcr_sampler)
+ ycbcr_samplers[i] =
+ *radv_sampler_from_handle(binding->pImmutableSamplers[i])->ycbcr_sampler;
+ else
+ ycbcr_samplers[i].format = VK_FORMAT_UNDEFINED;
+ }
+ ycbcr_samplers += binding->descriptorCount;
+ }
+ }
+
+ set_layout->size += descriptor_count * set_layout->binding[b].size;
+ buffer_count += descriptor_count * binding_buffer_count;
+ dynamic_offset_count += descriptor_count * set_layout->binding[b].dynamic_offset_count;
+ set_layout->shader_stages |= binding->stageFlags;
+ }
+
+ free(bindings);
+
+ set_layout->buffer_count = buffer_count;
+ set_layout->dynamic_offset_count = dynamic_offset_count;
+
+ *pSetLayout = radv_descriptor_set_layout_to_handle(set_layout);
+
+ return VK_SUCCESS;
}
-void radv_DestroyDescriptorSetLayout(
- VkDevice _device,
- VkDescriptorSetLayout _set_layout,
- const VkAllocationCallbacks* pAllocator)
+void
+radv_DestroyDescriptorSetLayout(VkDevice _device, VkDescriptorSetLayout _set_layout,
+ const VkAllocationCallbacks *pAllocator)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_descriptor_set_layout, set_layout, _set_layout);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_descriptor_set_layout, set_layout, _set_layout);
- if (!set_layout)
- return;
+ if (!set_layout)
+ return;
- vk_object_base_finish(&set_layout->base);
- vk_free2(&device->vk.alloc, pAllocator, set_layout);
+ vk_object_base_finish(&set_layout->base);
+ vk_free2(&device->vk.alloc, pAllocator, set_layout);
}
-void radv_GetDescriptorSetLayoutSupport(VkDevice device,
- const VkDescriptorSetLayoutCreateInfo* pCreateInfo,
- VkDescriptorSetLayoutSupport* pSupport)
+void
+radv_GetDescriptorSetLayoutSupport(VkDevice device,
+ const VkDescriptorSetLayoutCreateInfo *pCreateInfo,
+ VkDescriptorSetLayoutSupport *pSupport)
{
- VkDescriptorSetLayoutBinding *bindings = NULL;
- VkResult result = vk_create_sorted_bindings(pCreateInfo->pBindings,
- pCreateInfo->bindingCount,
- &bindings);
- if (result != VK_SUCCESS) {
- pSupport->supported = false;
- return;
- }
-
- const VkDescriptorSetLayoutBindingFlagsCreateInfo *variable_flags =
- vk_find_struct_const(pCreateInfo->pNext, DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO);
- VkDescriptorSetVariableDescriptorCountLayoutSupport *variable_count =
- vk_find_struct((void*)pCreateInfo->pNext, DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_LAYOUT_SUPPORT);
- const VkMutableDescriptorTypeCreateInfoVALVE *mutable_info =
- vk_find_struct_const(pCreateInfo->pNext, MUTABLE_DESCRIPTOR_TYPE_CREATE_INFO_VALVE);
- if (variable_count) {
- variable_count->maxVariableDescriptorCount = 0;
- }
-
- bool supported = true;
- uint64_t size = 0;
- for (uint32_t i = 0; i < pCreateInfo->bindingCount; i++) {
- const VkDescriptorSetLayoutBinding *binding = bindings + i;
-
- uint64_t descriptor_size = 0;
- uint64_t descriptor_alignment = 1;
- uint32_t descriptor_count = binding->descriptorCount;
- switch (binding->descriptorType) {
- case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
- case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
- break;
- case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
- case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
- case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
- case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
- descriptor_size = 16;
- descriptor_alignment = 16;
- break;
- case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
- case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
- case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
- descriptor_size = 64;
- descriptor_alignment = 32;
- break;
- case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
- if (!has_equal_immutable_samplers(binding->pImmutableSamplers, descriptor_count)) {
- descriptor_size = 64;
- } else {
- descriptor_size = 96;
- }
- descriptor_alignment = 32;
- break;
- case VK_DESCRIPTOR_TYPE_SAMPLER:
- if (!has_equal_immutable_samplers(binding->pImmutableSamplers, descriptor_count)) {
- descriptor_size = 16;
- descriptor_alignment = 16;
- }
- break;
- case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
- descriptor_alignment = 16;
- descriptor_size = descriptor_count;
- descriptor_count = 1;
- break;
- case VK_DESCRIPTOR_TYPE_MUTABLE_VALVE:
- if (!radv_mutable_descriptor_type_size_alignment(&mutable_info->pMutableDescriptorTypeLists[i],
- &descriptor_size, &descriptor_alignment)) {
- supported = false;
- }
- break;
- default:
- break;
- }
-
- if (size && !align_u64(size, descriptor_alignment)) {
- supported = false;
- }
- size = align_u64(size, descriptor_alignment);
-
- uint64_t max_count = INT32_MAX;
- if (binding->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT)
- max_count = INT32_MAX - size;
- else if (descriptor_size)
- max_count = (INT32_MAX - size) / descriptor_size;
-
- if (max_count < descriptor_count) {
- supported = false;
- }
- if (variable_flags && binding->binding <variable_flags->bindingCount && variable_count &&
- (variable_flags->pBindingFlags[binding->binding] & VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT_EXT)) {
- variable_count->maxVariableDescriptorCount = MIN2(UINT32_MAX, max_count);
- }
- size += descriptor_count * descriptor_size;
- }
-
- free(bindings);
-
- pSupport->supported = supported;
+ VkDescriptorSetLayoutBinding *bindings = NULL;
+ VkResult result =
+ vk_create_sorted_bindings(pCreateInfo->pBindings, pCreateInfo->bindingCount, &bindings);
+ if (result != VK_SUCCESS) {
+ pSupport->supported = false;
+ return;
+ }
+
+ const VkDescriptorSetLayoutBindingFlagsCreateInfo *variable_flags =
+ vk_find_struct_const(pCreateInfo->pNext, DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO);
+ VkDescriptorSetVariableDescriptorCountLayoutSupport *variable_count = vk_find_struct(
+ (void *)pCreateInfo->pNext, DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_LAYOUT_SUPPORT);
+ const VkMutableDescriptorTypeCreateInfoVALVE *mutable_info =
+ vk_find_struct_const(pCreateInfo->pNext, MUTABLE_DESCRIPTOR_TYPE_CREATE_INFO_VALVE);
+ if (variable_count) {
+ variable_count->maxVariableDescriptorCount = 0;
+ }
+
+ bool supported = true;
+ uint64_t size = 0;
+ for (uint32_t i = 0; i < pCreateInfo->bindingCount; i++) {
+ const VkDescriptorSetLayoutBinding *binding = bindings + i;
+
+ uint64_t descriptor_size = 0;
+ uint64_t descriptor_alignment = 1;
+ uint32_t descriptor_count = binding->descriptorCount;
+ switch (binding->descriptorType) {
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+ break;
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ descriptor_size = 16;
+ descriptor_alignment = 16;
+ break;
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+ descriptor_size = 64;
+ descriptor_alignment = 32;
+ break;
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ if (!has_equal_immutable_samplers(binding->pImmutableSamplers, descriptor_count)) {
+ descriptor_size = 64;
+ } else {
+ descriptor_size = 96;
+ }
+ descriptor_alignment = 32;
+ break;
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ if (!has_equal_immutable_samplers(binding->pImmutableSamplers, descriptor_count)) {
+ descriptor_size = 16;
+ descriptor_alignment = 16;
+ }
+ break;
+ case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
+ descriptor_alignment = 16;
+ descriptor_size = descriptor_count;
+ descriptor_count = 1;
+ break;
+ case VK_DESCRIPTOR_TYPE_MUTABLE_VALVE:
+ if (!radv_mutable_descriptor_type_size_alignment(
+ &mutable_info->pMutableDescriptorTypeLists[i], &descriptor_size,
+ &descriptor_alignment)) {
+ supported = false;
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (size && !align_u64(size, descriptor_alignment)) {
+ supported = false;
+ }
+ size = align_u64(size, descriptor_alignment);
+
+ uint64_t max_count = INT32_MAX;
+ if (binding->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT)
+ max_count = INT32_MAX - size;
+ else if (descriptor_size)
+ max_count = (INT32_MAX - size) / descriptor_size;
+
+ if (max_count < descriptor_count) {
+ supported = false;
+ }
+ if (variable_flags && binding->binding < variable_flags->bindingCount && variable_count &&
+ (variable_flags->pBindingFlags[binding->binding] &
+ VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT_EXT)) {
+ variable_count->maxVariableDescriptorCount = MIN2(UINT32_MAX, max_count);
+ }
+ size += descriptor_count * descriptor_size;
+ }
+
+ free(bindings);
+
+ pSupport->supported = supported;
}
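
The support query above is pure size accounting: each binding contributes a fixed per-type footprint (16 bytes for buffer and sampler descriptors, 64 for images and input attachments, 64 or 96 for combined image+sampler, the raw byte count for inline uniform blocks), and the layout is rejected once the aligned running total would exceed INT32_MAX. The stand-alone C sketch below mirrors that accounting with hypothetical names; the per-type special cases are assumed to already be folded into each binding's size and alignment (dynamic buffers contribute no GPU memory, inline uniform blocks count bytes rather than descriptors).

/* Minimal sketch of the size/overflow accounting above.  The byte sizes come
 * from the switch in radv_GetDescriptorSetLayoutSupport; struct and function
 * names are hypothetical and only for illustration. */
#include <stdbool.h>
#include <stdint.h>

struct example_binding {
   uint64_t descriptor_size;      /* bytes per descriptor, 0 for dynamic buffers */
   uint64_t descriptor_alignment; /* power-of-two alignment in bytes */
   uint32_t descriptor_count;
};

static uint64_t
example_align_u64(uint64_t v, uint64_t a)
{
   /* a must be a power of two, which holds for the 1/16/32 used above. */
   return (v + a - 1) & ~(a - 1);
}

static bool
example_layout_fits(const struct example_binding *bindings, unsigned count)
{
   uint64_t size = 0;

   for (unsigned i = 0; i < count; i++) {
      const struct example_binding *b = &bindings[i];

      /* Guard against wrap-around when aligning, mirroring the driver's check. */
      if (size && !example_align_u64(size, b->descriptor_alignment))
         return false;
      size = example_align_u64(size, b->descriptor_alignment);

      /* How many descriptors of this size still fit under the 31-bit budget? */
      uint64_t max_count = INT32_MAX;
      if (b->descriptor_size)
         max_count = (INT32_MAX - size) / b->descriptor_size;
      if (max_count < b->descriptor_count)
         return false;

      size += (uint64_t)b->descriptor_count * b->descriptor_size;
   }
   return true;
}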
/*
@@ -443,1094 +447,1057 @@ void radv_GetDescriptorSetLayoutSupport(VkDevice device,
* just multiple descriptor set layouts pasted together.
*/
-VkResult radv_CreatePipelineLayout(
- VkDevice _device,
- const VkPipelineLayoutCreateInfo* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkPipelineLayout* pPipelineLayout)
+VkResult
+radv_CreatePipelineLayout(VkDevice _device, const VkPipelineLayoutCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkPipelineLayout *pPipelineLayout)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- struct radv_pipeline_layout *layout;
- struct mesa_sha1 ctx;
-
- assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO);
-
- layout = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*layout), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (layout == NULL)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
-
- vk_object_base_init(&device->vk, &layout->base,
- VK_OBJECT_TYPE_PIPELINE_LAYOUT);
-
- layout->num_sets = pCreateInfo->setLayoutCount;
-
- unsigned dynamic_offset_count = 0;
- uint16_t dynamic_shader_stages = 0;
-
-
- _mesa_sha1_init(&ctx);
- for (uint32_t set = 0; set < pCreateInfo->setLayoutCount; set++) {
- RADV_FROM_HANDLE(radv_descriptor_set_layout, set_layout,
- pCreateInfo->pSetLayouts[set]);
- layout->set[set].layout = set_layout;
-
- layout->set[set].dynamic_offset_start = dynamic_offset_count;
- layout->set[set].dynamic_offset_count = 0;
- layout->set[set].dynamic_offset_stages = 0;
-
- for (uint32_t b = 0; b < set_layout->binding_count; b++) {
- layout->set[set].dynamic_offset_count +=
- set_layout->binding[b].array_size * set_layout->binding[b].dynamic_offset_count;
- layout->set[set].dynamic_offset_stages |= set_layout->dynamic_shader_stages;
- }
- dynamic_offset_count += layout->set[set].dynamic_offset_count;
- dynamic_shader_stages |= layout->set[set].dynamic_offset_stages;
-
- /* Hash the entire set layout except for the vk_object_base. The
- * rest of the set layout is carefully constructed to not have
- * pointers so a full hash instead of a per-field hash should be ok. */
- _mesa_sha1_update(&ctx,
- (const char*)set_layout + sizeof(struct vk_object_base),
- set_layout->layout_size - sizeof(struct vk_object_base));
- }
-
- layout->dynamic_offset_count = dynamic_offset_count;
- layout->dynamic_shader_stages = dynamic_shader_stages;
- layout->push_constant_size = 0;
-
- for (unsigned i = 0; i < pCreateInfo->pushConstantRangeCount; ++i) {
- const VkPushConstantRange *range = pCreateInfo->pPushConstantRanges + i;
- layout->push_constant_size = MAX2(layout->push_constant_size,
- range->offset + range->size);
- }
-
- layout->push_constant_size = align(layout->push_constant_size, 16);
- _mesa_sha1_update(&ctx, &layout->push_constant_size,
- sizeof(layout->push_constant_size));
- _mesa_sha1_final(&ctx, layout->sha1);
- *pPipelineLayout = radv_pipeline_layout_to_handle(layout);
-
- return VK_SUCCESS;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ struct radv_pipeline_layout *layout;
+ struct mesa_sha1 ctx;
+
+ assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO);
+
+ layout = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*layout), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (layout == NULL)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ vk_object_base_init(&device->vk, &layout->base, VK_OBJECT_TYPE_PIPELINE_LAYOUT);
+
+ layout->num_sets = pCreateInfo->setLayoutCount;
+
+ unsigned dynamic_offset_count = 0;
+ uint16_t dynamic_shader_stages = 0;
+
+ _mesa_sha1_init(&ctx);
+ for (uint32_t set = 0; set < pCreateInfo->setLayoutCount; set++) {
+ RADV_FROM_HANDLE(radv_descriptor_set_layout, set_layout, pCreateInfo->pSetLayouts[set]);
+ layout->set[set].layout = set_layout;
+
+ layout->set[set].dynamic_offset_start = dynamic_offset_count;
+ layout->set[set].dynamic_offset_count = 0;
+ layout->set[set].dynamic_offset_stages = 0;
+
+ for (uint32_t b = 0; b < set_layout->binding_count; b++) {
+ layout->set[set].dynamic_offset_count +=
+ set_layout->binding[b].array_size * set_layout->binding[b].dynamic_offset_count;
+ layout->set[set].dynamic_offset_stages |= set_layout->dynamic_shader_stages;
+ }
+ dynamic_offset_count += layout->set[set].dynamic_offset_count;
+ dynamic_shader_stages |= layout->set[set].dynamic_offset_stages;
+
+ /* Hash the entire set layout except for the vk_object_base. The
+ * rest of the set layout is carefully constructed to not have
+ * pointers so a full hash instead of a per-field hash should be ok. */
+ _mesa_sha1_update(&ctx, (const char *)set_layout + sizeof(struct vk_object_base),
+ set_layout->layout_size - sizeof(struct vk_object_base));
+ }
+
+ layout->dynamic_offset_count = dynamic_offset_count;
+ layout->dynamic_shader_stages = dynamic_shader_stages;
+ layout->push_constant_size = 0;
+
+ for (unsigned i = 0; i < pCreateInfo->pushConstantRangeCount; ++i) {
+ const VkPushConstantRange *range = pCreateInfo->pPushConstantRanges + i;
+ layout->push_constant_size = MAX2(layout->push_constant_size, range->offset + range->size);
+ }
+
+ layout->push_constant_size = align(layout->push_constant_size, 16);
+ _mesa_sha1_update(&ctx, &layout->push_constant_size, sizeof(layout->push_constant_size));
+ _mesa_sha1_final(&ctx, layout->sha1);
+ *pPipelineLayout = radv_pipeline_layout_to_handle(layout);
+
+ return VK_SUCCESS;
}
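
The pipeline-layout hash above only works because the set layout is stored as a flat, pointer-free blob, so hashing its raw bytes past the vk_object_base header is stable across processes. Below is a minimal sketch of the same "skip the object header, hash the rest" pattern; FNV-1a stands in for the SHA-1 the driver uses, the struct names are hypothetical, and it assumes the structs were zero-initialized so padding bytes are deterministic.

/* Hash everything after the object header of each layout, then mix in the
 * push-constant size, as the function above does. */
#include <stddef.h>
#include <stdint.h>

struct example_object_base {
   uint32_t type; /* stands in for vk_object_base */
};

struct example_set_layout {
   struct example_object_base base;
   uint32_t binding_count;
   uint32_t size;
   /* ... more plain, pointer-free fields ... */
};

static uint64_t
example_fnv1a(uint64_t h, const void *data, size_t len)
{
   const uint8_t *p = data;
   for (size_t i = 0; i < len; i++)
      h = (h ^ p[i]) * 0x100000001b3ull; /* FNV-1a prime */
   return h;
}

static uint64_t
example_hash_layouts(const struct example_set_layout *const *layouts, unsigned count,
                     uint32_t push_constant_size)
{
   uint64_t h = 0xcbf29ce484222325ull; /* FNV offset basis */

   for (unsigned i = 0; i < count; i++) {
      /* Skip the object header; the remaining bytes are a stable image. */
      h = example_fnv1a(h, (const char *)layouts[i] + sizeof(struct example_object_base),
                        sizeof(*layouts[i]) - sizeof(struct example_object_base));
   }
   return example_fnv1a(h, &push_constant_size, sizeof(push_constant_size));
}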
-void radv_DestroyPipelineLayout(
- VkDevice _device,
- VkPipelineLayout _pipelineLayout,
- const VkAllocationCallbacks* pAllocator)
+void
+radv_DestroyPipelineLayout(VkDevice _device, VkPipelineLayout _pipelineLayout,
+ const VkAllocationCallbacks *pAllocator)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_pipeline_layout, pipeline_layout, _pipelineLayout);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_pipeline_layout, pipeline_layout, _pipelineLayout);
- if (!pipeline_layout)
- return;
+ if (!pipeline_layout)
+ return;
- vk_object_base_finish(&pipeline_layout->base);
- vk_free2(&device->vk.alloc, pAllocator, pipeline_layout);
+ vk_object_base_finish(&pipeline_layout->base);
+ vk_free2(&device->vk.alloc, pAllocator, pipeline_layout);
}
static VkResult
-radv_descriptor_set_create(struct radv_device *device,
- struct radv_descriptor_pool *pool,
- const struct radv_descriptor_set_layout *layout,
- const uint32_t *variable_count,
- struct radv_descriptor_set **out_set)
+radv_descriptor_set_create(struct radv_device *device, struct radv_descriptor_pool *pool,
+ const struct radv_descriptor_set_layout *layout,
+ const uint32_t *variable_count, struct radv_descriptor_set **out_set)
{
- struct radv_descriptor_set *set;
- uint32_t buffer_count = layout->buffer_count;
- if (variable_count) {
- unsigned stride = 1;
- if (layout->binding[layout->binding_count - 1].type == VK_DESCRIPTOR_TYPE_SAMPLER ||
- layout->binding[layout->binding_count - 1].type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT)
- stride = 0;
- buffer_count = layout->binding[layout->binding_count - 1].buffer_offset +
- *variable_count * stride;
- }
- unsigned range_offset = sizeof(struct radv_descriptor_set_header) +
- sizeof(struct radeon_winsys_bo *) * buffer_count;
- unsigned mem_size = range_offset +
- sizeof(struct radv_descriptor_range) * layout->dynamic_offset_count;
-
- if (pool->host_memory_base) {
- if (pool->host_memory_end - pool->host_memory_ptr < mem_size)
- return vk_error(device->instance, VK_ERROR_OUT_OF_POOL_MEMORY);
-
- set = (struct radv_descriptor_set*)pool->host_memory_ptr;
- pool->host_memory_ptr += mem_size;
- memset(set->descriptors, 0, sizeof(struct radeon_winsys_bo *) * buffer_count);
- } else {
- set = vk_alloc2(&device->vk.alloc, NULL, mem_size, 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
-
- if (!set)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- }
-
- memset(set, 0, mem_size);
-
- vk_object_base_init(&device->vk, &set->header.base,
- VK_OBJECT_TYPE_DESCRIPTOR_SET);
-
- if (layout->dynamic_offset_count) {
- set->header.dynamic_descriptors = (struct radv_descriptor_range*)((uint8_t*)set + range_offset);
- }
-
- set->header.layout = layout;
- set->header.buffer_count = buffer_count;
- uint32_t layout_size = layout->size;
- if (variable_count) {
- assert(layout->has_variable_descriptors);
- uint32_t stride = layout->binding[layout->binding_count - 1].size;
- if (layout->binding[layout->binding_count - 1].type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT)
- stride = 1;
-
- layout_size = layout->binding[layout->binding_count - 1].offset +
- *variable_count * stride;
- }
- layout_size = align_u32(layout_size, 32);
- if (layout_size) {
- set->header.size = layout_size;
-
- if (!pool->host_memory_base && pool->entry_count == pool->max_entry_count) {
- vk_free2(&device->vk.alloc, NULL, set);
- return vk_error(device->instance, VK_ERROR_OUT_OF_POOL_MEMORY);
- }
-
- /* try to allocate linearly first, so that we don't spend
- * time looking for gaps if the app only allocates &
- * resets via the pool. */
- if (pool->current_offset + layout_size <= pool->size) {
- set->header.bo = pool->bo;
- set->header.mapped_ptr = (uint32_t*)(pool->mapped_ptr + pool->current_offset);
- set->header.va = pool->bo ? (radv_buffer_get_va(set->header.bo) + pool->current_offset) : 0;
- if (!pool->host_memory_base) {
- pool->entries[pool->entry_count].offset = pool->current_offset;
- pool->entries[pool->entry_count].size = layout_size;
- pool->entries[pool->entry_count].set = set;
- pool->entry_count++;
- }
- pool->current_offset += layout_size;
- } else if (!pool->host_memory_base) {
- uint64_t offset = 0;
- int index;
-
- for (index = 0; index < pool->entry_count; ++index) {
- if (pool->entries[index].offset - offset >= layout_size)
- break;
- offset = pool->entries[index].offset + pool->entries[index].size;
- }
-
- if (pool->size - offset < layout_size) {
- vk_free2(&device->vk.alloc, NULL, set);
- return vk_error(device->instance, VK_ERROR_OUT_OF_POOL_MEMORY);
- }
- set->header.bo = pool->bo;
- set->header.mapped_ptr = (uint32_t*)(pool->mapped_ptr + offset);
- set->header.va = pool->bo ? (radv_buffer_get_va(set->header.bo) + offset) : 0;
- memmove(&pool->entries[index + 1], &pool->entries[index],
- sizeof(pool->entries[0]) * (pool->entry_count - index));
- pool->entries[index].offset = offset;
- pool->entries[index].size = layout_size;
- pool->entries[index].set = set;
- pool->entry_count++;
- } else
- return vk_error(device->instance, VK_ERROR_OUT_OF_POOL_MEMORY);
- }
-
- if (layout->has_immutable_samplers) {
- for (unsigned i = 0; i < layout->binding_count; ++i) {
- if (!layout->binding[i].immutable_samplers_offset ||
- layout->binding[i].immutable_samplers_equal)
- continue;
-
- unsigned offset = layout->binding[i].offset / 4;
- if (layout->binding[i].type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
- offset += radv_combined_image_descriptor_sampler_offset(layout->binding + i) / 4;
-
- const uint32_t *samplers = (const uint32_t*)((const char*)layout + layout->binding[i].immutable_samplers_offset);
- for (unsigned j = 0; j < layout->binding[i].array_size; ++j) {
- memcpy(set->header.mapped_ptr + offset, samplers + 4 * j, 16);
- offset += layout->binding[i].size / 4;
- }
-
- }
- }
- *out_set = set;
- return VK_SUCCESS;
+ struct radv_descriptor_set *set;
+ uint32_t buffer_count = layout->buffer_count;
+ if (variable_count) {
+ unsigned stride = 1;
+ if (layout->binding[layout->binding_count - 1].type == VK_DESCRIPTOR_TYPE_SAMPLER ||
+ layout->binding[layout->binding_count - 1].type ==
+ VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT)
+ stride = 0;
+ buffer_count =
+ layout->binding[layout->binding_count - 1].buffer_offset + *variable_count * stride;
+ }
+ unsigned range_offset =
+ sizeof(struct radv_descriptor_set_header) + sizeof(struct radeon_winsys_bo *) * buffer_count;
+ unsigned mem_size =
+ range_offset + sizeof(struct radv_descriptor_range) * layout->dynamic_offset_count;
+
+ if (pool->host_memory_base) {
+ if (pool->host_memory_end - pool->host_memory_ptr < mem_size)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_POOL_MEMORY);
+
+ set = (struct radv_descriptor_set *)pool->host_memory_ptr;
+ pool->host_memory_ptr += mem_size;
+ memset(set->descriptors, 0, sizeof(struct radeon_winsys_bo *) * buffer_count);
+ } else {
+ set = vk_alloc2(&device->vk.alloc, NULL, mem_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+
+ if (!set)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+
+ memset(set, 0, mem_size);
+
+ vk_object_base_init(&device->vk, &set->header.base, VK_OBJECT_TYPE_DESCRIPTOR_SET);
+
+ if (layout->dynamic_offset_count) {
+ set->header.dynamic_descriptors =
+ (struct radv_descriptor_range *)((uint8_t *)set + range_offset);
+ }
+
+ set->header.layout = layout;
+ set->header.buffer_count = buffer_count;
+ uint32_t layout_size = layout->size;
+ if (variable_count) {
+ assert(layout->has_variable_descriptors);
+ uint32_t stride = layout->binding[layout->binding_count - 1].size;
+ if (layout->binding[layout->binding_count - 1].type ==
+ VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT)
+ stride = 1;
+
+ layout_size = layout->binding[layout->binding_count - 1].offset + *variable_count * stride;
+ }
+ layout_size = align_u32(layout_size, 32);
+ if (layout_size) {
+ set->header.size = layout_size;
+
+ if (!pool->host_memory_base && pool->entry_count == pool->max_entry_count) {
+ vk_free2(&device->vk.alloc, NULL, set);
+ return vk_error(device->instance, VK_ERROR_OUT_OF_POOL_MEMORY);
+ }
+
+ /* try to allocate linearly first, so that we don't spend
+ * time looking for gaps if the app only allocates &
+ * resets via the pool. */
+ if (pool->current_offset + layout_size <= pool->size) {
+ set->header.bo = pool->bo;
+ set->header.mapped_ptr = (uint32_t *)(pool->mapped_ptr + pool->current_offset);
+ set->header.va =
+ pool->bo ? (radv_buffer_get_va(set->header.bo) + pool->current_offset) : 0;
+ if (!pool->host_memory_base) {
+ pool->entries[pool->entry_count].offset = pool->current_offset;
+ pool->entries[pool->entry_count].size = layout_size;
+ pool->entries[pool->entry_count].set = set;
+ pool->entry_count++;
+ }
+ pool->current_offset += layout_size;
+ } else if (!pool->host_memory_base) {
+ uint64_t offset = 0;
+ int index;
+
+ for (index = 0; index < pool->entry_count; ++index) {
+ if (pool->entries[index].offset - offset >= layout_size)
+ break;
+ offset = pool->entries[index].offset + pool->entries[index].size;
+ }
+
+ if (pool->size - offset < layout_size) {
+ vk_free2(&device->vk.alloc, NULL, set);
+ return vk_error(device->instance, VK_ERROR_OUT_OF_POOL_MEMORY);
+ }
+ set->header.bo = pool->bo;
+ set->header.mapped_ptr = (uint32_t *)(pool->mapped_ptr + offset);
+ set->header.va = pool->bo ? (radv_buffer_get_va(set->header.bo) + offset) : 0;
+ memmove(&pool->entries[index + 1], &pool->entries[index],
+ sizeof(pool->entries[0]) * (pool->entry_count - index));
+ pool->entries[index].offset = offset;
+ pool->entries[index].size = layout_size;
+ pool->entries[index].set = set;
+ pool->entry_count++;
+ } else
+ return vk_error(device->instance, VK_ERROR_OUT_OF_POOL_MEMORY);
+ }
+
+ if (layout->has_immutable_samplers) {
+ for (unsigned i = 0; i < layout->binding_count; ++i) {
+ if (!layout->binding[i].immutable_samplers_offset ||
+ layout->binding[i].immutable_samplers_equal)
+ continue;
+
+ unsigned offset = layout->binding[i].offset / 4;
+ if (layout->binding[i].type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
+ offset += radv_combined_image_descriptor_sampler_offset(layout->binding + i) / 4;
+
+ const uint32_t *samplers =
+ (const uint32_t *)((const char *)layout + layout->binding[i].immutable_samplers_offset);
+ for (unsigned j = 0; j < layout->binding[i].array_size; ++j) {
+ memcpy(set->header.mapped_ptr + offset, samplers + 4 * j, 16);
+ offset += layout->binding[i].size / 4;
+ }
+ }
+ }
+ *out_set = set;
+ return VK_SUCCESS;
}
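
Descriptor-set allocation above is a two-stage strategy: bump-allocate at current_offset while the pool is used linearly, and only fall back to a first-fit scan of the offset-sorted entry list once sets have been freed out of order. Here is a stand-alone sketch of that first-fit path, with hypothetical names and a fixed entry capacity.

/* First-fit allocation over a sorted entry array, mirroring the gap search
 * above.  Capacity and names are hypothetical. */
#include <stdbool.h>
#include <stdint.h>
#include <string.h>

#define EXAMPLE_MAX_ENTRIES 64

struct example_entry {
   uint64_t offset;
   uint64_t size;
};

struct example_pool {
   uint64_t size; /* total pool size in bytes */
   unsigned entry_count;
   struct example_entry entries[EXAMPLE_MAX_ENTRIES]; /* sorted by offset */
};

/* Returns true and writes the chosen offset, or false if nothing fits. */
static bool
example_pool_alloc(struct example_pool *pool, uint64_t alloc_size, uint64_t *out_offset)
{
   uint64_t offset = 0;
   unsigned index;

   if (pool->entry_count == EXAMPLE_MAX_ENTRIES)
      return false;

   for (index = 0; index < pool->entry_count; ++index) {
      if (pool->entries[index].offset - offset >= alloc_size)
         break; /* found a hole in front of this entry */
      offset = pool->entries[index].offset + pool->entries[index].size;
   }

   if (pool->size - offset < alloc_size)
      return false;

   /* Keep the array sorted by shifting the tail up one slot. */
   memmove(&pool->entries[index + 1], &pool->entries[index],
           sizeof(pool->entries[0]) * (pool->entry_count - index));
   pool->entries[index].offset = offset;
   pool->entries[index].size = alloc_size;
   pool->entry_count++;

   *out_offset = offset;
   return true;
}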
static void
-radv_descriptor_set_destroy(struct radv_device *device,
- struct radv_descriptor_pool *pool,
- struct radv_descriptor_set *set,
- bool free_bo)
+radv_descriptor_set_destroy(struct radv_device *device, struct radv_descriptor_pool *pool,
+ struct radv_descriptor_set *set, bool free_bo)
{
- assert(!pool->host_memory_base);
-
- if (free_bo && set->header.size && !pool->host_memory_base) {
- uint32_t offset = (uint8_t*)set->header.mapped_ptr - pool->mapped_ptr;
- for (int i = 0; i < pool->entry_count; ++i) {
- if (pool->entries[i].offset == offset) {
- memmove(&pool->entries[i], &pool->entries[i+1],
- sizeof(pool->entries[i]) * (pool->entry_count - i - 1));
- --pool->entry_count;
- break;
- }
- }
- }
- vk_object_base_finish(&set->header.base);
- vk_free2(&device->vk.alloc, NULL, set);
+ assert(!pool->host_memory_base);
+
+ if (free_bo && set->header.size && !pool->host_memory_base) {
+ uint32_t offset = (uint8_t *)set->header.mapped_ptr - pool->mapped_ptr;
+ for (int i = 0; i < pool->entry_count; ++i) {
+ if (pool->entries[i].offset == offset) {
+ memmove(&pool->entries[i], &pool->entries[i + 1],
+ sizeof(pool->entries[i]) * (pool->entry_count - i - 1));
+ --pool->entry_count;
+ break;
+ }
+ }
+ }
+ vk_object_base_finish(&set->header.base);
+ vk_free2(&device->vk.alloc, NULL, set);
}
-static void radv_destroy_descriptor_pool(struct radv_device *device,
- const VkAllocationCallbacks *pAllocator,
- struct radv_descriptor_pool *pool)
+static void
+radv_destroy_descriptor_pool(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
+ struct radv_descriptor_pool *pool)
{
- if (!pool->host_memory_base) {
- for(int i = 0; i < pool->entry_count; ++i) {
- radv_descriptor_set_destroy(device, pool, pool->entries[i].set, false);
- }
- }
-
- if (pool->bo)
- device->ws->buffer_destroy(device->ws, pool->bo);
- if (pool->host_bo)
- vk_free2(&device->vk.alloc, pAllocator, pool->host_bo);
-
- vk_object_base_finish(&pool->base);
- vk_free2(&device->vk.alloc, pAllocator, pool);
+ if (!pool->host_memory_base) {
+ for (int i = 0; i < pool->entry_count; ++i) {
+ radv_descriptor_set_destroy(device, pool, pool->entries[i].set, false);
+ }
+ }
+
+ if (pool->bo)
+ device->ws->buffer_destroy(device->ws, pool->bo);
+ if (pool->host_bo)
+ vk_free2(&device->vk.alloc, pAllocator, pool->host_bo);
+
+ vk_object_base_finish(&pool->base);
+ vk_free2(&device->vk.alloc, pAllocator, pool);
}
-VkResult radv_CreateDescriptorPool(
- VkDevice _device,
- const VkDescriptorPoolCreateInfo* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkDescriptorPool* pDescriptorPool)
+VkResult
+radv_CreateDescriptorPool(VkDevice _device, const VkDescriptorPoolCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkDescriptorPool *pDescriptorPool)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- struct radv_descriptor_pool *pool;
- uint64_t size = sizeof(struct radv_descriptor_pool);
- uint64_t bo_size = 0, bo_count = 0, range_count = 0;
-
- const VkMutableDescriptorTypeCreateInfoVALVE *mutable_info =
- vk_find_struct_const(pCreateInfo->pNext, MUTABLE_DESCRIPTOR_TYPE_CREATE_INFO_VALVE);
-
- vk_foreach_struct(ext, pCreateInfo->pNext) {
- switch (ext->sType) {
- case VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_INLINE_UNIFORM_BLOCK_CREATE_INFO_EXT: {
- const struct VkDescriptorPoolInlineUniformBlockCreateInfoEXT *info =
- (const struct VkDescriptorPoolInlineUniformBlockCreateInfoEXT*)ext;
- /* the sizes are 4 aligned, and we need to align to at
- * most 32, which needs at most 28 bytes extra per
- * binding. */
- bo_size += 28llu * info->maxInlineUniformBlockBindings;
- break;
- }
- default:
- break;
- }
- }
-
- for (unsigned i = 0; i < pCreateInfo->poolSizeCount; ++i) {
- if (pCreateInfo->pPoolSizes[i].type != VK_DESCRIPTOR_TYPE_SAMPLER)
- bo_count += pCreateInfo->pPoolSizes[i].descriptorCount;
-
- switch(pCreateInfo->pPoolSizes[i].type) {
- case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
- case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
- range_count += pCreateInfo->pPoolSizes[i].descriptorCount;
- break;
- case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
- case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
- case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
- case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
- case VK_DESCRIPTOR_TYPE_SAMPLER:
- /* 32 as we may need to align for images */
- bo_size += 32 * pCreateInfo->pPoolSizes[i].descriptorCount;
- break;
- case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
- case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
- case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
- bo_size += 64 * pCreateInfo->pPoolSizes[i].descriptorCount;
- break;
- case VK_DESCRIPTOR_TYPE_MUTABLE_VALVE:
- /* Per spec, if a mutable descriptor type list is provided for the pool entry, we
- * allocate enough memory to hold any subset of that list.
- * If there is no mutable descriptor type list available,
- * we must allocate enough for any supported mutable descriptor type, i.e. 64 bytes. */
- if (mutable_info && i < mutable_info->mutableDescriptorTypeListCount) {
- uint64_t mutable_size, mutable_alignment;
- if (radv_mutable_descriptor_type_size_alignment(&mutable_info->pMutableDescriptorTypeLists[i],
- &mutable_size, &mutable_alignment)) {
- bo_size += mutable_size * pCreateInfo->pPoolSizes[i].descriptorCount;
- }
- } else {
- bo_size += 64 * pCreateInfo->pPoolSizes[i].descriptorCount;
- }
- break;
- case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
- bo_size += 96 * pCreateInfo->pPoolSizes[i].descriptorCount;
- break;
- case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
- bo_size += pCreateInfo->pPoolSizes[i].descriptorCount;
- break;
- default:
- break;
- }
- }
-
- if (!(pCreateInfo->flags & VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT)) {
- uint64_t host_size = pCreateInfo->maxSets * sizeof(struct radv_descriptor_set);
- host_size += sizeof(struct radeon_winsys_bo*) * bo_count;
- host_size += sizeof(struct radv_descriptor_range) * range_count;
- size += host_size;
- } else {
- size += sizeof(struct radv_descriptor_pool_entry) * pCreateInfo->maxSets;
- }
-
- pool = vk_alloc2(&device->vk.alloc, pAllocator, size, 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (!pool)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
-
- memset(pool, 0, sizeof(*pool));
-
- vk_object_base_init(&device->vk, &pool->base,
- VK_OBJECT_TYPE_DESCRIPTOR_POOL);
-
- if (!(pCreateInfo->flags & VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT)) {
- pool->host_memory_base = (uint8_t*)pool + sizeof(struct radv_descriptor_pool);
- pool->host_memory_ptr = pool->host_memory_base;
- pool->host_memory_end = (uint8_t*)pool + size;
- }
-
- if (bo_size) {
- if (!(pCreateInfo->flags & VK_DESCRIPTOR_POOL_CREATE_HOST_ONLY_BIT_VALVE)) {
- pool->bo = device->ws->buffer_create(device->ws, bo_size, 32,
- RADEON_DOMAIN_VRAM,
- RADEON_FLAG_NO_INTERPROCESS_SHARING |
- RADEON_FLAG_READ_ONLY |
- RADEON_FLAG_32BIT,
- RADV_BO_PRIORITY_DESCRIPTOR);
- if (!pool->bo) {
- radv_destroy_descriptor_pool(device, pAllocator, pool);
- return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
- }
- pool->mapped_ptr = (uint8_t*)device->ws->buffer_map(pool->bo);
- if (!pool->mapped_ptr) {
- radv_destroy_descriptor_pool(device, pAllocator, pool);
- return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
- }
- } else {
- pool->host_bo = vk_alloc2(&device->vk.alloc, pAllocator, bo_size, 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (!pool->host_bo) {
- radv_destroy_descriptor_pool(device, pAllocator, pool);
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- }
- pool->mapped_ptr = pool->host_bo;
- }
- }
- pool->size = bo_size;
- pool->max_entry_count = pCreateInfo->maxSets;
-
- *pDescriptorPool = radv_descriptor_pool_to_handle(pool);
- return VK_SUCCESS;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ struct radv_descriptor_pool *pool;
+ uint64_t size = sizeof(struct radv_descriptor_pool);
+ uint64_t bo_size = 0, bo_count = 0, range_count = 0;
+
+ const VkMutableDescriptorTypeCreateInfoVALVE *mutable_info =
+ vk_find_struct_const(pCreateInfo->pNext, MUTABLE_DESCRIPTOR_TYPE_CREATE_INFO_VALVE);
+
+ vk_foreach_struct(ext, pCreateInfo->pNext)
+ {
+ switch (ext->sType) {
+ case VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_INLINE_UNIFORM_BLOCK_CREATE_INFO_EXT: {
+ const struct VkDescriptorPoolInlineUniformBlockCreateInfoEXT *info =
+ (const struct VkDescriptorPoolInlineUniformBlockCreateInfoEXT *)ext;
+ /* the sizes are 4 aligned, and we need to align to at
+ * most 32, which needs at most 28 bytes extra per
+ * binding. */
+ bo_size += 28llu * info->maxInlineUniformBlockBindings;
+ break;
+ }
+ default:
+ break;
+ }
+ }
+
+ for (unsigned i = 0; i < pCreateInfo->poolSizeCount; ++i) {
+ if (pCreateInfo->pPoolSizes[i].type != VK_DESCRIPTOR_TYPE_SAMPLER)
+ bo_count += pCreateInfo->pPoolSizes[i].descriptorCount;
+
+ switch (pCreateInfo->pPoolSizes[i].type) {
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+ range_count += pCreateInfo->pPoolSizes[i].descriptorCount;
+ break;
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ /* 32 as we may need to align for images */
+ bo_size += 32 * pCreateInfo->pPoolSizes[i].descriptorCount;
+ break;
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+ bo_size += 64 * pCreateInfo->pPoolSizes[i].descriptorCount;
+ break;
+ case VK_DESCRIPTOR_TYPE_MUTABLE_VALVE:
+ /* Per spec, if a mutable descriptor type list is provided for the pool entry, we
+ * allocate enough memory to hold any subset of that list.
+ * If there is no mutable descriptor type list available,
+ * we must allocate enough for any supported mutable descriptor type, i.e. 64 bytes. */
+ if (mutable_info && i < mutable_info->mutableDescriptorTypeListCount) {
+ uint64_t mutable_size, mutable_alignment;
+ if (radv_mutable_descriptor_type_size_alignment(
+ &mutable_info->pMutableDescriptorTypeLists[i], &mutable_size,
+ &mutable_alignment)) {
+ bo_size += mutable_size * pCreateInfo->pPoolSizes[i].descriptorCount;
+ }
+ } else {
+ bo_size += 64 * pCreateInfo->pPoolSizes[i].descriptorCount;
+ }
+ break;
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ bo_size += 96 * pCreateInfo->pPoolSizes[i].descriptorCount;
+ break;
+ case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
+ bo_size += pCreateInfo->pPoolSizes[i].descriptorCount;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (!(pCreateInfo->flags & VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT)) {
+ uint64_t host_size = pCreateInfo->maxSets * sizeof(struct radv_descriptor_set);
+ host_size += sizeof(struct radeon_winsys_bo *) * bo_count;
+ host_size += sizeof(struct radv_descriptor_range) * range_count;
+ size += host_size;
+ } else {
+ size += sizeof(struct radv_descriptor_pool_entry) * pCreateInfo->maxSets;
+ }
+
+ pool = vk_alloc2(&device->vk.alloc, pAllocator, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (!pool)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ memset(pool, 0, sizeof(*pool));
+
+ vk_object_base_init(&device->vk, &pool->base, VK_OBJECT_TYPE_DESCRIPTOR_POOL);
+
+ if (!(pCreateInfo->flags & VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT)) {
+ pool->host_memory_base = (uint8_t *)pool + sizeof(struct radv_descriptor_pool);
+ pool->host_memory_ptr = pool->host_memory_base;
+ pool->host_memory_end = (uint8_t *)pool + size;
+ }
+
+ if (bo_size) {
+ if (!(pCreateInfo->flags & VK_DESCRIPTOR_POOL_CREATE_HOST_ONLY_BIT_VALVE)) {
+ pool->bo = device->ws->buffer_create(
+ device->ws, bo_size, 32, RADEON_DOMAIN_VRAM,
+ RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT,
+ RADV_BO_PRIORITY_DESCRIPTOR);
+ if (!pool->bo) {
+ radv_destroy_descriptor_pool(device, pAllocator, pool);
+ return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ }
+ pool->mapped_ptr = (uint8_t *)device->ws->buffer_map(pool->bo);
+ if (!pool->mapped_ptr) {
+ radv_destroy_descriptor_pool(device, pAllocator, pool);
+ return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ }
+ } else {
+ pool->host_bo =
+ vk_alloc2(&device->vk.alloc, pAllocator, bo_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (!pool->host_bo) {
+ radv_destroy_descriptor_pool(device, pAllocator, pool);
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+ pool->mapped_ptr = pool->host_bo;
+ }
+ }
+ pool->size = bo_size;
+ pool->max_entry_count = pCreateInfo->maxSets;
+
+ *pDescriptorPool = radv_descriptor_pool_to_handle(pool);
+ return VK_SUCCESS;
}
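
The pool's backing buffer is sized pessimistically up front from per-type byte costs, plus 28 bytes of alignment slack for every potential inline uniform block binding (the data is 4-byte granular but each binding starts 32-byte aligned). A rough sketch of that estimate follows, assuming the same footprints as the switch above; the type enum and names are hypothetical, and the mutable-descriptor refinement is omitted.

/* Worst-case pool sizing, folded down to the byte costs used above. */
#include <stdint.h>

enum example_desc_type {
   EXAMPLE_DESC_BUFFER,         /* (texel) buffers and samplers: 16B, padded to 32 */
   EXAMPLE_DESC_IMAGE,          /* sampled/storage images, input attachments: 64B */
   EXAMPLE_DESC_COMBINED,       /* combined image+sampler: 96B */
   EXAMPLE_DESC_INLINE_UNIFORM, /* inline uniform block: 1B per data byte */
};

static uint64_t
example_pool_bo_size(const enum example_desc_type *types, const uint32_t *counts,
                     unsigned pool_size_count, uint32_t max_inline_block_bindings)
{
   uint64_t bo_size = 0;

   /* Up to 28 bytes of padding per inline uniform block binding to reach
    * 32-byte alignment from 4-byte granular sizes. */
   bo_size += 28ull * max_inline_block_bindings;

   for (unsigned i = 0; i < pool_size_count; i++) {
      switch (types[i]) {
      case EXAMPLE_DESC_BUFFER:
         bo_size += 32ull * counts[i];
         break;
      case EXAMPLE_DESC_IMAGE:
         bo_size += 64ull * counts[i];
         break;
      case EXAMPLE_DESC_COMBINED:
         bo_size += 96ull * counts[i];
         break;
      case EXAMPLE_DESC_INLINE_UNIFORM:
         bo_size += counts[i];
         break;
      }
   }
   return bo_size;
}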
-void radv_DestroyDescriptorPool(
- VkDevice _device,
- VkDescriptorPool _pool,
- const VkAllocationCallbacks* pAllocator)
+void
+radv_DestroyDescriptorPool(VkDevice _device, VkDescriptorPool _pool,
+ const VkAllocationCallbacks *pAllocator)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_descriptor_pool, pool, _pool);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_descriptor_pool, pool, _pool);
- if (!pool)
- return;
+ if (!pool)
+ return;
- radv_destroy_descriptor_pool(device, pAllocator, pool);
+ radv_destroy_descriptor_pool(device, pAllocator, pool);
}
-VkResult radv_ResetDescriptorPool(
- VkDevice _device,
- VkDescriptorPool descriptorPool,
- VkDescriptorPoolResetFlags flags)
+VkResult
+radv_ResetDescriptorPool(VkDevice _device, VkDescriptorPool descriptorPool,
+ VkDescriptorPoolResetFlags flags)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_descriptor_pool, pool, descriptorPool);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_descriptor_pool, pool, descriptorPool);
- if (!pool->host_memory_base) {
- for(int i = 0; i < pool->entry_count; ++i) {
- radv_descriptor_set_destroy(device, pool, pool->entries[i].set, false);
- }
- pool->entry_count = 0;
- }
+ if (!pool->host_memory_base) {
+ for (int i = 0; i < pool->entry_count; ++i) {
+ radv_descriptor_set_destroy(device, pool, pool->entries[i].set, false);
+ }
+ pool->entry_count = 0;
+ }
- pool->current_offset = 0;
- pool->host_memory_ptr = pool->host_memory_base;
+ pool->current_offset = 0;
+ pool->host_memory_ptr = pool->host_memory_base;
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
-VkResult radv_AllocateDescriptorSets(
- VkDevice _device,
- const VkDescriptorSetAllocateInfo* pAllocateInfo,
- VkDescriptorSet* pDescriptorSets)
+VkResult
+radv_AllocateDescriptorSets(VkDevice _device, const VkDescriptorSetAllocateInfo *pAllocateInfo,
+ VkDescriptorSet *pDescriptorSets)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_descriptor_pool, pool, pAllocateInfo->descriptorPool);
-
- VkResult result = VK_SUCCESS;
- uint32_t i;
- struct radv_descriptor_set *set = NULL;
-
- const VkDescriptorSetVariableDescriptorCountAllocateInfo *variable_counts =
- vk_find_struct_const(pAllocateInfo->pNext, DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_ALLOCATE_INFO);
- const uint32_t zero = 0;
-
- /* allocate a set of buffers for each shader to contain descriptors */
- for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) {
- RADV_FROM_HANDLE(radv_descriptor_set_layout, layout,
- pAllocateInfo->pSetLayouts[i]);
-
- const uint32_t *variable_count = NULL;
- if (variable_counts) {
- if (i < variable_counts->descriptorSetCount)
- variable_count = variable_counts->pDescriptorCounts + i;
- else
- variable_count = &zero;
- }
-
- assert(!(layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
-
- result = radv_descriptor_set_create(device, pool, layout, variable_count, &set);
- if (result != VK_SUCCESS)
- break;
-
- pDescriptorSets[i] = radv_descriptor_set_to_handle(set);
- }
-
- if (result != VK_SUCCESS) {
- radv_FreeDescriptorSets(_device, pAllocateInfo->descriptorPool,
- i, pDescriptorSets);
- for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) {
- pDescriptorSets[i] = VK_NULL_HANDLE;
- }
- }
- return result;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_descriptor_pool, pool, pAllocateInfo->descriptorPool);
+
+ VkResult result = VK_SUCCESS;
+ uint32_t i;
+ struct radv_descriptor_set *set = NULL;
+
+ const VkDescriptorSetVariableDescriptorCountAllocateInfo *variable_counts = vk_find_struct_const(
+ pAllocateInfo->pNext, DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_ALLOCATE_INFO);
+ const uint32_t zero = 0;
+
+ /* allocate a set of buffers for each shader to contain descriptors */
+ for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) {
+ RADV_FROM_HANDLE(radv_descriptor_set_layout, layout, pAllocateInfo->pSetLayouts[i]);
+
+ const uint32_t *variable_count = NULL;
+ if (variable_counts) {
+ if (i < variable_counts->descriptorSetCount)
+ variable_count = variable_counts->pDescriptorCounts + i;
+ else
+ variable_count = &zero;
+ }
+
+ assert(!(layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
+
+ result = radv_descriptor_set_create(device, pool, layout, variable_count, &set);
+ if (result != VK_SUCCESS)
+ break;
+
+ pDescriptorSets[i] = radv_descriptor_set_to_handle(set);
+ }
+
+ if (result != VK_SUCCESS) {
+ radv_FreeDescriptorSets(_device, pAllocateInfo->descriptorPool, i, pDescriptorSets);
+ for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) {
+ pDescriptorSets[i] = VK_NULL_HANDLE;
+ }
+ }
+ return result;
}
-VkResult radv_FreeDescriptorSets(
- VkDevice _device,
- VkDescriptorPool descriptorPool,
- uint32_t count,
- const VkDescriptorSet* pDescriptorSets)
+VkResult
+radv_FreeDescriptorSets(VkDevice _device, VkDescriptorPool descriptorPool, uint32_t count,
+ const VkDescriptorSet *pDescriptorSets)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_descriptor_pool, pool, descriptorPool);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_descriptor_pool, pool, descriptorPool);
- for (uint32_t i = 0; i < count; i++) {
- RADV_FROM_HANDLE(radv_descriptor_set, set, pDescriptorSets[i]);
+ for (uint32_t i = 0; i < count; i++) {
+ RADV_FROM_HANDLE(radv_descriptor_set, set, pDescriptorSets[i]);
- if (set && !pool->host_memory_base)
- radv_descriptor_set_destroy(device, pool, set, true);
- }
- return VK_SUCCESS;
+ if (set && !pool->host_memory_base)
+ radv_descriptor_set_destroy(device, pool, set, true);
+ }
+ return VK_SUCCESS;
}
-static void write_texel_buffer_descriptor(struct radv_device *device,
- struct radv_cmd_buffer *cmd_buffer,
- unsigned *dst,
- struct radeon_winsys_bo **buffer_list,
- const VkBufferView _buffer_view)
+static void
+write_texel_buffer_descriptor(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer,
+ unsigned *dst, struct radeon_winsys_bo **buffer_list,
+ const VkBufferView _buffer_view)
{
- RADV_FROM_HANDLE(radv_buffer_view, buffer_view, _buffer_view);
+ RADV_FROM_HANDLE(radv_buffer_view, buffer_view, _buffer_view);
- if (!buffer_view) {
- memset(dst, 0, 4 * 4);
- if (!cmd_buffer)
- *buffer_list = NULL;
- return;
- }
+ if (!buffer_view) {
+ memset(dst, 0, 4 * 4);
+ if (!cmd_buffer)
+ *buffer_list = NULL;
+ return;
+ }
- memcpy(dst, buffer_view->state, 4 * 4);
+ memcpy(dst, buffer_view->state, 4 * 4);
- if (cmd_buffer)
- radv_cs_add_buffer(device->ws, cmd_buffer->cs, buffer_view->bo);
- else
- *buffer_list = buffer_view->bo;
+ if (cmd_buffer)
+ radv_cs_add_buffer(device->ws, cmd_buffer->cs, buffer_view->bo);
+ else
+ *buffer_list = buffer_view->bo;
}
-static void write_buffer_descriptor(struct radv_device *device,
- struct radv_cmd_buffer *cmd_buffer,
- unsigned *dst,
- struct radeon_winsys_bo **buffer_list,
- const VkDescriptorBufferInfo *buffer_info)
+static void
+write_buffer_descriptor(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer,
+ unsigned *dst, struct radeon_winsys_bo **buffer_list,
+ const VkDescriptorBufferInfo *buffer_info)
{
- RADV_FROM_HANDLE(radv_buffer, buffer, buffer_info->buffer);
-
- if (!buffer) {
- memset(dst, 0, 4 * 4);
- if (!cmd_buffer)
- *buffer_list = NULL;
- return;
- }
-
- uint64_t va = radv_buffer_get_va(buffer->bo);
- uint32_t range = buffer_info->range;
-
- if (buffer_info->range == VK_WHOLE_SIZE)
- range = buffer->size - buffer_info->offset;
-
- /* robustBufferAccess is relaxed enough to allow this (in combination
- * with the alignment/size we return from vkGetBufferMemoryRequirements)
- * and this allows the shader compiler to create more efficient 8/16-bit
- * buffer accesses. */
- range = align(range, 4);
-
- va += buffer_info->offset + buffer->offset;
-
- uint32_t rsrc_word3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
- S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
- S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
- S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
-
- if (device->physical_device->rad_info.chip_class >= GFX10) {
- rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
- S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
- S_008F0C_RESOURCE_LEVEL(1);
- } else {
- rsrc_word3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
- }
-
- dst[0] = va;
- dst[1] = S_008F04_BASE_ADDRESS_HI(va >> 32);
- dst[2] = range;
- dst[3] = rsrc_word3;
-
- if (cmd_buffer)
- radv_cs_add_buffer(device->ws, cmd_buffer->cs, buffer->bo);
- else
- *buffer_list = buffer->bo;
+ RADV_FROM_HANDLE(radv_buffer, buffer, buffer_info->buffer);
+
+ if (!buffer) {
+ memset(dst, 0, 4 * 4);
+ if (!cmd_buffer)
+ *buffer_list = NULL;
+ return;
+ }
+
+ uint64_t va = radv_buffer_get_va(buffer->bo);
+ uint32_t range = buffer_info->range;
+
+ if (buffer_info->range == VK_WHOLE_SIZE)
+ range = buffer->size - buffer_info->offset;
+
+ /* robustBufferAccess is relaxed enough to allow this (in combination
+ * with the alignment/size we return from vkGetBufferMemoryRequirements)
+ * and this allows the shader compiler to create more efficient 8/16-bit
+ * buffer accesses. */
+ range = align(range, 4);
+
+ va += buffer_info->offset + buffer->offset;
+
+ uint32_t rsrc_word3 =
+ S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+
+ if (device->physical_device->rad_info.chip_class >= GFX10) {
+ rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+ S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
+ } else {
+ rsrc_word3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+ }
+
+ dst[0] = va;
+ dst[1] = S_008F04_BASE_ADDRESS_HI(va >> 32);
+ dst[2] = range;
+ dst[3] = rsrc_word3;
+
+ if (cmd_buffer)
+ radv_cs_add_buffer(device->ws, cmd_buffer->cs, buffer->bo);
+ else
+ *buffer_list = buffer->bo;
}
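
write_buffer_descriptor above, like write_dynamic_buffer_descriptor below, resolves VK_WHOLE_SIZE against the buffer size, rounds the range up to 4 bytes (legal under the relaxed robustBufferAccess guarantees noted in the comments, and friendlier to 8/16-bit loads), and splits the virtual address across the first two descriptor dwords. A simplified sketch of that packing follows; the chip-specific format/swizzle word is taken as a parameter rather than rebuilt from the S_008F0C_* macros, and all names are hypothetical.

/* Pack a simplified 4-dword buffer descriptor: address low/high, byte range,
 * and an opaque hardware word.  This is an illustration, not the driver's
 * exact encoding. */
#include <stdint.h>

#define EXAMPLE_WHOLE_SIZE (~0ull)

static uint32_t
example_align_u32(uint32_t v, uint32_t a)
{
   return (v + a - 1) & ~(a - 1);
}

static void
example_pack_buffer_descriptor(uint32_t dst[4], uint64_t buffer_va, uint64_t buffer_size,
                               uint64_t offset, uint64_t range, uint32_t rsrc_word3)
{
   if (range == EXAMPLE_WHOLE_SIZE)
      range = buffer_size - offset;

   /* Round the range up to 4 so 8/16-bit accesses stay in bounds. */
   range = example_align_u32((uint32_t)range, 4);

   uint64_t va = buffer_va + offset;

   dst[0] = (uint32_t)va;         /* low 32 bits of the address */
   dst[1] = (uint32_t)(va >> 32); /* high bits; the real code masks these into BASE_ADDRESS_HI */
   dst[2] = (uint32_t)range;      /* size in bytes */
   dst[3] = rsrc_word3;           /* dst_sel/format bits, chip dependent */
}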
-static void write_block_descriptor(struct radv_device *device,
- struct radv_cmd_buffer *cmd_buffer,
- void *dst,
- const VkWriteDescriptorSet *writeset)
+static void
+write_block_descriptor(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer, void *dst,
+ const VkWriteDescriptorSet *writeset)
{
- const VkWriteDescriptorSetInlineUniformBlockEXT *inline_ub =
- vk_find_struct_const(writeset->pNext, WRITE_DESCRIPTOR_SET_INLINE_UNIFORM_BLOCK_EXT);
+ const VkWriteDescriptorSetInlineUniformBlockEXT *inline_ub =
+ vk_find_struct_const(writeset->pNext, WRITE_DESCRIPTOR_SET_INLINE_UNIFORM_BLOCK_EXT);
- memcpy(dst, inline_ub->pData, inline_ub->dataSize);
+ memcpy(dst, inline_ub->pData, inline_ub->dataSize);
}
-static void write_dynamic_buffer_descriptor(struct radv_device *device,
- struct radv_descriptor_range *range,
- struct radeon_winsys_bo **buffer_list,
- const VkDescriptorBufferInfo *buffer_info)
+static void
+write_dynamic_buffer_descriptor(struct radv_device *device, struct radv_descriptor_range *range,
+ struct radeon_winsys_bo **buffer_list,
+ const VkDescriptorBufferInfo *buffer_info)
{
- RADV_FROM_HANDLE(radv_buffer, buffer, buffer_info->buffer);
- uint64_t va;
- unsigned size;
+ RADV_FROM_HANDLE(radv_buffer, buffer, buffer_info->buffer);
+ uint64_t va;
+ unsigned size;
- if (!buffer) {
- range->va = 0;
- *buffer_list = NULL;
- return;
- }
+ if (!buffer) {
+ range->va = 0;
+ *buffer_list = NULL;
+ return;
+ }
- va = radv_buffer_get_va(buffer->bo);
- size = buffer_info->range;
+ va = radv_buffer_get_va(buffer->bo);
+ size = buffer_info->range;
- if (buffer_info->range == VK_WHOLE_SIZE)
- size = buffer->size - buffer_info->offset;
+ if (buffer_info->range == VK_WHOLE_SIZE)
+ size = buffer->size - buffer_info->offset;
- /* robustBufferAccess is relaxed enough to allow this (in combination
- * with the alignment/size we return from vkGetBufferMemoryRequirements)
- * and this allows the shader compiler to create more efficient 8/16-bit
- * buffer accesses. */
- size = align(size, 4);
+ /* robustBufferAccess is relaxed enough to allow this (in combination
+ * with the alignment/size we return from vkGetBufferMemoryRequirements)
+ * and this allows the shader compiler to create more efficient 8/16-bit
+ * buffer accesses. */
+ size = align(size, 4);
- va += buffer_info->offset + buffer->offset;
- range->va = va;
- range->size = size;
+ va += buffer_info->offset + buffer->offset;
+ range->va = va;
+ range->size = size;
- *buffer_list = buffer->bo;
+ *buffer_list = buffer->bo;
}
static void
-write_image_descriptor(struct radv_device *device,
- struct radv_cmd_buffer *cmd_buffer,
- unsigned size, unsigned *dst,
- struct radeon_winsys_bo **buffer_list,
- VkDescriptorType descriptor_type,
- const VkDescriptorImageInfo *image_info)
+write_image_descriptor(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer,
+ unsigned size, unsigned *dst, struct radeon_winsys_bo **buffer_list,
+ VkDescriptorType descriptor_type, const VkDescriptorImageInfo *image_info)
{
- RADV_FROM_HANDLE(radv_image_view, iview, image_info->imageView);
- union radv_descriptor *descriptor;
-
- if (!iview) {
- memset(dst, 0, size);
- if (!cmd_buffer)
- *buffer_list = NULL;
- return;
- }
-
- if (descriptor_type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) {
- descriptor = &iview->storage_descriptor;
- } else {
- descriptor = &iview->descriptor;
- }
-
- memcpy(dst, descriptor, size);
-
- if (cmd_buffer)
- radv_cs_add_buffer(device->ws, cmd_buffer->cs, iview->bo);
- else
- *buffer_list = iview->bo;
+ RADV_FROM_HANDLE(radv_image_view, iview, image_info->imageView);
+ union radv_descriptor *descriptor;
+
+ if (!iview) {
+ memset(dst, 0, size);
+ if (!cmd_buffer)
+ *buffer_list = NULL;
+ return;
+ }
+
+ if (descriptor_type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) {
+ descriptor = &iview->storage_descriptor;
+ } else {
+ descriptor = &iview->descriptor;
+ }
+
+ memcpy(dst, descriptor, size);
+
+ if (cmd_buffer)
+ radv_cs_add_buffer(device->ws, cmd_buffer->cs, iview->bo);
+ else
+ *buffer_list = iview->bo;
}
static void
write_combined_image_sampler_descriptor(struct radv_device *device,
- struct radv_cmd_buffer *cmd_buffer,
- unsigned sampler_offset,
- unsigned *dst,
- struct radeon_winsys_bo **buffer_list,
- VkDescriptorType descriptor_type,
- const VkDescriptorImageInfo *image_info,
- bool has_sampler)
+ struct radv_cmd_buffer *cmd_buffer, unsigned sampler_offset,
+ unsigned *dst, struct radeon_winsys_bo **buffer_list,
+ VkDescriptorType descriptor_type,
+ const VkDescriptorImageInfo *image_info, bool has_sampler)
{
- RADV_FROM_HANDLE(radv_sampler, sampler, image_info->sampler);
-
- write_image_descriptor(device, cmd_buffer, sampler_offset, dst, buffer_list,
- descriptor_type, image_info);
- /* copy over sampler state */
- if (has_sampler) {
- memcpy(dst + sampler_offset / sizeof(*dst), sampler->state, 16);
- }
+ RADV_FROM_HANDLE(radv_sampler, sampler, image_info->sampler);
+
+ write_image_descriptor(device, cmd_buffer, sampler_offset, dst, buffer_list, descriptor_type,
+ image_info);
+ /* copy over sampler state */
+ if (has_sampler) {
+ memcpy(dst + sampler_offset / sizeof(*dst), sampler->state, 16);
+ }
}
static void
-write_sampler_descriptor(struct radv_device *device,
- unsigned *dst,
- const VkDescriptorImageInfo *image_info)
+write_sampler_descriptor(struct radv_device *device, unsigned *dst,
+ const VkDescriptorImageInfo *image_info)
{
- RADV_FROM_HANDLE(radv_sampler, sampler, image_info->sampler);
+ RADV_FROM_HANDLE(radv_sampler, sampler, image_info->sampler);
- memcpy(dst, sampler->state, 16);
+ memcpy(dst, sampler->state, 16);
}
-void radv_update_descriptor_sets(
- struct radv_device* device,
- struct radv_cmd_buffer* cmd_buffer,
- VkDescriptorSet dstSetOverride,
- uint32_t descriptorWriteCount,
- const VkWriteDescriptorSet* pDescriptorWrites,
- uint32_t descriptorCopyCount,
- const VkCopyDescriptorSet* pDescriptorCopies)
+void
+radv_update_descriptor_sets(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer,
+ VkDescriptorSet dstSetOverride, uint32_t descriptorWriteCount,
+ const VkWriteDescriptorSet *pDescriptorWrites,
+ uint32_t descriptorCopyCount,
+ const VkCopyDescriptorSet *pDescriptorCopies)
{
- uint32_t i, j;
- for (i = 0; i < descriptorWriteCount; i++) {
- const VkWriteDescriptorSet *writeset = &pDescriptorWrites[i];
- RADV_FROM_HANDLE(radv_descriptor_set, set,
- dstSetOverride ? dstSetOverride : writeset->dstSet);
- const struct radv_descriptor_set_binding_layout *binding_layout =
- set->header.layout->binding + writeset->dstBinding;
- uint32_t *ptr = set->header.mapped_ptr;
- struct radeon_winsys_bo **buffer_list = set->descriptors;
- /* Immutable samplers are not copied into push descriptors when they are
- * allocated, so if we are writing push descriptors we have to copy the
- * immutable samplers into them now.
- */
- const bool copy_immutable_samplers = cmd_buffer &&
- binding_layout->immutable_samplers_offset && !binding_layout->immutable_samplers_equal;
- const uint32_t *samplers = radv_immutable_samplers(set->header.layout, binding_layout);
-
- ptr += binding_layout->offset / 4;
-
- if (writeset->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
- write_block_descriptor(device, cmd_buffer, (uint8_t*)ptr + writeset->dstArrayElement, writeset);
- continue;
- }
-
- ptr += binding_layout->size * writeset->dstArrayElement / 4;
- buffer_list += binding_layout->buffer_offset;
- buffer_list += writeset->dstArrayElement;
- for (j = 0; j < writeset->descriptorCount; ++j) {
- switch(writeset->descriptorType) {
- case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
- case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
- unsigned idx = writeset->dstArrayElement + j;
- idx += binding_layout->dynamic_offset_offset;
- assert(!(set->header.layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
- write_dynamic_buffer_descriptor(device, set->header.dynamic_descriptors + idx,
- buffer_list, writeset->pBufferInfo + j);
- break;
- }
- case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
- case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
- write_buffer_descriptor(device, cmd_buffer, ptr, buffer_list,
- writeset->pBufferInfo + j);
- break;
- case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
- case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
- write_texel_buffer_descriptor(device, cmd_buffer, ptr, buffer_list,
- writeset->pTexelBufferView[j]);
- break;
- case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
- case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
- case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
- write_image_descriptor(device, cmd_buffer, 64, ptr, buffer_list,
- writeset->descriptorType,
- writeset->pImageInfo + j);
- break;
- case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: {
- unsigned sampler_offset = radv_combined_image_descriptor_sampler_offset(binding_layout);
- write_combined_image_sampler_descriptor(device, cmd_buffer, sampler_offset,
- ptr, buffer_list,
- writeset->descriptorType,
- writeset->pImageInfo + j,
- !binding_layout->immutable_samplers_offset);
- if (copy_immutable_samplers) {
- const unsigned idx = writeset->dstArrayElement + j;
- memcpy((char*)ptr + sampler_offset, samplers + 4 * idx, 16);
- }
- break;
- }
- case VK_DESCRIPTOR_TYPE_SAMPLER:
- if (!binding_layout->immutable_samplers_offset) {
- write_sampler_descriptor(device, ptr,
- writeset->pImageInfo + j);
- } else if (copy_immutable_samplers) {
- unsigned idx = writeset->dstArrayElement + j;
- memcpy(ptr, samplers + 4 * idx, 16);
- }
- break;
- default:
- break;
- }
- ptr += binding_layout->size / 4;
- ++buffer_list;
- }
-
- }
-
- for (i = 0; i < descriptorCopyCount; i++) {
- const VkCopyDescriptorSet *copyset = &pDescriptorCopies[i];
- RADV_FROM_HANDLE(radv_descriptor_set, src_set,
- copyset->srcSet);
- RADV_FROM_HANDLE(radv_descriptor_set, dst_set,
- copyset->dstSet);
- const struct radv_descriptor_set_binding_layout *src_binding_layout =
- src_set->header.layout->binding + copyset->srcBinding;
- const struct radv_descriptor_set_binding_layout *dst_binding_layout =
- dst_set->header.layout->binding + copyset->dstBinding;
- uint32_t *src_ptr = src_set->header.mapped_ptr;
- uint32_t *dst_ptr = dst_set->header.mapped_ptr;
- struct radeon_winsys_bo **src_buffer_list = src_set->descriptors;
- struct radeon_winsys_bo **dst_buffer_list = dst_set->descriptors;
-
- src_ptr += src_binding_layout->offset / 4;
- dst_ptr += dst_binding_layout->offset / 4;
-
- if (src_binding_layout->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
- src_ptr += copyset->srcArrayElement / 4;
- dst_ptr += copyset->dstArrayElement / 4;
-
- memcpy(dst_ptr, src_ptr, copyset->descriptorCount);
- continue;
- }
-
- src_ptr += src_binding_layout->size * copyset->srcArrayElement / 4;
- dst_ptr += dst_binding_layout->size * copyset->dstArrayElement / 4;
-
- src_buffer_list += src_binding_layout->buffer_offset;
- src_buffer_list += copyset->srcArrayElement;
-
- dst_buffer_list += dst_binding_layout->buffer_offset;
- dst_buffer_list += copyset->dstArrayElement;
-
- /* In case of copies between mutable descriptor types
- * and non-mutable descriptor types. */
- size_t copy_size = MIN2(src_binding_layout->size, dst_binding_layout->size);
-
- for (j = 0; j < copyset->descriptorCount; ++j) {
- switch (src_binding_layout->type) {
- case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
- case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
- unsigned src_idx = copyset->srcArrayElement + j;
- unsigned dst_idx = copyset->dstArrayElement + j;
- struct radv_descriptor_range *src_range, *dst_range;
- src_idx += src_binding_layout->dynamic_offset_offset;
- dst_idx += dst_binding_layout->dynamic_offset_offset;
-
- src_range = src_set->header.dynamic_descriptors + src_idx;
- dst_range = dst_set->header.dynamic_descriptors + dst_idx;
- *dst_range = *src_range;
- break;
- }
- default:
- memcpy(dst_ptr, src_ptr, copy_size);
- }
- src_ptr += src_binding_layout->size / 4;
- dst_ptr += dst_binding_layout->size / 4;
-
- if (src_binding_layout->type != VK_DESCRIPTOR_TYPE_SAMPLER) {
- /* Sampler descriptors don't have a buffer list. */
- dst_buffer_list[j] = src_buffer_list[j];
- }
- }
- }
+ uint32_t i, j;
+ for (i = 0; i < descriptorWriteCount; i++) {
+ const VkWriteDescriptorSet *writeset = &pDescriptorWrites[i];
+ RADV_FROM_HANDLE(radv_descriptor_set, set,
+ dstSetOverride ? dstSetOverride : writeset->dstSet);
+ const struct radv_descriptor_set_binding_layout *binding_layout =
+ set->header.layout->binding + writeset->dstBinding;
+ uint32_t *ptr = set->header.mapped_ptr;
+ struct radeon_winsys_bo **buffer_list = set->descriptors;
+ /* Immutable samplers are not copied into push descriptors when they are
+ * allocated, so if we are writing push descriptors we have to copy the
+ * immutable samplers into them now.
+ */
+ const bool copy_immutable_samplers = cmd_buffer &&
+ binding_layout->immutable_samplers_offset &&
+ !binding_layout->immutable_samplers_equal;
+ const uint32_t *samplers = radv_immutable_samplers(set->header.layout, binding_layout);
+
+ ptr += binding_layout->offset / 4;
+
+ if (writeset->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
+ write_block_descriptor(device, cmd_buffer, (uint8_t *)ptr + writeset->dstArrayElement,
+ writeset);
+ continue;
+ }
+
+ ptr += binding_layout->size * writeset->dstArrayElement / 4;
+ buffer_list += binding_layout->buffer_offset;
+ buffer_list += writeset->dstArrayElement;
+ for (j = 0; j < writeset->descriptorCount; ++j) {
+ switch (writeset->descriptorType) {
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
+ unsigned idx = writeset->dstArrayElement + j;
+ idx += binding_layout->dynamic_offset_offset;
+ assert(!(set->header.layout->flags &
+ VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
+ write_dynamic_buffer_descriptor(device, set->header.dynamic_descriptors + idx,
+ buffer_list, writeset->pBufferInfo + j);
+ break;
+ }
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ write_buffer_descriptor(device, cmd_buffer, ptr, buffer_list,
+ writeset->pBufferInfo + j);
+ break;
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ write_texel_buffer_descriptor(device, cmd_buffer, ptr, buffer_list,
+ writeset->pTexelBufferView[j]);
+ break;
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+ write_image_descriptor(device, cmd_buffer, 64, ptr, buffer_list,
+ writeset->descriptorType, writeset->pImageInfo + j);
+ break;
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: {
+ unsigned sampler_offset = radv_combined_image_descriptor_sampler_offset(binding_layout);
+ write_combined_image_sampler_descriptor(
+ device, cmd_buffer, sampler_offset, ptr, buffer_list, writeset->descriptorType,
+ writeset->pImageInfo + j, !binding_layout->immutable_samplers_offset);
+ if (copy_immutable_samplers) {
+ const unsigned idx = writeset->dstArrayElement + j;
+ memcpy((char *)ptr + sampler_offset, samplers + 4 * idx, 16);
+ }
+ break;
+ }
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ if (!binding_layout->immutable_samplers_offset) {
+ write_sampler_descriptor(device, ptr, writeset->pImageInfo + j);
+ } else if (copy_immutable_samplers) {
+ unsigned idx = writeset->dstArrayElement + j;
+ memcpy(ptr, samplers + 4 * idx, 16);
+ }
+ break;
+ default:
+ break;
+ }
+ ptr += binding_layout->size / 4;
+ ++buffer_list;
+ }
+ }
+
+ for (i = 0; i < descriptorCopyCount; i++) {
+ const VkCopyDescriptorSet *copyset = &pDescriptorCopies[i];
+ RADV_FROM_HANDLE(radv_descriptor_set, src_set, copyset->srcSet);
+ RADV_FROM_HANDLE(radv_descriptor_set, dst_set, copyset->dstSet);
+ const struct radv_descriptor_set_binding_layout *src_binding_layout =
+ src_set->header.layout->binding + copyset->srcBinding;
+ const struct radv_descriptor_set_binding_layout *dst_binding_layout =
+ dst_set->header.layout->binding + copyset->dstBinding;
+ uint32_t *src_ptr = src_set->header.mapped_ptr;
+ uint32_t *dst_ptr = dst_set->header.mapped_ptr;
+ struct radeon_winsys_bo **src_buffer_list = src_set->descriptors;
+ struct radeon_winsys_bo **dst_buffer_list = dst_set->descriptors;
+
+ src_ptr += src_binding_layout->offset / 4;
+ dst_ptr += dst_binding_layout->offset / 4;
+
+ if (src_binding_layout->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
+ src_ptr += copyset->srcArrayElement / 4;
+ dst_ptr += copyset->dstArrayElement / 4;
+
+ memcpy(dst_ptr, src_ptr, copyset->descriptorCount);
+ continue;
+ }
+
+ src_ptr += src_binding_layout->size * copyset->srcArrayElement / 4;
+ dst_ptr += dst_binding_layout->size * copyset->dstArrayElement / 4;
+
+ src_buffer_list += src_binding_layout->buffer_offset;
+ src_buffer_list += copyset->srcArrayElement;
+
+ dst_buffer_list += dst_binding_layout->buffer_offset;
+ dst_buffer_list += copyset->dstArrayElement;
+
+ /* In case of copies between mutable descriptor types
+ * and non-mutable descriptor types. */
+ size_t copy_size = MIN2(src_binding_layout->size, dst_binding_layout->size);
+
+ for (j = 0; j < copyset->descriptorCount; ++j) {
+ switch (src_binding_layout->type) {
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
+ unsigned src_idx = copyset->srcArrayElement + j;
+ unsigned dst_idx = copyset->dstArrayElement + j;
+ struct radv_descriptor_range *src_range, *dst_range;
+ src_idx += src_binding_layout->dynamic_offset_offset;
+ dst_idx += dst_binding_layout->dynamic_offset_offset;
+
+ src_range = src_set->header.dynamic_descriptors + src_idx;
+ dst_range = dst_set->header.dynamic_descriptors + dst_idx;
+ *dst_range = *src_range;
+ break;
+ }
+ default:
+ memcpy(dst_ptr, src_ptr, copy_size);
+ }
+ src_ptr += src_binding_layout->size / 4;
+ dst_ptr += dst_binding_layout->size / 4;
+
+ if (src_binding_layout->type != VK_DESCRIPTOR_TYPE_SAMPLER) {
+ /* Sampler descriptors don't have a buffer list. */
+ dst_buffer_list[j] = src_buffer_list[j];
+ }
+ }
+ }
}
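
The reformatted radv_update_descriptor_sets above steps a uint32_t pointer through the host-mapped set (offset/4, then size/4 per array element) and, on the copy path, clamps the per-descriptor copy width to the smaller of the two binding sizes to cope with mutable descriptor bindings. A rough standalone sketch of that arithmetic follows, with toy structs standing in for the radv types (nothing below is the driver's real layout):

/* Sketch of the offset arithmetic used when writing descriptors into a
 * host-mapped set. The structs are toy stand-ins, not the radv types. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct toy_binding_layout {
   uint32_t offset; /* byte offset of the binding inside the set */
   uint32_t size;   /* byte size of one descriptor in this binding */
};

#define MIN2(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
   uint32_t set_memory[64] = {0}; /* pretend mapped_ptr */
   struct toy_binding_layout binding = {.offset = 32, .size = 16};
   uint32_t first_element = 2, count = 3;

   /* Same stepping as the loop above: offset/4 and size/4 because the
    * mapping is addressed as 32-bit words. */
   uint32_t *ptr = set_memory + binding.offset / 4;
   ptr += binding.size * first_element / 4;

   for (uint32_t j = 0; j < count; ++j) {
      memset(ptr, 0xab, binding.size); /* stand-in for a descriptor write */
      ptr += binding.size / 4;
   }

   /* Copy path: when source and destination bindings disagree on size
    * (mutable vs. non-mutable descriptors), only the smaller size is copied. */
   struct toy_binding_layout src = {.offset = 0, .size = 32};
   struct toy_binding_layout dst = {.offset = 0, .size = 16};
   size_t copy_size = MIN2(src.size, dst.size);
   printf("copy %zu bytes per descriptor\n", copy_size);
   return 0;
}

Dividing by four everywhere simply reflects that the set memory is walked as an array of dwords.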
-void radv_UpdateDescriptorSets(
- VkDevice _device,
- uint32_t descriptorWriteCount,
- const VkWriteDescriptorSet* pDescriptorWrites,
- uint32_t descriptorCopyCount,
- const VkCopyDescriptorSet* pDescriptorCopies)
+void
+radv_UpdateDescriptorSets(VkDevice _device, uint32_t descriptorWriteCount,
+ const VkWriteDescriptorSet *pDescriptorWrites,
+ uint32_t descriptorCopyCount,
+ const VkCopyDescriptorSet *pDescriptorCopies)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_device, device, _device);
- radv_update_descriptor_sets(device, NULL, VK_NULL_HANDLE, descriptorWriteCount, pDescriptorWrites,
- descriptorCopyCount, pDescriptorCopies);
+ radv_update_descriptor_sets(device, NULL, VK_NULL_HANDLE, descriptorWriteCount,
+ pDescriptorWrites, descriptorCopyCount, pDescriptorCopies);
}
-VkResult radv_CreateDescriptorUpdateTemplate(VkDevice _device,
- const VkDescriptorUpdateTemplateCreateInfo *pCreateInfo,
- const VkAllocationCallbacks *pAllocator,
- VkDescriptorUpdateTemplate *pDescriptorUpdateTemplate)
+VkResult
+radv_CreateDescriptorUpdateTemplate(VkDevice _device,
+ const VkDescriptorUpdateTemplateCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkDescriptorUpdateTemplate *pDescriptorUpdateTemplate)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_descriptor_set_layout, set_layout, pCreateInfo->descriptorSetLayout);
- const uint32_t entry_count = pCreateInfo->descriptorUpdateEntryCount;
- const size_t size = sizeof(struct radv_descriptor_update_template) +
- sizeof(struct radv_descriptor_update_template_entry) * entry_count;
- struct radv_descriptor_update_template *templ;
- uint32_t i;
-
- templ = vk_alloc2(&device->vk.alloc, pAllocator, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (!templ)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
-
- vk_object_base_init(&device->vk, &templ->base,
- VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE);
-
- templ->entry_count = entry_count;
-
- if (pCreateInfo->templateType == VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR) {
- RADV_FROM_HANDLE(radv_pipeline_layout, pipeline_layout, pCreateInfo->pipelineLayout);
-
- /* descriptorSetLayout should be ignored for push descriptors
- * and instead it refers to pipelineLayout and set.
- */
- assert(pCreateInfo->set < MAX_SETS);
- set_layout = pipeline_layout->set[pCreateInfo->set].layout;
-
- templ->bind_point = pCreateInfo->pipelineBindPoint;
- }
-
- for (i = 0; i < entry_count; i++) {
- const VkDescriptorUpdateTemplateEntry *entry = &pCreateInfo->pDescriptorUpdateEntries[i];
- const struct radv_descriptor_set_binding_layout *binding_layout =
- set_layout->binding + entry->dstBinding;
- const uint32_t buffer_offset = binding_layout->buffer_offset + entry->dstArrayElement;
- const uint32_t *immutable_samplers = NULL;
- uint32_t dst_offset;
- uint32_t dst_stride;
-
- /* dst_offset is an offset into dynamic_descriptors when the descriptor
- is dynamic, and an offset into mapped_ptr otherwise */
- switch (entry->descriptorType) {
- case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
- case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
- assert(pCreateInfo->templateType == VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET);
- dst_offset = binding_layout->dynamic_offset_offset + entry->dstArrayElement;
- dst_stride = 0; /* Not used */
- break;
- default:
- switch (entry->descriptorType) {
- case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
- case VK_DESCRIPTOR_TYPE_SAMPLER:
- /* Immutable samplers are copied into push descriptors when they are pushed */
- if (pCreateInfo->templateType == VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR &&
- binding_layout->immutable_samplers_offset && !binding_layout->immutable_samplers_equal) {
- immutable_samplers = radv_immutable_samplers(set_layout, binding_layout) + entry->dstArrayElement * 4;
- }
- break;
- default:
- break;
- }
- dst_offset = binding_layout->offset / 4;
- if (entry->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT)
- dst_offset += entry->dstArrayElement / 4;
- else
- dst_offset += binding_layout->size * entry->dstArrayElement / 4;
-
- dst_stride = binding_layout->size / 4;
- break;
- }
-
- templ->entry[i] = (struct radv_descriptor_update_template_entry) {
- .descriptor_type = entry->descriptorType,
- .descriptor_count = entry->descriptorCount,
- .src_offset = entry->offset,
- .src_stride = entry->stride,
- .dst_offset = dst_offset,
- .dst_stride = dst_stride,
- .buffer_offset = buffer_offset,
- .has_sampler = !binding_layout->immutable_samplers_offset,
- .sampler_offset = radv_combined_image_descriptor_sampler_offset(binding_layout),
- .immutable_samplers = immutable_samplers
- };
- }
-
- *pDescriptorUpdateTemplate = radv_descriptor_update_template_to_handle(templ);
- return VK_SUCCESS;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_descriptor_set_layout, set_layout, pCreateInfo->descriptorSetLayout);
+ const uint32_t entry_count = pCreateInfo->descriptorUpdateEntryCount;
+ const size_t size = sizeof(struct radv_descriptor_update_template) +
+ sizeof(struct radv_descriptor_update_template_entry) * entry_count;
+ struct radv_descriptor_update_template *templ;
+ uint32_t i;
+
+ templ = vk_alloc2(&device->vk.alloc, pAllocator, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (!templ)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ vk_object_base_init(&device->vk, &templ->base, VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE);
+
+ templ->entry_count = entry_count;
+
+ if (pCreateInfo->templateType == VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR) {
+ RADV_FROM_HANDLE(radv_pipeline_layout, pipeline_layout, pCreateInfo->pipelineLayout);
+
+ /* descriptorSetLayout should be ignored for push descriptors
+ * and instead it refers to pipelineLayout and set.
+ */
+ assert(pCreateInfo->set < MAX_SETS);
+ set_layout = pipeline_layout->set[pCreateInfo->set].layout;
+
+ templ->bind_point = pCreateInfo->pipelineBindPoint;
+ }
+
+ for (i = 0; i < entry_count; i++) {
+ const VkDescriptorUpdateTemplateEntry *entry = &pCreateInfo->pDescriptorUpdateEntries[i];
+ const struct radv_descriptor_set_binding_layout *binding_layout =
+ set_layout->binding + entry->dstBinding;
+ const uint32_t buffer_offset = binding_layout->buffer_offset + entry->dstArrayElement;
+ const uint32_t *immutable_samplers = NULL;
+ uint32_t dst_offset;
+ uint32_t dst_stride;
+
+ /* dst_offset is an offset into dynamic_descriptors when the descriptor
+ is dynamic, and an offset into mapped_ptr otherwise */
+ switch (entry->descriptorType) {
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+ assert(pCreateInfo->templateType == VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET);
+ dst_offset = binding_layout->dynamic_offset_offset + entry->dstArrayElement;
+ dst_stride = 0; /* Not used */
+ break;
+ default:
+ switch (entry->descriptorType) {
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ /* Immutable samplers are copied into push descriptors when they are pushed */
+ if (pCreateInfo->templateType ==
+ VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR &&
+ binding_layout->immutable_samplers_offset &&
+ !binding_layout->immutable_samplers_equal) {
+ immutable_samplers =
+ radv_immutable_samplers(set_layout, binding_layout) + entry->dstArrayElement * 4;
+ }
+ break;
+ default:
+ break;
+ }
+ dst_offset = binding_layout->offset / 4;
+ if (entry->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT)
+ dst_offset += entry->dstArrayElement / 4;
+ else
+ dst_offset += binding_layout->size * entry->dstArrayElement / 4;
+
+ dst_stride = binding_layout->size / 4;
+ break;
+ }
+
+ templ->entry[i] = (struct radv_descriptor_update_template_entry){
+ .descriptor_type = entry->descriptorType,
+ .descriptor_count = entry->descriptorCount,
+ .src_offset = entry->offset,
+ .src_stride = entry->stride,
+ .dst_offset = dst_offset,
+ .dst_stride = dst_stride,
+ .buffer_offset = buffer_offset,
+ .has_sampler = !binding_layout->immutable_samplers_offset,
+ .sampler_offset = radv_combined_image_descriptor_sampler_offset(binding_layout),
+ .immutable_samplers = immutable_samplers};
+ }
+
+ *pDescriptorUpdateTemplate = radv_descriptor_update_template_to_handle(templ);
+ return VK_SUCCESS;
}
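
radv_CreateDescriptorUpdateTemplate above precomputes a destination offset and stride per template entry: dynamic buffers get an index into dynamic_descriptors with an unused stride, everything else gets a dword offset into mapped_ptr and a stride of binding size / 4, with inline uniform blocks addressed byte-wise. A hedged, self-contained sketch of that branch structure, using invented types rather than the driver's:

/* Toy model of how a template entry's destination offset/stride could be
 * derived from a binding layout; mirrors the branches above but uses
 * made-up types and names. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct toy_binding {
   uint32_t offset;                /* byte offset of the binding */
   uint32_t size;                  /* byte size of one descriptor */
   uint32_t dynamic_offset_offset; /* index into the dynamic descriptor array */
};

static void
compute_dst(const struct toy_binding *b, uint32_t dst_array_element, bool is_dynamic,
            bool is_inline_uniform_block, uint32_t *dst_offset, uint32_t *dst_stride)
{
   if (is_dynamic) {
      *dst_offset = b->dynamic_offset_offset + dst_array_element; /* array index */
      *dst_stride = 0;                                            /* not used */
   } else {
      *dst_offset = b->offset / 4;
      if (is_inline_uniform_block)
         *dst_offset += dst_array_element / 4; /* inline blocks count bytes, not descriptors */
      else
         *dst_offset += b->size * dst_array_element / 4;
      *dst_stride = b->size / 4;
   }
}

int main(void)
{
   struct toy_binding b = {.offset = 64, .size = 16, .dynamic_offset_offset = 1};
   uint32_t off, stride;
   compute_dst(&b, 3, false, false, &off, &stride);
   printf("dst_offset=%u dwords, dst_stride=%u dwords\n", off, stride);
   return 0;
}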
-void radv_DestroyDescriptorUpdateTemplate(VkDevice _device,
- VkDescriptorUpdateTemplate descriptorUpdateTemplate,
- const VkAllocationCallbacks *pAllocator)
+void
+radv_DestroyDescriptorUpdateTemplate(VkDevice _device,
+ VkDescriptorUpdateTemplate descriptorUpdateTemplate,
+ const VkAllocationCallbacks *pAllocator)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_descriptor_update_template, templ, descriptorUpdateTemplate);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_descriptor_update_template, templ, descriptorUpdateTemplate);
- if (!templ)
- return;
+ if (!templ)
+ return;
- vk_object_base_finish(&templ->base);
- vk_free2(&device->vk.alloc, pAllocator, templ);
+ vk_object_base_finish(&templ->base);
+ vk_free2(&device->vk.alloc, pAllocator, templ);
}
-void radv_update_descriptor_set_with_template(struct radv_device *device,
- struct radv_cmd_buffer *cmd_buffer,
- struct radv_descriptor_set *set,
- VkDescriptorUpdateTemplate descriptorUpdateTemplate,
- const void *pData)
+void
+radv_update_descriptor_set_with_template(struct radv_device *device,
+ struct radv_cmd_buffer *cmd_buffer,
+ struct radv_descriptor_set *set,
+ VkDescriptorUpdateTemplate descriptorUpdateTemplate,
+ const void *pData)
{
- RADV_FROM_HANDLE(radv_descriptor_update_template, templ, descriptorUpdateTemplate);
- uint32_t i;
-
- for (i = 0; i < templ->entry_count; ++i) {
- struct radeon_winsys_bo **buffer_list = set->descriptors + templ->entry[i].buffer_offset;
- uint32_t *pDst = set->header.mapped_ptr + templ->entry[i].dst_offset;
- const uint8_t *pSrc = ((const uint8_t *) pData) + templ->entry[i].src_offset;
- uint32_t j;
-
- if (templ->entry[i].descriptor_type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
- memcpy((uint8_t*)pDst, pSrc, templ->entry[i].descriptor_count);
- continue;
- }
-
- for (j = 0; j < templ->entry[i].descriptor_count; ++j) {
- switch (templ->entry[i].descriptor_type) {
- case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
- case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
- const unsigned idx = templ->entry[i].dst_offset + j;
- assert(!(set->header.layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
- write_dynamic_buffer_descriptor(device, set->header.dynamic_descriptors + idx,
- buffer_list, (struct VkDescriptorBufferInfo *) pSrc);
- break;
- }
- case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
- case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
- write_buffer_descriptor(device, cmd_buffer, pDst, buffer_list,
- (struct VkDescriptorBufferInfo *) pSrc);
- break;
- case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
- case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
- write_texel_buffer_descriptor(device, cmd_buffer, pDst, buffer_list,
- *(VkBufferView *) pSrc);
- break;
- case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
- case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
- case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
- write_image_descriptor(device, cmd_buffer, 64, pDst, buffer_list,
- templ->entry[i].descriptor_type,
- (struct VkDescriptorImageInfo *) pSrc);
- break;
- case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
- write_combined_image_sampler_descriptor(device, cmd_buffer, templ->entry[i].sampler_offset,
- pDst, buffer_list, templ->entry[i].descriptor_type,
- (struct VkDescriptorImageInfo *) pSrc,
- templ->entry[i].has_sampler);
- if (templ->entry[i].immutable_samplers) {
- memcpy((char*)pDst + templ->entry[i].sampler_offset, templ->entry[i].immutable_samplers + 4 * j, 16);
- }
- break;
- case VK_DESCRIPTOR_TYPE_SAMPLER:
- if (templ->entry[i].has_sampler)
- write_sampler_descriptor(device, pDst,
- (struct VkDescriptorImageInfo *) pSrc);
- else if (templ->entry[i].immutable_samplers)
- memcpy(pDst, templ->entry[i].immutable_samplers + 4 * j, 16);
- break;
- default:
- break;
- }
- pSrc += templ->entry[i].src_stride;
- pDst += templ->entry[i].dst_stride;
- ++buffer_list;
- }
- }
+ RADV_FROM_HANDLE(radv_descriptor_update_template, templ, descriptorUpdateTemplate);
+ uint32_t i;
+
+ for (i = 0; i < templ->entry_count; ++i) {
+ struct radeon_winsys_bo **buffer_list = set->descriptors + templ->entry[i].buffer_offset;
+ uint32_t *pDst = set->header.mapped_ptr + templ->entry[i].dst_offset;
+ const uint8_t *pSrc = ((const uint8_t *)pData) + templ->entry[i].src_offset;
+ uint32_t j;
+
+ if (templ->entry[i].descriptor_type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
+ memcpy((uint8_t *)pDst, pSrc, templ->entry[i].descriptor_count);
+ continue;
+ }
+
+ for (j = 0; j < templ->entry[i].descriptor_count; ++j) {
+ switch (templ->entry[i].descriptor_type) {
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
+ const unsigned idx = templ->entry[i].dst_offset + j;
+ assert(!(set->header.layout->flags &
+ VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
+ write_dynamic_buffer_descriptor(device, set->header.dynamic_descriptors + idx,
+ buffer_list, (struct VkDescriptorBufferInfo *)pSrc);
+ break;
+ }
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ write_buffer_descriptor(device, cmd_buffer, pDst, buffer_list,
+ (struct VkDescriptorBufferInfo *)pSrc);
+ break;
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ write_texel_buffer_descriptor(device, cmd_buffer, pDst, buffer_list,
+ *(VkBufferView *)pSrc);
+ break;
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+ write_image_descriptor(device, cmd_buffer, 64, pDst, buffer_list,
+ templ->entry[i].descriptor_type,
+ (struct VkDescriptorImageInfo *)pSrc);
+ break;
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ write_combined_image_sampler_descriptor(
+ device, cmd_buffer, templ->entry[i].sampler_offset, pDst, buffer_list,
+ templ->entry[i].descriptor_type, (struct VkDescriptorImageInfo *)pSrc,
+ templ->entry[i].has_sampler);
+ if (templ->entry[i].immutable_samplers) {
+ memcpy((char *)pDst + templ->entry[i].sampler_offset,
+ templ->entry[i].immutable_samplers + 4 * j, 16);
+ }
+ break;
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ if (templ->entry[i].has_sampler)
+ write_sampler_descriptor(device, pDst, (struct VkDescriptorImageInfo *)pSrc);
+ else if (templ->entry[i].immutable_samplers)
+ memcpy(pDst, templ->entry[i].immutable_samplers + 4 * j, 16);
+ break;
+ default:
+ break;
+ }
+ pSrc += templ->entry[i].src_stride;
+ pDst += templ->entry[i].dst_stride;
+ ++buffer_list;
+ }
+ }
}
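
radv_update_descriptor_set_with_template above consumes application memory using the src_offset and src_stride captured at template creation time. A minimal application-side sketch of how such strided source data might be laid out and walked; the struct and the values are hypothetical, only the stride-walking pattern mirrors the loop above:

/* Application-side view: the template's src_offset/src_stride describe where
 * each descriptor's data lives inside one opaque pData blob. Toy types only. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct toy_buffer_info { /* stand-in for VkDescriptorBufferInfo */
   uint64_t buffer;
   uint64_t offset;
   uint64_t range;
};

int main(void)
{
   /* The application packs three buffer infos with padding between them. */
   struct entry { struct toy_buffer_info info; uint32_t pad; } data[3];
   for (int i = 0; i < 3; i++)
      data[i].info = (struct toy_buffer_info){.buffer = 0x1000 + i, .offset = 0, .range = 256};

   const uint32_t src_offset = 0;                 /* would come from the template entry */
   const uint32_t src_stride = sizeof(struct entry);

   /* The consumer walks pData exactly like the template update loop does. */
   const uint8_t *pSrc = (const uint8_t *)data + src_offset;
   for (int j = 0; j < 3; j++) {
      struct toy_buffer_info info;
      memcpy(&info, pSrc, sizeof(info));
      printf("descriptor %d -> buffer 0x%llx\n", j, (unsigned long long)info.buffer);
      pSrc += src_stride;
   }
   return 0;
}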
-void radv_UpdateDescriptorSetWithTemplate(VkDevice _device,
- VkDescriptorSet descriptorSet,
- VkDescriptorUpdateTemplate descriptorUpdateTemplate,
- const void *pData)
+void
+radv_UpdateDescriptorSetWithTemplate(VkDevice _device, VkDescriptorSet descriptorSet,
+ VkDescriptorUpdateTemplate descriptorUpdateTemplate,
+ const void *pData)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_descriptor_set, set, descriptorSet);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_descriptor_set, set, descriptorSet);
- radv_update_descriptor_set_with_template(device, NULL, set, descriptorUpdateTemplate, pData);
+ radv_update_descriptor_set_with_template(device, NULL, set, descriptorUpdateTemplate, pData);
}
-
-VkResult radv_CreateSamplerYcbcrConversion(VkDevice _device,
- const VkSamplerYcbcrConversionCreateInfo* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkSamplerYcbcrConversion* pYcbcrConversion)
+VkResult
+radv_CreateSamplerYcbcrConversion(VkDevice _device,
+ const VkSamplerYcbcrConversionCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkSamplerYcbcrConversion *pYcbcrConversion)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- struct radv_sampler_ycbcr_conversion *conversion = NULL;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ struct radv_sampler_ycbcr_conversion *conversion = NULL;
- conversion = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*conversion), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ conversion = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*conversion), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (conversion == NULL)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ if (conversion == NULL)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- vk_object_base_init(&device->vk, &conversion->base,
- VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION);
+ vk_object_base_init(&device->vk, &conversion->base, VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION);
- conversion->format = pCreateInfo->format;
- conversion->ycbcr_model = pCreateInfo->ycbcrModel;
- conversion->ycbcr_range = pCreateInfo->ycbcrRange;
- conversion->components = pCreateInfo->components;
- conversion->chroma_offsets[0] = pCreateInfo->xChromaOffset;
- conversion->chroma_offsets[1] = pCreateInfo->yChromaOffset;
- conversion->chroma_filter = pCreateInfo->chromaFilter;
+ conversion->format = pCreateInfo->format;
+ conversion->ycbcr_model = pCreateInfo->ycbcrModel;
+ conversion->ycbcr_range = pCreateInfo->ycbcrRange;
+ conversion->components = pCreateInfo->components;
+ conversion->chroma_offsets[0] = pCreateInfo->xChromaOffset;
+ conversion->chroma_offsets[1] = pCreateInfo->yChromaOffset;
+ conversion->chroma_filter = pCreateInfo->chromaFilter;
- *pYcbcrConversion = radv_sampler_ycbcr_conversion_to_handle(conversion);
- return VK_SUCCESS;
+ *pYcbcrConversion = radv_sampler_ycbcr_conversion_to_handle(conversion);
+ return VK_SUCCESS;
}
-
-void radv_DestroySamplerYcbcrConversion(VkDevice _device,
- VkSamplerYcbcrConversion ycbcrConversion,
- const VkAllocationCallbacks* pAllocator)
+void
+radv_DestroySamplerYcbcrConversion(VkDevice _device, VkSamplerYcbcrConversion ycbcrConversion,
+ const VkAllocationCallbacks *pAllocator)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_sampler_ycbcr_conversion, ycbcr_conversion, ycbcrConversion);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_sampler_ycbcr_conversion, ycbcr_conversion, ycbcrConversion);
- if (!ycbcr_conversion)
- return;
+ if (!ycbcr_conversion)
+ return;
- vk_object_base_finish(&ycbcr_conversion->base);
- vk_free2(&device->vk.alloc, pAllocator, ycbcr_conversion);
+ vk_object_base_finish(&ycbcr_conversion->base);
+ vk_free2(&device->vk.alloc, pAllocator, ycbcr_conversion);
}
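
radv_CreateSamplerYcbcrConversion above simply copies the create-info fields onto the conversion object. For context, here is a sketch of the application-side VkSamplerYcbcrConversionCreateInfo those fields come from; the program only fills and prints the structure (no device and no vkCreateSamplerYcbcrConversion call), and the chosen format and settings are purely illustrative:

/* Application-side sketch: the fields copied by the driver above originate in
 * a VkSamplerYcbcrConversionCreateInfo like this one. Runs without a GPU
 * because no Vulkan entry point is called. */
#include <stdio.h>
#include <vulkan/vulkan.h>

int main(void)
{
   VkSamplerYcbcrConversionCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_CREATE_INFO,
      .format = VK_FORMAT_G8_B8R8_2PLANE_420_UNORM,
      .ycbcrModel = VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709,
      .ycbcrRange = VK_SAMPLER_YCBCR_RANGE_ITU_NARROW,
      .components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
                     VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY},
      .xChromaOffset = VK_CHROMA_LOCATION_MIDPOINT,
      .yChromaOffset = VK_CHROMA_LOCATION_MIDPOINT,
      .chromaFilter = VK_FILTER_LINEAR,
      .forceExplicitReconstruction = VK_FALSE,
   };

   /* These are exactly the fields the driver stores on its conversion object. */
   printf("format=%d model=%d range=%d filter=%d\n", info.format, info.ycbcrModel,
          info.ycbcrRange, info.chromaFilter);
   return 0;
}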
diff --git a/src/amd/vulkan/radv_descriptor_set.h b/src/amd/vulkan/radv_descriptor_set.h
index fdaaca07318..65d33f19dc3 100644
--- a/src/amd/vulkan/radv_descriptor_set.h
+++ b/src/amd/vulkan/radv_descriptor_set.h
@@ -104,27 +104,30 @@ struct radv_pipeline_layout {
static inline const uint32_t *
radv_immutable_samplers(const struct radv_descriptor_set_layout *set,
- const struct radv_descriptor_set_binding_layout *binding) {
- return (const uint32_t*)((const char*)set + binding->immutable_samplers_offset);
+ const struct radv_descriptor_set_binding_layout *binding)
+{
+ return (const uint32_t *)((const char *)set + binding->immutable_samplers_offset);
}
static inline unsigned
-radv_combined_image_descriptor_sampler_offset(const struct radv_descriptor_set_binding_layout *binding)
+radv_combined_image_descriptor_sampler_offset(
+ const struct radv_descriptor_set_binding_layout *binding)
{
- return binding->size - ((!binding->immutable_samplers_equal) ? 16 : 0);
+ return binding->size - ((!binding->immutable_samplers_equal) ? 16 : 0);
}
static inline const struct radv_sampler_ycbcr_conversion *
-radv_immutable_ycbcr_samplers(const struct radv_descriptor_set_layout *set,
- unsigned binding_index)
+radv_immutable_ycbcr_samplers(const struct radv_descriptor_set_layout *set, unsigned binding_index)
{
- if (!set->ycbcr_sampler_offsets_offset)
- return NULL;
+ if (!set->ycbcr_sampler_offsets_offset)
+ return NULL;
- const uint32_t *offsets = (const uint32_t*)((const char*)set + set->ycbcr_sampler_offsets_offset);
+ const uint32_t *offsets =
+ (const uint32_t *)((const char *)set + set->ycbcr_sampler_offsets_offset);
- if (offsets[binding_index] == 0)
- return NULL;
- return (const struct radv_sampler_ycbcr_conversion *)((const char*)set + offsets[binding_index]);
+ if (offsets[binding_index] == 0)
+ return NULL;
+ return (const struct radv_sampler_ycbcr_conversion *)((const char *)set +
+ offsets[binding_index]);
}
#endif /* RADV_DESCRIPTOR_SET_H */
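
The header hunk above reformats inline helpers that locate immutable sampler and ycbcr-sampler data by adding byte offsets stored in the layout to the layout's own base address. A toy illustration of that "fixed header plus trailing data located by a stored offset" pattern; the struct and field names below are made up:

/* Toy illustration of the offset-based lookups in radv_descriptor_set.h:
 * variable-length data is appended after a fixed header in one allocation and
 * found again by adding a stored byte offset to the object's base address. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct toy_layout {
   uint32_t samplers_offset; /* byte offset of the sampler words, 0 if none */
   /* sampler words follow the struct in the same allocation */
};

static const uint32_t *
toy_immutable_samplers(const struct toy_layout *layout)
{
   if (!layout->samplers_offset)
      return NULL;
   return (const uint32_t *)((const char *)layout + layout->samplers_offset);
}

int main(void)
{
   /* One allocation: header + 4 sampler dwords. */
   size_t size = sizeof(struct toy_layout) + 4 * sizeof(uint32_t);
   struct toy_layout *layout = calloc(1, size);
   layout->samplers_offset = sizeof(struct toy_layout);

   uint32_t samplers[4] = {0x11, 0x22, 0x33, 0x44};
   memcpy((char *)layout + layout->samplers_offset, samplers, sizeof(samplers));

   const uint32_t *found = toy_immutable_samplers(layout);
   printf("first sampler dword: 0x%" PRIx32 "\n", found ? found[0] : 0u);
   free(layout);
   return 0;
}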
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 25af86c3950..863a33e2279 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -25,41 +25,41 @@
* IN THE SOFTWARE.
*/
+#include <fcntl.h>
#include <stdbool.h>
#include <string.h>
-#include <fcntl.h>
+#include "util/disk_cache.h"
+#include "radv_cs.h"
#include "radv_debug.h"
#include "radv_private.h"
#include "radv_shader.h"
-#include "radv_cs.h"
-#include "util/disk_cache.h"
#include "vk_util.h"
#ifdef _WIN32
-typedef void* drmDevicePtr;
+typedef void *drmDevicePtr;
#else
-#include <xf86drm.h>
#include <amdgpu.h>
+#include <xf86drm.h>
#include "drm-uapi/amdgpu_drm.h"
#include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
#endif
-#include "winsys/null/radv_null_winsys_public.h"
-#include "ac_llvm_util.h"
-#include "vk_format.h"
-#include "sid.h"
-#include "git_sha1.h"
#include "util/build_id.h"
#include "util/debug.h"
+#include "util/driconf.h"
#include "util/mesa-sha1.h"
#include "util/timespec.h"
#include "util/u_atomic.h"
-#include "util/driconf.h"
+#include "winsys/null/radv_null_winsys_public.h"
+#include "ac_llvm_util.h"
+#include "git_sha1.h"
+#include "sid.h"
+#include "vk_format.h"
/* The number of IBs per submit isn't infinite, it depends on the ring type
* (ie. some initial setup needed for a submit) and the number of IBs (4 DW).
* This limit is arbitrary but should be safe for now. Ideally, we should get
* this limit from the KMD.
-*/
+ */
#define RADV_MAX_IBS_PER_SUBMIT 192
/* The "RAW" clocks on Linux are called "FAST" on FreeBSD */
@@ -68,257 +68,251 @@ typedef void* drmDevicePtr;
#endif
static struct radv_timeline_point *
-radv_timeline_find_point_at_least_locked(struct radv_device *device,
- struct radv_timeline *timeline,
+radv_timeline_find_point_at_least_locked(struct radv_device *device, struct radv_timeline *timeline,
uint64_t p);
-static struct radv_timeline_point *
-radv_timeline_add_point_locked(struct radv_device *device,
- struct radv_timeline *timeline,
- uint64_t p);
+static struct radv_timeline_point *radv_timeline_add_point_locked(struct radv_device *device,
+ struct radv_timeline *timeline,
+ uint64_t p);
-static void
-radv_timeline_trigger_waiters_locked(struct radv_timeline *timeline,
- struct list_head *processing_list);
+static void radv_timeline_trigger_waiters_locked(struct radv_timeline *timeline,
+ struct list_head *processing_list);
-static
-void radv_destroy_semaphore_part(struct radv_device *device,
- struct radv_semaphore_part *part);
+static void radv_destroy_semaphore_part(struct radv_device *device,
+ struct radv_semaphore_part *part);
-uint64_t radv_get_current_time(void)
+uint64_t
+radv_get_current_time(void)
{
- return os_time_get_nano();
+ return os_time_get_nano();
}
-static uint64_t radv_get_absolute_timeout(uint64_t timeout)
+static uint64_t
+radv_get_absolute_timeout(uint64_t timeout)
{
- uint64_t current_time = radv_get_current_time();
+ uint64_t current_time = radv_get_current_time();
- timeout = MIN2(UINT64_MAX - current_time, timeout);
+ timeout = MIN2(UINT64_MAX - current_time, timeout);
- return current_time + timeout;
+ return current_time + timeout;
}
static int
radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
{
- struct mesa_sha1 ctx;
- unsigned char sha1[20];
- unsigned ptr_size = sizeof(void*);
+ struct mesa_sha1 ctx;
+ unsigned char sha1[20];
+ unsigned ptr_size = sizeof(void *);
- memset(uuid, 0, VK_UUID_SIZE);
- _mesa_sha1_init(&ctx);
+ memset(uuid, 0, VK_UUID_SIZE);
+ _mesa_sha1_init(&ctx);
- if (!disk_cache_get_function_identifier(radv_device_get_cache_uuid, &ctx) ||
- !disk_cache_get_function_identifier(LLVMInitializeAMDGPUTargetInfo, &ctx))
- return -1;
+ if (!disk_cache_get_function_identifier(radv_device_get_cache_uuid, &ctx) ||
+ !disk_cache_get_function_identifier(LLVMInitializeAMDGPUTargetInfo, &ctx))
+ return -1;
- _mesa_sha1_update(&ctx, &family, sizeof(family));
- _mesa_sha1_update(&ctx, &ptr_size, sizeof(ptr_size));
- _mesa_sha1_final(&ctx, sha1);
+ _mesa_sha1_update(&ctx, &family, sizeof(family));
+ _mesa_sha1_update(&ctx, &ptr_size, sizeof(ptr_size));
+ _mesa_sha1_final(&ctx, sha1);
- memcpy(uuid, sha1, VK_UUID_SIZE);
- return 0;
+ memcpy(uuid, sha1, VK_UUID_SIZE);
+ return 0;
}
static void
radv_get_driver_uuid(void *uuid)
{
- ac_compute_driver_uuid(uuid, VK_UUID_SIZE);
+ ac_compute_driver_uuid(uuid, VK_UUID_SIZE);
}
static void
radv_get_device_uuid(struct radeon_info *info, void *uuid)
{
- ac_compute_device_uuid(info, uuid, VK_UUID_SIZE);
+ ac_compute_device_uuid(info, uuid, VK_UUID_SIZE);
}
static uint64_t
radv_get_adjusted_vram_size(struct radv_physical_device *device)
{
- int ov = driQueryOptioni(&device->instance->dri_options,
- "override_vram_size");
- if (ov >= 0)
- return MIN2(device->rad_info.vram_size, (uint64_t)ov << 20);
- return device->rad_info.vram_size;
+ int ov = driQueryOptioni(&device->instance->dri_options, "override_vram_size");
+ if (ov >= 0)
+ return MIN2(device->rad_info.vram_size, (uint64_t)ov << 20);
+ return device->rad_info.vram_size;
}
static uint64_t
radv_get_visible_vram_size(struct radv_physical_device *device)
{
- return MIN2(radv_get_adjusted_vram_size(device) , device->rad_info.vram_vis_size);
+ return MIN2(radv_get_adjusted_vram_size(device), device->rad_info.vram_vis_size);
}
static uint64_t
radv_get_vram_size(struct radv_physical_device *device)
{
- uint64_t total_size = radv_get_adjusted_vram_size(device);
- return total_size - MIN2(total_size, device->rad_info.vram_vis_size);
+ uint64_t total_size = radv_get_adjusted_vram_size(device);
+ return total_size - MIN2(total_size, device->rad_info.vram_vis_size);
}
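
The helpers above split adjusted VRAM into a CPU-visible part and a non-visible remainder, optionally capped by the override_vram_size option. A small worked example of the same MIN2-based split, with invented sizes:

/* Worked example of the VRAM split above, using made-up numbers. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define MIN2(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
   uint64_t vram_size = 8ull << 30;       /* 8 GiB total, hypothetical */
   uint64_t vram_vis_size = 256ull << 20; /* 256 MiB visible BAR, hypothetical */
   int override_mib = -1;                 /* override_vram_size unset */

   uint64_t adjusted = override_mib >= 0 ? MIN2(vram_size, (uint64_t)override_mib << 20)
                                         : vram_size;
   uint64_t visible = MIN2(adjusted, vram_vis_size);
   uint64_t invisible = adjusted - MIN2(adjusted, vram_vis_size);

   printf("visible=%" PRIu64 " MiB, invisible=%" PRIu64 " MiB\n",
          visible >> 20, invisible >> 20);
   return 0;
}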
enum radv_heap {
- RADV_HEAP_VRAM = 1 << 0,
- RADV_HEAP_GTT = 1 << 1,
- RADV_HEAP_VRAM_VIS = 1 << 2,
- RADV_HEAP_MAX = 1 << 3,
+ RADV_HEAP_VRAM = 1 << 0,
+ RADV_HEAP_GTT = 1 << 1,
+ RADV_HEAP_VRAM_VIS = 1 << 2,
+ RADV_HEAP_MAX = 1 << 3,
};
static void
radv_physical_device_init_mem_types(struct radv_physical_device *device)
{
- uint64_t visible_vram_size = radv_get_visible_vram_size(device);
- uint64_t vram_size = radv_get_vram_size(device);
- int vram_index = -1, visible_vram_index = -1, gart_index = -1;
- device->memory_properties.memoryHeapCount = 0;
- device->heaps = 0;
-
- /* Only get a VRAM heap if it is significant, not if it is a 16 MiB
- * remainder above visible VRAM. */
- if (vram_size > 0 && vram_size * 9 >= visible_vram_size) {
- vram_index = device->memory_properties.memoryHeapCount++;
- device->heaps |= RADV_HEAP_VRAM;
- device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap) {
- .size = vram_size,
- .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
- };
- }
-
- if (device->rad_info.gart_size > 0) {
- gart_index = device->memory_properties.memoryHeapCount++;
- device->heaps |= RADV_HEAP_GTT;
- device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap) {
- .size = device->rad_info.gart_size,
- .flags = 0,
- };
- }
-
- if (visible_vram_size) {
- visible_vram_index = device->memory_properties.memoryHeapCount++;
- device->heaps |= RADV_HEAP_VRAM_VIS;
- device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap) {
- .size = visible_vram_size,
- .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
- };
- }
-
- unsigned type_count = 0;
-
- if (vram_index >= 0 || visible_vram_index >= 0) {
- device->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
- device->memory_flags[type_count] = RADEON_FLAG_NO_CPU_ACCESS;
- device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
- .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
- .heapIndex = vram_index >= 0 ? vram_index : visible_vram_index,
- };
- }
-
- if (gart_index >= 0) {
- device->memory_domains[type_count] = RADEON_DOMAIN_GTT;
- device->memory_flags[type_count] = RADEON_FLAG_GTT_WC | RADEON_FLAG_CPU_ACCESS;
- device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
- .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
- VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
- .heapIndex = gart_index,
- };
- }
- if (visible_vram_index >= 0) {
- device->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
- device->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS;
- device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
- .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
- VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
- VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
- .heapIndex = visible_vram_index,
- };
- }
-
- if (gart_index >= 0) {
- device->memory_domains[type_count] = RADEON_DOMAIN_GTT;
- device->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS;
- device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
- .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
- VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
- VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
- .heapIndex = gart_index,
- };
- }
- device->memory_properties.memoryTypeCount = type_count;
-
- if (device->rad_info.has_l2_uncached) {
- for (int i = 0; i < device->memory_properties.memoryTypeCount; i++) {
- VkMemoryType mem_type = device->memory_properties.memoryTypes[i];
-
- if ((mem_type.propertyFlags & (VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
- VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) ||
- mem_type.propertyFlags == VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) {
-
- VkMemoryPropertyFlags property_flags = mem_type.propertyFlags |
- VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD |
- VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD;
-
- device->memory_domains[type_count] = device->memory_domains[i];
- device->memory_flags[type_count] = device->memory_flags[i] | RADEON_FLAG_VA_UNCACHED;
- device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
- .propertyFlags = property_flags,
- .heapIndex = mem_type.heapIndex,
- };
- }
- }
- device->memory_properties.memoryTypeCount = type_count;
- }
+ uint64_t visible_vram_size = radv_get_visible_vram_size(device);
+ uint64_t vram_size = radv_get_vram_size(device);
+ int vram_index = -1, visible_vram_index = -1, gart_index = -1;
+ device->memory_properties.memoryHeapCount = 0;
+ device->heaps = 0;
+
+ /* Only get a VRAM heap if it is significant, not if it is a 16 MiB
+ * remainder above visible VRAM. */
+ if (vram_size > 0 && vram_size * 9 >= visible_vram_size) {
+ vram_index = device->memory_properties.memoryHeapCount++;
+ device->heaps |= RADV_HEAP_VRAM;
+ device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap){
+ .size = vram_size,
+ .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
+ };
+ }
+
+ if (device->rad_info.gart_size > 0) {
+ gart_index = device->memory_properties.memoryHeapCount++;
+ device->heaps |= RADV_HEAP_GTT;
+ device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap){
+ .size = device->rad_info.gart_size,
+ .flags = 0,
+ };
+ }
+
+ if (visible_vram_size) {
+ visible_vram_index = device->memory_properties.memoryHeapCount++;
+ device->heaps |= RADV_HEAP_VRAM_VIS;
+ device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap){
+ .size = visible_vram_size,
+ .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
+ };
+ }
+
+ unsigned type_count = 0;
+
+ if (vram_index >= 0 || visible_vram_index >= 0) {
+ device->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
+ device->memory_flags[type_count] = RADEON_FLAG_NO_CPU_ACCESS;
+ device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
+ .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
+ .heapIndex = vram_index >= 0 ? vram_index : visible_vram_index,
+ };
+ }
+
+ if (gart_index >= 0) {
+ device->memory_domains[type_count] = RADEON_DOMAIN_GTT;
+ device->memory_flags[type_count] = RADEON_FLAG_GTT_WC | RADEON_FLAG_CPU_ACCESS;
+ device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
+ .propertyFlags =
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+ .heapIndex = gart_index,
+ };
+ }
+ if (visible_vram_index >= 0) {
+ device->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
+ device->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS;
+ device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
+ .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+ VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+ .heapIndex = visible_vram_index,
+ };
+ }
+
+ if (gart_index >= 0) {
+ device->memory_domains[type_count] = RADEON_DOMAIN_GTT;
+ device->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS;
+ device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
+ .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+ VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
+ .heapIndex = gart_index,
+ };
+ }
+ device->memory_properties.memoryTypeCount = type_count;
+
+ if (device->rad_info.has_l2_uncached) {
+ for (int i = 0; i < device->memory_properties.memoryTypeCount; i++) {
+ VkMemoryType mem_type = device->memory_properties.memoryTypes[i];
+
+ if ((mem_type.propertyFlags &
+ (VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) ||
+ mem_type.propertyFlags == VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) {
+
+ VkMemoryPropertyFlags property_flags = mem_type.propertyFlags |
+ VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD |
+ VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD;
+
+ device->memory_domains[type_count] = device->memory_domains[i];
+ device->memory_flags[type_count] = device->memory_flags[i] | RADEON_FLAG_VA_UNCACHED;
+ device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
+ .propertyFlags = property_flags,
+ .heapIndex = mem_type.heapIndex,
+ };
+ }
+ }
+ device->memory_properties.memoryTypeCount = type_count;
+ }
}
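
radv_physical_device_init_mem_types above fills the VkPhysicalDeviceMemoryProperties table (heaps first, then types pointing at heaps) that applications later search when allocating memory. As a reminder of the consumer side, here is the usual "find a memory type whose flags contain the requested bits" loop, run against a hand-built table so it needs no GPU; the table contents are invented:

/* Standard Vulkan memory-type selection against a hand-built properties table;
 * the table contents are fictional, the search loop is the common pattern. */
#include <stdint.h>
#include <stdio.h>
#include <vulkan/vulkan.h>

static int32_t
find_memory_type(const VkPhysicalDeviceMemoryProperties *props, uint32_t type_bits,
                 VkMemoryPropertyFlags required)
{
   for (uint32_t i = 0; i < props->memoryTypeCount; i++) {
      if ((type_bits & (1u << i)) &&
          (props->memoryTypes[i].propertyFlags & required) == required)
         return (int32_t)i;
   }
   return -1;
}

int main(void)
{
   VkPhysicalDeviceMemoryProperties props = {
      .memoryTypeCount = 2,
      .memoryTypes = {
         {.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, .heapIndex = 0},
         {.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                           VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
          .heapIndex = 1},
      },
      .memoryHeapCount = 2,
      .memoryHeaps = {{.size = 8ull << 30, .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT},
                      {.size = 16ull << 30, .flags = 0}},
   };

   int32_t idx = find_memory_type(&props, 0x3 /* all types allowed */,
                                  VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                                     VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
   printf("host-visible memory type index: %d\n", idx);
   return 0;
}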
static const char *
radv_get_compiler_string(struct radv_physical_device *pdevice)
{
- if (!pdevice->use_llvm) {
- /* Some games like SotTR apply shader workarounds if the LLVM
- * version is too old or if the LLVM version string is
- * missing. This gives 2-5% performance with SotTR and ACO.
- */
- if (driQueryOptionb(&pdevice->instance->dri_options,
- "radv_report_llvm9_version_string")) {
- return "ACO/LLVM 9.0.1";
- }
+ if (!pdevice->use_llvm) {
+ /* Some games like SotTR apply shader workarounds if the LLVM
+ * version is too old or if the LLVM version string is
+ * missing. This gives 2-5% performance with SotTR and ACO.
+ */
+ if (driQueryOptionb(&pdevice->instance->dri_options, "radv_report_llvm9_version_string")) {
+ return "ACO/LLVM 9.0.1";
+ }
- return "ACO";
- }
+ return "ACO";
+ }
- return "LLVM " MESA_LLVM_VERSION_STRING;
+ return "LLVM " MESA_LLVM_VERSION_STRING;
}
int
radv_get_int_debug_option(const char *name, int default_value)
{
- const char *str;
- int result;
+ const char *str;
+ int result;
- str = getenv(name);
- if (!str) {
- result = default_value;
- } else {
- char *endptr;
+ str = getenv(name);
+ if (!str) {
+ result = default_value;
+ } else {
+ char *endptr;
- result = strtol(str, &endptr, 0);
- if (str == endptr) {
- /* No digits founs. */
- result = default_value;
- }
- }
+ result = strtol(str, &endptr, 0);
+ if (str == endptr) {
+         /* No digits found. */
+ result = default_value;
+ }
+ }
- return result;
+ return result;
}
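
radv_get_int_debug_option above falls back to the default when getenv returns nothing or strtol consumes no digits. The same pattern as a standalone program:

/* Same getenv + strtol pattern as above, as a self-contained program. */
#include <stdio.h>
#include <stdlib.h>

static int
get_int_option(const char *name, int default_value)
{
   const char *str = getenv(name);
   if (!str)
      return default_value;

   char *endptr;
   long result = strtol(str, &endptr, 0);
   if (str == endptr) /* no digits found */
      return default_value;
   return (int)result;
}

int main(void)
{
   /* e.g. RADV_THREAD_TRACE=4096 ./a.out */
   printf("RADV_THREAD_TRACE = %d\n", get_int_option("RADV_THREAD_TRACE", -1));
   return 0;
}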
-static bool radv_thread_trace_enabled()
+static bool
+radv_thread_trace_enabled()
{
- return radv_get_int_debug_option("RADV_THREAD_TRACE", -1) >= 0 ||
- getenv("RADV_THREAD_TRACE_TRIGGER");
+ return radv_get_int_debug_option("RADV_THREAD_TRACE", -1) >= 0 ||
+ getenv("RADV_THREAD_TRACE_TRIGGER");
}
-#if defined(VK_USE_PLATFORM_WAYLAND_KHR) || \
- defined(VK_USE_PLATFORM_XCB_KHR) || \
- defined(VK_USE_PLATFORM_XLIB_KHR) || \
- defined(VK_USE_PLATFORM_DISPLAY_KHR)
+#if defined(VK_USE_PLATFORM_WAYLAND_KHR) || defined(VK_USE_PLATFORM_XCB_KHR) || \
+ defined(VK_USE_PLATFORM_XLIB_KHR) || defined(VK_USE_PLATFORM_DISPLAY_KHR)
#define RADV_USE_WSI_PLATFORM
#endif
@@ -328,42 +322,43 @@ static bool radv_thread_trace_enabled()
#define RADV_API_VERSION VK_MAKE_VERSION(1, 2, VK_HEADER_VERSION)
#endif
-VkResult radv_EnumerateInstanceVersion(uint32_t* pApiVersion)
+VkResult
+radv_EnumerateInstanceVersion(uint32_t *pApiVersion)
{
- *pApiVersion = RADV_API_VERSION;
- return VK_SUCCESS;
+ *pApiVersion = RADV_API_VERSION;
+ return VK_SUCCESS;
}
static const struct vk_instance_extension_table radv_instance_extensions_supported = {
- .KHR_device_group_creation = true,
- .KHR_external_fence_capabilities = true,
- .KHR_external_memory_capabilities = true,
- .KHR_external_semaphore_capabilities = true,
- .KHR_get_physical_device_properties2 = true,
- .EXT_debug_report = true,
+ .KHR_device_group_creation = true,
+ .KHR_external_fence_capabilities = true,
+ .KHR_external_memory_capabilities = true,
+ .KHR_external_semaphore_capabilities = true,
+ .KHR_get_physical_device_properties2 = true,
+ .EXT_debug_report = true,
#ifdef RADV_USE_WSI_PLATFORM
- .KHR_get_surface_capabilities2 = true,
- .KHR_surface = true,
- .KHR_surface_protected_capabilities = true,
+ .KHR_get_surface_capabilities2 = true,
+ .KHR_surface = true,
+ .KHR_surface_protected_capabilities = true,
#endif
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
- .KHR_wayland_surface = true,
+ .KHR_wayland_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XCB_KHR
- .KHR_xcb_surface = true,
+ .KHR_xcb_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_KHR
- .KHR_xlib_surface = true,
+ .KHR_xlib_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
- .EXT_acquire_xlib_display = true,
+ .EXT_acquire_xlib_display = true,
#endif
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
- .KHR_display = true,
- .KHR_get_display_properties2 = true,
- .EXT_direct_mode_display = true,
- .EXT_display_surface_counter = true,
+ .KHR_display = true,
+ .KHR_get_display_properties2 = true,
+ .EXT_direct_mode_display = true,
+ .EXT_display_surface_counter = true,
#endif
};
@@ -371,3190 +366,3066 @@ static void
radv_physical_device_get_supported_extensions(const struct radv_physical_device *device,
struct vk_device_extension_table *ext)
{
- *ext = (struct vk_device_extension_table) {
- .KHR_8bit_storage = true,
- .KHR_16bit_storage = true,
- .KHR_bind_memory2 = true,
- .KHR_buffer_device_address = true,
- .KHR_copy_commands2 = true,
- .KHR_create_renderpass2 = true,
- .KHR_dedicated_allocation = true,
- .KHR_deferred_host_operations = true,
- .KHR_depth_stencil_resolve = true,
- .KHR_descriptor_update_template = true,
- .KHR_device_group = true,
- .KHR_draw_indirect_count = true,
- .KHR_driver_properties = true,
- .KHR_external_fence = true,
- .KHR_external_fence_fd = true,
- .KHR_external_memory = true,
- .KHR_external_memory_fd = true,
- .KHR_external_semaphore = true,
- .KHR_external_semaphore_fd = true,
- .KHR_fragment_shading_rate = device->rad_info.chip_class >= GFX10_3,
- .KHR_get_memory_requirements2 = true,
- .KHR_image_format_list = true,
- .KHR_imageless_framebuffer = true,
+ *ext = (struct vk_device_extension_table){
+ .KHR_8bit_storage = true,
+ .KHR_16bit_storage = true,
+ .KHR_bind_memory2 = true,
+ .KHR_buffer_device_address = true,
+ .KHR_copy_commands2 = true,
+ .KHR_create_renderpass2 = true,
+ .KHR_dedicated_allocation = true,
+ .KHR_deferred_host_operations = true,
+ .KHR_depth_stencil_resolve = true,
+ .KHR_descriptor_update_template = true,
+ .KHR_device_group = true,
+ .KHR_draw_indirect_count = true,
+ .KHR_driver_properties = true,
+ .KHR_external_fence = true,
+ .KHR_external_fence_fd = true,
+ .KHR_external_memory = true,
+ .KHR_external_memory_fd = true,
+ .KHR_external_semaphore = true,
+ .KHR_external_semaphore_fd = true,
+ .KHR_fragment_shading_rate = device->rad_info.chip_class >= GFX10_3,
+ .KHR_get_memory_requirements2 = true,
+ .KHR_image_format_list = true,
+ .KHR_imageless_framebuffer = true,
#ifdef RADV_USE_WSI_PLATFORM
- .KHR_incremental_present = true,
+ .KHR_incremental_present = true,
#endif
- .KHR_maintenance1 = true,
- .KHR_maintenance2 = true,
- .KHR_maintenance3 = true,
- .KHR_multiview = true,
- .KHR_pipeline_executable_properties = true,
- .KHR_push_descriptor = true,
- .KHR_relaxed_block_layout = true,
- .KHR_sampler_mirror_clamp_to_edge = true,
- .KHR_sampler_ycbcr_conversion = true,
- .KHR_separate_depth_stencil_layouts = true,
- .KHR_shader_atomic_int64 = LLVM_VERSION_MAJOR >= 9 || !device->use_llvm,
- .KHR_shader_clock = true,
- .KHR_shader_draw_parameters = true,
- .KHR_shader_float16_int8 = true,
- .KHR_shader_float_controls = true,
- .KHR_shader_non_semantic_info = true,
- .KHR_shader_subgroup_extended_types = true,
- .KHR_shader_terminate_invocation = true,
- .KHR_spirv_1_4 = true,
- .KHR_storage_buffer_storage_class = true,
+ .KHR_maintenance1 = true,
+ .KHR_maintenance2 = true,
+ .KHR_maintenance3 = true,
+ .KHR_multiview = true,
+ .KHR_pipeline_executable_properties = true,
+ .KHR_push_descriptor = true,
+ .KHR_relaxed_block_layout = true,
+ .KHR_sampler_mirror_clamp_to_edge = true,
+ .KHR_sampler_ycbcr_conversion = true,
+ .KHR_separate_depth_stencil_layouts = true,
+ .KHR_shader_atomic_int64 = LLVM_VERSION_MAJOR >= 9 || !device->use_llvm,
+ .KHR_shader_clock = true,
+ .KHR_shader_draw_parameters = true,
+ .KHR_shader_float16_int8 = true,
+ .KHR_shader_float_controls = true,
+ .KHR_shader_non_semantic_info = true,
+ .KHR_shader_subgroup_extended_types = true,
+ .KHR_shader_terminate_invocation = true,
+ .KHR_spirv_1_4 = true,
+ .KHR_storage_buffer_storage_class = true,
#ifdef RADV_USE_WSI_PLATFORM
- .KHR_swapchain = true,
- .KHR_swapchain_mutable_format = true,
+ .KHR_swapchain = true,
+ .KHR_swapchain_mutable_format = true,
#endif
- .KHR_timeline_semaphore = true,
- .KHR_uniform_buffer_standard_layout = true,
- .KHR_variable_pointers = true,
- .KHR_vulkan_memory_model = true,
- .KHR_workgroup_memory_explicit_layout = true,
- .KHR_zero_initialize_workgroup_memory = true,
- .EXT_4444_formats = true,
- .EXT_buffer_device_address = true,
- .EXT_calibrated_timestamps = RADV_SUPPORT_CALIBRATED_TIMESTAMPS,
- .EXT_conditional_rendering = true,
- .EXT_conservative_rasterization = device->rad_info.chip_class >= GFX9,
- .EXT_custom_border_color = true,
- .EXT_debug_marker = radv_thread_trace_enabled(),
- .EXT_depth_clip_enable = true,
- .EXT_depth_range_unrestricted = true,
- .EXT_descriptor_indexing = true,
- .EXT_discard_rectangles = true,
+ .KHR_timeline_semaphore = true,
+ .KHR_uniform_buffer_standard_layout = true,
+ .KHR_variable_pointers = true,
+ .KHR_vulkan_memory_model = true,
+ .KHR_workgroup_memory_explicit_layout = true,
+ .KHR_zero_initialize_workgroup_memory = true,
+ .EXT_4444_formats = true,
+ .EXT_buffer_device_address = true,
+ .EXT_calibrated_timestamps = RADV_SUPPORT_CALIBRATED_TIMESTAMPS,
+ .EXT_conditional_rendering = true,
+ .EXT_conservative_rasterization = device->rad_info.chip_class >= GFX9,
+ .EXT_custom_border_color = true,
+ .EXT_debug_marker = radv_thread_trace_enabled(),
+ .EXT_depth_clip_enable = true,
+ .EXT_depth_range_unrestricted = true,
+ .EXT_descriptor_indexing = true,
+ .EXT_discard_rectangles = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
- .EXT_display_control = true,
+ .EXT_display_control = true,
#endif
- .EXT_extended_dynamic_state = true,
- .EXT_external_memory_dma_buf = true,
- .EXT_external_memory_host = device->rad_info.has_userptr,
- .EXT_global_priority = true,
- .EXT_host_query_reset = true,
- .EXT_image_drm_format_modifier = device->rad_info.chip_class >= GFX9,
- .EXT_image_robustness = true,
- .EXT_index_type_uint8 = device->rad_info.chip_class >= GFX8,
- .EXT_inline_uniform_block = true,
- .EXT_line_rasterization = true,
- .EXT_memory_budget = true,
- .EXT_memory_priority = true,
- .EXT_pci_bus_info = true,
- .EXT_pipeline_creation_cache_control = true,
- .EXT_pipeline_creation_feedback = true,
- .EXT_post_depth_coverage = device->rad_info.chip_class >= GFX10,
- .EXT_private_data = true,
- .EXT_queue_family_foreign = true,
- .EXT_robustness2 = true,
- .EXT_sample_locations = device->rad_info.chip_class < GFX10,
- .EXT_sampler_filter_minmax = true,
- .EXT_scalar_block_layout = device->rad_info.chip_class >= GFX7,
- .EXT_shader_atomic_float = true,
- .EXT_shader_demote_to_helper_invocation = LLVM_VERSION_MAJOR >= 9 || !device->use_llvm,
- .EXT_shader_image_atomic_int64 = LLVM_VERSION_MAJOR >= 11 || !device->use_llvm,
- .EXT_shader_stencil_export = true,
- .EXT_shader_subgroup_ballot = true,
- .EXT_shader_subgroup_vote = true,
- .EXT_shader_viewport_index_layer = true,
- .EXT_subgroup_size_control = true,
- .EXT_texel_buffer_alignment = true,
- .EXT_transform_feedback = true,
- .EXT_vertex_attribute_divisor = true,
- .EXT_ycbcr_image_arrays = true,
- .AMD_buffer_marker = true,
- .AMD_device_coherent_memory = true,
- .AMD_draw_indirect_count = true,
- .AMD_gcn_shader = true,
- .AMD_gpu_shader_half_float = device->rad_info.has_packed_math_16bit,
- .AMD_gpu_shader_int16 = device->rad_info.has_packed_math_16bit,
- .AMD_memory_overallocation_behavior = true,
- .AMD_mixed_attachment_samples = true,
- .AMD_rasterization_order = device->rad_info.has_out_of_order_rast,
- .AMD_shader_ballot = true,
- .AMD_shader_core_properties = true,
- .AMD_shader_core_properties2 = true,
- .AMD_shader_explicit_vertex_parameter = true,
- .AMD_shader_fragment_mask = true,
- .AMD_shader_image_load_store_lod = true,
- .AMD_shader_info = true,
- .AMD_shader_trinary_minmax = true,
- .AMD_texture_gather_bias_lod = true,
+ .EXT_extended_dynamic_state = true,
+ .EXT_external_memory_dma_buf = true,
+ .EXT_external_memory_host = device->rad_info.has_userptr,
+ .EXT_global_priority = true,
+ .EXT_host_query_reset = true,
+ .EXT_image_drm_format_modifier = device->rad_info.chip_class >= GFX9,
+ .EXT_image_robustness = true,
+ .EXT_index_type_uint8 = device->rad_info.chip_class >= GFX8,
+ .EXT_inline_uniform_block = true,
+ .EXT_line_rasterization = true,
+ .EXT_memory_budget = true,
+ .EXT_memory_priority = true,
+ .EXT_pci_bus_info = true,
+ .EXT_pipeline_creation_cache_control = true,
+ .EXT_pipeline_creation_feedback = true,
+ .EXT_post_depth_coverage = device->rad_info.chip_class >= GFX10,
+ .EXT_private_data = true,
+ .EXT_queue_family_foreign = true,
+ .EXT_robustness2 = true,
+ .EXT_sample_locations = device->rad_info.chip_class < GFX10,
+ .EXT_sampler_filter_minmax = true,
+ .EXT_scalar_block_layout = device->rad_info.chip_class >= GFX7,
+ .EXT_shader_atomic_float = true,
+ .EXT_shader_demote_to_helper_invocation = LLVM_VERSION_MAJOR >= 9 || !device->use_llvm,
+ .EXT_shader_image_atomic_int64 = LLVM_VERSION_MAJOR >= 11 || !device->use_llvm,
+ .EXT_shader_stencil_export = true,
+ .EXT_shader_subgroup_ballot = true,
+ .EXT_shader_subgroup_vote = true,
+ .EXT_shader_viewport_index_layer = true,
+ .EXT_subgroup_size_control = true,
+ .EXT_texel_buffer_alignment = true,
+ .EXT_transform_feedback = true,
+ .EXT_vertex_attribute_divisor = true,
+ .EXT_ycbcr_image_arrays = true,
+ .AMD_buffer_marker = true,
+ .AMD_device_coherent_memory = true,
+ .AMD_draw_indirect_count = true,
+ .AMD_gcn_shader = true,
+ .AMD_gpu_shader_half_float = device->rad_info.has_packed_math_16bit,
+ .AMD_gpu_shader_int16 = device->rad_info.has_packed_math_16bit,
+ .AMD_memory_overallocation_behavior = true,
+ .AMD_mixed_attachment_samples = true,
+ .AMD_rasterization_order = device->rad_info.has_out_of_order_rast,
+ .AMD_shader_ballot = true,
+ .AMD_shader_core_properties = true,
+ .AMD_shader_core_properties2 = true,
+ .AMD_shader_explicit_vertex_parameter = true,
+ .AMD_shader_fragment_mask = true,
+ .AMD_shader_image_load_store_lod = true,
+ .AMD_shader_info = true,
+ .AMD_shader_trinary_minmax = true,
+ .AMD_texture_gather_bias_lod = true,
#ifdef ANDROID
- .ANDROID_external_memory_android_hardware_buffer = RADV_SUPPORT_ANDROID_HARDWARE_BUFFER,
- .ANDROID_native_buffer = true,
+ .ANDROID_external_memory_android_hardware_buffer = RADV_SUPPORT_ANDROID_HARDWARE_BUFFER,
+ .ANDROID_native_buffer = true,
#endif
- .GOOGLE_decorate_string = true,
- .GOOGLE_hlsl_functionality1 = true,
- .GOOGLE_user_type = true,
- .NV_compute_shader_derivatives = true,
- .VALVE_mutable_descriptor_type = true,
+ .GOOGLE_decorate_string = true,
+ .GOOGLE_hlsl_functionality1 = true,
+ .GOOGLE_user_type = true,
+ .NV_compute_shader_derivatives = true,
+ .VALVE_mutable_descriptor_type = true,
};
}
static VkResult
-radv_physical_device_try_create(struct radv_instance *instance,
- drmDevicePtr drm_device,
- struct radv_physical_device **device_out)
+radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm_device,
+ struct radv_physical_device **device_out)
{
- VkResult result;
- int fd = -1;
- int master_fd = -1;
+ VkResult result;
+ int fd = -1;
+ int master_fd = -1;
#ifdef _WIN32
- assert(drm_device == NULL);
+ assert(drm_device == NULL);
#else
- if (drm_device) {
- const char *path = drm_device->nodes[DRM_NODE_RENDER];
- drmVersionPtr version;
+ if (drm_device) {
+ const char *path = drm_device->nodes[DRM_NODE_RENDER];
+ drmVersionPtr version;
- fd = open(path, O_RDWR | O_CLOEXEC);
- if (fd < 0) {
- if (instance->debug_flags & RADV_DEBUG_STARTUP)
- radv_logi("Could not open device '%s'", path);
+ fd = open(path, O_RDWR | O_CLOEXEC);
+ if (fd < 0) {
+ if (instance->debug_flags & RADV_DEBUG_STARTUP)
+ radv_logi("Could not open device '%s'", path);
- return vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER);
- }
+ return vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER);
+ }
- version = drmGetVersion(fd);
- if (!version) {
- close(fd);
+ version = drmGetVersion(fd);
+ if (!version) {
+ close(fd);
- if (instance->debug_flags & RADV_DEBUG_STARTUP)
- radv_logi("Could not get the kernel driver version for device '%s'", path);
+ if (instance->debug_flags & RADV_DEBUG_STARTUP)
+ radv_logi("Could not get the kernel driver version for device '%s'", path);
- return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
- "failed to get version %s: %m", path);
- }
+ return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER, "failed to get version %s: %m",
+ path);
+ }
- if (strcmp(version->name, "amdgpu")) {
- drmFreeVersion(version);
- close(fd);
+ if (strcmp(version->name, "amdgpu")) {
+ drmFreeVersion(version);
+ close(fd);
- if (instance->debug_flags & RADV_DEBUG_STARTUP)
- radv_logi("Device '%s' is not using the amdgpu kernel driver.", path);
+ if (instance->debug_flags & RADV_DEBUG_STARTUP)
+ radv_logi("Device '%s' is not using the amdgpu kernel driver.", path);
- return VK_ERROR_INCOMPATIBLE_DRIVER;
- }
- drmFreeVersion(version);
+ return VK_ERROR_INCOMPATIBLE_DRIVER;
+ }
+ drmFreeVersion(version);
- if (instance->debug_flags & RADV_DEBUG_STARTUP)
- radv_logi("Found compatible device '%s'.", path);
- }
+ if (instance->debug_flags & RADV_DEBUG_STARTUP)
+ radv_logi("Found compatible device '%s'.", path);
+ }
#endif
- struct radv_physical_device *device =
- vk_zalloc2(&instance->vk.alloc, NULL, sizeof(*device), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
- if (!device) {
- result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- goto fail_fd;
- }
+ struct radv_physical_device *device = vk_zalloc2(&instance->vk.alloc, NULL, sizeof(*device), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
+ if (!device) {
+ result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ goto fail_fd;
+ }
- struct vk_physical_device_dispatch_table dispatch_table;
- vk_physical_device_dispatch_table_from_entrypoints(
- &dispatch_table, &radv_physical_device_entrypoints, true);
+ struct vk_physical_device_dispatch_table dispatch_table;
+ vk_physical_device_dispatch_table_from_entrypoints(&dispatch_table,
+ &radv_physical_device_entrypoints, true);
- result = vk_physical_device_init(&device->vk, &instance->vk, NULL,
- &dispatch_table);
- if (result != VK_SUCCESS) {
- goto fail_alloc;
- }
+ result = vk_physical_device_init(&device->vk, &instance->vk, NULL, &dispatch_table);
+ if (result != VK_SUCCESS) {
+ goto fail_alloc;
+ }
- device->instance = instance;
+ device->instance = instance;
#ifdef _WIN32
- device->ws = radv_null_winsys_create();
+ device->ws = radv_null_winsys_create();
#else
- if (drm_device) {
- device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags,
- instance->perftest_flags);
- } else {
- device->ws = radv_null_winsys_create();
- }
+ if (drm_device) {
+ device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags, instance->perftest_flags);
+ } else {
+ device->ws = radv_null_winsys_create();
+ }
#endif
- if (!device->ws) {
- result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
- "failed to initialize winsys");
- goto fail_base;
- }
+ if (!device->ws) {
+ result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "failed to initialize winsys");
+ goto fail_base;
+ }
#ifndef _WIN32
- if (drm_device && instance->vk.enabled_extensions.KHR_display) {
- master_fd = open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC);
- if (master_fd >= 0) {
- uint32_t accel_working = 0;
- struct drm_amdgpu_info request = {
- .return_pointer = (uintptr_t)&accel_working,
- .return_size = sizeof(accel_working),
- .query = AMDGPU_INFO_ACCEL_WORKING
- };
-
- if (drmCommandWrite(master_fd, DRM_AMDGPU_INFO, &request, sizeof (struct drm_amdgpu_info)) < 0 || !accel_working) {
- close(master_fd);
- master_fd = -1;
- }
- }
- }
+ if (drm_device && instance->vk.enabled_extensions.KHR_display) {
+ master_fd = open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC);
+ if (master_fd >= 0) {
+ uint32_t accel_working = 0;
+ struct drm_amdgpu_info request = {.return_pointer = (uintptr_t)&accel_working,
+ .return_size = sizeof(accel_working),
+ .query = AMDGPU_INFO_ACCEL_WORKING};
+
+ if (drmCommandWrite(master_fd, DRM_AMDGPU_INFO, &request, sizeof(struct drm_amdgpu_info)) <
+ 0 ||
+ !accel_working) {
+ close(master_fd);
+ master_fd = -1;
+ }
+ }
+ }
#endif
- device->master_fd = master_fd;
- device->local_fd = fd;
- device->ws->query_info(device->ws, &device->rad_info);
+ device->master_fd = master_fd;
+ device->local_fd = fd;
+ device->ws->query_info(device->ws, &device->rad_info);
- device->use_llvm = instance->debug_flags & RADV_DEBUG_LLVM;
+ device->use_llvm = instance->debug_flags & RADV_DEBUG_LLVM;
- snprintf(device->name, sizeof(device->name),
- "AMD RADV %s (%s)",
- device->rad_info.name, radv_get_compiler_string(device));
+ snprintf(device->name, sizeof(device->name), "AMD RADV %s (%s)", device->rad_info.name,
+ radv_get_compiler_string(device));
#ifdef ENABLE_SHADER_CACHE
- if (radv_device_get_cache_uuid(device->rad_info.family, device->cache_uuid)) {
- result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
- "cannot generate UUID");
- goto fail_wsi;
- }
-
- /* These flags affect shader compilation. */
- uint64_t shader_env_flags = (device->use_llvm ? 0 : 0x2);
-
- /* The gpu id is already embedded in the uuid so we just pass "radv"
- * when creating the cache.
- */
- char buf[VK_UUID_SIZE * 2 + 1];
- disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
- device->disk_cache = disk_cache_create(device->name, buf, shader_env_flags);
+ if (radv_device_get_cache_uuid(device->rad_info.family, device->cache_uuid)) {
+ result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "cannot generate UUID");
+ goto fail_wsi;
+ }
+
+ /* These flags affect shader compilation. */
+ uint64_t shader_env_flags = (device->use_llvm ? 0 : 0x2);
+
+ /* The gpu id is already embedded in the uuid so we just pass "radv"
+ * when creating the cache.
+ */
+ char buf[VK_UUID_SIZE * 2 + 1];
+ disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
+ device->disk_cache = disk_cache_create(device->name, buf, shader_env_flags);
#endif
- if (device->rad_info.chip_class < GFX8 ||
- device->rad_info.chip_class > GFX10)
- fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
+ if (device->rad_info.chip_class < GFX8 || device->rad_info.chip_class > GFX10)
+ fprintf(stderr,
+ "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
- radv_get_driver_uuid(&device->driver_uuid);
- radv_get_device_uuid(&device->rad_info, &device->device_uuid);
+ radv_get_driver_uuid(&device->driver_uuid);
+ radv_get_device_uuid(&device->rad_info, &device->device_uuid);
- device->out_of_order_rast_allowed = device->rad_info.has_out_of_order_rast &&
- !(device->instance->debug_flags & RADV_DEBUG_NO_OUT_OF_ORDER);
+ device->out_of_order_rast_allowed =
+ device->rad_info.has_out_of_order_rast &&
+ !(device->instance->debug_flags & RADV_DEBUG_NO_OUT_OF_ORDER);
- device->dcc_msaa_allowed =
- (device->instance->perftest_flags & RADV_PERFTEST_DCC_MSAA);
+ device->dcc_msaa_allowed = (device->instance->perftest_flags & RADV_PERFTEST_DCC_MSAA);
- device->use_ngg = device->rad_info.chip_class >= GFX10 &&
- device->rad_info.family != CHIP_NAVI14 &&
- !(device->instance->debug_flags & RADV_DEBUG_NO_NGG);
+ device->use_ngg = device->rad_info.chip_class >= GFX10 &&
+ device->rad_info.family != CHIP_NAVI14 &&
+ !(device->instance->debug_flags & RADV_DEBUG_NO_NGG);
- device->use_ngg_streamout = false;
+ device->use_ngg_streamout = false;
- /* Determine the number of threads per wave for all stages. */
- device->cs_wave_size = 64;
- device->ps_wave_size = 64;
- device->ge_wave_size = 64;
+ /* Determine the number of threads per wave for all stages. */
+ device->cs_wave_size = 64;
+ device->ps_wave_size = 64;
+ device->ge_wave_size = 64;
- if (device->rad_info.chip_class >= GFX10) {
- if (device->instance->perftest_flags & RADV_PERFTEST_CS_WAVE_32)
- device->cs_wave_size = 32;
+ if (device->rad_info.chip_class >= GFX10) {
+ if (device->instance->perftest_flags & RADV_PERFTEST_CS_WAVE_32)
+ device->cs_wave_size = 32;
- /* For pixel shaders, wave64 is recommanded. */
- if (device->instance->perftest_flags & RADV_PERFTEST_PS_WAVE_32)
- device->ps_wave_size = 32;
+      /* For pixel shaders, wave64 is recommended. */
+ if (device->instance->perftest_flags & RADV_PERFTEST_PS_WAVE_32)
+ device->ps_wave_size = 32;
- if (device->instance->perftest_flags & RADV_PERFTEST_GE_WAVE_32)
- device->ge_wave_size = 32;
- }
+ if (device->instance->perftest_flags & RADV_PERFTEST_GE_WAVE_32)
+ device->ge_wave_size = 32;
+ }
- radv_physical_device_init_mem_types(device);
+ radv_physical_device_init_mem_types(device);
- radv_physical_device_get_supported_extensions(device,
- &device->vk.supported_extensions);
+ radv_physical_device_get_supported_extensions(device, &device->vk.supported_extensions);
#ifndef _WIN32
- if (drm_device)
- device->bus_info = *drm_device->businfo.pci;
+ if (drm_device)
+ device->bus_info = *drm_device->businfo.pci;
#endif
- if ((device->instance->debug_flags & RADV_DEBUG_INFO))
- ac_print_gpu_info(&device->rad_info, stdout);
+ if ((device->instance->debug_flags & RADV_DEBUG_INFO))
+ ac_print_gpu_info(&device->rad_info, stdout);
- /* The WSI is structured as a layer on top of the driver, so this has
- * to be the last part of initialization (at least until we get other
- * semi-layers).
- */
- result = radv_init_wsi(device);
- if (result != VK_SUCCESS) {
- vk_error(instance, result);
- goto fail_disk_cache;
- }
+ /* The WSI is structured as a layer on top of the driver, so this has
+ * to be the last part of initialization (at least until we get other
+ * semi-layers).
+ */
+ result = radv_init_wsi(device);
+ if (result != VK_SUCCESS) {
+ vk_error(instance, result);
+ goto fail_disk_cache;
+ }
- *device_out = device;
+ *device_out = device;
- return VK_SUCCESS;
+ return VK_SUCCESS;
fail_disk_cache:
- disk_cache_destroy(device->disk_cache);
+ disk_cache_destroy(device->disk_cache);
fail_wsi:
- device->ws->destroy(device->ws);
+ device->ws->destroy(device->ws);
fail_base:
- vk_physical_device_finish(&device->vk);
+ vk_physical_device_finish(&device->vk);
fail_alloc:
- vk_free(&instance->vk.alloc, device);
+ vk_free(&instance->vk.alloc, device);
fail_fd:
- if (fd != -1)
- close(fd);
- if (master_fd != -1)
- close(master_fd);
- return result;
+ if (fd != -1)
+ close(fd);
+ if (master_fd != -1)
+ close(master_fd);
+ return result;
}
static void
radv_physical_device_destroy(struct radv_physical_device *device)
{
- radv_finish_wsi(device);
- device->ws->destroy(device->ws);
- disk_cache_destroy(device->disk_cache);
- if (device->local_fd != -1)
- close(device->local_fd);
- if (device->master_fd != -1)
- close(device->master_fd);
- vk_physical_device_finish(&device->vk);
- vk_free(&device->instance->vk.alloc, device);
+ radv_finish_wsi(device);
+ device->ws->destroy(device->ws);
+ disk_cache_destroy(device->disk_cache);
+ if (device->local_fd != -1)
+ close(device->local_fd);
+ if (device->master_fd != -1)
+ close(device->master_fd);
+ vk_physical_device_finish(&device->vk);
+ vk_free(&device->instance->vk.alloc, device);
}
static void *
default_alloc_func(void *pUserData, size_t size, size_t align,
VkSystemAllocationScope allocationScope)
{
- return malloc(size);
+ return malloc(size);
}
static void *
-default_realloc_func(void *pUserData, void *pOriginal, size_t size,
- size_t align, VkSystemAllocationScope allocationScope)
+default_realloc_func(void *pUserData, void *pOriginal, size_t size, size_t align,
+ VkSystemAllocationScope allocationScope)
{
- return realloc(pOriginal, size);
+ return realloc(pOriginal, size);
}
static void
default_free_func(void *pUserData, void *pMemory)
{
- free(pMemory);
+ free(pMemory);
}
static const VkAllocationCallbacks default_alloc = {
- .pUserData = NULL,
- .pfnAllocation = default_alloc_func,
- .pfnReallocation = default_realloc_func,
- .pfnFree = default_free_func,
+ .pUserData = NULL,
+ .pfnAllocation = default_alloc_func,
+ .pfnReallocation = default_realloc_func,
+ .pfnFree = default_free_func,
};
static const struct debug_control radv_debug_options[] = {
- {"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
- {"nodcc", RADV_DEBUG_NO_DCC},
- {"shaders", RADV_DEBUG_DUMP_SHADERS},
- {"nocache", RADV_DEBUG_NO_CACHE},
- {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
- {"nohiz", RADV_DEBUG_NO_HIZ},
- {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
- {"allbos", RADV_DEBUG_ALL_BOS},
- {"noibs", RADV_DEBUG_NO_IBS},
- {"spirv", RADV_DEBUG_DUMP_SPIRV},
- {"vmfaults", RADV_DEBUG_VM_FAULTS},
- {"zerovram", RADV_DEBUG_ZERO_VRAM},
- {"syncshaders", RADV_DEBUG_SYNC_SHADERS},
- {"preoptir", RADV_DEBUG_PREOPTIR},
- {"nodynamicbounds", RADV_DEBUG_NO_DYNAMIC_BOUNDS},
- {"nooutoforder", RADV_DEBUG_NO_OUT_OF_ORDER},
- {"info", RADV_DEBUG_INFO},
- {"errors", RADV_DEBUG_ERRORS},
- {"startup", RADV_DEBUG_STARTUP},
- {"checkir", RADV_DEBUG_CHECKIR},
- {"nothreadllvm", RADV_DEBUG_NOTHREADLLVM},
- {"nobinning", RADV_DEBUG_NOBINNING},
- {"nongg", RADV_DEBUG_NO_NGG},
- {"metashaders", RADV_DEBUG_DUMP_META_SHADERS},
- {"nomemorycache", RADV_DEBUG_NO_MEMORY_CACHE},
- {"discardtodemote", RADV_DEBUG_DISCARD_TO_DEMOTE},
- {"llvm", RADV_DEBUG_LLVM},
- {"forcecompress", RADV_DEBUG_FORCE_COMPRESS},
- {"hang", RADV_DEBUG_HANG},
- {"img", RADV_DEBUG_IMG},
- {"noumr", RADV_DEBUG_NO_UMR},
- {"invariantgeom", RADV_DEBUG_INVARIANT_GEOM},
- {"nodisplaydcc", RADV_DEBUG_NO_DISPLAY_DCC},
- {"notccompatcmask", RADV_DEBUG_NO_TC_COMPAT_CMASK},
- {"novrsflatshading", RADV_DEBUG_NO_VRS_FLAT_SHADING},
- {NULL, 0}
-};
+ {"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
+ {"nodcc", RADV_DEBUG_NO_DCC},
+ {"shaders", RADV_DEBUG_DUMP_SHADERS},
+ {"nocache", RADV_DEBUG_NO_CACHE},
+ {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
+ {"nohiz", RADV_DEBUG_NO_HIZ},
+ {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
+ {"allbos", RADV_DEBUG_ALL_BOS},
+ {"noibs", RADV_DEBUG_NO_IBS},
+ {"spirv", RADV_DEBUG_DUMP_SPIRV},
+ {"vmfaults", RADV_DEBUG_VM_FAULTS},
+ {"zerovram", RADV_DEBUG_ZERO_VRAM},
+ {"syncshaders", RADV_DEBUG_SYNC_SHADERS},
+ {"preoptir", RADV_DEBUG_PREOPTIR},
+ {"nodynamicbounds", RADV_DEBUG_NO_DYNAMIC_BOUNDS},
+ {"nooutoforder", RADV_DEBUG_NO_OUT_OF_ORDER},
+ {"info", RADV_DEBUG_INFO},
+ {"errors", RADV_DEBUG_ERRORS},
+ {"startup", RADV_DEBUG_STARTUP},
+ {"checkir", RADV_DEBUG_CHECKIR},
+ {"nothreadllvm", RADV_DEBUG_NOTHREADLLVM},
+ {"nobinning", RADV_DEBUG_NOBINNING},
+ {"nongg", RADV_DEBUG_NO_NGG},
+ {"metashaders", RADV_DEBUG_DUMP_META_SHADERS},
+ {"nomemorycache", RADV_DEBUG_NO_MEMORY_CACHE},
+ {"discardtodemote", RADV_DEBUG_DISCARD_TO_DEMOTE},
+ {"llvm", RADV_DEBUG_LLVM},
+ {"forcecompress", RADV_DEBUG_FORCE_COMPRESS},
+ {"hang", RADV_DEBUG_HANG},
+ {"img", RADV_DEBUG_IMG},
+ {"noumr", RADV_DEBUG_NO_UMR},
+ {"invariantgeom", RADV_DEBUG_INVARIANT_GEOM},
+ {"nodisplaydcc", RADV_DEBUG_NO_DISPLAY_DCC},
+ {"notccompatcmask", RADV_DEBUG_NO_TC_COMPAT_CMASK},
+ {"novrsflatshading", RADV_DEBUG_NO_VRS_FLAT_SHADING},
+ {NULL, 0}};
const char *
radv_get_debug_option_name(int id)
{
- assert(id < ARRAY_SIZE(radv_debug_options) - 1);
- return radv_debug_options[id].string;
+ assert(id < ARRAY_SIZE(radv_debug_options) - 1);
+ return radv_debug_options[id].string;
}
static const struct debug_control radv_perftest_options[] = {
- {"localbos", RADV_PERFTEST_LOCAL_BOS},
- {"dccmsaa", RADV_PERFTEST_DCC_MSAA},
- {"bolist", RADV_PERFTEST_BO_LIST},
- {"tccompatcmask", RADV_PERFTEST_TC_COMPAT_CMASK},
- {"cswave32", RADV_PERFTEST_CS_WAVE_32},
- {"pswave32", RADV_PERFTEST_PS_WAVE_32},
- {"gewave32", RADV_PERFTEST_GE_WAVE_32},
- {"dfsm", RADV_PERFTEST_DFSM},
- {"nosam", RADV_PERFTEST_NO_SAM},
- {"sam", RADV_PERFTEST_SAM},
- {"dccstores", RADV_PERFTEST_DCC_STORES},
- {NULL, 0}
-};
+ {"localbos", RADV_PERFTEST_LOCAL_BOS}, {"dccmsaa", RADV_PERFTEST_DCC_MSAA},
+ {"bolist", RADV_PERFTEST_BO_LIST}, {"tccompatcmask", RADV_PERFTEST_TC_COMPAT_CMASK},
+ {"cswave32", RADV_PERFTEST_CS_WAVE_32}, {"pswave32", RADV_PERFTEST_PS_WAVE_32},
+ {"gewave32", RADV_PERFTEST_GE_WAVE_32}, {"dfsm", RADV_PERFTEST_DFSM},
+ {"nosam", RADV_PERFTEST_NO_SAM}, {"sam", RADV_PERFTEST_SAM},
+ {"dccstores", RADV_PERFTEST_DCC_STORES}, {NULL, 0}};
const char *
radv_get_perftest_option_name(int id)
{
- assert(id < ARRAY_SIZE(radv_perftest_options) - 1);
- return radv_perftest_options[id].string;
+ assert(id < ARRAY_SIZE(radv_perftest_options) - 1);
+ return radv_perftest_options[id].string;
}
static void
-radv_handle_per_app_options(struct radv_instance *instance,
- const VkApplicationInfo *info)
-{
- const char *name = info ? info->pApplicationName : NULL;
- const char *engine_name = info ? info->pEngineName : NULL;
-
- if (name) {
- if (!strcmp(name, "DOOM_VFR")) {
- /* Work around a Doom VFR game bug */
- instance->debug_flags |= RADV_DEBUG_NO_DYNAMIC_BOUNDS;
- } else if (!strcmp(name, "Fledge")) {
- /*
- * Zero VRAM for "The Surge 2"
- *
- * This avoid a hang when when rendering any level. Likely
- * uninitialized data in an indirect draw.
- */
- instance->debug_flags |= RADV_DEBUG_ZERO_VRAM;
- } else if (!strcmp(name, "No Man's Sky")) {
- /* Work around a NMS game bug */
- instance->debug_flags |= RADV_DEBUG_DISCARD_TO_DEMOTE;
- } else if (!strcmp(name, "DOOMEternal")) {
- /* Zero VRAM for Doom Eternal to fix rendering issues. */
- instance->debug_flags |= RADV_DEBUG_ZERO_VRAM;
- } else if (!strcmp(name, "ShadowOfTheTomb")) {
- /* Work around flickering foliage for native Shadow of the Tomb Raider
- * on GFX10.3 */
- instance->debug_flags |= RADV_DEBUG_INVARIANT_GEOM;
- }
- }
-
- if (engine_name) {
- if (!strcmp(engine_name, "vkd3d")) {
- /* Zero VRAM for all VKD3D (DX12->VK) games to fix
- * rendering issues.
- */
- instance->debug_flags |= RADV_DEBUG_ZERO_VRAM;
- } else if (!strcmp(engine_name, "Quantic Dream Engine")) {
- /* Fix various artifacts in Detroit: Become Human */
- instance->debug_flags |= RADV_DEBUG_ZERO_VRAM |
- RADV_DEBUG_DISCARD_TO_DEMOTE;
-
- /* Fix rendering issues in Detroit: Become Human
- * because the game uses render loops (it
- * samples/renders from/to the same depth/stencil
- * texture inside the same draw) without input
- * attachments and that is invalid Vulkan usage.
- */
- instance->disable_tc_compat_htile_in_general = true;
- }
- }
-
- instance->enable_mrt_output_nan_fixup =
- driQueryOptionb(&instance->dri_options,
- "radv_enable_mrt_output_nan_fixup");
-
- instance->disable_shrink_image_store =
- driQueryOptionb(&instance->dri_options,
- "radv_disable_shrink_image_store");
-
- if (driQueryOptionb(&instance->dri_options, "radv_no_dynamic_bounds"))
- instance->debug_flags |= RADV_DEBUG_NO_DYNAMIC_BOUNDS;
+radv_handle_per_app_options(struct radv_instance *instance, const VkApplicationInfo *info)
+{
+ const char *name = info ? info->pApplicationName : NULL;
+ const char *engine_name = info ? info->pEngineName : NULL;
+
+ if (name) {
+ if (!strcmp(name, "DOOM_VFR")) {
+ /* Work around a Doom VFR game bug */
+ instance->debug_flags |= RADV_DEBUG_NO_DYNAMIC_BOUNDS;
+ } else if (!strcmp(name, "Fledge")) {
+ /*
+ * Zero VRAM for "The Surge 2"
+ *
+          * This avoids a hang when rendering any level. Likely
+ * uninitialized data in an indirect draw.
+ */
+ instance->debug_flags |= RADV_DEBUG_ZERO_VRAM;
+ } else if (!strcmp(name, "No Man's Sky")) {
+ /* Work around a NMS game bug */
+ instance->debug_flags |= RADV_DEBUG_DISCARD_TO_DEMOTE;
+ } else if (!strcmp(name, "DOOMEternal")) {
+ /* Zero VRAM for Doom Eternal to fix rendering issues. */
+ instance->debug_flags |= RADV_DEBUG_ZERO_VRAM;
+ } else if (!strcmp(name, "ShadowOfTheTomb")) {
+ /* Work around flickering foliage for native Shadow of the Tomb Raider
+ * on GFX10.3 */
+ instance->debug_flags |= RADV_DEBUG_INVARIANT_GEOM;
+ }
+ }
+
+ if (engine_name) {
+ if (!strcmp(engine_name, "vkd3d")) {
+ /* Zero VRAM for all VKD3D (DX12->VK) games to fix
+ * rendering issues.
+ */
+ instance->debug_flags |= RADV_DEBUG_ZERO_VRAM;
+ } else if (!strcmp(engine_name, "Quantic Dream Engine")) {
+ /* Fix various artifacts in Detroit: Become Human */
+ instance->debug_flags |= RADV_DEBUG_ZERO_VRAM | RADV_DEBUG_DISCARD_TO_DEMOTE;
+
+ /* Fix rendering issues in Detroit: Become Human
+ * because the game uses render loops (it
+ * samples/renders from/to the same depth/stencil
+ * texture inside the same draw) without input
+ * attachments and that is invalid Vulkan usage.
+ */
+ instance->disable_tc_compat_htile_in_general = true;
+ }
+ }
+
+ instance->enable_mrt_output_nan_fixup =
+ driQueryOptionb(&instance->dri_options, "radv_enable_mrt_output_nan_fixup");
+
+ instance->disable_shrink_image_store =
+ driQueryOptionb(&instance->dri_options, "radv_disable_shrink_image_store");
+
+ if (driQueryOptionb(&instance->dri_options, "radv_no_dynamic_bounds"))
+ instance->debug_flags |= RADV_DEBUG_NO_DYNAMIC_BOUNDS;
}
static const driOptionDescription radv_dri_options[] = {
- DRI_CONF_SECTION_PERFORMANCE
- DRI_CONF_ADAPTIVE_SYNC(true)
- DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
- DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
- DRI_CONF_VK_X11_ENSURE_MIN_IMAGE_COUNT(false)
- DRI_CONF_RADV_REPORT_LLVM9_VERSION_STRING(false)
- DRI_CONF_RADV_ENABLE_MRT_OUTPUT_NAN_FIXUP(false)
- DRI_CONF_RADV_DISABLE_SHRINK_IMAGE_STORE(false)
- DRI_CONF_RADV_NO_DYNAMIC_BOUNDS(false)
- DRI_CONF_RADV_OVERRIDE_UNIFORM_OFFSET_ALIGNMENT(0)
- DRI_CONF_SECTION_END
-
- DRI_CONF_SECTION_DEBUG
- DRI_CONF_OVERRIDE_VRAM_SIZE()
- DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false)
- DRI_CONF_SECTION_END
-};
+ DRI_CONF_SECTION_PERFORMANCE DRI_CONF_ADAPTIVE_SYNC(
+ true) DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0) DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
+ DRI_CONF_VK_X11_ENSURE_MIN_IMAGE_COUNT(false) DRI_CONF_RADV_REPORT_LLVM9_VERSION_STRING(false)
+ DRI_CONF_RADV_ENABLE_MRT_OUTPUT_NAN_FIXUP(false)
+ DRI_CONF_RADV_DISABLE_SHRINK_IMAGE_STORE(false) DRI_CONF_RADV_NO_DYNAMIC_BOUNDS(false)
+ DRI_CONF_RADV_OVERRIDE_UNIFORM_OFFSET_ALIGNMENT(0) DRI_CONF_SECTION_END
+
+ DRI_CONF_SECTION_DEBUG DRI_CONF_OVERRIDE_VRAM_SIZE()
+ DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false) DRI_CONF_SECTION_END};
-static void radv_init_dri_options(struct radv_instance *instance)
+static void
+radv_init_dri_options(struct radv_instance *instance)
{
- driParseOptionInfo(&instance->available_dri_options, radv_dri_options, ARRAY_SIZE(radv_dri_options));
- driParseConfigFiles(&instance->dri_options,
- &instance->available_dri_options,
- 0, "radv", NULL,
- instance->vk.app_info.app_name,
- instance->vk.app_info.app_version,
- instance->vk.app_info.engine_name,
- instance->vk.app_info.engine_version);
+ driParseOptionInfo(&instance->available_dri_options, radv_dri_options,
+ ARRAY_SIZE(radv_dri_options));
+ driParseConfigFiles(&instance->dri_options, &instance->available_dri_options, 0, "radv", NULL,
+ instance->vk.app_info.app_name, instance->vk.app_info.app_version,
+ instance->vk.app_info.engine_name, instance->vk.app_info.engine_version);
+}
+
+VkResult
+radv_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkInstance *pInstance)
+{
+ struct radv_instance *instance;
+ VkResult result;
+
+ if (!pAllocator)
+ pAllocator = &default_alloc;
+
+ instance = vk_zalloc(pAllocator, sizeof(*instance), 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
+ if (!instance)
+ return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ struct vk_instance_dispatch_table dispatch_table;
+ vk_instance_dispatch_table_from_entrypoints(&dispatch_table, &radv_instance_entrypoints, true);
+ result = vk_instance_init(&instance->vk, &radv_instance_extensions_supported, &dispatch_table,
+ pCreateInfo, pAllocator);
+ if (result != VK_SUCCESS) {
+ vk_free(pAllocator, instance);
+ return vk_error(instance, result);
+ }
+
+ instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"), radv_debug_options);
+
+ const char *radv_perftest_str = getenv("RADV_PERFTEST");
+ instance->perftest_flags = parse_debug_string(radv_perftest_str, radv_perftest_options);
+
+ if (radv_perftest_str) {
+ /* Output warnings for famous RADV_PERFTEST options that no
+ * longer exist or are deprecated.
+ */
+ if (strstr(radv_perftest_str, "aco")) {
+ fprintf(
+ stderr,
+ "*******************************************************************************\n");
+ fprintf(
+ stderr,
+ "* WARNING: Unknown option RADV_PERFTEST='aco'. ACO is enabled by default now. *\n");
+ fprintf(
+ stderr,
+ "*******************************************************************************\n");
+ }
+ if (strstr(radv_perftest_str, "llvm")) {
+ fprintf(
+ stderr,
+ "*********************************************************************************\n");
+ fprintf(
+ stderr,
+ "* WARNING: Unknown option 'RADV_PERFTEST=llvm'. Did you mean 'RADV_DEBUG=llvm'? *\n");
+ fprintf(
+ stderr,
+ "*********************************************************************************\n");
+ abort();
+ }
+ }
+
+ if (instance->debug_flags & RADV_DEBUG_STARTUP)
+ radv_logi("Created an instance");
+
+ instance->physical_devices_enumerated = false;
+ list_inithead(&instance->physical_devices);
+
+ VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
+
+ radv_init_dri_options(instance);
+ radv_handle_per_app_options(instance, pCreateInfo->pApplicationInfo);
+
+ *pInstance = radv_instance_to_handle(instance);
+
+ return VK_SUCCESS;
}
-VkResult radv_CreateInstance(
- const VkInstanceCreateInfo* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkInstance* pInstance)
+void
+radv_DestroyInstance(VkInstance _instance, const VkAllocationCallbacks *pAllocator)
{
- struct radv_instance *instance;
- VkResult result;
+ RADV_FROM_HANDLE(radv_instance, instance, _instance);
- if (!pAllocator)
- pAllocator = &default_alloc;
+ if (!instance)
+ return;
- instance = vk_zalloc(pAllocator, sizeof(*instance), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
- if (!instance)
- return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+ list_for_each_entry_safe(struct radv_physical_device, pdevice, &instance->physical_devices, link)
+ {
+ radv_physical_device_destroy(pdevice);
+ }
- struct vk_instance_dispatch_table dispatch_table;
- vk_instance_dispatch_table_from_entrypoints(
- &dispatch_table, &radv_instance_entrypoints, true);
- result = vk_instance_init(&instance->vk,
- &radv_instance_extensions_supported,
- &dispatch_table,
- pCreateInfo, pAllocator);
- if (result != VK_SUCCESS) {
- vk_free(pAllocator, instance);
- return vk_error(instance, result);
- }
+ VG(VALGRIND_DESTROY_MEMPOOL(instance));
- instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
- radv_debug_options);
+ driDestroyOptionCache(&instance->dri_options);
+ driDestroyOptionInfo(&instance->available_dri_options);
- const char *radv_perftest_str = getenv("RADV_PERFTEST");
- instance->perftest_flags = parse_debug_string(radv_perftest_str,
- radv_perftest_options);
+ vk_instance_finish(&instance->vk);
+ vk_free(&instance->vk.alloc, instance);
+}
- if (radv_perftest_str) {
- /* Output warnings for famous RADV_PERFTEST options that no
- * longer exist or are deprecated.
- */
- if (strstr(radv_perftest_str, "aco")) {
- fprintf(stderr, "*******************************************************************************\n");
- fprintf(stderr, "* WARNING: Unknown option RADV_PERFTEST='aco'. ACO is enabled by default now. *\n");
- fprintf(stderr, "*******************************************************************************\n");
- }
- if (strstr(radv_perftest_str, "llvm")) {
- fprintf(stderr, "*********************************************************************************\n");
- fprintf(stderr, "* WARNING: Unknown option 'RADV_PERFTEST=llvm'. Did you mean 'RADV_DEBUG=llvm'? *\n");
- fprintf(stderr, "*********************************************************************************\n");
- abort();
- }
- }
+static VkResult
+radv_enumerate_physical_devices(struct radv_instance *instance)
+{
+ if (instance->physical_devices_enumerated)
+ return VK_SUCCESS;
- if (instance->debug_flags & RADV_DEBUG_STARTUP)
- radv_logi("Created an instance");
+ instance->physical_devices_enumerated = true;
- instance->physical_devices_enumerated = false;
- list_inithead(&instance->physical_devices);
+ VkResult result = VK_SUCCESS;
- VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
+ if (getenv("RADV_FORCE_FAMILY")) {
+      /* When RADV_FORCE_FAMILY is set, the driver creates a null
+       * device that allows testing the compiler without having an
+ * AMDGPU instance.
+ */
+ struct radv_physical_device *pdevice;
- radv_init_dri_options(instance);
- radv_handle_per_app_options(instance, pCreateInfo->pApplicationInfo);
+ result = radv_physical_device_try_create(instance, NULL, &pdevice);
+ if (result != VK_SUCCESS)
+ return result;
- *pInstance = radv_instance_to_handle(instance);
+ list_addtail(&pdevice->link, &instance->physical_devices);
+ return VK_SUCCESS;
+ }
- return VK_SUCCESS;
+#ifndef _WIN32
+ /* TODO: Check for more devices ? */
+ drmDevicePtr devices[8];
+ int max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
+
+ if (instance->debug_flags & RADV_DEBUG_STARTUP)
+ radv_logi("Found %d drm nodes", max_devices);
+
+ if (max_devices < 1)
+ return vk_error(instance, VK_SUCCESS);
+
+ for (unsigned i = 0; i < (unsigned)max_devices; i++) {
+ if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
+ devices[i]->bustype == DRM_BUS_PCI &&
+ devices[i]->deviceinfo.pci->vendor_id == ATI_VENDOR_ID) {
+
+ struct radv_physical_device *pdevice;
+ result = radv_physical_device_try_create(instance, devices[i], &pdevice);
+ /* Incompatible DRM device, skip. */
+ if (result == VK_ERROR_INCOMPATIBLE_DRIVER) {
+ result = VK_SUCCESS;
+ continue;
+ }
+
+ /* Error creating the physical device, report the error. */
+ if (result != VK_SUCCESS)
+ break;
+
+ list_addtail(&pdevice->link, &instance->physical_devices);
+ }
+ }
+ drmFreeDevices(devices, max_devices);
+#endif
+
+ /* If we successfully enumerated any devices, call it success */
+ return result;
}
-void radv_DestroyInstance(
- VkInstance _instance,
- const VkAllocationCallbacks* pAllocator)
+VkResult
+radv_EnumeratePhysicalDevices(VkInstance _instance, uint32_t *pPhysicalDeviceCount,
+ VkPhysicalDevice *pPhysicalDevices)
{
- RADV_FROM_HANDLE(radv_instance, instance, _instance);
-
- if (!instance)
- return;
+ RADV_FROM_HANDLE(radv_instance, instance, _instance);
+ VK_OUTARRAY_MAKE_TYPED(VkPhysicalDevice, out, pPhysicalDevices, pPhysicalDeviceCount);
- list_for_each_entry_safe(struct radv_physical_device, pdevice,
- &instance->physical_devices, link) {
- radv_physical_device_destroy(pdevice);
- }
+ VkResult result = radv_enumerate_physical_devices(instance);
+ if (result != VK_SUCCESS)
+ return result;
- VG(VALGRIND_DESTROY_MEMPOOL(instance));
+ list_for_each_entry(struct radv_physical_device, pdevice, &instance->physical_devices, link)
+ {
+ vk_outarray_append_typed(VkPhysicalDevice, &out, i)
+ {
+ *i = radv_physical_device_to_handle(pdevice);
+ }
+ }
- driDestroyOptionCache(&instance->dri_options);
- driDestroyOptionInfo(&instance->available_dri_options);
-
- vk_instance_finish(&instance->vk);
- vk_free(&instance->vk.alloc, instance);
+ return vk_outarray_status(&out);
}
-static VkResult
-radv_enumerate_physical_devices(struct radv_instance *instance)
+VkResult
+radv_EnumeratePhysicalDeviceGroups(VkInstance _instance, uint32_t *pPhysicalDeviceGroupCount,
+ VkPhysicalDeviceGroupProperties *pPhysicalDeviceGroupProperties)
{
- if (instance->physical_devices_enumerated)
- return VK_SUCCESS;
-
- instance->physical_devices_enumerated = true;
-
- VkResult result = VK_SUCCESS;
+ RADV_FROM_HANDLE(radv_instance, instance, _instance);
+ VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceGroupProperties, out, pPhysicalDeviceGroupProperties,
+ pPhysicalDeviceGroupCount);
- if (getenv("RADV_FORCE_FAMILY")) {
- /* When RADV_FORCE_FAMILY is set, the driver creates a nul
- * device that allows to test the compiler without having an
- * AMDGPU instance.
- */
- struct radv_physical_device *pdevice;
+ VkResult result = radv_enumerate_physical_devices(instance);
+ if (result != VK_SUCCESS)
+ return result;
- result = radv_physical_device_try_create(instance, NULL, &pdevice);
- if (result != VK_SUCCESS)
- return result;
+ list_for_each_entry(struct radv_physical_device, pdevice, &instance->physical_devices, link)
+ {
+ vk_outarray_append_typed(VkPhysicalDeviceGroupProperties, &out, p)
+ {
+ p->physicalDeviceCount = 1;
+ memset(p->physicalDevices, 0, sizeof(p->physicalDevices));
+ p->physicalDevices[0] = radv_physical_device_to_handle(pdevice);
+ p->subsetAllocation = false;
+ }
+ }
- list_addtail(&pdevice->link, &instance->physical_devices);
- return VK_SUCCESS;
- }
-
-#ifndef _WIN32
- /* TODO: Check for more devices ? */
- drmDevicePtr devices[8];
- int max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
-
- if (instance->debug_flags & RADV_DEBUG_STARTUP)
- radv_logi("Found %d drm nodes", max_devices);
-
- if (max_devices < 1)
- return vk_error(instance, VK_SUCCESS);
-
- for (unsigned i = 0; i < (unsigned)max_devices; i++) {
- if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
- devices[i]->bustype == DRM_BUS_PCI &&
- devices[i]->deviceinfo.pci->vendor_id == ATI_VENDOR_ID) {
-
- struct radv_physical_device *pdevice;
- result = radv_physical_device_try_create(instance, devices[i],
- &pdevice);
- /* Incompatible DRM device, skip. */
- if (result == VK_ERROR_INCOMPATIBLE_DRIVER) {
- result = VK_SUCCESS;
- continue;
- }
-
- /* Error creating the physical device, report the error. */
- if (result != VK_SUCCESS)
- break;
-
- list_addtail(&pdevice->link, &instance->physical_devices);
- }
- }
- drmFreeDevices(devices, max_devices);
-#endif
+ return vk_outarray_status(&out);
+}
- /* If we successfully enumerated any devices, call it success */
- return result;
-}
-
-VkResult radv_EnumeratePhysicalDevices(
- VkInstance _instance,
- uint32_t* pPhysicalDeviceCount,
- VkPhysicalDevice* pPhysicalDevices)
-{
- RADV_FROM_HANDLE(radv_instance, instance, _instance);
- VK_OUTARRAY_MAKE_TYPED(VkPhysicalDevice, out, pPhysicalDevices,
- pPhysicalDeviceCount);
-
- VkResult result = radv_enumerate_physical_devices(instance);
- if (result != VK_SUCCESS)
- return result;
-
- list_for_each_entry(struct radv_physical_device, pdevice,
- &instance->physical_devices, link) {
- vk_outarray_append_typed(VkPhysicalDevice , &out, i) {
- *i = radv_physical_device_to_handle(pdevice);
- }
- }
-
- return vk_outarray_status(&out);
-}
-
-VkResult radv_EnumeratePhysicalDeviceGroups(
- VkInstance _instance,
- uint32_t* pPhysicalDeviceGroupCount,
- VkPhysicalDeviceGroupProperties* pPhysicalDeviceGroupProperties)
-{
- RADV_FROM_HANDLE(radv_instance, instance, _instance);
- VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceGroupProperties, out,
- pPhysicalDeviceGroupProperties,
- pPhysicalDeviceGroupCount);
-
- VkResult result = radv_enumerate_physical_devices(instance);
- if (result != VK_SUCCESS)
- return result;
-
- list_for_each_entry(struct radv_physical_device, pdevice,
- &instance->physical_devices, link) {
- vk_outarray_append_typed(VkPhysicalDeviceGroupProperties, &out, p) {
- p->physicalDeviceCount = 1;
- memset(p->physicalDevices, 0, sizeof(p->physicalDevices));
- p->physicalDevices[0] = radv_physical_device_to_handle(pdevice);
- p->subsetAllocation = false;
- }
- }
-
- return vk_outarray_status(&out);
-}
-
-void radv_GetPhysicalDeviceFeatures(
- VkPhysicalDevice physicalDevice,
- VkPhysicalDeviceFeatures* pFeatures)
-{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
- memset(pFeatures, 0, sizeof(*pFeatures));
-
- *pFeatures = (VkPhysicalDeviceFeatures) {
- .robustBufferAccess = true,
- .fullDrawIndexUint32 = true,
- .imageCubeArray = true,
- .independentBlend = true,
- .geometryShader = true,
- .tessellationShader = true,
- .sampleRateShading = true,
- .dualSrcBlend = true,
- .logicOp = true,
- .multiDrawIndirect = true,
- .drawIndirectFirstInstance = true,
- .depthClamp = true,
- .depthBiasClamp = true,
- .fillModeNonSolid = true,
- .depthBounds = true,
- .wideLines = true,
- .largePoints = true,
- .alphaToOne = false,
- .multiViewport = true,
- .samplerAnisotropy = true,
- .textureCompressionETC2 = radv_device_supports_etc(pdevice),
- .textureCompressionASTC_LDR = false,
- .textureCompressionBC = true,
- .occlusionQueryPrecise = true,
- .pipelineStatisticsQuery = true,
- .vertexPipelineStoresAndAtomics = true,
- .fragmentStoresAndAtomics = true,
- .shaderTessellationAndGeometryPointSize = true,
- .shaderImageGatherExtended = true,
- .shaderStorageImageExtendedFormats = true,
- .shaderStorageImageMultisample = true,
- .shaderUniformBufferArrayDynamicIndexing = true,
- .shaderSampledImageArrayDynamicIndexing = true,
- .shaderStorageBufferArrayDynamicIndexing = true,
- .shaderStorageImageArrayDynamicIndexing = true,
- .shaderStorageImageReadWithoutFormat = true,
- .shaderStorageImageWriteWithoutFormat = true,
- .shaderClipDistance = true,
- .shaderCullDistance = true,
- .shaderFloat64 = true,
- .shaderInt64 = true,
- .shaderInt16 = true,
- .sparseBinding = true,
- .sparseResidencyBuffer = pdevice->rad_info.family >= CHIP_POLARIS10,
- .sparseResidencyImage2D = pdevice->rad_info.family >= CHIP_POLARIS10,
- .sparseResidencyAliased = pdevice->rad_info.family >= CHIP_POLARIS10,
- .variableMultisampleRate = true,
- .shaderResourceMinLod = true,
- .shaderResourceResidency = true,
- .inheritedQueries = true,
- };
+void
+radv_GetPhysicalDeviceFeatures(VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures *pFeatures)
+{
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
+ memset(pFeatures, 0, sizeof(*pFeatures));
+
+ *pFeatures = (VkPhysicalDeviceFeatures){
+ .robustBufferAccess = true,
+ .fullDrawIndexUint32 = true,
+ .imageCubeArray = true,
+ .independentBlend = true,
+ .geometryShader = true,
+ .tessellationShader = true,
+ .sampleRateShading = true,
+ .dualSrcBlend = true,
+ .logicOp = true,
+ .multiDrawIndirect = true,
+ .drawIndirectFirstInstance = true,
+ .depthClamp = true,
+ .depthBiasClamp = true,
+ .fillModeNonSolid = true,
+ .depthBounds = true,
+ .wideLines = true,
+ .largePoints = true,
+ .alphaToOne = false,
+ .multiViewport = true,
+ .samplerAnisotropy = true,
+ .textureCompressionETC2 = radv_device_supports_etc(pdevice),
+ .textureCompressionASTC_LDR = false,
+ .textureCompressionBC = true,
+ .occlusionQueryPrecise = true,
+ .pipelineStatisticsQuery = true,
+ .vertexPipelineStoresAndAtomics = true,
+ .fragmentStoresAndAtomics = true,
+ .shaderTessellationAndGeometryPointSize = true,
+ .shaderImageGatherExtended = true,
+ .shaderStorageImageExtendedFormats = true,
+ .shaderStorageImageMultisample = true,
+ .shaderUniformBufferArrayDynamicIndexing = true,
+ .shaderSampledImageArrayDynamicIndexing = true,
+ .shaderStorageBufferArrayDynamicIndexing = true,
+ .shaderStorageImageArrayDynamicIndexing = true,
+ .shaderStorageImageReadWithoutFormat = true,
+ .shaderStorageImageWriteWithoutFormat = true,
+ .shaderClipDistance = true,
+ .shaderCullDistance = true,
+ .shaderFloat64 = true,
+ .shaderInt64 = true,
+ .shaderInt16 = true,
+ .sparseBinding = true,
+ .sparseResidencyBuffer = pdevice->rad_info.family >= CHIP_POLARIS10,
+ .sparseResidencyImage2D = pdevice->rad_info.family >= CHIP_POLARIS10,
+ .sparseResidencyAliased = pdevice->rad_info.family >= CHIP_POLARIS10,
+ .variableMultisampleRate = true,
+ .shaderResourceMinLod = true,
+ .shaderResourceResidency = true,
+ .inheritedQueries = true,
+ };
}
static void
radv_get_physical_device_features_1_1(struct radv_physical_device *pdevice,
- VkPhysicalDeviceVulkan11Features *f)
+ VkPhysicalDeviceVulkan11Features *f)
{
- assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES);
+ assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES);
- f->storageBuffer16BitAccess = true;
- f->uniformAndStorageBuffer16BitAccess = true;
- f->storagePushConstant16 = true;
- f->storageInputOutput16 = pdevice->rad_info.has_packed_math_16bit && (LLVM_VERSION_MAJOR >= 9 || !pdevice->use_llvm);
- f->multiview = true;
- f->multiviewGeometryShader = true;
- f->multiviewTessellationShader = true;
- f->variablePointersStorageBuffer = true;
- f->variablePointers = true;
- f->protectedMemory = false;
- f->samplerYcbcrConversion = true;
- f->shaderDrawParameters = true;
+ f->storageBuffer16BitAccess = true;
+ f->uniformAndStorageBuffer16BitAccess = true;
+ f->storagePushConstant16 = true;
+ f->storageInputOutput16 =
+ pdevice->rad_info.has_packed_math_16bit && (LLVM_VERSION_MAJOR >= 9 || !pdevice->use_llvm);
+ f->multiview = true;
+ f->multiviewGeometryShader = true;
+ f->multiviewTessellationShader = true;
+ f->variablePointersStorageBuffer = true;
+ f->variablePointers = true;
+ f->protectedMemory = false;
+ f->samplerYcbcrConversion = true;
+ f->shaderDrawParameters = true;
}
static void
radv_get_physical_device_features_1_2(struct radv_physical_device *pdevice,
- VkPhysicalDeviceVulkan12Features *f)
-{
- assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES);
-
- f->samplerMirrorClampToEdge = true;
- f->drawIndirectCount = true;
- f->storageBuffer8BitAccess = true;
- f->uniformAndStorageBuffer8BitAccess = true;
- f->storagePushConstant8 = true;
- f->shaderBufferInt64Atomics = LLVM_VERSION_MAJOR >= 9 || !pdevice->use_llvm;
- f->shaderSharedInt64Atomics = LLVM_VERSION_MAJOR >= 9 || !pdevice->use_llvm;
- f->shaderFloat16 = pdevice->rad_info.has_packed_math_16bit;
- f->shaderInt8 = true;
-
- f->descriptorIndexing = true;
- f->shaderInputAttachmentArrayDynamicIndexing = true;
- f->shaderUniformTexelBufferArrayDynamicIndexing = true;
- f->shaderStorageTexelBufferArrayDynamicIndexing = true;
- f->shaderUniformBufferArrayNonUniformIndexing = true;
- f->shaderSampledImageArrayNonUniformIndexing = true;
- f->shaderStorageBufferArrayNonUniformIndexing = true;
- f->shaderStorageImageArrayNonUniformIndexing = true;
- f->shaderInputAttachmentArrayNonUniformIndexing = true;
- f->shaderUniformTexelBufferArrayNonUniformIndexing = true;
- f->shaderStorageTexelBufferArrayNonUniformIndexing = true;
- f->descriptorBindingUniformBufferUpdateAfterBind = true;
- f->descriptorBindingSampledImageUpdateAfterBind = true;
- f->descriptorBindingStorageImageUpdateAfterBind = true;
- f->descriptorBindingStorageBufferUpdateAfterBind = true;
- f->descriptorBindingUniformTexelBufferUpdateAfterBind = true;
- f->descriptorBindingStorageTexelBufferUpdateAfterBind = true;
- f->descriptorBindingUpdateUnusedWhilePending = true;
- f->descriptorBindingPartiallyBound = true;
- f->descriptorBindingVariableDescriptorCount = true;
- f->runtimeDescriptorArray = true;
-
- f->samplerFilterMinmax = true;
- f->scalarBlockLayout = pdevice->rad_info.chip_class >= GFX7;
- f->imagelessFramebuffer = true;
- f->uniformBufferStandardLayout = true;
- f->shaderSubgroupExtendedTypes = true;
- f->separateDepthStencilLayouts = true;
- f->hostQueryReset = true;
- f->timelineSemaphore = true,
- f->bufferDeviceAddress = true;
- f->bufferDeviceAddressCaptureReplay = false;
- f->bufferDeviceAddressMultiDevice = false;
- f->vulkanMemoryModel = true;
- f->vulkanMemoryModelDeviceScope = true;
- f->vulkanMemoryModelAvailabilityVisibilityChains = false;
- f->shaderOutputViewportIndex = true;
- f->shaderOutputLayer = true;
- f->subgroupBroadcastDynamicId = true;
-}
-
-void radv_GetPhysicalDeviceFeatures2(
- VkPhysicalDevice physicalDevice,
- VkPhysicalDeviceFeatures2 *pFeatures)
-{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
- radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
-
- VkPhysicalDeviceVulkan11Features core_1_1 = {
- .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES,
- };
- radv_get_physical_device_features_1_1(pdevice, &core_1_1);
-
- VkPhysicalDeviceVulkan12Features core_1_2 = {
- .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
- };
- radv_get_physical_device_features_1_2(pdevice, &core_1_2);
-
-#define CORE_FEATURE(major, minor, feature) \
- features->feature = core_##major##_##minor.feature
-
- vk_foreach_struct(ext, pFeatures->pNext) {
- switch (ext->sType) {
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES: {
- VkPhysicalDeviceVariablePointersFeatures *features = (void *)ext;
- CORE_FEATURE(1, 1, variablePointersStorageBuffer);
- CORE_FEATURE(1, 1, variablePointers);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES: {
- VkPhysicalDeviceMultiviewFeatures *features = (VkPhysicalDeviceMultiviewFeatures*)ext;
- CORE_FEATURE(1, 1, multiview);
- CORE_FEATURE(1, 1, multiviewGeometryShader);
- CORE_FEATURE(1, 1, multiviewTessellationShader);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES: {
- VkPhysicalDeviceShaderDrawParametersFeatures *features =
- (VkPhysicalDeviceShaderDrawParametersFeatures*)ext;
- CORE_FEATURE(1, 1, shaderDrawParameters);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: {
- VkPhysicalDeviceProtectedMemoryFeatures *features =
- (VkPhysicalDeviceProtectedMemoryFeatures*)ext;
- CORE_FEATURE(1, 1, protectedMemory);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: {
- VkPhysicalDevice16BitStorageFeatures *features =
- (VkPhysicalDevice16BitStorageFeatures*)ext;
- CORE_FEATURE(1, 1, storageBuffer16BitAccess);
- CORE_FEATURE(1, 1, uniformAndStorageBuffer16BitAccess);
- CORE_FEATURE(1, 1, storagePushConstant16);
- CORE_FEATURE(1, 1, storageInputOutput16);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: {
- VkPhysicalDeviceSamplerYcbcrConversionFeatures *features =
- (VkPhysicalDeviceSamplerYcbcrConversionFeatures*)ext;
- CORE_FEATURE(1, 1, samplerYcbcrConversion);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES: {
- VkPhysicalDeviceDescriptorIndexingFeatures *features =
- (VkPhysicalDeviceDescriptorIndexingFeatures*)ext;
- CORE_FEATURE(1, 2, shaderInputAttachmentArrayDynamicIndexing);
- CORE_FEATURE(1, 2, shaderUniformTexelBufferArrayDynamicIndexing);
- CORE_FEATURE(1, 2, shaderStorageTexelBufferArrayDynamicIndexing);
- CORE_FEATURE(1, 2, shaderUniformBufferArrayNonUniformIndexing);
- CORE_FEATURE(1, 2, shaderSampledImageArrayNonUniformIndexing);
- CORE_FEATURE(1, 2, shaderStorageBufferArrayNonUniformIndexing);
- CORE_FEATURE(1, 2, shaderStorageImageArrayNonUniformIndexing);
- CORE_FEATURE(1, 2, shaderInputAttachmentArrayNonUniformIndexing);
- CORE_FEATURE(1, 2, shaderUniformTexelBufferArrayNonUniformIndexing);
- CORE_FEATURE(1, 2, shaderStorageTexelBufferArrayNonUniformIndexing);
- CORE_FEATURE(1, 2, descriptorBindingUniformBufferUpdateAfterBind);
- CORE_FEATURE(1, 2, descriptorBindingSampledImageUpdateAfterBind);
- CORE_FEATURE(1, 2, descriptorBindingStorageImageUpdateAfterBind);
- CORE_FEATURE(1, 2, descriptorBindingStorageBufferUpdateAfterBind);
- CORE_FEATURE(1, 2, descriptorBindingUniformTexelBufferUpdateAfterBind);
- CORE_FEATURE(1, 2, descriptorBindingStorageTexelBufferUpdateAfterBind);
- CORE_FEATURE(1, 2, descriptorBindingUpdateUnusedWhilePending);
- CORE_FEATURE(1, 2, descriptorBindingPartiallyBound);
- CORE_FEATURE(1, 2, descriptorBindingVariableDescriptorCount);
- CORE_FEATURE(1, 2, runtimeDescriptorArray);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: {
- VkPhysicalDeviceConditionalRenderingFeaturesEXT *features =
- (VkPhysicalDeviceConditionalRenderingFeaturesEXT*)ext;
- features->conditionalRendering = true;
- features->inheritedConditionalRendering = false;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: {
- VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features =
- (VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *)ext;
- features->vertexAttributeInstanceRateDivisor = true;
- features->vertexAttributeInstanceRateZeroDivisor = true;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT: {
- VkPhysicalDeviceTransformFeedbackFeaturesEXT *features =
- (VkPhysicalDeviceTransformFeedbackFeaturesEXT*)ext;
- features->transformFeedback = true;
- features->geometryStreams = !pdevice->use_ngg_streamout;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES: {
- VkPhysicalDeviceScalarBlockLayoutFeatures *features =
- (VkPhysicalDeviceScalarBlockLayoutFeatures *)ext;
- CORE_FEATURE(1, 2, scalarBlockLayout);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PRIORITY_FEATURES_EXT: {
- VkPhysicalDeviceMemoryPriorityFeaturesEXT *features =
- (VkPhysicalDeviceMemoryPriorityFeaturesEXT *)ext;
- features->memoryPriority = true;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_EXT: {
- VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *features =
- (VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *)ext;
- features->bufferDeviceAddress = true;
- features->bufferDeviceAddressCaptureReplay = false;
- features->bufferDeviceAddressMultiDevice = false;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES: {
- VkPhysicalDeviceBufferDeviceAddressFeatures *features =
- (VkPhysicalDeviceBufferDeviceAddressFeatures *)ext;
- CORE_FEATURE(1, 2, bufferDeviceAddress);
- CORE_FEATURE(1, 2, bufferDeviceAddressCaptureReplay);
- CORE_FEATURE(1, 2, bufferDeviceAddressMultiDevice);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT: {
- VkPhysicalDeviceDepthClipEnableFeaturesEXT *features =
- (VkPhysicalDeviceDepthClipEnableFeaturesEXT *)ext;
- features->depthClipEnable = true;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES: {
- VkPhysicalDeviceHostQueryResetFeatures *features =
- (VkPhysicalDeviceHostQueryResetFeatures *)ext;
- CORE_FEATURE(1, 2, hostQueryReset);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES: {
- VkPhysicalDevice8BitStorageFeatures *features =
- (VkPhysicalDevice8BitStorageFeatures *)ext;
- CORE_FEATURE(1, 2, storageBuffer8BitAccess);
- CORE_FEATURE(1, 2, uniformAndStorageBuffer8BitAccess);
- CORE_FEATURE(1, 2, storagePushConstant8);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES: {
- VkPhysicalDeviceShaderFloat16Int8Features *features =
- (VkPhysicalDeviceShaderFloat16Int8Features*)ext;
- CORE_FEATURE(1, 2, shaderFloat16);
- CORE_FEATURE(1, 2, shaderInt8);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES: {
- VkPhysicalDeviceShaderAtomicInt64Features *features =
- (VkPhysicalDeviceShaderAtomicInt64Features *)ext;
- CORE_FEATURE(1, 2, shaderBufferInt64Atomics);
- CORE_FEATURE(1, 2, shaderSharedInt64Atomics);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT: {
- VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *features =
- (VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *)ext;
- features->shaderDemoteToHelperInvocation = LLVM_VERSION_MAJOR >= 9 || !pdevice->use_llvm;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES_EXT: {
- VkPhysicalDeviceInlineUniformBlockFeaturesEXT *features =
- (VkPhysicalDeviceInlineUniformBlockFeaturesEXT *)ext;
-
- features->inlineUniformBlock = true;
- features->descriptorBindingInlineUniformBlockUpdateAfterBind = true;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_NV: {
- VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *features =
- (VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *)ext;
- features->computeDerivativeGroupQuads = false;
- features->computeDerivativeGroupLinear = true;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_YCBCR_IMAGE_ARRAYS_FEATURES_EXT: {
- VkPhysicalDeviceYcbcrImageArraysFeaturesEXT *features =
- (VkPhysicalDeviceYcbcrImageArraysFeaturesEXT*)ext;
- features->ycbcrImageArrays = true;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES: {
- VkPhysicalDeviceUniformBufferStandardLayoutFeatures *features =
- (VkPhysicalDeviceUniformBufferStandardLayoutFeatures *)ext;
- CORE_FEATURE(1, 2, uniformBufferStandardLayout);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT: {
- VkPhysicalDeviceIndexTypeUint8FeaturesEXT *features =
- (VkPhysicalDeviceIndexTypeUint8FeaturesEXT *)ext;
- features->indexTypeUint8 = pdevice->rad_info.chip_class >= GFX8;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGELESS_FRAMEBUFFER_FEATURES: {
- VkPhysicalDeviceImagelessFramebufferFeatures *features =
- (VkPhysicalDeviceImagelessFramebufferFeatures *)ext;
- CORE_FEATURE(1, 2, imagelessFramebuffer);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR: {
- VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *features =
- (VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *)ext;
- features->pipelineExecutableInfo = true;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CLOCK_FEATURES_KHR: {
- VkPhysicalDeviceShaderClockFeaturesKHR *features =
- (VkPhysicalDeviceShaderClockFeaturesKHR *)ext;
- features->shaderSubgroupClock = true;
- features->shaderDeviceClock = pdevice->rad_info.chip_class >= GFX8;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT: {
- VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *features =
- (VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *)ext;
- features->texelBufferAlignment = true;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES: {
- VkPhysicalDeviceTimelineSemaphoreFeatures *features =
- (VkPhysicalDeviceTimelineSemaphoreFeatures *) ext;
- CORE_FEATURE(1, 2, timelineSemaphore);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT: {
- VkPhysicalDeviceSubgroupSizeControlFeaturesEXT *features =
- (VkPhysicalDeviceSubgroupSizeControlFeaturesEXT *)ext;
- features->subgroupSizeControl = true;
- features->computeFullSubgroups = true;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COHERENT_MEMORY_FEATURES_AMD: {
- VkPhysicalDeviceCoherentMemoryFeaturesAMD *features =
- (VkPhysicalDeviceCoherentMemoryFeaturesAMD *)ext;
- features->deviceCoherentMemory = pdevice->rad_info.has_l2_uncached;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_EXTENDED_TYPES_FEATURES: {
- VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures *features =
- (VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures *)ext;
- CORE_FEATURE(1, 2, shaderSubgroupExtendedTypes);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SEPARATE_DEPTH_STENCIL_LAYOUTS_FEATURES_KHR: {
- VkPhysicalDeviceSeparateDepthStencilLayoutsFeaturesKHR *features =
- (VkPhysicalDeviceSeparateDepthStencilLayoutsFeaturesKHR *)ext;
- CORE_FEATURE(1, 2, separateDepthStencilLayouts);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES: {
- radv_get_physical_device_features_1_1(pdevice, (void *)ext);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES: {
- radv_get_physical_device_features_1_2(pdevice, (void *)ext);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT: {
- VkPhysicalDeviceLineRasterizationFeaturesEXT *features =
- (VkPhysicalDeviceLineRasterizationFeaturesEXT *)ext;
- features->rectangularLines = false;
- features->bresenhamLines = true;
- features->smoothLines = false;
- features->stippledRectangularLines = false;
- /* FIXME: Some stippled Bresenham CTS fails on Vega10
- * but work on Raven.
- */
- features->stippledBresenhamLines = pdevice->rad_info.chip_class != GFX9;
- features->stippledSmoothLines = false;
- break;
- }
- case VK_STRUCTURE_TYPE_DEVICE_MEMORY_OVERALLOCATION_CREATE_INFO_AMD: {
- VkDeviceMemoryOverallocationCreateInfoAMD *features =
- (VkDeviceMemoryOverallocationCreateInfoAMD *)ext;
- features->overallocationBehavior = true;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {
- VkPhysicalDeviceRobustness2FeaturesEXT *features =
- (VkPhysicalDeviceRobustness2FeaturesEXT *)ext;
- features->robustBufferAccess2 = true;
- features->robustImageAccess2 = true;
- features->nullDescriptor = true;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {
- VkPhysicalDeviceCustomBorderColorFeaturesEXT *features =
- (VkPhysicalDeviceCustomBorderColorFeaturesEXT *)ext;
- features->customBorderColors = true;
- features->customBorderColorWithoutFormat = true;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIVATE_DATA_FEATURES_EXT: {
- VkPhysicalDevicePrivateDataFeaturesEXT *features =
- (VkPhysicalDevicePrivateDataFeaturesEXT *)ext;
- features->privateData = true;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_CREATION_CACHE_CONTROL_FEATURES_EXT: {
- VkPhysicalDevicePipelineCreationCacheControlFeaturesEXT *features =
- (VkPhysicalDevicePipelineCreationCacheControlFeaturesEXT *)ext;
- features-> pipelineCreationCacheControl = true;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_MEMORY_MODEL_FEATURES_KHR: {
- VkPhysicalDeviceVulkanMemoryModelFeaturesKHR *features =
- (VkPhysicalDeviceVulkanMemoryModelFeaturesKHR *)ext;
- CORE_FEATURE(1, 2, vulkanMemoryModel);
- CORE_FEATURE(1, 2, vulkanMemoryModelDeviceScope);
- CORE_FEATURE(1, 2, vulkanMemoryModelAvailabilityVisibilityChains);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT: {
- VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *features =
- (VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *) ext;
- features->extendedDynamicState = true;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_ROBUSTNESS_FEATURES_EXT: {
- VkPhysicalDeviceImageRobustnessFeaturesEXT *features =
- (VkPhysicalDeviceImageRobustnessFeaturesEXT *)ext;
- features->robustImageAccess = true;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT: {
- VkPhysicalDeviceShaderAtomicFloatFeaturesEXT *features =
- (VkPhysicalDeviceShaderAtomicFloatFeaturesEXT *)ext;
- features->shaderBufferFloat32Atomics = true;
- features->shaderBufferFloat32AtomicAdd = false;
- features->shaderBufferFloat64Atomics = true;
- features->shaderBufferFloat64AtomicAdd = false;
- features->shaderSharedFloat32Atomics = true;
- features->shaderSharedFloat32AtomicAdd = pdevice->rad_info.chip_class >= GFX8 &&
- (!pdevice->use_llvm || LLVM_VERSION_MAJOR >= 10);
- features->shaderSharedFloat64Atomics = true;
- features->shaderSharedFloat64AtomicAdd = false;
- features->shaderImageFloat32Atomics = true;
- features->shaderImageFloat32AtomicAdd = false;
- features->sparseImageFloat32Atomics = true;
- features->sparseImageFloat32AtomicAdd = false;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_4444_FORMATS_FEATURES_EXT: {
- VkPhysicalDevice4444FormatsFeaturesEXT *features =
- (VkPhysicalDevice4444FormatsFeaturesEXT *)ext;
- features->formatA4R4G4B4 = true;
- features->formatA4B4G4R4 = true;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_TERMINATE_INVOCATION_FEATURES_KHR: {
- VkPhysicalDeviceShaderTerminateInvocationFeaturesKHR *features =
- (VkPhysicalDeviceShaderTerminateInvocationFeaturesKHR *)ext;
- features->shaderTerminateInvocation = true;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_IMAGE_ATOMIC_INT64_FEATURES_EXT: {
- VkPhysicalDeviceShaderImageAtomicInt64FeaturesEXT *features =
- (VkPhysicalDeviceShaderImageAtomicInt64FeaturesEXT *)ext;
- features->shaderImageInt64Atomics = LLVM_VERSION_MAJOR >= 11 || !pdevice->use_llvm;
- features->sparseImageInt64Atomics = LLVM_VERSION_MAJOR >= 11 || !pdevice->use_llvm;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MUTABLE_DESCRIPTOR_TYPE_FEATURES_VALVE: {
- VkPhysicalDeviceMutableDescriptorTypeFeaturesVALVE *features =
- (VkPhysicalDeviceMutableDescriptorTypeFeaturesVALVE *)ext;
- features->mutableDescriptorType = true;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR: {
- VkPhysicalDeviceFragmentShadingRateFeaturesKHR *features =
- (VkPhysicalDeviceFragmentShadingRateFeaturesKHR *)ext;
- features->pipelineFragmentShadingRate = true;
- features->primitiveFragmentShadingRate = true;
- features->attachmentFragmentShadingRate = false; /* TODO */
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR: {
- VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR *features =
- (VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR *)ext;
- features->workgroupMemoryExplicitLayout = true;
- features->workgroupMemoryExplicitLayoutScalarBlockLayout = true;
- features->workgroupMemoryExplicitLayout8BitAccess = true;
- features->workgroupMemoryExplicitLayout16BitAccess = true;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ZERO_INITIALIZE_WORKGROUP_MEMORY_FEATURES_KHR: {
- VkPhysicalDeviceZeroInitializeWorkgroupMemoryFeaturesKHR *features =
- (VkPhysicalDeviceZeroInitializeWorkgroupMemoryFeaturesKHR*)ext;
- features->shaderZeroInitializeWorkgroupMemory = true;
- break;
- }
- default:
- break;
- }
- }
+ VkPhysicalDeviceVulkan12Features *f)
+{
+ assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES);
+
+ f->samplerMirrorClampToEdge = true;
+ f->drawIndirectCount = true;
+ f->storageBuffer8BitAccess = true;
+ f->uniformAndStorageBuffer8BitAccess = true;
+ f->storagePushConstant8 = true;
+ f->shaderBufferInt64Atomics = LLVM_VERSION_MAJOR >= 9 || !pdevice->use_llvm;
+ f->shaderSharedInt64Atomics = LLVM_VERSION_MAJOR >= 9 || !pdevice->use_llvm;
+ f->shaderFloat16 = pdevice->rad_info.has_packed_math_16bit;
+ f->shaderInt8 = true;
+
+ f->descriptorIndexing = true;
+ f->shaderInputAttachmentArrayDynamicIndexing = true;
+ f->shaderUniformTexelBufferArrayDynamicIndexing = true;
+ f->shaderStorageTexelBufferArrayDynamicIndexing = true;
+ f->shaderUniformBufferArrayNonUniformIndexing = true;
+ f->shaderSampledImageArrayNonUniformIndexing = true;
+ f->shaderStorageBufferArrayNonUniformIndexing = true;
+ f->shaderStorageImageArrayNonUniformIndexing = true;
+ f->shaderInputAttachmentArrayNonUniformIndexing = true;
+ f->shaderUniformTexelBufferArrayNonUniformIndexing = true;
+ f->shaderStorageTexelBufferArrayNonUniformIndexing = true;
+ f->descriptorBindingUniformBufferUpdateAfterBind = true;
+ f->descriptorBindingSampledImageUpdateAfterBind = true;
+ f->descriptorBindingStorageImageUpdateAfterBind = true;
+ f->descriptorBindingStorageBufferUpdateAfterBind = true;
+ f->descriptorBindingUniformTexelBufferUpdateAfterBind = true;
+ f->descriptorBindingStorageTexelBufferUpdateAfterBind = true;
+ f->descriptorBindingUpdateUnusedWhilePending = true;
+ f->descriptorBindingPartiallyBound = true;
+ f->descriptorBindingVariableDescriptorCount = true;
+ f->runtimeDescriptorArray = true;
+
+ f->samplerFilterMinmax = true;
+ f->scalarBlockLayout = pdevice->rad_info.chip_class >= GFX7;
+ f->imagelessFramebuffer = true;
+ f->uniformBufferStandardLayout = true;
+ f->shaderSubgroupExtendedTypes = true;
+ f->separateDepthStencilLayouts = true;
+ f->hostQueryReset = true;
+ f->timelineSemaphore = true;
+ f->bufferDeviceAddress = true;
+ f->bufferDeviceAddressCaptureReplay = false;
+ f->bufferDeviceAddressMultiDevice = false;
+ f->vulkanMemoryModel = true;
+ f->vulkanMemoryModelDeviceScope = true;
+ f->vulkanMemoryModelAvailabilityVisibilityChains = false;
+ f->shaderOutputViewportIndex = true;
+ f->shaderOutputLayer = true;
+ f->subgroupBroadcastDynamicId = true;
+}
+
+void
+radv_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
+ VkPhysicalDeviceFeatures2 *pFeatures)
+{
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
+ radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
+
+ VkPhysicalDeviceVulkan11Features core_1_1 = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES,
+ };
+ radv_get_physical_device_features_1_1(pdevice, &core_1_1);
+
+ VkPhysicalDeviceVulkan12Features core_1_2 = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
+ };
+ radv_get_physical_device_features_1_2(pdevice, &core_1_2);
+
+#define CORE_FEATURE(major, minor, feature) features->feature = core_##major##_##minor.feature
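/* Illustration: through token pasting, a call like CORE_FEATURE(1, 2, scalarBlockLayout);
 * expands to
 *    features->scalarBlockLayout = core_1_2.scalarBlockLayout;
 * so each extension feature struct chained below is filled from the core 1.1/1.2
 * structs queried above. */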
+
+ vk_foreach_struct(ext, pFeatures->pNext)
+ {
+ switch (ext->sType) {
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES: {
+ VkPhysicalDeviceVariablePointersFeatures *features = (void *)ext;
+ CORE_FEATURE(1, 1, variablePointersStorageBuffer);
+ CORE_FEATURE(1, 1, variablePointers);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES: {
+ VkPhysicalDeviceMultiviewFeatures *features = (VkPhysicalDeviceMultiviewFeatures *)ext;
+ CORE_FEATURE(1, 1, multiview);
+ CORE_FEATURE(1, 1, multiviewGeometryShader);
+ CORE_FEATURE(1, 1, multiviewTessellationShader);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES: {
+ VkPhysicalDeviceShaderDrawParametersFeatures *features =
+ (VkPhysicalDeviceShaderDrawParametersFeatures *)ext;
+ CORE_FEATURE(1, 1, shaderDrawParameters);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: {
+ VkPhysicalDeviceProtectedMemoryFeatures *features =
+ (VkPhysicalDeviceProtectedMemoryFeatures *)ext;
+ CORE_FEATURE(1, 1, protectedMemory);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: {
+ VkPhysicalDevice16BitStorageFeatures *features =
+ (VkPhysicalDevice16BitStorageFeatures *)ext;
+ CORE_FEATURE(1, 1, storageBuffer16BitAccess);
+ CORE_FEATURE(1, 1, uniformAndStorageBuffer16BitAccess);
+ CORE_FEATURE(1, 1, storagePushConstant16);
+ CORE_FEATURE(1, 1, storageInputOutput16);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: {
+ VkPhysicalDeviceSamplerYcbcrConversionFeatures *features =
+ (VkPhysicalDeviceSamplerYcbcrConversionFeatures *)ext;
+ CORE_FEATURE(1, 1, samplerYcbcrConversion);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES: {
+ VkPhysicalDeviceDescriptorIndexingFeatures *features =
+ (VkPhysicalDeviceDescriptorIndexingFeatures *)ext;
+ CORE_FEATURE(1, 2, shaderInputAttachmentArrayDynamicIndexing);
+ CORE_FEATURE(1, 2, shaderUniformTexelBufferArrayDynamicIndexing);
+ CORE_FEATURE(1, 2, shaderStorageTexelBufferArrayDynamicIndexing);
+ CORE_FEATURE(1, 2, shaderUniformBufferArrayNonUniformIndexing);
+ CORE_FEATURE(1, 2, shaderSampledImageArrayNonUniformIndexing);
+ CORE_FEATURE(1, 2, shaderStorageBufferArrayNonUniformIndexing);
+ CORE_FEATURE(1, 2, shaderStorageImageArrayNonUniformIndexing);
+ CORE_FEATURE(1, 2, shaderInputAttachmentArrayNonUniformIndexing);
+ CORE_FEATURE(1, 2, shaderUniformTexelBufferArrayNonUniformIndexing);
+ CORE_FEATURE(1, 2, shaderStorageTexelBufferArrayNonUniformIndexing);
+ CORE_FEATURE(1, 2, descriptorBindingUniformBufferUpdateAfterBind);
+ CORE_FEATURE(1, 2, descriptorBindingSampledImageUpdateAfterBind);
+ CORE_FEATURE(1, 2, descriptorBindingStorageImageUpdateAfterBind);
+ CORE_FEATURE(1, 2, descriptorBindingStorageBufferUpdateAfterBind);
+ CORE_FEATURE(1, 2, descriptorBindingUniformTexelBufferUpdateAfterBind);
+ CORE_FEATURE(1, 2, descriptorBindingStorageTexelBufferUpdateAfterBind);
+ CORE_FEATURE(1, 2, descriptorBindingUpdateUnusedWhilePending);
+ CORE_FEATURE(1, 2, descriptorBindingPartiallyBound);
+ CORE_FEATURE(1, 2, descriptorBindingVariableDescriptorCount);
+ CORE_FEATURE(1, 2, runtimeDescriptorArray);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: {
+ VkPhysicalDeviceConditionalRenderingFeaturesEXT *features =
+ (VkPhysicalDeviceConditionalRenderingFeaturesEXT *)ext;
+ features->conditionalRendering = true;
+ features->inheritedConditionalRendering = false;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: {
+ VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features =
+ (VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *)ext;
+ features->vertexAttributeInstanceRateDivisor = true;
+ features->vertexAttributeInstanceRateZeroDivisor = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT: {
+ VkPhysicalDeviceTransformFeedbackFeaturesEXT *features =
+ (VkPhysicalDeviceTransformFeedbackFeaturesEXT *)ext;
+ features->transformFeedback = true;
+ features->geometryStreams = !pdevice->use_ngg_streamout;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES: {
+ VkPhysicalDeviceScalarBlockLayoutFeatures *features =
+ (VkPhysicalDeviceScalarBlockLayoutFeatures *)ext;
+ CORE_FEATURE(1, 2, scalarBlockLayout);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PRIORITY_FEATURES_EXT: {
+ VkPhysicalDeviceMemoryPriorityFeaturesEXT *features =
+ (VkPhysicalDeviceMemoryPriorityFeaturesEXT *)ext;
+ features->memoryPriority = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_EXT: {
+ VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *features =
+ (VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *)ext;
+ features->bufferDeviceAddress = true;
+ features->bufferDeviceAddressCaptureReplay = false;
+ features->bufferDeviceAddressMultiDevice = false;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES: {
+ VkPhysicalDeviceBufferDeviceAddressFeatures *features =
+ (VkPhysicalDeviceBufferDeviceAddressFeatures *)ext;
+ CORE_FEATURE(1, 2, bufferDeviceAddress);
+ CORE_FEATURE(1, 2, bufferDeviceAddressCaptureReplay);
+ CORE_FEATURE(1, 2, bufferDeviceAddressMultiDevice);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT: {
+ VkPhysicalDeviceDepthClipEnableFeaturesEXT *features =
+ (VkPhysicalDeviceDepthClipEnableFeaturesEXT *)ext;
+ features->depthClipEnable = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES: {
+ VkPhysicalDeviceHostQueryResetFeatures *features =
+ (VkPhysicalDeviceHostQueryResetFeatures *)ext;
+ CORE_FEATURE(1, 2, hostQueryReset);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES: {
+ VkPhysicalDevice8BitStorageFeatures *features = (VkPhysicalDevice8BitStorageFeatures *)ext;
+ CORE_FEATURE(1, 2, storageBuffer8BitAccess);
+ CORE_FEATURE(1, 2, uniformAndStorageBuffer8BitAccess);
+ CORE_FEATURE(1, 2, storagePushConstant8);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES: {
+ VkPhysicalDeviceShaderFloat16Int8Features *features =
+ (VkPhysicalDeviceShaderFloat16Int8Features *)ext;
+ CORE_FEATURE(1, 2, shaderFloat16);
+ CORE_FEATURE(1, 2, shaderInt8);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES: {
+ VkPhysicalDeviceShaderAtomicInt64Features *features =
+ (VkPhysicalDeviceShaderAtomicInt64Features *)ext;
+ CORE_FEATURE(1, 2, shaderBufferInt64Atomics);
+ CORE_FEATURE(1, 2, shaderSharedInt64Atomics);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT: {
+ VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *features =
+ (VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *)ext;
+ features->shaderDemoteToHelperInvocation = LLVM_VERSION_MAJOR >= 9 || !pdevice->use_llvm;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES_EXT: {
+ VkPhysicalDeviceInlineUniformBlockFeaturesEXT *features =
+ (VkPhysicalDeviceInlineUniformBlockFeaturesEXT *)ext;
+
+ features->inlineUniformBlock = true;
+ features->descriptorBindingInlineUniformBlockUpdateAfterBind = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_NV: {
+ VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *features =
+ (VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *)ext;
+ features->computeDerivativeGroupQuads = false;
+ features->computeDerivativeGroupLinear = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_YCBCR_IMAGE_ARRAYS_FEATURES_EXT: {
+ VkPhysicalDeviceYcbcrImageArraysFeaturesEXT *features =
+ (VkPhysicalDeviceYcbcrImageArraysFeaturesEXT *)ext;
+ features->ycbcrImageArrays = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES: {
+ VkPhysicalDeviceUniformBufferStandardLayoutFeatures *features =
+ (VkPhysicalDeviceUniformBufferStandardLayoutFeatures *)ext;
+ CORE_FEATURE(1, 2, uniformBufferStandardLayout);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT: {
+ VkPhysicalDeviceIndexTypeUint8FeaturesEXT *features =
+ (VkPhysicalDeviceIndexTypeUint8FeaturesEXT *)ext;
+ features->indexTypeUint8 = pdevice->rad_info.chip_class >= GFX8;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGELESS_FRAMEBUFFER_FEATURES: {
+ VkPhysicalDeviceImagelessFramebufferFeatures *features =
+ (VkPhysicalDeviceImagelessFramebufferFeatures *)ext;
+ CORE_FEATURE(1, 2, imagelessFramebuffer);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR: {
+ VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *features =
+ (VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *)ext;
+ features->pipelineExecutableInfo = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CLOCK_FEATURES_KHR: {
+ VkPhysicalDeviceShaderClockFeaturesKHR *features =
+ (VkPhysicalDeviceShaderClockFeaturesKHR *)ext;
+ features->shaderSubgroupClock = true;
+ features->shaderDeviceClock = pdevice->rad_info.chip_class >= GFX8;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT: {
+ VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *features =
+ (VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *)ext;
+ features->texelBufferAlignment = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES: {
+ VkPhysicalDeviceTimelineSemaphoreFeatures *features =
+ (VkPhysicalDeviceTimelineSemaphoreFeatures *)ext;
+ CORE_FEATURE(1, 2, timelineSemaphore);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT: {
+ VkPhysicalDeviceSubgroupSizeControlFeaturesEXT *features =
+ (VkPhysicalDeviceSubgroupSizeControlFeaturesEXT *)ext;
+ features->subgroupSizeControl = true;
+ features->computeFullSubgroups = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COHERENT_MEMORY_FEATURES_AMD: {
+ VkPhysicalDeviceCoherentMemoryFeaturesAMD *features =
+ (VkPhysicalDeviceCoherentMemoryFeaturesAMD *)ext;
+ features->deviceCoherentMemory = pdevice->rad_info.has_l2_uncached;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_EXTENDED_TYPES_FEATURES: {
+ VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures *features =
+ (VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures *)ext;
+ CORE_FEATURE(1, 2, shaderSubgroupExtendedTypes);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SEPARATE_DEPTH_STENCIL_LAYOUTS_FEATURES_KHR: {
+ VkPhysicalDeviceSeparateDepthStencilLayoutsFeaturesKHR *features =
+ (VkPhysicalDeviceSeparateDepthStencilLayoutsFeaturesKHR *)ext;
+ CORE_FEATURE(1, 2, separateDepthStencilLayouts);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES: {
+ radv_get_physical_device_features_1_1(pdevice, (void *)ext);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES: {
+ radv_get_physical_device_features_1_2(pdevice, (void *)ext);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT: {
+ VkPhysicalDeviceLineRasterizationFeaturesEXT *features =
+ (VkPhysicalDeviceLineRasterizationFeaturesEXT *)ext;
+ features->rectangularLines = false;
+ features->bresenhamLines = true;
+ features->smoothLines = false;
+ features->stippledRectangularLines = false;
+ /* FIXME: Some stippled Bresenham CTS tests fail on Vega10
+  * but pass on Raven.
+  */
+ features->stippledBresenhamLines = pdevice->rad_info.chip_class != GFX9;
+ features->stippledSmoothLines = false;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_DEVICE_MEMORY_OVERALLOCATION_CREATE_INFO_AMD: {
+ VkDeviceMemoryOverallocationCreateInfoAMD *features =
+ (VkDeviceMemoryOverallocationCreateInfoAMD *)ext;
+ features->overallocationBehavior = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {
+ VkPhysicalDeviceRobustness2FeaturesEXT *features =
+ (VkPhysicalDeviceRobustness2FeaturesEXT *)ext;
+ features->robustBufferAccess2 = true;
+ features->robustImageAccess2 = true;
+ features->nullDescriptor = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {
+ VkPhysicalDeviceCustomBorderColorFeaturesEXT *features =
+ (VkPhysicalDeviceCustomBorderColorFeaturesEXT *)ext;
+ features->customBorderColors = true;
+ features->customBorderColorWithoutFormat = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIVATE_DATA_FEATURES_EXT: {
+ VkPhysicalDevicePrivateDataFeaturesEXT *features =
+ (VkPhysicalDevicePrivateDataFeaturesEXT *)ext;
+ features->privateData = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_CREATION_CACHE_CONTROL_FEATURES_EXT: {
+ VkPhysicalDevicePipelineCreationCacheControlFeaturesEXT *features =
+ (VkPhysicalDevicePipelineCreationCacheControlFeaturesEXT *)ext;
+ features->pipelineCreationCacheControl = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_MEMORY_MODEL_FEATURES_KHR: {
+ VkPhysicalDeviceVulkanMemoryModelFeaturesKHR *features =
+ (VkPhysicalDeviceVulkanMemoryModelFeaturesKHR *)ext;
+ CORE_FEATURE(1, 2, vulkanMemoryModel);
+ CORE_FEATURE(1, 2, vulkanMemoryModelDeviceScope);
+ CORE_FEATURE(1, 2, vulkanMemoryModelAvailabilityVisibilityChains);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT: {
+ VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *features =
+ (VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *)ext;
+ features->extendedDynamicState = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_ROBUSTNESS_FEATURES_EXT: {
+ VkPhysicalDeviceImageRobustnessFeaturesEXT *features =
+ (VkPhysicalDeviceImageRobustnessFeaturesEXT *)ext;
+ features->robustImageAccess = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT: {
+ VkPhysicalDeviceShaderAtomicFloatFeaturesEXT *features =
+ (VkPhysicalDeviceShaderAtomicFloatFeaturesEXT *)ext;
+ features->shaderBufferFloat32Atomics = true;
+ features->shaderBufferFloat32AtomicAdd = false;
+ features->shaderBufferFloat64Atomics = true;
+ features->shaderBufferFloat64AtomicAdd = false;
+ features->shaderSharedFloat32Atomics = true;
+ features->shaderSharedFloat32AtomicAdd = pdevice->rad_info.chip_class >= GFX8 &&
+ (!pdevice->use_llvm || LLVM_VERSION_MAJOR >= 10);
+ features->shaderSharedFloat64Atomics = true;
+ features->shaderSharedFloat64AtomicAdd = false;
+ features->shaderImageFloat32Atomics = true;
+ features->shaderImageFloat32AtomicAdd = false;
+ features->sparseImageFloat32Atomics = true;
+ features->sparseImageFloat32AtomicAdd = false;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_4444_FORMATS_FEATURES_EXT: {
+ VkPhysicalDevice4444FormatsFeaturesEXT *features =
+ (VkPhysicalDevice4444FormatsFeaturesEXT *)ext;
+ features->formatA4R4G4B4 = true;
+ features->formatA4B4G4R4 = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_TERMINATE_INVOCATION_FEATURES_KHR: {
+ VkPhysicalDeviceShaderTerminateInvocationFeaturesKHR *features =
+ (VkPhysicalDeviceShaderTerminateInvocationFeaturesKHR *)ext;
+ features->shaderTerminateInvocation = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_IMAGE_ATOMIC_INT64_FEATURES_EXT: {
+ VkPhysicalDeviceShaderImageAtomicInt64FeaturesEXT *features =
+ (VkPhysicalDeviceShaderImageAtomicInt64FeaturesEXT *)ext;
+ features->shaderImageInt64Atomics = LLVM_VERSION_MAJOR >= 11 || !pdevice->use_llvm;
+ features->sparseImageInt64Atomics = LLVM_VERSION_MAJOR >= 11 || !pdevice->use_llvm;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MUTABLE_DESCRIPTOR_TYPE_FEATURES_VALVE: {
+ VkPhysicalDeviceMutableDescriptorTypeFeaturesVALVE *features =
+ (VkPhysicalDeviceMutableDescriptorTypeFeaturesVALVE *)ext;
+ features->mutableDescriptorType = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR: {
+ VkPhysicalDeviceFragmentShadingRateFeaturesKHR *features =
+ (VkPhysicalDeviceFragmentShadingRateFeaturesKHR *)ext;
+ features->pipelineFragmentShadingRate = true;
+ features->primitiveFragmentShadingRate = true;
+ features->attachmentFragmentShadingRate = false; /* TODO */
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR: {
+ VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR *features =
+ (VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR *)ext;
+ features->workgroupMemoryExplicitLayout = true;
+ features->workgroupMemoryExplicitLayoutScalarBlockLayout = true;
+ features->workgroupMemoryExplicitLayout8BitAccess = true;
+ features->workgroupMemoryExplicitLayout16BitAccess = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ZERO_INITIALIZE_WORKGROUP_MEMORY_FEATURES_KHR: {
+ VkPhysicalDeviceZeroInitializeWorkgroupMemoryFeaturesKHR *features =
+ (VkPhysicalDeviceZeroInitializeWorkgroupMemoryFeaturesKHR *)ext;
+ features->shaderZeroInitializeWorkgroupMemory = true;
+ break;
+ }
+ default:
+ break;
+ }
+ }
#undef CORE_FEATURE
}
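For context, radv_GetPhysicalDeviceFeatures2 above is the driver entry point behind the core vkGetPhysicalDeviceFeatures2 call. A minimal application-side sketch (assuming vulkan.h is included and physical_device is a valid VkPhysicalDevice handle; names come from the core Vulkan 1.2 API, not from this commit):

   VkPhysicalDeviceVulkan12Features vk12 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
   };
   VkPhysicalDeviceFeatures2 features2 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
      .pNext = &vk12,
   };
   /* Dispatches to radv_GetPhysicalDeviceFeatures2, which fills features2.features
    * and then walks the pNext chain, hitting the VULKAN_1_2_FEATURES case. */
   vkGetPhysicalDeviceFeatures2(physical_device, &features2);
   /* vk12.timelineSemaphore, vk12.bufferDeviceAddress, etc. now hold the driver's answers. */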
static size_t
radv_max_descriptor_set_size()
{
- /* make sure that the entire descriptor set is addressable with a signed
- * 32-bit int. So the sum of all limits scaled by descriptor size has to
- * be at most 2 GiB. the combined image & samples object count as one of
- * both. This limit is for the pipeline layout, not for the set layout, but
- * there is no set limit, so we just set a pipeline limit. I don't think
- * any app is going to hit this soon. */
- return ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS
- - MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_INLINE_UNIFORM_BLOCK_COUNT) /
- (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
- 32 /* storage buffer, 32 due to potential space wasted on alignment */ +
- 32 /* sampler, largest when combined with image */ +
- 64 /* sampled image */ +
- 64 /* storage image */);
+ /* Make sure that the entire descriptor set is addressable with a signed
+  * 32-bit int. So the sum of all limits scaled by descriptor size has to
+  * be at most 2 GiB. A combined image & sampler object counts as one of
+  * each. This limit is for the pipeline layout, not for the set layout, but
+  * there is no set limit, so we just set a pipeline limit. I don't think
+  * any app is going to hit this soon. */
+ return ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS -
+ MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_INLINE_UNIFORM_BLOCK_COUNT) /
+ (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
+ 32 /* storage buffer, 32 due to potential space wasted on alignment */ +
+ 32 /* sampler, largest when combined with image */ + 64 /* sampled image */ +
+ 64 /* storage image */);
}
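/* Worked out, the divisor above is 32 + 32 + 32 + 64 + 64 = 224 bytes per
 * worst-case descriptor, so the returned limit is roughly (2 GiB minus the
 * space reserved for dynamic buffers and inline uniform blocks) / 224
 * descriptors. */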
static uint32_t
radv_uniform_buffer_offset_alignment(const struct radv_physical_device *pdevice)
{
- uint32_t uniform_offset_alignment = driQueryOptioni(&pdevice->instance->dri_options,
- "radv_override_uniform_offset_alignment");
- if (!util_is_power_of_two_or_zero(uniform_offset_alignment)) {
- fprintf(stderr, "ERROR: invalid radv_override_uniform_offset_alignment setting %d:"
- "not a power of two\n", uniform_offset_alignment);
- uniform_offset_alignment = 0;
- }
-
- /* Take at least the hardware limit. */
- return MAX2(uniform_offset_alignment, 4);
-}
-
-void radv_GetPhysicalDeviceProperties(
- VkPhysicalDevice physicalDevice,
- VkPhysicalDeviceProperties* pProperties)
-{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
- VkSampleCountFlags sample_counts = 0xf;
-
- size_t max_descriptor_set_size = radv_max_descriptor_set_size();
-
- VkPhysicalDeviceLimits limits = {
- .maxImageDimension1D = (1 << 14),
- .maxImageDimension2D = (1 << 14),
- .maxImageDimension3D = (1 << 11),
- .maxImageDimensionCube = (1 << 14),
- .maxImageArrayLayers = (1 << 11),
- .maxTexelBufferElements = UINT32_MAX,
- .maxUniformBufferRange = UINT32_MAX,
- .maxStorageBufferRange = UINT32_MAX,
- .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
- .maxMemoryAllocationCount = UINT32_MAX,
- .maxSamplerAllocationCount = 64 * 1024,
- .bufferImageGranularity = 64, /* A cache line */
- .sparseAddressSpaceSize = RADV_MAX_MEMORY_ALLOCATION_SIZE, /* buffer max size */
- .maxBoundDescriptorSets = MAX_SETS,
- .maxPerStageDescriptorSamplers = max_descriptor_set_size,
- .maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
- .maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,
- .maxPerStageDescriptorSampledImages = max_descriptor_set_size,
- .maxPerStageDescriptorStorageImages = max_descriptor_set_size,
- .maxPerStageDescriptorInputAttachments = max_descriptor_set_size,
- .maxPerStageResources = max_descriptor_set_size,
- .maxDescriptorSetSamplers = max_descriptor_set_size,
- .maxDescriptorSetUniformBuffers = max_descriptor_set_size,
- .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
- .maxDescriptorSetStorageBuffers = max_descriptor_set_size,
- .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
- .maxDescriptorSetSampledImages = max_descriptor_set_size,
- .maxDescriptorSetStorageImages = max_descriptor_set_size,
- .maxDescriptorSetInputAttachments = max_descriptor_set_size,
- .maxVertexInputAttributes = MAX_VERTEX_ATTRIBS,
- .maxVertexInputBindings = MAX_VBS,
- .maxVertexInputAttributeOffset = 2047,
- .maxVertexInputBindingStride = 2048,
- .maxVertexOutputComponents = 128,
- .maxTessellationGenerationLevel = 64,
- .maxTessellationPatchSize = 32,
- .maxTessellationControlPerVertexInputComponents = 128,
- .maxTessellationControlPerVertexOutputComponents = 128,
- .maxTessellationControlPerPatchOutputComponents = 120,
- .maxTessellationControlTotalOutputComponents = 4096,
- .maxTessellationEvaluationInputComponents = 128,
- .maxTessellationEvaluationOutputComponents = 128,
- .maxGeometryShaderInvocations = 127,
- .maxGeometryInputComponents = 64,
- .maxGeometryOutputComponents = 128,
- .maxGeometryOutputVertices = 256,
- .maxGeometryTotalOutputComponents = 1024,
- .maxFragmentInputComponents = 128,
- .maxFragmentOutputAttachments = 8,
- .maxFragmentDualSrcAttachments = 1,
- .maxFragmentCombinedOutputResources = 8,
- .maxComputeSharedMemorySize = 32768,
- .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
- .maxComputeWorkGroupInvocations = 1024,
- .maxComputeWorkGroupSize = {
- 1024,
- 1024,
- 1024
- },
- .subPixelPrecisionBits = 8,
- .subTexelPrecisionBits = 8,
- .mipmapPrecisionBits = 8,
- .maxDrawIndexedIndexValue = UINT32_MAX,
- .maxDrawIndirectCount = UINT32_MAX,
- .maxSamplerLodBias = 16,
- .maxSamplerAnisotropy = 16,
- .maxViewports = MAX_VIEWPORTS,
- .maxViewportDimensions = { (1 << 14), (1 << 14) },
- .viewportBoundsRange = { INT16_MIN, INT16_MAX },
- .viewportSubPixelBits = 8,
- .minMemoryMapAlignment = 4096, /* A page */
- .minTexelBufferOffsetAlignment = 4,
- .minUniformBufferOffsetAlignment = radv_uniform_buffer_offset_alignment(pdevice),
- .minStorageBufferOffsetAlignment = 4,
- .minTexelOffset = -32,
- .maxTexelOffset = 31,
- .minTexelGatherOffset = -32,
- .maxTexelGatherOffset = 31,
- .minInterpolationOffset = -2,
- .maxInterpolationOffset = 2,
- .subPixelInterpolationOffsetBits = 8,
- .maxFramebufferWidth = (1 << 14),
- .maxFramebufferHeight = (1 << 14),
- .maxFramebufferLayers = (1 << 10),
- .framebufferColorSampleCounts = sample_counts,
- .framebufferDepthSampleCounts = sample_counts,
- .framebufferStencilSampleCounts = sample_counts,
- .framebufferNoAttachmentsSampleCounts = sample_counts,
- .maxColorAttachments = MAX_RTS,
- .sampledImageColorSampleCounts = sample_counts,
- .sampledImageIntegerSampleCounts = sample_counts,
- .sampledImageDepthSampleCounts = sample_counts,
- .sampledImageStencilSampleCounts = sample_counts,
- .storageImageSampleCounts = sample_counts,
- .maxSampleMaskWords = 1,
- .timestampComputeAndGraphics = true,
- .timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
- .maxClipDistances = 8,
- .maxCullDistances = 8,
- .maxCombinedClipAndCullDistances = 8,
- .discreteQueuePriorities = 2,
- .pointSizeRange = { 0.0, 8191.875 },
- .lineWidthRange = { 0.0, 8191.875 },
- .pointSizeGranularity = (1.0 / 8.0),
- .lineWidthGranularity = (1.0 / 8.0),
- .strictLines = false, /* FINISHME */
- .standardSampleLocations = true,
- .optimalBufferCopyOffsetAlignment = 128,
- .optimalBufferCopyRowPitchAlignment = 128,
- .nonCoherentAtomSize = 64,
- };
-
- *pProperties = (VkPhysicalDeviceProperties) {
- .apiVersion = RADV_API_VERSION,
- .driverVersion = vk_get_driver_version(),
- .vendorID = ATI_VENDOR_ID,
- .deviceID = pdevice->rad_info.pci_id,
- .deviceType = pdevice->rad_info.has_dedicated_vram ? VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU : VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
- .limits = limits,
- .sparseProperties = {
- .residencyNonResidentStrict = pdevice->rad_info.family >= CHIP_POLARIS10,
- .residencyStandard2DBlockShape = pdevice->rad_info.family >= CHIP_POLARIS10,
- },
- };
-
- strcpy(pProperties->deviceName, pdevice->name);
- memcpy(pProperties->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);
+ uint32_t uniform_offset_alignment =
+ driQueryOptioni(&pdevice->instance->dri_options, "radv_override_uniform_offset_alignment");
+ if (!util_is_power_of_two_or_zero(uniform_offset_alignment)) {
+ fprintf(stderr,
+ "ERROR: invalid radv_override_uniform_offset_alignment setting %d: "
+ "not a power of two\n",
+ uniform_offset_alignment);
+ uniform_offset_alignment = 0;
+ }
+
+ /* Take at least the hardware limit. */
+ return MAX2(uniform_offset_alignment, 4);
+}
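/* Example behaviour, assuming the driconf default for the option is 0: 0 passes
 * util_is_power_of_two_or_zero, and MAX2(0, 4) keeps the 4-byte hardware minimum.
 * A hypothetical override of 256 yields MAX2(256, 4) = 256, while a
 * non-power-of-two value such as 96 is rejected above and also falls back to 4. */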
+
+void
+radv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
+ VkPhysicalDeviceProperties *pProperties)
+{
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
+ VkSampleCountFlags sample_counts = 0xf;
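/* 0xf == VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT |
 * VK_SAMPLE_COUNT_8_BIT, i.e. 1/2/4/8 samples are advertised for every
 * sample-count limit below. */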
+
+ size_t max_descriptor_set_size = radv_max_descriptor_set_size();
+
+ VkPhysicalDeviceLimits limits = {
+ .maxImageDimension1D = (1 << 14),
+ .maxImageDimension2D = (1 << 14),
+ .maxImageDimension3D = (1 << 11),
+ .maxImageDimensionCube = (1 << 14),
+ .maxImageArrayLayers = (1 << 11),
+ .maxTexelBufferElements = UINT32_MAX,
+ .maxUniformBufferRange = UINT32_MAX,
+ .maxStorageBufferRange = UINT32_MAX,
+ .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
+ .maxMemoryAllocationCount = UINT32_MAX,
+ .maxSamplerAllocationCount = 64 * 1024,
+ .bufferImageGranularity = 64, /* A cache line */
+ .sparseAddressSpaceSize = RADV_MAX_MEMORY_ALLOCATION_SIZE, /* buffer max size */
+ .maxBoundDescriptorSets = MAX_SETS,
+ .maxPerStageDescriptorSamplers = max_descriptor_set_size,
+ .maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
+ .maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,
+ .maxPerStageDescriptorSampledImages = max_descriptor_set_size,
+ .maxPerStageDescriptorStorageImages = max_descriptor_set_size,
+ .maxPerStageDescriptorInputAttachments = max_descriptor_set_size,
+ .maxPerStageResources = max_descriptor_set_size,
+ .maxDescriptorSetSamplers = max_descriptor_set_size,
+ .maxDescriptorSetUniformBuffers = max_descriptor_set_size,
+ .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
+ .maxDescriptorSetStorageBuffers = max_descriptor_set_size,
+ .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
+ .maxDescriptorSetSampledImages = max_descriptor_set_size,
+ .maxDescriptorSetStorageImages = max_descriptor_set_size,
+ .maxDescriptorSetInputAttachments = max_descriptor_set_size,
+ .maxVertexInputAttributes = MAX_VERTEX_ATTRIBS,
+ .maxVertexInputBindings = MAX_VBS,
+ .maxVertexInputAttributeOffset = 2047,
+ .maxVertexInputBindingStride = 2048,
+ .maxVertexOutputComponents = 128,
+ .maxTessellationGenerationLevel = 64,
+ .maxTessellationPatchSize = 32,
+ .maxTessellationControlPerVertexInputComponents = 128,
+ .maxTessellationControlPerVertexOutputComponents = 128,
+ .maxTessellationControlPerPatchOutputComponents = 120,
+ .maxTessellationControlTotalOutputComponents = 4096,
+ .maxTessellationEvaluationInputComponents = 128,
+ .maxTessellationEvaluationOutputComponents = 128,
+ .maxGeometryShaderInvocations = 127,
+ .maxGeometryInputComponents = 64,
+ .maxGeometryOutputComponents = 128,
+ .maxGeometryOutputVertices = 256,
+ .maxGeometryTotalOutputComponents = 1024,
+ .maxFragmentInputComponents = 128,
+ .maxFragmentOutputAttachments = 8,
+ .maxFragmentDualSrcAttachments = 1,
+ .maxFragmentCombinedOutputResources = 8,
+ .maxComputeSharedMemorySize = 32768,
+ .maxComputeWorkGroupCount = {65535, 65535, 65535},
+ .maxComputeWorkGroupInvocations = 1024,
+ .maxComputeWorkGroupSize = {1024, 1024, 1024},
+ .subPixelPrecisionBits = 8,
+ .subTexelPrecisionBits = 8,
+ .mipmapPrecisionBits = 8,
+ .maxDrawIndexedIndexValue = UINT32_MAX,
+ .maxDrawIndirectCount = UINT32_MAX,
+ .maxSamplerLodBias = 16,
+ .maxSamplerAnisotropy = 16,
+ .maxViewports = MAX_VIEWPORTS,
+ .maxViewportDimensions = {(1 << 14), (1 << 14)},
+ .viewportBoundsRange = {INT16_MIN, INT16_MAX},
+ .viewportSubPixelBits = 8,
+ .minMemoryMapAlignment = 4096, /* A page */
+ .minTexelBufferOffsetAlignment = 4,
+ .minUniformBufferOffsetAlignment = radv_uniform_buffer_offset_alignment(pdevice),
+ .minStorageBufferOffsetAlignment = 4,
+ .minTexelOffset = -32,
+ .maxTexelOffset = 31,
+ .minTexelGatherOffset = -32,
+ .maxTexelGatherOffset = 31,
+ .minInterpolationOffset = -2,
+ .maxInterpolationOffset = 2,
+ .subPixelInterpolationOffsetBits = 8,
+ .maxFramebufferWidth = (1 << 14),
+ .maxFramebufferHeight = (1 << 14),
+ .maxFramebufferLayers = (1 << 10),
+ .framebufferColorSampleCounts = sample_counts,
+ .framebufferDepthSampleCounts = sample_counts,
+ .framebufferStencilSampleCounts = sample_counts,
+ .framebufferNoAttachmentsSampleCounts = sample_counts,
+ .maxColorAttachments = MAX_RTS,
+ .sampledImageColorSampleCounts = sample_counts,
+ .sampledImageIntegerSampleCounts = sample_counts,
+ .sampledImageDepthSampleCounts = sample_counts,
+ .sampledImageStencilSampleCounts = sample_counts,
+ .storageImageSampleCounts = sample_counts,
+ .maxSampleMaskWords = 1,
+ .timestampComputeAndGraphics = true,
+ .timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
+ .maxClipDistances = 8,
+ .maxCullDistances = 8,
+ .maxCombinedClipAndCullDistances = 8,
+ .discreteQueuePriorities = 2,
+ .pointSizeRange = {0.0, 8191.875},
+ .lineWidthRange = {0.0, 8191.875},
+ .pointSizeGranularity = (1.0 / 8.0),
+ .lineWidthGranularity = (1.0 / 8.0),
+ .strictLines = false, /* FINISHME */
+ .standardSampleLocations = true,
+ .optimalBufferCopyOffsetAlignment = 128,
+ .optimalBufferCopyRowPitchAlignment = 128,
+ .nonCoherentAtomSize = 64,
+ };
+
+ *pProperties = (VkPhysicalDeviceProperties){
+ .apiVersion = RADV_API_VERSION,
+ .driverVersion = vk_get_driver_version(),
+ .vendorID = ATI_VENDOR_ID,
+ .deviceID = pdevice->rad_info.pci_id,
+ .deviceType = pdevice->rad_info.has_dedicated_vram ? VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU
+ : VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
+ .limits = limits,
+ .sparseProperties =
+ {
+ .residencyNonResidentStrict = pdevice->rad_info.family >= CHIP_POLARIS10,
+ .residencyStandard2DBlockShape = pdevice->rad_info.family >= CHIP_POLARIS10,
+ },
+ };
+
+ strcpy(pProperties->deviceName, pdevice->name);
+ memcpy(pProperties->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);
}
static void
radv_get_physical_device_properties_1_1(struct radv_physical_device *pdevice,
- VkPhysicalDeviceVulkan11Properties *p)
-{
- assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES);
-
- memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
- memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
- memset(p->deviceLUID, 0, VK_LUID_SIZE);
- /* The LUID is for Windows. */
- p->deviceLUIDValid = false;
- p->deviceNodeMask = 0;
-
- p->subgroupSize = RADV_SUBGROUP_SIZE;
- p->subgroupSupportedStages = VK_SHADER_STAGE_ALL_GRAPHICS |
- VK_SHADER_STAGE_COMPUTE_BIT;
- p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
- VK_SUBGROUP_FEATURE_VOTE_BIT |
- VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
- VK_SUBGROUP_FEATURE_BALLOT_BIT |
- VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
- VK_SUBGROUP_FEATURE_QUAD_BIT |
- VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
- VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT;
- p->subgroupQuadOperationsInAllStages = true;
-
- p->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES;
- p->maxMultiviewViewCount = MAX_VIEWS;
- p->maxMultiviewInstanceIndex = INT_MAX;
- p->protectedNoFault = false;
- p->maxPerSetDescriptors = RADV_MAX_PER_SET_DESCRIPTORS;
- p->maxMemoryAllocationSize = RADV_MAX_MEMORY_ALLOCATION_SIZE;
+ VkPhysicalDeviceVulkan11Properties *p)
+{
+ assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES);
+
+ memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
+ memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
+ memset(p->deviceLUID, 0, VK_LUID_SIZE);
+ /* The LUID is for Windows. */
+ p->deviceLUIDValid = false;
+ p->deviceNodeMask = 0;
+
+ p->subgroupSize = RADV_SUBGROUP_SIZE;
+ p->subgroupSupportedStages = VK_SHADER_STAGE_ALL_GRAPHICS | VK_SHADER_STAGE_COMPUTE_BIT;
+ p->subgroupSupportedOperations =
+ VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_VOTE_BIT |
+ VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT |
+ VK_SUBGROUP_FEATURE_CLUSTERED_BIT | VK_SUBGROUP_FEATURE_QUAD_BIT |
+ VK_SUBGROUP_FEATURE_SHUFFLE_BIT | VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT;
+ p->subgroupQuadOperationsInAllStages = true;
+
+ p->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES;
+ p->maxMultiviewViewCount = MAX_VIEWS;
+ p->maxMultiviewInstanceIndex = INT_MAX;
+ p->protectedNoFault = false;
+ p->maxPerSetDescriptors = RADV_MAX_PER_SET_DESCRIPTORS;
+ p->maxMemoryAllocationSize = RADV_MAX_MEMORY_ALLOCATION_SIZE;
}
static void
radv_get_physical_device_properties_1_2(struct radv_physical_device *pdevice,
- VkPhysicalDeviceVulkan12Properties *p)
-{
- assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES);
-
- p->driverID = VK_DRIVER_ID_MESA_RADV;
- snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE, "radv");
- snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
- "Mesa " PACKAGE_VERSION MESA_GIT_SHA1 " (%s)",
- radv_get_compiler_string(pdevice));
- p->conformanceVersion = (VkConformanceVersion) {
- .major = 1,
- .minor = 2,
- .subminor = 3,
- .patch = 0,
- };
-
- /* On AMD hardware, denormals and rounding modes for fp16/fp64 are
- * controlled by the same config register.
- */
- if (pdevice->rad_info.has_packed_math_16bit) {
- p->denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY_KHR;
- p->roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY_KHR;
- } else {
- p->denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR;
- p->roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR;
- }
-
- /* With LLVM, do not allow both preserving and flushing denorms because
- * different shaders in the same pipeline can have different settings and
- * this won't work for merged shaders. To make it work, this requires LLVM
- * support for changing the register. The same logic applies for the
- * rounding modes because they are configured with the same config
- * register.
- */
- p->shaderDenormFlushToZeroFloat32 = true;
- p->shaderDenormPreserveFloat32 = !pdevice->use_llvm;
- p->shaderRoundingModeRTEFloat32 = true;
- p->shaderRoundingModeRTZFloat32 = !pdevice->use_llvm;
- p->shaderSignedZeroInfNanPreserveFloat32 = true;
-
- p->shaderDenormFlushToZeroFloat16 = pdevice->rad_info.has_packed_math_16bit && !pdevice->use_llvm;
- p->shaderDenormPreserveFloat16 = pdevice->rad_info.has_packed_math_16bit;
- p->shaderRoundingModeRTEFloat16 = pdevice->rad_info.has_packed_math_16bit;
- p->shaderRoundingModeRTZFloat16 = pdevice->rad_info.has_packed_math_16bit && !pdevice->use_llvm;
- p->shaderSignedZeroInfNanPreserveFloat16 = pdevice->rad_info.has_packed_math_16bit;
-
- p->shaderDenormFlushToZeroFloat64 = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_llvm;
- p->shaderDenormPreserveFloat64 = pdevice->rad_info.chip_class >= GFX8;
- p->shaderRoundingModeRTEFloat64 = pdevice->rad_info.chip_class >= GFX8;
- p->shaderRoundingModeRTZFloat64 = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_llvm;
- p->shaderSignedZeroInfNanPreserveFloat64 = pdevice->rad_info.chip_class >= GFX8;
-
- p->maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX / 64;
- p->shaderUniformBufferArrayNonUniformIndexingNative = false;
- p->shaderSampledImageArrayNonUniformIndexingNative = false;
- p->shaderStorageBufferArrayNonUniformIndexingNative = false;
- p->shaderStorageImageArrayNonUniformIndexingNative = false;
- p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
- p->robustBufferAccessUpdateAfterBind = true;
- p->quadDivergentImplicitLod = false;
-
- size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS -
- MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_INLINE_UNIFORM_BLOCK_COUNT) /
- (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
- 32 /* storage buffer, 32 due to potential space wasted on alignment */ +
- 32 /* sampler, largest when combined with image */ +
- 64 /* sampled image */ +
- 64 /* storage image */);
- p->maxPerStageDescriptorUpdateAfterBindSamplers = max_descriptor_set_size;
- p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = max_descriptor_set_size;
- p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = max_descriptor_set_size;
- p->maxPerStageDescriptorUpdateAfterBindSampledImages = max_descriptor_set_size;
- p->maxPerStageDescriptorUpdateAfterBindStorageImages = max_descriptor_set_size;
- p->maxPerStageDescriptorUpdateAfterBindInputAttachments = max_descriptor_set_size;
- p->maxPerStageUpdateAfterBindResources = max_descriptor_set_size;
- p->maxDescriptorSetUpdateAfterBindSamplers = max_descriptor_set_size;
- p->maxDescriptorSetUpdateAfterBindUniformBuffers = max_descriptor_set_size;
- p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
- p->maxDescriptorSetUpdateAfterBindStorageBuffers = max_descriptor_set_size;
- p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
- p->maxDescriptorSetUpdateAfterBindSampledImages = max_descriptor_set_size;
- p->maxDescriptorSetUpdateAfterBindStorageImages = max_descriptor_set_size;
- p->maxDescriptorSetUpdateAfterBindInputAttachments = max_descriptor_set_size;
-
- /* We support all of the depth resolve modes */
- p->supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR |
- VK_RESOLVE_MODE_AVERAGE_BIT_KHR |
- VK_RESOLVE_MODE_MIN_BIT_KHR |
- VK_RESOLVE_MODE_MAX_BIT_KHR;
-
- /* Average doesn't make sense for stencil so we don't support that */
- p->supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR |
- VK_RESOLVE_MODE_MIN_BIT_KHR |
- VK_RESOLVE_MODE_MAX_BIT_KHR;
-
- p->independentResolveNone = true;
- p->independentResolve = true;
-
- /* GFX6-8 only support single channel min/max filter. */
- p->filterMinmaxImageComponentMapping = pdevice->rad_info.chip_class >= GFX9;
- p->filterMinmaxSingleComponentFormats = true;
-
- p->maxTimelineSemaphoreValueDifference = UINT64_MAX;
-
- p->framebufferIntegerColorSampleCounts = VK_SAMPLE_COUNT_1_BIT;
-}
-
-void radv_GetPhysicalDeviceProperties2(
- VkPhysicalDevice physicalDevice,
- VkPhysicalDeviceProperties2 *pProperties)
-{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
- radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
-
- VkPhysicalDeviceVulkan11Properties core_1_1 = {
- .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES,
- };
- radv_get_physical_device_properties_1_1(pdevice, &core_1_1);
-
- VkPhysicalDeviceVulkan12Properties core_1_2 = {
- .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES,
- };
- radv_get_physical_device_properties_1_2(pdevice, &core_1_2);
-
-#define CORE_RENAMED_PROPERTY(major, minor, ext_property, core_property) \
- memcpy(&properties->ext_property, &core_##major##_##minor.core_property, \
+ VkPhysicalDeviceVulkan12Properties *p)
+{
+ assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES);
+
+ p->driverID = VK_DRIVER_ID_MESA_RADV;
+ snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE, "radv");
+ snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE, "Mesa " PACKAGE_VERSION MESA_GIT_SHA1 " (%s)",
+ radv_get_compiler_string(pdevice));
+ p->conformanceVersion = (VkConformanceVersion){
+ .major = 1,
+ .minor = 2,
+ .subminor = 3,
+ .patch = 0,
+ };
+
+ /* On AMD hardware, denormals and rounding modes for fp16/fp64 are
+ * controlled by the same config register.
+ */
+ if (pdevice->rad_info.has_packed_math_16bit) {
+ p->denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY_KHR;
+ p->roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY_KHR;
+ } else {
+ p->denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR;
+ p->roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR;
+ }
+
+ /* With LLVM, do not allow both preserving and flushing denorms because
+ * different shaders in the same pipeline can have different settings and
+ * this won't work for merged shaders. To make it work, this requires LLVM
+ * support for changing the register. The same logic applies for the
+ * rounding modes because they are configured with the same config
+ * register.
+ */
+ p->shaderDenormFlushToZeroFloat32 = true;
+ p->shaderDenormPreserveFloat32 = !pdevice->use_llvm;
+ p->shaderRoundingModeRTEFloat32 = true;
+ p->shaderRoundingModeRTZFloat32 = !pdevice->use_llvm;
+ p->shaderSignedZeroInfNanPreserveFloat32 = true;
+
+ p->shaderDenormFlushToZeroFloat16 =
+ pdevice->rad_info.has_packed_math_16bit && !pdevice->use_llvm;
+ p->shaderDenormPreserveFloat16 = pdevice->rad_info.has_packed_math_16bit;
+ p->shaderRoundingModeRTEFloat16 = pdevice->rad_info.has_packed_math_16bit;
+ p->shaderRoundingModeRTZFloat16 = pdevice->rad_info.has_packed_math_16bit && !pdevice->use_llvm;
+ p->shaderSignedZeroInfNanPreserveFloat16 = pdevice->rad_info.has_packed_math_16bit;
+
+ p->shaderDenormFlushToZeroFloat64 = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_llvm;
+ p->shaderDenormPreserveFloat64 = pdevice->rad_info.chip_class >= GFX8;
+ p->shaderRoundingModeRTEFloat64 = pdevice->rad_info.chip_class >= GFX8;
+ p->shaderRoundingModeRTZFloat64 = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_llvm;
+ p->shaderSignedZeroInfNanPreserveFloat64 = pdevice->rad_info.chip_class >= GFX8;
+
+ p->maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX / 64;
+ p->shaderUniformBufferArrayNonUniformIndexingNative = false;
+ p->shaderSampledImageArrayNonUniformIndexingNative = false;
+ p->shaderStorageBufferArrayNonUniformIndexingNative = false;
+ p->shaderStorageImageArrayNonUniformIndexingNative = false;
+ p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
+ p->robustBufferAccessUpdateAfterBind = true;
+ p->quadDivergentImplicitLod = false;
+
+ size_t max_descriptor_set_size =
+ ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS -
+ MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_INLINE_UNIFORM_BLOCK_COUNT) /
+ (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
+ 32 /* storage buffer, 32 due to potential space wasted on alignment */ +
+ 32 /* sampler, largest when combined with image */ + 64 /* sampled image */ +
+ 64 /* storage image */);
+ p->maxPerStageDescriptorUpdateAfterBindSamplers = max_descriptor_set_size;
+ p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = max_descriptor_set_size;
+ p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = max_descriptor_set_size;
+ p->maxPerStageDescriptorUpdateAfterBindSampledImages = max_descriptor_set_size;
+ p->maxPerStageDescriptorUpdateAfterBindStorageImages = max_descriptor_set_size;
+ p->maxPerStageDescriptorUpdateAfterBindInputAttachments = max_descriptor_set_size;
+ p->maxPerStageUpdateAfterBindResources = max_descriptor_set_size;
+ p->maxDescriptorSetUpdateAfterBindSamplers = max_descriptor_set_size;
+ p->maxDescriptorSetUpdateAfterBindUniformBuffers = max_descriptor_set_size;
+ p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
+ p->maxDescriptorSetUpdateAfterBindStorageBuffers = max_descriptor_set_size;
+ p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
+ p->maxDescriptorSetUpdateAfterBindSampledImages = max_descriptor_set_size;
+ p->maxDescriptorSetUpdateAfterBindStorageImages = max_descriptor_set_size;
+ p->maxDescriptorSetUpdateAfterBindInputAttachments = max_descriptor_set_size;
+
+ /* We support all of the depth resolve modes */
+ p->supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR |
+ VK_RESOLVE_MODE_AVERAGE_BIT_KHR | VK_RESOLVE_MODE_MIN_BIT_KHR |
+ VK_RESOLVE_MODE_MAX_BIT_KHR;
+
+ /* Average doesn't make sense for stencil, so we don't support that. */
+ p->supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR |
+ VK_RESOLVE_MODE_MIN_BIT_KHR | VK_RESOLVE_MODE_MAX_BIT_KHR;
+
+ p->independentResolveNone = true;
+ p->independentResolve = true;
+
+ /* GFX6-8 only support single channel min/max filter. */
+ p->filterMinmaxImageComponentMapping = pdevice->rad_info.chip_class >= GFX9;
+ p->filterMinmaxSingleComponentFormats = true;
+
+ p->maxTimelineSemaphoreValueDifference = UINT64_MAX;
+
+ p->framebufferIntegerColorSampleCounts = VK_SAMPLE_COUNT_1_BIT;
+}
+
+void
+radv_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
+ VkPhysicalDeviceProperties2 *pProperties)
+{
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
+ radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
+
+ VkPhysicalDeviceVulkan11Properties core_1_1 = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES,
+ };
+ radv_get_physical_device_properties_1_1(pdevice, &core_1_1);
+
+ VkPhysicalDeviceVulkan12Properties core_1_2 = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES,
+ };
+ radv_get_physical_device_properties_1_2(pdevice, &core_1_2);
+
+#define CORE_RENAMED_PROPERTY(major, minor, ext_property, core_property) \
+ memcpy(&properties->ext_property, &core_##major##_##minor.core_property, \
sizeof(core_##major##_##minor.core_property))
-#define CORE_PROPERTY(major, minor, property) \
+#define CORE_PROPERTY(major, minor, property) \
CORE_RENAMED_PROPERTY(major, minor, property, property)
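/* Illustration: CORE_PROPERTY(1, 1, deviceUUID); expands to
 *    memcpy(&properties->deviceUUID, &core_1_1.deviceUUID, sizeof(core_1_1.deviceUUID));
 * The memcpy form (unlike CORE_FEATURE's plain assignment) also copies array members
 * such as deviceUUID, and CORE_RENAMED_PROPERTY covers fields whose extension-struct
 * name differs from the core name, e.g. supportedStages vs subgroupSupportedStages. */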
- vk_foreach_struct(ext, pProperties->pNext) {
- switch (ext->sType) {
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
- VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
- (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
- properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES: {
- VkPhysicalDeviceIDProperties *properties = (VkPhysicalDeviceIDProperties*)ext;
- CORE_PROPERTY(1, 1, deviceUUID);
- CORE_PROPERTY(1, 1, driverUUID);
- CORE_PROPERTY(1, 1, deviceLUID);
- CORE_PROPERTY(1, 1, deviceLUIDValid);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES: {
- VkPhysicalDeviceMultiviewProperties *properties = (VkPhysicalDeviceMultiviewProperties*)ext;
- CORE_PROPERTY(1, 1, maxMultiviewViewCount);
- CORE_PROPERTY(1, 1, maxMultiviewInstanceIndex);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES: {
- VkPhysicalDevicePointClippingProperties *properties =
- (VkPhysicalDevicePointClippingProperties*)ext;
- CORE_PROPERTY(1, 1, pointClippingBehavior);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DISCARD_RECTANGLE_PROPERTIES_EXT: {
- VkPhysicalDeviceDiscardRectanglePropertiesEXT *properties =
- (VkPhysicalDeviceDiscardRectanglePropertiesEXT*)ext;
- properties->maxDiscardRectangles = MAX_DISCARD_RECTANGLES;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT: {
- VkPhysicalDeviceExternalMemoryHostPropertiesEXT *properties =
- (VkPhysicalDeviceExternalMemoryHostPropertiesEXT *) ext;
- properties->minImportedHostPointerAlignment = 4096;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES: {
- VkPhysicalDeviceSubgroupProperties *properties =
- (VkPhysicalDeviceSubgroupProperties*)ext;
- CORE_PROPERTY(1, 1, subgroupSize);
- CORE_RENAMED_PROPERTY(1, 1, supportedStages,
- subgroupSupportedStages);
- CORE_RENAMED_PROPERTY(1, 1, supportedOperations,
- subgroupSupportedOperations);
- CORE_RENAMED_PROPERTY(1, 1, quadOperationsInAllStages,
- subgroupQuadOperationsInAllStages);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES: {
- VkPhysicalDeviceMaintenance3Properties *properties =
- (VkPhysicalDeviceMaintenance3Properties*)ext;
- CORE_PROPERTY(1, 1, maxPerSetDescriptors);
- CORE_PROPERTY(1, 1, maxMemoryAllocationSize);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_FILTER_MINMAX_PROPERTIES: {
- VkPhysicalDeviceSamplerFilterMinmaxProperties *properties =
- (VkPhysicalDeviceSamplerFilterMinmaxProperties *)ext;
- CORE_PROPERTY(1, 2, filterMinmaxImageComponentMapping);
- CORE_PROPERTY(1, 2, filterMinmaxSingleComponentFormats);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_AMD: {
- VkPhysicalDeviceShaderCorePropertiesAMD *properties =
- (VkPhysicalDeviceShaderCorePropertiesAMD *)ext;
-
- /* Shader engines. */
- properties->shaderEngineCount =
- pdevice->rad_info.max_se;
- properties->shaderArraysPerEngineCount =
- pdevice->rad_info.max_sa_per_se;
- properties->computeUnitsPerShaderArray =
- pdevice->rad_info.min_good_cu_per_sa;
- properties->simdPerComputeUnit =
- pdevice->rad_info.num_simd_per_compute_unit;
- properties->wavefrontsPerSimd =
- pdevice->rad_info.max_wave64_per_simd;
- properties->wavefrontSize = 64;
-
- /* SGPR. */
- properties->sgprsPerSimd =
- pdevice->rad_info.num_physical_sgprs_per_simd;
- properties->minSgprAllocation =
- pdevice->rad_info.min_sgpr_alloc;
- properties->maxSgprAllocation =
- pdevice->rad_info.max_sgpr_alloc;
- properties->sgprAllocationGranularity =
- pdevice->rad_info.sgpr_alloc_granularity;
-
- /* VGPR. */
- properties->vgprsPerSimd =
- pdevice->rad_info.num_physical_wave64_vgprs_per_simd;
- properties->minVgprAllocation =
- pdevice->rad_info.min_wave64_vgpr_alloc;
- properties->maxVgprAllocation =
- pdevice->rad_info.max_vgpr_alloc;
- properties->vgprAllocationGranularity =
- pdevice->rad_info.wave64_vgpr_alloc_granularity;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_2_AMD: {
- VkPhysicalDeviceShaderCoreProperties2AMD *properties =
- (VkPhysicalDeviceShaderCoreProperties2AMD *)ext;
-
- properties->shaderCoreFeatures = 0;
- properties->activeComputeUnitCount =
- pdevice->rad_info.num_good_compute_units;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
- VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *properties =
- (VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext;
- properties->maxVertexAttribDivisor = UINT32_MAX;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES: {
- VkPhysicalDeviceDescriptorIndexingProperties *properties =
- (VkPhysicalDeviceDescriptorIndexingProperties*)ext;
- CORE_PROPERTY(1, 2, maxUpdateAfterBindDescriptorsInAllPools);
- CORE_PROPERTY(1, 2, shaderUniformBufferArrayNonUniformIndexingNative);
- CORE_PROPERTY(1, 2, shaderSampledImageArrayNonUniformIndexingNative);
- CORE_PROPERTY(1, 2, shaderStorageBufferArrayNonUniformIndexingNative);
- CORE_PROPERTY(1, 2, shaderStorageImageArrayNonUniformIndexingNative);
- CORE_PROPERTY(1, 2, shaderInputAttachmentArrayNonUniformIndexingNative);
- CORE_PROPERTY(1, 2, robustBufferAccessUpdateAfterBind);
- CORE_PROPERTY(1, 2, quadDivergentImplicitLod);
- CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindSamplers);
- CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindUniformBuffers);
- CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindStorageBuffers);
- CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindSampledImages);
- CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindStorageImages);
- CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindInputAttachments);
- CORE_PROPERTY(1, 2, maxPerStageUpdateAfterBindResources);
- CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindSamplers);
- CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindUniformBuffers);
- CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindUniformBuffersDynamic);
- CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindStorageBuffers);
- CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindStorageBuffersDynamic);
- CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindSampledImages);
- CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindStorageImages);
- CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindInputAttachments);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_PROPERTIES: {
- VkPhysicalDeviceProtectedMemoryProperties *properties =
- (VkPhysicalDeviceProtectedMemoryProperties *)ext;
- CORE_PROPERTY(1, 1, protectedNoFault);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONSERVATIVE_RASTERIZATION_PROPERTIES_EXT: {
- VkPhysicalDeviceConservativeRasterizationPropertiesEXT *properties =
- (VkPhysicalDeviceConservativeRasterizationPropertiesEXT *)ext;
- properties->primitiveOverestimationSize = 0;
- properties->maxExtraPrimitiveOverestimationSize = 0;
- properties->extraPrimitiveOverestimationSizeGranularity = 0;
- properties->primitiveUnderestimation = false;
- properties->conservativePointAndLineRasterization = false;
- properties->degenerateTrianglesRasterized = true;
- properties->degenerateLinesRasterized = false;
- properties->fullyCoveredFragmentShaderInputVariable = false;
- properties->conservativeRasterizationPostDepthCoverage = false;
- break;
- }
+ vk_foreach_struct(ext, pProperties->pNext)
+ {
+ switch (ext->sType) {
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
+ VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
+ (VkPhysicalDevicePushDescriptorPropertiesKHR *)ext;
+ properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES: {
+ VkPhysicalDeviceIDProperties *properties = (VkPhysicalDeviceIDProperties *)ext;
+ CORE_PROPERTY(1, 1, deviceUUID);
+ CORE_PROPERTY(1, 1, driverUUID);
+ CORE_PROPERTY(1, 1, deviceLUID);
+ CORE_PROPERTY(1, 1, deviceLUIDValid);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES: {
+ VkPhysicalDeviceMultiviewProperties *properties =
+ (VkPhysicalDeviceMultiviewProperties *)ext;
+ CORE_PROPERTY(1, 1, maxMultiviewViewCount);
+ CORE_PROPERTY(1, 1, maxMultiviewInstanceIndex);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES: {
+ VkPhysicalDevicePointClippingProperties *properties =
+ (VkPhysicalDevicePointClippingProperties *)ext;
+ CORE_PROPERTY(1, 1, pointClippingBehavior);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DISCARD_RECTANGLE_PROPERTIES_EXT: {
+ VkPhysicalDeviceDiscardRectanglePropertiesEXT *properties =
+ (VkPhysicalDeviceDiscardRectanglePropertiesEXT *)ext;
+ properties->maxDiscardRectangles = MAX_DISCARD_RECTANGLES;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT: {
+ VkPhysicalDeviceExternalMemoryHostPropertiesEXT *properties =
+ (VkPhysicalDeviceExternalMemoryHostPropertiesEXT *)ext;
+ properties->minImportedHostPointerAlignment = 4096;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES: {
+ VkPhysicalDeviceSubgroupProperties *properties = (VkPhysicalDeviceSubgroupProperties *)ext;
+ CORE_PROPERTY(1, 1, subgroupSize);
+ CORE_RENAMED_PROPERTY(1, 1, supportedStages, subgroupSupportedStages);
+ CORE_RENAMED_PROPERTY(1, 1, supportedOperations, subgroupSupportedOperations);
+ CORE_RENAMED_PROPERTY(1, 1, quadOperationsInAllStages, subgroupQuadOperationsInAllStages);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES: {
+ VkPhysicalDeviceMaintenance3Properties *properties =
+ (VkPhysicalDeviceMaintenance3Properties *)ext;
+ CORE_PROPERTY(1, 1, maxPerSetDescriptors);
+ CORE_PROPERTY(1, 1, maxMemoryAllocationSize);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_FILTER_MINMAX_PROPERTIES: {
+ VkPhysicalDeviceSamplerFilterMinmaxProperties *properties =
+ (VkPhysicalDeviceSamplerFilterMinmaxProperties *)ext;
+ CORE_PROPERTY(1, 2, filterMinmaxImageComponentMapping);
+ CORE_PROPERTY(1, 2, filterMinmaxSingleComponentFormats);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_AMD: {
+ VkPhysicalDeviceShaderCorePropertiesAMD *properties =
+ (VkPhysicalDeviceShaderCorePropertiesAMD *)ext;
+
+ /* Shader engines. */
+ properties->shaderEngineCount = pdevice->rad_info.max_se;
+ properties->shaderArraysPerEngineCount = pdevice->rad_info.max_sa_per_se;
+ properties->computeUnitsPerShaderArray = pdevice->rad_info.min_good_cu_per_sa;
+ properties->simdPerComputeUnit = pdevice->rad_info.num_simd_per_compute_unit;
+ properties->wavefrontsPerSimd = pdevice->rad_info.max_wave64_per_simd;
+ properties->wavefrontSize = 64;
+
+ /* SGPR. */
+ properties->sgprsPerSimd = pdevice->rad_info.num_physical_sgprs_per_simd;
+ properties->minSgprAllocation = pdevice->rad_info.min_sgpr_alloc;
+ properties->maxSgprAllocation = pdevice->rad_info.max_sgpr_alloc;
+ properties->sgprAllocationGranularity = pdevice->rad_info.sgpr_alloc_granularity;
+
+ /* VGPR. */
+ properties->vgprsPerSimd = pdevice->rad_info.num_physical_wave64_vgprs_per_simd;
+ properties->minVgprAllocation = pdevice->rad_info.min_wave64_vgpr_alloc;
+ properties->maxVgprAllocation = pdevice->rad_info.max_vgpr_alloc;
+ properties->vgprAllocationGranularity = pdevice->rad_info.wave64_vgpr_alloc_granularity;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_2_AMD: {
+ VkPhysicalDeviceShaderCoreProperties2AMD *properties =
+ (VkPhysicalDeviceShaderCoreProperties2AMD *)ext;
+
+ properties->shaderCoreFeatures = 0;
+ properties->activeComputeUnitCount = pdevice->rad_info.num_good_compute_units;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
+ VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *properties =
+ (VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext;
+ properties->maxVertexAttribDivisor = UINT32_MAX;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES: {
+ VkPhysicalDeviceDescriptorIndexingProperties *properties =
+ (VkPhysicalDeviceDescriptorIndexingProperties *)ext;
+ CORE_PROPERTY(1, 2, maxUpdateAfterBindDescriptorsInAllPools);
+ CORE_PROPERTY(1, 2, shaderUniformBufferArrayNonUniformIndexingNative);
+ CORE_PROPERTY(1, 2, shaderSampledImageArrayNonUniformIndexingNative);
+ CORE_PROPERTY(1, 2, shaderStorageBufferArrayNonUniformIndexingNative);
+ CORE_PROPERTY(1, 2, shaderStorageImageArrayNonUniformIndexingNative);
+ CORE_PROPERTY(1, 2, shaderInputAttachmentArrayNonUniformIndexingNative);
+ CORE_PROPERTY(1, 2, robustBufferAccessUpdateAfterBind);
+ CORE_PROPERTY(1, 2, quadDivergentImplicitLod);
+ CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindSamplers);
+ CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindUniformBuffers);
+ CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindStorageBuffers);
+ CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindSampledImages);
+ CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindStorageImages);
+ CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindInputAttachments);
+ CORE_PROPERTY(1, 2, maxPerStageUpdateAfterBindResources);
+ CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindSamplers);
+ CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindUniformBuffers);
+ CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindUniformBuffersDynamic);
+ CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindStorageBuffers);
+ CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindStorageBuffersDynamic);
+ CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindSampledImages);
+ CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindStorageImages);
+ CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindInputAttachments);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_PROPERTIES: {
+ VkPhysicalDeviceProtectedMemoryProperties *properties =
+ (VkPhysicalDeviceProtectedMemoryProperties *)ext;
+ CORE_PROPERTY(1, 1, protectedNoFault);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONSERVATIVE_RASTERIZATION_PROPERTIES_EXT: {
+ VkPhysicalDeviceConservativeRasterizationPropertiesEXT *properties =
+ (VkPhysicalDeviceConservativeRasterizationPropertiesEXT *)ext;
+ properties->primitiveOverestimationSize = 0;
+ properties->maxExtraPrimitiveOverestimationSize = 0;
+ properties->extraPrimitiveOverestimationSizeGranularity = 0;
+ properties->primitiveUnderestimation = false;
+ properties->conservativePointAndLineRasterization = false;
+ properties->degenerateTrianglesRasterized = true;
+ properties->degenerateLinesRasterized = false;
+ properties->fullyCoveredFragmentShaderInputVariable = false;
+ properties->conservativeRasterizationPostDepthCoverage = false;
+ break;
+ }
#ifndef _WIN32
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT: {
- VkPhysicalDevicePCIBusInfoPropertiesEXT *properties =
- (VkPhysicalDevicePCIBusInfoPropertiesEXT *)ext;
- properties->pciDomain = pdevice->bus_info.domain;
- properties->pciBus = pdevice->bus_info.bus;
- properties->pciDevice = pdevice->bus_info.dev;
- properties->pciFunction = pdevice->bus_info.func;
- break;
- }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT: {
+ VkPhysicalDevicePCIBusInfoPropertiesEXT *properties =
+ (VkPhysicalDevicePCIBusInfoPropertiesEXT *)ext;
+ properties->pciDomain = pdevice->bus_info.domain;
+ properties->pciBus = pdevice->bus_info.bus;
+ properties->pciDevice = pdevice->bus_info.dev;
+ properties->pciFunction = pdevice->bus_info.func;
+ break;
+ }
#endif
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES: {
- VkPhysicalDeviceDriverProperties *properties =
- (VkPhysicalDeviceDriverProperties *) ext;
- CORE_PROPERTY(1, 2, driverID);
- CORE_PROPERTY(1, 2, driverName);
- CORE_PROPERTY(1, 2, driverInfo);
- CORE_PROPERTY(1, 2, conformanceVersion);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: {
- VkPhysicalDeviceTransformFeedbackPropertiesEXT *properties =
- (VkPhysicalDeviceTransformFeedbackPropertiesEXT *)ext;
- properties->maxTransformFeedbackStreams = MAX_SO_STREAMS;
- properties->maxTransformFeedbackBuffers = MAX_SO_BUFFERS;
- properties->maxTransformFeedbackBufferSize = UINT32_MAX;
- properties->maxTransformFeedbackStreamDataSize = 512;
- properties->maxTransformFeedbackBufferDataSize = 512;
- properties->maxTransformFeedbackBufferDataStride = 512;
- properties->transformFeedbackQueries = !pdevice->use_ngg_streamout;
- properties->transformFeedbackStreamsLinesTriangles = !pdevice->use_ngg_streamout;
- properties->transformFeedbackRasterizationStreamSelect = false;
- properties->transformFeedbackDraw = true;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_PROPERTIES_EXT: {
- VkPhysicalDeviceInlineUniformBlockPropertiesEXT *props =
- (VkPhysicalDeviceInlineUniformBlockPropertiesEXT *)ext;
-
- props->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
- props->maxPerStageDescriptorInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
- props->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
- props->maxDescriptorSetInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
- props->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: {
- VkPhysicalDeviceSampleLocationsPropertiesEXT *properties =
- (VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext;
-
- VkSampleCountFlagBits supported_samples = VK_SAMPLE_COUNT_2_BIT |
- VK_SAMPLE_COUNT_4_BIT;
- if (pdevice->rad_info.chip_class < GFX10) {
- /* FIXME: Some MSAA8x tests fail for weird
- * reasons on GFX10+ when the same pattern is
- * used inside the same render pass.
- */
- supported_samples |= VK_SAMPLE_COUNT_8_BIT;
- }
-
- properties->sampleLocationSampleCounts = supported_samples;
- properties->maxSampleLocationGridSize = (VkExtent2D){ 2 , 2 };
- properties->sampleLocationCoordinateRange[0] = 0.0f;
- properties->sampleLocationCoordinateRange[1] = 0.9375f;
- properties->sampleLocationSubPixelBits = 4;
- properties->variableSampleLocations = false;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_STENCIL_RESOLVE_PROPERTIES: {
- VkPhysicalDeviceDepthStencilResolveProperties *properties =
- (VkPhysicalDeviceDepthStencilResolveProperties *)ext;
- CORE_PROPERTY(1, 2, supportedDepthResolveModes);
- CORE_PROPERTY(1, 2, supportedStencilResolveModes);
- CORE_PROPERTY(1, 2, independentResolveNone);
- CORE_PROPERTY(1, 2, independentResolve);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_PROPERTIES_EXT: {
- VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *properties =
- (VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *)ext;
- properties->storageTexelBufferOffsetAlignmentBytes = 4;
- properties->storageTexelBufferOffsetSingleTexelAlignment = true;
- properties->uniformTexelBufferOffsetAlignmentBytes = 4;
- properties->uniformTexelBufferOffsetSingleTexelAlignment = true;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES : {
- VkPhysicalDeviceFloatControlsProperties *properties =
- (VkPhysicalDeviceFloatControlsProperties *)ext;
- CORE_PROPERTY(1, 2, denormBehaviorIndependence);
- CORE_PROPERTY(1, 2, roundingModeIndependence);
- CORE_PROPERTY(1, 2, shaderDenormFlushToZeroFloat16);
- CORE_PROPERTY(1, 2, shaderDenormPreserveFloat16);
- CORE_PROPERTY(1, 2, shaderRoundingModeRTEFloat16);
- CORE_PROPERTY(1, 2, shaderRoundingModeRTZFloat16);
- CORE_PROPERTY(1, 2, shaderSignedZeroInfNanPreserveFloat16);
- CORE_PROPERTY(1, 2, shaderDenormFlushToZeroFloat32);
- CORE_PROPERTY(1, 2, shaderDenormPreserveFloat32);
- CORE_PROPERTY(1, 2, shaderRoundingModeRTEFloat32);
- CORE_PROPERTY(1, 2, shaderRoundingModeRTZFloat32);
- CORE_PROPERTY(1, 2, shaderSignedZeroInfNanPreserveFloat32);
- CORE_PROPERTY(1, 2, shaderDenormFlushToZeroFloat64);
- CORE_PROPERTY(1, 2, shaderDenormPreserveFloat64);
- CORE_PROPERTY(1, 2, shaderRoundingModeRTEFloat64);
- CORE_PROPERTY(1, 2, shaderRoundingModeRTZFloat64);
- CORE_PROPERTY(1, 2, shaderSignedZeroInfNanPreserveFloat64);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_PROPERTIES: {
- VkPhysicalDeviceTimelineSemaphoreProperties *properties =
- (VkPhysicalDeviceTimelineSemaphoreProperties *) ext;
- CORE_PROPERTY(1, 2, maxTimelineSemaphoreValueDifference);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT: {
- VkPhysicalDeviceSubgroupSizeControlPropertiesEXT *props =
- (VkPhysicalDeviceSubgroupSizeControlPropertiesEXT *)ext;
- props->minSubgroupSize = 64;
- props->maxSubgroupSize = 64;
- props->maxComputeWorkgroupSubgroups = UINT32_MAX;
- props->requiredSubgroupSizeStages = 0;
-
- if (pdevice->rad_info.chip_class >= GFX10) {
- /* Only GFX10+ supports wave32. */
- props->minSubgroupSize = 32;
- props->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT;
- }
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES:
- radv_get_physical_device_properties_1_1(pdevice, (void *)ext);
- break;
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES:
- radv_get_physical_device_properties_1_2(pdevice, (void *)ext);
- break;
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_PROPERTIES_EXT: {
- VkPhysicalDeviceLineRasterizationPropertiesEXT *props =
- (VkPhysicalDeviceLineRasterizationPropertiesEXT *)ext;
- props->lineSubPixelPrecisionBits = 4;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_PROPERTIES_EXT: {
- VkPhysicalDeviceRobustness2PropertiesEXT *properties =
- (VkPhysicalDeviceRobustness2PropertiesEXT *)ext;
- properties->robustStorageBufferAccessSizeAlignment = 4;
- properties->robustUniformBufferAccessSizeAlignment = 4;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_PROPERTIES_EXT: {
- VkPhysicalDeviceCustomBorderColorPropertiesEXT *props =
- (VkPhysicalDeviceCustomBorderColorPropertiesEXT *)ext;
- props->maxCustomBorderColorSamplers = RADV_BORDER_COLOR_COUNT;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR: {
- VkPhysicalDeviceFragmentShadingRatePropertiesKHR *props =
- (VkPhysicalDeviceFragmentShadingRatePropertiesKHR *)ext;
- props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 0, 0 };
- props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 0, 0 };
- props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 0;
- props->primitiveFragmentShadingRateWithMultipleViewports = true;
- props->layeredShadingRateAttachments = false;
- props->fragmentShadingRateNonTrivialCombinerOps = true;
- props->maxFragmentSize = (VkExtent2D) { 2, 2 };
- props->maxFragmentSizeAspectRatio = 1;
- props->maxFragmentShadingRateCoverageSamples = 2 * 2;
- props->maxFragmentShadingRateRasterizationSamples = VK_SAMPLE_COUNT_8_BIT;
- props->fragmentShadingRateWithShaderDepthStencilWrites = false;
- props->fragmentShadingRateWithSampleMask = true;
- props->fragmentShadingRateWithShaderSampleMask = false;
- props->fragmentShadingRateWithConservativeRasterization = true;
- props->fragmentShadingRateWithFragmentShaderInterlock = false;
- props->fragmentShadingRateWithCustomSampleLocations = true;
- props->fragmentShadingRateStrictMultiplyCombiner = true;
- break;
- }
- default:
- break;
- }
- }
-}
-
-static void radv_get_physical_device_queue_family_properties(
- struct radv_physical_device* pdevice,
- uint32_t* pCount,
- VkQueueFamilyProperties** pQueueFamilyProperties)
-{
- int num_queue_families = 1;
- int idx;
- if (pdevice->rad_info.num_rings[RING_COMPUTE] > 0 &&
- !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
- num_queue_families++;
-
- if (pQueueFamilyProperties == NULL) {
- *pCount = num_queue_families;
- return;
- }
-
- if (!*pCount)
- return;
-
- idx = 0;
- if (*pCount >= 1) {
- *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
- .queueFlags = VK_QUEUE_GRAPHICS_BIT |
- VK_QUEUE_COMPUTE_BIT |
- VK_QUEUE_TRANSFER_BIT |
- VK_QUEUE_SPARSE_BINDING_BIT,
- .queueCount = 1,
- .timestampValidBits = 64,
- .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
- };
- idx++;
- }
-
- if (pdevice->rad_info.num_rings[RING_COMPUTE] > 0 &&
- !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
- if (*pCount > idx) {
- *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
- .queueFlags = VK_QUEUE_COMPUTE_BIT |
- VK_QUEUE_TRANSFER_BIT |
- VK_QUEUE_SPARSE_BINDING_BIT,
- .queueCount = pdevice->rad_info.num_rings[RING_COMPUTE],
- .timestampValidBits = 64,
- .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
- };
- idx++;
- }
- }
- *pCount = idx;
-}
-
-void radv_GetPhysicalDeviceQueueFamilyProperties(
- VkPhysicalDevice physicalDevice,
- uint32_t* pCount,
- VkQueueFamilyProperties* pQueueFamilyProperties)
-{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
- if (!pQueueFamilyProperties) {
- radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
- return;
- }
- VkQueueFamilyProperties *properties[] = {
- pQueueFamilyProperties + 0,
- pQueueFamilyProperties + 1,
- pQueueFamilyProperties + 2,
- };
- radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
- assert(*pCount <= 3);
-}
-
-void radv_GetPhysicalDeviceQueueFamilyProperties2(
- VkPhysicalDevice physicalDevice,
- uint32_t* pCount,
- VkQueueFamilyProperties2 *pQueueFamilyProperties)
-{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
- if (!pQueueFamilyProperties) {
- radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
- return;
- }
- VkQueueFamilyProperties *properties[] = {
- &pQueueFamilyProperties[0].queueFamilyProperties,
- &pQueueFamilyProperties[1].queueFamilyProperties,
- &pQueueFamilyProperties[2].queueFamilyProperties,
- };
- radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
- assert(*pCount <= 3);
-}
-
-void radv_GetPhysicalDeviceMemoryProperties(
- VkPhysicalDevice physicalDevice,
- VkPhysicalDeviceMemoryProperties *pMemoryProperties)
-{
- RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
-
- *pMemoryProperties = physical_device->memory_properties;
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES: {
+ VkPhysicalDeviceDriverProperties *properties = (VkPhysicalDeviceDriverProperties *)ext;
+ CORE_PROPERTY(1, 2, driverID);
+ CORE_PROPERTY(1, 2, driverName);
+ CORE_PROPERTY(1, 2, driverInfo);
+ CORE_PROPERTY(1, 2, conformanceVersion);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: {
+ VkPhysicalDeviceTransformFeedbackPropertiesEXT *properties =
+ (VkPhysicalDeviceTransformFeedbackPropertiesEXT *)ext;
+ properties->maxTransformFeedbackStreams = MAX_SO_STREAMS;
+ properties->maxTransformFeedbackBuffers = MAX_SO_BUFFERS;
+ properties->maxTransformFeedbackBufferSize = UINT32_MAX;
+ properties->maxTransformFeedbackStreamDataSize = 512;
+ properties->maxTransformFeedbackBufferDataSize = 512;
+ properties->maxTransformFeedbackBufferDataStride = 512;
+ properties->transformFeedbackQueries = !pdevice->use_ngg_streamout;
+ properties->transformFeedbackStreamsLinesTriangles = !pdevice->use_ngg_streamout;
+ properties->transformFeedbackRasterizationStreamSelect = false;
+ properties->transformFeedbackDraw = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_PROPERTIES_EXT: {
+ VkPhysicalDeviceInlineUniformBlockPropertiesEXT *props =
+ (VkPhysicalDeviceInlineUniformBlockPropertiesEXT *)ext;
+
+ props->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
+ props->maxPerStageDescriptorInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
+ props->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks =
+ MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
+ props->maxDescriptorSetInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
+ props->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: {
+ VkPhysicalDeviceSampleLocationsPropertiesEXT *properties =
+ (VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext;
+
+ VkSampleCountFlagBits supported_samples = VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT;
+ if (pdevice->rad_info.chip_class < GFX10) {
+ /* FIXME: Some MSAA8x tests fail for weird
+ * reasons on GFX10+ when the same pattern is
+ * used inside the same render pass.
+ */
+ supported_samples |= VK_SAMPLE_COUNT_8_BIT;
+ }
+
+ properties->sampleLocationSampleCounts = supported_samples;
+ properties->maxSampleLocationGridSize = (VkExtent2D){2, 2};
+ properties->sampleLocationCoordinateRange[0] = 0.0f;
+ properties->sampleLocationCoordinateRange[1] = 0.9375f;
+ properties->sampleLocationSubPixelBits = 4;
+ properties->variableSampleLocations = false;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_STENCIL_RESOLVE_PROPERTIES: {
+ VkPhysicalDeviceDepthStencilResolveProperties *properties =
+ (VkPhysicalDeviceDepthStencilResolveProperties *)ext;
+ CORE_PROPERTY(1, 2, supportedDepthResolveModes);
+ CORE_PROPERTY(1, 2, supportedStencilResolveModes);
+ CORE_PROPERTY(1, 2, independentResolveNone);
+ CORE_PROPERTY(1, 2, independentResolve);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_PROPERTIES_EXT: {
+ VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *properties =
+ (VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *)ext;
+ properties->storageTexelBufferOffsetAlignmentBytes = 4;
+ properties->storageTexelBufferOffsetSingleTexelAlignment = true;
+ properties->uniformTexelBufferOffsetAlignmentBytes = 4;
+ properties->uniformTexelBufferOffsetSingleTexelAlignment = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES: {
+ VkPhysicalDeviceFloatControlsProperties *properties =
+ (VkPhysicalDeviceFloatControlsProperties *)ext;
+ CORE_PROPERTY(1, 2, denormBehaviorIndependence);
+ CORE_PROPERTY(1, 2, roundingModeIndependence);
+ CORE_PROPERTY(1, 2, shaderDenormFlushToZeroFloat16);
+ CORE_PROPERTY(1, 2, shaderDenormPreserveFloat16);
+ CORE_PROPERTY(1, 2, shaderRoundingModeRTEFloat16);
+ CORE_PROPERTY(1, 2, shaderRoundingModeRTZFloat16);
+ CORE_PROPERTY(1, 2, shaderSignedZeroInfNanPreserveFloat16);
+ CORE_PROPERTY(1, 2, shaderDenormFlushToZeroFloat32);
+ CORE_PROPERTY(1, 2, shaderDenormPreserveFloat32);
+ CORE_PROPERTY(1, 2, shaderRoundingModeRTEFloat32);
+ CORE_PROPERTY(1, 2, shaderRoundingModeRTZFloat32);
+ CORE_PROPERTY(1, 2, shaderSignedZeroInfNanPreserveFloat32);
+ CORE_PROPERTY(1, 2, shaderDenormFlushToZeroFloat64);
+ CORE_PROPERTY(1, 2, shaderDenormPreserveFloat64);
+ CORE_PROPERTY(1, 2, shaderRoundingModeRTEFloat64);
+ CORE_PROPERTY(1, 2, shaderRoundingModeRTZFloat64);
+ CORE_PROPERTY(1, 2, shaderSignedZeroInfNanPreserveFloat64);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_PROPERTIES: {
+ VkPhysicalDeviceTimelineSemaphoreProperties *properties =
+ (VkPhysicalDeviceTimelineSemaphoreProperties *)ext;
+ CORE_PROPERTY(1, 2, maxTimelineSemaphoreValueDifference);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT: {
+ VkPhysicalDeviceSubgroupSizeControlPropertiesEXT *props =
+ (VkPhysicalDeviceSubgroupSizeControlPropertiesEXT *)ext;
+ props->minSubgroupSize = 64;
+ props->maxSubgroupSize = 64;
+ props->maxComputeWorkgroupSubgroups = UINT32_MAX;
+ props->requiredSubgroupSizeStages = 0;
+
+ if (pdevice->rad_info.chip_class >= GFX10) {
+ /* Only GFX10+ supports wave32. */
+ props->minSubgroupSize = 32;
+ props->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT;
+ }
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES:
+ radv_get_physical_device_properties_1_1(pdevice, (void *)ext);
+ break;
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES:
+ radv_get_physical_device_properties_1_2(pdevice, (void *)ext);
+ break;
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_PROPERTIES_EXT: {
+ VkPhysicalDeviceLineRasterizationPropertiesEXT *props =
+ (VkPhysicalDeviceLineRasterizationPropertiesEXT *)ext;
+ props->lineSubPixelPrecisionBits = 4;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_PROPERTIES_EXT: {
+ VkPhysicalDeviceRobustness2PropertiesEXT *properties =
+ (VkPhysicalDeviceRobustness2PropertiesEXT *)ext;
+ properties->robustStorageBufferAccessSizeAlignment = 4;
+ properties->robustUniformBufferAccessSizeAlignment = 4;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_PROPERTIES_EXT: {
+ VkPhysicalDeviceCustomBorderColorPropertiesEXT *props =
+ (VkPhysicalDeviceCustomBorderColorPropertiesEXT *)ext;
+ props->maxCustomBorderColorSamplers = RADV_BORDER_COLOR_COUNT;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR: {
+ VkPhysicalDeviceFragmentShadingRatePropertiesKHR *props =
+ (VkPhysicalDeviceFragmentShadingRatePropertiesKHR *)ext;
+ props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D){0, 0};
+ props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D){0, 0};
+ props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 0;
+ props->primitiveFragmentShadingRateWithMultipleViewports = true;
+ props->layeredShadingRateAttachments = false;
+ props->fragmentShadingRateNonTrivialCombinerOps = true;
+ props->maxFragmentSize = (VkExtent2D){2, 2};
+ props->maxFragmentSizeAspectRatio = 1;
+ props->maxFragmentShadingRateCoverageSamples = 2 * 2;
+ props->maxFragmentShadingRateRasterizationSamples = VK_SAMPLE_COUNT_8_BIT;
+ props->fragmentShadingRateWithShaderDepthStencilWrites = false;
+ props->fragmentShadingRateWithSampleMask = true;
+ props->fragmentShadingRateWithShaderSampleMask = false;
+ props->fragmentShadingRateWithConservativeRasterization = true;
+ props->fragmentShadingRateWithFragmentShaderInterlock = false;
+ props->fragmentShadingRateWithCustomSampleLocations = true;
+ props->fragmentShadingRateStrictMultiplyCombiner = true;
+ break;
+ }
+ default:
+ break;
+ }
+ }
+}
+
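The pNext walk above only fills structs that the caller chains in. As an illustrative sketch (not part of the patch), an application could query the AMD shader-core properties reported here roughly as follows; the handle name `gpu` and the helper name are assumptions:

#include <vulkan/vulkan.h>

/* Illustrative sketch: query VkPhysicalDeviceShaderCorePropertiesAMD through
 * the pNext chain of vkGetPhysicalDeviceProperties2. 'gpu' is assumed to be a
 * valid VkPhysicalDevice whose driver exposes VK_AMD_shader_core_properties. */
static void
query_shader_core_properties(VkPhysicalDevice gpu)
{
   VkPhysicalDeviceShaderCorePropertiesAMD core = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_AMD,
   };
   VkPhysicalDeviceProperties2 props = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
      .pNext = &core,
   };

   vkGetPhysicalDeviceProperties2(gpu, &props);

   /* core.shaderEngineCount, core.wavefrontSize, etc. now hold the values
    * written by the corresponding switch case above. */
}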
+static void
+radv_get_physical_device_queue_family_properties(struct radv_physical_device *pdevice,
+ uint32_t *pCount,
+ VkQueueFamilyProperties **pQueueFamilyProperties)
+{
+ int num_queue_families = 1;
+ int idx;
+ if (pdevice->rad_info.num_rings[RING_COMPUTE] > 0 &&
+ !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
+ num_queue_families++;
+
+ if (pQueueFamilyProperties == NULL) {
+ *pCount = num_queue_families;
+ return;
+ }
+
+ if (!*pCount)
+ return;
+
+ idx = 0;
+ if (*pCount >= 1) {
+ *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
+ .queueFlags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT |
+ VK_QUEUE_SPARSE_BINDING_BIT,
+ .queueCount = 1,
+ .timestampValidBits = 64,
+ .minImageTransferGranularity = (VkExtent3D){1, 1, 1},
+ };
+ idx++;
+ }
+
+ if (pdevice->rad_info.num_rings[RING_COMPUTE] > 0 &&
+ !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
+ if (*pCount > idx) {
+ *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
+ .queueFlags =
+ VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT | VK_QUEUE_SPARSE_BINDING_BIT,
+ .queueCount = pdevice->rad_info.num_rings[RING_COMPUTE],
+ .timestampValidBits = 64,
+ .minImageTransferGranularity = (VkExtent3D){1, 1, 1},
+ };
+ idx++;
+ }
+ }
+ *pCount = idx;
+}
+
+void
+radv_GetPhysicalDeviceQueueFamilyProperties(VkPhysicalDevice physicalDevice, uint32_t *pCount,
+ VkQueueFamilyProperties *pQueueFamilyProperties)
+{
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
+ if (!pQueueFamilyProperties) {
+ radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
+ return;
+ }
+ VkQueueFamilyProperties *properties[] = {
+ pQueueFamilyProperties + 0,
+ pQueueFamilyProperties + 1,
+ pQueueFamilyProperties + 2,
+ };
+ radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
+ assert(*pCount <= 3);
+}
+
+void
+radv_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice, uint32_t *pCount,
+ VkQueueFamilyProperties2 *pQueueFamilyProperties)
+{
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
+ if (!pQueueFamilyProperties) {
+ radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
+ return;
+ }
+ VkQueueFamilyProperties *properties[] = {
+ &pQueueFamilyProperties[0].queueFamilyProperties,
+ &pQueueFamilyProperties[1].queueFamilyProperties,
+ &pQueueFamilyProperties[2].queueFamilyProperties,
+ };
+ radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
+ assert(*pCount <= 3);
+}
+
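The helper above implements the usual two-call enumeration contract (count query, then fill). A minimal application-side sketch of that idiom, for illustration only; `gpu` and `get_queue_families` are assumed names:

#include <stdlib.h>
#include <vulkan/vulkan.h>

/* Illustrative sketch: count/fill enumeration of queue families. The caller
 * owns and frees the returned array. */
static VkQueueFamilyProperties *
get_queue_families(VkPhysicalDevice gpu, uint32_t *count)
{
   vkGetPhysicalDeviceQueueFamilyProperties(gpu, count, NULL);     /* query count */

   VkQueueFamilyProperties *props = calloc(*count, sizeof(*props));
   if (props)
      vkGetPhysicalDeviceQueueFamilyProperties(gpu, count, props); /* fill array */

   return props;
}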
+void
+radv_GetPhysicalDeviceMemoryProperties(VkPhysicalDevice physicalDevice,
+ VkPhysicalDeviceMemoryProperties *pMemoryProperties)
+{
+ RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
+
+ *pMemoryProperties = physical_device->memory_properties;
}
static void
radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice,
- VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)
-{
- RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
- VkPhysicalDeviceMemoryProperties *memory_properties = &device->memory_properties;
-
- /* For all memory heaps, the computation of budget is as follow:
- * heap_budget = heap_size - global_heap_usage + app_heap_usage
- *
- * The Vulkan spec 1.1.97 says that the budget should include any
- * currently allocated device memory.
- *
- * Note that the application heap usages are not really accurate (eg.
- * in presence of shared buffers).
- */
- unsigned mask = device->heaps;
- unsigned heap = 0;
- while (mask) {
- uint64_t internal_usage = 0, total_usage = 0;
- unsigned type = 1u << u_bit_scan(&mask);
-
- switch(type) {
- case RADV_HEAP_VRAM:
- internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM);
- total_usage = device->ws->query_value(device->ws, RADEON_VRAM_USAGE);
- break;
- case RADV_HEAP_VRAM_VIS:
- internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM_VIS);
- if (!(device->heaps & RADV_HEAP_VRAM))
- internal_usage += device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM);
- total_usage = device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE);
- break;
- case RADV_HEAP_GTT:
- internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_GTT);
- total_usage = device->ws->query_value(device->ws, RADEON_GTT_USAGE);
- break;
- }
-
- uint64_t free_space = device->memory_properties.memoryHeaps[heap].size -
- MIN2(device->memory_properties.memoryHeaps[heap].size,
- total_usage);
- memoryBudget->heapBudget[heap] = free_space + internal_usage;
- memoryBudget->heapUsage[heap] = internal_usage;
- ++heap;
- }
-
- assert(heap == memory_properties->memoryHeapCount);
-
- /* The heapBudget and heapUsage values must be zero for array elements
- * greater than or equal to
- * VkPhysicalDeviceMemoryProperties::memoryHeapCount.
- */
- for (uint32_t i = memory_properties->memoryHeapCount; i < VK_MAX_MEMORY_HEAPS; i++) {
- memoryBudget->heapBudget[i] = 0;
- memoryBudget->heapUsage[i] = 0;
- }
-}
-
-void radv_GetPhysicalDeviceMemoryProperties2(
- VkPhysicalDevice physicalDevice,
- VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
-{
- radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
- &pMemoryProperties->memoryProperties);
-
- VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget =
- vk_find_struct(pMemoryProperties->pNext,
- PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT);
- if (memory_budget)
- radv_get_memory_budget_properties(physicalDevice, memory_budget);
-}
-
-VkResult radv_GetMemoryHostPointerPropertiesEXT(
- VkDevice _device,
- VkExternalMemoryHandleTypeFlagBits handleType,
- const void *pHostPointer,
- VkMemoryHostPointerPropertiesEXT *pMemoryHostPointerProperties)
-{
- RADV_FROM_HANDLE(radv_device, device, _device);
-
- switch (handleType)
- {
- case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: {
- const struct radv_physical_device *physical_device = device->physical_device;
- uint32_t memoryTypeBits = 0;
- for (int i = 0; i < physical_device->memory_properties.memoryTypeCount; i++) {
- if (physical_device->memory_domains[i] == RADEON_DOMAIN_GTT &&
- !(physical_device->memory_flags[i] & RADEON_FLAG_GTT_WC)) {
- memoryTypeBits = (1 << i);
- break;
- }
- }
- pMemoryHostPointerProperties->memoryTypeBits = memoryTypeBits;
- return VK_SUCCESS;
- }
- default:
- return VK_ERROR_INVALID_EXTERNAL_HANDLE;
- }
+ VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)
+{
+ RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
+ VkPhysicalDeviceMemoryProperties *memory_properties = &device->memory_properties;
+
+ /* For all memory heaps, the computation of budget is as follows:
+ * heap_budget = heap_size - global_heap_usage + app_heap_usage
+ *
+ * The Vulkan spec 1.1.97 says that the budget should include any
+ * currently allocated device memory.
+ *
+ * Note that the application heap usages are not really accurate (e.g.
+ * in the presence of shared buffers).
+ */
+ unsigned mask = device->heaps;
+ unsigned heap = 0;
+ while (mask) {
+ uint64_t internal_usage = 0, total_usage = 0;
+ unsigned type = 1u << u_bit_scan(&mask);
+
+ switch (type) {
+ case RADV_HEAP_VRAM:
+ internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM);
+ total_usage = device->ws->query_value(device->ws, RADEON_VRAM_USAGE);
+ break;
+ case RADV_HEAP_VRAM_VIS:
+ internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM_VIS);
+ if (!(device->heaps & RADV_HEAP_VRAM))
+ internal_usage += device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM);
+ total_usage = device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE);
+ break;
+ case RADV_HEAP_GTT:
+ internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_GTT);
+ total_usage = device->ws->query_value(device->ws, RADEON_GTT_USAGE);
+ break;
+ }
+
+ uint64_t free_space = device->memory_properties.memoryHeaps[heap].size -
+ MIN2(device->memory_properties.memoryHeaps[heap].size, total_usage);
+ memoryBudget->heapBudget[heap] = free_space + internal_usage;
+ memoryBudget->heapUsage[heap] = internal_usage;
+ ++heap;
+ }
+
+ assert(heap == memory_properties->memoryHeapCount);
+
+ /* The heapBudget and heapUsage values must be zero for array elements
+ * greater than or equal to
+ * VkPhysicalDeviceMemoryProperties::memoryHeapCount.
+ */
+ for (uint32_t i = memory_properties->memoryHeapCount; i < VK_MAX_MEMORY_HEAPS; i++) {
+ memoryBudget->heapBudget[i] = 0;
+ memoryBudget->heapUsage[i] = 0;
+ }
+}
+
+void
+radv_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice physicalDevice,
+ VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
+{
+ radv_GetPhysicalDeviceMemoryProperties(physicalDevice, &pMemoryProperties->memoryProperties);
+
+ VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget =
+ vk_find_struct(pMemoryProperties->pNext, PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT);
+ if (memory_budget)
+ radv_get_memory_budget_properties(physicalDevice, memory_budget);
+}
+
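The budget reported above follows heap_budget = heap_size - global_heap_usage + app_heap_usage, so it shrinks as other processes allocate VRAM/GTT. A hedged application-side sketch of reading it through VK_EXT_memory_budget; `gpu` and the function name are assumptions:

#include <vulkan/vulkan.h>

/* Illustrative sketch: read per-heap budget and usage via the
 * VK_EXT_memory_budget pNext chain. */
static void
query_memory_budget(VkPhysicalDevice gpu)
{
   VkPhysicalDeviceMemoryBudgetPropertiesEXT budget = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT,
   };
   VkPhysicalDeviceMemoryProperties2 mem = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2,
      .pNext = &budget,
   };

   vkGetPhysicalDeviceMemoryProperties2(gpu, &mem);

   for (uint32_t i = 0; i < mem.memoryProperties.memoryHeapCount; i++) {
      /* budget.heapBudget[i]: how much this process may still allocate from heap i.
       * budget.heapUsage[i]:  how much it has already allocated from heap i. */
   }
}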
+VkResult
+radv_GetMemoryHostPointerPropertiesEXT(
+ VkDevice _device, VkExternalMemoryHandleTypeFlagBits handleType, const void *pHostPointer,
+ VkMemoryHostPointerPropertiesEXT *pMemoryHostPointerProperties)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+
+ switch (handleType) {
+ case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: {
+ const struct radv_physical_device *physical_device = device->physical_device;
+ uint32_t memoryTypeBits = 0;
+ for (int i = 0; i < physical_device->memory_properties.memoryTypeCount; i++) {
+ if (physical_device->memory_domains[i] == RADEON_DOMAIN_GTT &&
+ !(physical_device->memory_flags[i] & RADEON_FLAG_GTT_WC)) {
+ memoryTypeBits = (1 << i);
+ break;
+ }
+ }
+ pMemoryHostPointerProperties->memoryTypeBits = memoryTypeBits;
+ return VK_SUCCESS;
+ }
+ default:
+ return VK_ERROR_INVALID_EXTERNAL_HANDLE;
+ }
}
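The handler above backs VK_EXT_external_memory_host, whose minimum import alignment is reported as 4096 bytes earlier in this file. A hedged sketch of the matching import path on the application side; `dev`, `ptr`, `size` and the helper name are assumptions, and the extension entry point is fetched through vkGetDeviceProcAddr as usual:

#include <vulkan/vulkan.h>

/* Illustrative sketch: import a page-aligned host allocation as VkDeviceMemory
 * using VK_EXT_external_memory_host. */
static VkResult
import_host_pointer(VkDevice dev, void *ptr, VkDeviceSize size, VkDeviceMemory *out_mem)
{
   PFN_vkGetMemoryHostPointerPropertiesEXT get_props =
      (PFN_vkGetMemoryHostPointerPropertiesEXT)vkGetDeviceProcAddr(
         dev, "vkGetMemoryHostPointerPropertiesEXT");
   if (!get_props)
      return VK_ERROR_EXTENSION_NOT_PRESENT;

   VkMemoryHostPointerPropertiesEXT props = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT,
   };
   VkResult result = get_props(dev, VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
                               ptr, &props);
   if (result != VK_SUCCESS)
      return result;
   if (!props.memoryTypeBits)
      return VK_ERROR_INVALID_EXTERNAL_HANDLE;

   uint32_t type_index = 0;
   while (!(props.memoryTypeBits & (1u << type_index)))
      type_index++; /* pick the lowest memory type the driver reported */

   const VkImportMemoryHostPointerInfoEXT import = {
      .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT,
      .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
      .pHostPointer = ptr,
   };
   const VkMemoryAllocateInfo alloc = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
      .pNext = &import,
      .allocationSize = size, /* must respect the reported minimum alignment */
      .memoryTypeIndex = type_index,
   };
   return vkAllocateMemory(dev, &alloc, NULL, out_mem);
}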
static enum radeon_ctx_priority
radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoEXT *pObj)
{
- /* Default to MEDIUM when a specific global priority isn't requested */
- if (!pObj)
- return RADEON_CTX_PRIORITY_MEDIUM;
-
- switch(pObj->globalPriority) {
- case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT:
- return RADEON_CTX_PRIORITY_REALTIME;
- case VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT:
- return RADEON_CTX_PRIORITY_HIGH;
- case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT:
- return RADEON_CTX_PRIORITY_MEDIUM;
- case VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT:
- return RADEON_CTX_PRIORITY_LOW;
- default:
- unreachable("Illegal global priority value");
- return RADEON_CTX_PRIORITY_INVALID;
- }
+ /* Default to MEDIUM when a specific global priority isn't requested */
+ if (!pObj)
+ return RADEON_CTX_PRIORITY_MEDIUM;
+
+ switch (pObj->globalPriority) {
+ case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT:
+ return RADEON_CTX_PRIORITY_REALTIME;
+ case VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT:
+ return RADEON_CTX_PRIORITY_HIGH;
+ case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT:
+ return RADEON_CTX_PRIORITY_MEDIUM;
+ case VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT:
+ return RADEON_CTX_PRIORITY_LOW;
+ default:
+ unreachable("Illegal global priority value");
+ return RADEON_CTX_PRIORITY_INVALID;
+ }
}
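The translation above consumes VkDeviceQueueGlobalPriorityCreateInfoEXT, which the application chains into its VkDeviceQueueCreateInfo at device creation. A minimal sketch, assuming VK_EXT_global_priority is enabled; the variable names are illustrative, and HIGH/REALTIME requests may still fail with VK_ERROR_NOT_PERMITTED_EXT without sufficient privileges:

#include <vulkan/vulkan.h>

/* Illustrative sketch: request a HIGH global priority on queue family 0. */
static const float queue_priority = 1.0f;

static const VkDeviceQueueGlobalPriorityCreateInfoEXT global_priority_info = {
   .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT,
   .globalPriority = VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT,
};

static const VkDeviceQueueCreateInfo queue_info = {
   .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
   .pNext = &global_priority_info,
   .queueFamilyIndex = 0,
   .queueCount = 1,
   .pQueuePriorities = &queue_priority,
};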
static int
-radv_queue_init(struct radv_device *device, struct radv_queue *queue,
- uint32_t queue_family_index, int idx,
- VkDeviceQueueCreateFlags flags,
- const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority)
+radv_queue_init(struct radv_device *device, struct radv_queue *queue, uint32_t queue_family_index,
+ int idx, VkDeviceQueueCreateFlags flags,
+ const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority)
{
- queue->device = device;
- queue->queue_family_index = queue_family_index;
- queue->queue_idx = idx;
- queue->priority = radv_get_queue_global_priority(global_priority);
- queue->flags = flags;
- queue->hw_ctx = device->hw_ctx[queue->priority];
+ queue->device = device;
+ queue->queue_family_index = queue_family_index;
+ queue->queue_idx = idx;
+ queue->priority = radv_get_queue_global_priority(global_priority);
+ queue->flags = flags;
+ queue->hw_ctx = device->hw_ctx[queue->priority];
- vk_object_base_init(&device->vk, &queue->base, VK_OBJECT_TYPE_QUEUE);
+ vk_object_base_init(&device->vk, &queue->base, VK_OBJECT_TYPE_QUEUE);
- list_inithead(&queue->pending_submissions);
- mtx_init(&queue->pending_mutex, mtx_plain);
+ list_inithead(&queue->pending_submissions);
+ mtx_init(&queue->pending_mutex, mtx_plain);
- mtx_init(&queue->thread_mutex, mtx_plain);
- if (u_cnd_monotonic_init(&queue->thread_cond)) {
- vk_object_base_finish(&queue->base);
- return vk_error(device->instance, VK_ERROR_INITIALIZATION_FAILED);
- }
- queue->cond_created = true;
+ mtx_init(&queue->thread_mutex, mtx_plain);
+ if (u_cnd_monotonic_init(&queue->thread_cond)) {
+ vk_object_base_finish(&queue->base);
+ return vk_error(device->instance, VK_ERROR_INITIALIZATION_FAILED);
+ }
+ queue->cond_created = true;
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
static void
radv_queue_finish(struct radv_queue *queue)
{
- if (queue->hw_ctx) {
- if (queue->cond_created) {
- if (queue->thread_running) {
- p_atomic_set(&queue->thread_exit, true);
- u_cnd_monotonic_broadcast(&queue->thread_cond);
- thrd_join(queue->submission_thread, NULL);
- }
-
- u_cnd_monotonic_destroy(&queue->thread_cond);
- }
-
- mtx_destroy(&queue->pending_mutex);
- mtx_destroy(&queue->thread_mutex);
- }
-
- if (queue->initial_full_flush_preamble_cs)
- queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
- if (queue->initial_preamble_cs)
- queue->device->ws->cs_destroy(queue->initial_preamble_cs);
- if (queue->continue_preamble_cs)
- queue->device->ws->cs_destroy(queue->continue_preamble_cs);
- if (queue->descriptor_bo)
- queue->device->ws->buffer_destroy(queue->device->ws, queue->descriptor_bo);
- if (queue->scratch_bo)
- queue->device->ws->buffer_destroy(queue->device->ws, queue->scratch_bo);
- if (queue->esgs_ring_bo)
- queue->device->ws->buffer_destroy(queue->device->ws, queue->esgs_ring_bo);
- if (queue->gsvs_ring_bo)
- queue->device->ws->buffer_destroy(queue->device->ws, queue->gsvs_ring_bo);
- if (queue->tess_rings_bo)
- queue->device->ws->buffer_destroy(queue->device->ws, queue->tess_rings_bo);
- if (queue->gds_bo)
- queue->device->ws->buffer_destroy(queue->device->ws, queue->gds_bo);
- if (queue->gds_oa_bo)
- queue->device->ws->buffer_destroy(queue->device->ws, queue->gds_oa_bo);
- if (queue->compute_scratch_bo)
- queue->device->ws->buffer_destroy(queue->device->ws, queue->compute_scratch_bo);
-
- vk_object_base_finish(&queue->base);
+ if (queue->hw_ctx) {
+ if (queue->cond_created) {
+ if (queue->thread_running) {
+ p_atomic_set(&queue->thread_exit, true);
+ u_cnd_monotonic_broadcast(&queue->thread_cond);
+ thrd_join(queue->submission_thread, NULL);
+ }
+
+ u_cnd_monotonic_destroy(&queue->thread_cond);
+ }
+
+ mtx_destroy(&queue->pending_mutex);
+ mtx_destroy(&queue->thread_mutex);
+ }
+
+ if (queue->initial_full_flush_preamble_cs)
+ queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
+ if (queue->initial_preamble_cs)
+ queue->device->ws->cs_destroy(queue->initial_preamble_cs);
+ if (queue->continue_preamble_cs)
+ queue->device->ws->cs_destroy(queue->continue_preamble_cs);
+ if (queue->descriptor_bo)
+ queue->device->ws->buffer_destroy(queue->device->ws, queue->descriptor_bo);
+ if (queue->scratch_bo)
+ queue->device->ws->buffer_destroy(queue->device->ws, queue->scratch_bo);
+ if (queue->esgs_ring_bo)
+ queue->device->ws->buffer_destroy(queue->device->ws, queue->esgs_ring_bo);
+ if (queue->gsvs_ring_bo)
+ queue->device->ws->buffer_destroy(queue->device->ws, queue->gsvs_ring_bo);
+ if (queue->tess_rings_bo)
+ queue->device->ws->buffer_destroy(queue->device->ws, queue->tess_rings_bo);
+ if (queue->gds_bo)
+ queue->device->ws->buffer_destroy(queue->device->ws, queue->gds_bo);
+ if (queue->gds_oa_bo)
+ queue->device->ws->buffer_destroy(queue->device->ws, queue->gds_oa_bo);
+ if (queue->compute_scratch_bo)
+ queue->device->ws->buffer_destroy(queue->device->ws, queue->compute_scratch_bo);
+
+ vk_object_base_finish(&queue->base);
}
static void
radv_device_init_gs_info(struct radv_device *device)
{
- device->gs_table_depth = ac_get_gs_table_depth(device->physical_device->rad_info.chip_class,
- device->physical_device->rad_info.family);
+ device->gs_table_depth = ac_get_gs_table_depth(device->physical_device->rad_info.chip_class,
+ device->physical_device->rad_info.family);
}
static VkResult
check_physical_device_features(VkPhysicalDevice physicalDevice,
- const VkPhysicalDeviceFeatures *features)
+ const VkPhysicalDeviceFeatures *features)
{
- RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
- VkPhysicalDeviceFeatures supported_features;
- radv_GetPhysicalDeviceFeatures(physicalDevice, &supported_features);
- VkBool32 *supported_feature = (VkBool32 *)&supported_features;
- VkBool32 *enabled_feature = (VkBool32 *)features;
- unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32);
- for (uint32_t i = 0; i < num_features; i++) {
- if (enabled_feature[i] && !supported_feature[i])
- return vk_error(physical_device->instance, VK_ERROR_FEATURE_NOT_PRESENT);
- }
+ RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
+ VkPhysicalDeviceFeatures supported_features;
+ radv_GetPhysicalDeviceFeatures(physicalDevice, &supported_features);
+ VkBool32 *supported_feature = (VkBool32 *)&supported_features;
+ VkBool32 *enabled_feature = (VkBool32 *)features;
+ unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32);
+ for (uint32_t i = 0; i < num_features; i++) {
+ if (enabled_feature[i] && !supported_feature[i])
+ return vk_error(physical_device->instance, VK_ERROR_FEATURE_NOT_PRESENT);
+ }
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
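check_physical_device_features works because VkPhysicalDeviceFeatures is a flat struct of VkBool32 members and can therefore be walked as an array. The complementary application-side pattern, clamping a feature request to what the device supports, might look like the following illustrative sketch; `gpu` and the helper name are assumptions:

#include <vulkan/vulkan.h>

/* Illustrative sketch: drop any requested feature the device does not support. */
static void
clamp_features(VkPhysicalDevice gpu, VkPhysicalDeviceFeatures *wanted)
{
   VkPhysicalDeviceFeatures supported;
   vkGetPhysicalDeviceFeatures(gpu, &supported);

   VkBool32 *want = (VkBool32 *)wanted;
   const VkBool32 *have = (const VkBool32 *)&supported;
   for (unsigned i = 0; i < sizeof(supported) / sizeof(VkBool32); i++)
      want[i] = want[i] && have[i];
}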
-static VkResult radv_device_init_border_color(struct radv_device *device)
+static VkResult
+radv_device_init_border_color(struct radv_device *device)
{
- VkResult result;
+ VkResult result;
- device->border_color_data.bo =
- device->ws->buffer_create(device->ws,
- RADV_BORDER_COLOR_BUFFER_SIZE,
- 4096,
- RADEON_DOMAIN_VRAM,
- RADEON_FLAG_CPU_ACCESS |
- RADEON_FLAG_READ_ONLY |
- RADEON_FLAG_NO_INTERPROCESS_SHARING,
- RADV_BO_PRIORITY_SHADER);
+ device->border_color_data.bo = device->ws->buffer_create(
+ device->ws, RADV_BORDER_COLOR_BUFFER_SIZE, 4096, RADEON_DOMAIN_VRAM,
+ RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_READ_ONLY | RADEON_FLAG_NO_INTERPROCESS_SHARING,
+ RADV_BO_PRIORITY_SHADER);
- if (device->border_color_data.bo == NULL)
- return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ if (device->border_color_data.bo == NULL)
+ return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
- result = device->ws->buffer_make_resident(device->ws, device->border_color_data.bo, true);
- if (result != VK_SUCCESS)
- return vk_error(device->physical_device->instance, result);
+ result = device->ws->buffer_make_resident(device->ws, device->border_color_data.bo, true);
+ if (result != VK_SUCCESS)
+ return vk_error(device->physical_device->instance, result);
- device->border_color_data.colors_gpu_ptr =
- device->ws->buffer_map(device->border_color_data.bo);
- if (!device->border_color_data.colors_gpu_ptr)
- return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
- mtx_init(&device->border_color_data.mutex, mtx_plain);
+ device->border_color_data.colors_gpu_ptr = device->ws->buffer_map(device->border_color_data.bo);
+ if (!device->border_color_data.colors_gpu_ptr)
+ return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ mtx_init(&device->border_color_data.mutex, mtx_plain);
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
-static void radv_device_finish_border_color(struct radv_device *device)
+static void
+radv_device_finish_border_color(struct radv_device *device)
{
- if (device->border_color_data.bo) {
- device->ws->buffer_make_resident(device->ws, device->border_color_data.bo, false);
- device->ws->buffer_destroy(device->ws, device->border_color_data.bo);
+ if (device->border_color_data.bo) {
+ device->ws->buffer_make_resident(device->ws, device->border_color_data.bo, false);
+ device->ws->buffer_destroy(device->ws, device->border_color_data.bo);
- mtx_destroy(&device->border_color_data.mutex);
- }
+ mtx_destroy(&device->border_color_data.mutex);
+ }
}
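The buffer set up above backs VK_EXT_custom_border_color (up to RADV_BORDER_COLOR_COUNT samplers). For context, a hedged sketch of the sampler-side usage an application would pair with it; `dev`, the chosen color, and the helper name are assumptions:

#include <vulkan/vulkan.h>

/* Illustrative sketch: create a sampler with a custom float border color. */
static VkResult
create_custom_border_sampler(VkDevice dev, VkSampler *sampler)
{
   const VkSamplerCustomBorderColorCreateInfoEXT border = {
      .sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT,
      .customBorderColor = { .float32 = { 1.0f, 0.5f, 0.0f, 1.0f } },
      .format = VK_FORMAT_R8G8B8A8_UNORM,
   };
   const VkSamplerCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
      .pNext = &border,
      .magFilter = VK_FILTER_LINEAR,
      .minFilter = VK_FILTER_LINEAR,
      .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
      .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
      .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
      .borderColor = VK_BORDER_COLOR_FLOAT_CUSTOM_EXT,
   };
   return vkCreateSampler(dev, &info, NULL, sampler);
}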
VkResult
-_radv_device_set_lost(struct radv_device *device,
- const char *file, int line,
- const char *msg, ...)
-{
- VkResult err;
- va_list ap;
-
- p_atomic_inc(&device->lost);
-
- va_start(ap, msg);
- err = __vk_errorv(device->physical_device->instance, device,
- VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT,
- VK_ERROR_DEVICE_LOST, file, line, msg, ap);
- va_end(ap);
-
- return err;
-}
-
-VkResult radv_CreateDevice(
- VkPhysicalDevice physicalDevice,
- const VkDeviceCreateInfo* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkDevice* pDevice)
-{
- RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
- VkResult result;
- struct radv_device *device;
-
- bool keep_shader_info = false;
- bool robust_buffer_access = false;
- bool robust_buffer_access2 = false;
- bool overallocation_disallowed = false;
- bool custom_border_colors = false;
- bool vrs_enabled = false;
-
- /* Check enabled features */
- if (pCreateInfo->pEnabledFeatures) {
- result = check_physical_device_features(physicalDevice,
- pCreateInfo->pEnabledFeatures);
- if (result != VK_SUCCESS)
- return result;
-
- if (pCreateInfo->pEnabledFeatures->robustBufferAccess)
- robust_buffer_access = true;
- }
-
- vk_foreach_struct_const(ext, pCreateInfo->pNext) {
- switch (ext->sType) {
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2: {
- const VkPhysicalDeviceFeatures2 *features = (const void *)ext;
- result = check_physical_device_features(physicalDevice,
- &features->features);
- if (result != VK_SUCCESS)
- return result;
-
- if (features->features.robustBufferAccess)
- robust_buffer_access = true;
- break;
- }
- case VK_STRUCTURE_TYPE_DEVICE_MEMORY_OVERALLOCATION_CREATE_INFO_AMD: {
- const VkDeviceMemoryOverallocationCreateInfoAMD *overallocation = (const void *)ext;
- if (overallocation->overallocationBehavior == VK_MEMORY_OVERALLOCATION_BEHAVIOR_DISALLOWED_AMD)
- overallocation_disallowed = true;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {
- const VkPhysicalDeviceCustomBorderColorFeaturesEXT *border_color_features = (const void *)ext;
- custom_border_colors = border_color_features->customBorderColors;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR: {
- const VkPhysicalDeviceFragmentShadingRateFeaturesKHR *vrs = (const void *)ext;
- vrs_enabled = vrs->pipelineFragmentShadingRate ||
- vrs->primitiveFragmentShadingRate ||
- vrs->attachmentFragmentShadingRate;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {
- const VkPhysicalDeviceRobustness2FeaturesEXT *features = (const void *)ext;
- if (features->robustBufferAccess2)
- robust_buffer_access2 = true;
- break;
- }
- default:
- break;
- }
- }
-
- device = vk_zalloc2(&physical_device->instance->vk.alloc, pAllocator,
- sizeof(*device), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
- if (!device)
- return vk_error(physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
-
- struct vk_device_dispatch_table dispatch_table;
-
- if (radv_thread_trace_enabled()) {
- vk_device_dispatch_table_from_entrypoints(&dispatch_table,
- &sqtt_device_entrypoints, true);
- vk_device_dispatch_table_from_entrypoints(&dispatch_table,
- &radv_device_entrypoints, false);
- } else {
- vk_device_dispatch_table_from_entrypoints(&dispatch_table,
- &radv_device_entrypoints, true);
- }
-
- result = vk_device_init(&device->vk, &physical_device->vk,
- &dispatch_table, pCreateInfo, pAllocator);
- if (result != VK_SUCCESS) {
- vk_free(&device->vk.alloc, device);
- return result;
- }
-
- device->instance = physical_device->instance;
- device->physical_device = physical_device;
-
- device->ws = physical_device->ws;
-
- keep_shader_info = device->vk.enabled_extensions.AMD_shader_info;
-
- /* With update after bind we can't attach bo's to the command buffer
- * from the descriptor set anymore, so we have to use a global BO list.
- */
- device->use_global_bo_list =
- (device->instance->perftest_flags & RADV_PERFTEST_BO_LIST) ||
- device->vk.enabled_extensions.EXT_descriptor_indexing ||
- device->vk.enabled_extensions.EXT_buffer_device_address ||
- device->vk.enabled_extensions.KHR_buffer_device_address;
-
- device->robust_buffer_access = robust_buffer_access || robust_buffer_access2;
- device->robust_buffer_access2 = robust_buffer_access2;
-
- device->adjust_frag_coord_z = (vrs_enabled ||
- device->vk.enabled_extensions.KHR_fragment_shading_rate ||
- device->force_vrs != RADV_FORCE_VRS_NONE) &&
- (device->physical_device->rad_info.family == CHIP_SIENNA_CICHLID ||
- device->physical_device->rad_info.family == CHIP_NAVY_FLOUNDER ||
- device->physical_device->rad_info.family == CHIP_VANGOGH);
-
- mtx_init(&device->shader_slab_mutex, mtx_plain);
- list_inithead(&device->shader_slabs);
-
- device->overallocation_disallowed = overallocation_disallowed;
- mtx_init(&device->overallocation_mutex, mtx_plain);
-
- /* Create one context per queue priority. */
- for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
- const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
- const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority =
- vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);
- enum radeon_ctx_priority priority = radv_get_queue_global_priority(global_priority);
-
- if (device->hw_ctx[priority])
- continue;
-
- result = device->ws->ctx_create(device->ws, priority,
- &device->hw_ctx[priority]);
- if (result != VK_SUCCESS)
- goto fail;
- }
-
- for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
- const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
- uint32_t qfi = queue_create->queueFamilyIndex;
- const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority =
- vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);
-
- device->queues[qfi] = vk_alloc(&device->vk.alloc,
- queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
- if (!device->queues[qfi]) {
- result = VK_ERROR_OUT_OF_HOST_MEMORY;
- goto fail;
- }
-
- memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
-
- device->queue_count[qfi] = queue_create->queueCount;
-
- for (unsigned q = 0; q < queue_create->queueCount; q++) {
- result = radv_queue_init(device, &device->queues[qfi][q],
- qfi, q, queue_create->flags,
- global_priority);
- if (result != VK_SUCCESS)
- goto fail;
- }
- }
-
- device->pbb_allowed = device->physical_device->rad_info.chip_class >= GFX9 &&
- !(device->instance->debug_flags & RADV_DEBUG_NOBINNING);
-
- /* Disable DFSM by default. As of 2019-09-15 Talos on Low is still 3% slower on Raven. */
- device->dfsm_allowed = device->pbb_allowed &&
- (device->instance->perftest_flags & RADV_PERFTEST_DFSM);
-
- /* The maximum number of scratch waves. Scratch space isn't divided
- * evenly between CUs. The number is only a function of the number of CUs.
- * We can decrease the constant to decrease the scratch buffer size.
- *
- * sctx->scratch_waves must be >= the maximum possible size of
- * 1 threadgroup, so that the hw doesn't hang from being unable
- * to start any.
- *
- * The recommended value is 4 per CU at most. Higher numbers don't
- * bring much benefit, but they still occupy chip resources (think
- * async compute). I've seen ~2% performance difference between 4 and 32.
- */
- uint32_t max_threads_per_block = 2048;
- device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
- max_threads_per_block / 64);
-
- device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1);
-
- if (device->physical_device->rad_info.chip_class >= GFX7) {
- /* If the KMD allows it (there is a KMD hw register for it),
- * allow launching waves out-of-order.
- */
- device->dispatch_initiator |= S_00B800_ORDER_MODE(1);
- }
-
- radv_device_init_gs_info(device);
-
- device->tess_offchip_block_dw_size =
- device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
-
- if (getenv("RADV_TRACE_FILE")) {
- fprintf(stderr, "***********************************************************************************\n");
- fprintf(stderr, "* WARNING: RADV_TRACE_FILE=<file> is deprecated and replaced by RADV_DEBUG=hang *\n");
- fprintf(stderr, "***********************************************************************************\n");
- abort();
- }
-
- if (device->instance->debug_flags & RADV_DEBUG_HANG) {
- /* Enable GPU hangs detection and dump logs if a GPU hang is
- * detected.
- */
- keep_shader_info = true;
-
- if (!radv_init_trace(device))
- goto fail;
-
- fprintf(stderr, "*****************************************************************************\n");
- fprintf(stderr, "* WARNING: RADV_DEBUG=hang is costly and should only be used for debugging! *\n");
- fprintf(stderr, "*****************************************************************************\n");
-
- /* Wait for idle after every draw/dispatch to identify the
- * first bad call.
- */
- device->instance->debug_flags |= RADV_DEBUG_SYNC_SHADERS;
-
- radv_dump_enabled_options(device, stderr);
- }
-
- if (radv_thread_trace_enabled()) {
- fprintf(stderr, "*************************************************\n");
- fprintf(stderr, "* WARNING: Thread trace support is experimental *\n");
- fprintf(stderr, "*************************************************\n");
-
- if (device->physical_device->rad_info.chip_class < GFX8 ||
- device->physical_device->rad_info.chip_class > GFX10_3) {
- fprintf(stderr, "GPU hardware not supported: refer to "
- "the RGP documentation for the list of "
- "supported GPUs!\n");
- abort();
- }
-
- if (!radv_thread_trace_init(device))
- goto fail;
- }
-
- if (getenv("RADV_TRAP_HANDLER")) {
- /* TODO: Add support for more hardware. */
- assert(device->physical_device->rad_info.chip_class == GFX8);
-
- fprintf(stderr, "**********************************************************************\n");
- fprintf(stderr, "* WARNING: RADV_TRAP_HANDLER is experimental and only for debugging! *\n");
- fprintf(stderr, "**********************************************************************\n");
-
- /* To get the disassembly of the faulty shaders, we have to
- * keep some shader info around.
- */
- keep_shader_info = true;
-
- if (!radv_trap_handler_init(device))
- goto fail;
- }
-
- if (getenv("RADV_FORCE_VRS")) {
- const char *vrs_rates = getenv("RADV_FORCE_VRS");
-
- if (device->physical_device->rad_info.chip_class < GFX10_3)
- fprintf(stderr, "radv: VRS is only supported on RDNA2+\n");
- else if (device->physical_device->use_llvm)
- fprintf(stderr, "radv: Forcing VRS rates is only supported with ACO\n");
- else if (!strcmp(vrs_rates, "2x2"))
- device->force_vrs = RADV_FORCE_VRS_2x2;
- else if (!strcmp(vrs_rates, "2x1"))
- device->force_vrs = RADV_FORCE_VRS_2x1;
- else if (!strcmp(vrs_rates, "1x2"))
- device->force_vrs = RADV_FORCE_VRS_1x2;
- else
- fprintf(stderr, "radv: Invalid VRS rates specified "
- "(valid values are 2x2, 2x1 and 1x2)\n");
- }
-
- device->keep_shader_info = keep_shader_info;
- result = radv_device_init_meta(device);
- if (result != VK_SUCCESS)
- goto fail;
-
- radv_device_init_msaa(device);
-
- /* If the border color extension is enabled, let's create the buffer we need. */
- if (custom_border_colors) {
- result = radv_device_init_border_color(device);
- if (result != VK_SUCCESS)
- goto fail;
- }
-
- for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
- device->empty_cs[family] = device->ws->cs_create(device->ws, family);
- if (!device->empty_cs[family])
- goto fail;
-
- switch (family) {
- case RADV_QUEUE_GENERAL:
- radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
- radeon_emit(device->empty_cs[family], CC0_UPDATE_LOAD_ENABLES(1));
- radeon_emit(device->empty_cs[family], CC1_UPDATE_SHADOW_ENABLES(1));
- break;
- case RADV_QUEUE_COMPUTE:
- radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
- radeon_emit(device->empty_cs[family], 0);
- break;
- }
-
- result = device->ws->cs_finalize(device->empty_cs[family]);
- if (result != VK_SUCCESS)
- goto fail;
- }
-
- if (device->physical_device->rad_info.chip_class >= GFX7)
- cik_create_gfx_config(device);
-
- VkPipelineCacheCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
- ci.pNext = NULL;
- ci.flags = 0;
- ci.pInitialData = NULL;
- ci.initialDataSize = 0;
- VkPipelineCache pc;
- result = radv_CreatePipelineCache(radv_device_to_handle(device),
- &ci, NULL, &pc);
- if (result != VK_SUCCESS)
- goto fail_meta;
-
- device->mem_cache = radv_pipeline_cache_from_handle(pc);
-
- if (u_cnd_monotonic_init(&device->timeline_cond)) {
- result = VK_ERROR_INITIALIZATION_FAILED;
- goto fail_mem_cache;
- }
-
- device->force_aniso =
- MIN2(16, radv_get_int_debug_option("RADV_TEX_ANISO", -1));
- if (device->force_aniso >= 0) {
- fprintf(stderr, "radv: Forcing anisotropy filter to %ix\n",
- 1 << util_logbase2(device->force_aniso));
- }
-
- *pDevice = radv_device_to_handle(device);
- return VK_SUCCESS;
+_radv_device_set_lost(struct radv_device *device, const char *file, int line, const char *msg, ...)
+{
+ VkResult err;
+ va_list ap;
+
+ p_atomic_inc(&device->lost);
+
+ va_start(ap, msg);
+ err =
+ __vk_errorv(device->physical_device->instance, device, VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT,
+ VK_ERROR_DEVICE_LOST, file, line, msg, ap);
+ va_end(ap);
+
+ return err;
+}
+
+VkResult
+radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkDevice *pDevice)
+{
+ RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
+ VkResult result;
+ struct radv_device *device;
+
+ bool keep_shader_info = false;
+ bool robust_buffer_access = false;
+ bool robust_buffer_access2 = false;
+ bool overallocation_disallowed = false;
+ bool custom_border_colors = false;
+ bool vrs_enabled = false;
+
+ /* Check enabled features */
+ if (pCreateInfo->pEnabledFeatures) {
+ result = check_physical_device_features(physicalDevice, pCreateInfo->pEnabledFeatures);
+ if (result != VK_SUCCESS)
+ return result;
+
+ if (pCreateInfo->pEnabledFeatures->robustBufferAccess)
+ robust_buffer_access = true;
+ }
+
+ vk_foreach_struct_const(ext, pCreateInfo->pNext)
+ {
+ switch (ext->sType) {
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2: {
+ const VkPhysicalDeviceFeatures2 *features = (const void *)ext;
+ result = check_physical_device_features(physicalDevice, &features->features);
+ if (result != VK_SUCCESS)
+ return result;
+
+ if (features->features.robustBufferAccess)
+ robust_buffer_access = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_DEVICE_MEMORY_OVERALLOCATION_CREATE_INFO_AMD: {
+ const VkDeviceMemoryOverallocationCreateInfoAMD *overallocation = (const void *)ext;
+ if (overallocation->overallocationBehavior ==
+ VK_MEMORY_OVERALLOCATION_BEHAVIOR_DISALLOWED_AMD)
+ overallocation_disallowed = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {
+ const VkPhysicalDeviceCustomBorderColorFeaturesEXT *border_color_features =
+ (const void *)ext;
+ custom_border_colors = border_color_features->customBorderColors;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR: {
+ const VkPhysicalDeviceFragmentShadingRateFeaturesKHR *vrs = (const void *)ext;
+ vrs_enabled = vrs->pipelineFragmentShadingRate || vrs->primitiveFragmentShadingRate ||
+ vrs->attachmentFragmentShadingRate;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {
+ const VkPhysicalDeviceRobustness2FeaturesEXT *features = (const void *)ext;
+ if (features->robustBufferAccess2)
+ robust_buffer_access2 = true;
+ break;
+ }
+ default:
+ break;
+ }
+ }
+
+ device = vk_zalloc2(&physical_device->instance->vk.alloc, pAllocator, sizeof(*device), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+ if (!device)
+ return vk_error(physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ struct vk_device_dispatch_table dispatch_table;
+
+ if (radv_thread_trace_enabled()) {
+ vk_device_dispatch_table_from_entrypoints(&dispatch_table, &sqtt_device_entrypoints, true);
+ vk_device_dispatch_table_from_entrypoints(&dispatch_table, &radv_device_entrypoints, false);
+ } else {
+ vk_device_dispatch_table_from_entrypoints(&dispatch_table, &radv_device_entrypoints, true);
+ }
+
+ result =
+ vk_device_init(&device->vk, &physical_device->vk, &dispatch_table, pCreateInfo, pAllocator);
+ if (result != VK_SUCCESS) {
+ vk_free(&device->vk.alloc, device);
+ return result;
+ }
+
+ device->instance = physical_device->instance;
+ device->physical_device = physical_device;
+
+ device->ws = physical_device->ws;
+
+ keep_shader_info = device->vk.enabled_extensions.AMD_shader_info;
+
+ /* With update after bind we can't attach bo's to the command buffer
+ * from the descriptor set anymore, so we have to use a global BO list.
+ */
+ device->use_global_bo_list = (device->instance->perftest_flags & RADV_PERFTEST_BO_LIST) ||
+ device->vk.enabled_extensions.EXT_descriptor_indexing ||
+ device->vk.enabled_extensions.EXT_buffer_device_address ||
+ device->vk.enabled_extensions.KHR_buffer_device_address;
+
+ device->robust_buffer_access = robust_buffer_access || robust_buffer_access2;
+ device->robust_buffer_access2 = robust_buffer_access2;
+
+ device->adjust_frag_coord_z =
+ (vrs_enabled || device->vk.enabled_extensions.KHR_fragment_shading_rate ||
+ device->force_vrs != RADV_FORCE_VRS_NONE) &&
+ (device->physical_device->rad_info.family == CHIP_SIENNA_CICHLID ||
+ device->physical_device->rad_info.family == CHIP_NAVY_FLOUNDER ||
+ device->physical_device->rad_info.family == CHIP_VANGOGH);
+
+ mtx_init(&device->shader_slab_mutex, mtx_plain);
+ list_inithead(&device->shader_slabs);
+
+ device->overallocation_disallowed = overallocation_disallowed;
+ mtx_init(&device->overallocation_mutex, mtx_plain);
+
+ /* Create one context per queue priority. */
+ for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
+ const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
+ const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority =
+ vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);
+ enum radeon_ctx_priority priority = radv_get_queue_global_priority(global_priority);
+
+ if (device->hw_ctx[priority])
+ continue;
+
+ result = device->ws->ctx_create(device->ws, priority, &device->hw_ctx[priority]);
+ if (result != VK_SUCCESS)
+ goto fail;
+ }
+
+ for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
+ const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
+ uint32_t qfi = queue_create->queueFamilyIndex;
+ const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority =
+ vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);
+
+ device->queues[qfi] =
+ vk_alloc(&device->vk.alloc, queue_create->queueCount * sizeof(struct radv_queue), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+ if (!device->queues[qfi]) {
+ result = VK_ERROR_OUT_OF_HOST_MEMORY;
+ goto fail;
+ }
+
+ memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
+
+ device->queue_count[qfi] = queue_create->queueCount;
+
+ for (unsigned q = 0; q < queue_create->queueCount; q++) {
+ result = radv_queue_init(device, &device->queues[qfi][q], qfi, q, queue_create->flags,
+ global_priority);
+ if (result != VK_SUCCESS)
+ goto fail;
+ }
+ }
+
+ device->pbb_allowed = device->physical_device->rad_info.chip_class >= GFX9 &&
+ !(device->instance->debug_flags & RADV_DEBUG_NOBINNING);
+
+ /* Disable DFSM by default. As of 2019-09-15 Talos on Low is still 3% slower on Raven. */
+ device->dfsm_allowed =
+ device->pbb_allowed && (device->instance->perftest_flags & RADV_PERFTEST_DFSM);
+
+ /* The maximum number of scratch waves. Scratch space isn't divided
+ * evenly between CUs. The number is only a function of the number of CUs.
+ * We can decrease the constant to decrease the scratch buffer size.
+ *
+ * sctx->scratch_waves must be >= the maximum possible size of
+ * 1 threadgroup, so that the hw doesn't hang from being unable
+ * to start any.
+ *
+ * The recommended value is 4 per CU at most. Higher numbers don't
+ * bring much benefit, but they still occupy chip resources (think
+ * async compute). I've seen ~2% performance difference between 4 and 32.
+ */
+ uint32_t max_threads_per_block = 2048;
+ device->scratch_waves =
+ MAX2(32 * physical_device->rad_info.num_good_compute_units, max_threads_per_block / 64);
+
+ device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1);
+
+ if (device->physical_device->rad_info.chip_class >= GFX7) {
+ /* If the KMD allows it (there is a KMD hw register for it),
+ * allow launching waves out-of-order.
+ */
+ device->dispatch_initiator |= S_00B800_ORDER_MODE(1);
+ }
+
+ radv_device_init_gs_info(device);
+
+ device->tess_offchip_block_dw_size =
+ device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
+
+ if (getenv("RADV_TRACE_FILE")) {
+ fprintf(
+ stderr,
+ "***********************************************************************************\n");
+ fprintf(
+ stderr,
+ "* WARNING: RADV_TRACE_FILE=<file> is deprecated and replaced by RADV_DEBUG=hang *\n");
+ fprintf(
+ stderr,
+ "***********************************************************************************\n");
+ abort();
+ }
+
+ if (device->instance->debug_flags & RADV_DEBUG_HANG) {
+   /* Enable GPU hang detection and dump logs if a GPU hang is
+ * detected.
+ */
+ keep_shader_info = true;
+
+ if (!radv_init_trace(device))
+ goto fail;
+
+ fprintf(stderr,
+ "*****************************************************************************\n");
+ fprintf(stderr,
+ "* WARNING: RADV_DEBUG=hang is costly and should only be used for debugging! *\n");
+ fprintf(stderr,
+ "*****************************************************************************\n");
+
+ /* Wait for idle after every draw/dispatch to identify the
+ * first bad call.
+ */
+ device->instance->debug_flags |= RADV_DEBUG_SYNC_SHADERS;
+
+ radv_dump_enabled_options(device, stderr);
+ }
+
+ if (radv_thread_trace_enabled()) {
+ fprintf(stderr, "*************************************************\n");
+ fprintf(stderr, "* WARNING: Thread trace support is experimental *\n");
+ fprintf(stderr, "*************************************************\n");
+
+ if (device->physical_device->rad_info.chip_class < GFX8 ||
+ device->physical_device->rad_info.chip_class > GFX10_3) {
+ fprintf(stderr, "GPU hardware not supported: refer to "
+ "the RGP documentation for the list of "
+ "supported GPUs!\n");
+ abort();
+ }
+
+ if (!radv_thread_trace_init(device))
+ goto fail;
+ }
+
+ if (getenv("RADV_TRAP_HANDLER")) {
+ /* TODO: Add support for more hardware. */
+ assert(device->physical_device->rad_info.chip_class == GFX8);
+
+ fprintf(stderr, "**********************************************************************\n");
+ fprintf(stderr, "* WARNING: RADV_TRAP_HANDLER is experimental and only for debugging! *\n");
+ fprintf(stderr, "**********************************************************************\n");
+
+ /* To get the disassembly of the faulty shaders, we have to
+ * keep some shader info around.
+ */
+ keep_shader_info = true;
+
+ if (!radv_trap_handler_init(device))
+ goto fail;
+ }
+
+ if (getenv("RADV_FORCE_VRS")) {
+ const char *vrs_rates = getenv("RADV_FORCE_VRS");
+
+ if (device->physical_device->rad_info.chip_class < GFX10_3)
+ fprintf(stderr, "radv: VRS is only supported on RDNA2+\n");
+ else if (device->physical_device->use_llvm)
+ fprintf(stderr, "radv: Forcing VRS rates is only supported with ACO\n");
+ else if (!strcmp(vrs_rates, "2x2"))
+ device->force_vrs = RADV_FORCE_VRS_2x2;
+ else if (!strcmp(vrs_rates, "2x1"))
+ device->force_vrs = RADV_FORCE_VRS_2x1;
+ else if (!strcmp(vrs_rates, "1x2"))
+ device->force_vrs = RADV_FORCE_VRS_1x2;
+ else
+ fprintf(stderr, "radv: Invalid VRS rates specified "
+ "(valid values are 2x2, 2x1 and 1x2)\n");
+ }
+
+ device->keep_shader_info = keep_shader_info;
+ result = radv_device_init_meta(device);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ radv_device_init_msaa(device);
+
+ /* If the border color extension is enabled, let's create the buffer we need. */
+ if (custom_border_colors) {
+ result = radv_device_init_border_color(device);
+ if (result != VK_SUCCESS)
+ goto fail;
+ }
+
+ for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
+ device->empty_cs[family] = device->ws->cs_create(device->ws, family);
+ if (!device->empty_cs[family])
+ goto fail;
+
+ switch (family) {
+ case RADV_QUEUE_GENERAL:
+ radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
+ radeon_emit(device->empty_cs[family], CC0_UPDATE_LOAD_ENABLES(1));
+ radeon_emit(device->empty_cs[family], CC1_UPDATE_SHADOW_ENABLES(1));
+ break;
+ case RADV_QUEUE_COMPUTE:
+ radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
+ radeon_emit(device->empty_cs[family], 0);
+ break;
+ }
+
+ result = device->ws->cs_finalize(device->empty_cs[family]);
+ if (result != VK_SUCCESS)
+ goto fail;
+ }
+
+ if (device->physical_device->rad_info.chip_class >= GFX7)
+ cik_create_gfx_config(device);
+
+ VkPipelineCacheCreateInfo ci;
+ ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
+ ci.pNext = NULL;
+ ci.flags = 0;
+ ci.pInitialData = NULL;
+ ci.initialDataSize = 0;
+ VkPipelineCache pc;
+ result = radv_CreatePipelineCache(radv_device_to_handle(device), &ci, NULL, &pc);
+ if (result != VK_SUCCESS)
+ goto fail_meta;
+
+ device->mem_cache = radv_pipeline_cache_from_handle(pc);
+
+ if (u_cnd_monotonic_init(&device->timeline_cond)) {
+ result = VK_ERROR_INITIALIZATION_FAILED;
+ goto fail_mem_cache;
+ }
+
+ device->force_aniso = MIN2(16, radv_get_int_debug_option("RADV_TEX_ANISO", -1));
+ if (device->force_aniso >= 0) {
+ fprintf(stderr, "radv: Forcing anisotropy filter to %ix\n",
+ 1 << util_logbase2(device->force_aniso));
+ }
+
+ *pDevice = radv_device_to_handle(device);
+ return VK_SUCCESS;
fail_mem_cache:
- radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
+ radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
fail_meta:
- radv_device_finish_meta(device);
+ radv_device_finish_meta(device);
fail:
- radv_thread_trace_finish(device);
- free(device->thread_trace.trigger_file);
+ radv_thread_trace_finish(device);
+ free(device->thread_trace.trigger_file);
- radv_trap_handler_finish(device);
- radv_finish_trace(device);
+ radv_trap_handler_finish(device);
+ radv_finish_trace(device);
- if (device->gfx_init)
- device->ws->buffer_destroy(device->ws, device->gfx_init);
+ if (device->gfx_init)
+ device->ws->buffer_destroy(device->ws, device->gfx_init);
- radv_device_finish_border_color(device);
+ radv_device_finish_border_color(device);
- for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
- for (unsigned q = 0; q < device->queue_count[i]; q++)
- radv_queue_finish(&device->queues[i][q]);
- if (device->queue_count[i])
- vk_free(&device->vk.alloc, device->queues[i]);
- }
+ for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
+ for (unsigned q = 0; q < device->queue_count[i]; q++)
+ radv_queue_finish(&device->queues[i][q]);
+ if (device->queue_count[i])
+ vk_free(&device->vk.alloc, device->queues[i]);
+ }
- for (unsigned i = 0; i < RADV_NUM_HW_CTX; i++) {
- if (device->hw_ctx[i])
- device->ws->ctx_destroy(device->hw_ctx[i]);
- }
+ for (unsigned i = 0; i < RADV_NUM_HW_CTX; i++) {
+ if (device->hw_ctx[i])
+ device->ws->ctx_destroy(device->hw_ctx[i]);
+ }
- vk_device_finish(&device->vk);
- vk_free(&device->vk.alloc, device);
- return result;
+ vk_device_finish(&device->vk);
+ vk_free(&device->vk.alloc, device);
+ return result;
}
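
As a sanity check on the scratch-wave sizing above, the same arithmetic in a standalone sketch (the CU count is a made-up example, not a value taken from this change):

   #include <stdint.h>
   #include <stdio.h>

   #define MAX2(a, b) ((a) > (b) ? (a) : (b))

   int main(void)
   {
      uint32_t num_good_compute_units = 40; /* hypothetical GPU */
      uint32_t max_threads_per_block = 2048;

      /* At least one full 2048-thread threadgroup (2048 / 64 = 32 wave64s)
       * must be able to start, otherwise budget 32 waves per CU. */
      uint32_t scratch_waves =
         MAX2(32 * num_good_compute_units, max_threads_per_block / 64);

      printf("scratch_waves = %u\n", scratch_waves); /* 1280 for 40 CUs */
      return 0;
   }
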
-void radv_DestroyDevice(
- VkDevice _device,
- const VkAllocationCallbacks* pAllocator)
+void
+radv_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_device, device, _device);
- if (!device)
- return;
+ if (!device)
+ return;
- if (device->gfx_init)
- device->ws->buffer_destroy(device->ws, device->gfx_init);
+ if (device->gfx_init)
+ device->ws->buffer_destroy(device->ws, device->gfx_init);
- radv_device_finish_border_color(device);
+ radv_device_finish_border_color(device);
- for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
- for (unsigned q = 0; q < device->queue_count[i]; q++)
- radv_queue_finish(&device->queues[i][q]);
- if (device->queue_count[i])
- vk_free(&device->vk.alloc, device->queues[i]);
- if (device->empty_cs[i])
- device->ws->cs_destroy(device->empty_cs[i]);
- }
+ for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
+ for (unsigned q = 0; q < device->queue_count[i]; q++)
+ radv_queue_finish(&device->queues[i][q]);
+ if (device->queue_count[i])
+ vk_free(&device->vk.alloc, device->queues[i]);
+ if (device->empty_cs[i])
+ device->ws->cs_destroy(device->empty_cs[i]);
+ }
- for (unsigned i = 0; i < RADV_NUM_HW_CTX; i++) {
- if (device->hw_ctx[i])
- device->ws->ctx_destroy(device->hw_ctx[i]);
- }
+ for (unsigned i = 0; i < RADV_NUM_HW_CTX; i++) {
+ if (device->hw_ctx[i])
+ device->ws->ctx_destroy(device->hw_ctx[i]);
+ }
- radv_device_finish_meta(device);
+ radv_device_finish_meta(device);
- VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
- radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
+ VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
+ radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
- radv_trap_handler_finish(device);
- radv_finish_trace(device);
+ radv_trap_handler_finish(device);
+ radv_finish_trace(device);
- radv_destroy_shader_slabs(device);
+ radv_destroy_shader_slabs(device);
- u_cnd_monotonic_destroy(&device->timeline_cond);
+ u_cnd_monotonic_destroy(&device->timeline_cond);
- free(device->thread_trace.trigger_file);
- radv_thread_trace_finish(device);
+ free(device->thread_trace.trigger_file);
+ radv_thread_trace_finish(device);
- vk_device_finish(&device->vk);
- vk_free(&device->vk.alloc, device);
+ vk_device_finish(&device->vk);
+ vk_free(&device->vk.alloc, device);
}
-VkResult radv_EnumerateInstanceLayerProperties(
- uint32_t* pPropertyCount,
- VkLayerProperties* pProperties)
+VkResult
+radv_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount, VkLayerProperties *pProperties)
{
- if (pProperties == NULL) {
- *pPropertyCount = 0;
- return VK_SUCCESS;
- }
+ if (pProperties == NULL) {
+ *pPropertyCount = 0;
+ return VK_SUCCESS;
+ }
- /* None supported at this time */
- return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
+ /* None supported at this time */
+ return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
}
-VkResult radv_EnumerateDeviceLayerProperties(
- VkPhysicalDevice physicalDevice,
- uint32_t* pPropertyCount,
- VkLayerProperties* pProperties)
+VkResult
+radv_EnumerateDeviceLayerProperties(VkPhysicalDevice physicalDevice, uint32_t *pPropertyCount,
+ VkLayerProperties *pProperties)
{
- if (pProperties == NULL) {
- *pPropertyCount = 0;
- return VK_SUCCESS;
- }
+ if (pProperties == NULL) {
+ *pPropertyCount = 0;
+ return VK_SUCCESS;
+ }
- /* None supported at this time */
- return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
+ /* None supported at this time */
+ return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
}
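
Both enumeration entry points above implement the provider half of the standard Vulkan two-call idiom: a NULL pProperties pointer means "report the count only". From the application side that looks roughly like this (plain Vulkan API, no radv internals; the early return on a zero count is just the usual convention):

   #include <stdlib.h>
   #include <vulkan/vulkan.h>

   static void
   list_instance_layers(void)
   {
      uint32_t count = 0;
      vkEnumerateInstanceLayerProperties(&count, NULL); /* first call: query count */
      if (count == 0)
         return; /* radv itself exposes no layers */

      VkLayerProperties *props = calloc(count, sizeof(*props));
      if (!props)
         return;
      vkEnumerateInstanceLayerProperties(&count, props); /* second call: fill array */
      free(props);
   }
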
-void radv_GetDeviceQueue2(
- VkDevice _device,
- const VkDeviceQueueInfo2* pQueueInfo,
- VkQueue* pQueue)
+void
+radv_GetDeviceQueue2(VkDevice _device, const VkDeviceQueueInfo2 *pQueueInfo, VkQueue *pQueue)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- struct radv_queue *queue;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ struct radv_queue *queue;
- queue = &device->queues[pQueueInfo->queueFamilyIndex][pQueueInfo->queueIndex];
- if (pQueueInfo->flags != queue->flags) {
- /* From the Vulkan 1.1.70 spec:
- *
- * "The queue returned by vkGetDeviceQueue2 must have the same
- * flags value from this structure as that used at device
- * creation time in a VkDeviceQueueCreateInfo instance. If no
- * matching flags were specified at device creation time then
- * pQueue will return VK_NULL_HANDLE."
- */
- *pQueue = VK_NULL_HANDLE;
- return;
- }
+ queue = &device->queues[pQueueInfo->queueFamilyIndex][pQueueInfo->queueIndex];
+ if (pQueueInfo->flags != queue->flags) {
+ /* From the Vulkan 1.1.70 spec:
+ *
+ * "The queue returned by vkGetDeviceQueue2 must have the same
+ * flags value from this structure as that used at device
+ * creation time in a VkDeviceQueueCreateInfo instance. If no
+ * matching flags were specified at device creation time then
+ * pQueue will return VK_NULL_HANDLE."
+ */
+ *pQueue = VK_NULL_HANDLE;
+ return;
+ }
- *pQueue = radv_queue_to_handle(queue);
+ *pQueue = radv_queue_to_handle(queue);
}
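
Seen from the application, the flags rule quoted above means a mismatched request simply yields VK_NULL_HANDLE. A minimal sketch using the public Vulkan API (the protected-queue flag is only an illustrative assumption about how the device was created):

   #include <vulkan/vulkan.h>

   static VkQueue
   get_protected_queue(VkDevice device)
   {
      const VkDeviceQueueInfo2 info = {
         .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_INFO_2,
         .flags = VK_DEVICE_QUEUE_CREATE_PROTECTED_BIT, /* must match device creation */
         .queueFamilyIndex = 0,
         .queueIndex = 0,
      };
      VkQueue queue = VK_NULL_HANDLE;
      vkGetDeviceQueue2(device, &info, &queue);
      /* VK_NULL_HANDLE here means no queue in family 0 was created with
       * these flags, exactly as the spec excerpt above requires. */
      return queue;
   }
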
-void radv_GetDeviceQueue(
- VkDevice _device,
- uint32_t queueFamilyIndex,
- uint32_t queueIndex,
- VkQueue* pQueue)
+void
+radv_GetDeviceQueue(VkDevice _device, uint32_t queueFamilyIndex, uint32_t queueIndex,
+ VkQueue *pQueue)
{
- const VkDeviceQueueInfo2 info = (VkDeviceQueueInfo2) {
- .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_INFO_2,
- .queueFamilyIndex = queueFamilyIndex,
- .queueIndex = queueIndex
- };
+ const VkDeviceQueueInfo2 info =
+ (VkDeviceQueueInfo2){.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_INFO_2,
+ .queueFamilyIndex = queueFamilyIndex,
+ .queueIndex = queueIndex};
- radv_GetDeviceQueue2(_device, &info, pQueue);
+ radv_GetDeviceQueue2(_device, &info, pQueue);
}
static void
-fill_geom_tess_rings(struct radv_queue *queue,
- uint32_t *map,
- bool add_sample_positions,
- uint32_t esgs_ring_size,
- struct radeon_winsys_bo *esgs_ring_bo,
- uint32_t gsvs_ring_size,
- struct radeon_winsys_bo *gsvs_ring_bo,
- uint32_t tess_factor_ring_size,
- uint32_t tess_offchip_ring_offset,
- uint32_t tess_offchip_ring_size,
- struct radeon_winsys_bo *tess_rings_bo)
-{
- uint32_t *desc = &map[4];
-
- if (esgs_ring_bo) {
- uint64_t esgs_va = radv_buffer_get_va(esgs_ring_bo);
-
- /* stride 0, num records - size, add tid, swizzle, elsize4,
- index stride 64 */
- desc[0] = esgs_va;
- desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
- S_008F04_SWIZZLE_ENABLE(true);
- desc[2] = esgs_ring_size;
- desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
- S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
- S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
- S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
- S_008F0C_INDEX_STRIDE(3) |
- S_008F0C_ADD_TID_ENABLE(1);
-
- if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
- desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
- S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) |
- S_008F0C_RESOURCE_LEVEL(1);
- } else {
- desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
- S_008F0C_ELEMENT_SIZE(1);
- }
-
- /* GS entry for ES->GS ring */
- /* stride 0, num records - size, elsize0,
- index stride 0 */
- desc[4] = esgs_va;
- desc[5] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32);
- desc[6] = esgs_ring_size;
- desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
- S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
- S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
- S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
-
- if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
- desc[7] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
- S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) |
- S_008F0C_RESOURCE_LEVEL(1);
- } else {
- desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
- }
- }
-
- desc += 8;
-
- if (gsvs_ring_bo) {
- uint64_t gsvs_va = radv_buffer_get_va(gsvs_ring_bo);
-
- /* VS entry for GS->VS ring */
- /* stride 0, num records - size, elsize0,
- index stride 0 */
- desc[0] = gsvs_va;
- desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32);
- desc[2] = gsvs_ring_size;
- desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
- S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
- S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
- S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
-
- if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
- desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
- S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) |
- S_008F0C_RESOURCE_LEVEL(1);
- } else {
- desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
- }
-
- /* stride gsvs_itemsize, num records 64
- elsize 4, index stride 16 */
- /* shader will patch stride and desc[2] */
- desc[4] = gsvs_va;
- desc[5] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32) |
- S_008F04_SWIZZLE_ENABLE(1);
- desc[6] = 0;
- desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
- S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
- S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
- S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
- S_008F0C_INDEX_STRIDE(1) |
- S_008F0C_ADD_TID_ENABLE(true);
-
- if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
- desc[7] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
- S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) |
- S_008F0C_RESOURCE_LEVEL(1);
- } else {
- desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
- S_008F0C_ELEMENT_SIZE(1);
- }
-
- }
-
- desc += 8;
-
- if (tess_rings_bo) {
- uint64_t tess_va = radv_buffer_get_va(tess_rings_bo);
- uint64_t tess_offchip_va = tess_va + tess_offchip_ring_offset;
-
- desc[0] = tess_va;
- desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32);
- desc[2] = tess_factor_ring_size;
- desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
- S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
- S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
- S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
-
- if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
- desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
- S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
- S_008F0C_RESOURCE_LEVEL(1);
- } else {
- desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
- }
-
- desc[4] = tess_offchip_va;
- desc[5] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32);
- desc[6] = tess_offchip_ring_size;
- desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
- S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
- S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
- S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
-
- if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
- desc[7] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
- S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
- S_008F0C_RESOURCE_LEVEL(1);
- } else {
- desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
- }
- }
-
- desc += 8;
-
- if (add_sample_positions) {
- /* add sample positions after all rings */
- memcpy(desc, queue->device->sample_locations_1x, 8);
- desc += 2;
- memcpy(desc, queue->device->sample_locations_2x, 16);
- desc += 4;
- memcpy(desc, queue->device->sample_locations_4x, 32);
- desc += 8;
- memcpy(desc, queue->device->sample_locations_8x, 64);
- }
+fill_geom_tess_rings(struct radv_queue *queue, uint32_t *map, bool add_sample_positions,
+ uint32_t esgs_ring_size, struct radeon_winsys_bo *esgs_ring_bo,
+ uint32_t gsvs_ring_size, struct radeon_winsys_bo *gsvs_ring_bo,
+ uint32_t tess_factor_ring_size, uint32_t tess_offchip_ring_offset,
+ uint32_t tess_offchip_ring_size, struct radeon_winsys_bo *tess_rings_bo)
+{
+ uint32_t *desc = &map[4];
+
+ if (esgs_ring_bo) {
+ uint64_t esgs_va = radv_buffer_get_va(esgs_ring_bo);
+
+ /* stride 0, num records - size, add tid, swizzle, elsize4,
+ index stride 64 */
+ desc[0] = esgs_va;
+ desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) | S_008F04_SWIZZLE_ENABLE(true);
+ desc[2] = esgs_ring_size;
+ desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+ S_008F0C_INDEX_STRIDE(3) | S_008F0C_ADD_TID_ENABLE(1);
+
+ if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
+ desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+ S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
+ } else {
+ desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | S_008F0C_ELEMENT_SIZE(1);
+ }
+
+ /* GS entry for ES->GS ring */
+ /* stride 0, num records - size, elsize0,
+ index stride 0 */
+ desc[4] = esgs_va;
+ desc[5] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32);
+ desc[6] = esgs_ring_size;
+ desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+
+ if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
+ desc[7] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+ S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
+ } else {
+ desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+ }
+ }
+
+ desc += 8;
+
+ if (gsvs_ring_bo) {
+ uint64_t gsvs_va = radv_buffer_get_va(gsvs_ring_bo);
+
+ /* VS entry for GS->VS ring */
+ /* stride 0, num records - size, elsize0,
+ index stride 0 */
+ desc[0] = gsvs_va;
+ desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32);
+ desc[2] = gsvs_ring_size;
+ desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+
+ if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
+ desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+ S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
+ } else {
+ desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+ }
+
+ /* stride gsvs_itemsize, num records 64
+ elsize 4, index stride 16 */
+ /* shader will patch stride and desc[2] */
+ desc[4] = gsvs_va;
+ desc[5] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32) | S_008F04_SWIZZLE_ENABLE(1);
+ desc[6] = 0;
+ desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+ S_008F0C_INDEX_STRIDE(1) | S_008F0C_ADD_TID_ENABLE(true);
+
+ if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
+ desc[7] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+ S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
+ } else {
+ desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | S_008F0C_ELEMENT_SIZE(1);
+ }
+ }
+
+ desc += 8;
+
+ if (tess_rings_bo) {
+ uint64_t tess_va = radv_buffer_get_va(tess_rings_bo);
+ uint64_t tess_offchip_va = tess_va + tess_offchip_ring_offset;
+
+ desc[0] = tess_va;
+ desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32);
+ desc[2] = tess_factor_ring_size;
+ desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+
+ if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
+ desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+ S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
+ } else {
+ desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+ }
+
+ desc[4] = tess_offchip_va;
+ desc[5] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32);
+ desc[6] = tess_offchip_ring_size;
+ desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+
+ if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
+ desc[7] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+ S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
+ } else {
+ desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+ }
+ }
+
+ desc += 8;
+
+ if (add_sample_positions) {
+ /* add sample positions after all rings */
+ memcpy(desc, queue->device->sample_locations_1x, 8);
+ desc += 2;
+ memcpy(desc, queue->device->sample_locations_2x, 16);
+ desc += 4;
+ memcpy(desc, queue->device->sample_locations_4x, 32);
+ desc += 8;
+ memcpy(desc, queue->device->sample_locations_8x, 64);
+ }
}
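
Each ring slot written above is a 4-dword AMD buffer descriptor. A condensed sketch of the pre-GFX10 pattern the function repeats, using the same register macros as the code (the helper name is hypothetical):

   static void
   fill_raw_buffer_descriptor(uint32_t *desc, uint64_t va, uint32_t size)
   {
      desc[0] = va;                                 /* base address, low 32 bits */
      desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32); /* base address, high bits */
      desc[2] = size;                               /* num_records: bytes when stride is 0 */
      desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
                S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
                S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
                S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
   }

The GFX10 branches above swap the NUM_FORMAT/DATA_FORMAT pair for S_008F0C_FORMAT plus the OOB_SELECT and RESOURCE_LEVEL fields, and the swizzled ring entries additionally set SWIZZLE_ENABLE, INDEX_STRIDE and ADD_TID_ENABLE.
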
static unsigned
radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buffers_p)
{
- bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= GFX7 &&
- device->physical_device->rad_info.family != CHIP_CARRIZO &&
- device->physical_device->rad_info.family != CHIP_STONEY;
- unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
- unsigned max_offchip_buffers;
- unsigned offchip_granularity;
- unsigned hs_offchip_param;
-
- /*
- * Per RadeonSI:
- * This must be one less than the maximum number due to a hw limitation.
-	 * Various hardware bugs need this.
- *
- * Per AMDVLK:
- * Vega10 should limit max_offchip_buffers to 508 (4 * 127).
- * Gfx7 should limit max_offchip_buffers to 508
- * Gfx6 should limit max_offchip_buffers to 126 (2 * 63)
- *
- * Follow AMDVLK here.
- */
- if (device->physical_device->rad_info.chip_class >= GFX10) {
- max_offchip_buffers_per_se = 128;
- } else if (device->physical_device->rad_info.family == CHIP_VEGA10 ||
- device->physical_device->rad_info.chip_class == GFX7 ||
- device->physical_device->rad_info.chip_class == GFX6)
- --max_offchip_buffers_per_se;
-
- max_offchip_buffers = max_offchip_buffers_per_se *
- device->physical_device->rad_info.max_se;
-
- /* Hawaii has a bug with offchip buffers > 256 that can be worked
- * around by setting 4K granularity.
- */
- if (device->tess_offchip_block_dw_size == 4096) {
- assert(device->physical_device->rad_info.family == CHIP_HAWAII);
- offchip_granularity = V_03093C_X_4K_DWORDS;
- } else {
- assert(device->tess_offchip_block_dw_size == 8192);
- offchip_granularity = V_03093C_X_8K_DWORDS;
- }
-
- switch (device->physical_device->rad_info.chip_class) {
- case GFX6:
- max_offchip_buffers = MIN2(max_offchip_buffers, 126);
- break;
- case GFX7:
- case GFX8:
- case GFX9:
- max_offchip_buffers = MIN2(max_offchip_buffers, 508);
- break;
- case GFX10:
- break;
- default:
- break;
- }
-
- *max_offchip_buffers_p = max_offchip_buffers;
- if (device->physical_device->rad_info.chip_class >= GFX10_3) {
- hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX103(max_offchip_buffers - 1) |
- S_03093C_OFFCHIP_GRANULARITY_GFX103(offchip_granularity);
- } else if (device->physical_device->rad_info.chip_class >= GFX7) {
- if (device->physical_device->rad_info.chip_class >= GFX8)
- --max_offchip_buffers;
- hs_offchip_param =
- S_03093C_OFFCHIP_BUFFERING_GFX7(max_offchip_buffers) |
- S_03093C_OFFCHIP_GRANULARITY_GFX7(offchip_granularity);
- } else {
- hs_offchip_param =
- S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
- }
- return hs_offchip_param;
+ bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= GFX7 &&
+ device->physical_device->rad_info.family != CHIP_CARRIZO &&
+ device->physical_device->rad_info.family != CHIP_STONEY;
+ unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
+ unsigned max_offchip_buffers;
+ unsigned offchip_granularity;
+ unsigned hs_offchip_param;
+
+ /*
+ * Per RadeonSI:
+ * This must be one less than the maximum number due to a hw limitation.
+    * Various hardware bugs need this.
+ *
+ * Per AMDVLK:
+ * Vega10 should limit max_offchip_buffers to 508 (4 * 127).
+ * Gfx7 should limit max_offchip_buffers to 508
+ * Gfx6 should limit max_offchip_buffers to 126 (2 * 63)
+ *
+ * Follow AMDVLK here.
+ */
+ if (device->physical_device->rad_info.chip_class >= GFX10) {
+ max_offchip_buffers_per_se = 128;
+ } else if (device->physical_device->rad_info.family == CHIP_VEGA10 ||
+ device->physical_device->rad_info.chip_class == GFX7 ||
+ device->physical_device->rad_info.chip_class == GFX6)
+ --max_offchip_buffers_per_se;
+
+ max_offchip_buffers = max_offchip_buffers_per_se * device->physical_device->rad_info.max_se;
+
+ /* Hawaii has a bug with offchip buffers > 256 that can be worked
+ * around by setting 4K granularity.
+ */
+ if (device->tess_offchip_block_dw_size == 4096) {
+ assert(device->physical_device->rad_info.family == CHIP_HAWAII);
+ offchip_granularity = V_03093C_X_4K_DWORDS;
+ } else {
+ assert(device->tess_offchip_block_dw_size == 8192);
+ offchip_granularity = V_03093C_X_8K_DWORDS;
+ }
+
+ switch (device->physical_device->rad_info.chip_class) {
+ case GFX6:
+ max_offchip_buffers = MIN2(max_offchip_buffers, 126);
+ break;
+ case GFX7:
+ case GFX8:
+ case GFX9:
+ max_offchip_buffers = MIN2(max_offchip_buffers, 508);
+ break;
+ case GFX10:
+ break;
+ default:
+ break;
+ }
+
+ *max_offchip_buffers_p = max_offchip_buffers;
+ if (device->physical_device->rad_info.chip_class >= GFX10_3) {
+ hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX103(max_offchip_buffers - 1) |
+ S_03093C_OFFCHIP_GRANULARITY_GFX103(offchip_granularity);
+ } else if (device->physical_device->rad_info.chip_class >= GFX7) {
+ if (device->physical_device->rad_info.chip_class >= GFX8)
+ --max_offchip_buffers;
+ hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX7(max_offchip_buffers) |
+ S_03093C_OFFCHIP_GRANULARITY_GFX7(offchip_granularity);
+ } else {
+ hs_offchip_param = S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
+ }
+ return hs_offchip_param;
}
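
Worked through for a hypothetical non-Vega10 GFX9 part with 4 shader engines and the default 8K-dword offchip blocks: double_offchip_buffers is true, so 128 buffers per SE give 4 * 128 = 512; the GFX9 case clamps that to 508, which is what *max_offchip_buffers_p reports; the GFX8+ branch then subtracts one before packing, so the register value encodes 507 together with V_03093C_X_8K_DWORDS granularity.
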
static void
radv_emit_gs_ring_sizes(struct radv_queue *queue, struct radeon_cmdbuf *cs,
- struct radeon_winsys_bo *esgs_ring_bo,
- uint32_t esgs_ring_size,
- struct radeon_winsys_bo *gsvs_ring_bo,
- uint32_t gsvs_ring_size)
+ struct radeon_winsys_bo *esgs_ring_bo, uint32_t esgs_ring_size,
+ struct radeon_winsys_bo *gsvs_ring_bo, uint32_t gsvs_ring_size)
{
- if (!esgs_ring_bo && !gsvs_ring_bo)
- return;
+ if (!esgs_ring_bo && !gsvs_ring_bo)
+ return;
- if (esgs_ring_bo)
- radv_cs_add_buffer(queue->device->ws, cs, esgs_ring_bo);
+ if (esgs_ring_bo)
+ radv_cs_add_buffer(queue->device->ws, cs, esgs_ring_bo);
- if (gsvs_ring_bo)
- radv_cs_add_buffer(queue->device->ws, cs, gsvs_ring_bo);
+ if (gsvs_ring_bo)
+ radv_cs_add_buffer(queue->device->ws, cs, gsvs_ring_bo);
- if (queue->device->physical_device->rad_info.chip_class >= GFX7) {
- radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
- radeon_emit(cs, esgs_ring_size >> 8);
- radeon_emit(cs, gsvs_ring_size >> 8);
- } else {
- radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
- radeon_emit(cs, esgs_ring_size >> 8);
- radeon_emit(cs, gsvs_ring_size >> 8);
- }
+ if (queue->device->physical_device->rad_info.chip_class >= GFX7) {
+ radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
+ radeon_emit(cs, esgs_ring_size >> 8);
+ radeon_emit(cs, gsvs_ring_size >> 8);
+ } else {
+ radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
+ radeon_emit(cs, esgs_ring_size >> 8);
+ radeon_emit(cs, gsvs_ring_size >> 8);
+ }
}
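
The >> 8 reflects that the VGT_ESGS_RING_SIZE / VGT_GSVS_RING_SIZE registers are programmed in 256-byte units rather than bytes. For example, with a hypothetical 1 MiB ESGS ring:

   uint32_t esgs_ring_size = 1u << 20;       /* 1 MiB */
   uint32_t reg_value = esgs_ring_size >> 8; /* 0x1000 = 4096 blocks of 256 bytes */
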
static void
radv_emit_tess_factor_ring(struct radv_queue *queue, struct radeon_cmdbuf *cs,
- unsigned hs_offchip_param, unsigned tf_ring_size,
- struct radeon_winsys_bo *tess_rings_bo)
-{
- uint64_t tf_va;
-
- if (!tess_rings_bo)
- return;
-
- tf_va = radv_buffer_get_va(tess_rings_bo);
-
- radv_cs_add_buffer(queue->device->ws, cs, tess_rings_bo);
-
- if (queue->device->physical_device->rad_info.chip_class >= GFX7) {
- radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE,
- S_030938_SIZE(tf_ring_size / 4));
- radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE,
- tf_va >> 8);
-
- if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
- radeon_set_uconfig_reg(cs, R_030984_VGT_TF_MEMORY_BASE_HI_UMD,
- S_030984_BASE_HI(tf_va >> 40));
- } else if (queue->device->physical_device->rad_info.chip_class == GFX9) {
- radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI,
- S_030944_BASE_HI(tf_va >> 40));
- }
- radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM,
- hs_offchip_param);
- } else {
- radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE,
- S_008988_SIZE(tf_ring_size / 4));
- radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE,
- tf_va >> 8);
- radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM,
- hs_offchip_param);
- }
+ unsigned hs_offchip_param, unsigned tf_ring_size,
+ struct radeon_winsys_bo *tess_rings_bo)
+{
+ uint64_t tf_va;
+
+ if (!tess_rings_bo)
+ return;
+
+ tf_va = radv_buffer_get_va(tess_rings_bo);
+
+ radv_cs_add_buffer(queue->device->ws, cs, tess_rings_bo);
+
+ if (queue->device->physical_device->rad_info.chip_class >= GFX7) {
+ radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE, S_030938_SIZE(tf_ring_size / 4));
+ radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE, tf_va >> 8);
+
+ if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
+ radeon_set_uconfig_reg(cs, R_030984_VGT_TF_MEMORY_BASE_HI_UMD,
+ S_030984_BASE_HI(tf_va >> 40));
+ } else if (queue->device->physical_device->rad_info.chip_class == GFX9) {
+ radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI, S_030944_BASE_HI(tf_va >> 40));
+ }
+ radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, hs_offchip_param);
+ } else {
+ radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE, S_008988_SIZE(tf_ring_size / 4));
+ radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE, tf_va >> 8);
+ radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM, hs_offchip_param);
+ }
}
static void
@@ -3562,17 +3433,17 @@ radv_emit_graphics_scratch(struct radv_queue *queue, struct radeon_cmdbuf *cs,
uint32_t size_per_wave, uint32_t waves,
struct radeon_winsys_bo *scratch_bo)
{
- if (queue->queue_family_index != RADV_QUEUE_GENERAL)
- return;
+ if (queue->queue_family_index != RADV_QUEUE_GENERAL)
+ return;
- if (!scratch_bo)
- return;
+ if (!scratch_bo)
+ return;
- radv_cs_add_buffer(queue->device->ws, cs, scratch_bo);
+ radv_cs_add_buffer(queue->device->ws, cs, scratch_bo);
- radeon_set_context_reg(cs, R_0286E8_SPI_TMPRING_SIZE,
- S_0286E8_WAVES(waves) |
- S_0286E8_WAVESIZE(round_up_u32(size_per_wave, 1024)));
+ radeon_set_context_reg(
+ cs, R_0286E8_SPI_TMPRING_SIZE,
+ S_0286E8_WAVES(waves) | S_0286E8_WAVESIZE(round_up_u32(size_per_wave, 1024)));
}
static void
@@ -3580,3163 +3451,2965 @@ radv_emit_compute_scratch(struct radv_queue *queue, struct radeon_cmdbuf *cs,
uint32_t size_per_wave, uint32_t waves,
struct radeon_winsys_bo *compute_scratch_bo)
{
- uint64_t scratch_va;
+ uint64_t scratch_va;
- if (!compute_scratch_bo)
- return;
+ if (!compute_scratch_bo)
+ return;
- scratch_va = radv_buffer_get_va(compute_scratch_bo);
+ scratch_va = radv_buffer_get_va(compute_scratch_bo);
- radv_cs_add_buffer(queue->device->ws, cs, compute_scratch_bo);
+ radv_cs_add_buffer(queue->device->ws, cs, compute_scratch_bo);
- radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
- radeon_emit(cs, scratch_va);
- radeon_emit(cs, S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
- S_008F04_SWIZZLE_ENABLE(1));
+ radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
+ radeon_emit(cs, scratch_va);
+ radeon_emit(cs, S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) | S_008F04_SWIZZLE_ENABLE(1));
- radeon_set_sh_reg(cs, R_00B860_COMPUTE_TMPRING_SIZE,
- S_00B860_WAVES(waves) |
- S_00B860_WAVESIZE(round_up_u32(size_per_wave, 1024)));
+ radeon_set_sh_reg(cs, R_00B860_COMPUTE_TMPRING_SIZE,
+ S_00B860_WAVES(waves) | S_00B860_WAVESIZE(round_up_u32(size_per_wave, 1024)));
}
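
The two user-data dwords emitted above form the low half of a scratch buffer resource: the low 32 bits of the buffer VA, then the high address bits plus the swizzle bit. A quick numeric illustration with a made-up address:

   uint64_t scratch_va = 0x0000800100040000ull; /* hypothetical GPU VA */
   uint32_t dword0 = (uint32_t)scratch_va;      /* 0x00040000 */
   uint32_t dword1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) | /* 0x8001 */
                     S_008F04_SWIZZLE_ENABLE(1);
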
static void
-radv_emit_global_shader_pointers(struct radv_queue *queue,
- struct radeon_cmdbuf *cs,
- struct radeon_winsys_bo *descriptor_bo)
-{
- uint64_t va;
-
- if (!descriptor_bo)
- return;
-
- va = radv_buffer_get_va(descriptor_bo);
-
- radv_cs_add_buffer(queue->device->ws, cs, descriptor_bo);
-
- if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
- uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
- R_00B130_SPI_SHADER_USER_DATA_VS_0,
- R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
- R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
-
- for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
- radv_emit_shader_pointer(queue->device, cs, regs[i],
- va, true);
- }
- } else if (queue->device->physical_device->rad_info.chip_class == GFX9) {
- uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
- R_00B130_SPI_SHADER_USER_DATA_VS_0,
- R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
- R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
-
- for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
- radv_emit_shader_pointer(queue->device, cs, regs[i],
- va, true);
- }
- } else {
- uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
- R_00B130_SPI_SHADER_USER_DATA_VS_0,
- R_00B230_SPI_SHADER_USER_DATA_GS_0,
- R_00B330_SPI_SHADER_USER_DATA_ES_0,
- R_00B430_SPI_SHADER_USER_DATA_HS_0,
- R_00B530_SPI_SHADER_USER_DATA_LS_0};
-
- for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
- radv_emit_shader_pointer(queue->device, cs, regs[i],
- va, true);
- }
- }
+radv_emit_global_shader_pointers(struct radv_queue *queue, struct radeon_cmdbuf *cs,
+ struct radeon_winsys_bo *descriptor_bo)
+{
+ uint64_t va;
+
+ if (!descriptor_bo)
+ return;
+
+ va = radv_buffer_get_va(descriptor_bo);
+
+ radv_cs_add_buffer(queue->device->ws, cs, descriptor_bo);
+
+ if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
+ uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B130_SPI_SHADER_USER_DATA_VS_0,
+ R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
+ R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
+
+ for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
+ radv_emit_shader_pointer(queue->device, cs, regs[i], va, true);
+ }
+ } else if (queue->device->physical_device->rad_info.chip_class == GFX9) {
+ uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B130_SPI_SHADER_USER_DATA_VS_0,
+ R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
+ R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
+
+ for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
+ radv_emit_shader_pointer(queue->device, cs, regs[i], va, true);
+ }
+ } else {
+ uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B130_SPI_SHADER_USER_DATA_VS_0,
+ R_00B230_SPI_SHADER_USER_DATA_GS_0, R_00B330_SPI_SHADER_USER_DATA_ES_0,
+ R_00B430_SPI_SHADER_USER_DATA_HS_0, R_00B530_SPI_SHADER_USER_DATA_LS_0};
+
+ for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
+ radv_emit_shader_pointer(queue->device, cs, regs[i], va, true);
+ }
+ }
}
static void
radv_init_graphics_state(struct radeon_cmdbuf *cs, struct radv_queue *queue)
{
- struct radv_device *device = queue->device;
+ struct radv_device *device = queue->device;
- if (device->gfx_init) {
- uint64_t va = radv_buffer_get_va(device->gfx_init);
+ if (device->gfx_init) {
+ uint64_t va = radv_buffer_get_va(device->gfx_init);
- radeon_emit(cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- radeon_emit(cs, device->gfx_init_size_dw & 0xffff);
+ radeon_emit(cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ radeon_emit(cs, device->gfx_init_size_dw & 0xffff);
- radv_cs_add_buffer(device->ws, cs, device->gfx_init);
- } else {
- si_emit_graphics(device, cs);
- }
+ radv_cs_add_buffer(device->ws, cs, device->gfx_init);
+ } else {
+ si_emit_graphics(device, cs);
+ }
}
static void
radv_init_compute_state(struct radeon_cmdbuf *cs, struct radv_queue *queue)
{
- si_emit_compute(queue->device, cs);
+ si_emit_compute(queue->device, cs);
}
static VkResult
-radv_get_preamble_cs(struct radv_queue *queue,
- uint32_t scratch_size_per_wave,
- uint32_t scratch_waves,
- uint32_t compute_scratch_size_per_wave,
- uint32_t compute_scratch_waves,
- uint32_t esgs_ring_size,
- uint32_t gsvs_ring_size,
- bool needs_tess_rings,
- bool needs_gds,
- bool needs_gds_oa,
- bool needs_sample_positions,
- struct radeon_cmdbuf **initial_full_flush_preamble_cs,
+radv_get_preamble_cs(struct radv_queue *queue, uint32_t scratch_size_per_wave,
+ uint32_t scratch_waves, uint32_t compute_scratch_size_per_wave,
+ uint32_t compute_scratch_waves, uint32_t esgs_ring_size,
+ uint32_t gsvs_ring_size, bool needs_tess_rings, bool needs_gds,
+ bool needs_gds_oa, bool needs_sample_positions,
+ struct radeon_cmdbuf **initial_full_flush_preamble_cs,
struct radeon_cmdbuf **initial_preamble_cs,
struct radeon_cmdbuf **continue_preamble_cs)
{
- struct radeon_winsys_bo *scratch_bo = NULL;
- struct radeon_winsys_bo *descriptor_bo = NULL;
- struct radeon_winsys_bo *compute_scratch_bo = NULL;
- struct radeon_winsys_bo *esgs_ring_bo = NULL;
- struct radeon_winsys_bo *gsvs_ring_bo = NULL;
- struct radeon_winsys_bo *tess_rings_bo = NULL;
- struct radeon_winsys_bo *gds_bo = NULL;
- struct radeon_winsys_bo *gds_oa_bo = NULL;
- struct radeon_cmdbuf *dest_cs[3] = {0};
- bool add_tess_rings = false, add_gds = false, add_gds_oa = false, add_sample_positions = false;
- unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
- unsigned max_offchip_buffers;
- unsigned hs_offchip_param = 0;
- unsigned tess_offchip_ring_offset;
- uint32_t ring_bo_flags = RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING;
- if (!queue->has_tess_rings) {
- if (needs_tess_rings)
- add_tess_rings = true;
- }
- if (!queue->has_gds) {
- if (needs_gds)
- add_gds = true;
- }
- if (!queue->has_gds_oa) {
- if (needs_gds_oa)
- add_gds_oa = true;
- }
- if (!queue->has_sample_positions) {
- if (needs_sample_positions)
- add_sample_positions = true;
- }
- tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se;
- hs_offchip_param = radv_get_hs_offchip_param(queue->device,
- &max_offchip_buffers);
- tess_offchip_ring_offset = align(tess_factor_ring_size, 64 * 1024);
- tess_offchip_ring_size = max_offchip_buffers *
- queue->device->tess_offchip_block_dw_size * 4;
-
- scratch_size_per_wave = MAX2(scratch_size_per_wave, queue->scratch_size_per_wave);
- if (scratch_size_per_wave)
- scratch_waves = MIN2(scratch_waves, UINT32_MAX / scratch_size_per_wave);
- else
- scratch_waves = 0;
-
- compute_scratch_size_per_wave = MAX2(compute_scratch_size_per_wave, queue->compute_scratch_size_per_wave);
- if (compute_scratch_size_per_wave)
- compute_scratch_waves = MIN2(compute_scratch_waves, UINT32_MAX / compute_scratch_size_per_wave);
- else
- compute_scratch_waves = 0;
-
- if (scratch_size_per_wave <= queue->scratch_size_per_wave &&
- scratch_waves <= queue->scratch_waves &&
- compute_scratch_size_per_wave <= queue->compute_scratch_size_per_wave &&
- compute_scratch_waves <= queue->compute_scratch_waves &&
- esgs_ring_size <= queue->esgs_ring_size &&
- gsvs_ring_size <= queue->gsvs_ring_size &&
- !add_tess_rings && !add_gds && !add_gds_oa && !add_sample_positions &&
- queue->initial_preamble_cs) {
- *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
- *initial_preamble_cs = queue->initial_preamble_cs;
- *continue_preamble_cs = queue->continue_preamble_cs;
- if (!scratch_size_per_wave && !compute_scratch_size_per_wave &&
- !esgs_ring_size && !gsvs_ring_size && !needs_tess_rings &&
- !needs_gds && !needs_gds_oa && !needs_sample_positions)
- *continue_preamble_cs = NULL;
- return VK_SUCCESS;
- }
-
- uint32_t scratch_size = scratch_size_per_wave * scratch_waves;
- uint32_t queue_scratch_size = queue->scratch_size_per_wave * queue->scratch_waves;
- if (scratch_size > queue_scratch_size) {
- scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
- scratch_size,
- 4096,
- RADEON_DOMAIN_VRAM,
- ring_bo_flags,
- RADV_BO_PRIORITY_SCRATCH);
- if (!scratch_bo)
- goto fail;
- } else
- scratch_bo = queue->scratch_bo;
-
- uint32_t compute_scratch_size = compute_scratch_size_per_wave * compute_scratch_waves;
- uint32_t compute_queue_scratch_size = queue->compute_scratch_size_per_wave * queue->compute_scratch_waves;
- if (compute_scratch_size > compute_queue_scratch_size) {
- compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
- compute_scratch_size,
- 4096,
- RADEON_DOMAIN_VRAM,
- ring_bo_flags,
- RADV_BO_PRIORITY_SCRATCH);
- if (!compute_scratch_bo)
- goto fail;
-
- } else
- compute_scratch_bo = queue->compute_scratch_bo;
-
- if (esgs_ring_size > queue->esgs_ring_size) {
- esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
- esgs_ring_size,
- 4096,
- RADEON_DOMAIN_VRAM,
- ring_bo_flags,
- RADV_BO_PRIORITY_SCRATCH);
- if (!esgs_ring_bo)
- goto fail;
- } else {
- esgs_ring_bo = queue->esgs_ring_bo;
- esgs_ring_size = queue->esgs_ring_size;
- }
-
- if (gsvs_ring_size > queue->gsvs_ring_size) {
- gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
- gsvs_ring_size,
- 4096,
- RADEON_DOMAIN_VRAM,
- ring_bo_flags,
- RADV_BO_PRIORITY_SCRATCH);
- if (!gsvs_ring_bo)
- goto fail;
- } else {
- gsvs_ring_bo = queue->gsvs_ring_bo;
- gsvs_ring_size = queue->gsvs_ring_size;
- }
-
- if (add_tess_rings) {
- tess_rings_bo = queue->device->ws->buffer_create(queue->device->ws,
- tess_offchip_ring_offset + tess_offchip_ring_size,
- 256,
- RADEON_DOMAIN_VRAM,
- ring_bo_flags,
- RADV_BO_PRIORITY_SCRATCH);
- if (!tess_rings_bo)
- goto fail;
- } else {
- tess_rings_bo = queue->tess_rings_bo;
- }
-
- if (add_gds) {
- assert(queue->device->physical_device->rad_info.chip_class >= GFX10);
-
- /* 4 streamout GDS counters.
- * We need 256B (64 dw) of GDS, otherwise streamout hangs.
- */
- gds_bo = queue->device->ws->buffer_create(queue->device->ws,
- 256, 4,
- RADEON_DOMAIN_GDS,
- ring_bo_flags,
- RADV_BO_PRIORITY_SCRATCH);
- if (!gds_bo)
- goto fail;
- } else {
- gds_bo = queue->gds_bo;
- }
-
- if (add_gds_oa) {
- assert(queue->device->physical_device->rad_info.chip_class >= GFX10);
-
- gds_oa_bo = queue->device->ws->buffer_create(queue->device->ws,
- 4, 1,
- RADEON_DOMAIN_OA,
- ring_bo_flags,
- RADV_BO_PRIORITY_SCRATCH);
- if (!gds_oa_bo)
- goto fail;
- } else {
- gds_oa_bo = queue->gds_oa_bo;
- }
-
- if (scratch_bo != queue->scratch_bo ||
- esgs_ring_bo != queue->esgs_ring_bo ||
- gsvs_ring_bo != queue->gsvs_ring_bo ||
- tess_rings_bo != queue->tess_rings_bo ||
- add_sample_positions) {
- uint32_t size = 0;
- if (gsvs_ring_bo || esgs_ring_bo ||
- tess_rings_bo || add_sample_positions) {
-         size = 112; /* (2 + 2 padding + 4 * 6) dwords = 28 dwords = 112 bytes */
-         if (add_sample_positions)
-            size += 128; /* sample positions: 64 + 32 + 16 + 8 = 120 bytes, allocated as 128 */
- }
- else if (scratch_bo)
- size = 8; /* 2 dword */
-
- descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
- size,
- 4096,
- RADEON_DOMAIN_VRAM,
- RADEON_FLAG_CPU_ACCESS |
- RADEON_FLAG_NO_INTERPROCESS_SHARING |
- RADEON_FLAG_READ_ONLY,
- RADV_BO_PRIORITY_DESCRIPTOR);
- if (!descriptor_bo)
- goto fail;
- } else
- descriptor_bo = queue->descriptor_bo;
-
- if (descriptor_bo != queue->descriptor_bo) {
- uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);
- if (!map)
- goto fail;
-
- if (scratch_bo) {
- uint64_t scratch_va = radv_buffer_get_va(scratch_bo);
- uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
- S_008F04_SWIZZLE_ENABLE(1);
- map[0] = scratch_va;
- map[1] = rsrc1;
- }
-
- if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo || add_sample_positions)
- fill_geom_tess_rings(queue, map, add_sample_positions,
- esgs_ring_size, esgs_ring_bo,
- gsvs_ring_size, gsvs_ring_bo,
- tess_factor_ring_size,
- tess_offchip_ring_offset,
- tess_offchip_ring_size,
- tess_rings_bo);
-
- queue->device->ws->buffer_unmap(descriptor_bo);
- }
-
- for(int i = 0; i < 3; ++i) {
- enum rgp_flush_bits sqtt_flush_bits = 0;
- struct radeon_cmdbuf *cs = NULL;
- cs = queue->device->ws->cs_create(queue->device->ws,
- queue->queue_family_index ? RING_COMPUTE : RING_GFX);
- if (!cs)
- goto fail;
-
- dest_cs[i] = cs;
-
- if (scratch_bo)
- radv_cs_add_buffer(queue->device->ws, cs, scratch_bo);
-
- /* Emit initial configuration. */
- switch (queue->queue_family_index) {
- case RADV_QUEUE_GENERAL:
- radv_init_graphics_state(cs, queue);
- break;
- case RADV_QUEUE_COMPUTE:
- radv_init_compute_state(cs, queue);
- break;
- case RADV_QUEUE_TRANSFER:
- break;
- }
-
- if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
-
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
- }
-
- radv_emit_gs_ring_sizes(queue, cs, esgs_ring_bo, esgs_ring_size,
- gsvs_ring_bo, gsvs_ring_size);
- radv_emit_tess_factor_ring(queue, cs, hs_offchip_param,
- tess_factor_ring_size, tess_rings_bo);
- radv_emit_global_shader_pointers(queue, cs, descriptor_bo);
- radv_emit_compute_scratch(queue, cs, compute_scratch_size_per_wave,
- compute_scratch_waves, compute_scratch_bo);
- radv_emit_graphics_scratch(queue, cs, scratch_size_per_wave,
- scratch_waves, scratch_bo);
-
- if (gds_bo)
- radv_cs_add_buffer(queue->device->ws, cs, gds_bo);
- if (gds_oa_bo)
- radv_cs_add_buffer(queue->device->ws, cs, gds_oa_bo);
-
- if (i == 0) {
- si_cs_emit_cache_flush(cs,
- queue->device->physical_device->rad_info.chip_class,
- NULL, 0,
- queue->queue_family_index == RING_COMPUTE &&
- queue->device->physical_device->rad_info.chip_class >= GFX7,
- (queue->queue_family_index == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
- RADV_CMD_FLAG_INV_ICACHE |
- RADV_CMD_FLAG_INV_SCACHE |
- RADV_CMD_FLAG_INV_VCACHE |
- RADV_CMD_FLAG_INV_L2 |
- RADV_CMD_FLAG_START_PIPELINE_STATS, &sqtt_flush_bits, 0);
- } else if (i == 1) {
- si_cs_emit_cache_flush(cs,
- queue->device->physical_device->rad_info.chip_class,
- NULL, 0,
- queue->queue_family_index == RING_COMPUTE &&
- queue->device->physical_device->rad_info.chip_class >= GFX7,
- RADV_CMD_FLAG_INV_ICACHE |
- RADV_CMD_FLAG_INV_SCACHE |
- RADV_CMD_FLAG_INV_VCACHE |
- RADV_CMD_FLAG_INV_L2 |
- RADV_CMD_FLAG_START_PIPELINE_STATS, &sqtt_flush_bits, 0);
- }
-
- if (queue->device->ws->cs_finalize(cs) != VK_SUCCESS)
- goto fail;
- }
-
- if (queue->initial_full_flush_preamble_cs)
- queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
-
- if (queue->initial_preamble_cs)
- queue->device->ws->cs_destroy(queue->initial_preamble_cs);
-
- if (queue->continue_preamble_cs)
- queue->device->ws->cs_destroy(queue->continue_preamble_cs);
-
- queue->initial_full_flush_preamble_cs = dest_cs[0];
- queue->initial_preamble_cs = dest_cs[1];
- queue->continue_preamble_cs = dest_cs[2];
-
- if (scratch_bo != queue->scratch_bo) {
- if (queue->scratch_bo)
- queue->device->ws->buffer_destroy(queue->device->ws, queue->scratch_bo);
- queue->scratch_bo = scratch_bo;
- }
- queue->scratch_size_per_wave = scratch_size_per_wave;
- queue->scratch_waves = scratch_waves;
-
- if (compute_scratch_bo != queue->compute_scratch_bo) {
- if (queue->compute_scratch_bo)
- queue->device->ws->buffer_destroy(queue->device->ws, queue->compute_scratch_bo);
- queue->compute_scratch_bo = compute_scratch_bo;
- }
- queue->compute_scratch_size_per_wave = compute_scratch_size_per_wave;
- queue->compute_scratch_waves = compute_scratch_waves;
-
- if (esgs_ring_bo != queue->esgs_ring_bo) {
- if (queue->esgs_ring_bo)
- queue->device->ws->buffer_destroy(queue->device->ws, queue->esgs_ring_bo);
- queue->esgs_ring_bo = esgs_ring_bo;
- queue->esgs_ring_size = esgs_ring_size;
- }
-
- if (gsvs_ring_bo != queue->gsvs_ring_bo) {
- if (queue->gsvs_ring_bo)
- queue->device->ws->buffer_destroy(queue->device->ws, queue->gsvs_ring_bo);
- queue->gsvs_ring_bo = gsvs_ring_bo;
- queue->gsvs_ring_size = gsvs_ring_size;
- }
-
- if (tess_rings_bo != queue->tess_rings_bo) {
- queue->tess_rings_bo = tess_rings_bo;
- queue->has_tess_rings = true;
- }
-
- if (gds_bo != queue->gds_bo) {
- queue->gds_bo = gds_bo;
- queue->has_gds = true;
- }
-
- if (gds_oa_bo != queue->gds_oa_bo) {
- queue->gds_oa_bo = gds_oa_bo;
- queue->has_gds_oa = true;
- }
-
- if (descriptor_bo != queue->descriptor_bo) {
- if (queue->descriptor_bo)
- queue->device->ws->buffer_destroy(queue->device->ws, queue->descriptor_bo);
-
- queue->descriptor_bo = descriptor_bo;
- }
-
- if (add_sample_positions)
- queue->has_sample_positions = true;
-
- *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
- *initial_preamble_cs = queue->initial_preamble_cs;
- *continue_preamble_cs = queue->continue_preamble_cs;
- if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
- *continue_preamble_cs = NULL;
- return VK_SUCCESS;
+ struct radeon_winsys_bo *scratch_bo = NULL;
+ struct radeon_winsys_bo *descriptor_bo = NULL;
+ struct radeon_winsys_bo *compute_scratch_bo = NULL;
+ struct radeon_winsys_bo *esgs_ring_bo = NULL;
+ struct radeon_winsys_bo *gsvs_ring_bo = NULL;
+ struct radeon_winsys_bo *tess_rings_bo = NULL;
+ struct radeon_winsys_bo *gds_bo = NULL;
+ struct radeon_winsys_bo *gds_oa_bo = NULL;
+ struct radeon_cmdbuf *dest_cs[3] = {0};
+ bool add_tess_rings = false, add_gds = false, add_gds_oa = false, add_sample_positions = false;
+ unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
+ unsigned max_offchip_buffers;
+ unsigned hs_offchip_param = 0;
+ unsigned tess_offchip_ring_offset;
+ uint32_t ring_bo_flags = RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING;
+ if (!queue->has_tess_rings) {
+ if (needs_tess_rings)
+ add_tess_rings = true;
+ }
+ if (!queue->has_gds) {
+ if (needs_gds)
+ add_gds = true;
+ }
+ if (!queue->has_gds_oa) {
+ if (needs_gds_oa)
+ add_gds_oa = true;
+ }
+ if (!queue->has_sample_positions) {
+ if (needs_sample_positions)
+ add_sample_positions = true;
+ }
+ tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se;
+ hs_offchip_param = radv_get_hs_offchip_param(queue->device, &max_offchip_buffers);
+ tess_offchip_ring_offset = align(tess_factor_ring_size, 64 * 1024);
+ tess_offchip_ring_size = max_offchip_buffers * queue->device->tess_offchip_block_dw_size * 4;
+
+ scratch_size_per_wave = MAX2(scratch_size_per_wave, queue->scratch_size_per_wave);
+ if (scratch_size_per_wave)
+ scratch_waves = MIN2(scratch_waves, UINT32_MAX / scratch_size_per_wave);
+ else
+ scratch_waves = 0;
+
+ compute_scratch_size_per_wave =
+ MAX2(compute_scratch_size_per_wave, queue->compute_scratch_size_per_wave);
+ if (compute_scratch_size_per_wave)
+ compute_scratch_waves =
+ MIN2(compute_scratch_waves, UINT32_MAX / compute_scratch_size_per_wave);
+ else
+ compute_scratch_waves = 0;
+
+ if (scratch_size_per_wave <= queue->scratch_size_per_wave &&
+ scratch_waves <= queue->scratch_waves &&
+ compute_scratch_size_per_wave <= queue->compute_scratch_size_per_wave &&
+ compute_scratch_waves <= queue->compute_scratch_waves &&
+ esgs_ring_size <= queue->esgs_ring_size && gsvs_ring_size <= queue->gsvs_ring_size &&
+ !add_tess_rings && !add_gds && !add_gds_oa && !add_sample_positions &&
+ queue->initial_preamble_cs) {
+ *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
+ *initial_preamble_cs = queue->initial_preamble_cs;
+ *continue_preamble_cs = queue->continue_preamble_cs;
+ if (!scratch_size_per_wave && !compute_scratch_size_per_wave && !esgs_ring_size &&
+ !gsvs_ring_size && !needs_tess_rings && !needs_gds && !needs_gds_oa &&
+ !needs_sample_positions)
+ *continue_preamble_cs = NULL;
+ return VK_SUCCESS;
+ }
+
+ uint32_t scratch_size = scratch_size_per_wave * scratch_waves;
+ uint32_t queue_scratch_size = queue->scratch_size_per_wave * queue->scratch_waves;
+ if (scratch_size > queue_scratch_size) {
+ scratch_bo =
+ queue->device->ws->buffer_create(queue->device->ws, scratch_size, 4096, RADEON_DOMAIN_VRAM,
+ ring_bo_flags, RADV_BO_PRIORITY_SCRATCH);
+ if (!scratch_bo)
+ goto fail;
+ } else
+ scratch_bo = queue->scratch_bo;
+
+ uint32_t compute_scratch_size = compute_scratch_size_per_wave * compute_scratch_waves;
+ uint32_t compute_queue_scratch_size =
+ queue->compute_scratch_size_per_wave * queue->compute_scratch_waves;
+ if (compute_scratch_size > compute_queue_scratch_size) {
+ compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws, compute_scratch_size,
+ 4096, RADEON_DOMAIN_VRAM, ring_bo_flags,
+ RADV_BO_PRIORITY_SCRATCH);
+ if (!compute_scratch_bo)
+ goto fail;
+
+ } else
+ compute_scratch_bo = queue->compute_scratch_bo;
+
+ if (esgs_ring_size > queue->esgs_ring_size) {
+ esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws, esgs_ring_size, 4096,
+ RADEON_DOMAIN_VRAM, ring_bo_flags,
+ RADV_BO_PRIORITY_SCRATCH);
+ if (!esgs_ring_bo)
+ goto fail;
+ } else {
+ esgs_ring_bo = queue->esgs_ring_bo;
+ esgs_ring_size = queue->esgs_ring_size;
+ }
+
+ if (gsvs_ring_size > queue->gsvs_ring_size) {
+ gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws, gsvs_ring_size, 4096,
+ RADEON_DOMAIN_VRAM, ring_bo_flags,
+ RADV_BO_PRIORITY_SCRATCH);
+ if (!gsvs_ring_bo)
+ goto fail;
+ } else {
+ gsvs_ring_bo = queue->gsvs_ring_bo;
+ gsvs_ring_size = queue->gsvs_ring_size;
+ }
+
+ if (add_tess_rings) {
+ tess_rings_bo = queue->device->ws->buffer_create(
+ queue->device->ws, tess_offchip_ring_offset + tess_offchip_ring_size, 256,
+ RADEON_DOMAIN_VRAM, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH);
+ if (!tess_rings_bo)
+ goto fail;
+ } else {
+ tess_rings_bo = queue->tess_rings_bo;
+ }
+
+ if (add_gds) {
+ assert(queue->device->physical_device->rad_info.chip_class >= GFX10);
+
+ /* 4 streamout GDS counters.
+ * We need 256B (64 dw) of GDS, otherwise streamout hangs.
+ */
+ gds_bo = queue->device->ws->buffer_create(queue->device->ws, 256, 4, RADEON_DOMAIN_GDS,
+ ring_bo_flags, RADV_BO_PRIORITY_SCRATCH);
+ if (!gds_bo)
+ goto fail;
+ } else {
+ gds_bo = queue->gds_bo;
+ }
+
+ if (add_gds_oa) {
+ assert(queue->device->physical_device->rad_info.chip_class >= GFX10);
+
+ gds_oa_bo = queue->device->ws->buffer_create(queue->device->ws, 4, 1, RADEON_DOMAIN_OA,
+ ring_bo_flags, RADV_BO_PRIORITY_SCRATCH);
+ if (!gds_oa_bo)
+ goto fail;
+ } else {
+ gds_oa_bo = queue->gds_oa_bo;
+ }
+
+ if (scratch_bo != queue->scratch_bo || esgs_ring_bo != queue->esgs_ring_bo ||
+ gsvs_ring_bo != queue->gsvs_ring_bo || tess_rings_bo != queue->tess_rings_bo ||
+ add_sample_positions) {
+ uint32_t size = 0;
+ if (gsvs_ring_bo || esgs_ring_bo || tess_rings_bo || add_sample_positions) {
+         size = 112; /* (2 + 2 padding + 4 * 6) dwords = 28 dwords = 112 bytes */
+         if (add_sample_positions)
+            size += 128; /* sample positions: 64 + 32 + 16 + 8 = 120 bytes, allocated as 128 */
+ } else if (scratch_bo)
+ size = 8; /* 2 dword */
+
+ descriptor_bo = queue->device->ws->buffer_create(
+ queue->device->ws, size, 4096, RADEON_DOMAIN_VRAM,
+ RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY,
+ RADV_BO_PRIORITY_DESCRIPTOR);
+ if (!descriptor_bo)
+ goto fail;
+ } else
+ descriptor_bo = queue->descriptor_bo;
+
+ if (descriptor_bo != queue->descriptor_bo) {
+ uint32_t *map = (uint32_t *)queue->device->ws->buffer_map(descriptor_bo);
+ if (!map)
+ goto fail;
+
+ if (scratch_bo) {
+ uint64_t scratch_va = radv_buffer_get_va(scratch_bo);
+ uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) | S_008F04_SWIZZLE_ENABLE(1);
+ map[0] = scratch_va;
+ map[1] = rsrc1;
+ }
+
+ if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo || add_sample_positions)
+ fill_geom_tess_rings(queue, map, add_sample_positions, esgs_ring_size, esgs_ring_bo,
+ gsvs_ring_size, gsvs_ring_bo, tess_factor_ring_size,
+ tess_offchip_ring_offset, tess_offchip_ring_size, tess_rings_bo);
+
+ queue->device->ws->buffer_unmap(descriptor_bo);
+ }
+
+ for (int i = 0; i < 3; ++i) {
+ enum rgp_flush_bits sqtt_flush_bits = 0;
+ struct radeon_cmdbuf *cs = NULL;
+ cs = queue->device->ws->cs_create(queue->device->ws,
+ queue->queue_family_index ? RING_COMPUTE : RING_GFX);
+ if (!cs)
+ goto fail;
+
+ dest_cs[i] = cs;
+
+ if (scratch_bo)
+ radv_cs_add_buffer(queue->device->ws, cs, scratch_bo);
+
+ /* Emit initial configuration. */
+ switch (queue->queue_family_index) {
+ case RADV_QUEUE_GENERAL:
+ radv_init_graphics_state(cs, queue);
+ break;
+ case RADV_QUEUE_COMPUTE:
+ radv_init_compute_state(cs, queue);
+ break;
+ case RADV_QUEUE_TRANSFER:
+ break;
+ }
+
+ if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
+
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
+ }
+
+ radv_emit_gs_ring_sizes(queue, cs, esgs_ring_bo, esgs_ring_size, gsvs_ring_bo,
+ gsvs_ring_size);
+ radv_emit_tess_factor_ring(queue, cs, hs_offchip_param, tess_factor_ring_size, tess_rings_bo);
+ radv_emit_global_shader_pointers(queue, cs, descriptor_bo);
+ radv_emit_compute_scratch(queue, cs, compute_scratch_size_per_wave, compute_scratch_waves,
+ compute_scratch_bo);
+ radv_emit_graphics_scratch(queue, cs, scratch_size_per_wave, scratch_waves, scratch_bo);
+
+ if (gds_bo)
+ radv_cs_add_buffer(queue->device->ws, cs, gds_bo);
+ if (gds_oa_bo)
+ radv_cs_add_buffer(queue->device->ws, cs, gds_oa_bo);
+
+ if (i == 0) {
+ si_cs_emit_cache_flush(
+ cs, queue->device->physical_device->rad_info.chip_class, NULL, 0,
+ queue->queue_family_index == RING_COMPUTE &&
+ queue->device->physical_device->rad_info.chip_class >= GFX7,
+ (queue->queue_family_index == RADV_QUEUE_COMPUTE
+ ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH
+ : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
+ RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE | RADV_CMD_FLAG_INV_VCACHE |
+ RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_START_PIPELINE_STATS,
+ &sqtt_flush_bits, 0);
+ } else if (i == 1) {
+ si_cs_emit_cache_flush(cs, queue->device->physical_device->rad_info.chip_class, NULL, 0,
+ queue->queue_family_index == RING_COMPUTE &&
+ queue->device->physical_device->rad_info.chip_class >= GFX7,
+ RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE |
+ RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_L2 |
+ RADV_CMD_FLAG_START_PIPELINE_STATS,
+ &sqtt_flush_bits, 0);
+ }
+
+ if (queue->device->ws->cs_finalize(cs) != VK_SUCCESS)
+ goto fail;
+ }
+
+ if (queue->initial_full_flush_preamble_cs)
+ queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
+
+ if (queue->initial_preamble_cs)
+ queue->device->ws->cs_destroy(queue->initial_preamble_cs);
+
+ if (queue->continue_preamble_cs)
+ queue->device->ws->cs_destroy(queue->continue_preamble_cs);
+
+ queue->initial_full_flush_preamble_cs = dest_cs[0];
+ queue->initial_preamble_cs = dest_cs[1];
+ queue->continue_preamble_cs = dest_cs[2];
+
+ if (scratch_bo != queue->scratch_bo) {
+ if (queue->scratch_bo)
+ queue->device->ws->buffer_destroy(queue->device->ws, queue->scratch_bo);
+ queue->scratch_bo = scratch_bo;
+ }
+ queue->scratch_size_per_wave = scratch_size_per_wave;
+ queue->scratch_waves = scratch_waves;
+
+ if (compute_scratch_bo != queue->compute_scratch_bo) {
+ if (queue->compute_scratch_bo)
+ queue->device->ws->buffer_destroy(queue->device->ws, queue->compute_scratch_bo);
+ queue->compute_scratch_bo = compute_scratch_bo;
+ }
+ queue->compute_scratch_size_per_wave = compute_scratch_size_per_wave;
+ queue->compute_scratch_waves = compute_scratch_waves;
+
+ if (esgs_ring_bo != queue->esgs_ring_bo) {
+ if (queue->esgs_ring_bo)
+ queue->device->ws->buffer_destroy(queue->device->ws, queue->esgs_ring_bo);
+ queue->esgs_ring_bo = esgs_ring_bo;
+ queue->esgs_ring_size = esgs_ring_size;
+ }
+
+ if (gsvs_ring_bo != queue->gsvs_ring_bo) {
+ if (queue->gsvs_ring_bo)
+ queue->device->ws->buffer_destroy(queue->device->ws, queue->gsvs_ring_bo);
+ queue->gsvs_ring_bo = gsvs_ring_bo;
+ queue->gsvs_ring_size = gsvs_ring_size;
+ }
+
+ if (tess_rings_bo != queue->tess_rings_bo) {
+ queue->tess_rings_bo = tess_rings_bo;
+ queue->has_tess_rings = true;
+ }
+
+ if (gds_bo != queue->gds_bo) {
+ queue->gds_bo = gds_bo;
+ queue->has_gds = true;
+ }
+
+ if (gds_oa_bo != queue->gds_oa_bo) {
+ queue->gds_oa_bo = gds_oa_bo;
+ queue->has_gds_oa = true;
+ }
+
+ if (descriptor_bo != queue->descriptor_bo) {
+ if (queue->descriptor_bo)
+ queue->device->ws->buffer_destroy(queue->device->ws, queue->descriptor_bo);
+
+ queue->descriptor_bo = descriptor_bo;
+ }
+
+ if (add_sample_positions)
+ queue->has_sample_positions = true;
+
+ *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
+ *initial_preamble_cs = queue->initial_preamble_cs;
+ *continue_preamble_cs = queue->continue_preamble_cs;
+ if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
+ *continue_preamble_cs = NULL;
+ return VK_SUCCESS;
fail:
- for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
- if (dest_cs[i])
- queue->device->ws->cs_destroy(dest_cs[i]);
- if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
- queue->device->ws->buffer_destroy(queue->device->ws, descriptor_bo);
- if (scratch_bo && scratch_bo != queue->scratch_bo)
- queue->device->ws->buffer_destroy(queue->device->ws, scratch_bo);
- if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
- queue->device->ws->buffer_destroy(queue->device->ws, compute_scratch_bo);
- if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
- queue->device->ws->buffer_destroy(queue->device->ws, esgs_ring_bo);
- if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
- queue->device->ws->buffer_destroy(queue->device->ws, gsvs_ring_bo);
- if (tess_rings_bo && tess_rings_bo != queue->tess_rings_bo)
- queue->device->ws->buffer_destroy(queue->device->ws, tess_rings_bo);
- if (gds_bo && gds_bo != queue->gds_bo)
- queue->device->ws->buffer_destroy(queue->device->ws, gds_bo);
- if (gds_oa_bo && gds_oa_bo != queue->gds_oa_bo)
- queue->device->ws->buffer_destroy(queue->device->ws, gds_oa_bo);
-
- return vk_error(queue->device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
-}
-
-static VkResult radv_alloc_sem_counts(struct radv_device *device,
- struct radv_winsys_sem_counts *counts,
- int num_sems,
- struct radv_semaphore_part **sems,
- const uint64_t *timeline_values,
- VkFence _fence,
- bool is_signal)
-{
- int syncobj_idx = 0, non_reset_idx = 0, timeline_idx = 0;
-
- if (num_sems == 0 && _fence == VK_NULL_HANDLE)
- return VK_SUCCESS;
-
- for (uint32_t i = 0; i < num_sems; i++) {
- switch(sems[i]->kind) {
- case RADV_SEMAPHORE_SYNCOBJ:
- counts->syncobj_count++;
- counts->syncobj_reset_count++;
- break;
- case RADV_SEMAPHORE_NONE:
- break;
- case RADV_SEMAPHORE_TIMELINE:
- counts->syncobj_count++;
- break;
- case RADV_SEMAPHORE_TIMELINE_SYNCOBJ:
- counts->timeline_syncobj_count++;
- break;
- }
- }
-
- if (_fence != VK_NULL_HANDLE)
- counts->syncobj_count++;
-
- if (counts->syncobj_count || counts->timeline_syncobj_count) {
- counts->points = (uint64_t *)malloc(
- sizeof(*counts->syncobj) * counts->syncobj_count +
- (sizeof(*counts->syncobj) + sizeof(*counts->points)) * counts->timeline_syncobj_count);
- if (!counts->points)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- counts->syncobj = (uint32_t*)(counts->points + counts->timeline_syncobj_count);
- }
-
- non_reset_idx = counts->syncobj_reset_count;
-
- for (uint32_t i = 0; i < num_sems; i++) {
- switch(sems[i]->kind) {
- case RADV_SEMAPHORE_NONE:
- unreachable("Empty semaphore");
- break;
- case RADV_SEMAPHORE_SYNCOBJ:
- counts->syncobj[syncobj_idx++] = sems[i]->syncobj;
- break;
- case RADV_SEMAPHORE_TIMELINE: {
- mtx_lock(&sems[i]->timeline.mutex);
- struct radv_timeline_point *point = NULL;
- if (is_signal) {
- point = radv_timeline_add_point_locked(device, &sems[i]->timeline, timeline_values[i]);
- } else {
- point = radv_timeline_find_point_at_least_locked(device, &sems[i]->timeline, timeline_values[i]);
- }
-
- mtx_unlock(&sems[i]->timeline.mutex);
-
- if (point) {
- counts->syncobj[non_reset_idx++] = point->syncobj;
- } else {
-            /* Explicitly remove the semaphore so that we do not find
-             * a point for it later, post-submit. */
- sems[i] = NULL;
- }
- break;
- }
- case RADV_SEMAPHORE_TIMELINE_SYNCOBJ:
- counts->syncobj[counts->syncobj_count + timeline_idx] = sems[i]->syncobj;
- counts->points[timeline_idx] = timeline_values[i];
- ++timeline_idx;
- break;
- }
- }
-
- if (_fence != VK_NULL_HANDLE) {
- RADV_FROM_HANDLE(radv_fence, fence, _fence);
-
- struct radv_fence_part *part =
- fence->temporary.kind != RADV_FENCE_NONE ?
- &fence->temporary : &fence->permanent;
- counts->syncobj[non_reset_idx++] = part->syncobj;
- }
-
- assert(MAX2(syncobj_idx, non_reset_idx) <= counts->syncobj_count);
- counts->syncobj_count = MAX2(syncobj_idx, non_reset_idx);
-
- return VK_SUCCESS;
+ for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
+ if (dest_cs[i])
+ queue->device->ws->cs_destroy(dest_cs[i]);
+ if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
+ queue->device->ws->buffer_destroy(queue->device->ws, descriptor_bo);
+ if (scratch_bo && scratch_bo != queue->scratch_bo)
+ queue->device->ws->buffer_destroy(queue->device->ws, scratch_bo);
+ if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
+ queue->device->ws->buffer_destroy(queue->device->ws, compute_scratch_bo);
+ if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
+ queue->device->ws->buffer_destroy(queue->device->ws, esgs_ring_bo);
+ if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
+ queue->device->ws->buffer_destroy(queue->device->ws, gsvs_ring_bo);
+ if (tess_rings_bo && tess_rings_bo != queue->tess_rings_bo)
+ queue->device->ws->buffer_destroy(queue->device->ws, tess_rings_bo);
+ if (gds_bo && gds_bo != queue->gds_bo)
+ queue->device->ws->buffer_destroy(queue->device->ws, gds_bo);
+ if (gds_oa_bo && gds_oa_bo != queue->gds_oa_bo)
+ queue->device->ws->buffer_destroy(queue->device->ws, gds_oa_bo);
+
+ return vk_error(queue->device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+}
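
radv_get_preamble_cs above clamps each wave count with MIN2(waves, UINT32_MAX / size_per_wave) so that the later 32-bit product size_per_wave * waves cannot overflow. A minimal standalone sketch of that pattern, not part of this patch, with invented helper names:

   #include <stdint.h>

   /* Clamp the wave count so that size_per_wave * waves fits in a uint32_t. */
   static uint32_t
   clamp_scratch_waves(uint32_t size_per_wave, uint32_t waves)
   {
      if (!size_per_wave)
         return 0; /* no scratch requested, so no waves need scratch */
      if (waves > UINT32_MAX / size_per_wave)
         waves = UINT32_MAX / size_per_wave;
      return waves;
   }

   /* Total allocation size; cannot overflow after the clamp above. */
   static uint32_t
   scratch_bytes(uint32_t size_per_wave, uint32_t waves)
   {
      return size_per_wave * clamp_scratch_waves(size_per_wave, waves);
   }

The same clamp is applied separately to the graphics and the compute scratch sizes before either buffer is created.
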
+
+static VkResult
+radv_alloc_sem_counts(struct radv_device *device, struct radv_winsys_sem_counts *counts,
+ int num_sems, struct radv_semaphore_part **sems,
+ const uint64_t *timeline_values, VkFence _fence, bool is_signal)
+{
+ int syncobj_idx = 0, non_reset_idx = 0, timeline_idx = 0;
+
+ if (num_sems == 0 && _fence == VK_NULL_HANDLE)
+ return VK_SUCCESS;
+
+ for (uint32_t i = 0; i < num_sems; i++) {
+ switch (sems[i]->kind) {
+ case RADV_SEMAPHORE_SYNCOBJ:
+ counts->syncobj_count++;
+ counts->syncobj_reset_count++;
+ break;
+ case RADV_SEMAPHORE_NONE:
+ break;
+ case RADV_SEMAPHORE_TIMELINE:
+ counts->syncobj_count++;
+ break;
+ case RADV_SEMAPHORE_TIMELINE_SYNCOBJ:
+ counts->timeline_syncobj_count++;
+ break;
+ }
+ }
+
+ if (_fence != VK_NULL_HANDLE)
+ counts->syncobj_count++;
+
+ if (counts->syncobj_count || counts->timeline_syncobj_count) {
+ counts->points = (uint64_t *)malloc(sizeof(*counts->syncobj) * counts->syncobj_count +
+ (sizeof(*counts->syncobj) + sizeof(*counts->points)) *
+ counts->timeline_syncobj_count);
+ if (!counts->points)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ counts->syncobj = (uint32_t *)(counts->points + counts->timeline_syncobj_count);
+ }
+
+ non_reset_idx = counts->syncobj_reset_count;
+
+ for (uint32_t i = 0; i < num_sems; i++) {
+ switch (sems[i]->kind) {
+ case RADV_SEMAPHORE_NONE:
+ unreachable("Empty semaphore");
+ break;
+ case RADV_SEMAPHORE_SYNCOBJ:
+ counts->syncobj[syncobj_idx++] = sems[i]->syncobj;
+ break;
+ case RADV_SEMAPHORE_TIMELINE: {
+ mtx_lock(&sems[i]->timeline.mutex);
+ struct radv_timeline_point *point = NULL;
+ if (is_signal) {
+ point = radv_timeline_add_point_locked(device, &sems[i]->timeline, timeline_values[i]);
+ } else {
+ point = radv_timeline_find_point_at_least_locked(device, &sems[i]->timeline,
+ timeline_values[i]);
+ }
+
+ mtx_unlock(&sems[i]->timeline.mutex);
+
+ if (point) {
+ counts->syncobj[non_reset_idx++] = point->syncobj;
+ } else {
+            /* Explicitly remove the semaphore so that we do not find
+             * a point for it later, post-submit. */
+ sems[i] = NULL;
+ }
+ break;
+ }
+ case RADV_SEMAPHORE_TIMELINE_SYNCOBJ:
+ counts->syncobj[counts->syncobj_count + timeline_idx] = sems[i]->syncobj;
+ counts->points[timeline_idx] = timeline_values[i];
+ ++timeline_idx;
+ break;
+ }
+ }
+
+ if (_fence != VK_NULL_HANDLE) {
+ RADV_FROM_HANDLE(radv_fence, fence, _fence);
+
+ struct radv_fence_part *part =
+ fence->temporary.kind != RADV_FENCE_NONE ? &fence->temporary : &fence->permanent;
+ counts->syncobj[non_reset_idx++] = part->syncobj;
+ }
+
+ assert(MAX2(syncobj_idx, non_reset_idx) <= counts->syncobj_count);
+ counts->syncobj_count = MAX2(syncobj_idx, non_reset_idx);
+
+ return VK_SUCCESS;
}
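
radv_alloc_sem_counts above packs the uint64_t timeline points and the uint32_t syncobj handles into one malloc, with the handle array starting right after the points; freeing the points pointer therefore releases both, which is what radv_free_sem_info below does. A simplified sketch of that layout, not part of this patch, with invented names:

   #include <stdint.h>
   #include <stdlib.h>

   struct sem_arrays {
      uint64_t *points;  /* one wait/signal value per timeline syncobj */
      uint32_t *syncobj; /* binary syncobjs first, timeline syncobjs appended */
   };

   /* One allocation backs both arrays; free(arr->points) releases everything. */
   static int
   sem_arrays_alloc(struct sem_arrays *arr, unsigned syncobj_count, unsigned timeline_count)
   {
      size_t bytes = sizeof(*arr->syncobj) * syncobj_count +
                     (sizeof(*arr->syncobj) + sizeof(*arr->points)) * timeline_count;

      arr->points = malloc(bytes);
      if (!arr->points)
         return -1;
      arr->syncobj = (uint32_t *)(arr->points + timeline_count);
      return 0;
   }
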
static void
radv_free_sem_info(struct radv_winsys_sem_info *sem_info)
{
- free(sem_info->wait.points);
- free(sem_info->signal.points);
+ free(sem_info->wait.points);
+ free(sem_info->signal.points);
}
-
-static void radv_free_temp_syncobjs(struct radv_device *device,
- int num_sems,
- struct radv_semaphore_part *sems)
+static void
+radv_free_temp_syncobjs(struct radv_device *device, int num_sems, struct radv_semaphore_part *sems)
{
- for (uint32_t i = 0; i < num_sems; i++) {
- radv_destroy_semaphore_part(device, sems + i);
- }
+ for (uint32_t i = 0; i < num_sems; i++) {
+ radv_destroy_semaphore_part(device, sems + i);
+ }
}
static VkResult
-radv_alloc_sem_info(struct radv_device *device,
- struct radv_winsys_sem_info *sem_info,
- int num_wait_sems,
- struct radv_semaphore_part **wait_sems,
- const uint64_t *wait_values,
- int num_signal_sems,
- struct radv_semaphore_part **signal_sems,
- const uint64_t *signal_values,
- VkFence fence)
-{
- VkResult ret;
-
- ret = radv_alloc_sem_counts(device, &sem_info->wait, num_wait_sems, wait_sems, wait_values, VK_NULL_HANDLE, false);
- if (ret)
- return ret;
- ret = radv_alloc_sem_counts(device, &sem_info->signal, num_signal_sems, signal_sems, signal_values, fence, true);
- if (ret)
- radv_free_sem_info(sem_info);
-
- /* caller can override these */
- sem_info->cs_emit_wait = true;
- sem_info->cs_emit_signal = true;
- return ret;
+radv_alloc_sem_info(struct radv_device *device, struct radv_winsys_sem_info *sem_info,
+ int num_wait_sems, struct radv_semaphore_part **wait_sems,
+ const uint64_t *wait_values, int num_signal_sems,
+ struct radv_semaphore_part **signal_sems, const uint64_t *signal_values,
+ VkFence fence)
+{
+ VkResult ret;
+
+ ret = radv_alloc_sem_counts(device, &sem_info->wait, num_wait_sems, wait_sems, wait_values,
+ VK_NULL_HANDLE, false);
+ if (ret)
+ return ret;
+ ret = radv_alloc_sem_counts(device, &sem_info->signal, num_signal_sems, signal_sems,
+ signal_values, fence, true);
+ if (ret)
+ radv_free_sem_info(sem_info);
+
+ /* caller can override these */
+ sem_info->cs_emit_wait = true;
+ sem_info->cs_emit_signal = true;
+ return ret;
}
static void
-radv_finalize_timelines(struct radv_device *device,
- uint32_t num_wait_sems,
- struct radv_semaphore_part **wait_sems,
- const uint64_t *wait_values,
- uint32_t num_signal_sems,
- struct radv_semaphore_part **signal_sems,
- const uint64_t *signal_values,
- struct list_head *processing_list)
-{
- for (uint32_t i = 0; i < num_wait_sems; ++i) {
- if (wait_sems[i] && wait_sems[i]->kind == RADV_SEMAPHORE_TIMELINE) {
- mtx_lock(&wait_sems[i]->timeline.mutex);
- struct radv_timeline_point *point =
- radv_timeline_find_point_at_least_locked(device, &wait_sems[i]->timeline, wait_values[i]);
- point->wait_count -= 2;
- mtx_unlock(&wait_sems[i]->timeline.mutex);
- }
- }
- for (uint32_t i = 0; i < num_signal_sems; ++i) {
- if (signal_sems[i] && signal_sems[i]->kind == RADV_SEMAPHORE_TIMELINE) {
- mtx_lock(&signal_sems[i]->timeline.mutex);
- struct radv_timeline_point *point =
- radv_timeline_find_point_at_least_locked(device, &signal_sems[i]->timeline, signal_values[i]);
- signal_sems[i]->timeline.highest_submitted =
- MAX2(signal_sems[i]->timeline.highest_submitted, point->value);
- point->wait_count -= 2;
- radv_timeline_trigger_waiters_locked(&signal_sems[i]->timeline, processing_list);
- mtx_unlock(&signal_sems[i]->timeline.mutex);
- } else if (signal_sems[i] && signal_sems[i]->kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ) {
- signal_sems[i]->timeline_syncobj.max_point =
- MAX2(signal_sems[i]->timeline_syncobj.max_point, signal_values[i]);
- }
- }
+radv_finalize_timelines(struct radv_device *device, uint32_t num_wait_sems,
+ struct radv_semaphore_part **wait_sems, const uint64_t *wait_values,
+ uint32_t num_signal_sems, struct radv_semaphore_part **signal_sems,
+ const uint64_t *signal_values, struct list_head *processing_list)
+{
+ for (uint32_t i = 0; i < num_wait_sems; ++i) {
+ if (wait_sems[i] && wait_sems[i]->kind == RADV_SEMAPHORE_TIMELINE) {
+ mtx_lock(&wait_sems[i]->timeline.mutex);
+ struct radv_timeline_point *point = radv_timeline_find_point_at_least_locked(
+ device, &wait_sems[i]->timeline, wait_values[i]);
+ point->wait_count -= 2;
+ mtx_unlock(&wait_sems[i]->timeline.mutex);
+ }
+ }
+ for (uint32_t i = 0; i < num_signal_sems; ++i) {
+ if (signal_sems[i] && signal_sems[i]->kind == RADV_SEMAPHORE_TIMELINE) {
+ mtx_lock(&signal_sems[i]->timeline.mutex);
+ struct radv_timeline_point *point = radv_timeline_find_point_at_least_locked(
+ device, &signal_sems[i]->timeline, signal_values[i]);
+ signal_sems[i]->timeline.highest_submitted =
+ MAX2(signal_sems[i]->timeline.highest_submitted, point->value);
+ point->wait_count -= 2;
+ radv_timeline_trigger_waiters_locked(&signal_sems[i]->timeline, processing_list);
+ mtx_unlock(&signal_sems[i]->timeline.mutex);
+ } else if (signal_sems[i] && signal_sems[i]->kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ) {
+ signal_sems[i]->timeline_syncobj.max_point =
+ MAX2(signal_sems[i]->timeline_syncobj.max_point, signal_values[i]);
+ }
+ }
}
static VkResult
-radv_sparse_buffer_bind_memory(struct radv_device *device,
- const VkSparseBufferMemoryBindInfo *bind)
+radv_sparse_buffer_bind_memory(struct radv_device *device, const VkSparseBufferMemoryBindInfo *bind)
{
- RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer);
- VkResult result;
+ RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer);
+ VkResult result;
- for (uint32_t i = 0; i < bind->bindCount; ++i) {
- struct radv_device_memory *mem = NULL;
+ for (uint32_t i = 0; i < bind->bindCount; ++i) {
+ struct radv_device_memory *mem = NULL;
- if (bind->pBinds[i].memory != VK_NULL_HANDLE)
- mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
+ if (bind->pBinds[i].memory != VK_NULL_HANDLE)
+ mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
- result = device->ws->buffer_virtual_bind(device->ws,
- buffer->bo,
- bind->pBinds[i].resourceOffset,
- bind->pBinds[i].size,
- mem ? mem->bo : NULL,
- bind->pBinds[i].memoryOffset);
- if (result != VK_SUCCESS)
- return result;
- }
+ result = device->ws->buffer_virtual_bind(device->ws, buffer->bo,
+ bind->pBinds[i].resourceOffset, bind->pBinds[i].size,
+ mem ? mem->bo : NULL, bind->pBinds[i].memoryOffset);
+ if (result != VK_SUCCESS)
+ return result;
+ }
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
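
radv_sparse_buffer_bind_memory above is the driver side of vkQueueBindSparse: each VkSparseMemoryBind maps a byte range of the sparse buffer to device memory, or unmaps it when memory is VK_NULL_HANDLE. A minimal application-side sketch that would end up in this path; it assumes queue, sparse_buffer and memory were created elsewhere with sparse binding enabled, and the wrapper name is invented:

   #include <vulkan/vulkan.h>

   /* Bind `size` bytes of `memory` at the start of a sparse buffer. */
   static VkResult
   bind_sparse_buffer_range(VkQueue queue, VkBuffer sparse_buffer, VkDeviceMemory memory,
                            VkDeviceSize size)
   {
      VkSparseMemoryBind bind = {
         .resourceOffset = 0,
         .size = size,     /* must be a multiple of the sparse block size */
         .memory = memory, /* VK_NULL_HANDLE would unbind the range instead */
         .memoryOffset = 0,
      };
      VkSparseBufferMemoryBindInfo buffer_bind = {
         .buffer = sparse_buffer,
         .bindCount = 1,
         .pBinds = &bind,
      };
      VkBindSparseInfo bind_info = {
         .sType = VK_STRUCTURE_TYPE_BIND_SPARSE_INFO,
         .bufferBindCount = 1,
         .pBufferBinds = &buffer_bind,
      };
      return vkQueueBindSparse(queue, 1, &bind_info, VK_NULL_HANDLE);
   }
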
static VkResult
radv_sparse_image_opaque_bind_memory(struct radv_device *device,
const VkSparseImageOpaqueMemoryBindInfo *bind)
{
- RADV_FROM_HANDLE(radv_image, image, bind->image);
- VkResult result;
+ RADV_FROM_HANDLE(radv_image, image, bind->image);
+ VkResult result;
- for (uint32_t i = 0; i < bind->bindCount; ++i) {
- struct radv_device_memory *mem = NULL;
+ for (uint32_t i = 0; i < bind->bindCount; ++i) {
+ struct radv_device_memory *mem = NULL;
- if (bind->pBinds[i].memory != VK_NULL_HANDLE)
- mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
+ if (bind->pBinds[i].memory != VK_NULL_HANDLE)
+ mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
- result = device->ws->buffer_virtual_bind(device->ws,
- image->bo,
- bind->pBinds[i].resourceOffset,
- bind->pBinds[i].size,
- mem ? mem->bo : NULL,
- bind->pBinds[i].memoryOffset);
- if (result != VK_SUCCESS)
- return result;
- }
+ result = device->ws->buffer_virtual_bind(device->ws, image->bo,
+ bind->pBinds[i].resourceOffset, bind->pBinds[i].size,
+ mem ? mem->bo : NULL, bind->pBinds[i].memoryOffset);
+ if (result != VK_SUCCESS)
+ return result;
+ }
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
static VkResult
-radv_sparse_image_bind_memory(struct radv_device *device,
- const VkSparseImageMemoryBindInfo *bind)
-{
- RADV_FROM_HANDLE(radv_image, image, bind->image);
- struct radeon_surf *surface = &image->planes[0].surface;
- uint32_t bs = vk_format_get_blocksize(image->vk_format);
- VkResult result;
-
- for (uint32_t i = 0; i < bind->bindCount; ++i) {
- struct radv_device_memory *mem = NULL;
- uint32_t offset, pitch;
- uint32_t mem_offset = bind->pBinds[i].memoryOffset;
- const uint32_t layer = bind->pBinds[i].subresource.arrayLayer;
- const uint32_t level = bind->pBinds[i].subresource.mipLevel;
-
- VkExtent3D bind_extent = bind->pBinds[i].extent;
- bind_extent.width = DIV_ROUND_UP(bind_extent.width, vk_format_get_blockwidth(image->vk_format));
- bind_extent.height = DIV_ROUND_UP(bind_extent.height, vk_format_get_blockheight(image->vk_format));
-
- VkOffset3D bind_offset = bind->pBinds[i].offset;
- bind_offset.x /= vk_format_get_blockwidth(image->vk_format);
- bind_offset.y /= vk_format_get_blockheight(image->vk_format);
-
- if (bind->pBinds[i].memory != VK_NULL_HANDLE)
- mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
-
- if (device->physical_device->rad_info.chip_class >= GFX9) {
- offset = surface->u.gfx9.surf_slice_size * layer +
- surface->u.gfx9.prt_level_offset[level];
- pitch = surface->u.gfx9.prt_level_pitch[level];
- } else {
- offset = surface->u.legacy.level[level].offset +
- surface->u.legacy.level[level].slice_size_dw * 4 * layer;
- pitch = surface->u.legacy.level[level].nblk_x;
- }
-
- offset += (bind_offset.y * pitch * bs) +
- (bind_offset.x * surface->prt_tile_height * bs);
-
- uint32_t aligned_extent_width = ALIGN(bind_extent.width,
- surface->prt_tile_width);
-
- bool whole_subres = bind_offset.x == 0 &&
- aligned_extent_width == pitch;
-
- if (whole_subres) {
- uint32_t aligned_extent_height = ALIGN(bind_extent.height,
- surface->prt_tile_height);
-
- uint32_t size = aligned_extent_width * aligned_extent_height * bs;
- result = device->ws->buffer_virtual_bind(device->ws,
- image->bo,
- offset,
- size,
- mem ? mem->bo : NULL,
- mem_offset);
- if (result != VK_SUCCESS)
- return result;
- } else {
- uint32_t img_increment = pitch * bs;
- uint32_t mem_increment = aligned_extent_width * bs;
- uint32_t size = mem_increment * surface->prt_tile_height;
- for (unsigned y = 0; y < bind_extent.height; y += surface->prt_tile_height) {
- result = device->ws->buffer_virtual_bind(device->ws,
- image->bo,
- offset + img_increment * y,
- size,
- mem ? mem->bo : NULL,
- mem_offset + mem_increment * y);
- if (result != VK_SUCCESS)
- return result;
- }
- }
- }
-
- return VK_SUCCESS;
+radv_sparse_image_bind_memory(struct radv_device *device, const VkSparseImageMemoryBindInfo *bind)
+{
+ RADV_FROM_HANDLE(radv_image, image, bind->image);
+ struct radeon_surf *surface = &image->planes[0].surface;
+ uint32_t bs = vk_format_get_blocksize(image->vk_format);
+ VkResult result;
+
+ for (uint32_t i = 0; i < bind->bindCount; ++i) {
+ struct radv_device_memory *mem = NULL;
+ uint32_t offset, pitch;
+ uint32_t mem_offset = bind->pBinds[i].memoryOffset;
+ const uint32_t layer = bind->pBinds[i].subresource.arrayLayer;
+ const uint32_t level = bind->pBinds[i].subresource.mipLevel;
+
+ VkExtent3D bind_extent = bind->pBinds[i].extent;
+ bind_extent.width =
+ DIV_ROUND_UP(bind_extent.width, vk_format_get_blockwidth(image->vk_format));
+ bind_extent.height =
+ DIV_ROUND_UP(bind_extent.height, vk_format_get_blockheight(image->vk_format));
+
+ VkOffset3D bind_offset = bind->pBinds[i].offset;
+ bind_offset.x /= vk_format_get_blockwidth(image->vk_format);
+ bind_offset.y /= vk_format_get_blockheight(image->vk_format);
+
+ if (bind->pBinds[i].memory != VK_NULL_HANDLE)
+ mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
+
+ if (device->physical_device->rad_info.chip_class >= GFX9) {
+ offset = surface->u.gfx9.surf_slice_size * layer + surface->u.gfx9.prt_level_offset[level];
+ pitch = surface->u.gfx9.prt_level_pitch[level];
+ } else {
+ offset = surface->u.legacy.level[level].offset +
+ surface->u.legacy.level[level].slice_size_dw * 4 * layer;
+ pitch = surface->u.legacy.level[level].nblk_x;
+ }
+
+ offset += (bind_offset.y * pitch * bs) + (bind_offset.x * surface->prt_tile_height * bs);
+
+ uint32_t aligned_extent_width = ALIGN(bind_extent.width, surface->prt_tile_width);
+
+ bool whole_subres = bind_offset.x == 0 && aligned_extent_width == pitch;
+
+ if (whole_subres) {
+ uint32_t aligned_extent_height = ALIGN(bind_extent.height, surface->prt_tile_height);
+
+ uint32_t size = aligned_extent_width * aligned_extent_height * bs;
+ result = device->ws->buffer_virtual_bind(device->ws, image->bo, offset, size,
+ mem ? mem->bo : NULL, mem_offset);
+ if (result != VK_SUCCESS)
+ return result;
+ } else {
+ uint32_t img_increment = pitch * bs;
+ uint32_t mem_increment = aligned_extent_width * bs;
+ uint32_t size = mem_increment * surface->prt_tile_height;
+ for (unsigned y = 0; y < bind_extent.height; y += surface->prt_tile_height) {
+ result = device->ws->buffer_virtual_bind(
+ device->ws, image->bo, offset + img_increment * y, size, mem ? mem->bo : NULL,
+ mem_offset + mem_increment * y);
+ if (result != VK_SUCCESS)
+ return result;
+ }
+ }
+ }
+
+ return VK_SUCCESS;
}
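
When the bound rectangle does not cover the full pitch of the subresource, radv_sparse_image_bind_memory above cannot map it as a single contiguous range, so it issues one virtual bind per row of tiles: the image-side offset steps by pitch * bs per texel row while the memory-side offset steps by the tile-aligned width of the rectangle. A standalone sketch of that address arithmetic, not part of this patch, with invented names and printf standing in for the winsys bind call:

   #include <stdint.h>
   #include <stdio.h>

   /* Print the per-tile-row ranges a rectangular sparse bind splits into.
    * pitch is the subresource width in texels, bs the bytes per texel,
    * tile_h the PRT tile height in texels; x/y/width/height describe the
    * bound rectangle and are assumed to be tile aligned. */
   static void
   split_rect_bind(uint32_t pitch, uint32_t bs, uint32_t tile_h,
                   uint32_t x, uint32_t y, uint32_t width, uint32_t height)
   {
      uint64_t img_base = (uint64_t)y * pitch * bs + (uint64_t)x * tile_h * bs;
      uint32_t img_increment = pitch * bs;    /* bytes per texel row in the image */
      uint32_t mem_increment = width * bs;    /* bytes per texel row in the bound memory */
      uint32_t size = mem_increment * tile_h; /* one row of tiles */

      for (uint32_t row = 0; row < height; row += tile_h)
         printf("image +%llu <- memory +%llu (%u bytes)\n",
                (unsigned long long)(img_base + (uint64_t)img_increment * row),
                (unsigned long long)((uint64_t)mem_increment * row), (unsigned)size);
   }
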
static VkResult
-radv_get_preambles(struct radv_queue *queue,
- const VkCommandBuffer *cmd_buffers,
- uint32_t cmd_buffer_count,
- struct radeon_cmdbuf **initial_full_flush_preamble_cs,
+radv_get_preambles(struct radv_queue *queue, const VkCommandBuffer *cmd_buffers,
+ uint32_t cmd_buffer_count, struct radeon_cmdbuf **initial_full_flush_preamble_cs,
struct radeon_cmdbuf **initial_preamble_cs,
struct radeon_cmdbuf **continue_preamble_cs)
{
- uint32_t scratch_size_per_wave = 0, waves_wanted = 0;
- uint32_t compute_scratch_size_per_wave = 0, compute_waves_wanted = 0;
- uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
- bool tess_rings_needed = false;
- bool gds_needed = false;
- bool gds_oa_needed = false;
- bool sample_positions_needed = false;
-
- for (uint32_t j = 0; j < cmd_buffer_count; j++) {
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
- cmd_buffers[j]);
-
- scratch_size_per_wave = MAX2(scratch_size_per_wave, cmd_buffer->scratch_size_per_wave_needed);
- waves_wanted = MAX2(waves_wanted, cmd_buffer->scratch_waves_wanted);
- compute_scratch_size_per_wave = MAX2(compute_scratch_size_per_wave,
- cmd_buffer->compute_scratch_size_per_wave_needed);
- compute_waves_wanted = MAX2(compute_waves_wanted,
- cmd_buffer->compute_scratch_waves_wanted);
- esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
- gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
- tess_rings_needed |= cmd_buffer->tess_rings_needed;
- gds_needed |= cmd_buffer->gds_needed;
- gds_oa_needed |= cmd_buffer->gds_oa_needed;
- sample_positions_needed |= cmd_buffer->sample_positions_needed;
- }
-
- return radv_get_preamble_cs(queue, scratch_size_per_wave, waves_wanted,
- compute_scratch_size_per_wave, compute_waves_wanted,
- esgs_ring_size, gsvs_ring_size, tess_rings_needed,
- gds_needed, gds_oa_needed, sample_positions_needed,
- initial_full_flush_preamble_cs,
- initial_preamble_cs, continue_preamble_cs);
+ uint32_t scratch_size_per_wave = 0, waves_wanted = 0;
+ uint32_t compute_scratch_size_per_wave = 0, compute_waves_wanted = 0;
+ uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
+ bool tess_rings_needed = false;
+ bool gds_needed = false;
+ bool gds_oa_needed = false;
+ bool sample_positions_needed = false;
+
+ for (uint32_t j = 0; j < cmd_buffer_count; j++) {
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, cmd_buffers[j]);
+
+ scratch_size_per_wave = MAX2(scratch_size_per_wave, cmd_buffer->scratch_size_per_wave_needed);
+ waves_wanted = MAX2(waves_wanted, cmd_buffer->scratch_waves_wanted);
+ compute_scratch_size_per_wave =
+ MAX2(compute_scratch_size_per_wave, cmd_buffer->compute_scratch_size_per_wave_needed);
+ compute_waves_wanted = MAX2(compute_waves_wanted, cmd_buffer->compute_scratch_waves_wanted);
+ esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
+ gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
+ tess_rings_needed |= cmd_buffer->tess_rings_needed;
+ gds_needed |= cmd_buffer->gds_needed;
+ gds_oa_needed |= cmd_buffer->gds_oa_needed;
+ sample_positions_needed |= cmd_buffer->sample_positions_needed;
+ }
+
+ return radv_get_preamble_cs(queue, scratch_size_per_wave, waves_wanted,
+ compute_scratch_size_per_wave, compute_waves_wanted, esgs_ring_size,
+ gsvs_ring_size, tess_rings_needed, gds_needed, gds_oa_needed,
+ sample_positions_needed, initial_full_flush_preamble_cs,
+ initial_preamble_cs, continue_preamble_cs);
}
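
radv_get_preambles above reduces the per-command-buffer requirements to one worst case before building preambles: sizes are combined with MAX2 and the booleans with |=, so a single preamble set can serve every command buffer in the submission. A trimmed-down sketch of that reduction, using an invented stand-in struct rather than radv_cmd_buffer:

   #include <stdbool.h>
   #include <stdint.h>

   #define MAX2(a, b) ((a) > (b) ? (a) : (b))

   /* Invented stand-in for the few fields a command buffer tracks. */
   struct cs_requirements {
      uint32_t scratch_size_per_wave;
      uint32_t esgs_ring_size;
      bool tess_rings_needed;
   };

   static struct cs_requirements
   merge_requirements(const struct cs_requirements *reqs, uint32_t count)
   {
      struct cs_requirements out = {0};
      for (uint32_t i = 0; i < count; i++) {
         out.scratch_size_per_wave = MAX2(out.scratch_size_per_wave, reqs[i].scratch_size_per_wave);
         out.esgs_ring_size = MAX2(out.esgs_ring_size, reqs[i].esgs_ring_size);
         out.tess_rings_needed |= reqs[i].tess_rings_needed;
      }
      return out;
   }
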
struct radv_deferred_queue_submission {
- struct radv_queue *queue;
- VkCommandBuffer *cmd_buffers;
- uint32_t cmd_buffer_count;
-
- /* Sparse bindings that happen on a queue. */
- VkSparseBufferMemoryBindInfo *buffer_binds;
- uint32_t buffer_bind_count;
- VkSparseImageOpaqueMemoryBindInfo *image_opaque_binds;
- uint32_t image_opaque_bind_count;
- VkSparseImageMemoryBindInfo *image_binds;
- uint32_t image_bind_count;
-
- bool flush_caches;
- VkShaderStageFlags wait_dst_stage_mask;
- struct radv_semaphore_part **wait_semaphores;
- uint32_t wait_semaphore_count;
- struct radv_semaphore_part **signal_semaphores;
- uint32_t signal_semaphore_count;
- VkFence fence;
-
- uint64_t *wait_values;
- uint64_t *signal_values;
-
- struct radv_semaphore_part *temporary_semaphore_parts;
- uint32_t temporary_semaphore_part_count;
-
- struct list_head queue_pending_list;
- uint32_t submission_wait_count;
- struct radv_timeline_waiter *wait_nodes;
-
- struct list_head processing_list;
+ struct radv_queue *queue;
+ VkCommandBuffer *cmd_buffers;
+ uint32_t cmd_buffer_count;
+
+ /* Sparse bindings that happen on a queue. */
+ VkSparseBufferMemoryBindInfo *buffer_binds;
+ uint32_t buffer_bind_count;
+ VkSparseImageOpaqueMemoryBindInfo *image_opaque_binds;
+ uint32_t image_opaque_bind_count;
+ VkSparseImageMemoryBindInfo *image_binds;
+ uint32_t image_bind_count;
+
+ bool flush_caches;
+ VkShaderStageFlags wait_dst_stage_mask;
+ struct radv_semaphore_part **wait_semaphores;
+ uint32_t wait_semaphore_count;
+ struct radv_semaphore_part **signal_semaphores;
+ uint32_t signal_semaphore_count;
+ VkFence fence;
+
+ uint64_t *wait_values;
+ uint64_t *signal_values;
+
+ struct radv_semaphore_part *temporary_semaphore_parts;
+ uint32_t temporary_semaphore_part_count;
+
+ struct list_head queue_pending_list;
+ uint32_t submission_wait_count;
+ struct radv_timeline_waiter *wait_nodes;
+
+ struct list_head processing_list;
};
struct radv_queue_submission {
- const VkCommandBuffer *cmd_buffers;
- uint32_t cmd_buffer_count;
-
- /* Sparse bindings that happen on a queue. */
- const VkSparseBufferMemoryBindInfo *buffer_binds;
- uint32_t buffer_bind_count;
- const VkSparseImageOpaqueMemoryBindInfo *image_opaque_binds;
- uint32_t image_opaque_bind_count;
- const VkSparseImageMemoryBindInfo *image_binds;
- uint32_t image_bind_count;
-
- bool flush_caches;
- VkPipelineStageFlags wait_dst_stage_mask;
- const VkSemaphore *wait_semaphores;
- uint32_t wait_semaphore_count;
- const VkSemaphore *signal_semaphores;
- uint32_t signal_semaphore_count;
- VkFence fence;
-
- const uint64_t *wait_values;
- uint32_t wait_value_count;
- const uint64_t *signal_values;
- uint32_t signal_value_count;
+ const VkCommandBuffer *cmd_buffers;
+ uint32_t cmd_buffer_count;
+
+ /* Sparse bindings that happen on a queue. */
+ const VkSparseBufferMemoryBindInfo *buffer_binds;
+ uint32_t buffer_bind_count;
+ const VkSparseImageOpaqueMemoryBindInfo *image_opaque_binds;
+ uint32_t image_opaque_bind_count;
+ const VkSparseImageMemoryBindInfo *image_binds;
+ uint32_t image_bind_count;
+
+ bool flush_caches;
+ VkPipelineStageFlags wait_dst_stage_mask;
+ const VkSemaphore *wait_semaphores;
+ uint32_t wait_semaphore_count;
+ const VkSemaphore *signal_semaphores;
+ uint32_t signal_semaphore_count;
+ VkFence fence;
+
+ const uint64_t *wait_values;
+ uint32_t wait_value_count;
+ const uint64_t *signal_values;
+ uint32_t signal_value_count;
};
-static VkResult
-radv_queue_trigger_submission(struct radv_deferred_queue_submission *submission,
- uint32_t decrement,
- struct list_head *processing_list);
+static VkResult radv_queue_trigger_submission(struct radv_deferred_queue_submission *submission,
+ uint32_t decrement,
+ struct list_head *processing_list);
static VkResult
radv_create_deferred_submission(struct radv_queue *queue,
const struct radv_queue_submission *submission,
struct radv_deferred_queue_submission **out)
{
- struct radv_deferred_queue_submission *deferred = NULL;
- size_t size = sizeof(struct radv_deferred_queue_submission);
-
- uint32_t temporary_count = 0;
- for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
- RADV_FROM_HANDLE(radv_semaphore, semaphore, submission->wait_semaphores[i]);
- if (semaphore->temporary.kind != RADV_SEMAPHORE_NONE)
- ++temporary_count;
- }
-
- size += submission->cmd_buffer_count * sizeof(VkCommandBuffer);
- size += submission->buffer_bind_count * sizeof(VkSparseBufferMemoryBindInfo);
- size += submission->image_opaque_bind_count * sizeof(VkSparseImageOpaqueMemoryBindInfo);
- size += submission->image_bind_count * sizeof(VkSparseImageMemoryBindInfo);
-
- for (uint32_t i = 0; i < submission->image_bind_count; ++i)
- size += submission->image_binds[i].bindCount * sizeof(VkSparseImageMemoryBind);
-
- size += submission->wait_semaphore_count * sizeof(struct radv_semaphore_part *);
- size += temporary_count * sizeof(struct radv_semaphore_part);
- size += submission->signal_semaphore_count * sizeof(struct radv_semaphore_part *);
- size += submission->wait_value_count * sizeof(uint64_t);
- size += submission->signal_value_count * sizeof(uint64_t);
- size += submission->wait_semaphore_count * sizeof(struct radv_timeline_waiter);
-
- deferred = calloc(1, size);
- if (!deferred)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
-
- deferred->queue = queue;
-
- deferred->cmd_buffers = (void*)(deferred + 1);
- deferred->cmd_buffer_count = submission->cmd_buffer_count;
- if (submission->cmd_buffer_count) {
- memcpy(deferred->cmd_buffers, submission->cmd_buffers,
- submission->cmd_buffer_count * sizeof(*deferred->cmd_buffers));
- }
-
- deferred->buffer_binds = (void*)(deferred->cmd_buffers + submission->cmd_buffer_count);
- deferred->buffer_bind_count = submission->buffer_bind_count;
- if (submission->buffer_bind_count) {
- memcpy(deferred->buffer_binds, submission->buffer_binds,
- submission->buffer_bind_count * sizeof(*deferred->buffer_binds));
- }
-
- deferred->image_opaque_binds = (void*)(deferred->buffer_binds + submission->buffer_bind_count);
- deferred->image_opaque_bind_count = submission->image_opaque_bind_count;
- if (submission->image_opaque_bind_count) {
- memcpy(deferred->image_opaque_binds, submission->image_opaque_binds,
- submission->image_opaque_bind_count * sizeof(*deferred->image_opaque_binds));
- }
-
- deferred->image_binds = (void*)(deferred->image_opaque_binds + deferred->image_opaque_bind_count);
- deferred->image_bind_count = submission->image_bind_count;
-
- VkSparseImageMemoryBind *sparse_image_binds = (void*)(deferred->image_binds + deferred->image_bind_count);
- for (uint32_t i = 0; i < deferred->image_bind_count; ++i) {
- deferred->image_binds[i] = submission->image_binds[i];
- deferred->image_binds[i].pBinds = sparse_image_binds;
-
- for (uint32_t j = 0; j < deferred->image_binds[i].bindCount; ++j)
- *sparse_image_binds++ = submission->image_binds[i].pBinds[j];
- }
-
- deferred->flush_caches = submission->flush_caches;
- deferred->wait_dst_stage_mask = submission->wait_dst_stage_mask;
-
- deferred->wait_semaphores = (void*)sparse_image_binds;
- deferred->wait_semaphore_count = submission->wait_semaphore_count;
-
- deferred->signal_semaphores = (void*)(deferred->wait_semaphores + deferred->wait_semaphore_count);
- deferred->signal_semaphore_count = submission->signal_semaphore_count;
-
- deferred->fence = submission->fence;
-
- deferred->temporary_semaphore_parts = (void*)(deferred->signal_semaphores + deferred->signal_semaphore_count);
- deferred->temporary_semaphore_part_count = temporary_count;
-
- uint32_t temporary_idx = 0;
- for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
- RADV_FROM_HANDLE(radv_semaphore, semaphore, submission->wait_semaphores[i]);
- if (semaphore->temporary.kind != RADV_SEMAPHORE_NONE) {
- deferred->wait_semaphores[i] = &deferred->temporary_semaphore_parts[temporary_idx];
- deferred->temporary_semaphore_parts[temporary_idx] = semaphore->temporary;
- semaphore->temporary.kind = RADV_SEMAPHORE_NONE;
- ++temporary_idx;
- } else
- deferred->wait_semaphores[i] = &semaphore->permanent;
- }
-
- for (uint32_t i = 0; i < submission->signal_semaphore_count; ++i) {
- RADV_FROM_HANDLE(radv_semaphore, semaphore, submission->signal_semaphores[i]);
- if (semaphore->temporary.kind != RADV_SEMAPHORE_NONE) {
- deferred->signal_semaphores[i] = &semaphore->temporary;
- } else {
- deferred->signal_semaphores[i] = &semaphore->permanent;
- }
- }
-
- deferred->wait_values = (void*)(deferred->temporary_semaphore_parts + temporary_count);
- if (submission->wait_value_count) {
- memcpy(deferred->wait_values, submission->wait_values, submission->wait_value_count * sizeof(uint64_t));
- }
- deferred->signal_values = deferred->wait_values + submission->wait_value_count;
- if (submission->signal_value_count) {
- memcpy(deferred->signal_values, submission->signal_values, submission->signal_value_count * sizeof(uint64_t));
- }
-
- deferred->wait_nodes = (void*)(deferred->signal_values + submission->signal_value_count);
-   /* This is the worst case; radv_queue_enqueue_submission will refine the count, but this
-    * ensures the submission is not accidentally triggered early when adding wait timelines. */
- deferred->submission_wait_count = 1 + submission->wait_semaphore_count;
-
- *out = deferred;
- return VK_SUCCESS;
+ struct radv_deferred_queue_submission *deferred = NULL;
+ size_t size = sizeof(struct radv_deferred_queue_submission);
+
+ uint32_t temporary_count = 0;
+ for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
+ RADV_FROM_HANDLE(radv_semaphore, semaphore, submission->wait_semaphores[i]);
+ if (semaphore->temporary.kind != RADV_SEMAPHORE_NONE)
+ ++temporary_count;
+ }
+
+ size += submission->cmd_buffer_count * sizeof(VkCommandBuffer);
+ size += submission->buffer_bind_count * sizeof(VkSparseBufferMemoryBindInfo);
+ size += submission->image_opaque_bind_count * sizeof(VkSparseImageOpaqueMemoryBindInfo);
+ size += submission->image_bind_count * sizeof(VkSparseImageMemoryBindInfo);
+
+ for (uint32_t i = 0; i < submission->image_bind_count; ++i)
+ size += submission->image_binds[i].bindCount * sizeof(VkSparseImageMemoryBind);
+
+ size += submission->wait_semaphore_count * sizeof(struct radv_semaphore_part *);
+ size += temporary_count * sizeof(struct radv_semaphore_part);
+ size += submission->signal_semaphore_count * sizeof(struct radv_semaphore_part *);
+ size += submission->wait_value_count * sizeof(uint64_t);
+ size += submission->signal_value_count * sizeof(uint64_t);
+ size += submission->wait_semaphore_count * sizeof(struct radv_timeline_waiter);
+
+ deferred = calloc(1, size);
+ if (!deferred)
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+ deferred->queue = queue;
+
+ deferred->cmd_buffers = (void *)(deferred + 1);
+ deferred->cmd_buffer_count = submission->cmd_buffer_count;
+ if (submission->cmd_buffer_count) {
+ memcpy(deferred->cmd_buffers, submission->cmd_buffers,
+ submission->cmd_buffer_count * sizeof(*deferred->cmd_buffers));
+ }
+
+ deferred->buffer_binds = (void *)(deferred->cmd_buffers + submission->cmd_buffer_count);
+ deferred->buffer_bind_count = submission->buffer_bind_count;
+ if (submission->buffer_bind_count) {
+ memcpy(deferred->buffer_binds, submission->buffer_binds,
+ submission->buffer_bind_count * sizeof(*deferred->buffer_binds));
+ }
+
+ deferred->image_opaque_binds = (void *)(deferred->buffer_binds + submission->buffer_bind_count);
+ deferred->image_opaque_bind_count = submission->image_opaque_bind_count;
+ if (submission->image_opaque_bind_count) {
+ memcpy(deferred->image_opaque_binds, submission->image_opaque_binds,
+ submission->image_opaque_bind_count * sizeof(*deferred->image_opaque_binds));
+ }
+
+ deferred->image_binds =
+ (void *)(deferred->image_opaque_binds + deferred->image_opaque_bind_count);
+ deferred->image_bind_count = submission->image_bind_count;
+
+ VkSparseImageMemoryBind *sparse_image_binds =
+ (void *)(deferred->image_binds + deferred->image_bind_count);
+ for (uint32_t i = 0; i < deferred->image_bind_count; ++i) {
+ deferred->image_binds[i] = submission->image_binds[i];
+ deferred->image_binds[i].pBinds = sparse_image_binds;
+
+ for (uint32_t j = 0; j < deferred->image_binds[i].bindCount; ++j)
+ *sparse_image_binds++ = submission->image_binds[i].pBinds[j];
+ }
+
+ deferred->flush_caches = submission->flush_caches;
+ deferred->wait_dst_stage_mask = submission->wait_dst_stage_mask;
+
+ deferred->wait_semaphores = (void *)sparse_image_binds;
+ deferred->wait_semaphore_count = submission->wait_semaphore_count;
+
+ deferred->signal_semaphores =
+ (void *)(deferred->wait_semaphores + deferred->wait_semaphore_count);
+ deferred->signal_semaphore_count = submission->signal_semaphore_count;
+
+ deferred->fence = submission->fence;
+
+ deferred->temporary_semaphore_parts =
+ (void *)(deferred->signal_semaphores + deferred->signal_semaphore_count);
+ deferred->temporary_semaphore_part_count = temporary_count;
+
+ uint32_t temporary_idx = 0;
+ for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
+ RADV_FROM_HANDLE(radv_semaphore, semaphore, submission->wait_semaphores[i]);
+ if (semaphore->temporary.kind != RADV_SEMAPHORE_NONE) {
+ deferred->wait_semaphores[i] = &deferred->temporary_semaphore_parts[temporary_idx];
+ deferred->temporary_semaphore_parts[temporary_idx] = semaphore->temporary;
+ semaphore->temporary.kind = RADV_SEMAPHORE_NONE;
+ ++temporary_idx;
+ } else
+ deferred->wait_semaphores[i] = &semaphore->permanent;
+ }
+
+ for (uint32_t i = 0; i < submission->signal_semaphore_count; ++i) {
+ RADV_FROM_HANDLE(radv_semaphore, semaphore, submission->signal_semaphores[i]);
+ if (semaphore->temporary.kind != RADV_SEMAPHORE_NONE) {
+ deferred->signal_semaphores[i] = &semaphore->temporary;
+ } else {
+ deferred->signal_semaphores[i] = &semaphore->permanent;
+ }
+ }
+
+ deferred->wait_values = (void *)(deferred->temporary_semaphore_parts + temporary_count);
+ if (submission->wait_value_count) {
+ memcpy(deferred->wait_values, submission->wait_values,
+ submission->wait_value_count * sizeof(uint64_t));
+ }
+ deferred->signal_values = deferred->wait_values + submission->wait_value_count;
+ if (submission->signal_value_count) {
+ memcpy(deferred->signal_values, submission->signal_values,
+ submission->signal_value_count * sizeof(uint64_t));
+ }
+
+ deferred->wait_nodes = (void *)(deferred->signal_values + submission->signal_value_count);
+ /* This is worst-case. radv_queue_enqueue_submission will fill in further, but this
+ * ensures the submission is not accidentally triggered early when adding wait timelines. */
+ deferred->submission_wait_count = 1 + submission->wait_semaphore_count;
+
+ *out = deferred;
+ return VK_SUCCESS;
}
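
radv_create_deferred_submission above copies everything it needs into a single heap block: the struct comes first and the arrays (command buffers, binds, semaphores, values, wait nodes) are laid out immediately after it, so one free() releases the whole submission. Below is a minimal sketch of that trailing-allocation pattern, using hypothetical names rather than the RADV types:

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

struct deferred {              /* stand-in for radv_deferred_queue_submission */
   uint32_t  value_count;
   uint64_t *values;           /* points into the same allocation, right after the struct */
};

static struct deferred *
deferred_create(const uint64_t *values, uint32_t count)
{
   /* Header and payload in one malloc, so teardown is a single free(d). */
   struct deferred *d = malloc(sizeof(*d) + count * sizeof(uint64_t));
   if (!d)
      return NULL;

   d->values = (uint64_t *)(d + 1);   /* payload begins right after the header */
   d->value_count = count;
   memcpy(d->values, values, count * sizeof(uint64_t));
   return d;
}
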
static VkResult
radv_queue_enqueue_submission(struct radv_deferred_queue_submission *submission,
struct list_head *processing_list)
{
- uint32_t wait_cnt = 0;
- struct radv_timeline_waiter *waiter = submission->wait_nodes;
- for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
- if (submission->wait_semaphores[i]->kind == RADV_SEMAPHORE_TIMELINE) {
- mtx_lock(&submission->wait_semaphores[i]->timeline.mutex);
- if (submission->wait_semaphores[i]->timeline.highest_submitted < submission->wait_values[i]) {
- ++wait_cnt;
- waiter->value = submission->wait_values[i];
- waiter->submission = submission;
- list_addtail(&waiter->list, &submission->wait_semaphores[i]->timeline.waiters);
- ++waiter;
- }
- mtx_unlock(&submission->wait_semaphores[i]->timeline.mutex);
- }
- }
+ uint32_t wait_cnt = 0;
+ struct radv_timeline_waiter *waiter = submission->wait_nodes;
+ for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
+ if (submission->wait_semaphores[i]->kind == RADV_SEMAPHORE_TIMELINE) {
+ mtx_lock(&submission->wait_semaphores[i]->timeline.mutex);
+ if (submission->wait_semaphores[i]->timeline.highest_submitted <
+ submission->wait_values[i]) {
+ ++wait_cnt;
+ waiter->value = submission->wait_values[i];
+ waiter->submission = submission;
+ list_addtail(&waiter->list, &submission->wait_semaphores[i]->timeline.waiters);
+ ++waiter;
+ }
+ mtx_unlock(&submission->wait_semaphores[i]->timeline.mutex);
+ }
+ }
- mtx_lock(&submission->queue->pending_mutex);
+ mtx_lock(&submission->queue->pending_mutex);
- bool is_first = list_is_empty(&submission->queue->pending_submissions);
- list_addtail(&submission->queue_pending_list, &submission->queue->pending_submissions);
+ bool is_first = list_is_empty(&submission->queue->pending_submissions);
+ list_addtail(&submission->queue_pending_list, &submission->queue->pending_submissions);
- mtx_unlock(&submission->queue->pending_mutex);
+ mtx_unlock(&submission->queue->pending_mutex);
- /* If there is already a submission in the queue, that will decrement the counter by 1 when
- * submitted, but if the queue was empty, we decrement ourselves as there is no previous
- * submission. */
- uint32_t decrement = submission->wait_semaphore_count - wait_cnt + (is_first ? 1 : 0);
+ /* If there is already a submission in the queue, that will decrement the counter by 1 when
+ * submitted, but if the queue was empty, we decrement ourselves as there is no previous
+ * submission. */
+ uint32_t decrement = submission->wait_semaphore_count - wait_cnt + (is_first ? 1 : 0);
- /* if decrement is zero, then we don't have a refcounted reference to the
- * submission anymore, so it is not safe to access the submission. */
- if (!decrement)
- return VK_SUCCESS;
+ /* if decrement is zero, then we don't have a refcounted reference to the
+ * submission anymore, so it is not safe to access the submission. */
+ if (!decrement)
+ return VK_SUCCESS;
- return radv_queue_trigger_submission(submission, decrement, processing_list);
+ return radv_queue_trigger_submission(submission, decrement, processing_list);
}
static void
radv_queue_submission_update_queue(struct radv_deferred_queue_submission *submission,
struct list_head *processing_list)
{
- mtx_lock(&submission->queue->pending_mutex);
- list_del(&submission->queue_pending_list);
+ mtx_lock(&submission->queue->pending_mutex);
+ list_del(&submission->queue_pending_list);
- /* trigger the next submission in the queue. */
- if (!list_is_empty(&submission->queue->pending_submissions)) {
- struct radv_deferred_queue_submission *next_submission =
- list_first_entry(&submission->queue->pending_submissions,
- struct radv_deferred_queue_submission,
- queue_pending_list);
- radv_queue_trigger_submission(next_submission, 1, processing_list);
- }
- mtx_unlock(&submission->queue->pending_mutex);
+ /* trigger the next submission in the queue. */
+ if (!list_is_empty(&submission->queue->pending_submissions)) {
+ struct radv_deferred_queue_submission *next_submission =
+ list_first_entry(&submission->queue->pending_submissions,
+ struct radv_deferred_queue_submission, queue_pending_list);
+ radv_queue_trigger_submission(next_submission, 1, processing_list);
+ }
+ mtx_unlock(&submission->queue->pending_mutex);
- u_cnd_monotonic_broadcast(&submission->queue->device->timeline_cond);
+ u_cnd_monotonic_broadcast(&submission->queue->device->timeline_cond);
}
static VkResult
radv_queue_submit_deferred(struct radv_deferred_queue_submission *submission,
struct list_head *processing_list)
{
- struct radv_queue *queue = submission->queue;
- struct radeon_winsys_ctx *ctx = queue->hw_ctx;
- uint32_t max_cs_submission = queue->device->trace_bo ? 1 : RADV_MAX_IBS_PER_SUBMIT;
- bool do_flush = submission->flush_caches || submission->wait_dst_stage_mask;
- bool can_patch = true;
- uint32_t advance;
- struct radv_winsys_sem_info sem_info = {0};
- VkResult result;
- struct radeon_cmdbuf *initial_preamble_cs = NULL;
- struct radeon_cmdbuf *initial_flush_preamble_cs = NULL;
- struct radeon_cmdbuf *continue_preamble_cs = NULL;
-
- result = radv_get_preambles(queue, submission->cmd_buffers,
- submission->cmd_buffer_count,
- &initial_preamble_cs,
- &initial_flush_preamble_cs,
- &continue_preamble_cs);
- if (result != VK_SUCCESS)
- goto fail;
-
- result = radv_alloc_sem_info(queue->device,
- &sem_info,
- submission->wait_semaphore_count,
- submission->wait_semaphores,
- submission->wait_values,
- submission->signal_semaphore_count,
- submission->signal_semaphores,
- submission->signal_values,
- submission->fence);
- if (result != VK_SUCCESS)
- goto fail;
-
- for (uint32_t i = 0; i < submission->buffer_bind_count; ++i) {
- result = radv_sparse_buffer_bind_memory(queue->device,
- submission->buffer_binds + i);
- if (result != VK_SUCCESS)
- goto fail;
- }
-
- for (uint32_t i = 0; i < submission->image_opaque_bind_count; ++i) {
- result = radv_sparse_image_opaque_bind_memory(queue->device,
- submission->image_opaque_binds + i);
- if (result != VK_SUCCESS)
- goto fail;
- }
-
- for (uint32_t i = 0; i < submission->image_bind_count; ++i) {
- result = radv_sparse_image_bind_memory(queue->device,
- submission->image_binds + i);
- if (result != VK_SUCCESS)
- goto fail;
- }
-
- if (!submission->cmd_buffer_count) {
- result = queue->device->ws->cs_submit(ctx, queue->queue_idx,
- &queue->device->empty_cs[queue->queue_family_index],
- 1, NULL, NULL,
- &sem_info,
- false);
- if (result != VK_SUCCESS)
- goto fail;
- } else {
- struct radeon_cmdbuf **cs_array = malloc(sizeof(struct radeon_cmdbuf *) *
- (submission->cmd_buffer_count));
-
- for (uint32_t j = 0; j < submission->cmd_buffer_count; j++) {
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, submission->cmd_buffers[j]);
- assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
-
- cs_array[j] = cmd_buffer->cs;
- if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
- can_patch = false;
-
- cmd_buffer->status = RADV_CMD_BUFFER_STATUS_PENDING;
- }
-
- for (uint32_t j = 0; j < submission->cmd_buffer_count; j += advance) {
- struct radeon_cmdbuf *initial_preamble = (do_flush && !j) ? initial_flush_preamble_cs : initial_preamble_cs;
- advance = MIN2(max_cs_submission,
- submission->cmd_buffer_count - j);
-
- if (queue->device->trace_bo)
- *queue->device->trace_id_ptr = 0;
-
- sem_info.cs_emit_wait = j == 0;
- sem_info.cs_emit_signal = j + advance == submission->cmd_buffer_count;
-
- result = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
- advance, initial_preamble, continue_preamble_cs,
- &sem_info,
- can_patch);
- if (result != VK_SUCCESS) {
- free(cs_array);
- goto fail;
- }
-
- if (queue->device->trace_bo) {
- radv_check_gpu_hangs(queue, cs_array[j]);
- }
-
- if (queue->device->tma_bo) {
- radv_check_trap_handler(queue);
- }
- }
-
- free(cs_array);
- }
-
- radv_finalize_timelines(queue->device,
- submission->wait_semaphore_count,
- submission->wait_semaphores,
- submission->wait_values,
- submission->signal_semaphore_count,
- submission->signal_semaphores,
- submission->signal_values,
- processing_list);
- /* Has to happen after timeline finalization to make sure the
- * condition variable is only triggered when timelines and queue have
- * been updated. */
- radv_queue_submission_update_queue(submission, processing_list);
+ struct radv_queue *queue = submission->queue;
+ struct radeon_winsys_ctx *ctx = queue->hw_ctx;
+ uint32_t max_cs_submission = queue->device->trace_bo ? 1 : RADV_MAX_IBS_PER_SUBMIT;
+ bool do_flush = submission->flush_caches || submission->wait_dst_stage_mask;
+ bool can_patch = true;
+ uint32_t advance;
+ struct radv_winsys_sem_info sem_info = {0};
+ VkResult result;
+ struct radeon_cmdbuf *initial_preamble_cs = NULL;
+ struct radeon_cmdbuf *initial_flush_preamble_cs = NULL;
+ struct radeon_cmdbuf *continue_preamble_cs = NULL;
+
+ result =
+ radv_get_preambles(queue, submission->cmd_buffers, submission->cmd_buffer_count,
+ &initial_preamble_cs, &initial_flush_preamble_cs, &continue_preamble_cs);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ result = radv_alloc_sem_info(queue->device, &sem_info, submission->wait_semaphore_count,
+ submission->wait_semaphores, submission->wait_values,
+ submission->signal_semaphore_count, submission->signal_semaphores,
+ submission->signal_values, submission->fence);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ for (uint32_t i = 0; i < submission->buffer_bind_count; ++i) {
+ result = radv_sparse_buffer_bind_memory(queue->device, submission->buffer_binds + i);
+ if (result != VK_SUCCESS)
+ goto fail;
+ }
+
+ for (uint32_t i = 0; i < submission->image_opaque_bind_count; ++i) {
+ result =
+ radv_sparse_image_opaque_bind_memory(queue->device, submission->image_opaque_binds + i);
+ if (result != VK_SUCCESS)
+ goto fail;
+ }
+
+ for (uint32_t i = 0; i < submission->image_bind_count; ++i) {
+ result = radv_sparse_image_bind_memory(queue->device, submission->image_binds + i);
+ if (result != VK_SUCCESS)
+ goto fail;
+ }
+
+ if (!submission->cmd_buffer_count) {
+ result = queue->device->ws->cs_submit(ctx, queue->queue_idx,
+ &queue->device->empty_cs[queue->queue_family_index], 1,
+ NULL, NULL, &sem_info, false);
+ if (result != VK_SUCCESS)
+ goto fail;
+ } else {
+ struct radeon_cmdbuf **cs_array =
+ malloc(sizeof(struct radeon_cmdbuf *) * (submission->cmd_buffer_count));
+
+ for (uint32_t j = 0; j < submission->cmd_buffer_count; j++) {
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, submission->cmd_buffers[j]);
+ assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
+
+ cs_array[j] = cmd_buffer->cs;
+ if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
+ can_patch = false;
+
+ cmd_buffer->status = RADV_CMD_BUFFER_STATUS_PENDING;
+ }
+
+ for (uint32_t j = 0; j < submission->cmd_buffer_count; j += advance) {
+ struct radeon_cmdbuf *initial_preamble =
+ (do_flush && !j) ? initial_flush_preamble_cs : initial_preamble_cs;
+ advance = MIN2(max_cs_submission, submission->cmd_buffer_count - j);
+
+ if (queue->device->trace_bo)
+ *queue->device->trace_id_ptr = 0;
+
+ sem_info.cs_emit_wait = j == 0;
+ sem_info.cs_emit_signal = j + advance == submission->cmd_buffer_count;
+
+ result = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j, advance,
+ initial_preamble, continue_preamble_cs, &sem_info,
+ can_patch);
+ if (result != VK_SUCCESS) {
+ free(cs_array);
+ goto fail;
+ }
+
+ if (queue->device->trace_bo) {
+ radv_check_gpu_hangs(queue, cs_array[j]);
+ }
+
+ if (queue->device->tma_bo) {
+ radv_check_trap_handler(queue);
+ }
+ }
+
+ free(cs_array);
+ }
+
+ radv_finalize_timelines(queue->device, submission->wait_semaphore_count,
+ submission->wait_semaphores, submission->wait_values,
+ submission->signal_semaphore_count, submission->signal_semaphores,
+ submission->signal_values, processing_list);
+ /* Has to happen after timeline finalization to make sure the
+ * condition variable is only triggered when timelines and queue have
+ * been updated. */
+ radv_queue_submission_update_queue(submission, processing_list);
fail:
- if (result != VK_SUCCESS && result != VK_ERROR_DEVICE_LOST) {
- /* When something bad happened during the submission, such as
- * an out of memory issue, it might be hard to recover from
- * this inconsistent state. To avoid this sort of problem, we
- * assume that we are in a really bad situation and return
- * VK_ERROR_DEVICE_LOST to ensure the clients do not attempt
- * to submit the same job again to this device.
- */
- result = radv_device_set_lost(queue->device, "vkQueueSubmit() failed");
- }
-
- radv_free_temp_syncobjs(queue->device,
- submission->temporary_semaphore_part_count,
- submission->temporary_semaphore_parts);
- radv_free_sem_info(&sem_info);
- free(submission);
- return result;
+ if (result != VK_SUCCESS && result != VK_ERROR_DEVICE_LOST) {
+ /* When something bad happened during the submission, such as
+ * an out of memory issue, it might be hard to recover from
+ * this inconsistent state. To avoid this sort of problem, we
+ * assume that we are in a really bad situation and return
+ * VK_ERROR_DEVICE_LOST to ensure the clients do not attempt
+ * to submit the same job again to this device.
+ */
+ result = radv_device_set_lost(queue->device, "vkQueueSubmit() failed");
+ }
+
+ radv_free_temp_syncobjs(queue->device, submission->temporary_semaphore_part_count,
+ submission->temporary_semaphore_parts);
+ radv_free_sem_info(&sem_info);
+ free(submission);
+ return result;
}
static VkResult
radv_process_submissions(struct list_head *processing_list)
{
- while(!list_is_empty(processing_list)) {
- struct radv_deferred_queue_submission *submission =
- list_first_entry(processing_list, struct radv_deferred_queue_submission, processing_list);
- list_del(&submission->processing_list);
+ while (!list_is_empty(processing_list)) {
+ struct radv_deferred_queue_submission *submission =
+ list_first_entry(processing_list, struct radv_deferred_queue_submission, processing_list);
+ list_del(&submission->processing_list);
- VkResult result = radv_queue_submit_deferred(submission, processing_list);
- if (result != VK_SUCCESS)
- return result;
- }
- return VK_SUCCESS;
+ VkResult result = radv_queue_submit_deferred(submission, processing_list);
+ if (result != VK_SUCCESS)
+ return result;
+ }
+ return VK_SUCCESS;
}
static VkResult
wait_for_submission_timelines_available(struct radv_deferred_queue_submission *submission,
uint64_t timeout)
{
- struct radv_device *device = submission->queue->device;
- uint32_t syncobj_count = 0;
- uint32_t syncobj_idx = 0;
+ struct radv_device *device = submission->queue->device;
+ uint32_t syncobj_count = 0;
+ uint32_t syncobj_idx = 0;
- for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
- if (submission->wait_semaphores[i]->kind != RADV_SEMAPHORE_TIMELINE_SYNCOBJ)
- continue;
+ for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
+ if (submission->wait_semaphores[i]->kind != RADV_SEMAPHORE_TIMELINE_SYNCOBJ)
+ continue;
- if (submission->wait_semaphores[i]->timeline_syncobj.max_point >= submission->wait_values[i])
- continue;
- ++syncobj_count;
- }
+ if (submission->wait_semaphores[i]->timeline_syncobj.max_point >= submission->wait_values[i])
+ continue;
+ ++syncobj_count;
+ }
- if (!syncobj_count)
- return VK_SUCCESS;
+ if (!syncobj_count)
+ return VK_SUCCESS;
- uint64_t *points = malloc((sizeof(uint64_t) + sizeof(uint32_t)) * syncobj_count);
- if (!points)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ uint64_t *points = malloc((sizeof(uint64_t) + sizeof(uint32_t)) * syncobj_count);
+ if (!points)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- uint32_t *syncobj = (uint32_t*)(points + syncobj_count);
+ uint32_t *syncobj = (uint32_t *)(points + syncobj_count);
- for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
- if (submission->wait_semaphores[i]->kind != RADV_SEMAPHORE_TIMELINE_SYNCOBJ)
- continue;
+ for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
+ if (submission->wait_semaphores[i]->kind != RADV_SEMAPHORE_TIMELINE_SYNCOBJ)
+ continue;
- if (submission->wait_semaphores[i]->timeline_syncobj.max_point >= submission->wait_values[i])
- continue;
+ if (submission->wait_semaphores[i]->timeline_syncobj.max_point >= submission->wait_values[i])
+ continue;
- syncobj[syncobj_idx] = submission->wait_semaphores[i]->syncobj;
- points[syncobj_idx] = submission->wait_values[i];
- ++syncobj_idx;
- }
- bool success = device->ws->wait_timeline_syncobj(device->ws, syncobj, points, syncobj_idx, true, true, timeout);
+ syncobj[syncobj_idx] = submission->wait_semaphores[i]->syncobj;
+ points[syncobj_idx] = submission->wait_values[i];
+ ++syncobj_idx;
+ }
+ bool success = device->ws->wait_timeline_syncobj(device->ws, syncobj, points, syncobj_idx, true,
+ true, timeout);
- free(points);
- return success ? VK_SUCCESS : VK_TIMEOUT;
+ free(points);
+ return success ? VK_SUCCESS : VK_TIMEOUT;
}
-static int radv_queue_submission_thread_run(void *q)
-{
- struct radv_queue *queue = q;
-
- mtx_lock(&queue->thread_mutex);
- while (!p_atomic_read(&queue->thread_exit)) {
- struct radv_deferred_queue_submission *submission = queue->thread_submission;
- struct list_head processing_list;
- VkResult result = VK_SUCCESS;
- if (!submission) {
- u_cnd_monotonic_wait(&queue->thread_cond, &queue->thread_mutex);
- continue;
- }
- mtx_unlock(&queue->thread_mutex);
-
- /* Wait at most 5 seconds so we have a chance to notice shutdown when
- * a semaphore never gets signaled. If it takes longer we just retry
- * the wait next iteration. */
- result = wait_for_submission_timelines_available(submission,
- radv_get_absolute_timeout(5000000000));
- if (result != VK_SUCCESS) {
- mtx_lock(&queue->thread_mutex);
- continue;
- }
-
- /* The lock isn't held but nobody will add one until we finish
- * the current submission. */
- p_atomic_set(&queue->thread_submission, NULL);
-
- list_inithead(&processing_list);
- list_addtail(&submission->processing_list, &processing_list);
- result = radv_process_submissions(&processing_list);
-
- mtx_lock(&queue->thread_mutex);
- }
- mtx_unlock(&queue->thread_mutex);
- return 0;
+static int
+radv_queue_submission_thread_run(void *q)
+{
+ struct radv_queue *queue = q;
+
+ mtx_lock(&queue->thread_mutex);
+ while (!p_atomic_read(&queue->thread_exit)) {
+ struct radv_deferred_queue_submission *submission = queue->thread_submission;
+ struct list_head processing_list;
+ VkResult result = VK_SUCCESS;
+ if (!submission) {
+ u_cnd_monotonic_wait(&queue->thread_cond, &queue->thread_mutex);
+ continue;
+ }
+ mtx_unlock(&queue->thread_mutex);
+
+ /* Wait at most 5 seconds so we have a chance to notice shutdown when
+ * a semaphore never gets signaled. If it takes longer we just retry
+ * the wait next iteration. */
+ result =
+ wait_for_submission_timelines_available(submission, radv_get_absolute_timeout(5000000000));
+ if (result != VK_SUCCESS) {
+ mtx_lock(&queue->thread_mutex);
+ continue;
+ }
+
+ /* The lock isn't held but nobody will add one until we finish
+ * the current submission. */
+ p_atomic_set(&queue->thread_submission, NULL);
+
+ list_inithead(&processing_list);
+ list_addtail(&submission->processing_list, &processing_list);
+ result = radv_process_submissions(&processing_list);
+
+ mtx_lock(&queue->thread_mutex);
+ }
+ mtx_unlock(&queue->thread_mutex);
+ return 0;
}
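
The submission thread above sleeps on a condition variable until a submission is handed over or thread_exit is set, re-checks its predicate in a loop, and drops the mutex while it processes work. A compact sketch of that wait loop with C11 threads, leaving out the timed syncobj wait (illustrative names only):

#include <stdbool.h>
#include <threads.h>

struct worker {
   mtx_t mutex;
   cnd_t cond;
   bool  exit;
   void *job;   /* at most one pending job, handed over under the mutex */
};

static int
worker_run(void *arg)
{
   struct worker *w = arg;

   mtx_lock(&w->mutex);
   while (!w->exit) {
      void *job = w->job;
      if (!job) {
         /* Releases the mutex while sleeping and re-acquires it on wakeup. */
         cnd_wait(&w->cond, &w->mutex);
         continue;
      }
      w->job = NULL;
      mtx_unlock(&w->mutex);

      (void)job;   /* ... process the job outside the lock ... */

      mtx_lock(&w->mutex);
   }
   mtx_unlock(&w->mutex);
   return 0;
}
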
static VkResult
-radv_queue_trigger_submission(struct radv_deferred_queue_submission *submission,
- uint32_t decrement,
+radv_queue_trigger_submission(struct radv_deferred_queue_submission *submission, uint32_t decrement,
struct list_head *processing_list)
{
- struct radv_queue *queue = submission->queue;
- int ret;
- if (p_atomic_add_return(&submission->submission_wait_count, -decrement))
- return VK_SUCCESS;
+ struct radv_queue *queue = submission->queue;
+ int ret;
+ if (p_atomic_add_return(&submission->submission_wait_count, -decrement))
+ return VK_SUCCESS;
- if (wait_for_submission_timelines_available(submission, radv_get_absolute_timeout(0)) == VK_SUCCESS) {
- list_addtail(&submission->processing_list, processing_list);
- return VK_SUCCESS;
- }
+ if (wait_for_submission_timelines_available(submission, radv_get_absolute_timeout(0)) ==
+ VK_SUCCESS) {
+ list_addtail(&submission->processing_list, processing_list);
+ return VK_SUCCESS;
+ }
- mtx_lock(&queue->thread_mutex);
+ mtx_lock(&queue->thread_mutex);
- /* A submission can only be ready for the thread if it doesn't have
- * any predecessors in the same queue, so there can only be one such
- * submission at a time. */
- assert(queue->thread_submission == NULL);
+ /* A submission can only be ready for the thread if it doesn't have
+ * any predecessors in the same queue, so there can only be one such
+ * submission at a time. */
+ assert(queue->thread_submission == NULL);
- /* Only start the thread on demand to save resources for the many games
- * which only use binary semaphores. */
- if (!queue->thread_running) {
- ret = thrd_create(&queue->submission_thread,
- radv_queue_submission_thread_run, queue);
- if (ret) {
- mtx_unlock(&queue->thread_mutex);
- return vk_errorf(queue->device->instance,
- VK_ERROR_DEVICE_LOST,
- "Failed to start submission thread");
- }
- queue->thread_running = true;
- }
+ /* Only start the thread on demand to save resources for the many games
+ * which only use binary semaphores. */
+ if (!queue->thread_running) {
+ ret = thrd_create(&queue->submission_thread, radv_queue_submission_thread_run, queue);
+ if (ret) {
+ mtx_unlock(&queue->thread_mutex);
+ return vk_errorf(queue->device->instance, VK_ERROR_DEVICE_LOST,
+ "Failed to start submission thread");
+ }
+ queue->thread_running = true;
+ }
- queue->thread_submission = submission;
- mtx_unlock(&queue->thread_mutex);
+ queue->thread_submission = submission;
+ mtx_unlock(&queue->thread_mutex);
- u_cnd_monotonic_signal(&queue->thread_cond);
- return VK_SUCCESS;
+ u_cnd_monotonic_signal(&queue->thread_cond);
+ return VK_SUCCESS;
}
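
The wait counter is the key to radv_queue_trigger_submission: each dependency (plus the queue predecessor) holds one reference, and only the caller that drops the count to zero may process, or even touch, the submission afterwards. A minimal sketch of that countdown-to-zero handoff with C11 atomics (a hypothetical helper, not the p_atomic_* wrappers):

#include <stdatomic.h>
#include <stdbool.h>

struct pending {
   atomic_uint wait_count;   /* outstanding dependencies + the queue predecessor */
};

/* Returns true when the caller dropped the last reference and now owns the
 * submission; any caller that gets false must not touch it afterwards. */
static bool
pending_release(struct pending *p, unsigned decrement)
{
   /* atomic_fetch_sub returns the previous value, so seeing exactly
    * `decrement` means the counter just reached zero. */
   return atomic_fetch_sub(&p->wait_count, decrement) == decrement;
}
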
-static VkResult radv_queue_submit(struct radv_queue *queue,
- const struct radv_queue_submission *submission)
+static VkResult
+radv_queue_submit(struct radv_queue *queue, const struct radv_queue_submission *submission)
{
- struct radv_deferred_queue_submission *deferred = NULL;
+ struct radv_deferred_queue_submission *deferred = NULL;
- VkResult result = radv_create_deferred_submission(queue, submission, &deferred);
- if (result != VK_SUCCESS)
- return result;
+ VkResult result = radv_create_deferred_submission(queue, submission, &deferred);
+ if (result != VK_SUCCESS)
+ return result;
- struct list_head processing_list;
- list_inithead(&processing_list);
+ struct list_head processing_list;
+ list_inithead(&processing_list);
- result = radv_queue_enqueue_submission(deferred, &processing_list);
- if (result != VK_SUCCESS) {
- /* If anything is in the list we leak. */
- assert(list_is_empty(&processing_list));
- return result;
- }
- return radv_process_submissions(&processing_list);
+ result = radv_queue_enqueue_submission(deferred, &processing_list);
+ if (result != VK_SUCCESS) {
+ /* If anything is in the list we leak. */
+ assert(list_is_empty(&processing_list));
+ return result;
+ }
+ return radv_process_submissions(&processing_list);
}
bool
radv_queue_internal_submit(struct radv_queue *queue, struct radeon_cmdbuf *cs)
{
- struct radeon_winsys_ctx *ctx = queue->hw_ctx;
- struct radv_winsys_sem_info sem_info = {0};
- VkResult result;
+ struct radeon_winsys_ctx *ctx = queue->hw_ctx;
+ struct radv_winsys_sem_info sem_info = {0};
+ VkResult result;
- result = radv_alloc_sem_info(queue->device, &sem_info, 0, NULL, 0, 0,
- 0, NULL, VK_NULL_HANDLE);
- if (result != VK_SUCCESS)
- return false;
+ result = radv_alloc_sem_info(queue->device, &sem_info, 0, NULL, 0, 0, 0, NULL, VK_NULL_HANDLE);
+ if (result != VK_SUCCESS)
+ return false;
- result = queue->device->ws->cs_submit(ctx, queue->queue_idx, &cs, 1,
- NULL, NULL, &sem_info,
- false);
- radv_free_sem_info(&sem_info);
- if (result != VK_SUCCESS)
- return false;
-
- return true;
+ result =
+ queue->device->ws->cs_submit(ctx, queue->queue_idx, &cs, 1, NULL, NULL, &sem_info, false);
+ radv_free_sem_info(&sem_info);
+ if (result != VK_SUCCESS)
+ return false;
+ return true;
}
/* Signals fence as soon as all the work currently put on the queue is done. */
-static VkResult radv_signal_fence(struct radv_queue *queue,
- VkFence fence)
-{
- return radv_queue_submit(queue, &(struct radv_queue_submission) {
- .fence = fence
- });
-}
-
-static bool radv_submit_has_effects(const VkSubmitInfo *info)
-{
- return info->commandBufferCount ||
- info->waitSemaphoreCount ||
- info->signalSemaphoreCount;
-}
-
-VkResult radv_QueueSubmit(
- VkQueue _queue,
- uint32_t submitCount,
- const VkSubmitInfo* pSubmits,
- VkFence fence)
-{
- RADV_FROM_HANDLE(radv_queue, queue, _queue);
- VkResult result;
- uint32_t fence_idx = 0;
- bool flushed_caches = false;
-
- if (radv_device_is_lost(queue->device))
- return VK_ERROR_DEVICE_LOST;
-
- if (fence != VK_NULL_HANDLE) {
- for (uint32_t i = 0; i < submitCount; ++i)
- if (radv_submit_has_effects(pSubmits + i))
- fence_idx = i;
- } else
- fence_idx = UINT32_MAX;
-
- for (uint32_t i = 0; i < submitCount; i++) {
- if (!radv_submit_has_effects(pSubmits + i) && fence_idx != i)
- continue;
-
- VkPipelineStageFlags wait_dst_stage_mask = 0;
- for (unsigned j = 0; j < pSubmits[i].waitSemaphoreCount; ++j) {
- wait_dst_stage_mask |= pSubmits[i].pWaitDstStageMask[j];
- }
-
- const VkTimelineSemaphoreSubmitInfo *timeline_info =
- vk_find_struct_const(pSubmits[i].pNext, TIMELINE_SEMAPHORE_SUBMIT_INFO);
-
- result = radv_queue_submit(queue, &(struct radv_queue_submission) {
- .cmd_buffers = pSubmits[i].pCommandBuffers,
- .cmd_buffer_count = pSubmits[i].commandBufferCount,
- .wait_dst_stage_mask = wait_dst_stage_mask,
- .flush_caches = !flushed_caches,
- .wait_semaphores = pSubmits[i].pWaitSemaphores,
- .wait_semaphore_count = pSubmits[i].waitSemaphoreCount,
- .signal_semaphores = pSubmits[i].pSignalSemaphores,
- .signal_semaphore_count = pSubmits[i].signalSemaphoreCount,
- .fence = i == fence_idx ? fence : VK_NULL_HANDLE,
- .wait_values = timeline_info ? timeline_info->pWaitSemaphoreValues : NULL,
- .wait_value_count = timeline_info && timeline_info->pWaitSemaphoreValues ? timeline_info->waitSemaphoreValueCount : 0,
- .signal_values = timeline_info ? timeline_info->pSignalSemaphoreValues : NULL,
- .signal_value_count = timeline_info && timeline_info->pSignalSemaphoreValues ? timeline_info->signalSemaphoreValueCount : 0,
- });
- if (result != VK_SUCCESS)
- return result;
-
- flushed_caches = true;
- }
-
- if (fence != VK_NULL_HANDLE && !submitCount) {
- result = radv_signal_fence(queue, fence);
- if (result != VK_SUCCESS)
- return result;
- }
-
- return VK_SUCCESS;
+static VkResult
+radv_signal_fence(struct radv_queue *queue, VkFence fence)
+{
+ return radv_queue_submit(queue, &(struct radv_queue_submission){.fence = fence});
+}
+
+static bool
+radv_submit_has_effects(const VkSubmitInfo *info)
+{
+ return info->commandBufferCount || info->waitSemaphoreCount || info->signalSemaphoreCount;
+}
+
+VkResult
+radv_QueueSubmit(VkQueue _queue, uint32_t submitCount, const VkSubmitInfo *pSubmits, VkFence fence)
+{
+ RADV_FROM_HANDLE(radv_queue, queue, _queue);
+ VkResult result;
+ uint32_t fence_idx = 0;
+ bool flushed_caches = false;
+
+ if (radv_device_is_lost(queue->device))
+ return VK_ERROR_DEVICE_LOST;
+
+ if (fence != VK_NULL_HANDLE) {
+ for (uint32_t i = 0; i < submitCount; ++i)
+ if (radv_submit_has_effects(pSubmits + i))
+ fence_idx = i;
+ } else
+ fence_idx = UINT32_MAX;
+
+ for (uint32_t i = 0; i < submitCount; i++) {
+ if (!radv_submit_has_effects(pSubmits + i) && fence_idx != i)
+ continue;
+
+ VkPipelineStageFlags wait_dst_stage_mask = 0;
+ for (unsigned j = 0; j < pSubmits[i].waitSemaphoreCount; ++j) {
+ wait_dst_stage_mask |= pSubmits[i].pWaitDstStageMask[j];
+ }
+
+ const VkTimelineSemaphoreSubmitInfo *timeline_info =
+ vk_find_struct_const(pSubmits[i].pNext, TIMELINE_SEMAPHORE_SUBMIT_INFO);
+
+ result = radv_queue_submit(
+ queue, &(struct radv_queue_submission){
+ .cmd_buffers = pSubmits[i].pCommandBuffers,
+ .cmd_buffer_count = pSubmits[i].commandBufferCount,
+ .wait_dst_stage_mask = wait_dst_stage_mask,
+ .flush_caches = !flushed_caches,
+ .wait_semaphores = pSubmits[i].pWaitSemaphores,
+ .wait_semaphore_count = pSubmits[i].waitSemaphoreCount,
+ .signal_semaphores = pSubmits[i].pSignalSemaphores,
+ .signal_semaphore_count = pSubmits[i].signalSemaphoreCount,
+ .fence = i == fence_idx ? fence : VK_NULL_HANDLE,
+ .wait_values = timeline_info ? timeline_info->pWaitSemaphoreValues : NULL,
+ .wait_value_count = timeline_info && timeline_info->pWaitSemaphoreValues
+ ? timeline_info->waitSemaphoreValueCount
+ : 0,
+ .signal_values = timeline_info ? timeline_info->pSignalSemaphoreValues : NULL,
+ .signal_value_count = timeline_info && timeline_info->pSignalSemaphoreValues
+ ? timeline_info->signalSemaphoreValueCount
+ : 0,
+ });
+ if (result != VK_SUCCESS)
+ return result;
+
+ flushed_caches = true;
+ }
+
+ if (fence != VK_NULL_HANDLE && !submitCount) {
+ result = radv_signal_fence(queue, fence);
+ if (result != VK_SUCCESS)
+ return result;
+ }
+
+ return VK_SUCCESS;
}
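
One detail worth noting in radv_QueueSubmit: the fence rides on the last VkSubmitInfo that actually does something, and if no batch has effects it stays on batch 0 (or, when submitCount is zero, gets signaled through an empty submission). A stripped-down sketch of that selection, with an illustrative struct standing in for VkSubmitInfo:

#include <stdbool.h>
#include <stdint.h>

struct submit_desc {   /* illustrative stand-in for VkSubmitInfo */
   uint32_t cmd_buffer_count;
   uint32_t wait_semaphore_count;
   uint32_t signal_semaphore_count;
};

static bool
submit_has_effects(const struct submit_desc *s)
{
   return s->cmd_buffer_count || s->wait_semaphore_count || s->signal_semaphore_count;
}

/* Index of the batch that should carry the fence when a fence is provided. */
static uint32_t
pick_fence_index(const struct submit_desc *submits, uint32_t count)
{
   uint32_t idx = 0;
   for (uint32_t i = 0; i < count; ++i)
      if (submit_has_effects(&submits[i]))
         idx = i;
   return idx;
}
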
static const char *
radv_get_queue_family_name(struct radv_queue *queue)
{
- switch (queue->queue_family_index) {
- case RADV_QUEUE_GENERAL:
- return "graphics";
- case RADV_QUEUE_COMPUTE:
- return "compute";
- case RADV_QUEUE_TRANSFER:
- return "transfer";
- default:
- unreachable("Unknown queue family");
- }
+ switch (queue->queue_family_index) {
+ case RADV_QUEUE_GENERAL:
+ return "graphics";
+ case RADV_QUEUE_COMPUTE:
+ return "compute";
+ case RADV_QUEUE_TRANSFER:
+ return "transfer";
+ default:
+ unreachable("Unknown queue family");
+ }
}
-VkResult radv_QueueWaitIdle(
- VkQueue _queue)
+VkResult
+radv_QueueWaitIdle(VkQueue _queue)
{
- RADV_FROM_HANDLE(radv_queue, queue, _queue);
+ RADV_FROM_HANDLE(radv_queue, queue, _queue);
- if (radv_device_is_lost(queue->device))
- return VK_ERROR_DEVICE_LOST;
+ if (radv_device_is_lost(queue->device))
+ return VK_ERROR_DEVICE_LOST;
- mtx_lock(&queue->pending_mutex);
- while (!list_is_empty(&queue->pending_submissions)) {
- u_cnd_monotonic_wait(&queue->device->timeline_cond, &queue->pending_mutex);
- }
- mtx_unlock(&queue->pending_mutex);
+ mtx_lock(&queue->pending_mutex);
+ while (!list_is_empty(&queue->pending_submissions)) {
+ u_cnd_monotonic_wait(&queue->device->timeline_cond, &queue->pending_mutex);
+ }
+ mtx_unlock(&queue->pending_mutex);
- if (!queue->device->ws->ctx_wait_idle(queue->hw_ctx,
- radv_queue_family_to_ring(queue->queue_family_index),
- queue->queue_idx)) {
- return radv_device_set_lost(queue->device,
- "Failed to wait for a '%s' queue "
- "to be idle. GPU hang ?",
- radv_get_queue_family_name(queue));
- }
+ if (!queue->device->ws->ctx_wait_idle(
+ queue->hw_ctx, radv_queue_family_to_ring(queue->queue_family_index), queue->queue_idx)) {
+ return radv_device_set_lost(queue->device,
+ "Failed to wait for a '%s' queue "
+ "to be idle. GPU hang ?",
+ radv_get_queue_family_name(queue));
+ }
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
-VkResult radv_DeviceWaitIdle(
- VkDevice _device)
+VkResult
+radv_DeviceWaitIdle(VkDevice _device)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_device, device, _device);
- for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
- for (unsigned q = 0; q < device->queue_count[i]; q++) {
- VkResult result =
- radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
+ for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
+ for (unsigned q = 0; q < device->queue_count[i]; q++) {
+ VkResult result = radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
- if (result != VK_SUCCESS)
- return result;
- }
- }
- return VK_SUCCESS;
+ if (result != VK_SUCCESS)
+ return result;
+ }
+ }
+ return VK_SUCCESS;
}
-VkResult radv_EnumerateInstanceExtensionProperties(
- const char* pLayerName,
- uint32_t* pPropertyCount,
- VkExtensionProperties* pProperties)
+VkResult
+radv_EnumerateInstanceExtensionProperties(const char *pLayerName, uint32_t *pPropertyCount,
+ VkExtensionProperties *pProperties)
{
- if (pLayerName)
- return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
+ if (pLayerName)
+ return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
- return vk_enumerate_instance_extension_properties(
- &radv_instance_extensions_supported,
- pPropertyCount, pProperties);
+ return vk_enumerate_instance_extension_properties(&radv_instance_extensions_supported,
+ pPropertyCount, pProperties);
}
-PFN_vkVoidFunction radv_GetInstanceProcAddr(
- VkInstance _instance,
- const char* pName)
+PFN_vkVoidFunction
+radv_GetInstanceProcAddr(VkInstance _instance, const char *pName)
{
- RADV_FROM_HANDLE(radv_instance, instance, _instance);
+ RADV_FROM_HANDLE(radv_instance, instance, _instance);
- /* The Vulkan 1.0 spec for vkGetInstanceProcAddr has a table of exactly
- * when we have to return valid function pointers, NULL, or it's left
- * undefined. See the table for exact details.
- */
- if (pName == NULL)
- return NULL;
+ /* The Vulkan 1.0 spec for vkGetInstanceProcAddr has a table of exactly
+ * when we have to return valid function pointers, NULL, or it's left
+ * undefined. See the table for exact details.
+ */
+ if (pName == NULL)
+ return NULL;
-#define LOOKUP_RADV_ENTRYPOINT(entrypoint) \
- if (strcmp(pName, "vk" #entrypoint) == 0) \
- return (PFN_vkVoidFunction)radv_##entrypoint
+#define LOOKUP_RADV_ENTRYPOINT(entrypoint) \
+ if (strcmp(pName, "vk" #entrypoint) == 0) \
+ return (PFN_vkVoidFunction)radv_##entrypoint
- LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceExtensionProperties);
- LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceLayerProperties);
- LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceVersion);
- LOOKUP_RADV_ENTRYPOINT(CreateInstance);
+ LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceExtensionProperties);
+ LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceLayerProperties);
+ LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceVersion);
+ LOOKUP_RADV_ENTRYPOINT(CreateInstance);
- /* GetInstanceProcAddr() can also be called with a NULL instance.
- * See https://gitlab.khronos.org/vulkan/vulkan/issues/2057
- */
- LOOKUP_RADV_ENTRYPOINT(GetInstanceProcAddr);
+ /* GetInstanceProcAddr() can also be called with a NULL instance.
+ * See https://gitlab.khronos.org/vulkan/vulkan/issues/2057
+ */
+ LOOKUP_RADV_ENTRYPOINT(GetInstanceProcAddr);
#undef LOOKUP_RADV_ENTRYPOINT
- if (instance == NULL)
- return NULL;
+ if (instance == NULL)
+ return NULL;
- return vk_instance_get_proc_addr(&instance->vk,
- &radv_instance_entrypoints,
- pName);
+ return vk_instance_get_proc_addr(&instance->vk, &radv_instance_entrypoints, pName);
}
/* The loader wants us to expose a second GetInstanceProcAddr function
* to work around certain LD_PRELOAD issues seen in apps.
*/
PUBLIC
-VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
- VkInstance instance,
- const char* pName)
+VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
+vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName)
{
- return radv_GetInstanceProcAddr(instance, pName);
+ return radv_GetInstanceProcAddr(instance, pName);
}
PUBLIC
-VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetPhysicalDeviceProcAddr(
- VkInstance _instance,
- const char* pName)
+VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
+vk_icdGetPhysicalDeviceProcAddr(VkInstance _instance, const char *pName)
{
- RADV_FROM_HANDLE(radv_instance, instance, _instance);
- return vk_instance_get_physical_device_proc_addr(&instance->vk, pName);
+ RADV_FROM_HANDLE(radv_instance, instance, _instance);
+ return vk_instance_get_physical_device_proc_addr(&instance->vk, pName);
}
-bool radv_get_memory_fd(struct radv_device *device,
- struct radv_device_memory *memory,
- int *pFD)
+bool
+radv_get_memory_fd(struct radv_device *device, struct radv_device_memory *memory, int *pFD)
{
- /* Only set BO metadata for the first plane */
- if (memory->image && memory->image->offset == 0) {
- struct radeon_bo_metadata metadata;
- radv_init_metadata(device, memory->image, &metadata);
- device->ws->buffer_set_metadata(device->ws, memory->bo, &metadata);
- }
+ /* Only set BO metadata for the first plane */
+ if (memory->image && memory->image->offset == 0) {
+ struct radeon_bo_metadata metadata;
+ radv_init_metadata(device, memory->image, &metadata);
+ device->ws->buffer_set_metadata(device->ws, memory->bo, &metadata);
+ }
- return device->ws->buffer_get_fd(device->ws, memory->bo,
- pFD);
+ return device->ws->buffer_get_fd(device->ws, memory->bo, pFD);
}
-
void
-radv_free_memory(struct radv_device *device,
- const VkAllocationCallbacks* pAllocator,
- struct radv_device_memory *mem)
+radv_free_memory(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
+ struct radv_device_memory *mem)
{
- if (mem == NULL)
- return;
+ if (mem == NULL)
+ return;
#if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER
- if (mem->android_hardware_buffer)
- AHardwareBuffer_release(mem->android_hardware_buffer);
+ if (mem->android_hardware_buffer)
+ AHardwareBuffer_release(mem->android_hardware_buffer);
#endif
- if (mem->bo) {
- if (device->overallocation_disallowed) {
- mtx_lock(&device->overallocation_mutex);
- device->allocated_memory_size[mem->heap_index] -= mem->alloc_size;
- mtx_unlock(&device->overallocation_mutex);
- }
-
- if (device->use_global_bo_list)
- device->ws->buffer_make_resident(device->ws, mem->bo, false);
- device->ws->buffer_destroy(device->ws, mem->bo);
- mem->bo = NULL;
- }
-
- vk_object_base_finish(&mem->base);
- vk_free2(&device->vk.alloc, pAllocator, mem);
-}
-
-static VkResult radv_alloc_memory(struct radv_device *device,
- const VkMemoryAllocateInfo* pAllocateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkDeviceMemory* pMem)
-{
- struct radv_device_memory *mem;
- VkResult result;
- enum radeon_bo_domain domain;
- uint32_t flags = 0;
-
- assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
-
- const VkImportMemoryFdInfoKHR *import_info =
- vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
- const VkMemoryDedicatedAllocateInfo *dedicate_info =
- vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO);
- const VkExportMemoryAllocateInfo *export_info =
- vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO);
- const struct VkImportAndroidHardwareBufferInfoANDROID *ahb_import_info =
- vk_find_struct_const(pAllocateInfo->pNext,
- IMPORT_ANDROID_HARDWARE_BUFFER_INFO_ANDROID);
- const VkImportMemoryHostPointerInfoEXT *host_ptr_info =
- vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_HOST_POINTER_INFO_EXT);
-
- const struct wsi_memory_allocate_info *wsi_info =
- vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);
-
- if (pAllocateInfo->allocationSize == 0 && !ahb_import_info &&
- !(export_info && (export_info->handleTypes & VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID))) {
- /* Apparently, this is allowed */
- *pMem = VK_NULL_HANDLE;
- return VK_SUCCESS;
- }
-
- mem = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*mem), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (mem == NULL)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
-
- vk_object_base_init(&device->vk, &mem->base,
- VK_OBJECT_TYPE_DEVICE_MEMORY);
-
- if (wsi_info && wsi_info->implicit_sync)
- flags |= RADEON_FLAG_IMPLICIT_SYNC;
-
- if (dedicate_info) {
- mem->image = radv_image_from_handle(dedicate_info->image);
- mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
- } else {
- mem->image = NULL;
- mem->buffer = NULL;
- }
-
- float priority_float = 0.5;
- const struct VkMemoryPriorityAllocateInfoEXT *priority_ext =
- vk_find_struct_const(pAllocateInfo->pNext,
- MEMORY_PRIORITY_ALLOCATE_INFO_EXT);
- if (priority_ext)
- priority_float = priority_ext->priority;
-
- unsigned priority = MIN2(RADV_BO_PRIORITY_APPLICATION_MAX - 1,
- (int)(priority_float * RADV_BO_PRIORITY_APPLICATION_MAX));
-
- mem->user_ptr = NULL;
- mem->bo = NULL;
+ if (mem->bo) {
+ if (device->overallocation_disallowed) {
+ mtx_lock(&device->overallocation_mutex);
+ device->allocated_memory_size[mem->heap_index] -= mem->alloc_size;
+ mtx_unlock(&device->overallocation_mutex);
+ }
+
+ if (device->use_global_bo_list)
+ device->ws->buffer_make_resident(device->ws, mem->bo, false);
+ device->ws->buffer_destroy(device->ws, mem->bo);
+ mem->bo = NULL;
+ }
+
+ vk_object_base_finish(&mem->base);
+ vk_free2(&device->vk.alloc, pAllocator, mem);
+}
+
+static VkResult
+radv_alloc_memory(struct radv_device *device, const VkMemoryAllocateInfo *pAllocateInfo,
+ const VkAllocationCallbacks *pAllocator, VkDeviceMemory *pMem)
+{
+ struct radv_device_memory *mem;
+ VkResult result;
+ enum radeon_bo_domain domain;
+ uint32_t flags = 0;
+
+ assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
+
+ const VkImportMemoryFdInfoKHR *import_info =
+ vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
+ const VkMemoryDedicatedAllocateInfo *dedicate_info =
+ vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO);
+ const VkExportMemoryAllocateInfo *export_info =
+ vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO);
+ const struct VkImportAndroidHardwareBufferInfoANDROID *ahb_import_info =
+ vk_find_struct_const(pAllocateInfo->pNext, IMPORT_ANDROID_HARDWARE_BUFFER_INFO_ANDROID);
+ const VkImportMemoryHostPointerInfoEXT *host_ptr_info =
+ vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_HOST_POINTER_INFO_EXT);
+
+ const struct wsi_memory_allocate_info *wsi_info =
+ vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);
+
+ if (pAllocateInfo->allocationSize == 0 && !ahb_import_info &&
+ !(export_info && (export_info->handleTypes &
+ VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID))) {
+ /* Apparently, this is allowed */
+ *pMem = VK_NULL_HANDLE;
+ return VK_SUCCESS;
+ }
+
+ mem =
+ vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*mem), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (mem == NULL)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ vk_object_base_init(&device->vk, &mem->base, VK_OBJECT_TYPE_DEVICE_MEMORY);
+
+ if (wsi_info && wsi_info->implicit_sync)
+ flags |= RADEON_FLAG_IMPLICIT_SYNC;
+
+ if (dedicate_info) {
+ mem->image = radv_image_from_handle(dedicate_info->image);
+ mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
+ } else {
+ mem->image = NULL;
+ mem->buffer = NULL;
+ }
+
+ float priority_float = 0.5;
+ const struct VkMemoryPriorityAllocateInfoEXT *priority_ext =
+ vk_find_struct_const(pAllocateInfo->pNext, MEMORY_PRIORITY_ALLOCATE_INFO_EXT);
+ if (priority_ext)
+ priority_float = priority_ext->priority;
+
+ unsigned priority = MIN2(RADV_BO_PRIORITY_APPLICATION_MAX - 1,
+ (int)(priority_float * RADV_BO_PRIORITY_APPLICATION_MAX));
+
+ mem->user_ptr = NULL;
+ mem->bo = NULL;
#if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER
- mem->android_hardware_buffer = NULL;
+ mem->android_hardware_buffer = NULL;
#endif
- if (ahb_import_info) {
- result = radv_import_ahb_memory(device, mem, priority, ahb_import_info);
- if (result != VK_SUCCESS)
- goto fail;
- } else if(export_info && (export_info->handleTypes & VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID)) {
- result = radv_create_ahb_memory(device, mem, priority, pAllocateInfo);
- if (result != VK_SUCCESS)
- goto fail;
- } else if (import_info) {
- assert(import_info->handleType ==
- VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
- import_info->handleType ==
- VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
- mem->bo = device->ws->buffer_from_fd(device->ws, import_info->fd,
- priority, NULL);
- if (!mem->bo) {
- result = VK_ERROR_INVALID_EXTERNAL_HANDLE;
- goto fail;
- } else {
- close(import_info->fd);
- }
-
- if (mem->image && mem->image->plane_count == 1 &&
- !vk_format_is_depth_or_stencil(mem->image->vk_format) &&
- mem->image->info.samples == 1 &&
- mem->image->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
- struct radeon_bo_metadata metadata;
- device->ws->buffer_get_metadata(device->ws, mem->bo, &metadata);
-
- struct radv_image_create_info create_info = {
- .no_metadata_planes = true,
- .bo_metadata = &metadata
- };
-
- /* This gives a basic ability to import radeonsi images
- * that don't have DCC. This is not guaranteed by any
- * spec and can be removed after we support modifiers. */
- result = radv_image_create_layout(device, create_info, NULL,
- mem->image);
- if (result != VK_SUCCESS) {
- device->ws->buffer_destroy(device->ws, mem->bo);
- goto fail;
- }
- }
- } else if (host_ptr_info) {
- assert(host_ptr_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
- mem->bo = device->ws->buffer_from_ptr(device->ws, host_ptr_info->pHostPointer,
- pAllocateInfo->allocationSize,
- priority);
- if (!mem->bo) {
- result = VK_ERROR_INVALID_EXTERNAL_HANDLE;
- goto fail;
- } else {
- mem->user_ptr = host_ptr_info->pHostPointer;
- }
- } else {
- uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
- uint32_t heap_index;
-
- heap_index = device->physical_device->memory_properties.memoryTypes[pAllocateInfo->memoryTypeIndex].heapIndex;
- domain = device->physical_device->memory_domains[pAllocateInfo->memoryTypeIndex];
- flags |= device->physical_device->memory_flags[pAllocateInfo->memoryTypeIndex];
-
- if (!import_info && (!export_info || !export_info->handleTypes)) {
- flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
- if (device->use_global_bo_list) {
- flags |= RADEON_FLAG_PREFER_LOCAL_BO;
- }
- }
-
- if (device->overallocation_disallowed) {
- uint64_t total_size =
- device->physical_device->memory_properties.memoryHeaps[heap_index].size;
-
- mtx_lock(&device->overallocation_mutex);
- if (device->allocated_memory_size[heap_index] + alloc_size > total_size) {
- mtx_unlock(&device->overallocation_mutex);
- result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
- goto fail;
- }
- device->allocated_memory_size[heap_index] += alloc_size;
- mtx_unlock(&device->overallocation_mutex);
- }
-
- mem->bo = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment,
- domain, flags, priority);
-
- if (!mem->bo) {
- if (device->overallocation_disallowed) {
- mtx_lock(&device->overallocation_mutex);
- device->allocated_memory_size[heap_index] -= alloc_size;
- mtx_unlock(&device->overallocation_mutex);
- }
- result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
- goto fail;
- }
-
- mem->heap_index = heap_index;
- mem->alloc_size = alloc_size;
- }
-
- if (!wsi_info) {
- if (device->use_global_bo_list) {
- result = device->ws->buffer_make_resident(device->ws, mem->bo, true);
- if (result != VK_SUCCESS)
- goto fail;
- }
- }
-
- *pMem = radv_device_memory_to_handle(mem);
-
- return VK_SUCCESS;
+ if (ahb_import_info) {
+ result = radv_import_ahb_memory(device, mem, priority, ahb_import_info);
+ if (result != VK_SUCCESS)
+ goto fail;
+ } else if (export_info && (export_info->handleTypes &
+ VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID)) {
+ result = radv_create_ahb_memory(device, mem, priority, pAllocateInfo);
+ if (result != VK_SUCCESS)
+ goto fail;
+ } else if (import_info) {
+ assert(import_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
+ import_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
+ mem->bo = device->ws->buffer_from_fd(device->ws, import_info->fd, priority, NULL);
+ if (!mem->bo) {
+ result = VK_ERROR_INVALID_EXTERNAL_HANDLE;
+ goto fail;
+ } else {
+ close(import_info->fd);
+ }
+
+ if (mem->image && mem->image->plane_count == 1 &&
+ !vk_format_is_depth_or_stencil(mem->image->vk_format) && mem->image->info.samples == 1 &&
+ mem->image->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
+ struct radeon_bo_metadata metadata;
+ device->ws->buffer_get_metadata(device->ws, mem->bo, &metadata);
+
+ struct radv_image_create_info create_info = {.no_metadata_planes = true,
+ .bo_metadata = &metadata};
+
+ /* This gives a basic ability to import radeonsi images
+ * that don't have DCC. This is not guaranteed by any
+ * spec and can be removed after we support modifiers. */
+ result = radv_image_create_layout(device, create_info, NULL, mem->image);
+ if (result != VK_SUCCESS) {
+ device->ws->buffer_destroy(device->ws, mem->bo);
+ goto fail;
+ }
+ }
+ } else if (host_ptr_info) {
+ assert(host_ptr_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
+ mem->bo = device->ws->buffer_from_ptr(device->ws, host_ptr_info->pHostPointer,
+ pAllocateInfo->allocationSize, priority);
+ if (!mem->bo) {
+ result = VK_ERROR_INVALID_EXTERNAL_HANDLE;
+ goto fail;
+ } else {
+ mem->user_ptr = host_ptr_info->pHostPointer;
+ }
+ } else {
+ uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
+ uint32_t heap_index;
+
+ heap_index =
+ device->physical_device->memory_properties.memoryTypes[pAllocateInfo->memoryTypeIndex]
+ .heapIndex;
+ domain = device->physical_device->memory_domains[pAllocateInfo->memoryTypeIndex];
+ flags |= device->physical_device->memory_flags[pAllocateInfo->memoryTypeIndex];
+
+ if (!import_info && (!export_info || !export_info->handleTypes)) {
+ flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
+ if (device->use_global_bo_list) {
+ flags |= RADEON_FLAG_PREFER_LOCAL_BO;
+ }
+ }
+
+ if (device->overallocation_disallowed) {
+ uint64_t total_size =
+ device->physical_device->memory_properties.memoryHeaps[heap_index].size;
+
+ mtx_lock(&device->overallocation_mutex);
+ if (device->allocated_memory_size[heap_index] + alloc_size > total_size) {
+ mtx_unlock(&device->overallocation_mutex);
+ result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
+ goto fail;
+ }
+ device->allocated_memory_size[heap_index] += alloc_size;
+ mtx_unlock(&device->overallocation_mutex);
+ }
+
+ mem->bo = device->ws->buffer_create(device->ws, alloc_size,
+ device->physical_device->rad_info.max_alignment, domain,
+ flags, priority);
+
+ if (!mem->bo) {
+ if (device->overallocation_disallowed) {
+ mtx_lock(&device->overallocation_mutex);
+ device->allocated_memory_size[heap_index] -= alloc_size;
+ mtx_unlock(&device->overallocation_mutex);
+ }
+ result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
+ goto fail;
+ }
+
+ mem->heap_index = heap_index;
+ mem->alloc_size = alloc_size;
+ }
+
+ if (!wsi_info) {
+ if (device->use_global_bo_list) {
+ result = device->ws->buffer_make_resident(device->ws, mem->bo, true);
+ if (result != VK_SUCCESS)
+ goto fail;
+ }
+ }
+
+ *pMem = radv_device_memory_to_handle(mem);
+
+ return VK_SUCCESS;
fail:
- radv_free_memory(device, pAllocator,mem);
+ radv_free_memory(device, pAllocator, mem);
- return result;
+ return result;
}
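
When overallocation is disallowed, radv_alloc_memory charges the heap under a mutex before creating the buffer and rolls the charge back if the allocation fails; radv_free_memory undoes the charge again when the memory is eventually released. A minimal sketch of that budget bookkeeping, with hypothetical types rather than the radv_device fields:

#include <stdbool.h>
#include <stdint.h>
#include <threads.h>

struct heap_budget {
   mtx_t    mutex;
   uint64_t total;       /* heap size reported to the application */
   uint64_t allocated;   /* bytes currently charged against this heap */
};

/* Reserve `size` bytes, failing instead of exceeding the heap budget. */
static bool
budget_reserve(struct heap_budget *h, uint64_t size)
{
   mtx_lock(&h->mutex);
   bool ok = h->allocated + size <= h->total;
   if (ok)
      h->allocated += size;
   mtx_unlock(&h->mutex);
   return ok;
}

/* Undo a reservation, e.g. when creating the BO itself fails. */
static void
budget_release(struct heap_budget *h, uint64_t size)
{
   mtx_lock(&h->mutex);
   h->allocated -= size;
   mtx_unlock(&h->mutex);
}
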
-VkResult radv_AllocateMemory(
- VkDevice _device,
- const VkMemoryAllocateInfo* pAllocateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkDeviceMemory* pMem)
+VkResult
+radv_AllocateMemory(VkDevice _device, const VkMemoryAllocateInfo *pAllocateInfo,
+ const VkAllocationCallbacks *pAllocator, VkDeviceMemory *pMem)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- return radv_alloc_memory(device, pAllocateInfo, pAllocator, pMem);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ return radv_alloc_memory(device, pAllocateInfo, pAllocator, pMem);
}
-void radv_FreeMemory(
- VkDevice _device,
- VkDeviceMemory _mem,
- const VkAllocationCallbacks* pAllocator)
+void
+radv_FreeMemory(VkDevice _device, VkDeviceMemory _mem, const VkAllocationCallbacks *pAllocator)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
- radv_free_memory(device, pAllocator, mem);
+ radv_free_memory(device, pAllocator, mem);
}
-VkResult radv_MapMemory(
- VkDevice _device,
- VkDeviceMemory _memory,
- VkDeviceSize offset,
- VkDeviceSize size,
- VkMemoryMapFlags flags,
- void** ppData)
+VkResult
+radv_MapMemory(VkDevice _device, VkDeviceMemory _memory, VkDeviceSize offset, VkDeviceSize size,
+ VkMemoryMapFlags flags, void **ppData)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
- if (mem == NULL) {
- *ppData = NULL;
- return VK_SUCCESS;
- }
+ if (mem == NULL) {
+ *ppData = NULL;
+ return VK_SUCCESS;
+ }
- if (mem->user_ptr)
- *ppData = mem->user_ptr;
- else
- *ppData = device->ws->buffer_map(mem->bo);
+ if (mem->user_ptr)
+ *ppData = mem->user_ptr;
+ else
+ *ppData = device->ws->buffer_map(mem->bo);
- if (*ppData) {
- *ppData = (uint8_t*)*ppData + offset;
- return VK_SUCCESS;
- }
+ if (*ppData) {
+ *ppData = (uint8_t *)*ppData + offset;
+ return VK_SUCCESS;
+ }
- return vk_error(device->instance, VK_ERROR_MEMORY_MAP_FAILED);
+ return vk_error(device->instance, VK_ERROR_MEMORY_MAP_FAILED);
}
-void radv_UnmapMemory(
- VkDevice _device,
- VkDeviceMemory _memory)
+void
+radv_UnmapMemory(VkDevice _device, VkDeviceMemory _memory)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
- if (mem == NULL)
- return;
+ if (mem == NULL)
+ return;
- if (mem->user_ptr == NULL)
- device->ws->buffer_unmap(mem->bo);
+ if (mem->user_ptr == NULL)
+ device->ws->buffer_unmap(mem->bo);
}
-VkResult radv_FlushMappedMemoryRanges(
- VkDevice _device,
- uint32_t memoryRangeCount,
- const VkMappedMemoryRange* pMemoryRanges)
+VkResult
+radv_FlushMappedMemoryRanges(VkDevice _device, uint32_t memoryRangeCount,
+ const VkMappedMemoryRange *pMemoryRanges)
{
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
-VkResult radv_InvalidateMappedMemoryRanges(
- VkDevice _device,
- uint32_t memoryRangeCount,
- const VkMappedMemoryRange* pMemoryRanges)
+VkResult
+radv_InvalidateMappedMemoryRanges(VkDevice _device, uint32_t memoryRangeCount,
+ const VkMappedMemoryRange *pMemoryRanges)
{
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
-void radv_GetBufferMemoryRequirements(
- VkDevice _device,
- VkBuffer _buffer,
- VkMemoryRequirements* pMemoryRequirements)
+void
+radv_GetBufferMemoryRequirements(VkDevice _device, VkBuffer _buffer,
+ VkMemoryRequirements *pMemoryRequirements)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
- pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
+ pMemoryRequirements->memoryTypeBits =
+ (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
- if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
- pMemoryRequirements->alignment = 4096;
- else
- pMemoryRequirements->alignment = 16;
+ if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
+ pMemoryRequirements->alignment = 4096;
+ else
+ pMemoryRequirements->alignment = 16;
- pMemoryRequirements->size = align64(buffer->size, pMemoryRequirements->alignment);
+ pMemoryRequirements->size = align64(buffer->size, pMemoryRequirements->alignment);
}
-void radv_GetBufferMemoryRequirements2(
- VkDevice device,
- const VkBufferMemoryRequirementsInfo2 *pInfo,
- VkMemoryRequirements2 *pMemoryRequirements)
+void
+radv_GetBufferMemoryRequirements2(VkDevice device, const VkBufferMemoryRequirementsInfo2 *pInfo,
+ VkMemoryRequirements2 *pMemoryRequirements)
+{
+ radv_GetBufferMemoryRequirements(device, pInfo->buffer,
+ &pMemoryRequirements->memoryRequirements);
+ vk_foreach_struct(ext, pMemoryRequirements->pNext)
+ {
+ switch (ext->sType) {
+ case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
+ VkMemoryDedicatedRequirements *req = (VkMemoryDedicatedRequirements *)ext;
+ req->requiresDedicatedAllocation = false;
+ req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
+ break;
+ }
+ default:
+ break;
+ }
+ }
+}
+
+void
+radv_GetImageMemoryRequirements(VkDevice _device, VkImage _image,
+ VkMemoryRequirements *pMemoryRequirements)
{
- radv_GetBufferMemoryRequirements(device, pInfo->buffer,
- &pMemoryRequirements->memoryRequirements);
- vk_foreach_struct(ext, pMemoryRequirements->pNext) {
- switch (ext->sType) {
- case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
- VkMemoryDedicatedRequirements *req =
- (VkMemoryDedicatedRequirements *) ext;
- req->requiresDedicatedAllocation = false;
- req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
- break;
- }
- default:
- break;
- }
- }
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_image, image, _image);
+
+ pMemoryRequirements->memoryTypeBits =
+ (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
+
+ pMemoryRequirements->size = image->size;
+ pMemoryRequirements->alignment = image->alignment;
}
-void radv_GetImageMemoryRequirements(
- VkDevice _device,
- VkImage _image,
- VkMemoryRequirements* pMemoryRequirements)
+void
+radv_GetImageMemoryRequirements2(VkDevice device, const VkImageMemoryRequirementsInfo2 *pInfo,
+ VkMemoryRequirements2 *pMemoryRequirements)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_image, image, _image);
+ radv_GetImageMemoryRequirements(device, pInfo->image, &pMemoryRequirements->memoryRequirements);
- pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
+ RADV_FROM_HANDLE(radv_image, image, pInfo->image);
- pMemoryRequirements->size = image->size;
- pMemoryRequirements->alignment = image->alignment;
+ vk_foreach_struct(ext, pMemoryRequirements->pNext)
+ {
+ switch (ext->sType) {
+ case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
+ VkMemoryDedicatedRequirements *req = (VkMemoryDedicatedRequirements *)ext;
+ req->requiresDedicatedAllocation =
+ image->shareable && image->tiling != VK_IMAGE_TILING_LINEAR;
+ req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
+ break;
+ }
+ default:
+ break;
+ }
+ }
}
-void radv_GetImageMemoryRequirements2(
- VkDevice device,
- const VkImageMemoryRequirementsInfo2 *pInfo,
- VkMemoryRequirements2 *pMemoryRequirements)
+void
+radv_GetDeviceMemoryCommitment(VkDevice device, VkDeviceMemory memory,
+ VkDeviceSize *pCommittedMemoryInBytes)
{
- radv_GetImageMemoryRequirements(device, pInfo->image,
- &pMemoryRequirements->memoryRequirements);
+ *pCommittedMemoryInBytes = 0;
+}
- RADV_FROM_HANDLE(radv_image, image, pInfo->image);
+VkResult
+radv_BindBufferMemory2(VkDevice device, uint32_t bindInfoCount,
+ const VkBindBufferMemoryInfo *pBindInfos)
+{
+ for (uint32_t i = 0; i < bindInfoCount; ++i) {
+ RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
+ RADV_FROM_HANDLE(radv_buffer, buffer, pBindInfos[i].buffer);
- vk_foreach_struct(ext, pMemoryRequirements->pNext) {
- switch (ext->sType) {
- case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
- VkMemoryDedicatedRequirements *req =
- (VkMemoryDedicatedRequirements *) ext;
- req->requiresDedicatedAllocation = image->shareable &&
- image->tiling != VK_IMAGE_TILING_LINEAR;
- req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
- break;
- }
- default:
- break;
- }
- }
+ if (mem) {
+ buffer->bo = mem->bo;
+ buffer->offset = pBindInfos[i].memoryOffset;
+ } else {
+ buffer->bo = NULL;
+ }
+ }
+ return VK_SUCCESS;
}
-void radv_GetDeviceMemoryCommitment(
- VkDevice device,
- VkDeviceMemory memory,
- VkDeviceSize* pCommittedMemoryInBytes)
+VkResult
+radv_BindBufferMemory(VkDevice device, VkBuffer buffer, VkDeviceMemory memory,
+ VkDeviceSize memoryOffset)
{
- *pCommittedMemoryInBytes = 0;
+ const VkBindBufferMemoryInfo info = {.sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
+ .buffer = buffer,
+ .memory = memory,
+ .memoryOffset = memoryOffset};
+
+ return radv_BindBufferMemory2(device, 1, &info);
}
-VkResult radv_BindBufferMemory2(VkDevice device,
- uint32_t bindInfoCount,
- const VkBindBufferMemoryInfo *pBindInfos)
+VkResult
+radv_BindImageMemory2(VkDevice device, uint32_t bindInfoCount,
+ const VkBindImageMemoryInfo *pBindInfos)
{
- for (uint32_t i = 0; i < bindInfoCount; ++i) {
- RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
- RADV_FROM_HANDLE(radv_buffer, buffer, pBindInfos[i].buffer);
+ for (uint32_t i = 0; i < bindInfoCount; ++i) {
+ RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
+ RADV_FROM_HANDLE(radv_image, image, pBindInfos[i].image);
- if (mem) {
- buffer->bo = mem->bo;
- buffer->offset = pBindInfos[i].memoryOffset;
- } else {
- buffer->bo = NULL;
- }
- }
- return VK_SUCCESS;
-}
-
-VkResult radv_BindBufferMemory(
- VkDevice device,
- VkBuffer buffer,
- VkDeviceMemory memory,
- VkDeviceSize memoryOffset)
-{
- const VkBindBufferMemoryInfo info = {
- .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
- .buffer = buffer,
- .memory = memory,
- .memoryOffset = memoryOffset
- };
-
- return radv_BindBufferMemory2(device, 1, &info);
-}
-
-VkResult radv_BindImageMemory2(VkDevice device,
- uint32_t bindInfoCount,
- const VkBindImageMemoryInfo *pBindInfos)
-{
- for (uint32_t i = 0; i < bindInfoCount; ++i) {
- RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
- RADV_FROM_HANDLE(radv_image, image, pBindInfos[i].image);
-
- if (mem) {
- image->bo = mem->bo;
- image->offset = pBindInfos[i].memoryOffset;
- } else {
- image->bo = NULL;
- image->offset = 0;
- }
- }
- return VK_SUCCESS;
-}
-
-
-VkResult radv_BindImageMemory(
- VkDevice device,
- VkImage image,
- VkDeviceMemory memory,
- VkDeviceSize memoryOffset)
-{
- const VkBindImageMemoryInfo info = {
- .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
- .image = image,
- .memory = memory,
- .memoryOffset = memoryOffset
- };
-
- return radv_BindImageMemory2(device, 1, &info);
-}
-
-static bool radv_sparse_bind_has_effects(const VkBindSparseInfo *info)
-{
- return info->bufferBindCount ||
- info->imageOpaqueBindCount ||
- info->imageBindCount ||
- info->waitSemaphoreCount ||
- info->signalSemaphoreCount;
-}
-
- VkResult radv_QueueBindSparse(
- VkQueue _queue,
- uint32_t bindInfoCount,
- const VkBindSparseInfo* pBindInfo,
- VkFence fence)
-{
- RADV_FROM_HANDLE(radv_queue, queue, _queue);
- uint32_t fence_idx = 0;
-
- if (radv_device_is_lost(queue->device))
- return VK_ERROR_DEVICE_LOST;
+ if (mem) {
+ image->bo = mem->bo;
+ image->offset = pBindInfos[i].memoryOffset;
+ } else {
+ image->bo = NULL;
+ image->offset = 0;
+ }
+ }
+ return VK_SUCCESS;
+}
- if (fence != VK_NULL_HANDLE) {
- for (uint32_t i = 0; i < bindInfoCount; ++i)
- if (radv_sparse_bind_has_effects(pBindInfo + i))
- fence_idx = i;
- } else
- fence_idx = UINT32_MAX;
-
- for (uint32_t i = 0; i < bindInfoCount; ++i) {
- if (i != fence_idx && !radv_sparse_bind_has_effects(pBindInfo + i))
- continue;
-
- const VkTimelineSemaphoreSubmitInfo *timeline_info =
- vk_find_struct_const(pBindInfo[i].pNext, TIMELINE_SEMAPHORE_SUBMIT_INFO);
-
- VkResult result = radv_queue_submit(queue, &(struct radv_queue_submission) {
- .buffer_binds = pBindInfo[i].pBufferBinds,
- .buffer_bind_count = pBindInfo[i].bufferBindCount,
- .image_opaque_binds = pBindInfo[i].pImageOpaqueBinds,
- .image_opaque_bind_count = pBindInfo[i].imageOpaqueBindCount,
- .image_binds = pBindInfo[i].pImageBinds,
- .image_bind_count = pBindInfo[i].imageBindCount,
- .wait_semaphores = pBindInfo[i].pWaitSemaphores,
- .wait_semaphore_count = pBindInfo[i].waitSemaphoreCount,
- .signal_semaphores = pBindInfo[i].pSignalSemaphores,
- .signal_semaphore_count = pBindInfo[i].signalSemaphoreCount,
- .fence = i == fence_idx ? fence : VK_NULL_HANDLE,
- .wait_values = timeline_info ? timeline_info->pWaitSemaphoreValues : NULL,
- .wait_value_count = timeline_info && timeline_info->pWaitSemaphoreValues ? timeline_info->waitSemaphoreValueCount : 0,
- .signal_values = timeline_info ? timeline_info->pSignalSemaphoreValues : NULL,
- .signal_value_count = timeline_info && timeline_info->pSignalSemaphoreValues ? timeline_info->signalSemaphoreValueCount : 0,
- });
-
- if (result != VK_SUCCESS)
- return result;
- }
-
- if (fence != VK_NULL_HANDLE && !bindInfoCount) {
- VkResult result = radv_signal_fence(queue, fence);
- if (result != VK_SUCCESS)
- return result;
- }
-
- return VK_SUCCESS;
+VkResult
+radv_BindImageMemory(VkDevice device, VkImage image, VkDeviceMemory memory,
+ VkDeviceSize memoryOffset)
+{
+   const VkBindImageMemoryInfo info = {.sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO,
+ .image = image,
+ .memory = memory,
+ .memoryOffset = memoryOffset};
+
+ return radv_BindImageMemory2(device, 1, &info);
+}
+
+static bool
+radv_sparse_bind_has_effects(const VkBindSparseInfo *info)
+{
+ return info->bufferBindCount || info->imageOpaqueBindCount || info->imageBindCount ||
+ info->waitSemaphoreCount || info->signalSemaphoreCount;
+}
+
+VkResult
+radv_QueueBindSparse(VkQueue _queue, uint32_t bindInfoCount, const VkBindSparseInfo *pBindInfo,
+ VkFence fence)
+{
+ RADV_FROM_HANDLE(radv_queue, queue, _queue);
+ uint32_t fence_idx = 0;
+
+ if (radv_device_is_lost(queue->device))
+ return VK_ERROR_DEVICE_LOST;
+
+ if (fence != VK_NULL_HANDLE) {
+ for (uint32_t i = 0; i < bindInfoCount; ++i)
+ if (radv_sparse_bind_has_effects(pBindInfo + i))
+ fence_idx = i;
+ } else
+ fence_idx = UINT32_MAX;
+
+ for (uint32_t i = 0; i < bindInfoCount; ++i) {
+ if (i != fence_idx && !radv_sparse_bind_has_effects(pBindInfo + i))
+ continue;
+
+ const VkTimelineSemaphoreSubmitInfo *timeline_info =
+ vk_find_struct_const(pBindInfo[i].pNext, TIMELINE_SEMAPHORE_SUBMIT_INFO);
+
+ VkResult result = radv_queue_submit(
+ queue, &(struct radv_queue_submission){
+ .buffer_binds = pBindInfo[i].pBufferBinds,
+ .buffer_bind_count = pBindInfo[i].bufferBindCount,
+ .image_opaque_binds = pBindInfo[i].pImageOpaqueBinds,
+ .image_opaque_bind_count = pBindInfo[i].imageOpaqueBindCount,
+ .image_binds = pBindInfo[i].pImageBinds,
+ .image_bind_count = pBindInfo[i].imageBindCount,
+ .wait_semaphores = pBindInfo[i].pWaitSemaphores,
+ .wait_semaphore_count = pBindInfo[i].waitSemaphoreCount,
+ .signal_semaphores = pBindInfo[i].pSignalSemaphores,
+ .signal_semaphore_count = pBindInfo[i].signalSemaphoreCount,
+ .fence = i == fence_idx ? fence : VK_NULL_HANDLE,
+ .wait_values = timeline_info ? timeline_info->pWaitSemaphoreValues : NULL,
+ .wait_value_count = timeline_info && timeline_info->pWaitSemaphoreValues
+ ? timeline_info->waitSemaphoreValueCount
+ : 0,
+ .signal_values = timeline_info ? timeline_info->pSignalSemaphoreValues : NULL,
+ .signal_value_count = timeline_info && timeline_info->pSignalSemaphoreValues
+ ? timeline_info->signalSemaphoreValueCount
+ : 0,
+ });
+
+ if (result != VK_SUCCESS)
+ return result;
+ }
+
+ if (fence != VK_NULL_HANDLE && !bindInfoCount) {
+ VkResult result = radv_signal_fence(queue, fence);
+ if (result != VK_SUCCESS)
+ return result;
+ }
+
+ return VK_SUCCESS;
}
static void
-radv_destroy_fence_part(struct radv_device *device,
- struct radv_fence_part *part)
+radv_destroy_fence_part(struct radv_device *device, struct radv_fence_part *part)
{
- if (part->kind != RADV_FENCE_NONE)
- device->ws->destroy_syncobj(device->ws, part->syncobj);
- part->kind = RADV_FENCE_NONE;
+ if (part->kind != RADV_FENCE_NONE)
+ device->ws->destroy_syncobj(device->ws, part->syncobj);
+ part->kind = RADV_FENCE_NONE;
}
static void
-radv_destroy_fence(struct radv_device *device,
- const VkAllocationCallbacks *pAllocator,
- struct radv_fence *fence)
+radv_destroy_fence(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
+ struct radv_fence *fence)
{
- radv_destroy_fence_part(device, &fence->temporary);
- radv_destroy_fence_part(device, &fence->permanent);
+ radv_destroy_fence_part(device, &fence->temporary);
+ radv_destroy_fence_part(device, &fence->permanent);
- vk_object_base_finish(&fence->base);
- vk_free2(&device->vk.alloc, pAllocator, fence);
+ vk_object_base_finish(&fence->base);
+ vk_free2(&device->vk.alloc, pAllocator, fence);
}
-VkResult radv_CreateFence(
- VkDevice _device,
- const VkFenceCreateInfo* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkFence* pFence)
+VkResult
+radv_CreateFence(VkDevice _device, const VkFenceCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkFence *pFence)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- bool create_signaled = false;
- struct radv_fence *fence;
- int ret;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ bool create_signaled = false;
+ struct radv_fence *fence;
+ int ret;
- fence = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*fence), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (!fence)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ fence = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*fence), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (!fence)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- vk_object_base_init(&device->vk, &fence->base, VK_OBJECT_TYPE_FENCE);
+ vk_object_base_init(&device->vk, &fence->base, VK_OBJECT_TYPE_FENCE);
- fence->permanent.kind = RADV_FENCE_SYNCOBJ;
+ fence->permanent.kind = RADV_FENCE_SYNCOBJ;
- if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT)
- create_signaled = true;
+ if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT)
+ create_signaled = true;
- ret = device->ws->create_syncobj(device->ws, create_signaled,
- &fence->permanent.syncobj);
- if (ret) {
- radv_destroy_fence(device, pAllocator, fence);
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- }
+ ret = device->ws->create_syncobj(device->ws, create_signaled, &fence->permanent.syncobj);
+ if (ret) {
+ radv_destroy_fence(device, pAllocator, fence);
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
- *pFence = radv_fence_to_handle(fence);
+ *pFence = radv_fence_to_handle(fence);
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
-
-void radv_DestroyFence(
- VkDevice _device,
- VkFence _fence,
- const VkAllocationCallbacks* pAllocator)
+void
+radv_DestroyFence(VkDevice _device, VkFence _fence, const VkAllocationCallbacks *pAllocator)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_fence, fence, _fence);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_fence, fence, _fence);
- if (!fence)
- return;
+ if (!fence)
+ return;
- radv_destroy_fence(device, pAllocator, fence);
+ radv_destroy_fence(device, pAllocator, fence);
}
-VkResult radv_WaitForFences(
- VkDevice _device,
- uint32_t fenceCount,
- const VkFence* pFences,
- VkBool32 waitAll,
- uint64_t timeout)
+VkResult
+radv_WaitForFences(VkDevice _device, uint32_t fenceCount, const VkFence *pFences, VkBool32 waitAll,
+ uint64_t timeout)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- uint32_t *handles;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ uint32_t *handles;
- if (radv_device_is_lost(device))
- return VK_ERROR_DEVICE_LOST;
+ if (radv_device_is_lost(device))
+ return VK_ERROR_DEVICE_LOST;
- timeout = radv_get_absolute_timeout(timeout);
+ timeout = radv_get_absolute_timeout(timeout);
- handles = malloc(sizeof(uint32_t) * fenceCount);
- if (!handles)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ handles = malloc(sizeof(uint32_t) * fenceCount);
+ if (!handles)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- for (uint32_t i = 0; i < fenceCount; ++i) {
- RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
+ for (uint32_t i = 0; i < fenceCount; ++i) {
+ RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
- struct radv_fence_part *part =
- fence->temporary.kind != RADV_FENCE_NONE ?
- &fence->temporary : &fence->permanent;
+ struct radv_fence_part *part =
+ fence->temporary.kind != RADV_FENCE_NONE ? &fence->temporary : &fence->permanent;
- assert(part->kind == RADV_FENCE_SYNCOBJ);
- handles[i] = part->syncobj;
- }
+ assert(part->kind == RADV_FENCE_SYNCOBJ);
+ handles[i] = part->syncobj;
+ }
- bool success = device->ws->wait_syncobj(device->ws, handles, fenceCount, waitAll, timeout);
- free(handles);
- return success ? VK_SUCCESS : VK_TIMEOUT;
+ bool success = device->ws->wait_syncobj(device->ws, handles, fenceCount, waitAll, timeout);
+ free(handles);
+ return success ? VK_SUCCESS : VK_TIMEOUT;
}
-VkResult radv_ResetFences(VkDevice _device,
- uint32_t fenceCount,
- const VkFence *pFences)
+VkResult
+radv_ResetFences(VkDevice _device, uint32_t fenceCount, const VkFence *pFences)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_device, device, _device);
- for (unsigned i = 0; i < fenceCount; ++i) {
- RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
+ for (unsigned i = 0; i < fenceCount; ++i) {
+ RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
- /* From the Vulkan 1.0.53 spec:
- *
- * "If any member of pFences currently has its payload
- * imported with temporary permanence, that fence’s prior
- * permanent payload is irst restored. The remaining
- * operations described therefore operate on the restored
- * payload."
- */
- if (fence->temporary.kind != RADV_FENCE_NONE)
- radv_destroy_fence_part(device, &fence->temporary);
+ /* From the Vulkan 1.0.53 spec:
+ *
+ * "If any member of pFences currently has its payload
+ * imported with temporary permanence, that fence’s prior
+       * permanent payload is first restored. The remaining
+ * operations described therefore operate on the restored
+ * payload."
+ */
+ if (fence->temporary.kind != RADV_FENCE_NONE)
+ radv_destroy_fence_part(device, &fence->temporary);
- device->ws->reset_syncobj(device->ws, fence->permanent.syncobj);
- }
+ device->ws->reset_syncobj(device->ws, fence->permanent.syncobj);
+ }
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
-VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
+VkResult
+radv_GetFenceStatus(VkDevice _device, VkFence _fence)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_fence, fence, _fence);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_fence, fence, _fence);
- struct radv_fence_part *part =
- fence->temporary.kind != RADV_FENCE_NONE ?
- &fence->temporary : &fence->permanent;
+ struct radv_fence_part *part =
+ fence->temporary.kind != RADV_FENCE_NONE ? &fence->temporary : &fence->permanent;
- if (radv_device_is_lost(device))
- return VK_ERROR_DEVICE_LOST;
+ if (radv_device_is_lost(device))
+ return VK_ERROR_DEVICE_LOST;
- bool success = device->ws->wait_syncobj(device->ws,
- &part->syncobj, 1, true, 0);
- return success ? VK_SUCCESS : VK_NOT_READY;
+ bool success = device->ws->wait_syncobj(device->ws, &part->syncobj, 1, true, 0);
+ return success ? VK_SUCCESS : VK_NOT_READY;
}
-
// Queue semaphore functions
static void
radv_create_timeline(struct radv_timeline *timeline, uint64_t value)
{
- timeline->highest_signaled = value;
- timeline->highest_submitted = value;
- list_inithead(&timeline->points);
- list_inithead(&timeline->free_points);
- list_inithead(&timeline->waiters);
- mtx_init(&timeline->mutex, mtx_plain);
+ timeline->highest_signaled = value;
+ timeline->highest_submitted = value;
+ list_inithead(&timeline->points);
+ list_inithead(&timeline->free_points);
+ list_inithead(&timeline->waiters);
+ mtx_init(&timeline->mutex, mtx_plain);
}
static void
-radv_destroy_timeline(struct radv_device *device,
- struct radv_timeline *timeline)
-{
- list_for_each_entry_safe(struct radv_timeline_point, point,
- &timeline->free_points, list) {
- list_del(&point->list);
- device->ws->destroy_syncobj(device->ws, point->syncobj);
- free(point);
- }
- list_for_each_entry_safe(struct radv_timeline_point, point,
- &timeline->points, list) {
- list_del(&point->list);
- device->ws->destroy_syncobj(device->ws, point->syncobj);
- free(point);
- }
- mtx_destroy(&timeline->mutex);
+radv_destroy_timeline(struct radv_device *device, struct radv_timeline *timeline)
+{
+ list_for_each_entry_safe(struct radv_timeline_point, point, &timeline->free_points, list)
+ {
+ list_del(&point->list);
+ device->ws->destroy_syncobj(device->ws, point->syncobj);
+ free(point);
+ }
+ list_for_each_entry_safe(struct radv_timeline_point, point, &timeline->points, list)
+ {
+ list_del(&point->list);
+ device->ws->destroy_syncobj(device->ws, point->syncobj);
+ free(point);
+ }
+ mtx_destroy(&timeline->mutex);
}
static void
-radv_timeline_gc_locked(struct radv_device *device,
- struct radv_timeline *timeline)
+radv_timeline_gc_locked(struct radv_device *device, struct radv_timeline *timeline)
{
- list_for_each_entry_safe(struct radv_timeline_point, point,
- &timeline->points, list) {
- if (point->wait_count || point->value > timeline->highest_submitted)
- return;
+ list_for_each_entry_safe(struct radv_timeline_point, point, &timeline->points, list)
+ {
+ if (point->wait_count || point->value > timeline->highest_submitted)
+ return;
- if (device->ws->wait_syncobj(device->ws, &point->syncobj, 1, true, 0)) {
- timeline->highest_signaled = point->value;
- list_del(&point->list);
- list_add(&point->list, &timeline->free_points);
- }
- }
+ if (device->ws->wait_syncobj(device->ws, &point->syncobj, 1, true, 0)) {
+ timeline->highest_signaled = point->value;
+ list_del(&point->list);
+ list_add(&point->list, &timeline->free_points);
+ }
+ }
}
static struct radv_timeline_point *
-radv_timeline_find_point_at_least_locked(struct radv_device *device,
- struct radv_timeline *timeline,
+radv_timeline_find_point_at_least_locked(struct radv_device *device, struct radv_timeline *timeline,
uint64_t p)
{
- radv_timeline_gc_locked(device, timeline);
+ radv_timeline_gc_locked(device, timeline);
- if (p <= timeline->highest_signaled)
- return NULL;
+ if (p <= timeline->highest_signaled)
+ return NULL;
- list_for_each_entry(struct radv_timeline_point, point,
- &timeline->points, list) {
- if (point->value >= p) {
- ++point->wait_count;
- return point;
- }
- }
- return NULL;
+ list_for_each_entry(struct radv_timeline_point, point, &timeline->points, list)
+ {
+ if (point->value >= p) {
+ ++point->wait_count;
+ return point;
+ }
+ }
+ return NULL;
}
static struct radv_timeline_point *
-radv_timeline_add_point_locked(struct radv_device *device,
- struct radv_timeline *timeline,
+radv_timeline_add_point_locked(struct radv_device *device, struct radv_timeline *timeline,
uint64_t p)
{
- radv_timeline_gc_locked(device, timeline);
+ radv_timeline_gc_locked(device, timeline);
- struct radv_timeline_point *ret = NULL;
- struct radv_timeline_point *prev = NULL;
- int r;
+ struct radv_timeline_point *ret = NULL;
+ struct radv_timeline_point *prev = NULL;
+ int r;
- if (p <= timeline->highest_signaled)
- return NULL;
+ if (p <= timeline->highest_signaled)
+ return NULL;
- list_for_each_entry(struct radv_timeline_point, point,
- &timeline->points, list) {
- if (point->value == p) {
- return NULL;
- }
+ list_for_each_entry(struct radv_timeline_point, point, &timeline->points, list)
+ {
+ if (point->value == p) {
+ return NULL;
+ }
- if (point->value < p)
- prev = point;
- }
+ if (point->value < p)
+ prev = point;
+ }
- if (list_is_empty(&timeline->free_points)) {
- ret = malloc(sizeof(struct radv_timeline_point));
- r = device->ws->create_syncobj(device->ws, false, &ret->syncobj);
- if (r) {
- free(ret);
- return NULL;
- }
- } else {
- ret = list_first_entry(&timeline->free_points, struct radv_timeline_point, list);
- list_del(&ret->list);
+ if (list_is_empty(&timeline->free_points)) {
+ ret = malloc(sizeof(struct radv_timeline_point));
+ r = device->ws->create_syncobj(device->ws, false, &ret->syncobj);
+ if (r) {
+ free(ret);
+ return NULL;
+ }
+ } else {
+ ret = list_first_entry(&timeline->free_points, struct radv_timeline_point, list);
+ list_del(&ret->list);
- device->ws->reset_syncobj(device->ws, ret->syncobj);
- }
+ device->ws->reset_syncobj(device->ws, ret->syncobj);
+ }
- ret->value = p;
- ret->wait_count = 1;
+ ret->value = p;
+ ret->wait_count = 1;
- if (prev) {
- list_add(&ret->list, &prev->list);
- } else {
- list_addtail(&ret->list, &timeline->points);
- }
- return ret;
+ if (prev) {
+ list_add(&ret->list, &prev->list);
+ } else {
+ list_addtail(&ret->list, &timeline->points);
+ }
+ return ret;
}
-
static VkResult
-radv_timeline_wait(struct radv_device *device,
- struct radv_timeline *timeline,
- uint64_t value,
+radv_timeline_wait(struct radv_device *device, struct radv_timeline *timeline, uint64_t value,
uint64_t abs_timeout)
{
- mtx_lock(&timeline->mutex);
+ mtx_lock(&timeline->mutex);
- while(timeline->highest_submitted < value) {
- struct timespec abstime;
- timespec_from_nsec(&abstime, abs_timeout);
+ while (timeline->highest_submitted < value) {
+ struct timespec abstime;
+ timespec_from_nsec(&abstime, abs_timeout);
- u_cnd_monotonic_timedwait(&device->timeline_cond, &timeline->mutex, &abstime);
+ u_cnd_monotonic_timedwait(&device->timeline_cond, &timeline->mutex, &abstime);
- if (radv_get_current_time() >= abs_timeout && timeline->highest_submitted < value) {
- mtx_unlock(&timeline->mutex);
- return VK_TIMEOUT;
- }
- }
+ if (radv_get_current_time() >= abs_timeout && timeline->highest_submitted < value) {
+ mtx_unlock(&timeline->mutex);
+ return VK_TIMEOUT;
+ }
+ }
- struct radv_timeline_point *point = radv_timeline_find_point_at_least_locked(device, timeline, value);
- mtx_unlock(&timeline->mutex);
- if (!point)
- return VK_SUCCESS;
+ struct radv_timeline_point *point =
+ radv_timeline_find_point_at_least_locked(device, timeline, value);
+ mtx_unlock(&timeline->mutex);
+ if (!point)
+ return VK_SUCCESS;
- bool success = device->ws->wait_syncobj(device->ws, &point->syncobj, 1, true, abs_timeout);
+ bool success = device->ws->wait_syncobj(device->ws, &point->syncobj, 1, true, abs_timeout);
- mtx_lock(&timeline->mutex);
- point->wait_count--;
- mtx_unlock(&timeline->mutex);
- return success ? VK_SUCCESS : VK_TIMEOUT;
+ mtx_lock(&timeline->mutex);
+ point->wait_count--;
+ mtx_unlock(&timeline->mutex);
+ return success ? VK_SUCCESS : VK_TIMEOUT;
}
static void
radv_timeline_trigger_waiters_locked(struct radv_timeline *timeline,
struct list_head *processing_list)
{
- list_for_each_entry_safe(struct radv_timeline_waiter, waiter,
- &timeline->waiters, list) {
- if (waiter->value > timeline->highest_submitted)
- continue;
+ list_for_each_entry_safe(struct radv_timeline_waiter, waiter, &timeline->waiters, list)
+ {
+ if (waiter->value > timeline->highest_submitted)
+ continue;
- radv_queue_trigger_submission(waiter->submission, 1, processing_list);
- list_del(&waiter->list);
- }
+ radv_queue_trigger_submission(waiter->submission, 1, processing_list);
+ list_del(&waiter->list);
+ }
}
-static
-void radv_destroy_semaphore_part(struct radv_device *device,
- struct radv_semaphore_part *part)
-{
- switch(part->kind) {
- case RADV_SEMAPHORE_NONE:
- break;
- case RADV_SEMAPHORE_TIMELINE:
- radv_destroy_timeline(device, &part->timeline);
- break;
- case RADV_SEMAPHORE_SYNCOBJ:
- case RADV_SEMAPHORE_TIMELINE_SYNCOBJ:
- device->ws->destroy_syncobj(device->ws, part->syncobj);
- break;
- }
- part->kind = RADV_SEMAPHORE_NONE;
+static void
+radv_destroy_semaphore_part(struct radv_device *device, struct radv_semaphore_part *part)
+{
+ switch (part->kind) {
+ case RADV_SEMAPHORE_NONE:
+ break;
+ case RADV_SEMAPHORE_TIMELINE:
+ radv_destroy_timeline(device, &part->timeline);
+ break;
+ case RADV_SEMAPHORE_SYNCOBJ:
+ case RADV_SEMAPHORE_TIMELINE_SYNCOBJ:
+ device->ws->destroy_syncobj(device->ws, part->syncobj);
+ break;
+ }
+ part->kind = RADV_SEMAPHORE_NONE;
}
static VkSemaphoreTypeKHR
radv_get_semaphore_type(const void *pNext, uint64_t *initial_value)
{
- const VkSemaphoreTypeCreateInfo *type_info =
- vk_find_struct_const(pNext, SEMAPHORE_TYPE_CREATE_INFO);
+ const VkSemaphoreTypeCreateInfo *type_info =
+ vk_find_struct_const(pNext, SEMAPHORE_TYPE_CREATE_INFO);
- if (!type_info)
- return VK_SEMAPHORE_TYPE_BINARY;
+ if (!type_info)
+ return VK_SEMAPHORE_TYPE_BINARY;
- if (initial_value)
- *initial_value = type_info->initialValue;
- return type_info->semaphoreType;
+ if (initial_value)
+ *initial_value = type_info->initialValue;
+ return type_info->semaphoreType;
}
static void
-radv_destroy_semaphore(struct radv_device *device,
- const VkAllocationCallbacks *pAllocator,
- struct radv_semaphore *sem)
-{
- radv_destroy_semaphore_part(device, &sem->temporary);
- radv_destroy_semaphore_part(device, &sem->permanent);
- vk_object_base_finish(&sem->base);
- vk_free2(&device->vk.alloc, pAllocator, sem);
-}
-
-VkResult radv_CreateSemaphore(
- VkDevice _device,
- const VkSemaphoreCreateInfo* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkSemaphore* pSemaphore)
-{
- RADV_FROM_HANDLE(radv_device, device, _device);
- uint64_t initial_value = 0;
- VkSemaphoreTypeKHR type = radv_get_semaphore_type(pCreateInfo->pNext, &initial_value);
-
- struct radv_semaphore *sem = vk_alloc2(&device->vk.alloc, pAllocator,
- sizeof(*sem), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (!sem)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
-
- vk_object_base_init(&device->vk, &sem->base,
- VK_OBJECT_TYPE_SEMAPHORE);
-
- sem->temporary.kind = RADV_SEMAPHORE_NONE;
- sem->permanent.kind = RADV_SEMAPHORE_NONE;
-
- if (type == VK_SEMAPHORE_TYPE_TIMELINE &&
- device->physical_device->rad_info.has_timeline_syncobj) {
- int ret = device->ws->create_syncobj(device->ws, false, &sem->permanent.syncobj);
- if (ret) {
- radv_destroy_semaphore(device, pAllocator, sem);
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- }
- device->ws->signal_syncobj(device->ws, sem->permanent.syncobj, initial_value);
- sem->permanent.timeline_syncobj.max_point = initial_value;
- sem->permanent.kind = RADV_SEMAPHORE_TIMELINE_SYNCOBJ;
- } else if (type == VK_SEMAPHORE_TYPE_TIMELINE) {
- radv_create_timeline(&sem->permanent.timeline, initial_value);
- sem->permanent.kind = RADV_SEMAPHORE_TIMELINE;
- } else {
- int ret = device->ws->create_syncobj(device->ws, false,
- &sem->permanent.syncobj);
- if (ret) {
- radv_destroy_semaphore(device, pAllocator, sem);
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- }
- sem->permanent.kind = RADV_SEMAPHORE_SYNCOBJ;
- }
-
- *pSemaphore = radv_semaphore_to_handle(sem);
- return VK_SUCCESS;
-}
-
-void radv_DestroySemaphore(
- VkDevice _device,
- VkSemaphore _semaphore,
- const VkAllocationCallbacks* pAllocator)
-{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_semaphore, sem, _semaphore);
- if (!_semaphore)
- return;
-
- radv_destroy_semaphore(device, pAllocator, sem);
+radv_destroy_semaphore(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
+ struct radv_semaphore *sem)
+{
+ radv_destroy_semaphore_part(device, &sem->temporary);
+ radv_destroy_semaphore_part(device, &sem->permanent);
+ vk_object_base_finish(&sem->base);
+ vk_free2(&device->vk.alloc, pAllocator, sem);
}
VkResult
-radv_GetSemaphoreCounterValue(VkDevice _device,
- VkSemaphore _semaphore,
- uint64_t* pValue)
-{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_semaphore, semaphore, _semaphore);
-
- if (radv_device_is_lost(device))
- return VK_ERROR_DEVICE_LOST;
-
- struct radv_semaphore_part *part =
- semaphore->temporary.kind != RADV_SEMAPHORE_NONE ? &semaphore->temporary : &semaphore->permanent;
-
- switch (part->kind) {
- case RADV_SEMAPHORE_TIMELINE: {
- mtx_lock(&part->timeline.mutex);
- radv_timeline_gc_locked(device, &part->timeline);
- *pValue = part->timeline.highest_signaled;
- mtx_unlock(&part->timeline.mutex);
- return VK_SUCCESS;
- }
- case RADV_SEMAPHORE_TIMELINE_SYNCOBJ: {
- return device->ws->query_syncobj(device->ws, part->syncobj, pValue);
- }
- case RADV_SEMAPHORE_NONE:
- case RADV_SEMAPHORE_SYNCOBJ:
- unreachable("Invalid semaphore type");
- }
- unreachable("Unhandled semaphore type");
+radv_CreateSemaphore(VkDevice _device, const VkSemaphoreCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkSemaphore *pSemaphore)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ uint64_t initial_value = 0;
+ VkSemaphoreTypeKHR type = radv_get_semaphore_type(pCreateInfo->pNext, &initial_value);
+
+ struct radv_semaphore *sem =
+ vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*sem), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (!sem)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ vk_object_base_init(&device->vk, &sem->base, VK_OBJECT_TYPE_SEMAPHORE);
+
+ sem->temporary.kind = RADV_SEMAPHORE_NONE;
+ sem->permanent.kind = RADV_SEMAPHORE_NONE;
+
+ if (type == VK_SEMAPHORE_TYPE_TIMELINE &&
+ device->physical_device->rad_info.has_timeline_syncobj) {
+ int ret = device->ws->create_syncobj(device->ws, false, &sem->permanent.syncobj);
+ if (ret) {
+ radv_destroy_semaphore(device, pAllocator, sem);
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+ device->ws->signal_syncobj(device->ws, sem->permanent.syncobj, initial_value);
+ sem->permanent.timeline_syncobj.max_point = initial_value;
+ sem->permanent.kind = RADV_SEMAPHORE_TIMELINE_SYNCOBJ;
+ } else if (type == VK_SEMAPHORE_TYPE_TIMELINE) {
+ radv_create_timeline(&sem->permanent.timeline, initial_value);
+ sem->permanent.kind = RADV_SEMAPHORE_TIMELINE;
+ } else {
+ int ret = device->ws->create_syncobj(device->ws, false, &sem->permanent.syncobj);
+ if (ret) {
+ radv_destroy_semaphore(device, pAllocator, sem);
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+ sem->permanent.kind = RADV_SEMAPHORE_SYNCOBJ;
+ }
+
+ *pSemaphore = radv_semaphore_to_handle(sem);
+ return VK_SUCCESS;
+}
+
+void
+radv_DestroySemaphore(VkDevice _device, VkSemaphore _semaphore,
+ const VkAllocationCallbacks *pAllocator)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_semaphore, sem, _semaphore);
+ if (!_semaphore)
+ return;
+
+ radv_destroy_semaphore(device, pAllocator, sem);
}
+VkResult
+radv_GetSemaphoreCounterValue(VkDevice _device, VkSemaphore _semaphore, uint64_t *pValue)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_semaphore, semaphore, _semaphore);
+
+ if (radv_device_is_lost(device))
+ return VK_ERROR_DEVICE_LOST;
+
+ struct radv_semaphore_part *part = semaphore->temporary.kind != RADV_SEMAPHORE_NONE
+ ? &semaphore->temporary
+ : &semaphore->permanent;
+
+ switch (part->kind) {
+ case RADV_SEMAPHORE_TIMELINE: {
+ mtx_lock(&part->timeline.mutex);
+ radv_timeline_gc_locked(device, &part->timeline);
+ *pValue = part->timeline.highest_signaled;
+ mtx_unlock(&part->timeline.mutex);
+ return VK_SUCCESS;
+ }
+ case RADV_SEMAPHORE_TIMELINE_SYNCOBJ: {
+ return device->ws->query_syncobj(device->ws, part->syncobj, pValue);
+ }
+ case RADV_SEMAPHORE_NONE:
+ case RADV_SEMAPHORE_SYNCOBJ:
+ unreachable("Invalid semaphore type");
+ }
+ unreachable("Unhandled semaphore type");
+}
static VkResult
-radv_wait_timelines(struct radv_device *device,
- const VkSemaphoreWaitInfo* pWaitInfo,
+radv_wait_timelines(struct radv_device *device, const VkSemaphoreWaitInfo *pWaitInfo,
uint64_t abs_timeout)
{
- if ((pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR) && pWaitInfo->semaphoreCount > 1) {
- for (;;) {
- for(uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) {
- RADV_FROM_HANDLE(radv_semaphore, semaphore, pWaitInfo->pSemaphores[i]);
- VkResult result = radv_timeline_wait(device, &semaphore->permanent.timeline, pWaitInfo->pValues[i], 0);
-
- if (result == VK_SUCCESS)
- return VK_SUCCESS;
- }
- if (radv_get_current_time() > abs_timeout)
- return VK_TIMEOUT;
- }
- }
-
- for(uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) {
- RADV_FROM_HANDLE(radv_semaphore, semaphore, pWaitInfo->pSemaphores[i]);
- VkResult result = radv_timeline_wait(device, &semaphore->permanent.timeline, pWaitInfo->pValues[i], abs_timeout);
-
- if (result != VK_SUCCESS)
- return result;
- }
- return VK_SUCCESS;
+ if ((pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR) && pWaitInfo->semaphoreCount > 1) {
+ for (;;) {
+ for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) {
+ RADV_FROM_HANDLE(radv_semaphore, semaphore, pWaitInfo->pSemaphores[i]);
+ VkResult result =
+ radv_timeline_wait(device, &semaphore->permanent.timeline, pWaitInfo->pValues[i], 0);
+
+ if (result == VK_SUCCESS)
+ return VK_SUCCESS;
+ }
+ if (radv_get_current_time() > abs_timeout)
+ return VK_TIMEOUT;
+ }
+ }
+
+ for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) {
+ RADV_FROM_HANDLE(radv_semaphore, semaphore, pWaitInfo->pSemaphores[i]);
+ VkResult result = radv_timeline_wait(device, &semaphore->permanent.timeline,
+ pWaitInfo->pValues[i], abs_timeout);
+
+ if (result != VK_SUCCESS)
+ return result;
+ }
+ return VK_SUCCESS;
}
VkResult
-radv_WaitSemaphores(VkDevice _device,
- const VkSemaphoreWaitInfo* pWaitInfo,
- uint64_t timeout)
+radv_WaitSemaphores(VkDevice _device, const VkSemaphoreWaitInfo *pWaitInfo, uint64_t timeout)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_device, device, _device);
- if (radv_device_is_lost(device))
- return VK_ERROR_DEVICE_LOST;
+ if (radv_device_is_lost(device))
+ return VK_ERROR_DEVICE_LOST;
- uint64_t abs_timeout = radv_get_absolute_timeout(timeout);
+ uint64_t abs_timeout = radv_get_absolute_timeout(timeout);
- if (radv_semaphore_from_handle(pWaitInfo->pSemaphores[0])->permanent.kind == RADV_SEMAPHORE_TIMELINE)
- return radv_wait_timelines(device, pWaitInfo, abs_timeout);
+ if (radv_semaphore_from_handle(pWaitInfo->pSemaphores[0])->permanent.kind ==
+ RADV_SEMAPHORE_TIMELINE)
+ return radv_wait_timelines(device, pWaitInfo, abs_timeout);
- if (pWaitInfo->semaphoreCount > UINT32_MAX / sizeof(uint32_t))
- return vk_errorf(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY, "semaphoreCount integer overflow");
+ if (pWaitInfo->semaphoreCount > UINT32_MAX / sizeof(uint32_t))
+ return vk_errorf(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY,
+ "semaphoreCount integer overflow");
- bool wait_all = !(pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR);
- uint32_t *handles = malloc(sizeof(*handles) * pWaitInfo->semaphoreCount);
- if (!handles)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ bool wait_all = !(pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR);
+ uint32_t *handles = malloc(sizeof(*handles) * pWaitInfo->semaphoreCount);
+ if (!handles)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) {
- RADV_FROM_HANDLE(radv_semaphore, semaphore, pWaitInfo->pSemaphores[i]);
- handles[i] = semaphore->permanent.syncobj;
- }
+ for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) {
+ RADV_FROM_HANDLE(radv_semaphore, semaphore, pWaitInfo->pSemaphores[i]);
+ handles[i] = semaphore->permanent.syncobj;
+ }
- bool success = device->ws->wait_timeline_syncobj(device->ws, handles, pWaitInfo->pValues,
- pWaitInfo->semaphoreCount, wait_all, false,
- abs_timeout);
- free(handles);
- return success ? VK_SUCCESS : VK_TIMEOUT;
+ bool success =
+ device->ws->wait_timeline_syncobj(device->ws, handles, pWaitInfo->pValues,
+ pWaitInfo->semaphoreCount, wait_all, false, abs_timeout);
+ free(handles);
+ return success ? VK_SUCCESS : VK_TIMEOUT;
}
VkResult
-radv_SignalSemaphore(VkDevice _device,
- const VkSemaphoreSignalInfo* pSignalInfo)
-{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_semaphore, semaphore, pSignalInfo->semaphore);
-
- struct radv_semaphore_part *part =
- semaphore->temporary.kind != RADV_SEMAPHORE_NONE ? &semaphore->temporary : &semaphore->permanent;
-
- switch(part->kind) {
- case RADV_SEMAPHORE_TIMELINE: {
- mtx_lock(&part->timeline.mutex);
- radv_timeline_gc_locked(device, &part->timeline);
- part->timeline.highest_submitted = MAX2(part->timeline.highest_submitted, pSignalInfo->value);
- part->timeline.highest_signaled = MAX2(part->timeline.highest_signaled, pSignalInfo->value);
-
- struct list_head processing_list;
- list_inithead(&processing_list);
- radv_timeline_trigger_waiters_locked(&part->timeline, &processing_list);
- mtx_unlock(&part->timeline.mutex);
-
- VkResult result = radv_process_submissions(&processing_list);
-
- /* This needs to happen after radv_process_submissions, so
- * that any submitted submissions that are now unblocked get
- * processed before we wake the application. This way we
- * ensure that any binary semaphores that are now unblocked
- * are usable by the application. */
- u_cnd_monotonic_broadcast(&device->timeline_cond);
-
- return result;
- }
- case RADV_SEMAPHORE_TIMELINE_SYNCOBJ: {
- part->timeline_syncobj.max_point = MAX2(part->timeline_syncobj.max_point, pSignalInfo->value);
- device->ws->signal_syncobj(device->ws, part->syncobj, pSignalInfo->value);
- break;
- }
- case RADV_SEMAPHORE_NONE:
- case RADV_SEMAPHORE_SYNCOBJ:
- unreachable("Invalid semaphore type");
- }
- return VK_SUCCESS;
+radv_SignalSemaphore(VkDevice _device, const VkSemaphoreSignalInfo *pSignalInfo)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_semaphore, semaphore, pSignalInfo->semaphore);
+
+ struct radv_semaphore_part *part = semaphore->temporary.kind != RADV_SEMAPHORE_NONE
+ ? &semaphore->temporary
+ : &semaphore->permanent;
+
+ switch (part->kind) {
+ case RADV_SEMAPHORE_TIMELINE: {
+ mtx_lock(&part->timeline.mutex);
+ radv_timeline_gc_locked(device, &part->timeline);
+ part->timeline.highest_submitted = MAX2(part->timeline.highest_submitted, pSignalInfo->value);
+ part->timeline.highest_signaled = MAX2(part->timeline.highest_signaled, pSignalInfo->value);
+
+ struct list_head processing_list;
+ list_inithead(&processing_list);
+ radv_timeline_trigger_waiters_locked(&part->timeline, &processing_list);
+ mtx_unlock(&part->timeline.mutex);
+
+ VkResult result = radv_process_submissions(&processing_list);
+
+ /* This needs to happen after radv_process_submissions, so
+       * that any queued submissions that are now unblocked get
+ * processed before we wake the application. This way we
+ * ensure that any binary semaphores that are now unblocked
+ * are usable by the application. */
+ u_cnd_monotonic_broadcast(&device->timeline_cond);
+
+ return result;
+ }
+ case RADV_SEMAPHORE_TIMELINE_SYNCOBJ: {
+ part->timeline_syncobj.max_point = MAX2(part->timeline_syncobj.max_point, pSignalInfo->value);
+ device->ws->signal_syncobj(device->ws, part->syncobj, pSignalInfo->value);
+ break;
+ }
+ case RADV_SEMAPHORE_NONE:
+ case RADV_SEMAPHORE_SYNCOBJ:
+ unreachable("Invalid semaphore type");
+ }
+ return VK_SUCCESS;
}
-static void radv_destroy_event(struct radv_device *device,
- const VkAllocationCallbacks* pAllocator,
- struct radv_event *event)
+static void
+radv_destroy_event(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
+ struct radv_event *event)
{
- if (event->bo)
- device->ws->buffer_destroy(device->ws, event->bo);
+ if (event->bo)
+ device->ws->buffer_destroy(device->ws, event->bo);
- vk_object_base_finish(&event->base);
- vk_free2(&device->vk.alloc, pAllocator, event);
+ vk_object_base_finish(&event->base);
+ vk_free2(&device->vk.alloc, pAllocator, event);
}
-VkResult radv_CreateEvent(
- VkDevice _device,
- const VkEventCreateInfo* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkEvent* pEvent)
+VkResult
+radv_CreateEvent(VkDevice _device, const VkEventCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkEvent *pEvent)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- struct radv_event *event = vk_alloc2(&device->vk.alloc, pAllocator,
- sizeof(*event), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ struct radv_event *event = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*event), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (!event)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ if (!event)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- vk_object_base_init(&device->vk, &event->base, VK_OBJECT_TYPE_EVENT);
+ vk_object_base_init(&device->vk, &event->base, VK_OBJECT_TYPE_EVENT);
- event->bo = device->ws->buffer_create(device->ws, 8, 8,
- RADEON_DOMAIN_GTT,
- RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING,
- RADV_BO_PRIORITY_FENCE);
- if (!event->bo) {
- radv_destroy_event(device, pAllocator, event);
- return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
- }
+ event->bo = device->ws->buffer_create(
+ device->ws, 8, 8, RADEON_DOMAIN_GTT,
+ RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING,
+ RADV_BO_PRIORITY_FENCE);
+ if (!event->bo) {
+ radv_destroy_event(device, pAllocator, event);
+ return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ }
- event->map = (uint64_t*)device->ws->buffer_map(event->bo);
- if (!event->map) {
- radv_destroy_event(device, pAllocator, event);
- return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
- }
+ event->map = (uint64_t *)device->ws->buffer_map(event->bo);
+ if (!event->map) {
+ radv_destroy_event(device, pAllocator, event);
+ return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ }
- *pEvent = radv_event_to_handle(event);
+ *pEvent = radv_event_to_handle(event);
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
-void radv_DestroyEvent(
- VkDevice _device,
- VkEvent _event,
- const VkAllocationCallbacks* pAllocator)
+void
+radv_DestroyEvent(VkDevice _device, VkEvent _event, const VkAllocationCallbacks *pAllocator)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_event, event, _event);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_event, event, _event);
- if (!event)
- return;
+ if (!event)
+ return;
- radv_destroy_event(device, pAllocator, event);
+ radv_destroy_event(device, pAllocator, event);
}
-VkResult radv_GetEventStatus(
- VkDevice _device,
- VkEvent _event)
+VkResult
+radv_GetEventStatus(VkDevice _device, VkEvent _event)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_event, event, _event);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_event, event, _event);
- if (radv_device_is_lost(device))
- return VK_ERROR_DEVICE_LOST;
+ if (radv_device_is_lost(device))
+ return VK_ERROR_DEVICE_LOST;
- if (*event->map == 1)
- return VK_EVENT_SET;
- return VK_EVENT_RESET;
+ if (*event->map == 1)
+ return VK_EVENT_SET;
+ return VK_EVENT_RESET;
}
-VkResult radv_SetEvent(
- VkDevice _device,
- VkEvent _event)
+VkResult
+radv_SetEvent(VkDevice _device, VkEvent _event)
{
- RADV_FROM_HANDLE(radv_event, event, _event);
- *event->map = 1;
+ RADV_FROM_HANDLE(radv_event, event, _event);
+ *event->map = 1;
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
-VkResult radv_ResetEvent(
- VkDevice _device,
- VkEvent _event)
+VkResult
+radv_ResetEvent(VkDevice _device, VkEvent _event)
{
- RADV_FROM_HANDLE(radv_event, event, _event);
- *event->map = 0;
+ RADV_FROM_HANDLE(radv_event, event, _event);
+ *event->map = 0;
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
static void
-radv_destroy_buffer(struct radv_device *device,
- const VkAllocationCallbacks *pAllocator,
- struct radv_buffer *buffer)
+radv_destroy_buffer(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
+ struct radv_buffer *buffer)
{
- if ((buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) && buffer->bo)
- device->ws->buffer_destroy(device->ws, buffer->bo);
+ if ((buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) && buffer->bo)
+ device->ws->buffer_destroy(device->ws, buffer->bo);
- vk_object_base_finish(&buffer->base);
- vk_free2(&device->vk.alloc, pAllocator, buffer);
+ vk_object_base_finish(&buffer->base);
+ vk_free2(&device->vk.alloc, pAllocator, buffer);
}
-VkResult radv_CreateBuffer(
- VkDevice _device,
- const VkBufferCreateInfo* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkBuffer* pBuffer)
+VkResult
+radv_CreateBuffer(VkDevice _device, const VkBufferCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkBuffer *pBuffer)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- struct radv_buffer *buffer;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ struct radv_buffer *buffer;
- if (pCreateInfo->size > RADV_MAX_MEMORY_ALLOCATION_SIZE)
- return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+ if (pCreateInfo->size > RADV_MAX_MEMORY_ALLOCATION_SIZE)
+ return VK_ERROR_OUT_OF_DEVICE_MEMORY;
- assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
+ assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
- buffer = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*buffer), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (buffer == NULL)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ buffer = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*buffer), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (buffer == NULL)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- vk_object_base_init(&device->vk, &buffer->base, VK_OBJECT_TYPE_BUFFER);
+ vk_object_base_init(&device->vk, &buffer->base, VK_OBJECT_TYPE_BUFFER);
- buffer->size = pCreateInfo->size;
- buffer->usage = pCreateInfo->usage;
- buffer->bo = NULL;
- buffer->offset = 0;
- buffer->flags = pCreateInfo->flags;
+ buffer->size = pCreateInfo->size;
+ buffer->usage = pCreateInfo->usage;
+ buffer->bo = NULL;
+ buffer->offset = 0;
+ buffer->flags = pCreateInfo->flags;
- buffer->shareable = vk_find_struct_const(pCreateInfo->pNext,
- EXTERNAL_MEMORY_BUFFER_CREATE_INFO) != NULL;
+ buffer->shareable =
+ vk_find_struct_const(pCreateInfo->pNext, EXTERNAL_MEMORY_BUFFER_CREATE_INFO) != NULL;
- if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
- buffer->bo = device->ws->buffer_create(device->ws,
- align64(buffer->size, 4096),
- 4096, 0, RADEON_FLAG_VIRTUAL,
- RADV_BO_PRIORITY_VIRTUAL);
- if (!buffer->bo) {
- radv_destroy_buffer(device, pAllocator, buffer);
- return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
- }
- }
+ if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
+ buffer->bo = device->ws->buffer_create(device->ws, align64(buffer->size, 4096), 4096, 0,
+ RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL);
+ if (!buffer->bo) {
+ radv_destroy_buffer(device, pAllocator, buffer);
+ return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ }
+ }
- *pBuffer = radv_buffer_to_handle(buffer);
+ *pBuffer = radv_buffer_to_handle(buffer);
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
-void radv_DestroyBuffer(
- VkDevice _device,
- VkBuffer _buffer,
- const VkAllocationCallbacks* pAllocator)
+void
+radv_DestroyBuffer(VkDevice _device, VkBuffer _buffer, const VkAllocationCallbacks *pAllocator)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
- if (!buffer)
- return;
+ if (!buffer)
+ return;
- radv_destroy_buffer(device, pAllocator, buffer);
+ radv_destroy_buffer(device, pAllocator, buffer);
}
-VkDeviceAddress radv_GetBufferDeviceAddress(
- VkDevice device,
- const VkBufferDeviceAddressInfo* pInfo)
+VkDeviceAddress
+radv_GetBufferDeviceAddress(VkDevice device, const VkBufferDeviceAddressInfo *pInfo)
{
- RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
- return radv_buffer_get_va(buffer->bo) + buffer->offset;
+ RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
+ return radv_buffer_get_va(buffer->bo) + buffer->offset;
}
-
-uint64_t radv_GetBufferOpaqueCaptureAddress(VkDevice device,
- const VkBufferDeviceAddressInfo* pInfo)
+uint64_t
+radv_GetBufferOpaqueCaptureAddress(VkDevice device, const VkBufferDeviceAddressInfo *pInfo)
{
- return 0;
+ return 0;
}
-uint64_t radv_GetDeviceMemoryOpaqueCaptureAddress(VkDevice device,
- const VkDeviceMemoryOpaqueCaptureAddressInfo* pInfo)
+uint64_t
+radv_GetDeviceMemoryOpaqueCaptureAddress(VkDevice device,
+ const VkDeviceMemoryOpaqueCaptureAddressInfo *pInfo)
{
- return 0;
+ return 0;
}
static inline unsigned
si_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil)
{
- if (stencil)
- return plane->surface.u.legacy.stencil_tiling_index[level];
- else
- return plane->surface.u.legacy.tiling_index[level];
+ if (stencil)
+ return plane->surface.u.legacy.stencil_tiling_index[level];
+ else
+ return plane->surface.u.legacy.tiling_index[level];
}
-static uint32_t radv_surface_max_layer_count(struct radv_image_view *iview)
+static uint32_t
+radv_surface_max_layer_count(struct radv_image_view *iview)
{
- return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : (iview->base_layer + iview->layer_count);
+ return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth
+ : (iview->base_layer + iview->layer_count);
}
static unsigned
get_dcc_max_uncompressed_block_size(const struct radv_device *device,
const struct radv_image_view *iview)
{
- if (device->physical_device->rad_info.chip_class < GFX10 &&
- iview->image->info.samples > 1) {
- if (iview->image->planes[0].surface.bpe == 1)
- return V_028C78_MAX_BLOCK_SIZE_64B;
- else if (iview->image->planes[0].surface.bpe == 2)
- return V_028C78_MAX_BLOCK_SIZE_128B;
- }
+ if (device->physical_device->rad_info.chip_class < GFX10 && iview->image->info.samples > 1) {
+ if (iview->image->planes[0].surface.bpe == 1)
+ return V_028C78_MAX_BLOCK_SIZE_64B;
+ else if (iview->image->planes[0].surface.bpe == 2)
+ return V_028C78_MAX_BLOCK_SIZE_128B;
+ }
- return V_028C78_MAX_BLOCK_SIZE_256B;
+ return V_028C78_MAX_BLOCK_SIZE_256B;
}
static unsigned
get_dcc_min_compressed_block_size(const struct radv_device *device)
{
- if (!device->physical_device->rad_info.has_dedicated_vram) {
- /* amdvlk: [min-compressed-block-size] should be set to 32 for
- * dGPU and 64 for APU because all of our APUs to date use
- * DIMMs which have a request granularity size of 64B while all
- * other chips have a 32B request size.
- */
- return V_028C78_MIN_BLOCK_SIZE_64B;
- }
+ if (!device->physical_device->rad_info.has_dedicated_vram) {
+ /* amdvlk: [min-compressed-block-size] should be set to 32 for
+ * dGPU and 64 for APU because all of our APUs to date use
+ * DIMMs which have a request granularity size of 64B while all
+ * other chips have a 32B request size.
+ */
+ return V_028C78_MIN_BLOCK_SIZE_64B;
+ }
- return V_028C78_MIN_BLOCK_SIZE_32B;
+ return V_028C78_MIN_BLOCK_SIZE_32B;
}
static uint32_t
-radv_init_dcc_control_reg(struct radv_device *device,
- struct radv_image_view *iview)
-{
- unsigned max_uncompressed_block_size = get_dcc_max_uncompressed_block_size(device, iview);
- unsigned min_compressed_block_size = get_dcc_min_compressed_block_size(device);
- unsigned max_compressed_block_size;
- unsigned independent_128b_blocks;
- unsigned independent_64b_blocks;
-
- if (!radv_dcc_enabled(iview->image, iview->base_mip))
- return 0;
-
- /* For GFX9+ ac_surface computes values for us (except min_compressed
- * and max_uncompressed) */
- if (device->physical_device->rad_info.chip_class >= GFX9) {
- max_compressed_block_size = iview->image->planes[0].surface.u.gfx9.dcc.max_compressed_block_size;
- independent_128b_blocks = iview->image->planes[0].surface.u.gfx9.dcc.independent_128B_blocks;
- independent_64b_blocks = iview->image->planes[0].surface.u.gfx9.dcc.independent_64B_blocks;
- } else {
- independent_128b_blocks = 0;
-
- if (iview->image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
- VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
- VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) {
- /* If this DCC image is potentially going to be used in texture
- * fetches, we need some special settings.
- */
- independent_64b_blocks = 1;
- max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
- } else {
- /* MAX_UNCOMPRESSED_BLOCK_SIZE must be >=
- * MAX_COMPRESSED_BLOCK_SIZE. Set MAX_COMPRESSED_BLOCK_SIZE as
- * big as possible for better compression state.
- */
- independent_64b_blocks = 0;
- max_compressed_block_size = max_uncompressed_block_size;
- }
- }
-
- return S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
- S_028C78_MAX_COMPRESSED_BLOCK_SIZE(max_compressed_block_size) |
- S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
- S_028C78_INDEPENDENT_64B_BLOCKS(independent_64b_blocks) |
- S_028C78_INDEPENDENT_128B_BLOCKS(independent_128b_blocks);
+radv_init_dcc_control_reg(struct radv_device *device, struct radv_image_view *iview)
+{
+ unsigned max_uncompressed_block_size = get_dcc_max_uncompressed_block_size(device, iview);
+ unsigned min_compressed_block_size = get_dcc_min_compressed_block_size(device);
+ unsigned max_compressed_block_size;
+ unsigned independent_128b_blocks;
+ unsigned independent_64b_blocks;
+
+ if (!radv_dcc_enabled(iview->image, iview->base_mip))
+ return 0;
+
+ /* For GFX9+ ac_surface computes values for us (except min_compressed
+ * and max_uncompressed) */
+ if (device->physical_device->rad_info.chip_class >= GFX9) {
+ max_compressed_block_size =
+ iview->image->planes[0].surface.u.gfx9.dcc.max_compressed_block_size;
+ independent_128b_blocks = iview->image->planes[0].surface.u.gfx9.dcc.independent_128B_blocks;
+ independent_64b_blocks = iview->image->planes[0].surface.u.gfx9.dcc.independent_64B_blocks;
+ } else {
+ independent_128b_blocks = 0;
+
+ if (iview->image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
+ VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) {
+ /* If this DCC image is potentially going to be used in texture
+ * fetches, we need some special settings.
+ */
+ independent_64b_blocks = 1;
+ max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
+ } else {
+ /* MAX_UNCOMPRESSED_BLOCK_SIZE must be >=
+ * MAX_COMPRESSED_BLOCK_SIZE. Set MAX_COMPRESSED_BLOCK_SIZE as
+ * big as possible for better compression state.
+ */
+ independent_64b_blocks = 0;
+ max_compressed_block_size = max_uncompressed_block_size;
+ }
+ }
+
+ return S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
+ S_028C78_MAX_COMPRESSED_BLOCK_SIZE(max_compressed_block_size) |
+ S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
+ S_028C78_INDEPENDENT_64B_BLOCKS(independent_64b_blocks) |
+ S_028C78_INDEPENDENT_128B_BLOCKS(independent_128b_blocks);
}
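/* Illustrative sketch of where this value ends up (the actual emit path lives
 * in radv_cmd_buffer.c and may differ in detail; the register name and the
 * 0x3c per-target stride below are assumptions): the packed result is cached
 * in cb->cb_dcc_control and later written to the per-target DCC control
 * register when the framebuffer state is emitted, conceptually
 *
 *    radeon_set_context_reg(cs, R_028C78_CB_COLOR0_DCC_CONTROL + index * 0x3c,
 *                           cb->cb_dcc_control);
 */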
void
-radv_initialise_color_surface(struct radv_device *device,
- struct radv_color_buffer_info *cb,
- struct radv_image_view *iview)
-{
- const struct util_format_description *desc;
- unsigned ntype, format, swap, endian;
- unsigned blend_clamp = 0, blend_bypass = 0;
- uint64_t va;
- const struct radv_image_plane *plane = &iview->image->planes[iview->plane_id];
- const struct radeon_surf *surf = &plane->surface;
-
- desc = vk_format_description(iview->vk_format);
-
- memset(cb, 0, sizeof(*cb));
-
- /* Intensity is implemented as Red, so treat it that way. */
- cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == PIPE_SWIZZLE_1);
-
- va = radv_buffer_get_va(iview->bo) + iview->image->offset;
-
- cb->cb_color_base = va >> 8;
-
- if (device->physical_device->rad_info.chip_class >= GFX9) {
- if (device->physical_device->rad_info.chip_class >= GFX10) {
- cb->cb_color_attrib3 |= S_028EE0_COLOR_SW_MODE(surf->u.gfx9.surf.swizzle_mode) |
- S_028EE0_FMASK_SW_MODE(surf->u.gfx9.fmask.swizzle_mode) |
- S_028EE0_CMASK_PIPE_ALIGNED(1) |
- S_028EE0_DCC_PIPE_ALIGNED(surf->u.gfx9.dcc.pipe_aligned);
- } else {
- struct gfx9_surf_meta_flags meta = {
- .rb_aligned = 1,
- .pipe_aligned = 1,
- };
-
- if (surf->dcc_offset)
- meta = surf->u.gfx9.dcc;
-
- cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(surf->u.gfx9.surf.swizzle_mode) |
- S_028C74_FMASK_SW_MODE(surf->u.gfx9.fmask.swizzle_mode) |
- S_028C74_RB_ALIGNED(meta.rb_aligned) |
- S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
- cb->cb_mrt_epitch = S_0287A0_EPITCH(surf->u.gfx9.surf.epitch);
- }
-
- cb->cb_color_base += surf->u.gfx9.surf_offset >> 8;
- cb->cb_color_base |= surf->tile_swizzle;
- } else {
- const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];
- unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
-
- cb->cb_color_base += level_info->offset >> 8;
- if (level_info->mode == RADEON_SURF_MODE_2D)
- cb->cb_color_base |= surf->tile_swizzle;
-
- pitch_tile_max = level_info->nblk_x / 8 - 1;
- slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
- tile_mode_index = si_tile_mode_index(plane, iview->base_mip, false);
-
- cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
- cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
- cb->cb_color_cmask_slice = surf->u.legacy.cmask_slice_tile_max;
-
- cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
-
- if (radv_image_has_fmask(iview->image)) {
- if (device->physical_device->rad_info.chip_class >= GFX7)
- cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(surf->u.legacy.fmask.pitch_in_pixels / 8 - 1);
- cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(surf->u.legacy.fmask.tiling_index);
- cb->cb_color_fmask_slice = S_028C88_TILE_MAX(surf->u.legacy.fmask.slice_tile_max);
- } else {
- /* This must be set for fast clear to work without FMASK. */
- if (device->physical_device->rad_info.chip_class >= GFX7)
- cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
- cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
- cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
- }
- }
-
- /* CMASK variables */
- va = radv_buffer_get_va(iview->bo) + iview->image->offset;
- va += surf->cmask_offset;
- cb->cb_color_cmask = va >> 8;
-
- va = radv_buffer_get_va(iview->bo) + iview->image->offset;
- va += surf->dcc_offset;
-
- if (radv_dcc_enabled(iview->image, iview->base_mip) &&
- device->physical_device->rad_info.chip_class <= GFX8)
- va += plane->surface.u.legacy.level[iview->base_mip].dcc_offset;
-
- unsigned dcc_tile_swizzle = surf->tile_swizzle;
- dcc_tile_swizzle &= (surf->dcc_alignment - 1) >> 8;
-
- cb->cb_dcc_base = va >> 8;
- cb->cb_dcc_base |= dcc_tile_swizzle;
-
- /* GFX10 field has the same base shift as the GFX6 field. */
- uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
- cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
- S_028C6C_SLICE_MAX_GFX10(max_slice);
-
- if (iview->image->info.samples > 1) {
- unsigned log_samples = util_logbase2(iview->image->info.samples);
-
- cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
- S_028C74_NUM_FRAGMENTS(log_samples);
- }
-
- if (radv_image_has_fmask(iview->image)) {
- va = radv_buffer_get_va(iview->bo) + iview->image->offset + surf->fmask_offset;
- cb->cb_color_fmask = va >> 8;
- cb->cb_color_fmask |= surf->fmask_tile_swizzle;
- } else {
- cb->cb_color_fmask = cb->cb_color_base;
- }
-
- ntype = radv_translate_color_numformat(iview->vk_format,
- desc,
- vk_format_get_first_non_void_channel(iview->vk_format));
- format = radv_translate_colorformat(iview->vk_format);
- if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
- radv_finishme("Illegal color\n");
- swap = radv_translate_colorswap(iview->vk_format, false);
- endian = radv_colorformat_endian_swap(format);
-
- /* blend clamp should be set for all NORM/SRGB types */
- if (ntype == V_028C70_NUMBER_UNORM ||
- ntype == V_028C70_NUMBER_SNORM ||
- ntype == V_028C70_NUMBER_SRGB)
- blend_clamp = 1;
-
- /* set blend bypass according to docs if SINT/UINT or
- 8/24 COLOR variants */
- if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
- format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
- format == V_028C70_COLOR_X24_8_32_FLOAT) {
- blend_clamp = 0;
- blend_bypass = 1;
- }
+radv_initialise_color_surface(struct radv_device *device, struct radv_color_buffer_info *cb,
+ struct radv_image_view *iview)
+{
+ const struct util_format_description *desc;
+ unsigned ntype, format, swap, endian;
+ unsigned blend_clamp = 0, blend_bypass = 0;
+ uint64_t va;
+ const struct radv_image_plane *plane = &iview->image->planes[iview->plane_id];
+ const struct radeon_surf *surf = &plane->surface;
+
+ desc = vk_format_description(iview->vk_format);
+
+ memset(cb, 0, sizeof(*cb));
+
+ /* Intensity is implemented as Red, so treat it that way. */
+ cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == PIPE_SWIZZLE_1);
+
+ va = radv_buffer_get_va(iview->bo) + iview->image->offset;
+
+ cb->cb_color_base = va >> 8;
+
+ if (device->physical_device->rad_info.chip_class >= GFX9) {
+ if (device->physical_device->rad_info.chip_class >= GFX10) {
+ cb->cb_color_attrib3 |= S_028EE0_COLOR_SW_MODE(surf->u.gfx9.surf.swizzle_mode) |
+ S_028EE0_FMASK_SW_MODE(surf->u.gfx9.fmask.swizzle_mode) |
+ S_028EE0_CMASK_PIPE_ALIGNED(1) |
+ S_028EE0_DCC_PIPE_ALIGNED(surf->u.gfx9.dcc.pipe_aligned);
+ } else {
+ struct gfx9_surf_meta_flags meta = {
+ .rb_aligned = 1,
+ .pipe_aligned = 1,
+ };
+
+ if (surf->dcc_offset)
+ meta = surf->u.gfx9.dcc;
+
+ cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(surf->u.gfx9.surf.swizzle_mode) |
+ S_028C74_FMASK_SW_MODE(surf->u.gfx9.fmask.swizzle_mode) |
+ S_028C74_RB_ALIGNED(meta.rb_aligned) |
+ S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
+ cb->cb_mrt_epitch = S_0287A0_EPITCH(surf->u.gfx9.surf.epitch);
+ }
+
+ cb->cb_color_base += surf->u.gfx9.surf_offset >> 8;
+ cb->cb_color_base |= surf->tile_swizzle;
+ } else {
+ const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];
+ unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
+
+ cb->cb_color_base += level_info->offset >> 8;
+ if (level_info->mode == RADEON_SURF_MODE_2D)
+ cb->cb_color_base |= surf->tile_swizzle;
+
+ pitch_tile_max = level_info->nblk_x / 8 - 1;
+ slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
+ tile_mode_index = si_tile_mode_index(plane, iview->base_mip, false);
+
+ cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
+ cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
+ cb->cb_color_cmask_slice = surf->u.legacy.cmask_slice_tile_max;
+
+ cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
+
+ if (radv_image_has_fmask(iview->image)) {
+ if (device->physical_device->rad_info.chip_class >= GFX7)
+ cb->cb_color_pitch |=
+ S_028C64_FMASK_TILE_MAX(surf->u.legacy.fmask.pitch_in_pixels / 8 - 1);
+ cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(surf->u.legacy.fmask.tiling_index);
+ cb->cb_color_fmask_slice = S_028C88_TILE_MAX(surf->u.legacy.fmask.slice_tile_max);
+ } else {
+ /* This must be set for fast clear to work without FMASK. */
+ if (device->physical_device->rad_info.chip_class >= GFX7)
+ cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
+ cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
+ cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
+ }
+ }
+
+ /* CMASK variables */
+ va = radv_buffer_get_va(iview->bo) + iview->image->offset;
+ va += surf->cmask_offset;
+ cb->cb_color_cmask = va >> 8;
+
+ va = radv_buffer_get_va(iview->bo) + iview->image->offset;
+ va += surf->dcc_offset;
+
+ if (radv_dcc_enabled(iview->image, iview->base_mip) &&
+ device->physical_device->rad_info.chip_class <= GFX8)
+ va += plane->surface.u.legacy.level[iview->base_mip].dcc_offset;
+
+ unsigned dcc_tile_swizzle = surf->tile_swizzle;
+ dcc_tile_swizzle &= (surf->dcc_alignment - 1) >> 8;
+
+ cb->cb_dcc_base = va >> 8;
+ cb->cb_dcc_base |= dcc_tile_swizzle;
+
+ /* GFX10 field has the same base shift as the GFX6 field. */
+ uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
+ cb->cb_color_view =
+ S_028C6C_SLICE_START(iview->base_layer) | S_028C6C_SLICE_MAX_GFX10(max_slice);
+
+ if (iview->image->info.samples > 1) {
+ unsigned log_samples = util_logbase2(iview->image->info.samples);
+
+ cb->cb_color_attrib |=
+ S_028C74_NUM_SAMPLES(log_samples) | S_028C74_NUM_FRAGMENTS(log_samples);
+ }
+
+ if (radv_image_has_fmask(iview->image)) {
+ va = radv_buffer_get_va(iview->bo) + iview->image->offset + surf->fmask_offset;
+ cb->cb_color_fmask = va >> 8;
+ cb->cb_color_fmask |= surf->fmask_tile_swizzle;
+ } else {
+ cb->cb_color_fmask = cb->cb_color_base;
+ }
+
+ ntype = radv_translate_color_numformat(iview->vk_format, desc,
+ vk_format_get_first_non_void_channel(iview->vk_format));
+ format = radv_translate_colorformat(iview->vk_format);
+ if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
+ radv_finishme("Illegal color\n");
+ swap = radv_translate_colorswap(iview->vk_format, false);
+ endian = radv_colorformat_endian_swap(format);
+
+ /* blend clamp should be set for all NORM/SRGB types */
+ if (ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM ||
+ ntype == V_028C70_NUMBER_SRGB)
+ blend_clamp = 1;
+
+ /* set blend bypass according to docs if SINT/UINT or
+ 8/24 COLOR variants */
+ if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
+ format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
+ format == V_028C70_COLOR_X24_8_32_FLOAT) {
+ blend_clamp = 0;
+ blend_bypass = 1;
+ }
#if 0
if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
(format == V_028C70_COLOR_8 ||
@@ -6744,1268 +6417,1239 @@ radv_initialise_color_surface(struct radv_device *device,
format == V_028C70_COLOR_8_8_8_8))
->color_is_int8 = true;
#endif
- cb->cb_color_info = S_028C70_FORMAT(format) |
- S_028C70_COMP_SWAP(swap) |
- S_028C70_BLEND_CLAMP(blend_clamp) |
- S_028C70_BLEND_BYPASS(blend_bypass) |
- S_028C70_SIMPLE_FLOAT(1) |
- S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
- ntype != V_028C70_NUMBER_SNORM &&
- ntype != V_028C70_NUMBER_SRGB &&
- format != V_028C70_COLOR_8_24 &&
- format != V_028C70_COLOR_24_8) |
- S_028C70_NUMBER_TYPE(ntype) |
- S_028C70_ENDIAN(endian);
- if (radv_image_has_fmask(iview->image)) {
- cb->cb_color_info |= S_028C70_COMPRESSION(1);
- if (device->physical_device->rad_info.chip_class == GFX6) {
- unsigned fmask_bankh = util_logbase2(surf->u.legacy.fmask.bankh);
- cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
- }
-
- if (radv_image_is_tc_compat_cmask(iview->image)) {
- /* Allow the texture block to read FMASK directly
- * without decompressing it. This bit must be cleared
- * when performing FMASK_DECOMPRESS or DCC_COMPRESS,
- * otherwise the operation doesn't happen.
- */
- cb->cb_color_info |= S_028C70_FMASK_COMPRESS_1FRAG_ONLY(1);
-
- if (device->physical_device->rad_info.chip_class == GFX8) {
- /* Set CMASK into a tiling format that allows
- * the texture block to read it.
- */
- cb->cb_color_info |= S_028C70_CMASK_ADDR_TYPE(2);
- }
- }
- }
-
- if (radv_image_has_cmask(iview->image) &&
- !(device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
- cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
-
- if (radv_dcc_enabled(iview->image, iview->base_mip))
- cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
-
- cb->cb_dcc_control = radv_init_dcc_control_reg(device, iview);
-
- /* This must be set for fast clear to work without FMASK. */
- if (!radv_image_has_fmask(iview->image) &&
- device->physical_device->rad_info.chip_class == GFX6) {
- unsigned bankh = util_logbase2(surf->u.legacy.bankh);
- cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
- }
-
- if (device->physical_device->rad_info.chip_class >= GFX9) {
- unsigned mip0_depth = iview->image->type == VK_IMAGE_TYPE_3D ?
- (iview->extent.depth - 1) : (iview->image->info.array_size - 1);
- unsigned width = vk_format_get_plane_width(iview->image->vk_format,
- iview->plane_id, iview->extent.width);
- unsigned height = vk_format_get_plane_height(iview->image->vk_format,
- iview->plane_id, iview->extent.height);
-
- if (device->physical_device->rad_info.chip_class >= GFX10) {
- cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX10(iview->base_mip);
-
- cb->cb_color_attrib3 |= S_028EE0_MIP0_DEPTH(mip0_depth) |
- S_028EE0_RESOURCE_TYPE(surf->u.gfx9.resource_type) |
- S_028EE0_RESOURCE_LEVEL(1);
- } else {
- cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX9(iview->base_mip);
- cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
- S_028C74_RESOURCE_TYPE(surf->u.gfx9.resource_type);
- }
-
- cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(width - 1) |
- S_028C68_MIP0_HEIGHT(height - 1) |
- S_028C68_MAX_MIP(iview->image->info.levels - 1);
- }
+ cb->cb_color_info =
+ S_028C70_FORMAT(format) | S_028C70_COMP_SWAP(swap) | S_028C70_BLEND_CLAMP(blend_clamp) |
+ S_028C70_BLEND_BYPASS(blend_bypass) | S_028C70_SIMPLE_FLOAT(1) |
+ S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM && ntype != V_028C70_NUMBER_SNORM &&
+ ntype != V_028C70_NUMBER_SRGB && format != V_028C70_COLOR_8_24 &&
+ format != V_028C70_COLOR_24_8) |
+ S_028C70_NUMBER_TYPE(ntype) | S_028C70_ENDIAN(endian);
+ if (radv_image_has_fmask(iview->image)) {
+ cb->cb_color_info |= S_028C70_COMPRESSION(1);
+ if (device->physical_device->rad_info.chip_class == GFX6) {
+ unsigned fmask_bankh = util_logbase2(surf->u.legacy.fmask.bankh);
+ cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
+ }
+
+ if (radv_image_is_tc_compat_cmask(iview->image)) {
+ /* Allow the texture block to read FMASK directly
+ * without decompressing it. This bit must be cleared
+ * when performing FMASK_DECOMPRESS or DCC_COMPRESS,
+ * otherwise the operation doesn't happen.
+ */
+ cb->cb_color_info |= S_028C70_FMASK_COMPRESS_1FRAG_ONLY(1);
+
+ if (device->physical_device->rad_info.chip_class == GFX8) {
+ /* Set CMASK into a tiling format that allows
+ * the texture block to read it.
+ */
+ cb->cb_color_info |= S_028C70_CMASK_ADDR_TYPE(2);
+ }
+ }
+ }
+
+ if (radv_image_has_cmask(iview->image) &&
+ !(device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
+ cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
+
+ if (radv_dcc_enabled(iview->image, iview->base_mip))
+ cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
+
+ cb->cb_dcc_control = radv_init_dcc_control_reg(device, iview);
+
+ /* This must be set for fast clear to work without FMASK. */
+ if (!radv_image_has_fmask(iview->image) &&
+ device->physical_device->rad_info.chip_class == GFX6) {
+ unsigned bankh = util_logbase2(surf->u.legacy.bankh);
+ cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
+ }
+
+ if (device->physical_device->rad_info.chip_class >= GFX9) {
+ unsigned mip0_depth = iview->image->type == VK_IMAGE_TYPE_3D
+ ? (iview->extent.depth - 1)
+ : (iview->image->info.array_size - 1);
+ unsigned width =
+ vk_format_get_plane_width(iview->image->vk_format, iview->plane_id, iview->extent.width);
+ unsigned height =
+ vk_format_get_plane_height(iview->image->vk_format, iview->plane_id, iview->extent.height);
+
+ if (device->physical_device->rad_info.chip_class >= GFX10) {
+ cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX10(iview->base_mip);
+
+ cb->cb_color_attrib3 |= S_028EE0_MIP0_DEPTH(mip0_depth) |
+ S_028EE0_RESOURCE_TYPE(surf->u.gfx9.resource_type) |
+ S_028EE0_RESOURCE_LEVEL(1);
+ } else {
+ cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX9(iview->base_mip);
+ cb->cb_color_attrib |=
+ S_028C74_MIP0_DEPTH(mip0_depth) | S_028C74_RESOURCE_TYPE(surf->u.gfx9.resource_type);
+ }
+
+ cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(width - 1) | S_028C68_MIP0_HEIGHT(height - 1) |
+ S_028C68_MAX_MIP(iview->image->info.levels - 1);
+ }
}
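/* Rough note on the address math above: the ">> 8" shifts store GPU virtual
 * addresses in units of 256 bytes, which is the granularity the CB base
 * address registers expect, i.e. effectively
 *
 *    cb->cb_color_base = (radv_buffer_get_va(iview->bo) +
 *                         iview->image->offset + surf_offset) >> 8;
 *
 * where surf_offset stands in for whichever per-plane/per-level offset
 * applies on the given generation.
 */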
static unsigned
-radv_calc_decompress_on_z_planes(struct radv_device *device,
- struct radv_image_view *iview)
-{
- unsigned max_zplanes = 0;
-
- assert(radv_image_is_tc_compat_htile(iview->image));
-
- if (device->physical_device->rad_info.chip_class >= GFX9) {
- /* Default value for 32-bit depth surfaces. */
- max_zplanes = 4;
-
- if (iview->vk_format == VK_FORMAT_D16_UNORM &&
- iview->image->info.samples > 1)
- max_zplanes = 2;
-
- max_zplanes = max_zplanes + 1;
- } else {
- if (iview->vk_format == VK_FORMAT_D16_UNORM) {
- /* Do not enable Z plane compression for 16-bit depth
-    * surfaces because it isn't supported on GFX8. Only
- * 32-bit depth surfaces are supported by the hardware.
-    * This allows us to maintain shader compatibility and to
- * reduce the number of depth decompressions.
- */
- max_zplanes = 1;
- } else {
- if (iview->image->info.samples <= 1)
- max_zplanes = 5;
- else if (iview->image->info.samples <= 4)
- max_zplanes = 3;
- else
- max_zplanes = 2;
- }
- }
-
- return max_zplanes;
+radv_calc_decompress_on_z_planes(struct radv_device *device, struct radv_image_view *iview)
+{
+ unsigned max_zplanes = 0;
+
+ assert(radv_image_is_tc_compat_htile(iview->image));
+
+ if (device->physical_device->rad_info.chip_class >= GFX9) {
+ /* Default value for 32-bit depth surfaces. */
+ max_zplanes = 4;
+
+ if (iview->vk_format == VK_FORMAT_D16_UNORM && iview->image->info.samples > 1)
+ max_zplanes = 2;
+
+ max_zplanes = max_zplanes + 1;
+ } else {
+ if (iview->vk_format == VK_FORMAT_D16_UNORM) {
+ /* Do not enable Z plane compression for 16-bit depth
+          * surfaces because it isn't supported on GFX8. Only
+ * 32-bit depth surfaces are supported by the hardware.
+          * This allows us to maintain shader compatibility and to
+ * reduce the number of depth decompressions.
+ */
+ max_zplanes = 1;
+ } else {
+ if (iview->image->info.samples <= 1)
+ max_zplanes = 5;
+ else if (iview->image->info.samples <= 4)
+ max_zplanes = 3;
+ else
+ max_zplanes = 2;
+ }
+ }
+
+ return max_zplanes;
}
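/* Summary of the values produced above, for reference:
 *
 *    GFX9+:  D16 with MSAA        -> 2 + 1 = 3
 *            everything else     -> 4 + 1 = 5
 *    GFX8-:  D16                 -> 1 (Z plane compression effectively off)
 *            1 sample            -> 5
 *            2-4 samples         -> 3
 *            8 samples           -> 2
 */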
void
-radv_initialise_ds_surface(struct radv_device *device,
- struct radv_ds_buffer_info *ds,
- struct radv_image_view *iview)
-{
- unsigned level = iview->base_mip;
- unsigned format, stencil_format;
- uint64_t va, s_offs, z_offs;
- bool stencil_only = false;
- const struct radv_image_plane *plane = &iview->image->planes[0];
- const struct radeon_surf *surf = &plane->surface;
-
- assert(vk_format_get_plane_count(iview->image->vk_format) == 1);
-
- memset(ds, 0, sizeof(*ds));
- switch (iview->image->vk_format) {
- case VK_FORMAT_D24_UNORM_S8_UINT:
- case VK_FORMAT_X8_D24_UNORM_PACK32:
- ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
- break;
- case VK_FORMAT_D16_UNORM:
- case VK_FORMAT_D16_UNORM_S8_UINT:
- ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
- break;
- case VK_FORMAT_D32_SFLOAT:
- case VK_FORMAT_D32_SFLOAT_S8_UINT:
- ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
- S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
- break;
- case VK_FORMAT_S8_UINT:
- stencil_only = true;
- break;
- default:
- break;
- }
-
- format = radv_translate_dbformat(iview->image->vk_format);
- stencil_format = surf->has_stencil ?
- V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;
-
- uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
- ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
- S_028008_SLICE_MAX(max_slice);
- if (device->physical_device->rad_info.chip_class >= GFX10) {
- ds->db_depth_view |= S_028008_SLICE_START_HI(iview->base_layer >> 11) |
- S_028008_SLICE_MAX_HI(max_slice >> 11);
- }
-
- ds->db_htile_data_base = 0;
- ds->db_htile_surface = 0;
-
- va = radv_buffer_get_va(iview->bo) + iview->image->offset;
- s_offs = z_offs = va;
-
- if (device->physical_device->rad_info.chip_class >= GFX9) {
- assert(surf->u.gfx9.surf_offset == 0);
- s_offs += surf->u.gfx9.stencil_offset;
-
- ds->db_z_info = S_028038_FORMAT(format) |
- S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |
- S_028038_SW_MODE(surf->u.gfx9.surf.swizzle_mode) |
- S_028038_MAXMIP(iview->image->info.levels - 1) |
- S_028038_ZRANGE_PRECISION(1);
- ds->db_stencil_info = S_02803C_FORMAT(stencil_format) |
- S_02803C_SW_MODE(surf->u.gfx9.stencil.swizzle_mode);
-
- if (device->physical_device->rad_info.chip_class == GFX9) {
- ds->db_z_info2 = S_028068_EPITCH(surf->u.gfx9.surf.epitch);
- ds->db_stencil_info2 = S_02806C_EPITCH(surf->u.gfx9.stencil.epitch);
- }
-
- ds->db_depth_view |= S_028008_MIPID(level);
- ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
- S_02801C_Y_MAX(iview->image->info.height - 1);
-
- if (radv_htile_enabled(iview->image, level)) {
- ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);
-
- if (radv_image_is_tc_compat_htile(iview->image)) {
- unsigned max_zplanes =
- radv_calc_decompress_on_z_planes(device, iview);
-
- ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
-
- if (device->physical_device->rad_info.chip_class >= GFX10) {
- ds->db_z_info |= S_028040_ITERATE_FLUSH(1);
- ds->db_stencil_info |= S_028044_ITERATE_FLUSH(1);
- } else {
- ds->db_z_info |= S_028038_ITERATE_FLUSH(1);
- ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
- }
- }
-
- if (radv_image_tile_stencil_disabled(device, iview->image)) {
- ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
- }
-
- va = radv_buffer_get_va(iview->bo) + iview->image->offset +
- surf->htile_offset;
- ds->db_htile_data_base = va >> 8;
- ds->db_htile_surface = S_028ABC_FULL_CACHE(1) |
- S_028ABC_PIPE_ALIGNED(1);
-
- if (device->physical_device->rad_info.chip_class == GFX9) {
- ds->db_htile_surface |= S_028ABC_RB_ALIGNED(1);
- }
- }
- } else {
- const struct legacy_surf_level *level_info = &surf->u.legacy.level[level];
-
- if (stencil_only)
- level_info = &surf->u.legacy.stencil_level[level];
-
- z_offs += surf->u.legacy.level[level].offset;
- s_offs += surf->u.legacy.stencil_level[level].offset;
-
- ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!radv_image_is_tc_compat_htile(iview->image));
- ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
- ds->db_stencil_info = S_028044_FORMAT(stencil_format);
-
- if (iview->image->info.samples > 1)
- ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));
-
- if (device->physical_device->rad_info.chip_class >= GFX7) {
- struct radeon_info *info = &device->physical_device->rad_info;
- unsigned tiling_index = surf->u.legacy.tiling_index[level];
- unsigned stencil_index = surf->u.legacy.stencil_tiling_index[level];
- unsigned macro_index = surf->u.legacy.macro_tile_index;
- unsigned tile_mode = info->si_tile_mode_array[tiling_index];
- unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
- unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
-
- if (stencil_only)
- tile_mode = stencil_tile_mode;
-
- ds->db_depth_info |=
- S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
- S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
- S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
- S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
- S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
- S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
- ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
- ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
- } else {
- unsigned tile_mode_index = si_tile_mode_index(&iview->image->planes[0], level, false);
- ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
- tile_mode_index = si_tile_mode_index(&iview->image->planes[0], level, true);
- ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
- if (stencil_only)
- ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
- }
-
- ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
- S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
- ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
-
- if (radv_htile_enabled(iview->image, level)) {
- ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);
-
- if (radv_image_tile_stencil_disabled(device, iview->image)) {
- ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
- }
-
- va = radv_buffer_get_va(iview->bo) + iview->image->offset +
- surf->htile_offset;
- ds->db_htile_data_base = va >> 8;
- ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
-
- if (radv_image_is_tc_compat_htile(iview->image)) {
- unsigned max_zplanes =
- radv_calc_decompress_on_z_planes(device, iview);
-
- ds->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
- ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
- }
- }
- }
-
- ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
- ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
-}
-
-VkResult radv_CreateFramebuffer(
- VkDevice _device,
- const VkFramebufferCreateInfo* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkFramebuffer* pFramebuffer)
-{
- RADV_FROM_HANDLE(radv_device, device, _device);
- struct radv_framebuffer *framebuffer;
- const VkFramebufferAttachmentsCreateInfo *imageless_create_info =
- vk_find_struct_const(pCreateInfo->pNext,
- FRAMEBUFFER_ATTACHMENTS_CREATE_INFO);
-
- assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
-
- size_t size = sizeof(*framebuffer);
- if (!imageless_create_info)
- size += sizeof(struct radv_image_view*) * pCreateInfo->attachmentCount;
- framebuffer = vk_alloc2(&device->vk.alloc, pAllocator, size, 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (framebuffer == NULL)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
-
- vk_object_base_init(&device->vk, &framebuffer->base,
- VK_OBJECT_TYPE_FRAMEBUFFER);
-
- framebuffer->attachment_count = pCreateInfo->attachmentCount;
- framebuffer->width = pCreateInfo->width;
- framebuffer->height = pCreateInfo->height;
- framebuffer->layers = pCreateInfo->layers;
- framebuffer->imageless = !!imageless_create_info;
-
- if (!imageless_create_info) {
- for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
- VkImageView _iview = pCreateInfo->pAttachments[i];
- struct radv_image_view *iview = radv_image_view_from_handle(_iview);
- framebuffer->attachments[i] = iview;
- }
- }
-
- *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
- return VK_SUCCESS;
-}
-
-void radv_DestroyFramebuffer(
- VkDevice _device,
- VkFramebuffer _fb,
- const VkAllocationCallbacks* pAllocator)
-{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
-
- if (!fb)
- return;
- vk_object_base_finish(&fb->base);
- vk_free2(&device->vk.alloc, pAllocator, fb);
-}
-
-static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
-{
- switch (address_mode) {
- case VK_SAMPLER_ADDRESS_MODE_REPEAT:
- return V_008F30_SQ_TEX_WRAP;
- case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
- return V_008F30_SQ_TEX_MIRROR;
- case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
- return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
- case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
- return V_008F30_SQ_TEX_CLAMP_BORDER;
- case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
- return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
- default:
- unreachable("illegal tex wrap mode");
- break;
- }
+radv_initialise_ds_surface(struct radv_device *device, struct radv_ds_buffer_info *ds,
+ struct radv_image_view *iview)
+{
+ unsigned level = iview->base_mip;
+ unsigned format, stencil_format;
+ uint64_t va, s_offs, z_offs;
+ bool stencil_only = false;
+ const struct radv_image_plane *plane = &iview->image->planes[0];
+ const struct radeon_surf *surf = &plane->surface;
+
+ assert(vk_format_get_plane_count(iview->image->vk_format) == 1);
+
+ memset(ds, 0, sizeof(*ds));
+ switch (iview->image->vk_format) {
+ case VK_FORMAT_D24_UNORM_S8_UINT:
+ case VK_FORMAT_X8_D24_UNORM_PACK32:
+ ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
+ break;
+ case VK_FORMAT_D16_UNORM:
+ case VK_FORMAT_D16_UNORM_S8_UINT:
+ ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
+ break;
+ case VK_FORMAT_D32_SFLOAT:
+ case VK_FORMAT_D32_SFLOAT_S8_UINT:
+ ds->pa_su_poly_offset_db_fmt_cntl =
+ S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) | S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
+ break;
+ case VK_FORMAT_S8_UINT:
+ stencil_only = true;
+ break;
+ default:
+ break;
+ }
+
+ format = radv_translate_dbformat(iview->image->vk_format);
+ stencil_format = surf->has_stencil ? V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;
+
+ uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
+ ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) | S_028008_SLICE_MAX(max_slice);
+ if (device->physical_device->rad_info.chip_class >= GFX10) {
+ ds->db_depth_view |=
+ S_028008_SLICE_START_HI(iview->base_layer >> 11) | S_028008_SLICE_MAX_HI(max_slice >> 11);
+ }
+
+ ds->db_htile_data_base = 0;
+ ds->db_htile_surface = 0;
+
+ va = radv_buffer_get_va(iview->bo) + iview->image->offset;
+ s_offs = z_offs = va;
+
+ if (device->physical_device->rad_info.chip_class >= GFX9) {
+ assert(surf->u.gfx9.surf_offset == 0);
+ s_offs += surf->u.gfx9.stencil_offset;
+
+ ds->db_z_info = S_028038_FORMAT(format) |
+ S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |
+ S_028038_SW_MODE(surf->u.gfx9.surf.swizzle_mode) |
+ S_028038_MAXMIP(iview->image->info.levels - 1) | S_028038_ZRANGE_PRECISION(1);
+ ds->db_stencil_info =
+ S_02803C_FORMAT(stencil_format) | S_02803C_SW_MODE(surf->u.gfx9.stencil.swizzle_mode);
+
+ if (device->physical_device->rad_info.chip_class == GFX9) {
+ ds->db_z_info2 = S_028068_EPITCH(surf->u.gfx9.surf.epitch);
+ ds->db_stencil_info2 = S_02806C_EPITCH(surf->u.gfx9.stencil.epitch);
+ }
+
+ ds->db_depth_view |= S_028008_MIPID(level);
+ ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
+ S_02801C_Y_MAX(iview->image->info.height - 1);
+
+ if (radv_htile_enabled(iview->image, level)) {
+ ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);
+
+ if (radv_image_is_tc_compat_htile(iview->image)) {
+ unsigned max_zplanes = radv_calc_decompress_on_z_planes(device, iview);
+
+ ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
+
+ if (device->physical_device->rad_info.chip_class >= GFX10) {
+ ds->db_z_info |= S_028040_ITERATE_FLUSH(1);
+ ds->db_stencil_info |= S_028044_ITERATE_FLUSH(1);
+ } else {
+ ds->db_z_info |= S_028038_ITERATE_FLUSH(1);
+ ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
+ }
+ }
+
+ if (radv_image_tile_stencil_disabled(device, iview->image)) {
+ ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
+ }
+
+ va = radv_buffer_get_va(iview->bo) + iview->image->offset + surf->htile_offset;
+ ds->db_htile_data_base = va >> 8;
+ ds->db_htile_surface = S_028ABC_FULL_CACHE(1) | S_028ABC_PIPE_ALIGNED(1);
+
+ if (device->physical_device->rad_info.chip_class == GFX9) {
+ ds->db_htile_surface |= S_028ABC_RB_ALIGNED(1);
+ }
+ }
+ } else {
+ const struct legacy_surf_level *level_info = &surf->u.legacy.level[level];
+
+ if (stencil_only)
+ level_info = &surf->u.legacy.stencil_level[level];
+
+ z_offs += surf->u.legacy.level[level].offset;
+ s_offs += surf->u.legacy.stencil_level[level].offset;
+
+ ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!radv_image_is_tc_compat_htile(iview->image));
+ ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
+ ds->db_stencil_info = S_028044_FORMAT(stencil_format);
+
+ if (iview->image->info.samples > 1)
+ ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));
+
+ if (device->physical_device->rad_info.chip_class >= GFX7) {
+ struct radeon_info *info = &device->physical_device->rad_info;
+ unsigned tiling_index = surf->u.legacy.tiling_index[level];
+ unsigned stencil_index = surf->u.legacy.stencil_tiling_index[level];
+ unsigned macro_index = surf->u.legacy.macro_tile_index;
+ unsigned tile_mode = info->si_tile_mode_array[tiling_index];
+ unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
+ unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
+
+ if (stencil_only)
+ tile_mode = stencil_tile_mode;
+
+ ds->db_depth_info |= S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
+ S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
+ S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
+ S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
+ S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
+ S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
+ ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
+ ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
+ } else {
+ unsigned tile_mode_index = si_tile_mode_index(&iview->image->planes[0], level, false);
+ ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
+ tile_mode_index = si_tile_mode_index(&iview->image->planes[0], level, true);
+ ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
+ if (stencil_only)
+ ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
+ }
+
+ ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
+ S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
+ ds->db_depth_slice =
+ S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
+
+ if (radv_htile_enabled(iview->image, level)) {
+ ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);
+
+ if (radv_image_tile_stencil_disabled(device, iview->image)) {
+ ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
+ }
+
+ va = radv_buffer_get_va(iview->bo) + iview->image->offset + surf->htile_offset;
+ ds->db_htile_data_base = va >> 8;
+ ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
+
+ if (radv_image_is_tc_compat_htile(iview->image)) {
+ unsigned max_zplanes = radv_calc_decompress_on_z_planes(device, iview);
+
+ ds->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
+ ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
+ }
+ }
+ }
+
+ ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
+ ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
+}
+
+VkResult
+radv_CreateFramebuffer(VkDevice _device, const VkFramebufferCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkFramebuffer *pFramebuffer)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ struct radv_framebuffer *framebuffer;
+ const VkFramebufferAttachmentsCreateInfo *imageless_create_info =
+ vk_find_struct_const(pCreateInfo->pNext, FRAMEBUFFER_ATTACHMENTS_CREATE_INFO);
+
+ assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
+
+ size_t size = sizeof(*framebuffer);
+ if (!imageless_create_info)
+ size += sizeof(struct radv_image_view *) * pCreateInfo->attachmentCount;
+ framebuffer =
+ vk_alloc2(&device->vk.alloc, pAllocator, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (framebuffer == NULL)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ vk_object_base_init(&device->vk, &framebuffer->base, VK_OBJECT_TYPE_FRAMEBUFFER);
+
+ framebuffer->attachment_count = pCreateInfo->attachmentCount;
+ framebuffer->width = pCreateInfo->width;
+ framebuffer->height = pCreateInfo->height;
+ framebuffer->layers = pCreateInfo->layers;
+ framebuffer->imageless = !!imageless_create_info;
+
+ if (!imageless_create_info) {
+ for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
+ VkImageView _iview = pCreateInfo->pAttachments[i];
+ struct radv_image_view *iview = radv_image_view_from_handle(_iview);
+ framebuffer->attachments[i] = iview;
+ }
+ }
+
+ *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
+ return VK_SUCCESS;
+}
+
+void
+radv_DestroyFramebuffer(VkDevice _device, VkFramebuffer _fb,
+ const VkAllocationCallbacks *pAllocator)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
+
+ if (!fb)
+ return;
+ vk_object_base_finish(&fb->base);
+ vk_free2(&device->vk.alloc, pAllocator, fb);
+}
+
+static unsigned
+radv_tex_wrap(VkSamplerAddressMode address_mode)
+{
+ switch (address_mode) {
+ case VK_SAMPLER_ADDRESS_MODE_REPEAT:
+ return V_008F30_SQ_TEX_WRAP;
+ case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
+ return V_008F30_SQ_TEX_MIRROR;
+ case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
+ return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
+ case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
+ return V_008F30_SQ_TEX_CLAMP_BORDER;
+ case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
+ return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
+ default:
+ unreachable("illegal tex wrap mode");
+ break;
+ }
}
static unsigned
radv_tex_compare(VkCompareOp op)
{
- switch (op) {
- case VK_COMPARE_OP_NEVER:
- return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
- case VK_COMPARE_OP_LESS:
- return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
- case VK_COMPARE_OP_EQUAL:
- return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
- case VK_COMPARE_OP_LESS_OR_EQUAL:
- return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
- case VK_COMPARE_OP_GREATER:
- return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
- case VK_COMPARE_OP_NOT_EQUAL:
- return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
- case VK_COMPARE_OP_GREATER_OR_EQUAL:
- return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
- case VK_COMPARE_OP_ALWAYS:
- return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
- default:
- unreachable("illegal compare mode");
- break;
- }
+ switch (op) {
+ case VK_COMPARE_OP_NEVER:
+ return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
+ case VK_COMPARE_OP_LESS:
+ return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
+ case VK_COMPARE_OP_EQUAL:
+ return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
+ case VK_COMPARE_OP_LESS_OR_EQUAL:
+ return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
+ case VK_COMPARE_OP_GREATER:
+ return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
+ case VK_COMPARE_OP_NOT_EQUAL:
+ return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
+ case VK_COMPARE_OP_GREATER_OR_EQUAL:
+ return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
+ case VK_COMPARE_OP_ALWAYS:
+ return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
+ default:
+ unreachable("illegal compare mode");
+ break;
+ }
}
static unsigned
radv_tex_filter(VkFilter filter, unsigned max_ansio)
{
- switch (filter) {
- case VK_FILTER_NEAREST:
- return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
- V_008F38_SQ_TEX_XY_FILTER_POINT);
- case VK_FILTER_LINEAR:
- return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
- V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
- case VK_FILTER_CUBIC_IMG:
- default:
- fprintf(stderr, "illegal texture filter");
- return 0;
- }
+ switch (filter) {
+ case VK_FILTER_NEAREST:
+ return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT
+ : V_008F38_SQ_TEX_XY_FILTER_POINT);
+ case VK_FILTER_LINEAR:
+ return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR
+ : V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
+ case VK_FILTER_CUBIC_IMG:
+ default:
+ fprintf(stderr, "illegal texture filter");
+ return 0;
+ }
}
static unsigned
radv_tex_mipfilter(VkSamplerMipmapMode mode)
{
- switch (mode) {
- case VK_SAMPLER_MIPMAP_MODE_NEAREST:
- return V_008F38_SQ_TEX_Z_FILTER_POINT;
- case VK_SAMPLER_MIPMAP_MODE_LINEAR:
- return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
- default:
- return V_008F38_SQ_TEX_Z_FILTER_NONE;
- }
+ switch (mode) {
+ case VK_SAMPLER_MIPMAP_MODE_NEAREST:
+ return V_008F38_SQ_TEX_Z_FILTER_POINT;
+ case VK_SAMPLER_MIPMAP_MODE_LINEAR:
+ return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
+ default:
+ return V_008F38_SQ_TEX_Z_FILTER_NONE;
+ }
}
static unsigned
radv_tex_bordercolor(VkBorderColor bcolor)
{
- switch (bcolor) {
- case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
- case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
- return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
- case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
- case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
- return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
- case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
- case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
- return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
- case VK_BORDER_COLOR_FLOAT_CUSTOM_EXT:
- case VK_BORDER_COLOR_INT_CUSTOM_EXT:
- return V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER;
- default:
- break;
- }
- return 0;
+ switch (bcolor) {
+ case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
+ case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
+ return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
+ case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
+ case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
+ return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
+ case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
+ case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
+ return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
+ case VK_BORDER_COLOR_FLOAT_CUSTOM_EXT:
+ case VK_BORDER_COLOR_INT_CUSTOM_EXT:
+ return V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER;
+ default:
+ break;
+ }
+ return 0;
}
static unsigned
radv_tex_aniso_filter(unsigned filter)
{
- if (filter < 2)
- return 0;
- if (filter < 4)
- return 1;
- if (filter < 8)
- return 2;
- if (filter < 16)
- return 3;
- return 4;
+ if (filter < 2)
+ return 0;
+ if (filter < 4)
+ return 1;
+ if (filter < 8)
+ return 2;
+ if (filter < 16)
+ return 3;
+ return 4;
}
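/* The bucket returned above is roughly log2 of the requested maxAnisotropy:
 * 1x -> 0, 2x-3x -> 1, 4x-7x -> 2, 8x-15x -> 3, 16x (and up) -> 4. It feeds
 * MAX_ANISO_RATIO / ANISO_THRESHOLD / ANISO_BIAS in radv_init_sampler()
 * below.
 */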
static unsigned
radv_tex_filter_mode(VkSamplerReductionMode mode)
{
- switch (mode) {
- case VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT:
- return V_008F30_SQ_IMG_FILTER_MODE_BLEND;
- case VK_SAMPLER_REDUCTION_MODE_MIN_EXT:
- return V_008F30_SQ_IMG_FILTER_MODE_MIN;
- case VK_SAMPLER_REDUCTION_MODE_MAX_EXT:
- return V_008F30_SQ_IMG_FILTER_MODE_MAX;
- default:
- break;
- }
- return 0;
+ switch (mode) {
+ case VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT:
+ return V_008F30_SQ_IMG_FILTER_MODE_BLEND;
+ case VK_SAMPLER_REDUCTION_MODE_MIN_EXT:
+ return V_008F30_SQ_IMG_FILTER_MODE_MIN;
+ case VK_SAMPLER_REDUCTION_MODE_MAX_EXT:
+ return V_008F30_SQ_IMG_FILTER_MODE_MAX;
+ default:
+ break;
+ }
+ return 0;
}
static uint32_t
-radv_get_max_anisotropy(struct radv_device *device,
- const VkSamplerCreateInfo *pCreateInfo)
+radv_get_max_anisotropy(struct radv_device *device, const VkSamplerCreateInfo *pCreateInfo)
{
- if (device->force_aniso >= 0)
- return device->force_aniso;
+ if (device->force_aniso >= 0)
+ return device->force_aniso;
- if (pCreateInfo->anisotropyEnable &&
- pCreateInfo->maxAnisotropy > 1.0f)
- return (uint32_t)pCreateInfo->maxAnisotropy;
+ if (pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0f)
+ return (uint32_t)pCreateInfo->maxAnisotropy;
- return 0;
+ return 0;
}
-static inline int S_FIXED(float value, unsigned frac_bits)
+static inline int
+S_FIXED(float value, unsigned frac_bits)
{
- return value * (1 << frac_bits);
+ return value * (1 << frac_bits);
}
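/* S_FIXED() is a plain fixed-point conversion, e.g. with 8 fractional bits
 * S_FIXED(1.25f, 8) == (int)(1.25 * 256) == 320; this is the encoding used
 * for MIN_LOD/MAX_LOD and LOD_BIAS in the sampler state below.
 */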
-static uint32_t radv_register_border_color(struct radv_device *device,
- VkClearColorValue value)
+static uint32_t
+radv_register_border_color(struct radv_device *device, VkClearColorValue value)
{
- uint32_t slot;
+ uint32_t slot;
- mtx_lock(&device->border_color_data.mutex);
+ mtx_lock(&device->border_color_data.mutex);
- for (slot = 0; slot < RADV_BORDER_COLOR_COUNT; slot++) {
- if (!device->border_color_data.used[slot]) {
- /* Copy to the GPU wrt endian-ness. */
- util_memcpy_cpu_to_le32(&device->border_color_data.colors_gpu_ptr[slot],
- &value,
- sizeof(VkClearColorValue));
+ for (slot = 0; slot < RADV_BORDER_COLOR_COUNT; slot++) {
+ if (!device->border_color_data.used[slot]) {
+ /* Copy to the GPU wrt endian-ness. */
+ util_memcpy_cpu_to_le32(&device->border_color_data.colors_gpu_ptr[slot], &value,
+ sizeof(VkClearColorValue));
- device->border_color_data.used[slot] = true;
- break;
- }
- }
+ device->border_color_data.used[slot] = true;
+ break;
+ }
+ }
- mtx_unlock(&device->border_color_data.mutex);
+ mtx_unlock(&device->border_color_data.mutex);
- return slot;
+ return slot;
}
-static void radv_unregister_border_color(struct radv_device *device,
- uint32_t slot)
+static void
+radv_unregister_border_color(struct radv_device *device, uint32_t slot)
{
- mtx_lock(&device->border_color_data.mutex);
+ mtx_lock(&device->border_color_data.mutex);
- device->border_color_data.used[slot] = false;
+ device->border_color_data.used[slot] = false;
- mtx_unlock(&device->border_color_data.mutex);
+ mtx_unlock(&device->border_color_data.mutex);
}
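/* Usage sketch (simplified; the real pairing is in radv_init_sampler() and
 * radv_DestroySampler() below, and "state" is just a placeholder for
 * sampler->state[3]):
 *
 *    uint32_t slot = radv_register_border_color(device, value);
 *    if (slot != RADV_BORDER_COLOR_COUNT)
 *       state |= S_008F3C_BORDER_COLOR_PTR(slot);
 *    ...
 *    radv_unregister_border_color(device, slot);
 */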
static void
-radv_init_sampler(struct radv_device *device,
- struct radv_sampler *sampler,
- const VkSamplerCreateInfo *pCreateInfo)
-{
- uint32_t max_aniso = radv_get_max_anisotropy(device, pCreateInfo);
- uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
- bool compat_mode = device->physical_device->rad_info.chip_class == GFX8 ||
- device->physical_device->rad_info.chip_class == GFX9;
- unsigned filter_mode = V_008F30_SQ_IMG_FILTER_MODE_BLEND;
- unsigned depth_compare_func = V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
- bool trunc_coord = pCreateInfo->minFilter == VK_FILTER_NEAREST && pCreateInfo->magFilter == VK_FILTER_NEAREST;
- bool uses_border_color = pCreateInfo->addressModeU == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
- pCreateInfo->addressModeV == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
- pCreateInfo->addressModeW == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
- VkBorderColor border_color = uses_border_color ? pCreateInfo->borderColor : VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
- uint32_t border_color_ptr;
-
- const struct VkSamplerReductionModeCreateInfo *sampler_reduction =
- vk_find_struct_const(pCreateInfo->pNext,
- SAMPLER_REDUCTION_MODE_CREATE_INFO);
- if (sampler_reduction)
- filter_mode = radv_tex_filter_mode(sampler_reduction->reductionMode);
-
- if (pCreateInfo->compareEnable)
- depth_compare_func = radv_tex_compare(pCreateInfo->compareOp);
-
- sampler->border_color_slot = RADV_BORDER_COLOR_COUNT;
-
- if (border_color == VK_BORDER_COLOR_FLOAT_CUSTOM_EXT || border_color == VK_BORDER_COLOR_INT_CUSTOM_EXT) {
- const VkSamplerCustomBorderColorCreateInfoEXT *custom_border_color =
- vk_find_struct_const(pCreateInfo->pNext,
- SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT);
-
- assert(custom_border_color);
-
- sampler->border_color_slot =
- radv_register_border_color(device, custom_border_color->customBorderColor);
-
- /* Did we fail to find a slot? */
- if (sampler->border_color_slot == RADV_BORDER_COLOR_COUNT) {
- fprintf(stderr, "WARNING: no free border color slots, defaulting to TRANS_BLACK.\n");
- border_color = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
- }
- }
-
- /* If we don't have a custom color, set the ptr to 0 */
- border_color_ptr = sampler->border_color_slot != RADV_BORDER_COLOR_COUNT
- ? sampler->border_color_slot
- : 0;
-
- sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
- S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
- S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
- S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
- S_008F30_DEPTH_COMPARE_FUNC(depth_compare_func) |
- S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
- S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
- S_008F30_ANISO_BIAS(max_aniso_ratio) |
- S_008F30_DISABLE_CUBE_WRAP(0) |
- S_008F30_COMPAT_MODE(compat_mode) |
- S_008F30_FILTER_MODE(filter_mode) |
- S_008F30_TRUNC_COORD(trunc_coord));
- sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
- S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
- S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
- sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
- S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
- S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
- S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
- S_008F38_MIP_POINT_PRECLAMP(0));
- sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(border_color_ptr) |
- S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(border_color)));
-
- if (device->physical_device->rad_info.chip_class >= GFX10) {
- sampler->state[2] |= S_008F38_ANISO_OVERRIDE_GFX10(1);
- } else {
- sampler->state[2] |=
- S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.chip_class <= GFX8) |
- S_008F38_FILTER_PREC_FIX(1) |
- S_008F38_ANISO_OVERRIDE_GFX8(device->physical_device->rad_info.chip_class >= GFX8);
- }
-}
-
-VkResult radv_CreateSampler(
- VkDevice _device,
- const VkSamplerCreateInfo* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkSampler* pSampler)
-{
- RADV_FROM_HANDLE(radv_device, device, _device);
- struct radv_sampler *sampler;
-
- const struct VkSamplerYcbcrConversionInfo *ycbcr_conversion =
- vk_find_struct_const(pCreateInfo->pNext,
- SAMPLER_YCBCR_CONVERSION_INFO);
-
- assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
-
- sampler = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*sampler), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (!sampler)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
-
- vk_object_base_init(&device->vk, &sampler->base,
- VK_OBJECT_TYPE_SAMPLER);
-
- radv_init_sampler(device, sampler, pCreateInfo);
-
- sampler->ycbcr_sampler = ycbcr_conversion ? radv_sampler_ycbcr_conversion_from_handle(ycbcr_conversion->conversion): NULL;
- *pSampler = radv_sampler_to_handle(sampler);
-
- return VK_SUCCESS;
-}
-
-void radv_DestroySampler(
- VkDevice _device,
- VkSampler _sampler,
- const VkAllocationCallbacks* pAllocator)
-{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
+radv_init_sampler(struct radv_device *device, struct radv_sampler *sampler,
+ const VkSamplerCreateInfo *pCreateInfo)
+{
+ uint32_t max_aniso = radv_get_max_anisotropy(device, pCreateInfo);
+ uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
+ bool compat_mode = device->physical_device->rad_info.chip_class == GFX8 ||
+ device->physical_device->rad_info.chip_class == GFX9;
+ unsigned filter_mode = V_008F30_SQ_IMG_FILTER_MODE_BLEND;
+ unsigned depth_compare_func = V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
+ bool trunc_coord =
+ pCreateInfo->minFilter == VK_FILTER_NEAREST && pCreateInfo->magFilter == VK_FILTER_NEAREST;
+ bool uses_border_color = pCreateInfo->addressModeU == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
+ pCreateInfo->addressModeV == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
+ pCreateInfo->addressModeW == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
+ VkBorderColor border_color =
+ uses_border_color ? pCreateInfo->borderColor : VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
+ uint32_t border_color_ptr;
+
+ const struct VkSamplerReductionModeCreateInfo *sampler_reduction =
+ vk_find_struct_const(pCreateInfo->pNext, SAMPLER_REDUCTION_MODE_CREATE_INFO);
+ if (sampler_reduction)
+ filter_mode = radv_tex_filter_mode(sampler_reduction->reductionMode);
+
+ if (pCreateInfo->compareEnable)
+ depth_compare_func = radv_tex_compare(pCreateInfo->compareOp);
+
+ sampler->border_color_slot = RADV_BORDER_COLOR_COUNT;
+
+ if (border_color == VK_BORDER_COLOR_FLOAT_CUSTOM_EXT ||
+ border_color == VK_BORDER_COLOR_INT_CUSTOM_EXT) {
+ const VkSamplerCustomBorderColorCreateInfoEXT *custom_border_color =
+ vk_find_struct_const(pCreateInfo->pNext, SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT);
+
+ assert(custom_border_color);
+
+ sampler->border_color_slot =
+ radv_register_border_color(device, custom_border_color->customBorderColor);
+
+ /* Did we fail to find a slot? */
+ if (sampler->border_color_slot == RADV_BORDER_COLOR_COUNT) {
+ fprintf(stderr, "WARNING: no free border color slots, defaulting to TRANS_BLACK.\n");
+ border_color = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
+ }
+ }
+
+ /* If we don't have a custom color, set the ptr to 0 */
+ border_color_ptr =
+ sampler->border_color_slot != RADV_BORDER_COLOR_COUNT ? sampler->border_color_slot : 0;
+
+ sampler->state[0] =
+ (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
+ S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
+ S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
+ S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) | S_008F30_DEPTH_COMPARE_FUNC(depth_compare_func) |
+ S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
+ S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) | S_008F30_ANISO_BIAS(max_aniso_ratio) |
+ S_008F30_DISABLE_CUBE_WRAP(0) | S_008F30_COMPAT_MODE(compat_mode) |
+ S_008F30_FILTER_MODE(filter_mode) | S_008F30_TRUNC_COORD(trunc_coord));
+ sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
+ S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
+ S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
+ sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
+ S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
+ S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
+ S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
+ S_008F38_MIP_POINT_PRECLAMP(0));
+ sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(border_color_ptr) |
+ S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(border_color)));
+
+ if (device->physical_device->rad_info.chip_class >= GFX10) {
+ sampler->state[2] |= S_008F38_ANISO_OVERRIDE_GFX10(1);
+ } else {
+ sampler->state[2] |=
+ S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.chip_class <= GFX8) |
+ S_008F38_FILTER_PREC_FIX(1) |
+ S_008F38_ANISO_OVERRIDE_GFX8(device->physical_device->rad_info.chip_class >= GFX8);
+ }
+}
+
+VkResult
+radv_CreateSampler(VkDevice _device, const VkSamplerCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkSampler *pSampler)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ struct radv_sampler *sampler;
+
+ const struct VkSamplerYcbcrConversionInfo *ycbcr_conversion =
+ vk_find_struct_const(pCreateInfo->pNext, SAMPLER_YCBCR_CONVERSION_INFO);
+
+ assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
+
+ sampler = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*sampler), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (!sampler)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ vk_object_base_init(&device->vk, &sampler->base, VK_OBJECT_TYPE_SAMPLER);
+
+ radv_init_sampler(device, sampler, pCreateInfo);
+
+ sampler->ycbcr_sampler =
+ ycbcr_conversion ? radv_sampler_ycbcr_conversion_from_handle(ycbcr_conversion->conversion)
+ : NULL;
+ *pSampler = radv_sampler_to_handle(sampler);
+
+ return VK_SUCCESS;
+}
+
+void
+radv_DestroySampler(VkDevice _device, VkSampler _sampler, const VkAllocationCallbacks *pAllocator)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
+
+ if (!sampler)
+ return;
- if (!sampler)
- return;
-
- if (sampler->border_color_slot != RADV_BORDER_COLOR_COUNT)
- radv_unregister_border_color(device, sampler->border_color_slot);
-
- vk_object_base_finish(&sampler->base);
- vk_free2(&device->vk.alloc, pAllocator, sampler);
+ if (sampler->border_color_slot != RADV_BORDER_COLOR_COUNT)
+ radv_unregister_border_color(device, sampler->border_color_slot);
+
+ vk_object_base_finish(&sampler->base);
+ vk_free2(&device->vk.alloc, pAllocator, sampler);
}
PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
{
- /* For the full details on loader interface versioning, see
- * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
- * What follows is a condensed summary, to help you navigate the large and
- * confusing official doc.
- *
- * - Loader interface v0 is incompatible with later versions. We don't
- * support it.
- *
- * - In loader interface v1:
- * - The first ICD entrypoint called by the loader is
- * vk_icdGetInstanceProcAddr(). The ICD must statically expose this
- * entrypoint.
- * - The ICD must statically expose no other Vulkan symbol unless it is
- * linked with -Bsymbolic.
- * - Each dispatchable Vulkan handle created by the ICD must be
- * a pointer to a struct whose first member is VK_LOADER_DATA. The
- * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
- * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
- * vkDestroySurfaceKHR(). The ICD must be capable of working with
- * such loader-managed surfaces.
- *
- * - Loader interface v2 differs from v1 in:
- * - The first ICD entrypoint called by the loader is
- * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
- * statically expose this entrypoint.
- *
- * - Loader interface v3 differs from v2 in:
- * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
- * vkDestroySurfaceKHR(), and other APIs that use VkSurfaceKHR,
- * because the loader no longer does so.
- */
- *pSupportedVersion = MIN2(*pSupportedVersion, 4u);
- return VK_SUCCESS;
-}
-
-VkResult radv_GetMemoryFdKHR(VkDevice _device,
- const VkMemoryGetFdInfoKHR *pGetFdInfo,
- int *pFD)
-{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_device_memory, memory, pGetFdInfo->memory);
-
- assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
-
- /* At the moment, we support only the below handle types. */
- assert(pGetFdInfo->handleType ==
- VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
- pGetFdInfo->handleType ==
- VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
-
- bool ret = radv_get_memory_fd(device, memory, pFD);
- if (ret == false)
- return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
- return VK_SUCCESS;
-}
-
-static uint32_t radv_compute_valid_memory_types_attempt(struct radv_physical_device *dev,
- enum radeon_bo_domain domains,
- enum radeon_bo_flag flags,
- enum radeon_bo_flag ignore_flags)
-{
- /* Don't count GTT/CPU as relevant:
- *
- * - We're not fully consistent between the two.
- * - Sometimes VRAM gets VRAM|GTT.
- */
- const enum radeon_bo_domain relevant_domains = RADEON_DOMAIN_VRAM |
- RADEON_DOMAIN_GDS |
- RADEON_DOMAIN_OA;
- uint32_t bits = 0;
- for (unsigned i = 0; i < dev->memory_properties.memoryTypeCount; ++i) {
- if ((domains & relevant_domains) != (dev->memory_domains[i] & relevant_domains))
- continue;
-
- if ((flags & ~ignore_flags) != (dev->memory_flags[i] & ~ignore_flags))
- continue;
-
- bits |= 1u << i;
- }
-
- return bits;
-}
-
-static uint32_t radv_compute_valid_memory_types(struct radv_physical_device *dev,
- enum radeon_bo_domain domains,
- enum radeon_bo_flag flags)
-{
- enum radeon_bo_flag ignore_flags = ~(RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_GTT_WC);
- uint32_t bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);
-
- if (!bits) {
- ignore_flags |= RADEON_FLAG_GTT_WC;
- bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);
- }
-
- if (!bits) {
- ignore_flags |= RADEON_FLAG_NO_CPU_ACCESS;
- bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);
- }
-
- return bits;
-}
-VkResult radv_GetMemoryFdPropertiesKHR(VkDevice _device,
- VkExternalMemoryHandleTypeFlagBits handleType,
- int fd,
- VkMemoryFdPropertiesKHR *pMemoryFdProperties)
-{
- RADV_FROM_HANDLE(radv_device, device, _device);
-
- switch (handleType) {
- case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT: {
- enum radeon_bo_domain domains;
- enum radeon_bo_flag flags;
- if (!device->ws->buffer_get_flags_from_fd(device->ws, fd, &domains, &flags))
- return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
-
- pMemoryFdProperties->memoryTypeBits = radv_compute_valid_memory_types(device->physical_device, domains, flags);
- return VK_SUCCESS;
- }
- default:
- /* The valid usage section for this function says:
- *
- * "handleType must not be one of the handle types defined as
- * opaque."
- *
- * So opaque handle types fall into the default "unsupported" case.
- */
- return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
- }
-}
-
-static VkResult radv_import_opaque_fd(struct radv_device *device,
- int fd,
- uint32_t *syncobj)
-{
- uint32_t syncobj_handle = 0;
- int ret = device->ws->import_syncobj(device->ws, fd, &syncobj_handle);
- if (ret != 0)
- return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
-
- if (*syncobj)
- device->ws->destroy_syncobj(device->ws, *syncobj);
-
- *syncobj = syncobj_handle;
- close(fd);
-
- return VK_SUCCESS;
-}
-
-static VkResult radv_import_sync_fd(struct radv_device *device,
- int fd,
- uint32_t *syncobj)
-{
- /* If we create a syncobj we do it locally so that if we have an error, we don't
- * leave a syncobj in an undetermined state in the fence. */
- uint32_t syncobj_handle = *syncobj;
- if (!syncobj_handle) {
- bool create_signaled = fd == -1 ? true : false;
-
- int ret = device->ws->create_syncobj(device->ws, create_signaled,
- &syncobj_handle);
- if (ret) {
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- }
- } else {
- if (fd == -1)
- device->ws->signal_syncobj(device->ws, syncobj_handle, 0);
- }
-
- if (fd != -1) {
- int ret = device->ws->import_syncobj_from_sync_file(device->ws, syncobj_handle, fd);
- if (ret)
- return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
- close(fd);
- }
-
- *syncobj = syncobj_handle;
-
- return VK_SUCCESS;
-}
-
-VkResult radv_ImportSemaphoreFdKHR(VkDevice _device,
- const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo)
-{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_semaphore, sem, pImportSemaphoreFdInfo->semaphore);
- VkResult result;
- struct radv_semaphore_part *dst = NULL;
- bool timeline = sem->permanent.kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ;
-
- if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT) {
- assert(!timeline);
- dst = &sem->temporary;
- } else {
- dst = &sem->permanent;
- }
-
- uint32_t syncobj = (dst->kind == RADV_SEMAPHORE_SYNCOBJ ||
- dst->kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ) ? dst->syncobj : 0;
-
- switch(pImportSemaphoreFdInfo->handleType) {
- case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
- result = radv_import_opaque_fd(device, pImportSemaphoreFdInfo->fd, &syncobj);
- break;
- case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
- assert(!timeline);
- result = radv_import_sync_fd(device, pImportSemaphoreFdInfo->fd, &syncobj);
- break;
- default:
- unreachable("Unhandled semaphore handle type");
- }
-
- if (result == VK_SUCCESS) {
- dst->syncobj = syncobj;
- dst->kind = RADV_SEMAPHORE_SYNCOBJ;
- if (timeline) {
- dst->kind = RADV_SEMAPHORE_TIMELINE_SYNCOBJ;
- dst->timeline_syncobj.max_point = 0;
- }
- }
-
- return result;
-}
-
-VkResult radv_GetSemaphoreFdKHR(VkDevice _device,
- const VkSemaphoreGetFdInfoKHR *pGetFdInfo,
- int *pFd)
-{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_semaphore, sem, pGetFdInfo->semaphore);
- int ret;
- uint32_t syncobj_handle;
-
- if (sem->temporary.kind != RADV_SEMAPHORE_NONE) {
- assert(sem->temporary.kind == RADV_SEMAPHORE_SYNCOBJ ||
- sem->temporary.kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ);
- syncobj_handle = sem->temporary.syncobj;
- } else {
- assert(sem->permanent.kind == RADV_SEMAPHORE_SYNCOBJ ||
- sem->permanent.kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ);
- syncobj_handle = sem->permanent.syncobj;
- }
-
- switch(pGetFdInfo->handleType) {
- case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
- ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
- if (ret)
- return vk_error(device->instance, VK_ERROR_TOO_MANY_OBJECTS);
- break;
- case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
- ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd);
- if (ret)
- return vk_error(device->instance, VK_ERROR_TOO_MANY_OBJECTS);
-
- if (sem->temporary.kind != RADV_SEMAPHORE_NONE) {
- radv_destroy_semaphore_part(device, &sem->temporary);
- } else {
- device->ws->reset_syncobj(device->ws, syncobj_handle);
- }
- break;
- default:
- unreachable("Unhandled semaphore handle type");
- }
-
- return VK_SUCCESS;
-}
-
-void radv_GetPhysicalDeviceExternalSemaphoreProperties(
- VkPhysicalDevice physicalDevice,
- const VkPhysicalDeviceExternalSemaphoreInfo *pExternalSemaphoreInfo,
- VkExternalSemaphoreProperties *pExternalSemaphoreProperties)
-{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
- VkSemaphoreTypeKHR type = radv_get_semaphore_type(pExternalSemaphoreInfo->pNext, NULL);
-
- if (type == VK_SEMAPHORE_TYPE_TIMELINE && pdevice->rad_info.has_timeline_syncobj &&
- pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) {
- pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
- pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
- pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
- VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
- } else if (type == VK_SEMAPHORE_TYPE_TIMELINE) {
- pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
- pExternalSemaphoreProperties->compatibleHandleTypes = 0;
- pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
- } else if (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT ||
- pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT) {
- pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
- pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
- pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
- VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
- } else if (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) {
- pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
- pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
- pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
- VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
- } else {
- pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
- pExternalSemaphoreProperties->compatibleHandleTypes = 0;
- pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
- }
-}
-
-VkResult radv_ImportFenceFdKHR(VkDevice _device,
- const VkImportFenceFdInfoKHR *pImportFenceFdInfo)
-{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_fence, fence, pImportFenceFdInfo->fence);
- struct radv_fence_part *dst = NULL;
- VkResult result;
-
- if (pImportFenceFdInfo->flags & VK_FENCE_IMPORT_TEMPORARY_BIT) {
- dst = &fence->temporary;
- } else {
- dst = &fence->permanent;
- }
-
- uint32_t syncobj = dst->kind == RADV_FENCE_SYNCOBJ ? dst->syncobj : 0;
-
- switch(pImportFenceFdInfo->handleType) {
- case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
- result = radv_import_opaque_fd(device, pImportFenceFdInfo->fd, &syncobj);
- break;
- case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT:
- result = radv_import_sync_fd(device, pImportFenceFdInfo->fd, &syncobj);
- break;
- default:
- unreachable("Unhandled fence handle type");
- }
-
- if (result == VK_SUCCESS) {
- dst->syncobj = syncobj;
- dst->kind = RADV_FENCE_SYNCOBJ;
- }
-
- return result;
-}
-
-VkResult radv_GetFenceFdKHR(VkDevice _device,
- const VkFenceGetFdInfoKHR *pGetFdInfo,
- int *pFd)
-{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_fence, fence, pGetFdInfo->fence);
- int ret;
-
- struct radv_fence_part *part =
- fence->temporary.kind != RADV_FENCE_NONE ?
- &fence->temporary : &fence->permanent;
-
- switch(pGetFdInfo->handleType) {
- case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
- ret = device->ws->export_syncobj(device->ws, part->syncobj, pFd);
- if (ret)
- return vk_error(device->instance, VK_ERROR_TOO_MANY_OBJECTS);
- break;
- case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT:
- ret = device->ws->export_syncobj_to_sync_file(device->ws,
- part->syncobj, pFd);
- if (ret)
- return vk_error(device->instance, VK_ERROR_TOO_MANY_OBJECTS);
-
- if (part == &fence->temporary) {
- radv_destroy_fence_part(device, part);
- } else {
- device->ws->reset_syncobj(device->ws, part->syncobj);
- }
- break;
- default:
- unreachable("Unhandled fence handle type");
- }
-
- return VK_SUCCESS;
-}
-
-void radv_GetPhysicalDeviceExternalFenceProperties(
- VkPhysicalDevice physicalDevice,
- const VkPhysicalDeviceExternalFenceInfo *pExternalFenceInfo,
- VkExternalFenceProperties *pExternalFenceProperties)
-{
- if (pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT ||
- pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT) {
- pExternalFenceProperties->exportFromImportedHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
- pExternalFenceProperties->compatibleHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
- pExternalFenceProperties->externalFenceFeatures = VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT |
- VK_EXTERNAL_FENCE_FEATURE_IMPORTABLE_BIT;
- } else {
- pExternalFenceProperties->exportFromImportedHandleTypes = 0;
- pExternalFenceProperties->compatibleHandleTypes = 0;
- pExternalFenceProperties->externalFenceFeatures = 0;
- }
+ /* For the full details on loader interface versioning, see
+ * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
+ * What follows is a condensed summary, to help you navigate the large and
+ * confusing official doc.
+ *
+ * - Loader interface v0 is incompatible with later versions. We don't
+ * support it.
+ *
+ * - In loader interface v1:
+ * - The first ICD entrypoint called by the loader is
+ * vk_icdGetInstanceProcAddr(). The ICD must statically expose this
+ * entrypoint.
+ * - The ICD must statically expose no other Vulkan symbol unless it is
+ * linked with -Bsymbolic.
+ * - Each dispatchable Vulkan handle created by the ICD must be
+ * a pointer to a struct whose first member is VK_LOADER_DATA. The
+ * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
+ * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
+ * vkDestroySurfaceKHR(). The ICD must be capable of working with
+ * such loader-managed surfaces.
+ *
+ * - Loader interface v2 differs from v1 in:
+ * - The first ICD entrypoint called by the loader is
+ * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
+ * statically expose this entrypoint.
+ *
+ * - Loader interface v3 differs from v2 in:
+ * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
+ * vkDestroySurfaceKHR(), and other APIs that use VkSurfaceKHR,
+ * because the loader no longer does so.
+ */
+ *pSupportedVersion = MIN2(*pSupportedVersion, 4u);
+ return VK_SUCCESS;
+}
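
The negotiation above boils down to a clamp: the loader passes in the newest interface version it knows, and the ICD lowers it to the newest version it implements (4 here). A hedged sketch of that handshake from the caller's point of view, with an invented loader version to show the clamp (icd_negotiate stands in for vk_icdNegotiateLoaderICDInterfaceVersion and is not a real entry point):

   #include <stdint.h>
   #include <assert.h>

   #define ICD_MAX_LOADER_INTERFACE_VERSION 4u

   /* Stand-in for the negotiate entrypoint: the ICD never raises the
    * version, it only lowers it to the highest version it supports. */
   static void
   icd_negotiate(uint32_t *version)
   {
      if (*version > ICD_MAX_LOADER_INTERFACE_VERSION)
         *version = ICD_MAX_LOADER_INTERFACE_VERSION;
   }

   int
   main(void)
   {
      uint32_t version = 6; /* hypothetical newer loader */
      icd_negotiate(&version);
      assert(version == 4); /* both sides proceed with v4 */
      return 0;
   }
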
+
+VkResult
+radv_GetMemoryFdKHR(VkDevice _device, const VkMemoryGetFdInfoKHR *pGetFdInfo, int *pFD)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_device_memory, memory, pGetFdInfo->memory);
+
+ assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
+
+ /* At the moment, we support only the below handle types. */
+ assert(pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
+ pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
+
+ bool ret = radv_get_memory_fd(device, memory, pFD);
+ if (ret == false)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ return VK_SUCCESS;
+}
+
+static uint32_t
+radv_compute_valid_memory_types_attempt(struct radv_physical_device *dev,
+ enum radeon_bo_domain domains, enum radeon_bo_flag flags,
+ enum radeon_bo_flag ignore_flags)
+{
+ /* Don't count GTT/CPU as relevant:
+ *
+ * - We're not fully consistent between the two.
+ * - Sometimes VRAM gets VRAM|GTT.
+ */
+ const enum radeon_bo_domain relevant_domains =
+ RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GDS | RADEON_DOMAIN_OA;
+ uint32_t bits = 0;
+ for (unsigned i = 0; i < dev->memory_properties.memoryTypeCount; ++i) {
+ if ((domains & relevant_domains) != (dev->memory_domains[i] & relevant_domains))
+ continue;
+
+ if ((flags & ~ignore_flags) != (dev->memory_flags[i] & ~ignore_flags))
+ continue;
+
+ bits |= 1u << i;
+ }
+
+ return bits;
+}
+
+static uint32_t
+radv_compute_valid_memory_types(struct radv_physical_device *dev, enum radeon_bo_domain domains,
+ enum radeon_bo_flag flags)
+{
+ enum radeon_bo_flag ignore_flags = ~(RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_GTT_WC);
+ uint32_t bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);
+
+ if (!bits) {
+ ignore_flags |= RADEON_FLAG_GTT_WC;
+ bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);
+ }
+
+ if (!bits) {
+ ignore_flags |= RADEON_FLAG_NO_CPU_ACCESS;
+ bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);
+ }
+
+ return bits;
+}
+VkResult
+radv_GetMemoryFdPropertiesKHR(VkDevice _device, VkExternalMemoryHandleTypeFlagBits handleType,
+ int fd, VkMemoryFdPropertiesKHR *pMemoryFdProperties)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+
+ switch (handleType) {
+ case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT: {
+ enum radeon_bo_domain domains;
+ enum radeon_bo_flag flags;
+ if (!device->ws->buffer_get_flags_from_fd(device->ws, fd, &domains, &flags))
+ return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
+
+ pMemoryFdProperties->memoryTypeBits =
+ radv_compute_valid_memory_types(device->physical_device, domains, flags);
+ return VK_SUCCESS;
+ }
+ default:
+ /* The valid usage section for this function says:
+ *
+ * "handleType must not be one of the handle types defined as
+ * opaque."
+ *
+ * So opaque handle types fall into the default "unsupported" case.
+ */
+ return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
+ }
+}
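
On the application side, the memoryTypeBits mask returned for a dma-buf is consumed the usual way: pick a set bit whose memory type also has the property flags you need. A small sketch of that selection, assuming a VkPhysicalDeviceMemoryProperties struct queried beforehand (the helper name is illustrative):

   #include <vulkan/vulkan.h>

   /* Return the first memory type index that is allowed by the import's
    * memoryTypeBits mask and has all of the requested property flags,
    * or -1 if none matches. */
   static int
   pick_memory_type(const VkPhysicalDeviceMemoryProperties *props,
                    uint32_t memoryTypeBits, VkMemoryPropertyFlags required)
   {
      for (uint32_t i = 0; i < props->memoryTypeCount; i++) {
         if (!(memoryTypeBits & (1u << i)))
            continue;
         if ((props->memoryTypes[i].propertyFlags & required) == required)
            return (int)i;
      }
      return -1;
   }
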
+
+static VkResult
+radv_import_opaque_fd(struct radv_device *device, int fd, uint32_t *syncobj)
+{
+ uint32_t syncobj_handle = 0;
+ int ret = device->ws->import_syncobj(device->ws, fd, &syncobj_handle);
+ if (ret != 0)
+ return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
+
+ if (*syncobj)
+ device->ws->destroy_syncobj(device->ws, *syncobj);
+
+ *syncobj = syncobj_handle;
+ close(fd);
+
+ return VK_SUCCESS;
+}
+
+static VkResult
+radv_import_sync_fd(struct radv_device *device, int fd, uint32_t *syncobj)
+{
+ /* If we create a syncobj we do it locally so that if we have an error, we don't
+ * leave a syncobj in an undetermined state in the fence. */
+ uint32_t syncobj_handle = *syncobj;
+ if (!syncobj_handle) {
+ bool create_signaled = fd == -1 ? true : false;
+
+ int ret = device->ws->create_syncobj(device->ws, create_signaled, &syncobj_handle);
+ if (ret) {
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+ } else {
+ if (fd == -1)
+ device->ws->signal_syncobj(device->ws, syncobj_handle, 0);
+ }
+
+ if (fd != -1) {
+ int ret = device->ws->import_syncobj_from_sync_file(device->ws, syncobj_handle, fd);
+ if (ret)
+ return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
+ close(fd);
+ }
+
+ *syncobj = syncobj_handle;
+
+ return VK_SUCCESS;
+}
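
Seen from the API, a sync-file import is always temporary and an fd of -1 means "already signaled", which is why the helper above creates the syncobj pre-signaled in that case. A caller-side sketch, with the extension entry point passed in explicitly since an application would normally resolve it through vkGetDeviceProcAddr (the wrapper name is illustrative):

   #include <vulkan/vulkan.h>

   /* Hand a sync-file fd to a binary semaphore. On success the driver takes
    * ownership of the fd and closes it; fd == -1 imports an already
    * signaled payload. */
   static VkResult
   import_sync_fd_into_semaphore(PFN_vkImportSemaphoreFdKHR import_fd,
                                 VkDevice device, VkSemaphore sem, int fd)
   {
      const VkImportSemaphoreFdInfoKHR info = {
         .sType = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR,
         .semaphore = sem,
         /* The spec requires sync-fd imports to be temporary. */
         .flags = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT,
         .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT,
         .fd = fd,
      };
      return import_fd(device, &info);
   }
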
+
+VkResult
+radv_ImportSemaphoreFdKHR(VkDevice _device,
+ const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_semaphore, sem, pImportSemaphoreFdInfo->semaphore);
+ VkResult result;
+ struct radv_semaphore_part *dst = NULL;
+ bool timeline = sem->permanent.kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ;
+
+ if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT) {
+ assert(!timeline);
+ dst = &sem->temporary;
+ } else {
+ dst = &sem->permanent;
+ }
+
+ uint32_t syncobj =
+ (dst->kind == RADV_SEMAPHORE_SYNCOBJ || dst->kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ)
+ ? dst->syncobj
+ : 0;
+
+ switch (pImportSemaphoreFdInfo->handleType) {
+ case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
+ result = radv_import_opaque_fd(device, pImportSemaphoreFdInfo->fd, &syncobj);
+ break;
+ case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
+ assert(!timeline);
+ result = radv_import_sync_fd(device, pImportSemaphoreFdInfo->fd, &syncobj);
+ break;
+ default:
+ unreachable("Unhandled semaphore handle type");
+ }
+
+ if (result == VK_SUCCESS) {
+ dst->syncobj = syncobj;
+ dst->kind = RADV_SEMAPHORE_SYNCOBJ;
+ if (timeline) {
+ dst->kind = RADV_SEMAPHORE_TIMELINE_SYNCOBJ;
+ dst->timeline_syncobj.max_point = 0;
+ }
+ }
+
+ return result;
+}
+
+VkResult
+radv_GetSemaphoreFdKHR(VkDevice _device, const VkSemaphoreGetFdInfoKHR *pGetFdInfo, int *pFd)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_semaphore, sem, pGetFdInfo->semaphore);
+ int ret;
+ uint32_t syncobj_handle;
+
+ if (sem->temporary.kind != RADV_SEMAPHORE_NONE) {
+ assert(sem->temporary.kind == RADV_SEMAPHORE_SYNCOBJ ||
+ sem->temporary.kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ);
+ syncobj_handle = sem->temporary.syncobj;
+ } else {
+ assert(sem->permanent.kind == RADV_SEMAPHORE_SYNCOBJ ||
+ sem->permanent.kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ);
+ syncobj_handle = sem->permanent.syncobj;
+ }
+
+ switch (pGetFdInfo->handleType) {
+ case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
+ ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
+ if (ret)
+ return vk_error(device->instance, VK_ERROR_TOO_MANY_OBJECTS);
+ break;
+ case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
+ ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd);
+ if (ret)
+ return vk_error(device->instance, VK_ERROR_TOO_MANY_OBJECTS);
+
+ if (sem->temporary.kind != RADV_SEMAPHORE_NONE) {
+ radv_destroy_semaphore_part(device, &sem->temporary);
+ } else {
+ device->ws->reset_syncobj(device->ws, syncobj_handle);
+ }
+ break;
+ default:
+ unreachable("Unhandled semaphore handle type");
+ }
+
+ return VK_SUCCESS;
}
void
-radv_GetDeviceGroupPeerMemoryFeatures(
- VkDevice device,
- uint32_t heapIndex,
- uint32_t localDeviceIndex,
- uint32_t remoteDeviceIndex,
- VkPeerMemoryFeatureFlags* pPeerMemoryFeatures)
+radv_GetPhysicalDeviceExternalSemaphoreProperties(
+ VkPhysicalDevice physicalDevice,
+ const VkPhysicalDeviceExternalSemaphoreInfo *pExternalSemaphoreInfo,
+ VkExternalSemaphoreProperties *pExternalSemaphoreProperties)
+{
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
+ VkSemaphoreTypeKHR type = radv_get_semaphore_type(pExternalSemaphoreInfo->pNext, NULL);
+
+ if (type == VK_SEMAPHORE_TYPE_TIMELINE && pdevice->rad_info.has_timeline_syncobj &&
+ pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) {
+ pExternalSemaphoreProperties->exportFromImportedHandleTypes =
+ VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
+ pExternalSemaphoreProperties->compatibleHandleTypes =
+ VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
+ pExternalSemaphoreProperties->externalSemaphoreFeatures =
+ VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
+ VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
+ } else if (type == VK_SEMAPHORE_TYPE_TIMELINE) {
+ pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
+ pExternalSemaphoreProperties->compatibleHandleTypes = 0;
+ pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
+ } else if (pExternalSemaphoreInfo->handleType ==
+ VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT ||
+ pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT) {
+ pExternalSemaphoreProperties->exportFromImportedHandleTypes =
+ VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT |
+ VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
+ pExternalSemaphoreProperties->compatibleHandleTypes =
+ VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT |
+ VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
+ pExternalSemaphoreProperties->externalSemaphoreFeatures =
+ VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
+ VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
+ } else if (pExternalSemaphoreInfo->handleType ==
+ VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) {
+ pExternalSemaphoreProperties->exportFromImportedHandleTypes =
+ VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
+ pExternalSemaphoreProperties->compatibleHandleTypes =
+ VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
+ pExternalSemaphoreProperties->externalSemaphoreFeatures =
+ VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
+ VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
+ } else {
+ pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
+ pExternalSemaphoreProperties->compatibleHandleTypes = 0;
+ pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
+ }
+}
+
+VkResult
+radv_ImportFenceFdKHR(VkDevice _device, const VkImportFenceFdInfoKHR *pImportFenceFdInfo)
{
- assert(localDeviceIndex == remoteDeviceIndex);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_fence, fence, pImportFenceFdInfo->fence);
+ struct radv_fence_part *dst = NULL;
+ VkResult result;
+
+ if (pImportFenceFdInfo->flags & VK_FENCE_IMPORT_TEMPORARY_BIT) {
+ dst = &fence->temporary;
+ } else {
+ dst = &fence->permanent;
+ }
+
+ uint32_t syncobj = dst->kind == RADV_FENCE_SYNCOBJ ? dst->syncobj : 0;
+
+ switch (pImportFenceFdInfo->handleType) {
+ case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
+ result = radv_import_opaque_fd(device, pImportFenceFdInfo->fd, &syncobj);
+ break;
+ case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT:
+ result = radv_import_sync_fd(device, pImportFenceFdInfo->fd, &syncobj);
+ break;
+ default:
+ unreachable("Unhandled fence handle type");
+ }
- *pPeerMemoryFeatures = VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT |
- VK_PEER_MEMORY_FEATURE_COPY_DST_BIT |
- VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |
- VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
+ if (result == VK_SUCCESS) {
+ dst->syncobj = syncobj;
+ dst->kind = RADV_FENCE_SYNCOBJ;
+ }
+
+ return result;
+}
+
+VkResult
+radv_GetFenceFdKHR(VkDevice _device, const VkFenceGetFdInfoKHR *pGetFdInfo, int *pFd)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_fence, fence, pGetFdInfo->fence);
+ int ret;
+
+ struct radv_fence_part *part =
+ fence->temporary.kind != RADV_FENCE_NONE ? &fence->temporary : &fence->permanent;
+
+ switch (pGetFdInfo->handleType) {
+ case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
+ ret = device->ws->export_syncobj(device->ws, part->syncobj, pFd);
+ if (ret)
+ return vk_error(device->instance, VK_ERROR_TOO_MANY_OBJECTS);
+ break;
+ case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT:
+ ret = device->ws->export_syncobj_to_sync_file(device->ws, part->syncobj, pFd);
+ if (ret)
+ return vk_error(device->instance, VK_ERROR_TOO_MANY_OBJECTS);
+
+ if (part == &fence->temporary) {
+ radv_destroy_fence_part(device, part);
+ } else {
+ device->ws->reset_syncobj(device->ws, part->syncobj);
+ }
+ break;
+ default:
+ unreachable("Unhandled fence handle type");
+ }
+
+ return VK_SUCCESS;
+}
+
+void
+radv_GetPhysicalDeviceExternalFenceProperties(
+ VkPhysicalDevice physicalDevice, const VkPhysicalDeviceExternalFenceInfo *pExternalFenceInfo,
+ VkExternalFenceProperties *pExternalFenceProperties)
+{
+ if (pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT ||
+ pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT) {
+ pExternalFenceProperties->exportFromImportedHandleTypes =
+ VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
+ pExternalFenceProperties->compatibleHandleTypes =
+ VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
+ pExternalFenceProperties->externalFenceFeatures =
+ VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT | VK_EXTERNAL_FENCE_FEATURE_IMPORTABLE_BIT;
+ } else {
+ pExternalFenceProperties->exportFromImportedHandleTypes = 0;
+ pExternalFenceProperties->compatibleHandleTypes = 0;
+ pExternalFenceProperties->externalFenceFeatures = 0;
+ }
+}
+
+void
+radv_GetDeviceGroupPeerMemoryFeatures(VkDevice device, uint32_t heapIndex,
+ uint32_t localDeviceIndex, uint32_t remoteDeviceIndex,
+ VkPeerMemoryFeatureFlags *pPeerMemoryFeatures)
+{
+ assert(localDeviceIndex == remoteDeviceIndex);
+
+ *pPeerMemoryFeatures =
+ VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT | VK_PEER_MEMORY_FEATURE_COPY_DST_BIT |
+ VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT | VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
}
static const VkTimeDomainEXT radv_time_domains[] = {
- VK_TIME_DOMAIN_DEVICE_EXT,
- VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
+ VK_TIME_DOMAIN_DEVICE_EXT,
+ VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
#ifdef CLOCK_MONOTONIC_RAW
- VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
+ VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
#endif
};
-VkResult radv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(
- VkPhysicalDevice physicalDevice,
- uint32_t *pTimeDomainCount,
- VkTimeDomainEXT *pTimeDomains)
+VkResult
+radv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(VkPhysicalDevice physicalDevice,
+ uint32_t *pTimeDomainCount,
+ VkTimeDomainEXT *pTimeDomains)
{
- int d;
- VK_OUTARRAY_MAKE_TYPED(VkTimeDomainEXT, out, pTimeDomains,
- pTimeDomainCount);
+ int d;
+ VK_OUTARRAY_MAKE_TYPED(VkTimeDomainEXT, out, pTimeDomains, pTimeDomainCount);
- for (d = 0; d < ARRAY_SIZE(radv_time_domains); d++) {
- vk_outarray_append_typed(VkTimeDomainEXT, &out, i) {
- *i = radv_time_domains[d];
- }
- }
+ for (d = 0; d < ARRAY_SIZE(radv_time_domains); d++) {
+ vk_outarray_append_typed(VkTimeDomainEXT, &out, i)
+ {
+ *i = radv_time_domains[d];
+ }
+ }
- return vk_outarray_status(&out);
+ return vk_outarray_status(&out);
}
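
Like every Vulkan pCount/pArray query, this one follows the two-call idiom: first ask for the count with a NULL array, then allocate and fetch. A hedged caller-side sketch; the entry point is an EXT function, so it is passed in as a pointer the application is assumed to have resolved via vkGetInstanceProcAddr:

   #include <stdlib.h>
   #include <vulkan/vulkan.h>

   /* Two-call enumeration for VK_EXT_calibrated_timestamps: query the
    * count, allocate, then fetch the domains. Returns NULL on failure. */
   static VkTimeDomainEXT *
   query_time_domains(PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT get_domains,
                      VkPhysicalDevice pdev, uint32_t *count)
   {
      if (get_domains(pdev, count, NULL) != VK_SUCCESS || *count == 0)
         return NULL;

      VkTimeDomainEXT *domains = malloc(*count * sizeof(*domains));
      if (!domains)
         return NULL;

      /* VK_INCOMPLETE is a positive status, not an error. */
      if (get_domains(pdev, count, domains) < VK_SUCCESS) {
         free(domains);
         return NULL;
      }
      return domains;
   }
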
#ifndef _WIN32
static uint64_t
radv_clock_gettime(clockid_t clock_id)
{
- struct timespec current;
- int ret;
+ struct timespec current;
+ int ret;
- ret = clock_gettime(clock_id, &current);
+ ret = clock_gettime(clock_id, &current);
#ifdef CLOCK_MONOTONIC_RAW
- if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
- ret = clock_gettime(CLOCK_MONOTONIC, &current);
+ if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
+ ret = clock_gettime(CLOCK_MONOTONIC, &current);
#endif
- if (ret < 0)
- return 0;
+ if (ret < 0)
+ return 0;
- return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec;
+ return (uint64_t)current.tv_sec * 1000000000ULL + current.tv_nsec;
}
-VkResult radv_GetCalibratedTimestampsEXT(
- VkDevice _device,
- uint32_t timestampCount,
- const VkCalibratedTimestampInfoEXT *pTimestampInfos,
- uint64_t *pTimestamps,
- uint64_t *pMaxDeviation)
+VkResult
+radv_GetCalibratedTimestampsEXT(VkDevice _device, uint32_t timestampCount,
+ const VkCalibratedTimestampInfoEXT *pTimestampInfos,
+ uint64_t *pTimestamps, uint64_t *pMaxDeviation)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- uint32_t clock_crystal_freq = device->physical_device->rad_info.clock_crystal_freq;
- int d;
- uint64_t begin, end;
- uint64_t max_clock_period = 0;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ uint32_t clock_crystal_freq = device->physical_device->rad_info.clock_crystal_freq;
+ int d;
+ uint64_t begin, end;
+ uint64_t max_clock_period = 0;
#ifdef CLOCK_MONOTONIC_RAW
- begin = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
+ begin = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
#else
- begin = radv_clock_gettime(CLOCK_MONOTONIC);
+ begin = radv_clock_gettime(CLOCK_MONOTONIC);
#endif
- for (d = 0; d < timestampCount; d++) {
- switch (pTimestampInfos[d].timeDomain) {
- case VK_TIME_DOMAIN_DEVICE_EXT:
- pTimestamps[d] = device->ws->query_value(device->ws,
- RADEON_TIMESTAMP);
- uint64_t device_period = DIV_ROUND_UP(1000000, clock_crystal_freq);
- max_clock_period = MAX2(max_clock_period, device_period);
- break;
- case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
- pTimestamps[d] = radv_clock_gettime(CLOCK_MONOTONIC);
- max_clock_period = MAX2(max_clock_period, 1);
- break;
+ for (d = 0; d < timestampCount; d++) {
+ switch (pTimestampInfos[d].timeDomain) {
+ case VK_TIME_DOMAIN_DEVICE_EXT:
+ pTimestamps[d] = device->ws->query_value(device->ws, RADEON_TIMESTAMP);
+ uint64_t device_period = DIV_ROUND_UP(1000000, clock_crystal_freq);
+ max_clock_period = MAX2(max_clock_period, device_period);
+ break;
+ case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
+ pTimestamps[d] = radv_clock_gettime(CLOCK_MONOTONIC);
+ max_clock_period = MAX2(max_clock_period, 1);
+ break;
#ifdef CLOCK_MONOTONIC_RAW
- case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
- pTimestamps[d] = begin;
- break;
+ case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
+ pTimestamps[d] = begin;
+ break;
#endif
- default:
- pTimestamps[d] = 0;
- break;
- }
- }
+ default:
+ pTimestamps[d] = 0;
+ break;
+ }
+ }
#ifdef CLOCK_MONOTONIC_RAW
- end = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
+ end = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
#else
- end = radv_clock_gettime(CLOCK_MONOTONIC);
+ end = radv_clock_gettime(CLOCK_MONOTONIC);
#endif
- /*
- * The maximum deviation is the sum of the interval over which we
- * perform the sampling and the maximum period of any sampled
- * clock. That's because the maximum skew between any two sampled
- * clock edges is when the sampled clock with the largest period is
- * sampled at the end of that period but right at the beginning of the
- * sampling interval and some other clock is sampled right at the
- * beginning of its sampling period and right at the end of the
- * sampling interval. Let's assume the GPU has the longest clock
- * period and that the application is sampling GPU and monotonic:
- *
- * s e
- * w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
- * Raw -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
- *
- * g
- * 0 1 2 3
- * GPU -----_____-----_____-----_____-----_____
- *
- * m
- * x y z 0 1 2 3 4 5 6 7 8 9 a b c
- * Monotonic -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
- *
- * Interval <----------------->
- * Deviation <-------------------------->
- *
- * s = read(raw) 2
- * g = read(GPU) 1
- * m = read(monotonic) 2
- * e = read(raw) b
- *
- * We round the sample interval up by one tick to cover sampling error
- * in the interval clock
- */
-
- uint64_t sample_interval = end - begin + 1;
-
- *pMaxDeviation = sample_interval + max_clock_period;
-
- return VK_SUCCESS;
+ /*
+ * The maximum deviation is the sum of the interval over which we
+ * perform the sampling and the maximum period of any sampled
+ * clock. That's because the maximum skew between any two sampled
+ * clock edges is when the sampled clock with the largest period is
+ * sampled at the end of that period but right at the beginning of the
+ * sampling interval and some other clock is sampled right at the
+ * beginning of its sampling period and right at the end of the
+ * sampling interval. Let's assume the GPU has the longest clock
+ * period and that the application is sampling GPU and monotonic:
+ *
+ * s e
+ * w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
+ * Raw -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
+ *
+ * g
+ * 0 1 2 3
+ * GPU -----_____-----_____-----_____-----_____
+ *
+ * m
+ * x y z 0 1 2 3 4 5 6 7 8 9 a b c
+ * Monotonic -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
+ *
+ * Interval <----------------->
+ * Deviation <-------------------------->
+ *
+ * s = read(raw) 2
+ * g = read(GPU) 1
+ * m = read(monotonic) 2
+ * e = read(raw) b
+ *
+ * We round the sample interval up by one tick to cover sampling error
+ * in the interval clock
+ */
+
+ uint64_t sample_interval = end - begin + 1;
+
+ *pMaxDeviation = sample_interval + max_clock_period;
+
+ return VK_SUCCESS;
}
#endif
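
As a concrete instance of the bound derived above: with a 100 MHz GPU reference clock and a 3 µs CPU-side sampling window, the reported deviation is the window, plus one tick of the interval clock for rounding, plus one GPU clock period. The numbers below are made up for illustration; the only unit assumption is that clock_crystal_freq is in kHz, which the 1000000 numerator in DIV_ROUND_UP suggests.

   #include <stdint.h>
   #include <stdio.h>

   #define DIV_ROUND_UP(a, b) (((a) + (b)-1) / (b))
   #define MAX2(a, b)         ((a) > (b) ? (a) : (b))

   int
   main(void)
   {
      /* Illustrative values only: a 100 MHz reference clock reported as
       * 100000 kHz and a 3000 ns sampling window on the CPU side. */
      const uint64_t clock_crystal_freq = 100000;   /* kHz (assumed unit) */
      const uint64_t begin = 1000000, end = 1003000; /* ns */

      uint64_t device_period = DIV_ROUND_UP(1000000, clock_crystal_freq); /* 10 ns */
      uint64_t max_clock_period = MAX2((uint64_t)1, device_period);
      uint64_t sample_interval = end - begin + 1; /* 3001 ns */

      /* Prints 3011: 3000 ns window + 1 ns rounding + 10 ns GPU period. */
      printf("maxDeviation = %llu ns\n",
             (unsigned long long)(sample_interval + max_clock_period));
      return 0;
   }
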
-void radv_GetPhysicalDeviceMultisamplePropertiesEXT(
- VkPhysicalDevice physicalDevice,
- VkSampleCountFlagBits samples,
- VkMultisamplePropertiesEXT* pMultisampleProperties)
-{
- RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
- VkSampleCountFlagBits supported_samples = VK_SAMPLE_COUNT_2_BIT |
- VK_SAMPLE_COUNT_4_BIT;
-
- if (physical_device->rad_info.chip_class < GFX10)
- supported_samples |= VK_SAMPLE_COUNT_8_BIT;
-
- if (samples & supported_samples) {
- pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 2, 2 };
- } else {
- pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 0, 0 };
- }
-}
-
-VkResult radv_GetPhysicalDeviceFragmentShadingRatesKHR(
- VkPhysicalDevice physicalDevice,
- uint32_t* pFragmentShadingRateCount,
- VkPhysicalDeviceFragmentShadingRateKHR* pFragmentShadingRates)
-{
- VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceFragmentShadingRateKHR, out, pFragmentShadingRates, pFragmentShadingRateCount);
-
-#define append_rate(w, h, s) { \
- VkPhysicalDeviceFragmentShadingRateKHR rate = { \
- .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR,\
- .sampleCounts = s, \
- .fragmentSize = { .width = w, .height = h }, \
- }; \
- vk_outarray_append_typed(VkPhysicalDeviceFragmentShadingRateKHR, &out, r) *r = rate; \
-}
-
- for (uint32_t x = 2; x >= 1; x--) {
- for (uint32_t y = 2; y >= 1; y--) {
- append_rate(x, y, VK_SAMPLE_COUNT_1_BIT |
- VK_SAMPLE_COUNT_2_BIT |
- VK_SAMPLE_COUNT_4_BIT |
- VK_SAMPLE_COUNT_8_BIT);
- }
- }
+void
+radv_GetPhysicalDeviceMultisamplePropertiesEXT(VkPhysicalDevice physicalDevice,
+ VkSampleCountFlagBits samples,
+ VkMultisamplePropertiesEXT *pMultisampleProperties)
+{
+ RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
+ VkSampleCountFlagBits supported_samples = VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT;
+
+ if (physical_device->rad_info.chip_class < GFX10)
+ supported_samples |= VK_SAMPLE_COUNT_8_BIT;
+
+ if (samples & supported_samples) {
+ pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){2, 2};
+ } else {
+ pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){0, 0};
+ }
+}
+
+VkResult
+radv_GetPhysicalDeviceFragmentShadingRatesKHR(
+ VkPhysicalDevice physicalDevice, uint32_t *pFragmentShadingRateCount,
+ VkPhysicalDeviceFragmentShadingRateKHR *pFragmentShadingRates)
+{
+ VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceFragmentShadingRateKHR, out, pFragmentShadingRates,
+ pFragmentShadingRateCount);
+
+#define append_rate(w, h, s) \
+ { \
+ VkPhysicalDeviceFragmentShadingRateKHR rate = { \
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR, \
+ .sampleCounts = s, \
+ .fragmentSize = {.width = w, .height = h}, \
+ }; \
+ vk_outarray_append_typed(VkPhysicalDeviceFragmentShadingRateKHR, &out, r) *r = rate; \
+ }
+
+ for (uint32_t x = 2; x >= 1; x--) {
+ for (uint32_t y = 2; y >= 1; y--) {
+ append_rate(x, y,
+ VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT |
+ VK_SAMPLE_COUNT_8_BIT);
+ }
+ }
#undef append_rate
- return vk_outarray_status(&out);
+ return vk_outarray_status(&out);
}
diff --git a/src/amd/vulkan/radv_formats.c b/src/amd/vulkan/radv_formats.c
index cbb6d5e9373..925444d3fd0 100644
--- a/src/amd/vulkan/radv_formats.c
+++ b/src/amd/vulkan/radv_formats.c
@@ -22,1999 +22,1947 @@
* IN THE SOFTWARE.
*/
-#include "radv_private.h"
#include "radv_debug.h"
+#include "radv_private.h"
-#include "vk_format.h"
#include "sid.h"
+#include "vk_format.h"
#include "vk_util.h"
#include "drm-uapi/drm_fourcc.h"
-#include "util/half_float.h"
-#include "util/format_srgb.h"
#include "util/format_r11g11b10f.h"
#include "util/format_rgb9e5.h"
+#include "util/format_srgb.h"
+#include "util/half_float.h"
#include "vulkan/util/vk_format.h"
-uint32_t radv_translate_buffer_dataformat(const struct util_format_description *desc,
- int first_non_void)
+uint32_t
+radv_translate_buffer_dataformat(const struct util_format_description *desc, int first_non_void)
{
- unsigned type;
- int i;
-
- assert(util_format_get_num_planes(desc->format) == 1);
-
- if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)
- return V_008F0C_BUF_DATA_FORMAT_10_11_11;
-
- if (first_non_void < 0)
- return V_008F0C_BUF_DATA_FORMAT_INVALID;
- type = desc->channel[first_non_void].type;
-
- if (type == UTIL_FORMAT_TYPE_FIXED)
- return V_008F0C_BUF_DATA_FORMAT_INVALID;
- if (desc->nr_channels == 4 &&
- desc->channel[0].size == 10 &&
- desc->channel[1].size == 10 &&
- desc->channel[2].size == 10 &&
- desc->channel[3].size == 2)
- return V_008F0C_BUF_DATA_FORMAT_2_10_10_10;
-
- /* See whether the components are of the same size. */
- for (i = 0; i < desc->nr_channels; i++) {
- if (desc->channel[first_non_void].size != desc->channel[i].size)
- return V_008F0C_BUF_DATA_FORMAT_INVALID;
- }
-
- switch (desc->channel[first_non_void].size) {
- case 8:
- switch (desc->nr_channels) {
- case 1:
- return V_008F0C_BUF_DATA_FORMAT_8;
- case 2:
- return V_008F0C_BUF_DATA_FORMAT_8_8;
- case 4:
- return V_008F0C_BUF_DATA_FORMAT_8_8_8_8;
- }
- break;
- case 16:
- switch (desc->nr_channels) {
- case 1:
- return V_008F0C_BUF_DATA_FORMAT_16;
- case 2:
- return V_008F0C_BUF_DATA_FORMAT_16_16;
- case 4:
- return V_008F0C_BUF_DATA_FORMAT_16_16_16_16;
- }
- break;
- case 32:
- /* From the Southern Islands ISA documentation about MTBUF:
- * 'Memory reads of data in memory that is 32 or 64 bits do not
- * undergo any format conversion.'
- */
- if (type != UTIL_FORMAT_TYPE_FLOAT &&
- !desc->channel[first_non_void].pure_integer)
- return V_008F0C_BUF_DATA_FORMAT_INVALID;
-
- switch (desc->nr_channels) {
- case 1:
- return V_008F0C_BUF_DATA_FORMAT_32;
- case 2:
- return V_008F0C_BUF_DATA_FORMAT_32_32;
- case 3:
- return V_008F0C_BUF_DATA_FORMAT_32_32_32;
- case 4:
- return V_008F0C_BUF_DATA_FORMAT_32_32_32_32;
- }
- break;
- case 64:
- if (desc->nr_channels == 1)
- return V_008F0C_BUF_DATA_FORMAT_32_32;
- }
-
- return V_008F0C_BUF_DATA_FORMAT_INVALID;
+ unsigned type;
+ int i;
+
+ assert(util_format_get_num_planes(desc->format) == 1);
+
+ if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)
+ return V_008F0C_BUF_DATA_FORMAT_10_11_11;
+
+ if (first_non_void < 0)
+ return V_008F0C_BUF_DATA_FORMAT_INVALID;
+ type = desc->channel[first_non_void].type;
+
+ if (type == UTIL_FORMAT_TYPE_FIXED)
+ return V_008F0C_BUF_DATA_FORMAT_INVALID;
+ if (desc->nr_channels == 4 && desc->channel[0].size == 10 && desc->channel[1].size == 10 &&
+ desc->channel[2].size == 10 && desc->channel[3].size == 2)
+ return V_008F0C_BUF_DATA_FORMAT_2_10_10_10;
+
+ /* See whether the components are of the same size. */
+ for (i = 0; i < desc->nr_channels; i++) {
+ if (desc->channel[first_non_void].size != desc->channel[i].size)
+ return V_008F0C_BUF_DATA_FORMAT_INVALID;
+ }
+
+ switch (desc->channel[first_non_void].size) {
+ case 8:
+ switch (desc->nr_channels) {
+ case 1:
+ return V_008F0C_BUF_DATA_FORMAT_8;
+ case 2:
+ return V_008F0C_BUF_DATA_FORMAT_8_8;
+ case 4:
+ return V_008F0C_BUF_DATA_FORMAT_8_8_8_8;
+ }
+ break;
+ case 16:
+ switch (desc->nr_channels) {
+ case 1:
+ return V_008F0C_BUF_DATA_FORMAT_16;
+ case 2:
+ return V_008F0C_BUF_DATA_FORMAT_16_16;
+ case 4:
+ return V_008F0C_BUF_DATA_FORMAT_16_16_16_16;
+ }
+ break;
+ case 32:
+ /* From the Southern Islands ISA documentation about MTBUF:
+ * 'Memory reads of data in memory that is 32 or 64 bits do not
+ * undergo any format conversion.'
+ */
+ if (type != UTIL_FORMAT_TYPE_FLOAT && !desc->channel[first_non_void].pure_integer)
+ return V_008F0C_BUF_DATA_FORMAT_INVALID;
+
+ switch (desc->nr_channels) {
+ case 1:
+ return V_008F0C_BUF_DATA_FORMAT_32;
+ case 2:
+ return V_008F0C_BUF_DATA_FORMAT_32_32;
+ case 3:
+ return V_008F0C_BUF_DATA_FORMAT_32_32_32;
+ case 4:
+ return V_008F0C_BUF_DATA_FORMAT_32_32_32_32;
+ }
+ break;
+ case 64:
+ if (desc->nr_channels == 1)
+ return V_008F0C_BUF_DATA_FORMAT_32_32;
+ }
+
+ return V_008F0C_BUF_DATA_FORMAT_INVALID;
}
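
For a plain three-channel 32-bit float vertex layout the translation above ends in the size-32 branch and yields the 32_32_32 data format. A small sketch of driving it through the format-description helpers already used in this file; util_format_description() and util_format_get_first_non_void_channel() are the usual Mesa utilities, so treat this as something that would only compile inside the radv tree:

   /* Expected to return V_008F0C_BUF_DATA_FORMAT_32_32_32 for a vec3 of
    * 32-bit floats, per the size-32 / three-channel case above. */
   static uint32_t
   example_vec3_float_dataformat(void)
   {
      const enum pipe_format fmt = PIPE_FORMAT_R32G32B32_FLOAT;
      const struct util_format_description *desc = util_format_description(fmt);
      int first_non_void = util_format_get_first_non_void_channel(fmt);

      return radv_translate_buffer_dataformat(desc, first_non_void);
   }
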
-uint32_t radv_translate_buffer_numformat(const struct util_format_description *desc,
- int first_non_void)
+uint32_t
+radv_translate_buffer_numformat(const struct util_format_description *desc, int first_non_void)
{
- assert(util_format_get_num_planes(desc->format) == 1);
-
- if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)
- return V_008F0C_BUF_NUM_FORMAT_FLOAT;
-
- if (first_non_void < 0)
- return ~0;
-
- switch (desc->channel[first_non_void].type) {
- case UTIL_FORMAT_TYPE_SIGNED:
- if (desc->channel[first_non_void].normalized)
- return V_008F0C_BUF_NUM_FORMAT_SNORM;
- else if (desc->channel[first_non_void].pure_integer)
- return V_008F0C_BUF_NUM_FORMAT_SINT;
- else
- return V_008F0C_BUF_NUM_FORMAT_SSCALED;
- break;
- case UTIL_FORMAT_TYPE_UNSIGNED:
- if (desc->channel[first_non_void].normalized)
- return V_008F0C_BUF_NUM_FORMAT_UNORM;
- else if (desc->channel[first_non_void].pure_integer)
- return V_008F0C_BUF_NUM_FORMAT_UINT;
- else
- return V_008F0C_BUF_NUM_FORMAT_USCALED;
- break;
- case UTIL_FORMAT_TYPE_FLOAT:
- default:
- return V_008F0C_BUF_NUM_FORMAT_FLOAT;
- }
+ assert(util_format_get_num_planes(desc->format) == 1);
+
+ if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)
+ return V_008F0C_BUF_NUM_FORMAT_FLOAT;
+
+ if (first_non_void < 0)
+ return ~0;
+
+ switch (desc->channel[first_non_void].type) {
+ case UTIL_FORMAT_TYPE_SIGNED:
+ if (desc->channel[first_non_void].normalized)
+ return V_008F0C_BUF_NUM_FORMAT_SNORM;
+ else if (desc->channel[first_non_void].pure_integer)
+ return V_008F0C_BUF_NUM_FORMAT_SINT;
+ else
+ return V_008F0C_BUF_NUM_FORMAT_SSCALED;
+ break;
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ if (desc->channel[first_non_void].normalized)
+ return V_008F0C_BUF_NUM_FORMAT_UNORM;
+ else if (desc->channel[first_non_void].pure_integer)
+ return V_008F0C_BUF_NUM_FORMAT_UINT;
+ else
+ return V_008F0C_BUF_NUM_FORMAT_USCALED;
+ break;
+ case UTIL_FORMAT_TYPE_FLOAT:
+ default:
+ return V_008F0C_BUF_NUM_FORMAT_FLOAT;
+ }
}
-uint32_t radv_translate_tex_dataformat(VkFormat format,
- const struct util_format_description *desc,
- int first_non_void)
+uint32_t
+radv_translate_tex_dataformat(VkFormat format, const struct util_format_description *desc,
+ int first_non_void)
{
- bool uniform = true;
- int i;
-
- assert(vk_format_get_plane_count(format) == 1);
-
- if (!desc)
- return ~0;
- /* Colorspace (return non-RGB formats directly). */
- switch (desc->colorspace) {
- /* Depth stencil formats */
- case UTIL_FORMAT_COLORSPACE_ZS:
- switch (format) {
- case VK_FORMAT_D16_UNORM:
- return V_008F14_IMG_DATA_FORMAT_16;
- case VK_FORMAT_D24_UNORM_S8_UINT:
- case VK_FORMAT_X8_D24_UNORM_PACK32:
- return V_008F14_IMG_DATA_FORMAT_8_24;
- case VK_FORMAT_S8_UINT:
- return V_008F14_IMG_DATA_FORMAT_8;
- case VK_FORMAT_D32_SFLOAT:
- return V_008F14_IMG_DATA_FORMAT_32;
- case VK_FORMAT_D32_SFLOAT_S8_UINT:
- return V_008F14_IMG_DATA_FORMAT_X24_8_32;
- default:
- goto out_unknown;
- }
-
- case UTIL_FORMAT_COLORSPACE_YUV:
- goto out_unknown; /* TODO */
-
- case UTIL_FORMAT_COLORSPACE_SRGB:
- if (desc->nr_channels != 4 && desc->nr_channels != 1)
- goto out_unknown;
- break;
-
- default:
- break;
- }
-
- if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
- switch(format) {
- /* Don't ask me why this looks inverted. PAL does the same. */
- case VK_FORMAT_G8B8G8R8_422_UNORM:
- return V_008F14_IMG_DATA_FORMAT_BG_RG;
- case VK_FORMAT_B8G8R8G8_422_UNORM:
- return V_008F14_IMG_DATA_FORMAT_GB_GR;
- default:
- goto out_unknown;
- }
- }
-
- if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
- switch(format) {
- case VK_FORMAT_BC4_UNORM_BLOCK:
- case VK_FORMAT_BC4_SNORM_BLOCK:
- return V_008F14_IMG_DATA_FORMAT_BC4;
- case VK_FORMAT_BC5_UNORM_BLOCK:
- case VK_FORMAT_BC5_SNORM_BLOCK:
- return V_008F14_IMG_DATA_FORMAT_BC5;
- default:
- break;
- }
- }
-
- if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
- switch(format) {
- case VK_FORMAT_BC1_RGB_UNORM_BLOCK:
- case VK_FORMAT_BC1_RGB_SRGB_BLOCK:
- case VK_FORMAT_BC1_RGBA_UNORM_BLOCK:
- case VK_FORMAT_BC1_RGBA_SRGB_BLOCK:
- return V_008F14_IMG_DATA_FORMAT_BC1;
- case VK_FORMAT_BC2_UNORM_BLOCK:
- case VK_FORMAT_BC2_SRGB_BLOCK:
- return V_008F14_IMG_DATA_FORMAT_BC2;
- case VK_FORMAT_BC3_UNORM_BLOCK:
- case VK_FORMAT_BC3_SRGB_BLOCK:
- return V_008F14_IMG_DATA_FORMAT_BC3;
- default:
- break;
- }
- }
-
- if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) {
- switch(format) {
- case VK_FORMAT_BC6H_UFLOAT_BLOCK:
- case VK_FORMAT_BC6H_SFLOAT_BLOCK:
- return V_008F14_IMG_DATA_FORMAT_BC6;
- case VK_FORMAT_BC7_UNORM_BLOCK:
- case VK_FORMAT_BC7_SRGB_BLOCK:
- return V_008F14_IMG_DATA_FORMAT_BC7;
- default:
- break;
- }
- }
-
- if (desc->layout == UTIL_FORMAT_LAYOUT_ETC) {
- switch (format) {
- case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:
- case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
- return V_008F14_IMG_DATA_FORMAT_ETC2_RGB;
- case VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK:
- case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
- return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA1;
- case VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK:
- case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
- return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA;
- case VK_FORMAT_EAC_R11_UNORM_BLOCK:
- case VK_FORMAT_EAC_R11_SNORM_BLOCK:
- return V_008F14_IMG_DATA_FORMAT_ETC2_R;
- case VK_FORMAT_EAC_R11G11_UNORM_BLOCK:
- case VK_FORMAT_EAC_R11G11_SNORM_BLOCK:
- return V_008F14_IMG_DATA_FORMAT_ETC2_RG;
- default:
- break;
- }
- }
-
- if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) {
- return V_008F14_IMG_DATA_FORMAT_5_9_9_9;
- } else if (format == VK_FORMAT_B10G11R11_UFLOAT_PACK32) {
- return V_008F14_IMG_DATA_FORMAT_10_11_11;
- }
-
- /* R8G8Bx_SNORM - TODO CxV8U8 */
-
- /* hw cannot support mixed formats (except depth/stencil, since only
- * depth is read). */
- if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
- goto out_unknown;
-
- /* See whether the components are of the same size. */
- for (i = 1; i < desc->nr_channels; i++) {
- uniform = uniform && desc->channel[0].size == desc->channel[i].size;
- }
-
- /* Non-uniform formats. */
- if (!uniform) {
- switch(desc->nr_channels) {
- case 3:
- if (desc->channel[0].size == 5 &&
- desc->channel[1].size == 6 &&
- desc->channel[2].size == 5) {
- return V_008F14_IMG_DATA_FORMAT_5_6_5;
- }
- goto out_unknown;
- case 4:
- if (desc->channel[0].size == 5 &&
- desc->channel[1].size == 5 &&
- desc->channel[2].size == 5 &&
- desc->channel[3].size == 1) {
- return V_008F14_IMG_DATA_FORMAT_1_5_5_5;
- }
- if (desc->channel[0].size == 1 &&
- desc->channel[1].size == 5 &&
- desc->channel[2].size == 5 &&
- desc->channel[3].size == 5) {
- return V_008F14_IMG_DATA_FORMAT_5_5_5_1;
- }
- if (desc->channel[0].size == 10 &&
- desc->channel[1].size == 10 &&
- desc->channel[2].size == 10 &&
- desc->channel[3].size == 2) {
- /* The closed-source VK driver does this too: no 2/10/10/10 SNORM. */
- if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED &&
- desc->channel[0].normalized)
- goto out_unknown;
- return V_008F14_IMG_DATA_FORMAT_2_10_10_10;
- }
- goto out_unknown;
- }
- goto out_unknown;
- }
-
- if (first_non_void < 0 || first_non_void > 3)
- goto out_unknown;
-
- /* uniform formats */
- switch (desc->channel[first_non_void].size) {
- case 4:
- switch (desc->nr_channels) {
+ bool uniform = true;
+ int i;
+
+ assert(vk_format_get_plane_count(format) == 1);
+
+ if (!desc)
+ return ~0;
+ /* Colorspace (return non-RGB formats directly). */
+ switch (desc->colorspace) {
+ /* Depth stencil formats */
+ case UTIL_FORMAT_COLORSPACE_ZS:
+ switch (format) {
+ case VK_FORMAT_D16_UNORM:
+ return V_008F14_IMG_DATA_FORMAT_16;
+ case VK_FORMAT_D24_UNORM_S8_UINT:
+ case VK_FORMAT_X8_D24_UNORM_PACK32:
+ return V_008F14_IMG_DATA_FORMAT_8_24;
+ case VK_FORMAT_S8_UINT:
+ return V_008F14_IMG_DATA_FORMAT_8;
+ case VK_FORMAT_D32_SFLOAT:
+ return V_008F14_IMG_DATA_FORMAT_32;
+ case VK_FORMAT_D32_SFLOAT_S8_UINT:
+ return V_008F14_IMG_DATA_FORMAT_X24_8_32;
+ default:
+ goto out_unknown;
+ }
+
+ case UTIL_FORMAT_COLORSPACE_YUV:
+ goto out_unknown; /* TODO */
+
+ case UTIL_FORMAT_COLORSPACE_SRGB:
+ if (desc->nr_channels != 4 && desc->nr_channels != 1)
+ goto out_unknown;
+ break;
+
+ default:
+ break;
+ }
+
+ if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
+ switch (format) {
+ /* Don't ask me why this looks inverted. PAL does the same. */
+ case VK_FORMAT_G8B8G8R8_422_UNORM:
+ return V_008F14_IMG_DATA_FORMAT_BG_RG;
+ case VK_FORMAT_B8G8R8G8_422_UNORM:
+ return V_008F14_IMG_DATA_FORMAT_GB_GR;
+ default:
+ goto out_unknown;
+ }
+ }
+
+ if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
+ switch (format) {
+ case VK_FORMAT_BC4_UNORM_BLOCK:
+ case VK_FORMAT_BC4_SNORM_BLOCK:
+ return V_008F14_IMG_DATA_FORMAT_BC4;
+ case VK_FORMAT_BC5_UNORM_BLOCK:
+ case VK_FORMAT_BC5_SNORM_BLOCK:
+ return V_008F14_IMG_DATA_FORMAT_BC5;
+ default:
+ break;
+ }
+ }
+
+ if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
+ switch (format) {
+ case VK_FORMAT_BC1_RGB_UNORM_BLOCK:
+ case VK_FORMAT_BC1_RGB_SRGB_BLOCK:
+ case VK_FORMAT_BC1_RGBA_UNORM_BLOCK:
+ case VK_FORMAT_BC1_RGBA_SRGB_BLOCK:
+ return V_008F14_IMG_DATA_FORMAT_BC1;
+ case VK_FORMAT_BC2_UNORM_BLOCK:
+ case VK_FORMAT_BC2_SRGB_BLOCK:
+ return V_008F14_IMG_DATA_FORMAT_BC2;
+ case VK_FORMAT_BC3_UNORM_BLOCK:
+ case VK_FORMAT_BC3_SRGB_BLOCK:
+ return V_008F14_IMG_DATA_FORMAT_BC3;
+ default:
+ break;
+ }
+ }
+
+ if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) {
+ switch (format) {
+ case VK_FORMAT_BC6H_UFLOAT_BLOCK:
+ case VK_FORMAT_BC6H_SFLOAT_BLOCK:
+ return V_008F14_IMG_DATA_FORMAT_BC6;
+ case VK_FORMAT_BC7_UNORM_BLOCK:
+ case VK_FORMAT_BC7_SRGB_BLOCK:
+ return V_008F14_IMG_DATA_FORMAT_BC7;
+ default:
+ break;
+ }
+ }
+
+ if (desc->layout == UTIL_FORMAT_LAYOUT_ETC) {
+ switch (format) {
+ case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:
+ case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
+ return V_008F14_IMG_DATA_FORMAT_ETC2_RGB;
+ case VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK:
+ case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
+ return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA1;
+ case VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK:
+ case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
+ return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA;
+ case VK_FORMAT_EAC_R11_UNORM_BLOCK:
+ case VK_FORMAT_EAC_R11_SNORM_BLOCK:
+ return V_008F14_IMG_DATA_FORMAT_ETC2_R;
+ case VK_FORMAT_EAC_R11G11_UNORM_BLOCK:
+ case VK_FORMAT_EAC_R11G11_SNORM_BLOCK:
+ return V_008F14_IMG_DATA_FORMAT_ETC2_RG;
+ default:
+ break;
+ }
+ }
+
+ if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) {
+ return V_008F14_IMG_DATA_FORMAT_5_9_9_9;
+ } else if (format == VK_FORMAT_B10G11R11_UFLOAT_PACK32) {
+ return V_008F14_IMG_DATA_FORMAT_10_11_11;
+ }
+
+ /* R8G8Bx_SNORM - TODO CxV8U8 */
+
+ /* hw cannot support mixed formats (except depth/stencil, since only
+ * depth is read). */
+ if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
+ goto out_unknown;
+
+ /* See whether the components are of the same size. */
+ for (i = 1; i < desc->nr_channels; i++) {
+ uniform = uniform && desc->channel[0].size == desc->channel[i].size;
+ }
+
+ /* Non-uniform formats. */
+ if (!uniform) {
+ switch (desc->nr_channels) {
+ case 3:
+ if (desc->channel[0].size == 5 && desc->channel[1].size == 6 &&
+ desc->channel[2].size == 5) {
+ return V_008F14_IMG_DATA_FORMAT_5_6_5;
+ }
+ goto out_unknown;
+ case 4:
+ if (desc->channel[0].size == 5 && desc->channel[1].size == 5 &&
+ desc->channel[2].size == 5 && desc->channel[3].size == 1) {
+ return V_008F14_IMG_DATA_FORMAT_1_5_5_5;
+ }
+ if (desc->channel[0].size == 1 && desc->channel[1].size == 5 &&
+ desc->channel[2].size == 5 && desc->channel[3].size == 5) {
+ return V_008F14_IMG_DATA_FORMAT_5_5_5_1;
+ }
+ if (desc->channel[0].size == 10 && desc->channel[1].size == 10 &&
+ desc->channel[2].size == 10 && desc->channel[3].size == 2) {
+ /* The closed-source VK driver does this too: no 2/10/10/10 SNORM. */
+ if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED && desc->channel[0].normalized)
+ goto out_unknown;
+ return V_008F14_IMG_DATA_FORMAT_2_10_10_10;
+ }
+ goto out_unknown;
+ }
+ goto out_unknown;
+ }
+
+ if (first_non_void < 0 || first_non_void > 3)
+ goto out_unknown;
+
+ /* uniform formats */
+ switch (desc->channel[first_non_void].size) {
+ case 4:
+ switch (desc->nr_channels) {
#if 0 /* Not supported for render targets */
case 2:
return V_008F14_IMG_DATA_FORMAT_4_4;
#endif
- case 4:
- return V_008F14_IMG_DATA_FORMAT_4_4_4_4;
- }
- break;
- case 8:
- switch (desc->nr_channels) {
- case 1:
- return V_008F14_IMG_DATA_FORMAT_8;
- case 2:
- return V_008F14_IMG_DATA_FORMAT_8_8;
- case 4:
- return V_008F14_IMG_DATA_FORMAT_8_8_8_8;
- }
- break;
- case 16:
- switch (desc->nr_channels) {
- case 1:
- return V_008F14_IMG_DATA_FORMAT_16;
- case 2:
- return V_008F14_IMG_DATA_FORMAT_16_16;
- case 4:
- return V_008F14_IMG_DATA_FORMAT_16_16_16_16;
- }
- break;
- case 32:
- switch (desc->nr_channels) {
- case 1:
- return V_008F14_IMG_DATA_FORMAT_32;
- case 2:
- return V_008F14_IMG_DATA_FORMAT_32_32;
- case 3:
- return V_008F14_IMG_DATA_FORMAT_32_32_32;
- case 4:
- return V_008F14_IMG_DATA_FORMAT_32_32_32_32;
- }
- break;
- case 64:
- if (desc->nr_channels == 1)
- return V_008F14_IMG_DATA_FORMAT_32_32;
- break;
- }
+ case 4:
+ return V_008F14_IMG_DATA_FORMAT_4_4_4_4;
+ }
+ break;
+ case 8:
+ switch (desc->nr_channels) {
+ case 1:
+ return V_008F14_IMG_DATA_FORMAT_8;
+ case 2:
+ return V_008F14_IMG_DATA_FORMAT_8_8;
+ case 4:
+ return V_008F14_IMG_DATA_FORMAT_8_8_8_8;
+ }
+ break;
+ case 16:
+ switch (desc->nr_channels) {
+ case 1:
+ return V_008F14_IMG_DATA_FORMAT_16;
+ case 2:
+ return V_008F14_IMG_DATA_FORMAT_16_16;
+ case 4:
+ return V_008F14_IMG_DATA_FORMAT_16_16_16_16;
+ }
+ break;
+ case 32:
+ switch (desc->nr_channels) {
+ case 1:
+ return V_008F14_IMG_DATA_FORMAT_32;
+ case 2:
+ return V_008F14_IMG_DATA_FORMAT_32_32;
+ case 3:
+ return V_008F14_IMG_DATA_FORMAT_32_32_32;
+ case 4:
+ return V_008F14_IMG_DATA_FORMAT_32_32_32_32;
+ }
+ break;
+ case 64:
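+ /* A 64-bit single channel is described to the HW as two 32-bit channels. */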
+ if (desc->nr_channels == 1)
+ return V_008F14_IMG_DATA_FORMAT_32_32;
+ break;
+ }
out_unknown:
- /* R600_ERR("Unable to handle texformat %d %s\n", format, vk_format_name(format)); */
- return ~0;
+ /* R600_ERR("Unable to handle texformat %d %s\n", format, vk_format_name(format)); */
+ return ~0;
}
-uint32_t radv_translate_tex_numformat(VkFormat format,
- const struct util_format_description *desc,
- int first_non_void)
+uint32_t
+radv_translate_tex_numformat(VkFormat format, const struct util_format_description *desc,
+ int first_non_void)
{
- assert(vk_format_get_plane_count(format) == 1);
-
- switch (format) {
- case VK_FORMAT_D24_UNORM_S8_UINT:
- return V_008F14_IMG_NUM_FORMAT_UNORM;
- default:
- if (first_non_void < 0) {
- if (vk_format_is_compressed(format)) {
- switch (format) {
- case VK_FORMAT_BC1_RGB_SRGB_BLOCK:
- case VK_FORMAT_BC1_RGBA_SRGB_BLOCK:
- case VK_FORMAT_BC2_SRGB_BLOCK:
- case VK_FORMAT_BC3_SRGB_BLOCK:
- case VK_FORMAT_BC7_SRGB_BLOCK:
- case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
- case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
- case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
- return V_008F14_IMG_NUM_FORMAT_SRGB;
- case VK_FORMAT_BC4_SNORM_BLOCK:
- case VK_FORMAT_BC5_SNORM_BLOCK:
- case VK_FORMAT_BC6H_SFLOAT_BLOCK:
- case VK_FORMAT_EAC_R11_SNORM_BLOCK:
- case VK_FORMAT_EAC_R11G11_SNORM_BLOCK:
- return V_008F14_IMG_NUM_FORMAT_SNORM;
- default:
- return V_008F14_IMG_NUM_FORMAT_UNORM;
- }
- } else if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
- return V_008F14_IMG_NUM_FORMAT_UNORM;
- } else {
- return V_008F14_IMG_NUM_FORMAT_FLOAT;
- }
- } else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
- return V_008F14_IMG_NUM_FORMAT_SRGB;
- } else {
- switch (desc->channel[first_non_void].type) {
- case UTIL_FORMAT_TYPE_FLOAT:
- return V_008F14_IMG_NUM_FORMAT_FLOAT;
- case UTIL_FORMAT_TYPE_SIGNED:
- if (desc->channel[first_non_void].normalized)
- return V_008F14_IMG_NUM_FORMAT_SNORM;
- else if (desc->channel[first_non_void].pure_integer)
- return V_008F14_IMG_NUM_FORMAT_SINT;
- else
- return V_008F14_IMG_NUM_FORMAT_SSCALED;
- case UTIL_FORMAT_TYPE_UNSIGNED:
- if (desc->channel[first_non_void].normalized)
- return V_008F14_IMG_NUM_FORMAT_UNORM;
- else if (desc->channel[first_non_void].pure_integer)
- return V_008F14_IMG_NUM_FORMAT_UINT;
- else
- return V_008F14_IMG_NUM_FORMAT_USCALED;
- default:
- return V_008F14_IMG_NUM_FORMAT_UNORM;
- }
- }
- }
+ assert(vk_format_get_plane_count(format) == 1);
+
+ switch (format) {
+ case VK_FORMAT_D24_UNORM_S8_UINT:
+ return V_008F14_IMG_NUM_FORMAT_UNORM;
+ default:
+ if (first_non_void < 0) {
+ if (vk_format_is_compressed(format)) {
+ switch (format) {
+ case VK_FORMAT_BC1_RGB_SRGB_BLOCK:
+ case VK_FORMAT_BC1_RGBA_SRGB_BLOCK:
+ case VK_FORMAT_BC2_SRGB_BLOCK:
+ case VK_FORMAT_BC3_SRGB_BLOCK:
+ case VK_FORMAT_BC7_SRGB_BLOCK:
+ case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
+ case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
+ case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
+ return V_008F14_IMG_NUM_FORMAT_SRGB;
+ case VK_FORMAT_BC4_SNORM_BLOCK:
+ case VK_FORMAT_BC5_SNORM_BLOCK:
+ case VK_FORMAT_BC6H_SFLOAT_BLOCK:
+ case VK_FORMAT_EAC_R11_SNORM_BLOCK:
+ case VK_FORMAT_EAC_R11G11_SNORM_BLOCK:
+ return V_008F14_IMG_NUM_FORMAT_SNORM;
+ default:
+ return V_008F14_IMG_NUM_FORMAT_UNORM;
+ }
+ } else if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
+ return V_008F14_IMG_NUM_FORMAT_UNORM;
+ } else {
+ return V_008F14_IMG_NUM_FORMAT_FLOAT;
+ }
+ } else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
+ return V_008F14_IMG_NUM_FORMAT_SRGB;
+ } else {
+ switch (desc->channel[first_non_void].type) {
+ case UTIL_FORMAT_TYPE_FLOAT:
+ return V_008F14_IMG_NUM_FORMAT_FLOAT;
+ case UTIL_FORMAT_TYPE_SIGNED:
+ if (desc->channel[first_non_void].normalized)
+ return V_008F14_IMG_NUM_FORMAT_SNORM;
+ else if (desc->channel[first_non_void].pure_integer)
+ return V_008F14_IMG_NUM_FORMAT_SINT;
+ else
+ return V_008F14_IMG_NUM_FORMAT_SSCALED;
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ if (desc->channel[first_non_void].normalized)
+ return V_008F14_IMG_NUM_FORMAT_UNORM;
+ else if (desc->channel[first_non_void].pure_integer)
+ return V_008F14_IMG_NUM_FORMAT_UINT;
+ else
+ return V_008F14_IMG_NUM_FORMAT_USCALED;
+ default:
+ return V_008F14_IMG_NUM_FORMAT_UNORM;
+ }
+ }
+ }
}
-uint32_t radv_translate_color_numformat(VkFormat format,
- const struct util_format_description *desc,
- int first_non_void)
+uint32_t
+radv_translate_color_numformat(VkFormat format, const struct util_format_description *desc,
+ int first_non_void)
{
- unsigned ntype;
-
- assert(vk_format_get_plane_count(format) == 1);
-
- if (first_non_void == -1 || desc->channel[first_non_void].type == UTIL_FORMAT_TYPE_FLOAT)
- ntype = V_028C70_NUMBER_FLOAT;
- else {
- ntype = V_028C70_NUMBER_UNORM;
- if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
- ntype = V_028C70_NUMBER_SRGB;
- else if (desc->channel[first_non_void].type == UTIL_FORMAT_TYPE_SIGNED) {
- if (desc->channel[first_non_void].pure_integer) {
- ntype = V_028C70_NUMBER_SINT;
- } else if (desc->channel[first_non_void].normalized) {
- ntype = V_028C70_NUMBER_SNORM;
- } else
- ntype = ~0u;
- } else if (desc->channel[first_non_void].type == UTIL_FORMAT_TYPE_UNSIGNED) {
- if (desc->channel[first_non_void].pure_integer) {
- ntype = V_028C70_NUMBER_UINT;
- } else if (desc->channel[first_non_void].normalized) {
- ntype = V_028C70_NUMBER_UNORM;
- } else
- ntype = ~0u;
- }
- }
- return ntype;
+ unsigned ntype;
+
+ assert(vk_format_get_plane_count(format) == 1);
+
+ if (first_non_void == -1 || desc->channel[first_non_void].type == UTIL_FORMAT_TYPE_FLOAT)
+ ntype = V_028C70_NUMBER_FLOAT;
+ else {
+ ntype = V_028C70_NUMBER_UNORM;
+ if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
+ ntype = V_028C70_NUMBER_SRGB;
+ else if (desc->channel[first_non_void].type == UTIL_FORMAT_TYPE_SIGNED) {
+ if (desc->channel[first_non_void].pure_integer) {
+ ntype = V_028C70_NUMBER_SINT;
+ } else if (desc->channel[first_non_void].normalized) {
+ ntype = V_028C70_NUMBER_SNORM;
+ } else
+ ntype = ~0u;
+ } else if (desc->channel[first_non_void].type == UTIL_FORMAT_TYPE_UNSIGNED) {
+ if (desc->channel[first_non_void].pure_integer) {
+ ntype = V_028C70_NUMBER_UINT;
+ } else if (desc->channel[first_non_void].normalized) {
+ ntype = V_028C70_NUMBER_UNORM;
+ } else
+ ntype = ~0u;
+ }
+ }
+ return ntype;
}
-static bool radv_is_sampler_format_supported(VkFormat format, bool *linear_sampling)
+static bool
+radv_is_sampler_format_supported(VkFormat format, bool *linear_sampling)
{
- const struct util_format_description *desc = vk_format_description(format);
- uint32_t num_format;
- if (!desc || format == VK_FORMAT_UNDEFINED ||
- format == VK_FORMAT_R64_UINT || format == VK_FORMAT_R64_SINT ||
- format == VK_FORMAT_R64_SFLOAT)
- return false;
- num_format = radv_translate_tex_numformat(format, desc,
- vk_format_get_first_non_void_channel(format));
-
- if (num_format == V_008F14_IMG_NUM_FORMAT_USCALED ||
- num_format == V_008F14_IMG_NUM_FORMAT_SSCALED)
- return false;
-
- if (num_format == V_008F14_IMG_NUM_FORMAT_UNORM ||
- num_format == V_008F14_IMG_NUM_FORMAT_SNORM ||
- num_format == V_008F14_IMG_NUM_FORMAT_FLOAT ||
- num_format == V_008F14_IMG_NUM_FORMAT_SRGB)
- *linear_sampling = true;
- else
- *linear_sampling = false;
- return radv_translate_tex_dataformat(format, vk_format_description(format),
- vk_format_get_first_non_void_channel(format)) != ~0U;
+ const struct util_format_description *desc = vk_format_description(format);
+ uint32_t num_format;
+ if (!desc || format == VK_FORMAT_UNDEFINED || format == VK_FORMAT_R64_UINT ||
+ format == VK_FORMAT_R64_SINT || format == VK_FORMAT_R64_SFLOAT)
+ return false;
+ num_format =
+ radv_translate_tex_numformat(format, desc, vk_format_get_first_non_void_channel(format));
+
+ if (num_format == V_008F14_IMG_NUM_FORMAT_USCALED ||
+ num_format == V_008F14_IMG_NUM_FORMAT_SSCALED)
+ return false;
+
+ if (num_format == V_008F14_IMG_NUM_FORMAT_UNORM || num_format == V_008F14_IMG_NUM_FORMAT_SNORM ||
+ num_format == V_008F14_IMG_NUM_FORMAT_FLOAT || num_format == V_008F14_IMG_NUM_FORMAT_SRGB)
+ *linear_sampling = true;
+ else
+ *linear_sampling = false;
+ return radv_translate_tex_dataformat(format, vk_format_description(format),
+ vk_format_get_first_non_void_channel(format)) != ~0U;
}
bool
radv_is_atomic_format_supported(VkFormat format)
{
- return format == VK_FORMAT_R32_UINT ||
- format == VK_FORMAT_R32_SINT ||
- format == VK_FORMAT_R32_SFLOAT ||
- format == VK_FORMAT_R64_UINT ||
- format == VK_FORMAT_R64_SINT;
+ return format == VK_FORMAT_R32_UINT || format == VK_FORMAT_R32_SINT ||
+ format == VK_FORMAT_R32_SFLOAT || format == VK_FORMAT_R64_UINT ||
+ format == VK_FORMAT_R64_SINT;
}
-static bool radv_is_storage_image_format_supported(struct radv_physical_device *physical_device,
- VkFormat format)
+static bool
+radv_is_storage_image_format_supported(struct radv_physical_device *physical_device,
+ VkFormat format)
{
- const struct util_format_description *desc = vk_format_description(format);
- unsigned data_format, num_format;
- if (!desc || format == VK_FORMAT_UNDEFINED)
- return false;
-
- data_format = radv_translate_tex_dataformat(format, desc,
- vk_format_get_first_non_void_channel(format));
- num_format = radv_translate_tex_numformat(format, desc,
- vk_format_get_first_non_void_channel(format));
-
- if(data_format == ~0 || num_format == ~0)
- return false;
-
- /* Extracted from the GCN3 ISA document. */
- switch(num_format) {
- case V_008F14_IMG_NUM_FORMAT_UNORM:
- case V_008F14_IMG_NUM_FORMAT_SNORM:
- case V_008F14_IMG_NUM_FORMAT_UINT:
- case V_008F14_IMG_NUM_FORMAT_SINT:
- case V_008F14_IMG_NUM_FORMAT_FLOAT:
- break;
- default:
- return false;
- }
-
- switch(data_format) {
- case V_008F14_IMG_DATA_FORMAT_8:
- case V_008F14_IMG_DATA_FORMAT_16:
- case V_008F14_IMG_DATA_FORMAT_8_8:
- case V_008F14_IMG_DATA_FORMAT_32:
- case V_008F14_IMG_DATA_FORMAT_16_16:
- case V_008F14_IMG_DATA_FORMAT_10_11_11:
- case V_008F14_IMG_DATA_FORMAT_11_11_10:
- case V_008F14_IMG_DATA_FORMAT_10_10_10_2:
- case V_008F14_IMG_DATA_FORMAT_2_10_10_10:
- case V_008F14_IMG_DATA_FORMAT_8_8_8_8:
- case V_008F14_IMG_DATA_FORMAT_32_32:
- case V_008F14_IMG_DATA_FORMAT_16_16_16_16:
- case V_008F14_IMG_DATA_FORMAT_32_32_32_32:
- case V_008F14_IMG_DATA_FORMAT_5_6_5:
- case V_008F14_IMG_DATA_FORMAT_1_5_5_5:
- case V_008F14_IMG_DATA_FORMAT_5_5_5_1:
- case V_008F14_IMG_DATA_FORMAT_4_4_4_4:
- /* TODO: FMASK formats. */
- return true;
- default:
- return false;
- }
+ const struct util_format_description *desc = vk_format_description(format);
+ unsigned data_format, num_format;
+ if (!desc || format == VK_FORMAT_UNDEFINED)
+ return false;
+
+ data_format =
+ radv_translate_tex_dataformat(format, desc, vk_format_get_first_non_void_channel(format));
+ num_format =
+ radv_translate_tex_numformat(format, desc, vk_format_get_first_non_void_channel(format));
+
+ if (data_format == ~0 || num_format == ~0)
+ return false;
+
+ /* Extracted from the GCN3 ISA document. */
+ switch (num_format) {
+ case V_008F14_IMG_NUM_FORMAT_UNORM:
+ case V_008F14_IMG_NUM_FORMAT_SNORM:
+ case V_008F14_IMG_NUM_FORMAT_UINT:
+ case V_008F14_IMG_NUM_FORMAT_SINT:
+ case V_008F14_IMG_NUM_FORMAT_FLOAT:
+ break;
+ default:
+ return false;
+ }
+
+ switch (data_format) {
+ case V_008F14_IMG_DATA_FORMAT_8:
+ case V_008F14_IMG_DATA_FORMAT_16:
+ case V_008F14_IMG_DATA_FORMAT_8_8:
+ case V_008F14_IMG_DATA_FORMAT_32:
+ case V_008F14_IMG_DATA_FORMAT_16_16:
+ case V_008F14_IMG_DATA_FORMAT_10_11_11:
+ case V_008F14_IMG_DATA_FORMAT_11_11_10:
+ case V_008F14_IMG_DATA_FORMAT_10_10_10_2:
+ case V_008F14_IMG_DATA_FORMAT_2_10_10_10:
+ case V_008F14_IMG_DATA_FORMAT_8_8_8_8:
+ case V_008F14_IMG_DATA_FORMAT_32_32:
+ case V_008F14_IMG_DATA_FORMAT_16_16_16_16:
+ case V_008F14_IMG_DATA_FORMAT_32_32_32_32:
+ case V_008F14_IMG_DATA_FORMAT_5_6_5:
+ case V_008F14_IMG_DATA_FORMAT_1_5_5_5:
+ case V_008F14_IMG_DATA_FORMAT_5_5_5_1:
+ case V_008F14_IMG_DATA_FORMAT_4_4_4_4:
+ /* TODO: FMASK formats. */
+ return true;
+ default:
+ return false;
+ }
}
-bool radv_is_buffer_format_supported(VkFormat format, bool *scaled)
+bool
+radv_is_buffer_format_supported(VkFormat format, bool *scaled)
{
- const struct util_format_description *desc = vk_format_description(format);
- unsigned data_format, num_format;
- if (!desc || format == VK_FORMAT_UNDEFINED)
- return false;
-
- data_format = radv_translate_buffer_dataformat(desc,
- vk_format_get_first_non_void_channel(format));
- num_format = radv_translate_buffer_numformat(desc,
- vk_format_get_first_non_void_channel(format));
-
- if (scaled)
- *scaled = (num_format == V_008F0C_BUF_NUM_FORMAT_SSCALED) || (num_format == V_008F0C_BUF_NUM_FORMAT_USCALED);
- return data_format != V_008F0C_BUF_DATA_FORMAT_INVALID &&
- num_format != ~0;
+ const struct util_format_description *desc = vk_format_description(format);
+ unsigned data_format, num_format;
+ if (!desc || format == VK_FORMAT_UNDEFINED)
+ return false;
+
+ data_format =
+ radv_translate_buffer_dataformat(desc, vk_format_get_first_non_void_channel(format));
+ num_format = radv_translate_buffer_numformat(desc, vk_format_get_first_non_void_channel(format));
+
+ if (scaled)
+ *scaled = (num_format == V_008F0C_BUF_NUM_FORMAT_SSCALED) ||
+ (num_format == V_008F0C_BUF_NUM_FORMAT_USCALED);
+ return data_format != V_008F0C_BUF_DATA_FORMAT_INVALID && num_format != ~0;
}
-bool radv_is_colorbuffer_format_supported(const struct radv_physical_device *pdevice,
- VkFormat format, bool *blendable)
+bool
+radv_is_colorbuffer_format_supported(const struct radv_physical_device *pdevice, VkFormat format,
+ bool *blendable)
{
- const struct util_format_description *desc = vk_format_description(format);
- uint32_t color_format = radv_translate_colorformat(format);
- uint32_t color_swap = radv_translate_colorswap(format, false);
- uint32_t color_num_format = radv_translate_color_numformat(format,
- desc,
- vk_format_get_first_non_void_channel(format));
-
- if (color_num_format == V_028C70_NUMBER_UINT || color_num_format == V_028C70_NUMBER_SINT ||
- color_format == V_028C70_COLOR_8_24 || color_format == V_028C70_COLOR_24_8 ||
- color_format == V_028C70_COLOR_X24_8_32_FLOAT) {
- *blendable = false;
- } else
- *blendable = true;
-
- if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32 && pdevice->rad_info.chip_class < GFX10_3)
- return false;
-
- return color_format != V_028C70_COLOR_INVALID &&
- color_swap != ~0U &&
- color_num_format != ~0;
+ const struct util_format_description *desc = vk_format_description(format);
+ uint32_t color_format = radv_translate_colorformat(format);
+ uint32_t color_swap = radv_translate_colorswap(format, false);
+ uint32_t color_num_format =
+ radv_translate_color_numformat(format, desc, vk_format_get_first_non_void_channel(format));
+
+ if (color_num_format == V_028C70_NUMBER_UINT || color_num_format == V_028C70_NUMBER_SINT ||
+ color_format == V_028C70_COLOR_8_24 || color_format == V_028C70_COLOR_24_8 ||
+ color_format == V_028C70_COLOR_X24_8_32_FLOAT) {
+ *blendable = false;
+ } else
+ *blendable = true;
+
+ if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32 && pdevice->rad_info.chip_class < GFX10_3)
+ return false;
+
+ return color_format != V_028C70_COLOR_INVALID && color_swap != ~0U && color_num_format != ~0;
}
-static bool radv_is_zs_format_supported(VkFormat format)
+static bool
+radv_is_zs_format_supported(VkFormat format)
{
- return radv_translate_dbformat(format) != V_028040_Z_INVALID || format == VK_FORMAT_S8_UINT;
+ return radv_translate_dbformat(format) != V_028040_Z_INVALID || format == VK_FORMAT_S8_UINT;
}
-static bool radv_is_filter_minmax_format_supported(VkFormat format)
+static bool
+radv_is_filter_minmax_format_supported(VkFormat format)
{
- /* From the Vulkan spec 1.1.71:
- *
- * "The following formats must support the
- * VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_MINMAX_BIT feature with
- * VK_IMAGE_TILING_OPTIMAL, if they support
- * VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT."
- */
- /* TODO: enable more formats. */
- switch (format) {
- case VK_FORMAT_R8_UNORM:
- case VK_FORMAT_R8_SNORM:
- case VK_FORMAT_R16_UNORM:
- case VK_FORMAT_R16_SNORM:
- case VK_FORMAT_R16_SFLOAT:
- case VK_FORMAT_R32_SFLOAT:
- case VK_FORMAT_D16_UNORM:
- case VK_FORMAT_X8_D24_UNORM_PACK32:
- case VK_FORMAT_D32_SFLOAT:
- case VK_FORMAT_D16_UNORM_S8_UINT:
- case VK_FORMAT_D24_UNORM_S8_UINT:
- case VK_FORMAT_D32_SFLOAT_S8_UINT:
- return true;
- default:
- return false;
- }
+ /* From the Vulkan spec 1.1.71:
+ *
+ * "The following formats must support the
+ * VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_MINMAX_BIT feature with
+ * VK_IMAGE_TILING_OPTIMAL, if they support
+ * VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT."
+ */
+ /* TODO: enable more formats. */
+ switch (format) {
+ case VK_FORMAT_R8_UNORM:
+ case VK_FORMAT_R8_SNORM:
+ case VK_FORMAT_R16_UNORM:
+ case VK_FORMAT_R16_SNORM:
+ case VK_FORMAT_R16_SFLOAT:
+ case VK_FORMAT_R32_SFLOAT:
+ case VK_FORMAT_D16_UNORM:
+ case VK_FORMAT_X8_D24_UNORM_PACK32:
+ case VK_FORMAT_D32_SFLOAT:
+ case VK_FORMAT_D16_UNORM_S8_UINT:
+ case VK_FORMAT_D24_UNORM_S8_UINT:
+ case VK_FORMAT_D32_SFLOAT_S8_UINT:
+ return true;
+ default:
+ return false;
+ }
}
bool
radv_device_supports_etc(struct radv_physical_device *physical_device)
{
- return physical_device->rad_info.family == CHIP_VEGA10 ||
- physical_device->rad_info.family == CHIP_RAVEN ||
- physical_device->rad_info.family == CHIP_RAVEN2 ||
- physical_device->rad_info.family == CHIP_STONEY;
+ return physical_device->rad_info.family == CHIP_VEGA10 ||
+ physical_device->rad_info.family == CHIP_RAVEN ||
+ physical_device->rad_info.family == CHIP_RAVEN2 ||
+ physical_device->rad_info.family == CHIP_STONEY;
}
static void
radv_physical_device_get_format_properties(struct radv_physical_device *physical_device,
- VkFormat format,
- VkFormatProperties *out_properties)
+ VkFormat format, VkFormatProperties *out_properties)
{
- VkFormatFeatureFlags linear = 0, tiled = 0, buffer = 0;
- const struct util_format_description *desc = vk_format_description(format);
- bool blendable;
- bool scaled = false;
- /* TODO: implement some software emulation of SUBSAMPLED formats. */
- if (!desc || vk_format_to_pipe_format(format) == PIPE_FORMAT_NONE ||
- desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
- out_properties->linearTilingFeatures = linear;
- out_properties->optimalTilingFeatures = tiled;
- out_properties->bufferFeatures = buffer;
- return;
- }
-
- if (desc->layout == UTIL_FORMAT_LAYOUT_ETC &&
- !radv_device_supports_etc(physical_device)) {
- out_properties->linearTilingFeatures = linear;
- out_properties->optimalTilingFeatures = tiled;
- out_properties->bufferFeatures = buffer;
- return;
- }
-
- if (vk_format_get_plane_count(format) > 1 ||
- desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
- uint32_t tiling = VK_FORMAT_FEATURE_TRANSFER_SRC_BIT |
- VK_FORMAT_FEATURE_TRANSFER_DST_BIT |
- VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT |
- VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT |
- VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT;
-
- /* The subsampled formats have no support for linear filters. */
- if (desc->layout != UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
- tiling |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_YCBCR_CONVERSION_LINEAR_FILTER_BIT;
- }
-
- /* Fails for unknown reasons with linear tiling & subsampled formats. */
- out_properties->linearTilingFeatures = desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED ? 0 : tiling;
- out_properties->optimalTilingFeatures = tiling;
- out_properties->bufferFeatures = 0;
- return;
- }
-
- if (radv_is_storage_image_format_supported(physical_device, format)) {
- tiled |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT;
- linear |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT;
- }
-
- if (radv_is_buffer_format_supported(format, &scaled)) {
- if (format != VK_FORMAT_R64_UINT && format != VK_FORMAT_R64_SINT) {
- buffer |= VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT;
- if (!scaled)
- buffer |= VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT;
- }
- buffer |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT;
- }
-
- if (vk_format_is_depth_or_stencil(format)) {
- if (radv_is_zs_format_supported(format)) {
- tiled |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT;
- tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT;
- tiled |= VK_FORMAT_FEATURE_BLIT_SRC_BIT |
- VK_FORMAT_FEATURE_BLIT_DST_BIT;
- tiled |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT |
- VK_FORMAT_FEATURE_TRANSFER_DST_BIT;
-
- if (radv_is_filter_minmax_format_supported(format))
- tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_MINMAX_BIT;
-
- if (vk_format_has_depth(format))
- tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
-
- /* Don't support blitting surfaces with depth/stencil. */
- if (vk_format_has_depth(format) && vk_format_has_stencil(format))
- tiled &= ~VK_FORMAT_FEATURE_BLIT_DST_BIT;
-
- /* Don't support linear depth surfaces */
- linear = 0;
- }
- } else {
- bool linear_sampling;
- if (radv_is_sampler_format_supported(format, &linear_sampling)) {
- linear |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT |
- VK_FORMAT_FEATURE_BLIT_SRC_BIT;
- tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT |
- VK_FORMAT_FEATURE_BLIT_SRC_BIT;
-
- if (radv_is_filter_minmax_format_supported(format))
- tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_MINMAX_BIT;
-
- if (linear_sampling) {
- linear |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
- tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
- }
-
- /* Don't support blitting for R32G32B32 formats. */
- if (format == VK_FORMAT_R32G32B32_SFLOAT ||
- format == VK_FORMAT_R32G32B32_UINT ||
- format == VK_FORMAT_R32G32B32_SINT) {
- linear &= ~VK_FORMAT_FEATURE_BLIT_SRC_BIT;
- }
- }
- if (radv_is_colorbuffer_format_supported(physical_device, format, &blendable)) {
- linear |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
- tiled |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
- if (blendable) {
- linear |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT;
- tiled |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT;
- }
- }
- if (tiled && !scaled) {
- tiled |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT |
- VK_FORMAT_FEATURE_TRANSFER_DST_BIT;
- }
-
- /* Tiled formatting does not support NPOT pixel sizes */
- if (!util_is_power_of_two_or_zero(vk_format_get_blocksize(format)))
- tiled = 0;
- }
-
- if (linear && !scaled) {
- linear |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT |
- VK_FORMAT_FEATURE_TRANSFER_DST_BIT;
- }
-
- if (radv_is_atomic_format_supported(format)) {
- buffer |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT;
- linear |= VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT;
- tiled |= VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT;
- }
-
- switch(format) {
- case VK_FORMAT_A2R10G10B10_SNORM_PACK32:
- case VK_FORMAT_A2B10G10R10_SNORM_PACK32:
- case VK_FORMAT_A2R10G10B10_SSCALED_PACK32:
- case VK_FORMAT_A2B10G10R10_SSCALED_PACK32:
- case VK_FORMAT_A2R10G10B10_SINT_PACK32:
- case VK_FORMAT_A2B10G10R10_SINT_PACK32:
- buffer &= ~(VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT |
- VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT);
- linear = 0;
- tiled = 0;
- break;
- default:
- break;
- }
-
- /* addrlib does not support linear compressed textures. */
- if (vk_format_is_compressed(format))
- linear = 0;
-
- out_properties->linearTilingFeatures = linear;
- out_properties->optimalTilingFeatures = tiled;
- out_properties->bufferFeatures = buffer;
+ VkFormatFeatureFlags linear = 0, tiled = 0, buffer = 0;
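+ /* Feature flags accumulated below: linear tiling, optimal tiling and buffer usage. */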
+ const struct util_format_description *desc = vk_format_description(format);
+ bool blendable;
+ bool scaled = false;
+ /* TODO: implement some software emulation of SUBSAMPLED formats. */
+ if (!desc || vk_format_to_pipe_format(format) == PIPE_FORMAT_NONE ||
+ desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
+ out_properties->linearTilingFeatures = linear;
+ out_properties->optimalTilingFeatures = tiled;
+ out_properties->bufferFeatures = buffer;
+ return;
+ }
+
+ if (desc->layout == UTIL_FORMAT_LAYOUT_ETC && !radv_device_supports_etc(physical_device)) {
+ out_properties->linearTilingFeatures = linear;
+ out_properties->optimalTilingFeatures = tiled;
+ out_properties->bufferFeatures = buffer;
+ return;
+ }
+
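+ /* Multi-plane and subsampled (YCbCr) formats get a fixed sampling/transfer feature set and no buffer features. */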
+ if (vk_format_get_plane_count(format) > 1 || desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
+ uint32_t tiling = VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT |
+ VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT |
+ VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT |
+ VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT;
+
+ /* The subsampled formats have no support for linear filters. */
+ if (desc->layout != UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
+ tiling |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_YCBCR_CONVERSION_LINEAR_FILTER_BIT;
+ }
+
+ /* Fails for unknown reasons with linear tiling & subsampled formats. */
+ out_properties->linearTilingFeatures =
+ desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED ? 0 : tiling;
+ out_properties->optimalTilingFeatures = tiling;
+ out_properties->bufferFeatures = 0;
+ return;
+ }
+
+ if (radv_is_storage_image_format_supported(physical_device, format)) {
+ tiled |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT;
+ linear |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT;
+ }
+
+ if (radv_is_buffer_format_supported(format, &scaled)) {
+ if (format != VK_FORMAT_R64_UINT && format != VK_FORMAT_R64_SINT) {
+ buffer |= VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT;
+ if (!scaled)
+ buffer |= VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT;
+ }
+ buffer |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT;
+ }
+
+ if (vk_format_is_depth_or_stencil(format)) {
+ if (radv_is_zs_format_supported(format)) {
+ tiled |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT;
+ tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT;
+ tiled |= VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
+ tiled |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT;
+
+ if (radv_is_filter_minmax_format_supported(format))
+ tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_MINMAX_BIT;
+
+ if (vk_format_has_depth(format))
+ tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
+
+ /* Don't support blitting surfaces with depth/stencil. */
+ if (vk_format_has_depth(format) && vk_format_has_stencil(format))
+ tiled &= ~VK_FORMAT_FEATURE_BLIT_DST_BIT;
+
+ /* Don't support linear depth surfaces */
+ linear = 0;
+ }
+ } else {
+ bool linear_sampling;
+ if (radv_is_sampler_format_supported(format, &linear_sampling)) {
+ linear |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_BLIT_SRC_BIT;
+ tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_BLIT_SRC_BIT;
+
+ if (radv_is_filter_minmax_format_supported(format))
+ tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_MINMAX_BIT;
+
+ if (linear_sampling) {
+ linear |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
+ tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
+ }
+
+ /* Don't support blitting for R32G32B32 formats. */
+ if (format == VK_FORMAT_R32G32B32_SFLOAT || format == VK_FORMAT_R32G32B32_UINT ||
+ format == VK_FORMAT_R32G32B32_SINT) {
+ linear &= ~VK_FORMAT_FEATURE_BLIT_SRC_BIT;
+ }
+ }
+ if (radv_is_colorbuffer_format_supported(physical_device, format, &blendable)) {
+ linear |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
+ tiled |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
+ if (blendable) {
+ linear |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT;
+ tiled |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT;
+ }
+ }
+ if (tiled && !scaled) {
+ tiled |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT;
+ }
+
+ /* Tiled formatting does not support NPOT pixel sizes */
+ if (!util_is_power_of_two_or_zero(vk_format_get_blocksize(format)))
+ tiled = 0;
+ }
+
+ if (linear && !scaled) {
+ linear |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT;
+ }
+
+ if (radv_is_atomic_format_supported(format)) {
+ buffer |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT;
+ linear |= VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT;
+ tiled |= VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT;
+ }
+
+ switch (format) {
+ case VK_FORMAT_A2R10G10B10_SNORM_PACK32:
+ case VK_FORMAT_A2B10G10R10_SNORM_PACK32:
+ case VK_FORMAT_A2R10G10B10_SSCALED_PACK32:
+ case VK_FORMAT_A2B10G10R10_SSCALED_PACK32:
+ case VK_FORMAT_A2R10G10B10_SINT_PACK32:
+ case VK_FORMAT_A2B10G10R10_SINT_PACK32:
+ buffer &=
+ ~(VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT);
+ linear = 0;
+ tiled = 0;
+ break;
+ default:
+ break;
+ }
+
+ /* addrlib does not support linear compressed textures. */
+ if (vk_format_is_compressed(format))
+ linear = 0;
+
+ out_properties->linearTilingFeatures = linear;
+ out_properties->optimalTilingFeatures = tiled;
+ out_properties->bufferFeatures = buffer;
}
-uint32_t radv_translate_colorformat(VkFormat format)
+uint32_t
+radv_translate_colorformat(VkFormat format)
{
- const struct util_format_description *desc = vk_format_description(format);
-
-#define HAS_SIZE(x,y,z,w) \
- (desc->channel[0].size == (x) && desc->channel[1].size == (y) && \
- desc->channel[2].size == (z) && desc->channel[3].size == (w))
-
- if (format == VK_FORMAT_B10G11R11_UFLOAT_PACK32) /* isn't plain */
- return V_028C70_COLOR_10_11_11;
-
- if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
- return V_028C70_COLOR_5_9_9_9;
-
- if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
- return V_028C70_COLOR_INVALID;
-
- /* hw cannot support mixed formats (except depth/stencil, since
- * stencil is not written to). */
- if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
- return V_028C70_COLOR_INVALID;
-
- switch (desc->nr_channels) {
- case 1:
- switch (desc->channel[0].size) {
- case 8:
- return V_028C70_COLOR_8;
- case 16:
- return V_028C70_COLOR_16;
- case 32:
- return V_028C70_COLOR_32;
- }
- break;
- case 2:
- if (desc->channel[0].size == desc->channel[1].size) {
- switch (desc->channel[0].size) {
- case 8:
- return V_028C70_COLOR_8_8;
- case 16:
- return V_028C70_COLOR_16_16;
- case 32:
- return V_028C70_COLOR_32_32;
- }
- } else if (HAS_SIZE(8,24,0,0)) {
- return V_028C70_COLOR_24_8;
- } else if (HAS_SIZE(24,8,0,0)) {
- return V_028C70_COLOR_8_24;
- }
- break;
- case 3:
- if (HAS_SIZE(5,6,5,0)) {
- return V_028C70_COLOR_5_6_5;
- } else if (HAS_SIZE(32,8,24,0)) {
- return V_028C70_COLOR_X24_8_32_FLOAT;
- }
- break;
- case 4:
- if (desc->channel[0].size == desc->channel[1].size &&
- desc->channel[0].size == desc->channel[2].size &&
- desc->channel[0].size == desc->channel[3].size) {
- switch (desc->channel[0].size) {
- case 4:
- return V_028C70_COLOR_4_4_4_4;
- case 8:
- return V_028C70_COLOR_8_8_8_8;
- case 16:
- return V_028C70_COLOR_16_16_16_16;
- case 32:
- return V_028C70_COLOR_32_32_32_32;
- }
- } else if (HAS_SIZE(5,5,5,1)) {
- return V_028C70_COLOR_1_5_5_5;
- } else if (HAS_SIZE(1,5,5,5)) {
- return V_028C70_COLOR_5_5_5_1;
- } else if (HAS_SIZE(10,10,10,2)) {
- return V_028C70_COLOR_2_10_10_10;
- }
- break;
- }
- return V_028C70_COLOR_INVALID;
+ const struct util_format_description *desc = vk_format_description(format);
+
+#define HAS_SIZE(x, y, z, w) \
+ (desc->channel[0].size == (x) && desc->channel[1].size == (y) && \
+ desc->channel[2].size == (z) && desc->channel[3].size == (w))
+
+ if (format == VK_FORMAT_B10G11R11_UFLOAT_PACK32) /* isn't plain */
+ return V_028C70_COLOR_10_11_11;
+
+ if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
+ return V_028C70_COLOR_5_9_9_9;
+
+ if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
+ return V_028C70_COLOR_INVALID;
+
+ /* The HW cannot support mixed formats (except depth/stencil, since
+  * stencil is not written to). */
+ if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
+ return V_028C70_COLOR_INVALID;
+
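+ /* Plain formats: derive the CB format from the channel count and per-channel bit sizes. */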
+ switch (desc->nr_channels) {
+ case 1:
+ switch (desc->channel[0].size) {
+ case 8:
+ return V_028C70_COLOR_8;
+ case 16:
+ return V_028C70_COLOR_16;
+ case 32:
+ return V_028C70_COLOR_32;
+ }
+ break;
+ case 2:
+ if (desc->channel[0].size == desc->channel[1].size) {
+ switch (desc->channel[0].size) {
+ case 8:
+ return V_028C70_COLOR_8_8;
+ case 16:
+ return V_028C70_COLOR_16_16;
+ case 32:
+ return V_028C70_COLOR_32_32;
+ }
+ } else if (HAS_SIZE(8, 24, 0, 0)) {
+ return V_028C70_COLOR_24_8;
+ } else if (HAS_SIZE(24, 8, 0, 0)) {
+ return V_028C70_COLOR_8_24;
+ }
+ break;
+ case 3:
+ if (HAS_SIZE(5, 6, 5, 0)) {
+ return V_028C70_COLOR_5_6_5;
+ } else if (HAS_SIZE(32, 8, 24, 0)) {
+ return V_028C70_COLOR_X24_8_32_FLOAT;
+ }
+ break;
+ case 4:
+ if (desc->channel[0].size == desc->channel[1].size &&
+ desc->channel[0].size == desc->channel[2].size &&
+ desc->channel[0].size == desc->channel[3].size) {
+ switch (desc->channel[0].size) {
+ case 4:
+ return V_028C70_COLOR_4_4_4_4;
+ case 8:
+ return V_028C70_COLOR_8_8_8_8;
+ case 16:
+ return V_028C70_COLOR_16_16_16_16;
+ case 32:
+ return V_028C70_COLOR_32_32_32_32;
+ }
+ } else if (HAS_SIZE(5, 5, 5, 1)) {
+ return V_028C70_COLOR_1_5_5_5;
+ } else if (HAS_SIZE(1, 5, 5, 5)) {
+ return V_028C70_COLOR_5_5_5_1;
+ } else if (HAS_SIZE(10, 10, 10, 2)) {
+ return V_028C70_COLOR_2_10_10_10;
+ }
+ break;
+ }
+ return V_028C70_COLOR_INVALID;
}
-uint32_t radv_colorformat_endian_swap(uint32_t colorformat)
+uint32_t
+radv_colorformat_endian_swap(uint32_t colorformat)
{
- if (0/*SI_BIG_ENDIAN*/) {
- switch(colorformat) {
- /* 8-bit buffers. */
- case V_028C70_COLOR_8:
- return V_028C70_ENDIAN_NONE;
-
- /* 16-bit buffers. */
- case V_028C70_COLOR_5_6_5:
- case V_028C70_COLOR_1_5_5_5:
- case V_028C70_COLOR_4_4_4_4:
- case V_028C70_COLOR_16:
- case V_028C70_COLOR_8_8:
- return V_028C70_ENDIAN_8IN16;
-
- /* 32-bit buffers. */
- case V_028C70_COLOR_8_8_8_8:
- case V_028C70_COLOR_2_10_10_10:
- case V_028C70_COLOR_8_24:
- case V_028C70_COLOR_24_8:
- case V_028C70_COLOR_16_16:
- return V_028C70_ENDIAN_8IN32;
-
- /* 64-bit buffers. */
- case V_028C70_COLOR_16_16_16_16:
- return V_028C70_ENDIAN_8IN16;
-
- case V_028C70_COLOR_32_32:
- return V_028C70_ENDIAN_8IN32;
-
- /* 128-bit buffers. */
- case V_028C70_COLOR_32_32_32_32:
- return V_028C70_ENDIAN_8IN32;
- default:
- return V_028C70_ENDIAN_NONE; /* Unsupported. */
- }
- } else {
- return V_028C70_ENDIAN_NONE;
- }
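+ /* The big-endian path below is compiled out, so this always returns ENDIAN_NONE. */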
+ if (0 /*SI_BIG_ENDIAN*/) {
+ switch (colorformat) {
+ /* 8-bit buffers. */
+ case V_028C70_COLOR_8:
+ return V_028C70_ENDIAN_NONE;
+
+ /* 16-bit buffers. */
+ case V_028C70_COLOR_5_6_5:
+ case V_028C70_COLOR_1_5_5_5:
+ case V_028C70_COLOR_4_4_4_4:
+ case V_028C70_COLOR_16:
+ case V_028C70_COLOR_8_8:
+ return V_028C70_ENDIAN_8IN16;
+
+ /* 32-bit buffers. */
+ case V_028C70_COLOR_8_8_8_8:
+ case V_028C70_COLOR_2_10_10_10:
+ case V_028C70_COLOR_8_24:
+ case V_028C70_COLOR_24_8:
+ case V_028C70_COLOR_16_16:
+ return V_028C70_ENDIAN_8IN32;
+
+ /* 64-bit buffers. */
+ case V_028C70_COLOR_16_16_16_16:
+ return V_028C70_ENDIAN_8IN16;
+
+ case V_028C70_COLOR_32_32:
+ return V_028C70_ENDIAN_8IN32;
+
+ /* 128-bit buffers. */
+ case V_028C70_COLOR_32_32_32_32:
+ return V_028C70_ENDIAN_8IN32;
+ default:
+ return V_028C70_ENDIAN_NONE; /* Unsupported. */
+ }
+ } else {
+ return V_028C70_ENDIAN_NONE;
+ }
}
-uint32_t radv_translate_dbformat(VkFormat format)
+uint32_t
+radv_translate_dbformat(VkFormat format)
{
- switch (format) {
- case VK_FORMAT_D16_UNORM:
- case VK_FORMAT_D16_UNORM_S8_UINT:
- return V_028040_Z_16;
- case VK_FORMAT_D32_SFLOAT:
- case VK_FORMAT_D32_SFLOAT_S8_UINT:
- return V_028040_Z_32_FLOAT;
- default:
- return V_028040_Z_INVALID;
- }
+ switch (format) {
+ case VK_FORMAT_D16_UNORM:
+ case VK_FORMAT_D16_UNORM_S8_UINT:
+ return V_028040_Z_16;
+ case VK_FORMAT_D32_SFLOAT:
+ case VK_FORMAT_D32_SFLOAT_S8_UINT:
+ return V_028040_Z_32_FLOAT;
+ default:
+ return V_028040_Z_INVALID;
+ }
}
-unsigned radv_translate_colorswap(VkFormat format, bool do_endian_swap)
+unsigned
+radv_translate_colorswap(VkFormat format, bool do_endian_swap)
{
- const struct util_format_description *desc = vk_format_description(format);
-
-#define HAS_SWIZZLE(chan,swz) (desc->swizzle[chan] == PIPE_SWIZZLE_##swz)
-
- if (format == VK_FORMAT_B10G11R11_UFLOAT_PACK32)
- return V_028C70_SWAP_STD;
-
- if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
- return V_028C70_SWAP_STD;
-
- if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
- return ~0U;
-
- switch (desc->nr_channels) {
- case 1:
- if (HAS_SWIZZLE(0,X))
- return V_028C70_SWAP_STD; /* X___ */
- else if (HAS_SWIZZLE(3,X))
- return V_028C70_SWAP_ALT_REV; /* ___X */
- break;
- case 2:
- if ((HAS_SWIZZLE(0,X) && HAS_SWIZZLE(1,Y)) ||
- (HAS_SWIZZLE(0,X) && HAS_SWIZZLE(1,NONE)) ||
- (HAS_SWIZZLE(0,NONE) && HAS_SWIZZLE(1,Y)))
- return V_028C70_SWAP_STD; /* XY__ */
- else if ((HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(1,X)) ||
- (HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(1,NONE)) ||
- (HAS_SWIZZLE(0,NONE) && HAS_SWIZZLE(1,X)))
- /* YX__ */
- return (do_endian_swap ? V_028C70_SWAP_STD : V_028C70_SWAP_STD_REV);
- else if (HAS_SWIZZLE(0,X) && HAS_SWIZZLE(3,Y))
- return V_028C70_SWAP_ALT; /* X__Y */
- else if (HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(3,X))
- return V_028C70_SWAP_ALT_REV; /* Y__X */
- break;
- case 3:
- if (HAS_SWIZZLE(0,X))
- return (do_endian_swap ? V_028C70_SWAP_STD_REV : V_028C70_SWAP_STD);
- else if (HAS_SWIZZLE(0,Z))
- return V_028C70_SWAP_STD_REV; /* ZYX */
- break;
- case 4:
- /* check the middle channels, the 1st and 4th channel can be NONE */
- if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,Z)) {
- return V_028C70_SWAP_STD; /* XYZW */
- } else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,Y)) {
- return V_028C70_SWAP_STD_REV; /* WZYX */
- } else if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,X)) {
- return V_028C70_SWAP_ALT; /* ZYXW */
- } else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,W)) {
- /* YZWX */
- if (desc->is_array)
- return V_028C70_SWAP_ALT_REV;
- else
- return (do_endian_swap ? V_028C70_SWAP_ALT : V_028C70_SWAP_ALT_REV);
- }
- break;
- }
- return ~0U;
+ const struct util_format_description *desc = vk_format_description(format);
+
+#define HAS_SWIZZLE(chan, swz) (desc->swizzle[chan] == PIPE_SWIZZLE_##swz)
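+ /* Pick the component swap that matches the format's swizzle; the inline comments show the matched channel order. */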
+
+ if (format == VK_FORMAT_B10G11R11_UFLOAT_PACK32)
+ return V_028C70_SWAP_STD;
+
+ if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
+ return V_028C70_SWAP_STD;
+
+ if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
+ return ~0U;
+
+ switch (desc->nr_channels) {
+ case 1:
+ if (HAS_SWIZZLE(0, X))
+ return V_028C70_SWAP_STD; /* X___ */
+ else if (HAS_SWIZZLE(3, X))
+ return V_028C70_SWAP_ALT_REV; /* ___X */
+ break;
+ case 2:
+ if ((HAS_SWIZZLE(0, X) && HAS_SWIZZLE(1, Y)) || (HAS_SWIZZLE(0, X) && HAS_SWIZZLE(1, NONE)) ||
+ (HAS_SWIZZLE(0, NONE) && HAS_SWIZZLE(1, Y)))
+ return V_028C70_SWAP_STD; /* XY__ */
+ else if ((HAS_SWIZZLE(0, Y) && HAS_SWIZZLE(1, X)) ||
+ (HAS_SWIZZLE(0, Y) && HAS_SWIZZLE(1, NONE)) ||
+ (HAS_SWIZZLE(0, NONE) && HAS_SWIZZLE(1, X)))
+ /* YX__ */
+ return (do_endian_swap ? V_028C70_SWAP_STD : V_028C70_SWAP_STD_REV);
+ else if (HAS_SWIZZLE(0, X) && HAS_SWIZZLE(3, Y))
+ return V_028C70_SWAP_ALT; /* X__Y */
+ else if (HAS_SWIZZLE(0, Y) && HAS_SWIZZLE(3, X))
+ return V_028C70_SWAP_ALT_REV; /* Y__X */
+ break;
+ case 3:
+ if (HAS_SWIZZLE(0, X))
+ return (do_endian_swap ? V_028C70_SWAP_STD_REV : V_028C70_SWAP_STD);
+ else if (HAS_SWIZZLE(0, Z))
+ return V_028C70_SWAP_STD_REV; /* ZYX */
+ break;
+ case 4:
+ /* Check the middle channels; the 1st and 4th channels can be NONE. */
+ if (HAS_SWIZZLE(1, Y) && HAS_SWIZZLE(2, Z)) {
+ return V_028C70_SWAP_STD; /* XYZW */
+ } else if (HAS_SWIZZLE(1, Z) && HAS_SWIZZLE(2, Y)) {
+ return V_028C70_SWAP_STD_REV; /* WZYX */
+ } else if (HAS_SWIZZLE(1, Y) && HAS_SWIZZLE(2, X)) {
+ return V_028C70_SWAP_ALT; /* ZYXW */
+ } else if (HAS_SWIZZLE(1, Z) && HAS_SWIZZLE(2, W)) {
+ /* YZWX */
+ if (desc->is_array)
+ return V_028C70_SWAP_ALT_REV;
+ else
+ return (do_endian_swap ? V_028C70_SWAP_ALT : V_028C70_SWAP_ALT_REV);
+ }
+ break;
+ }
+ return ~0U;
}
-bool radv_format_pack_clear_color(VkFormat format,
- uint32_t clear_vals[2],
- VkClearColorValue *value)
+bool
+radv_format_pack_clear_color(VkFormat format, uint32_t clear_vals[2], VkClearColorValue *value)
{
- const struct util_format_description *desc = vk_format_description(format);
-
- if (format == VK_FORMAT_B10G11R11_UFLOAT_PACK32) {
- clear_vals[0] = float3_to_r11g11b10f(value->float32);
- clear_vals[1] = 0;
- return true;
- } else if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) {
- clear_vals[0] = float3_to_rgb9e5(value->float32);
- clear_vals[1] = 0;
- return true;
- }
-
- if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
- fprintf(stderr, "failed to fast clear for non-plain format %d\n", format);
- return false;
- }
-
- if (!util_is_power_of_two_or_zero(desc->block.bits)) {
- fprintf(stderr, "failed to fast clear for NPOT format %d\n", format);
- return false;
- }
-
- if (desc->block.bits > 64) {
- /*
- * We have a 128 bits format, check if the first 3 components are the same.
- * Every elements has to be 32 bits since we don't support 64-bit formats,
- * and we can skip swizzling checks as alpha always comes last for these and
- * we do not care about the rest as they have to be the same.
- */
- if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT) {
- if (value->float32[0] != value->float32[1] ||
- value->float32[0] != value->float32[2])
- return false;
- } else {
- if (value->uint32[0] != value->uint32[1] ||
- value->uint32[0] != value->uint32[2])
- return false;
- }
- clear_vals[0] = value->uint32[0];
- clear_vals[1] = value->uint32[3];
- return true;
- }
- uint64_t clear_val = 0;
-
- for (unsigned c = 0; c < 4; ++c) {
- if (desc->swizzle[c] >= 4)
- continue;
-
- const struct util_format_channel_description *channel = &desc->channel[desc->swizzle[c]];
- assert(channel->size);
-
- uint64_t v = 0;
- if (channel->pure_integer) {
- v = value->uint32[c] & ((1ULL << channel->size) - 1);
- } else if (channel->normalized) {
- if (channel->type == UTIL_FORMAT_TYPE_UNSIGNED &&
- desc->swizzle[c] < 3 &&
- desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
- assert(channel->size == 8);
-
- v = util_format_linear_float_to_srgb_8unorm(value->float32[c]);
- } else {
- float f = MIN2(value->float32[c], 1.0f);
-
- if (channel->type == UTIL_FORMAT_TYPE_UNSIGNED) {
- f = MAX2(f, 0.0f) * ((1ULL << channel->size) - 1);
- } else {
- f = MAX2(f, -1.0f) * ((1ULL << (channel->size - 1)) - 1);
- }
-
- /* The hardware rounds before conversion. */
- if (f > 0)
- f += 0.5f;
- else
- f -= 0.5f;
-
- v = (uint64_t)f;
- }
- } else if (channel->type == UTIL_FORMAT_TYPE_FLOAT) {
- if (channel->size == 32) {
- memcpy(&v, &value->float32[c], 4);
- } else if(channel->size == 16) {
- v = _mesa_float_to_float16_rtz(value->float32[c]);
- } else {
- fprintf(stderr, "failed to fast clear for unhandled float size in format %d\n", format);
- return false;
- }
- } else {
- fprintf(stderr, "failed to fast clear for unhandled component type in format %d\n", format);
- return false;
- }
- clear_val |= (v & ((1ULL << channel->size) - 1)) << channel->shift;
- }
-
- clear_vals[0] = clear_val;
- clear_vals[1] = clear_val >> 32;
-
- return true;
+ const struct util_format_description *desc = vk_format_description(format);
+
+ if (format == VK_FORMAT_B10G11R11_UFLOAT_PACK32) {
+ clear_vals[0] = float3_to_r11g11b10f(value->float32);
+ clear_vals[1] = 0;
+ return true;
+ } else if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) {
+ clear_vals[0] = float3_to_rgb9e5(value->float32);
+ clear_vals[1] = 0;
+ return true;
+ }
+
+ if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
+ fprintf(stderr, "failed to fast clear for non-plain format %d\n", format);
+ return false;
+ }
+
+ if (!util_is_power_of_two_or_zero(desc->block.bits)) {
+ fprintf(stderr, "failed to fast clear for NPOT format %d\n", format);
+ return false;
+ }
+
+ if (desc->block.bits > 64) {
+ /*
+  * We have a 128-bit format; check whether the first 3 components are the same.
+  * Every element has to be 32 bits since we don't support 64-bit formats,
+  * and we can skip swizzle checks as alpha always comes last for these formats
+  * and the remaining components have to be the same anyway.
+  */
+ if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT) {
+ if (value->float32[0] != value->float32[1] || value->float32[0] != value->float32[2])
+ return false;
+ } else {
+ if (value->uint32[0] != value->uint32[1] || value->uint32[0] != value->uint32[2])
+ return false;
+ }
+ clear_vals[0] = value->uint32[0];
+ clear_vals[1] = value->uint32[3];
+ return true;
+ }
+ uint64_t clear_val = 0;
+
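+ /* Pack each written channel into clear_val at its shift, converting according to the channel type. */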
+ for (unsigned c = 0; c < 4; ++c) {
+ if (desc->swizzle[c] >= 4)
+ continue;
+
+ const struct util_format_channel_description *channel = &desc->channel[desc->swizzle[c]];
+ assert(channel->size);
+
+ uint64_t v = 0;
+ if (channel->pure_integer) {
+ v = value->uint32[c] & ((1ULL << channel->size) - 1);
+ } else if (channel->normalized) {
+ if (channel->type == UTIL_FORMAT_TYPE_UNSIGNED && desc->swizzle[c] < 3 &&
+ desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
+ assert(channel->size == 8);
+
+ v = util_format_linear_float_to_srgb_8unorm(value->float32[c]);
+ } else {
+ float f = MIN2(value->float32[c], 1.0f);
+
+ if (channel->type == UTIL_FORMAT_TYPE_UNSIGNED) {
+ f = MAX2(f, 0.0f) * ((1ULL << channel->size) - 1);
+ } else {
+ f = MAX2(f, -1.0f) * ((1ULL << (channel->size - 1)) - 1);
+ }
+
+ /* The hardware rounds before conversion. */
+ if (f > 0)
+ f += 0.5f;
+ else
+ f -= 0.5f;
+
+ v = (uint64_t)f;
+ }
+ } else if (channel->type == UTIL_FORMAT_TYPE_FLOAT) {
+ if (channel->size == 32) {
+ memcpy(&v, &value->float32[c], 4);
+ } else if (channel->size == 16) {
+ v = _mesa_float_to_float16_rtz(value->float32[c]);
+ } else {
+ fprintf(stderr, "failed to fast clear for unhandled float size in format %d\n", format);
+ return false;
+ }
+ } else {
+ fprintf(stderr, "failed to fast clear for unhandled component type in format %d\n",
+ format);
+ return false;
+ }
+ clear_val |= (v & ((1ULL << channel->size) - 1)) << channel->shift;
+ }
+
+ clear_vals[0] = clear_val;
+ clear_vals[1] = clear_val >> 32;
+
+ return true;
}
-void radv_GetPhysicalDeviceFormatProperties(
- VkPhysicalDevice physicalDevice,
- VkFormat format,
- VkFormatProperties* pFormatProperties)
+void
+radv_GetPhysicalDeviceFormatProperties(VkPhysicalDevice physicalDevice, VkFormat format,
+ VkFormatProperties *pFormatProperties)
{
- RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
+ RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
- radv_physical_device_get_format_properties(physical_device,
- format,
- pFormatProperties);
+ radv_physical_device_get_format_properties(physical_device, format, pFormatProperties);
}
static const struct ac_modifier_options radv_modifier_options = {
- .dcc = true,
- .dcc_retile = true,
+ .dcc = true,
+ .dcc_retile = true,
};
static VkFormatFeatureFlags
-radv_get_modifier_flags(struct radv_physical_device *dev,
- VkFormat format, uint64_t modifier,
+radv_get_modifier_flags(struct radv_physical_device *dev, VkFormat format, uint64_t modifier,
const VkFormatProperties *props)
{
- VkFormatFeatureFlags features;
+ VkFormatFeatureFlags features;
- if (vk_format_is_compressed(format) || vk_format_is_depth_or_stencil(format))
- return 0;
+ if (vk_format_is_compressed(format) || vk_format_is_depth_or_stencil(format))
+ return 0;
- if (modifier == DRM_FORMAT_MOD_LINEAR)
- features = props->linearTilingFeatures;
- else
- features = props->optimalTilingFeatures;
+ if (modifier == DRM_FORMAT_MOD_LINEAR)
+ features = props->linearTilingFeatures;
+ else
+ features = props->optimalTilingFeatures;
- if (modifier != DRM_FORMAT_MOD_LINEAR && vk_format_get_plane_count(format) > 1)
- return 0;
+ if (modifier != DRM_FORMAT_MOD_LINEAR && vk_format_get_plane_count(format) > 1)
+ return 0;
- if (ac_modifier_has_dcc(modifier)) {
- features &= ~VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT;
+ if (ac_modifier_has_dcc(modifier)) {
+ features &= ~VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT;
- if (dev->instance->debug_flags & (RADV_DEBUG_NO_DCC | RADV_DEBUG_NO_DISPLAY_DCC))
- return 0;
- }
+ if (dev->instance->debug_flags & (RADV_DEBUG_NO_DCC | RADV_DEBUG_NO_DISPLAY_DCC))
+ return 0;
+ }
- return features;
+ return features;
}
-static void radv_list_drm_format_modifiers(struct radv_physical_device *dev,
- VkFormat format,
- VkFormatProperties2 *pFormatProperties)
+static void
+radv_list_drm_format_modifiers(struct radv_physical_device *dev, VkFormat format,
+ VkFormatProperties2 *pFormatProperties)
{
- VkDrmFormatModifierPropertiesListEXT *mod_list =
- vk_find_struct(pFormatProperties, DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT);
- unsigned mod_count;
-
- if (!mod_list)
- return;
-
- if (vk_format_is_compressed(format) || vk_format_is_depth_or_stencil(format)) {
- mod_list->drmFormatModifierCount = 0;
- return;
- }
-
- ac_get_supported_modifiers(&dev->rad_info, &radv_modifier_options,
- vk_format_to_pipe_format(format), &mod_count, NULL);
- if (!mod_list->pDrmFormatModifierProperties) {
- mod_list->drmFormatModifierCount = mod_count;
- return;
- }
-
- mod_count = MIN2(mod_count, mod_list->drmFormatModifierCount);
-
- uint64_t *mods = malloc(mod_count * sizeof(uint64_t));
- if (!mods) {
- /* We can't return an error here ... */
- mod_list->drmFormatModifierCount = 0;
- return;
- }
- ac_get_supported_modifiers(&dev->rad_info, &radv_modifier_options,
- vk_format_to_pipe_format(format), &mod_count, mods);
-
- mod_list->drmFormatModifierCount = 0;
- for (unsigned i = 0; i < mod_count; ++i) {
- VkFormatFeatureFlags features =
- radv_get_modifier_flags(dev, format, mods[i], &pFormatProperties->formatProperties);
- unsigned planes = vk_format_get_plane_count(format);
- if (planes == 1) {
- if (ac_modifier_has_dcc_retile(mods[i]))
- planes = 3;
- else if (ac_modifier_has_dcc(mods[i]))
- planes = 2;
- }
-
- if (!features)
- continue;
-
- mod_list->pDrmFormatModifierProperties[mod_list->drmFormatModifierCount].drmFormatModifier = mods[i];
- mod_list->pDrmFormatModifierProperties[mod_list->drmFormatModifierCount].drmFormatModifierPlaneCount = planes;
- mod_list->pDrmFormatModifierProperties[mod_list->drmFormatModifierCount].drmFormatModifierTilingFeatures = features;
-
- ++mod_list->drmFormatModifierCount;
- }
-
- free(mods);
+ VkDrmFormatModifierPropertiesListEXT *mod_list =
+ vk_find_struct(pFormatProperties, DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT);
+ unsigned mod_count;
+
+ if (!mod_list)
+ return;
+
+ if (vk_format_is_compressed(format) || vk_format_is_depth_or_stencil(format)) {
+ mod_list->drmFormatModifierCount = 0;
+ return;
+ }
+
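+ /* First query only the number of supported modifiers. */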
+ ac_get_supported_modifiers(&dev->rad_info, &radv_modifier_options,
+ vk_format_to_pipe_format(format), &mod_count, NULL);
+ if (!mod_list->pDrmFormatModifierProperties) {
+ mod_list->drmFormatModifierCount = mod_count;
+ return;
+ }
+
+ mod_count = MIN2(mod_count, mod_list->drmFormatModifierCount);
+
+ uint64_t *mods = malloc(mod_count * sizeof(uint64_t));
+ if (!mods) {
+ /* We can't return an error here ... */
+ mod_list->drmFormatModifierCount = 0;
+ return;
+ }
+ ac_get_supported_modifiers(&dev->rad_info, &radv_modifier_options,
+ vk_format_to_pipe_format(format), &mod_count, mods);
+
+ mod_list->drmFormatModifierCount = 0;
+ for (unsigned i = 0; i < mod_count; ++i) {
+ VkFormatFeatureFlags features =
+ radv_get_modifier_flags(dev, format, mods[i], &pFormatProperties->formatProperties);
+ unsigned planes = vk_format_get_plane_count(format);
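+ /* DCC metadata is exposed through extra memory planes (one more when retiling is needed). */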
+ if (planes == 1) {
+ if (ac_modifier_has_dcc_retile(mods[i]))
+ planes = 3;
+ else if (ac_modifier_has_dcc(mods[i]))
+ planes = 2;
+ }
+
+ if (!features)
+ continue;
+
+ mod_list->pDrmFormatModifierProperties[mod_list->drmFormatModifierCount].drmFormatModifier =
+ mods[i];
+ mod_list->pDrmFormatModifierProperties[mod_list->drmFormatModifierCount]
+ .drmFormatModifierPlaneCount = planes;
+ mod_list->pDrmFormatModifierProperties[mod_list->drmFormatModifierCount]
+ .drmFormatModifierTilingFeatures = features;
+
+ ++mod_list->drmFormatModifierCount;
+ }
+
+ free(mods);
}
-
-static VkResult radv_check_modifier_support(struct radv_physical_device *dev,
- const VkPhysicalDeviceImageFormatInfo2 *info,
- VkImageFormatProperties *props,
- VkFormat format,
- uint64_t modifier)
+static VkResult
+radv_check_modifier_support(struct radv_physical_device *dev,
+ const VkPhysicalDeviceImageFormatInfo2 *info,
+ VkImageFormatProperties *props, VkFormat format, uint64_t modifier)
{
- if (info->type != VK_IMAGE_TYPE_2D)
- return VK_ERROR_FORMAT_NOT_SUPPORTED;
-
- /* We did not add modifiers for sparse textures. */
- if (info->flags & (VK_IMAGE_CREATE_SPARSE_BINDING_BIT |
- VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT |
- VK_IMAGE_CREATE_SPARSE_ALIASED_BIT))
- return VK_ERROR_FORMAT_NOT_SUPPORTED;
-
- /*
- * Need to check the modifier is supported in general:
- * "If the drmFormatModifier is incompatible with the parameters specified
- * in VkPhysicalDeviceImageFormatInfo2 and its pNext chain, then
- * vkGetPhysicalDeviceImageFormatProperties2 returns VK_ERROR_FORMAT_NOT_SUPPORTED.
- * The implementation must support the query of any drmFormatModifier,
- * including unknown and invalid modifier values."
- */
- VkDrmFormatModifierPropertiesListEXT mod_list = {
- .sType = VK_STRUCTURE_TYPE_DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT,
- };
-
- VkFormatProperties2 format_props2 = {
- .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
- .pNext = &mod_list
- };
-
- radv_GetPhysicalDeviceFormatProperties2(radv_physical_device_to_handle(dev), format, &format_props2);
-
- if (!mod_list.drmFormatModifierCount)
- return VK_ERROR_FORMAT_NOT_SUPPORTED;
-
- mod_list.pDrmFormatModifierProperties = calloc(mod_list.drmFormatModifierCount, sizeof(*mod_list.pDrmFormatModifierProperties));
- if (!mod_list.pDrmFormatModifierProperties)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
-
- radv_GetPhysicalDeviceFormatProperties2(radv_physical_device_to_handle(dev), format, &format_props2);
-
- bool found = false;
- for (uint32_t i = 0; i < mod_list.drmFormatModifierCount && !found; ++i)
- if (mod_list.pDrmFormatModifierProperties[i].drmFormatModifier == modifier)
- found = true;
-
- free(mod_list.pDrmFormatModifierProperties);
-
- if (!found)
- return VK_ERROR_FORMAT_NOT_SUPPORTED;
-
- if (ac_modifier_has_dcc(modifier) &&
- !radv_are_formats_dcc_compatible(dev, info->pNext, format, info->flags))
- return VK_ERROR_FORMAT_NOT_SUPPORTED;
-
- /* We can expand this as needed and implemented but there is not much demand
- * for more. */
- if (ac_modifier_has_dcc(modifier)) {
- props->maxMipLevels = 1;
- props->maxArrayLayers = 1;
- }
- /* We don't support MSAA for modifiers */
- props->sampleCounts &= VK_SAMPLE_COUNT_1_BIT;
- return VK_SUCCESS;
+ if (info->type != VK_IMAGE_TYPE_2D)
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+ /* We did not add modifiers for sparse textures. */
+ if (info->flags & (VK_IMAGE_CREATE_SPARSE_BINDING_BIT | VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT |
+ VK_IMAGE_CREATE_SPARSE_ALIASED_BIT))
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+ /*
+ * Need to check the modifier is supported in general:
+ * "If the drmFormatModifier is incompatible with the parameters specified
+ * in VkPhysicalDeviceImageFormatInfo2 and its pNext chain, then
+ * vkGetPhysicalDeviceImageFormatProperties2 returns VK_ERROR_FORMAT_NOT_SUPPORTED.
+ * The implementation must support the query of any drmFormatModifier,
+ * including unknown and invalid modifier values."
+ */
+ VkDrmFormatModifierPropertiesListEXT mod_list = {
+ .sType = VK_STRUCTURE_TYPE_DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT,
+ };
+
+ VkFormatProperties2 format_props2 = {.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
+ .pNext = &mod_list};
+
+ radv_GetPhysicalDeviceFormatProperties2(radv_physical_device_to_handle(dev), format,
+ &format_props2);
+
+ if (!mod_list.drmFormatModifierCount)
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+ mod_list.pDrmFormatModifierProperties =
+ calloc(mod_list.drmFormatModifierCount, sizeof(*mod_list.pDrmFormatModifierProperties));
+ if (!mod_list.pDrmFormatModifierProperties)
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+
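+ /* Query again now that storage for the modifier list is allocated. */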
+ radv_GetPhysicalDeviceFormatProperties2(radv_physical_device_to_handle(dev), format,
+ &format_props2);
+
+ bool found = false;
+ for (uint32_t i = 0; i < mod_list.drmFormatModifierCount && !found; ++i)
+ if (mod_list.pDrmFormatModifierProperties[i].drmFormatModifier == modifier)
+ found = true;
+
+ free(mod_list.pDrmFormatModifierProperties);
+
+ if (!found)
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+ if (ac_modifier_has_dcc(modifier) &&
+ !radv_are_formats_dcc_compatible(dev, info->pNext, format, info->flags))
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+ /* We can expand this as needed and implement it, but there is not much demand
+  * for more. */
+ if (ac_modifier_has_dcc(modifier)) {
+ props->maxMipLevels = 1;
+ props->maxArrayLayers = 1;
+ }
+ /* We don't support MSAA for modifiers */
+ props->sampleCounts &= VK_SAMPLE_COUNT_1_BIT;
+ return VK_SUCCESS;
}
-void radv_GetPhysicalDeviceFormatProperties2(
- VkPhysicalDevice physicalDevice,
- VkFormat format,
- VkFormatProperties2* pFormatProperties)
+void
+radv_GetPhysicalDeviceFormatProperties2(VkPhysicalDevice physicalDevice, VkFormat format,
+ VkFormatProperties2 *pFormatProperties)
{
- RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
+ RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
- radv_physical_device_get_format_properties(physical_device,
- format,
- &pFormatProperties->formatProperties);
+ radv_physical_device_get_format_properties(physical_device, format,
+ &pFormatProperties->formatProperties);
- radv_list_drm_format_modifiers(physical_device, format, pFormatProperties);
+ radv_list_drm_format_modifiers(physical_device, format, pFormatProperties);
}
-static VkResult radv_get_image_format_properties(struct radv_physical_device *physical_device,
- const VkPhysicalDeviceImageFormatInfo2 *info,
- VkFormat format,
- VkImageFormatProperties *pImageFormatProperties)
+static VkResult
+radv_get_image_format_properties(struct radv_physical_device *physical_device,
+ const VkPhysicalDeviceImageFormatInfo2 *info, VkFormat format,
+ VkImageFormatProperties *pImageFormatProperties)
{
- VkFormatProperties format_props;
- VkFormatFeatureFlags format_feature_flags;
- VkExtent3D maxExtent;
- uint32_t maxMipLevels;
- uint32_t maxArraySize;
- VkSampleCountFlags sampleCounts = VK_SAMPLE_COUNT_1_BIT;
- const struct util_format_description *desc = vk_format_description(format);
- enum chip_class chip_class = physical_device->rad_info.chip_class;
- VkImageTiling tiling = info->tiling;
- const VkPhysicalDeviceImageDrmFormatModifierInfoEXT *mod_info =
- vk_find_struct_const(info->pNext, PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT);
- VkResult result = VK_ERROR_FORMAT_NOT_SUPPORTED;
-
- radv_physical_device_get_format_properties(physical_device, format,
- &format_props);
- if (tiling == VK_IMAGE_TILING_LINEAR) {
- format_feature_flags = format_props.linearTilingFeatures;
- } else if (tiling == VK_IMAGE_TILING_OPTIMAL) {
- format_feature_flags = format_props.optimalTilingFeatures;
- } else if (tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
- format_feature_flags = radv_get_modifier_flags(physical_device, format,
- mod_info->drmFormatModifier,
- &format_props);
- } else {
- unreachable("bad VkImageTiling");
- }
-
- if (format_feature_flags == 0)
- goto unsupported;
-
- if (info->type != VK_IMAGE_TYPE_2D && vk_format_is_depth_or_stencil(format))
- goto unsupported;
-
- switch (info->type) {
- default:
- unreachable("bad vkimage type\n");
- case VK_IMAGE_TYPE_1D:
- maxExtent.width = 16384;
- maxExtent.height = 1;
- maxExtent.depth = 1;
- maxMipLevels = 15; /* log2(maxWidth) + 1 */
- maxArraySize = chip_class >= GFX10 ? 8192 : 2048;
- break;
- case VK_IMAGE_TYPE_2D:
- maxExtent.width = 16384;
- maxExtent.height = 16384;
- maxExtent.depth = 1;
- maxMipLevels = 15; /* log2(maxWidth) + 1 */
- maxArraySize = chip_class >= GFX10 ? 8192 : 2048;
- break;
- case VK_IMAGE_TYPE_3D:
- if (chip_class >= GFX10) {
- maxExtent.width = 8192;
- maxExtent.height = 8192;
- maxExtent.depth = 8192;
- } else {
- maxExtent.width = 2048;
- maxExtent.height = 2048;
- maxExtent.depth = 2048;
- }
- maxMipLevels = util_logbase2(maxExtent.width) + 1;
- maxArraySize = 1;
- break;
- }
-
- if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
- /* We might be able to support this, but the entire format support
- * is messy, so take the lazy way out. */
- maxArraySize = 1;
- }
-
- if (tiling == VK_IMAGE_TILING_OPTIMAL &&
- info->type == VK_IMAGE_TYPE_2D &&
- (format_feature_flags & (VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT |
- VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
- !(info->flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT)) {
- sampleCounts |= VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT;
- }
-
- if (tiling == VK_IMAGE_TILING_LINEAR &&
- (format == VK_FORMAT_R32G32B32_SFLOAT ||
- format == VK_FORMAT_R32G32B32_SINT ||
- format == VK_FORMAT_R32G32B32_UINT)) {
- /* R32G32B32 is a weird format and the driver currently only
- * supports the bare minimum.
- * TODO: Implement more if we really need to.
- */
- if (info->type == VK_IMAGE_TYPE_3D)
- goto unsupported;
- maxArraySize = 1;
- maxMipLevels = 1;
- }
-
-
- /* We can't create 3d compressed 128bpp images that can be rendered to on GFX9 */
- if (physical_device->rad_info.chip_class >= GFX9 &&
- info->type == VK_IMAGE_TYPE_3D &&
- vk_format_get_blocksizebits(format) == 128 &&
- vk_format_is_compressed(format) &&
- (info->flags & VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT) &&
- ((info->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT) ||
- (info->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT))) {
- goto unsupported;
- }
-
- if (info->usage & VK_IMAGE_USAGE_SAMPLED_BIT) {
- if (!(format_feature_flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) {
- goto unsupported;
- }
- }
-
- if (info->usage & VK_IMAGE_USAGE_STORAGE_BIT) {
- if (!(format_feature_flags & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT)) {
- goto unsupported;
- }
- }
-
- if (info->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) {
- if (!(format_feature_flags & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT)) {
- goto unsupported;
- }
- }
-
- if (info->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
- if (!(format_feature_flags & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) {
- goto unsupported;
- }
- }
-
- if (info->usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) {
- if (!(format_feature_flags & VK_FORMAT_FEATURE_TRANSFER_SRC_BIT)) {
- goto unsupported;
- }
- }
-
- if (info->usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) {
- if (!(format_feature_flags & VK_FORMAT_FEATURE_TRANSFER_DST_BIT)) {
- goto unsupported;
- }
- }
-
- if (info->usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) {
- if (!(format_feature_flags & (VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT |
- VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT))) {
- goto unsupported;
- }
- }
-
- /* Sparse resources with multi-planar formats are unsupported. */
- if (info->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
- if (vk_format_get_plane_count(format) > 1)
- goto unsupported;
- }
-
- if (info->flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT) {
- /* Sparse textures are only supported on GFX8+. */
- if (physical_device->rad_info.chip_class < GFX8)
- goto unsupported;
-
- if (vk_format_get_plane_count(format) > 1 || info->type != VK_IMAGE_TYPE_2D ||
- info->tiling != VK_IMAGE_TILING_OPTIMAL ||
- vk_format_is_depth_or_stencil(format))
- goto unsupported;
- }
-
- *pImageFormatProperties = (VkImageFormatProperties) {
- .maxExtent = maxExtent,
- .maxMipLevels = maxMipLevels,
- .maxArrayLayers = maxArraySize,
- .sampleCounts = sampleCounts,
-
- /* FINISHME: Accurately calculate
- * VkImageFormatProperties::maxResourceSize.
- */
- .maxResourceSize = UINT32_MAX,
- };
-
- if (mod_info) {
- result = radv_check_modifier_support(physical_device, info,
- pImageFormatProperties,
- format, mod_info->drmFormatModifier);
- if (result != VK_SUCCESS)
- goto unsupported;
- }
-
- return VK_SUCCESS;
+ VkFormatProperties format_props;
+ VkFormatFeatureFlags format_feature_flags;
+ VkExtent3D maxExtent;
+ uint32_t maxMipLevels;
+ uint32_t maxArraySize;
+ VkSampleCountFlags sampleCounts = VK_SAMPLE_COUNT_1_BIT;
+ const struct util_format_description *desc = vk_format_description(format);
+ enum chip_class chip_class = physical_device->rad_info.chip_class;
+ VkImageTiling tiling = info->tiling;
+ const VkPhysicalDeviceImageDrmFormatModifierInfoEXT *mod_info =
+ vk_find_struct_const(info->pNext, PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT);
+ VkResult result = VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+ radv_physical_device_get_format_properties(physical_device, format, &format_props);
+ if (tiling == VK_IMAGE_TILING_LINEAR) {
+ format_feature_flags = format_props.linearTilingFeatures;
+ } else if (tiling == VK_IMAGE_TILING_OPTIMAL) {
+ format_feature_flags = format_props.optimalTilingFeatures;
+ } else if (tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
+ format_feature_flags = radv_get_modifier_flags(physical_device, format,
+ mod_info->drmFormatModifier, &format_props);
+ } else {
+ unreachable("bad VkImageTiling");
+ }
+
+ if (format_feature_flags == 0)
+ goto unsupported;
+
+ if (info->type != VK_IMAGE_TYPE_2D && vk_format_is_depth_or_stencil(format))
+ goto unsupported;
+
+ switch (info->type) {
+ default:
+ unreachable("bad vkimage type\n");
+ case VK_IMAGE_TYPE_1D:
+ maxExtent.width = 16384;
+ maxExtent.height = 1;
+ maxExtent.depth = 1;
+ maxMipLevels = 15; /* log2(maxWidth) + 1 */
+ maxArraySize = chip_class >= GFX10 ? 8192 : 2048;
+ break;
+ case VK_IMAGE_TYPE_2D:
+ maxExtent.width = 16384;
+ maxExtent.height = 16384;
+ maxExtent.depth = 1;
+ maxMipLevels = 15; /* log2(maxWidth) + 1 */
+ maxArraySize = chip_class >= GFX10 ? 8192 : 2048;
+ break;
+ case VK_IMAGE_TYPE_3D:
+ if (chip_class >= GFX10) {
+ maxExtent.width = 8192;
+ maxExtent.height = 8192;
+ maxExtent.depth = 8192;
+ } else {
+ maxExtent.width = 2048;
+ maxExtent.height = 2048;
+ maxExtent.depth = 2048;
+ }
+ maxMipLevels = util_logbase2(maxExtent.width) + 1;
+ maxArraySize = 1;
+ break;
+ }
+
+ if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
+ /* We might be able to support this, but the entire format support
+ * is messy, so take the lazy way out. */
+ maxArraySize = 1;
+ }
+
+ if (tiling == VK_IMAGE_TILING_OPTIMAL && info->type == VK_IMAGE_TYPE_2D &&
+ (format_feature_flags & (VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT |
+ VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
+ !(info->flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT)) {
+ sampleCounts |= VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT;
+ }
+
+ if (tiling == VK_IMAGE_TILING_LINEAR &&
+ (format == VK_FORMAT_R32G32B32_SFLOAT || format == VK_FORMAT_R32G32B32_SINT ||
+ format == VK_FORMAT_R32G32B32_UINT)) {
+ /* R32G32B32 is a weird format and the driver currently only
+ * supports the bare minimum.
+ * TODO: Implement more if we really need to.
+ */
+ if (info->type == VK_IMAGE_TYPE_3D)
+ goto unsupported;
+ maxArraySize = 1;
+ maxMipLevels = 1;
+ }
+
+ /* We can't create 3d compressed 128bpp images that can be rendered to on GFX9 */
+ if (physical_device->rad_info.chip_class >= GFX9 && info->type == VK_IMAGE_TYPE_3D &&
+ vk_format_get_blocksizebits(format) == 128 && vk_format_is_compressed(format) &&
+ (info->flags & VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT) &&
+ ((info->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT) ||
+ (info->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT))) {
+ goto unsupported;
+ }
+
+ if (info->usage & VK_IMAGE_USAGE_SAMPLED_BIT) {
+ if (!(format_feature_flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) {
+ goto unsupported;
+ }
+ }
+
+ if (info->usage & VK_IMAGE_USAGE_STORAGE_BIT) {
+ if (!(format_feature_flags & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT)) {
+ goto unsupported;
+ }
+ }
+
+ if (info->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) {
+ if (!(format_feature_flags & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT)) {
+ goto unsupported;
+ }
+ }
+
+ if (info->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
+ if (!(format_feature_flags & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) {
+ goto unsupported;
+ }
+ }
+
+ if (info->usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) {
+ if (!(format_feature_flags & VK_FORMAT_FEATURE_TRANSFER_SRC_BIT)) {
+ goto unsupported;
+ }
+ }
+
+ if (info->usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) {
+ if (!(format_feature_flags & VK_FORMAT_FEATURE_TRANSFER_DST_BIT)) {
+ goto unsupported;
+ }
+ }
+
+ if (info->usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) {
+ if (!(format_feature_flags & (VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT |
+ VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT))) {
+ goto unsupported;
+ }
+ }
+
+ /* Sparse resources with multi-planar formats are unsupported. */
+ if (info->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
+ if (vk_format_get_plane_count(format) > 1)
+ goto unsupported;
+ }
+
+ if (info->flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT) {
+ /* Sparse textures are only supported on GFX8+. */
+ if (physical_device->rad_info.chip_class < GFX8)
+ goto unsupported;
+
+ if (vk_format_get_plane_count(format) > 1 || info->type != VK_IMAGE_TYPE_2D ||
+ info->tiling != VK_IMAGE_TILING_OPTIMAL || vk_format_is_depth_or_stencil(format))
+ goto unsupported;
+ }
+
+ *pImageFormatProperties = (VkImageFormatProperties){
+ .maxExtent = maxExtent,
+ .maxMipLevels = maxMipLevels,
+ .maxArrayLayers = maxArraySize,
+ .sampleCounts = sampleCounts,
+
+ /* FINISHME: Accurately calculate
+ * VkImageFormatProperties::maxResourceSize.
+ */
+ .maxResourceSize = UINT32_MAX,
+ };
+
+ if (mod_info) {
+ result = radv_check_modifier_support(physical_device, info, pImageFormatProperties, format,
+ mod_info->drmFormatModifier);
+ if (result != VK_SUCCESS)
+ goto unsupported;
+ }
+
+ return VK_SUCCESS;
unsupported:
- *pImageFormatProperties = (VkImageFormatProperties) {
- .maxExtent = { 0, 0, 0 },
- .maxMipLevels = 0,
- .maxArrayLayers = 0,
- .sampleCounts = 0,
- .maxResourceSize = 0,
- };
-
- return result;
+ *pImageFormatProperties = (VkImageFormatProperties){
+ .maxExtent = {0, 0, 0},
+ .maxMipLevels = 0,
+ .maxArrayLayers = 0,
+ .sampleCounts = 0,
+ .maxResourceSize = 0,
+ };
+
+ return result;
}
-VkResult radv_GetPhysicalDeviceImageFormatProperties(
- VkPhysicalDevice physicalDevice,
- VkFormat format,
- VkImageType type,
- VkImageTiling tiling,
- VkImageUsageFlags usage,
- VkImageCreateFlags createFlags,
- VkImageFormatProperties* pImageFormatProperties)
+VkResult
+radv_GetPhysicalDeviceImageFormatProperties(VkPhysicalDevice physicalDevice, VkFormat format,
+ VkImageType type, VkImageTiling tiling,
+ VkImageUsageFlags usage, VkImageCreateFlags createFlags,
+ VkImageFormatProperties *pImageFormatProperties)
{
- RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
-
- const VkPhysicalDeviceImageFormatInfo2 info = {
- .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
- .pNext = NULL,
- .format = format,
- .type = type,
- .tiling = tiling,
- .usage = usage,
- .flags = createFlags,
- };
-
- return radv_get_image_format_properties(physical_device, &info, format,
- pImageFormatProperties);
+ RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
+
+ const VkPhysicalDeviceImageFormatInfo2 info = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
+ .pNext = NULL,
+ .format = format,
+ .type = type,
+ .tiling = tiling,
+ .usage = usage,
+ .flags = createFlags,
+ };
+
+ return radv_get_image_format_properties(physical_device, &info, format, pImageFormatProperties);
}
static void
get_external_image_format_properties(struct radv_physical_device *physical_device,
- const VkPhysicalDeviceImageFormatInfo2 *pImageFormatInfo,
- VkExternalMemoryHandleTypeFlagBits handleType,
- VkExternalMemoryProperties *external_properties,
- VkImageFormatProperties *format_properties)
+ const VkPhysicalDeviceImageFormatInfo2 *pImageFormatInfo,
+ VkExternalMemoryHandleTypeFlagBits handleType,
+ VkExternalMemoryProperties *external_properties,
+ VkImageFormatProperties *format_properties)
{
- VkExternalMemoryFeatureFlagBits flags = 0;
- VkExternalMemoryHandleTypeFlags export_flags = 0;
- VkExternalMemoryHandleTypeFlags compat_flags = 0;
-
- if (pImageFormatInfo->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT)
- return;
-
- switch (handleType) {
- case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
- if (pImageFormatInfo->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
- break;
-
- switch (pImageFormatInfo->type) {
- case VK_IMAGE_TYPE_2D:
- flags = VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT |VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
-
- compat_flags = export_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
- break;
- default:
- break;
- }
- break;
- case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT:
- switch (pImageFormatInfo->type) {
- case VK_IMAGE_TYPE_2D:
- flags = VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT |VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
- if (pImageFormatInfo->tiling != VK_IMAGE_TILING_LINEAR)
- flags |= VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT;
-
- compat_flags = export_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
- break;
- default:
- break;
- }
- break;
- case VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID:
- if (!physical_device->vk.supported_extensions.ANDROID_external_memory_android_hardware_buffer)
- break;
-
- if (!radv_android_gralloc_supports_format(pImageFormatInfo->format,
- pImageFormatInfo->usage))
- break;
-
- if (pImageFormatInfo->type != VK_IMAGE_TYPE_2D)
- break;
-
- format_properties->maxMipLevels = MIN2(1, format_properties->maxMipLevels);
- format_properties->maxArrayLayers = MIN2(1, format_properties->maxArrayLayers);
- format_properties->sampleCounts &= VK_SAMPLE_COUNT_1_BIT;
-
- flags = VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT|VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
- if (pImageFormatInfo->tiling != VK_IMAGE_TILING_LINEAR)
- flags |= VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT;
-
- compat_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID;
- break;
- case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT:
- flags = VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
- compat_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT;
- break;
- default:
- break;
- }
-
- *external_properties = (VkExternalMemoryProperties) {
- .externalMemoryFeatures = flags,
- .exportFromImportedHandleTypes = export_flags,
- .compatibleHandleTypes = compat_flags,
- };
+ VkExternalMemoryFeatureFlagBits flags = 0;
+ VkExternalMemoryHandleTypeFlags export_flags = 0;
+ VkExternalMemoryHandleTypeFlags compat_flags = 0;
+
+ if (pImageFormatInfo->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT)
+ return;
+
+ switch (handleType) {
+ case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
+ if (pImageFormatInfo->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
+ break;
+
+ switch (pImageFormatInfo->type) {
+ case VK_IMAGE_TYPE_2D:
+ flags =
+ VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT | VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
+
+ compat_flags = export_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
+ break;
+ default:
+ break;
+ }
+ break;
+ case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT:
+ switch (pImageFormatInfo->type) {
+ case VK_IMAGE_TYPE_2D:
+ flags =
+ VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT | VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
+ if (pImageFormatInfo->tiling != VK_IMAGE_TILING_LINEAR)
+ flags |= VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT;
+
+ compat_flags = export_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
+ break;
+ default:
+ break;
+ }
+ break;
+ case VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID:
+ if (!physical_device->vk.supported_extensions.ANDROID_external_memory_android_hardware_buffer)
+ break;
+
+ if (!radv_android_gralloc_supports_format(pImageFormatInfo->format, pImageFormatInfo->usage))
+ break;
+
+ if (pImageFormatInfo->type != VK_IMAGE_TYPE_2D)
+ break;
+
+ format_properties->maxMipLevels = MIN2(1, format_properties->maxMipLevels);
+ format_properties->maxArrayLayers = MIN2(1, format_properties->maxArrayLayers);
+ format_properties->sampleCounts &= VK_SAMPLE_COUNT_1_BIT;
+
+ flags = VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT | VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
+ if (pImageFormatInfo->tiling != VK_IMAGE_TILING_LINEAR)
+ flags |= VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT;
+
+ compat_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID;
+ break;
+ case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT:
+ flags = VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
+ compat_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT;
+ break;
+ default:
+ break;
+ }
+
+ *external_properties = (VkExternalMemoryProperties){
+ .externalMemoryFeatures = flags,
+ .exportFromImportedHandleTypes = export_flags,
+ .compatibleHandleTypes = compat_flags,
+ };
}
-VkResult radv_GetPhysicalDeviceImageFormatProperties2(
- VkPhysicalDevice physicalDevice,
- const VkPhysicalDeviceImageFormatInfo2 *base_info,
- VkImageFormatProperties2 *base_props)
+VkResult
+radv_GetPhysicalDeviceImageFormatProperties2(VkPhysicalDevice physicalDevice,
+ const VkPhysicalDeviceImageFormatInfo2 *base_info,
+ VkImageFormatProperties2 *base_props)
{
- RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
- const VkPhysicalDeviceExternalImageFormatInfo *external_info = NULL;
- VkExternalImageFormatProperties *external_props = NULL;
- struct VkAndroidHardwareBufferUsageANDROID *android_usage = NULL;
- VkSamplerYcbcrConversionImageFormatProperties *ycbcr_props = NULL;
- VkTextureLODGatherFormatPropertiesAMD *texture_lod_props = NULL;
- VkResult result;
- VkFormat format = radv_select_android_external_format(base_info->pNext, base_info->format);
-
- result = radv_get_image_format_properties(physical_device, base_info, format,
- &base_props->imageFormatProperties);
- if (result != VK_SUCCESS)
- return result;
-
- /* Extract input structs */
- vk_foreach_struct_const(s, base_info->pNext) {
- switch (s->sType) {
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO:
- external_info = (const void *) s;
- break;
- default:
- break;
- }
- }
-
- /* Extract output structs */
- vk_foreach_struct(s, base_props->pNext) {
- switch (s->sType) {
- case VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES:
- external_props = (void *) s;
- break;
- case VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_IMAGE_FORMAT_PROPERTIES:
- ycbcr_props = (void *) s;
- break;
- case VK_STRUCTURE_TYPE_ANDROID_HARDWARE_BUFFER_USAGE_ANDROID:
- android_usage = (void *) s;
- break;
- case VK_STRUCTURE_TYPE_TEXTURE_LOD_GATHER_FORMAT_PROPERTIES_AMD:
- texture_lod_props = (void *) s;
- break;
- default:
- break;
- }
- }
-
- bool ahb_supported = physical_device->vk.supported_extensions.ANDROID_external_memory_android_hardware_buffer;
- if (android_usage && ahb_supported) {
+ RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
+ const VkPhysicalDeviceExternalImageFormatInfo *external_info = NULL;
+ VkExternalImageFormatProperties *external_props = NULL;
+ struct VkAndroidHardwareBufferUsageANDROID *android_usage = NULL;
+ VkSamplerYcbcrConversionImageFormatProperties *ycbcr_props = NULL;
+ VkTextureLODGatherFormatPropertiesAMD *texture_lod_props = NULL;
+ VkResult result;
+ VkFormat format = radv_select_android_external_format(base_info->pNext, base_info->format);
+
+ result = radv_get_image_format_properties(physical_device, base_info, format,
+ &base_props->imageFormatProperties);
+ if (result != VK_SUCCESS)
+ return result;
+
+ /* Extract input structs */
+ vk_foreach_struct_const(s, base_info->pNext)
+ {
+ switch (s->sType) {
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO:
+ external_info = (const void *)s;
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* Extract output structs */
+ vk_foreach_struct(s, base_props->pNext)
+ {
+ switch (s->sType) {
+ case VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES:
+ external_props = (void *)s;
+ break;
+ case VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_IMAGE_FORMAT_PROPERTIES:
+ ycbcr_props = (void *)s;
+ break;
+ case VK_STRUCTURE_TYPE_ANDROID_HARDWARE_BUFFER_USAGE_ANDROID:
+ android_usage = (void *)s;
+ break;
+ case VK_STRUCTURE_TYPE_TEXTURE_LOD_GATHER_FORMAT_PROPERTIES_AMD:
+ texture_lod_props = (void *)s;
+ break;
+ default:
+ break;
+ }
+ }
+
+ bool ahb_supported =
+ physical_device->vk.supported_extensions.ANDROID_external_memory_android_hardware_buffer;
+ if (android_usage && ahb_supported) {
#if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER
- android_usage->androidHardwareBufferUsage =
- radv_ahb_usage_from_vk_usage(base_info->flags,
- base_info->usage);
+ android_usage->androidHardwareBufferUsage =
+ radv_ahb_usage_from_vk_usage(base_info->flags, base_info->usage);
#endif
- }
-
- /* From the Vulkan 1.0.97 spec:
- *
- * If handleType is 0, vkGetPhysicalDeviceImageFormatProperties2 will
- * behave as if VkPhysicalDeviceExternalImageFormatInfo was not
- * present and VkExternalImageFormatProperties will be ignored.
- */
- if (external_info && external_info->handleType != 0) {
- get_external_image_format_properties(physical_device, base_info, external_info->handleType,
- &external_props->externalMemoryProperties,
- &base_props->imageFormatProperties);
- if (!external_props->externalMemoryProperties.externalMemoryFeatures) {
- /* From the Vulkan 1.0.97 spec:
- *
- * If handleType is not compatible with the [parameters] specified
- * in VkPhysicalDeviceImageFormatInfo2, then
- * vkGetPhysicalDeviceImageFormatProperties2 returns
- * VK_ERROR_FORMAT_NOT_SUPPORTED.
- */
- result = vk_errorf(physical_device->instance, VK_ERROR_FORMAT_NOT_SUPPORTED,
- "unsupported VkExternalMemoryHandleTypeFlagBits 0x%x",
- external_info->handleType);
- goto fail;
- }
- }
-
- if (ycbcr_props) {
- ycbcr_props->combinedImageSamplerDescriptorCount = vk_format_get_plane_count(format);
- }
-
- if (texture_lod_props) {
- if (physical_device->rad_info.chip_class >= GFX9) {
- texture_lod_props->supportsTextureGatherLODBiasAMD = true;
- } else {
- texture_lod_props->supportsTextureGatherLODBiasAMD = !vk_format_is_int(format);
- }
- }
-
- return VK_SUCCESS;
+ }
+
+ /* From the Vulkan 1.0.97 spec:
+ *
+ * If handleType is 0, vkGetPhysicalDeviceImageFormatProperties2 will
+ * behave as if VkPhysicalDeviceExternalImageFormatInfo was not
+ * present and VkExternalImageFormatProperties will be ignored.
+ */
+ if (external_info && external_info->handleType != 0) {
+ get_external_image_format_properties(physical_device, base_info, external_info->handleType,
+ &external_props->externalMemoryProperties,
+ &base_props->imageFormatProperties);
+ if (!external_props->externalMemoryProperties.externalMemoryFeatures) {
+ /* From the Vulkan 1.0.97 spec:
+ *
+ * If handleType is not compatible with the [parameters] specified
+ * in VkPhysicalDeviceImageFormatInfo2, then
+ * vkGetPhysicalDeviceImageFormatProperties2 returns
+ * VK_ERROR_FORMAT_NOT_SUPPORTED.
+ */
+ result = vk_errorf(physical_device->instance, VK_ERROR_FORMAT_NOT_SUPPORTED,
+ "unsupported VkExternalMemoryHandleTypeFlagBits 0x%x",
+ external_info->handleType);
+ goto fail;
+ }
+ }
+
+ if (ycbcr_props) {
+ ycbcr_props->combinedImageSamplerDescriptorCount = vk_format_get_plane_count(format);
+ }
+
+ if (texture_lod_props) {
+ if (physical_device->rad_info.chip_class >= GFX9) {
+ texture_lod_props->supportsTextureGatherLODBiasAMD = true;
+ } else {
+ texture_lod_props->supportsTextureGatherLODBiasAMD = !vk_format_is_int(format);
+ }
+ }
+
+ return VK_SUCCESS;
fail:
- if (result == VK_ERROR_FORMAT_NOT_SUPPORTED) {
- /* From the Vulkan 1.0.97 spec:
- *
- * If the combination of parameters to
- * vkGetPhysicalDeviceImageFormatProperties2 is not supported by
- * the implementation for use in vkCreateImage, then all members of
- * imageFormatProperties will be filled with zero.
- */
- base_props->imageFormatProperties = (VkImageFormatProperties) {0};
- }
-
- return result;
+ if (result == VK_ERROR_FORMAT_NOT_SUPPORTED) {
+ /* From the Vulkan 1.0.97 spec:
+ *
+ * If the combination of parameters to
+ * vkGetPhysicalDeviceImageFormatProperties2 is not supported by
+ * the implementation for use in vkCreateImage, then all members of
+ * imageFormatProperties will be filled with zero.
+ */
+ base_props->imageFormatProperties = (VkImageFormatProperties){0};
+ }
+
+ return result;
}
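
A compact usage sketch of the external-memory path handled above, chaining
VkPhysicalDeviceExternalImageFormatInfo on the input and
VkExternalImageFormatProperties on the output for an opaque fd (the format,
usage and wrapper function are illustrative choices):

    #include <vulkan/vulkan.h>

    static VkResult
    query_opaque_fd_image_support(VkPhysicalDevice pdev)
    {
       const VkPhysicalDeviceExternalImageFormatInfo ext_info = {
          .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO,
          .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
       };
       const VkPhysicalDeviceImageFormatInfo2 info = {
          .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
          .pNext = &ext_info,
          .format = VK_FORMAT_B8G8R8A8_UNORM,
          .type = VK_IMAGE_TYPE_2D,
          .tiling = VK_IMAGE_TILING_OPTIMAL,
          .usage = VK_IMAGE_USAGE_SAMPLED_BIT,
       };
       VkExternalImageFormatProperties ext_props = {
          .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES,
       };
       VkImageFormatProperties2 props = {
          .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
          .pNext = &ext_props,
       };

       /* On success, ext_props.externalMemoryProperties should report the
        * exportable and importable bits for opaque fds on 2D optimal images,
        * per the handleType switch above. */
       return vkGetPhysicalDeviceImageFormatProperties2(pdev, &info, &props);
    }
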
-static void fill_sparse_image_format_properties(struct radv_physical_device *pdev,
- VkFormat format,
- VkSparseImageFormatProperties *prop)
+static void
+fill_sparse_image_format_properties(struct radv_physical_device *pdev, VkFormat format,
+ VkSparseImageFormatProperties *prop)
{
- prop->aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
- prop->flags = 0;
+ prop->aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+ prop->flags = 0;
- /* On GFX8 we first subdivide by level and then layer, leading to a single
- * miptail. On GFX9+ we first subdivide by layer and then level which results
- * in a miptail per layer. */
- if (pdev->rad_info.chip_class < GFX9)
- prop->flags |= VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT;
+ /* On GFX8 we first subdivide by level and then layer, leading to a single
+ * miptail. On GFX9+ we first subdivide by layer and then level which results
+ * in a miptail per layer. */
+ if (pdev->rad_info.chip_class < GFX9)
+ prop->flags |= VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT;
- /* This assumes the sparse image tile size is always 64 KiB (1 << 16) */
- unsigned l2_size = 16 - util_logbase2(vk_format_get_blocksize(format));
- unsigned w = (1u << ((l2_size + 1) / 2)) * vk_format_get_blockwidth(format);
- unsigned h = (1u << (l2_size / 2)) * vk_format_get_blockheight(format);
+ /* This assumes the sparse image tile size is always 64 KiB (1 << 16) */
+ unsigned l2_size = 16 - util_logbase2(vk_format_get_blocksize(format));
+ unsigned w = (1u << ((l2_size + 1) / 2)) * vk_format_get_blockwidth(format);
+ unsigned h = (1u << (l2_size / 2)) * vk_format_get_blockheight(format);
- prop->imageGranularity = (VkExtent3D) {w, h, 1};
+ prop->imageGranularity = (VkExtent3D){w, h, 1};
}
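
To make the shift arithmetic above concrete, the granularity can be checked by
hand for a couple of block sizes, assuming the 64 KiB tile size noted in the
comment (a standalone sketch, not driver code):

    #include <assert.h>

    static void
    check_sparse_granularity(void)
    {
       /* 4 bytes per texel, e.g. VK_FORMAT_R8G8B8A8_UNORM: log2(4) == 2. */
       unsigned l2_size = 16 - 2;
       unsigned w = 1u << ((l2_size + 1) / 2); /* 128 */
       unsigned h = 1u << (l2_size / 2);       /* 128 */
       assert(w * h * 4 == 65536);

       /* 8 bytes per texel, e.g. VK_FORMAT_R16G16B16A16_SFLOAT: log2(8) == 3. */
       l2_size = 16 - 3;
       w = 1u << ((l2_size + 1) / 2); /* 128 */
       h = 1u << (l2_size / 2);       /* 64 */
       assert(w * h * 8 == 65536);
    }
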
-void radv_GetPhysicalDeviceSparseImageFormatProperties2(
- VkPhysicalDevice physicalDevice,
- const VkPhysicalDeviceSparseImageFormatInfo2 *pFormatInfo,
- uint32_t *pPropertyCount,
- VkSparseImageFormatProperties2 *pProperties)
+void
+radv_GetPhysicalDeviceSparseImageFormatProperties2(
+ VkPhysicalDevice physicalDevice, const VkPhysicalDeviceSparseImageFormatInfo2 *pFormatInfo,
+ uint32_t *pPropertyCount, VkSparseImageFormatProperties2 *pProperties)
{
- RADV_FROM_HANDLE(radv_physical_device, pdev, physicalDevice);
- VkResult result;
-
- if (pFormatInfo->samples > VK_SAMPLE_COUNT_1_BIT) {
- *pPropertyCount = 0;
- return;
- }
-
- const VkPhysicalDeviceImageFormatInfo2 fmt_info = {
- .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
- .format = pFormatInfo->format,
- .type = pFormatInfo->type,
- .tiling = pFormatInfo->tiling,
- .usage = pFormatInfo->usage,
- .flags = VK_IMAGE_CREATE_SPARSE_BINDING_BIT |
- VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT
- };
-
- VkImageFormatProperties fmt_props;
- result = radv_get_image_format_properties(pdev, &fmt_info, pFormatInfo->format,
- &fmt_props);
- if (result != VK_SUCCESS) {
- *pPropertyCount = 0;
- return;
- }
-
- VK_OUTARRAY_MAKE_TYPED(VkSparseImageFormatProperties2, out, pProperties, pPropertyCount);
-
- vk_outarray_append_typed(VkSparseImageFormatProperties2 , &out, prop) {
- fill_sparse_image_format_properties(pdev, pFormatInfo->format, &prop->properties);
- };
+ RADV_FROM_HANDLE(radv_physical_device, pdev, physicalDevice);
+ VkResult result;
+
+ if (pFormatInfo->samples > VK_SAMPLE_COUNT_1_BIT) {
+ *pPropertyCount = 0;
+ return;
+ }
+
+ const VkPhysicalDeviceImageFormatInfo2 fmt_info = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
+ .format = pFormatInfo->format,
+ .type = pFormatInfo->type,
+ .tiling = pFormatInfo->tiling,
+ .usage = pFormatInfo->usage,
+ .flags = VK_IMAGE_CREATE_SPARSE_BINDING_BIT | VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT};
+
+ VkImageFormatProperties fmt_props;
+ result = radv_get_image_format_properties(pdev, &fmt_info, pFormatInfo->format, &fmt_props);
+ if (result != VK_SUCCESS) {
+ *pPropertyCount = 0;
+ return;
+ }
+
+ VK_OUTARRAY_MAKE_TYPED(VkSparseImageFormatProperties2, out, pProperties, pPropertyCount);
+
+ vk_outarray_append_typed(VkSparseImageFormatProperties2, &out, prop)
+ {
+ fill_sparse_image_format_properties(pdev, pFormatInfo->format, &prop->properties);
+ };
}
-void radv_GetPhysicalDeviceSparseImageFormatProperties(
- VkPhysicalDevice physicalDevice,
- VkFormat format,
- VkImageType type,
- uint32_t samples,
- VkImageUsageFlags usage,
- VkImageTiling tiling,
- uint32_t* pNumProperties,
- VkSparseImageFormatProperties* pProperties)
+void
+radv_GetPhysicalDeviceSparseImageFormatProperties(VkPhysicalDevice physicalDevice, VkFormat format,
+ VkImageType type, uint32_t samples,
+ VkImageUsageFlags usage, VkImageTiling tiling,
+ uint32_t *pNumProperties,
+ VkSparseImageFormatProperties *pProperties)
{
- const VkPhysicalDeviceSparseImageFormatInfo2 info = {
- .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SPARSE_IMAGE_FORMAT_INFO_2,
- .format = format,
- .type = type,
- .samples = samples,
- .usage = usage,
- .tiling = tiling
- };
-
- if (!pProperties) {
- radv_GetPhysicalDeviceSparseImageFormatProperties2(physicalDevice, &info,
- pNumProperties, NULL);
- return;
- }
-
- VkSparseImageFormatProperties2 props[4];
- uint32_t prop_cnt = MIN2(ARRAY_SIZE(props), *pNumProperties);
-
- memset(props, 0, sizeof(props));
- for (unsigned i = 0; i < ARRAY_SIZE(props); ++i)
- props[i].sType = VK_STRUCTURE_TYPE_SPARSE_IMAGE_FORMAT_PROPERTIES_2;
-
- radv_GetPhysicalDeviceSparseImageFormatProperties2(physicalDevice, &info,
- &prop_cnt, props);
-
- for (unsigned i = 0; i < prop_cnt; ++i)
- pProperties[i] = props[i].properties;
- *pNumProperties = prop_cnt;
+ const VkPhysicalDeviceSparseImageFormatInfo2 info = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SPARSE_IMAGE_FORMAT_INFO_2,
+ .format = format,
+ .type = type,
+ .samples = samples,
+ .usage = usage,
+ .tiling = tiling};
+
+ if (!pProperties) {
+ radv_GetPhysicalDeviceSparseImageFormatProperties2(physicalDevice, &info, pNumProperties,
+ NULL);
+ return;
+ }
+
+ VkSparseImageFormatProperties2 props[4];
+ uint32_t prop_cnt = MIN2(ARRAY_SIZE(props), *pNumProperties);
+
+ memset(props, 0, sizeof(props));
+ for (unsigned i = 0; i < ARRAY_SIZE(props); ++i)
+ props[i].sType = VK_STRUCTURE_TYPE_SPARSE_IMAGE_FORMAT_PROPERTIES_2;
+
+ radv_GetPhysicalDeviceSparseImageFormatProperties2(physicalDevice, &info, &prop_cnt, props);
+
+ for (unsigned i = 0; i < prop_cnt; ++i)
+ pProperties[i] = props[i].properties;
+ *pNumProperties = prop_cnt;
}
-void radv_GetImageSparseMemoryRequirements2(
- VkDevice _device,
- const VkImageSparseMemoryRequirementsInfo2 *pInfo,
- uint32_t* pSparseMemoryRequirementCount,
- VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements)
+void
+radv_GetImageSparseMemoryRequirements2(VkDevice _device,
+ const VkImageSparseMemoryRequirementsInfo2 *pInfo,
+ uint32_t *pSparseMemoryRequirementCount,
+ VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_image, image, pInfo->image);
-
- if (!(image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT)) {
- *pSparseMemoryRequirementCount = 0;
- return;
- }
-
- VK_OUTARRAY_MAKE_TYPED(VkSparseImageMemoryRequirements2, out, pSparseMemoryRequirements, pSparseMemoryRequirementCount);
-
- vk_outarray_append_typed(VkSparseImageMemoryRequirements2, &out, req) {
- fill_sparse_image_format_properties(device->physical_device,
- image->vk_format,
- &req->memoryRequirements.formatProperties);
- req->memoryRequirements.imageMipTailFirstLod = image->planes[0].surface.first_mip_tail_level;
-
- if (req->memoryRequirements.imageMipTailFirstLod < image->info.levels) {
- if (device->physical_device->rad_info.chip_class >= GFX9) {
- /* The tail is always a single tile per layer. */
- req->memoryRequirements.imageMipTailSize = 65536;
- req->memoryRequirements.imageMipTailOffset =
- image->planes[0].surface.u.gfx9.prt_level_offset[req->memoryRequirements.imageMipTailFirstLod] & ~65535;
- req->memoryRequirements.imageMipTailStride =
- image->planes[0].surface.u.gfx9.surf_slice_size;
- } else {
- req->memoryRequirements.imageMipTailOffset =
- image->planes[0].surface.u.legacy.level[req->memoryRequirements.imageMipTailFirstLod ].offset;
- req->memoryRequirements.imageMipTailSize =
- image->size - req->memoryRequirements.imageMipTailOffset;
- req->memoryRequirements.imageMipTailStride = 0;
- }
- } else {
- req->memoryRequirements.imageMipTailSize = 0;
- req->memoryRequirements.imageMipTailOffset = 0;
- req->memoryRequirements.imageMipTailStride = 0;
- }
- };
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_image, image, pInfo->image);
+
+ if (!(image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT)) {
+ *pSparseMemoryRequirementCount = 0;
+ return;
+ }
+
+ VK_OUTARRAY_MAKE_TYPED(VkSparseImageMemoryRequirements2, out, pSparseMemoryRequirements,
+ pSparseMemoryRequirementCount);
+
+ vk_outarray_append_typed(VkSparseImageMemoryRequirements2, &out, req)
+ {
+ fill_sparse_image_format_properties(device->physical_device, image->vk_format,
+ &req->memoryRequirements.formatProperties);
+ req->memoryRequirements.imageMipTailFirstLod = image->planes[0].surface.first_mip_tail_level;
+
+ if (req->memoryRequirements.imageMipTailFirstLod < image->info.levels) {
+ if (device->physical_device->rad_info.chip_class >= GFX9) {
+ /* The tail is always a single tile per layer. */
+ req->memoryRequirements.imageMipTailSize = 65536;
+ req->memoryRequirements.imageMipTailOffset =
+ image->planes[0]
+ .surface.u.gfx9.prt_level_offset[req->memoryRequirements.imageMipTailFirstLod] &
+ ~65535;
+ req->memoryRequirements.imageMipTailStride =
+ image->planes[0].surface.u.gfx9.surf_slice_size;
+ } else {
+ req->memoryRequirements.imageMipTailOffset =
+ image->planes[0]
+ .surface.u.legacy.level[req->memoryRequirements.imageMipTailFirstLod]
+ .offset;
+ req->memoryRequirements.imageMipTailSize =
+ image->size - req->memoryRequirements.imageMipTailOffset;
+ req->memoryRequirements.imageMipTailStride = 0;
+ }
+ } else {
+ req->memoryRequirements.imageMipTailSize = 0;
+ req->memoryRequirements.imageMipTailOffset = 0;
+ req->memoryRequirements.imageMipTailStride = 0;
+ }
+ };
}
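
On the GFX9+ branch above, the mask ~65535 rounds the first tail level's offset
down to a 64 KiB tile boundary, and the per-layer stride is the slice size. A
small worked check, using an arbitrary example offset chosen for illustration:

    #include <assert.h>
    #include <stdint.h>

    static void
    check_miptail_offset(void)
    {
       /* Arbitrary example: a first tail level starting at byte offset 0x2A340. */
       const uint64_t level_offset = 0x2A340;
       /* Rounded down to the containing 64 KiB tile, as the code above does. */
       assert((level_offset & ~(uint64_t)65535) == 0x20000);
    }
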
-void radv_GetImageSparseMemoryRequirements(
- VkDevice device,
- VkImage image,
- uint32_t* pSparseMemoryRequirementCount,
- VkSparseImageMemoryRequirements* pSparseMemoryRequirements)
+void
+radv_GetImageSparseMemoryRequirements(VkDevice device, VkImage image,
+ uint32_t *pSparseMemoryRequirementCount,
+ VkSparseImageMemoryRequirements *pSparseMemoryRequirements)
{
- const VkImageSparseMemoryRequirementsInfo2 info = {
- .sType = VK_STRUCTURE_TYPE_IMAGE_SPARSE_MEMORY_REQUIREMENTS_INFO_2,
- .image = image
- };
-
- if (!pSparseMemoryRequirements) {
- radv_GetImageSparseMemoryRequirements2(device, &info,
- pSparseMemoryRequirementCount, NULL);
- return;
- }
-
- VkSparseImageMemoryRequirements2 reqs[4];
- uint32_t reqs_cnt = MIN2(ARRAY_SIZE(reqs), *pSparseMemoryRequirementCount);
-
- memset(reqs, 0, sizeof(reqs));
- for (unsigned i = 0; i < ARRAY_SIZE(reqs); ++i)
- reqs[i].sType = VK_STRUCTURE_TYPE_SPARSE_IMAGE_MEMORY_REQUIREMENTS_2;
-
- radv_GetImageSparseMemoryRequirements2(device, &info,
- &reqs_cnt, reqs);
-
- for (unsigned i = 0; i < reqs_cnt; ++i)
- pSparseMemoryRequirements[i] = reqs[i].memoryRequirements;
- *pSparseMemoryRequirementCount = reqs_cnt;
+ const VkImageSparseMemoryRequirementsInfo2 info = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_SPARSE_MEMORY_REQUIREMENTS_INFO_2,
+ .image = image};
+
+ if (!pSparseMemoryRequirements) {
+ radv_GetImageSparseMemoryRequirements2(device, &info, pSparseMemoryRequirementCount, NULL);
+ return;
+ }
+
+ VkSparseImageMemoryRequirements2 reqs[4];
+ uint32_t reqs_cnt = MIN2(ARRAY_SIZE(reqs), *pSparseMemoryRequirementCount);
+
+ memset(reqs, 0, sizeof(reqs));
+ for (unsigned i = 0; i < ARRAY_SIZE(reqs); ++i)
+ reqs[i].sType = VK_STRUCTURE_TYPE_SPARSE_IMAGE_MEMORY_REQUIREMENTS_2;
+
+ radv_GetImageSparseMemoryRequirements2(device, &info, &reqs_cnt, reqs);
+
+ for (unsigned i = 0; i < reqs_cnt; ++i)
+ pSparseMemoryRequirements[i] = reqs[i].memoryRequirements;
+ *pSparseMemoryRequirementCount = reqs_cnt;
}
-void radv_GetPhysicalDeviceExternalBufferProperties(
- VkPhysicalDevice physicalDevice,
- const VkPhysicalDeviceExternalBufferInfo *pExternalBufferInfo,
- VkExternalBufferProperties *pExternalBufferProperties)
+void
+radv_GetPhysicalDeviceExternalBufferProperties(
+ VkPhysicalDevice physicalDevice, const VkPhysicalDeviceExternalBufferInfo *pExternalBufferInfo,
+ VkExternalBufferProperties *pExternalBufferProperties)
{
- VkExternalMemoryFeatureFlagBits flags = 0;
- VkExternalMemoryHandleTypeFlags export_flags = 0;
- VkExternalMemoryHandleTypeFlags compat_flags = 0;
- switch(pExternalBufferInfo->handleType) {
- case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT:
- case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
- flags = VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT |
- VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
- compat_flags = export_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT |
- VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
- break;
- case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT:
- flags = VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
- compat_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT;
- break;
- default:
- break;
- }
- pExternalBufferProperties->externalMemoryProperties = (VkExternalMemoryProperties) {
- .externalMemoryFeatures = flags,
- .exportFromImportedHandleTypes = export_flags,
- .compatibleHandleTypes = compat_flags,
- };
+ VkExternalMemoryFeatureFlagBits flags = 0;
+ VkExternalMemoryHandleTypeFlags export_flags = 0;
+ VkExternalMemoryHandleTypeFlags compat_flags = 0;
+ switch (pExternalBufferInfo->handleType) {
+ case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT:
+ case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
+ flags = VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT | VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
+ compat_flags = export_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT |
+ VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
+ break;
+ case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT:
+ flags = VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
+ compat_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT;
+ break;
+ default:
+ break;
+ }
+ pExternalBufferProperties->externalMemoryProperties = (VkExternalMemoryProperties){
+ .externalMemoryFeatures = flags,
+ .exportFromImportedHandleTypes = export_flags,
+ .compatibleHandleTypes = compat_flags,
+ };
}
/* DCC channel type categories within which formats can be reinterpreted
* while keeping the same DCC encoding. The swizzle must also match. */
enum dcc_channel_type {
- dcc_channel_float32,
- dcc_channel_uint32,
- dcc_channel_sint32,
- dcc_channel_float16,
- dcc_channel_uint16,
- dcc_channel_sint16,
- dcc_channel_uint_10_10_10_2,
- dcc_channel_uint8,
- dcc_channel_sint8,
- dcc_channel_incompatible,
+ dcc_channel_float32,
+ dcc_channel_uint32,
+ dcc_channel_sint32,
+ dcc_channel_float16,
+ dcc_channel_uint16,
+ dcc_channel_sint16,
+ dcc_channel_uint_10_10_10_2,
+ dcc_channel_uint8,
+ dcc_channel_sint8,
+ dcc_channel_incompatible,
};
/* Return the type of DCC encoding. */
static enum dcc_channel_type
radv_get_dcc_channel_type(const struct util_format_description *desc)
{
- int i;
-
- /* Find the first non-void channel. */
- for (i = 0; i < desc->nr_channels; i++)
- if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID)
- break;
- if (i == desc->nr_channels)
- return dcc_channel_incompatible;
-
- switch (desc->channel[i].size) {
- case 32:
- if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT)
- return dcc_channel_float32;
- if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED)
- return dcc_channel_uint32;
- return dcc_channel_sint32;
- case 16:
- if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT)
- return dcc_channel_float16;
- if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED)
- return dcc_channel_uint16;
- return dcc_channel_sint16;
- case 10:
- return dcc_channel_uint_10_10_10_2;
- case 8:
- if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED)
- return dcc_channel_uint8;
- return dcc_channel_sint8;
- default:
- return dcc_channel_incompatible;
- }
+ int i;
+
+ /* Find the first non-void channel. */
+ for (i = 0; i < desc->nr_channels; i++)
+ if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID)
+ break;
+ if (i == desc->nr_channels)
+ return dcc_channel_incompatible;
+
+ switch (desc->channel[i].size) {
+ case 32:
+ if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT)
+ return dcc_channel_float32;
+ if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED)
+ return dcc_channel_uint32;
+ return dcc_channel_sint32;
+ case 16:
+ if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT)
+ return dcc_channel_float16;
+ if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED)
+ return dcc_channel_uint16;
+ return dcc_channel_sint16;
+ case 10:
+ return dcc_channel_uint_10_10_10_2;
+ case 8:
+ if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED)
+ return dcc_channel_uint8;
+ return dcc_channel_sint8;
+ default:
+ return dcc_channel_incompatible;
+ }
}
/* Return if it's allowed to reinterpret one format as another with DCC enabled. */
-bool radv_dcc_formats_compatible(VkFormat format1,
- VkFormat format2)
+bool
+radv_dcc_formats_compatible(VkFormat format1, VkFormat format2)
{
- const struct util_format_description *desc1, *desc2;
- enum dcc_channel_type type1, type2;
- int i;
+ const struct util_format_description *desc1, *desc2;
+ enum dcc_channel_type type1, type2;
+ int i;
- if (format1 == format2)
- return true;
+ if (format1 == format2)
+ return true;
- desc1 = vk_format_description(format1);
- desc2 = vk_format_description(format2);
+ desc1 = vk_format_description(format1);
+ desc2 = vk_format_description(format2);
- if (desc1->nr_channels != desc2->nr_channels)
- return false;
+ if (desc1->nr_channels != desc2->nr_channels)
+ return false;
- /* Swizzles must be the same. */
- for (i = 0; i < desc1->nr_channels; i++)
- if (desc1->swizzle[i] <= PIPE_SWIZZLE_W &&
- desc2->swizzle[i] <= PIPE_SWIZZLE_W &&
- desc1->swizzle[i] != desc2->swizzle[i])
- return false;
+ /* Swizzles must be the same. */
+ for (i = 0; i < desc1->nr_channels; i++)
+ if (desc1->swizzle[i] <= PIPE_SWIZZLE_W && desc2->swizzle[i] <= PIPE_SWIZZLE_W &&
+ desc1->swizzle[i] != desc2->swizzle[i])
+ return false;
- type1 = radv_get_dcc_channel_type(desc1);
- type2 = radv_get_dcc_channel_type(desc2);
+ type1 = radv_get_dcc_channel_type(desc1);
+ type2 = radv_get_dcc_channel_type(desc2);
- return type1 != dcc_channel_incompatible &&
- type2 != dcc_channel_incompatible &&
- type1 == type2;
+ return type1 != dcc_channel_incompatible && type2 != dcc_channel_incompatible && type1 == type2;
}
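
A few concrete pairs worked through the rules above (equal channel counts,
matching swizzles, same channel-type category); the expected results are
derived from those rules and are illustrative, not exhaustive:

    /* radv_dcc_formats_compatible(VK_FORMAT_R8G8B8A8_UNORM, VK_FORMAT_R8G8B8A8_SRGB)  -> true
     *   (both resolve to dcc_channel_uint8 with identical swizzles)
     * radv_dcc_formats_compatible(VK_FORMAT_R8G8B8A8_UNORM, VK_FORMAT_R8G8B8A8_SNORM) -> false
     *   (uint8 vs. sint8 channel categories)
     * radv_dcc_formats_compatible(VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_UINT)           -> false
     *   (float32 vs. uint32 channel categories)
     * radv_dcc_formats_compatible(VK_FORMAT_R16G16_SFLOAT, VK_FORMAT_R32_SFLOAT)      -> false
     *   (channel counts differ)
     */
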
-
diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
index 58324c36dde..deda051bd0c 100644
--- a/src/amd/vulkan/radv_image.c
+++ b/src/amd/vulkan/radv_image.c
@@ -26,257 +26,237 @@
*/
#include "drm-uapi/drm_fourcc.h"
+#include "util/debug.h"
+#include "util/u_atomic.h"
+#include "vulkan/util/vk_format.h"
#include "radv_debug.h"
#include "radv_private.h"
-#include "vk_format.h"
-#include "vk_util.h"
#include "radv_radeon_winsys.h"
#include "sid.h"
-#include "util/debug.h"
-#include "util/u_atomic.h"
-#include "vulkan/util/vk_format.h"
+#include "vk_format.h"
+#include "vk_util.h"
#include "gfx10_format_table.h"
-
static const VkImageUsageFlagBits RADV_IMAGE_USAGE_WRITE_BITS =
- VK_IMAGE_USAGE_TRANSFER_DST_BIT |
- VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
- VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT |
- VK_IMAGE_USAGE_STORAGE_BIT;
+ VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
+ VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_STORAGE_BIT;
static unsigned
-radv_choose_tiling(struct radv_device *device,
- const VkImageCreateInfo *pCreateInfo,
- VkFormat format)
+radv_choose_tiling(struct radv_device *device, const VkImageCreateInfo *pCreateInfo,
+ VkFormat format)
{
- if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) {
- assert(pCreateInfo->samples <= 1);
- return RADEON_SURF_MODE_LINEAR_ALIGNED;
- }
-
- /* MSAA resources must be 2D tiled. */
- if (pCreateInfo->samples > 1)
- return RADEON_SURF_MODE_2D;
-
- if (!vk_format_is_compressed(format) &&
- !vk_format_is_depth_or_stencil(format)
- && device->physical_device->rad_info.chip_class <= GFX8) {
- /* Choosing linear here causes hangs in some VK CTS tests on GFX9,
- * so only do it on GFX8 and older. */
- /* Textures with a very small height are recommended to be linear. */
- if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
- /* Only very thin and long 2D textures should benefit from
- * linear_aligned. */
- (pCreateInfo->extent.width > 8 && pCreateInfo->extent.height <= 2))
- return RADEON_SURF_MODE_LINEAR_ALIGNED;
- }
-
- return RADEON_SURF_MODE_2D;
+ if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) {
+ assert(pCreateInfo->samples <= 1);
+ return RADEON_SURF_MODE_LINEAR_ALIGNED;
+ }
+
+ /* MSAA resources must be 2D tiled. */
+ if (pCreateInfo->samples > 1)
+ return RADEON_SURF_MODE_2D;
+
+ if (!vk_format_is_compressed(format) && !vk_format_is_depth_or_stencil(format) &&
+ device->physical_device->rad_info.chip_class <= GFX8) {
+ /* Choosing linear here causes hangs in some VK CTS tests on GFX9,
+ * so only do it on GFX8 and older. */
+ /* Textures with a very small height are recommended to be linear. */
+ if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
+ /* Only very thin and long 2D textures should benefit from
+ * linear_aligned. */
+ (pCreateInfo->extent.width > 8 && pCreateInfo->extent.height <= 2))
+ return RADEON_SURF_MODE_LINEAR_ALIGNED;
+ }
+
+ return RADEON_SURF_MODE_2D;
}
static bool
-radv_use_tc_compat_htile_for_image(struct radv_device *device,
- const VkImageCreateInfo *pCreateInfo,
- VkFormat format)
+radv_use_tc_compat_htile_for_image(struct radv_device *device, const VkImageCreateInfo *pCreateInfo,
+ VkFormat format)
{
- /* TC-compat HTILE is only available for GFX8+. */
- if (device->physical_device->rad_info.chip_class < GFX8)
- return false;
-
- if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT))
- return false;
-
- if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
- return false;
-
- /* Do not enable TC-compatible HTILE if the image isn't readable by a
- * shader because no texture fetches will happen.
- */
- if (!(pCreateInfo->usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
- VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT |
- VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
- return false;
-
- if (device->physical_device->rad_info.chip_class < GFX9) {
- /* TC-compat HTILE for MSAA depth/stencil images is broken
- * on GFX8 because the tiling doesn't match.
- */
- if (pCreateInfo->samples >= 2 && format == VK_FORMAT_D32_SFLOAT_S8_UINT)
- return false;
-
- /* GFX9+ supports compression for both 32-bit and 16-bit depth
- * surfaces, while GFX8 only supports 32-bit natively. However,
- * the driver allows TC-compat HTILE for 16-bit depth surfaces
- * without Z-plane compression.
- */
- if (format != VK_FORMAT_D32_SFLOAT_S8_UINT &&
- format != VK_FORMAT_D32_SFLOAT &&
- format != VK_FORMAT_D16_UNORM)
- return false;
- }
-
- return true;
+ /* TC-compat HTILE is only available for GFX8+. */
+ if (device->physical_device->rad_info.chip_class < GFX8)
+ return false;
+
+ if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT))
+ return false;
+
+ if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
+ return false;
+
+ /* Do not enable TC-compatible HTILE if the image isn't readable by a
+ * shader because no texture fetches will happen.
+ */
+ if (!(pCreateInfo->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT |
+ VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
+ return false;
+
+ if (device->physical_device->rad_info.chip_class < GFX9) {
+ /* TC-compat HTILE for MSAA depth/stencil images is broken
+ * on GFX8 because the tiling doesn't match.
+ */
+ if (pCreateInfo->samples >= 2 && format == VK_FORMAT_D32_SFLOAT_S8_UINT)
+ return false;
+
+ /* GFX9+ supports compression for both 32-bit and 16-bit depth
+ * surfaces, while GFX8 only supports 32-bit natively. However,
+ * the driver allows TC-compat HTILE for 16-bit depth surfaces
+ * without Z-plane compression.
+ */
+ if (format != VK_FORMAT_D32_SFLOAT_S8_UINT && format != VK_FORMAT_D32_SFLOAT &&
+ format != VK_FORMAT_D16_UNORM)
+ return false;
+ }
+
+ return true;
}
static bool
radv_surface_has_scanout(struct radv_device *device, const struct radv_image_create_info *info)
{
- if (info->bo_metadata) {
- if (device->physical_device->rad_info.chip_class >= GFX9)
- return info->bo_metadata->u.gfx9.scanout;
- else
- return info->bo_metadata->u.legacy.scanout;
- }
-
- return info->scanout;
+ if (info->bo_metadata) {
+ if (device->physical_device->rad_info.chip_class >= GFX9)
+ return info->bo_metadata->u.gfx9.scanout;
+ else
+ return info->bo_metadata->u.legacy.scanout;
+ }
+
+ return info->scanout;
}
static bool
radv_image_use_fast_clear_for_image(const struct radv_device *device,
const struct radv_image *image)
{
- if (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS)
- return true;
-
- if (image->info.samples <= 1 &&
- image->info.width * image->info.height <= 512 * 512) {
- /* Do not enable CMASK or DCC for small surfaces where the cost
- * of the eliminate pass can be higher than the benefit of fast
- * clear. RadeonSI does this, but the image threshold is
- * different.
- */
- return false;
- }
-
- return image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT &&
- (image->exclusive ||
- /* Enable DCC for concurrent images if stores are
- * supported because that means we can keep DCC compressed on
- * all layouts/queues.
- */
- radv_image_use_dcc_image_stores(device, image));
+ if (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS)
+ return true;
+
+ if (image->info.samples <= 1 && image->info.width * image->info.height <= 512 * 512) {
+ /* Do not enable CMASK or DCC for small surfaces where the cost
+ * of the eliminate pass can be higher than the benefit of fast
+ * clear. RadeonSI does this, but the image threshold is
+ * different.
+ */
+ return false;
+ }
+
+ return image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT &&
+ (image->exclusive ||
+ /* Enable DCC for concurrent images if stores are
+ * supported because that means we can keep DCC compressed on
+ * all layouts/queues.
+ */
+ radv_image_use_dcc_image_stores(device, image));
}
bool
-radv_are_formats_dcc_compatible(const struct radv_physical_device *pdev,
- const void *pNext, VkFormat format,
- VkImageCreateFlags flags)
+radv_are_formats_dcc_compatible(const struct radv_physical_device *pdev, const void *pNext,
+ VkFormat format, VkImageCreateFlags flags)
{
- bool blendable;
-
- if (!radv_is_colorbuffer_format_supported(pdev,
- format, &blendable))
- return false;
-
- if (flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
- const struct VkImageFormatListCreateInfo *format_list =
- (const struct VkImageFormatListCreateInfo *)
- vk_find_struct_const(pNext,
- IMAGE_FORMAT_LIST_CREATE_INFO);
-
- /* We have to ignore the existence of the list if viewFormatCount = 0 */
- if (format_list && format_list->viewFormatCount) {
- /* compatibility is transitive, so we only need to check
- * one format with everything else. */
- for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
- if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
- continue;
-
- if (!radv_dcc_formats_compatible(format,
- format_list->pViewFormats[i]))
- return false;
- }
- } else {
- return false;
- }
- }
-
- return true;
+ bool blendable;
+
+ if (!radv_is_colorbuffer_format_supported(pdev, format, &blendable))
+ return false;
+
+ if (flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
+ const struct VkImageFormatListCreateInfo *format_list =
+ (const struct VkImageFormatListCreateInfo *)vk_find_struct_const(
+ pNext, IMAGE_FORMAT_LIST_CREATE_INFO);
+
+ /* We have to ignore the existence of the list if viewFormatCount = 0 */
+ if (format_list && format_list->viewFormatCount) {
+ /* compatibility is transitive, so we only need to check
+ * one format with everything else. */
+ for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
+ if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
+ continue;
+
+ if (!radv_dcc_formats_compatible(format, format_list->pViewFormats[i]))
+ return false;
+ }
+ } else {
+ return false;
+ }
+ }
+
+ return true;
}
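
A short application-side sketch of the case handled above: a MUTABLE_FORMAT
image that can still qualify for DCC because every view format in the explicit
list is DCC-compatible with the base format (the concrete formats and sizes are
illustrative choices):

    #include <vulkan/vulkan.h>

    static const VkFormat view_formats[] = {
       VK_FORMAT_R8G8B8A8_UNORM,
       VK_FORMAT_R8G8B8A8_SRGB, /* same channel type and swizzle -> DCC-compatible */
    };

    static const VkImageFormatListCreateInfo format_list = {
       .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO,
       .viewFormatCount = 2,
       .pViewFormats = view_formats,
    };

    static const VkImageCreateInfo image_info = {
       .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
       .pNext = &format_list,
       .flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT,
       .imageType = VK_IMAGE_TYPE_2D,
       .format = VK_FORMAT_R8G8B8A8_UNORM,
       .extent = {1024, 1024, 1},
       .mipLevels = 1,
       .arrayLayers = 1,
       .samples = VK_SAMPLE_COUNT_1_BIT,
       .tiling = VK_IMAGE_TILING_OPTIMAL,
       .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
       .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
       .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
    };
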
static bool
-radv_formats_is_atomic_allowed(const void *pNext, VkFormat format,
- VkImageCreateFlags flags)
+radv_formats_is_atomic_allowed(const void *pNext, VkFormat format, VkImageCreateFlags flags)
{
- if (radv_is_atomic_format_supported(format))
- return true;
-
- if (flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
- const struct VkImageFormatListCreateInfo *format_list =
- (const struct VkImageFormatListCreateInfo *)
- vk_find_struct_const(pNext,
- IMAGE_FORMAT_LIST_CREATE_INFO);
-
- /* We have to ignore the existence of the list if viewFormatCount = 0 */
- if (format_list && format_list->viewFormatCount) {
- for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
- if (radv_is_atomic_format_supported(format_list->pViewFormats[i]))
- return true;
- }
- }
- }
-
- return false;
+ if (radv_is_atomic_format_supported(format))
+ return true;
+
+ if (flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
+ const struct VkImageFormatListCreateInfo *format_list =
+ (const struct VkImageFormatListCreateInfo *)vk_find_struct_const(
+ pNext, IMAGE_FORMAT_LIST_CREATE_INFO);
+
+ /* We have to ignore the existence of the list if viewFormatCount = 0 */
+ if (format_list && format_list->viewFormatCount) {
+ for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
+ if (radv_is_atomic_format_supported(format_list->pViewFormats[i]))
+ return true;
+ }
+ }
+ }
+
+ return false;
}
static bool
-radv_use_dcc_for_image(struct radv_device *device,
- const struct radv_image *image,
- const VkImageCreateInfo *pCreateInfo,
- VkFormat format)
+radv_use_dcc_for_image(struct radv_device *device, const struct radv_image *image,
+ const VkImageCreateInfo *pCreateInfo, VkFormat format)
{
- /* DCC (Delta Color Compression) is only available for GFX8+. */
- if (device->physical_device->rad_info.chip_class < GFX8)
- return false;
-
- if (device->instance->debug_flags & RADV_DEBUG_NO_DCC)
- return false;
-
- if (image->shareable && image->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
- return false;
-
- /*
- * TODO: Enable DCC for storage images on GFX9 and earlier.
- *
- * Also disable DCC with atomics because even when DCC stores are
- * supported atomics will always decompress. So if we are
- * decompressing a lot anyway we might as well not have DCC.
- */
- if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) &&
- (!radv_image_use_dcc_image_stores(device, image) ||
- radv_formats_is_atomic_allowed(pCreateInfo->pNext, format, pCreateInfo->flags)))
- return false;
-
- if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
- return false;
-
- if (vk_format_is_subsampled(format) ||
- vk_format_get_plane_count(format) > 1)
- return false;
-
- if (!radv_image_use_fast_clear_for_image(device, image) &&
- image->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
- return false;
-
- /* Do not enable DCC for mipmapped arrays because performance is worse. */
- if (pCreateInfo->arrayLayers > 1 && pCreateInfo->mipLevels > 1)
- return false;
-
- if (device->physical_device->rad_info.chip_class < GFX10) {
- /* TODO: Add support for DCC MSAA on GFX8-9. */
- if (pCreateInfo->samples > 1 &&
- !device->physical_device->dcc_msaa_allowed)
- return false;
-
- /* TODO: Add support for DCC layers/mipmaps on GFX9. */
- if ((pCreateInfo->arrayLayers > 1 || pCreateInfo->mipLevels > 1) &&
- device->physical_device->rad_info.chip_class == GFX9)
- return false;
- }
-
- return radv_are_formats_dcc_compatible(device->physical_device,
- pCreateInfo->pNext, format,
- pCreateInfo->flags);
+ /* DCC (Delta Color Compression) is only available for GFX8+. */
+ if (device->physical_device->rad_info.chip_class < GFX8)
+ return false;
+
+ if (device->instance->debug_flags & RADV_DEBUG_NO_DCC)
+ return false;
+
+ if (image->shareable && image->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
+ return false;
+
+ /*
+ * TODO: Enable DCC for storage images on GFX9 and earlier.
+ *
+ * Also disable DCC with atomics because even when DCC stores are
+ * supported atomics will always decompress. So if we are
+ * decompressing a lot anyway we might as well not have DCC.
+ */
+ if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) &&
+ (!radv_image_use_dcc_image_stores(device, image) ||
+ radv_formats_is_atomic_allowed(pCreateInfo->pNext, format, pCreateInfo->flags)))
+ return false;
+
+ if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
+ return false;
+
+ if (vk_format_is_subsampled(format) || vk_format_get_plane_count(format) > 1)
+ return false;
+
+ if (!radv_image_use_fast_clear_for_image(device, image) &&
+ image->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
+ return false;
+
+ /* Do not enable DCC for mipmapped arrays because performance is worse. */
+ if (pCreateInfo->arrayLayers > 1 && pCreateInfo->mipLevels > 1)
+ return false;
+
+ if (device->physical_device->rad_info.chip_class < GFX10) {
+ /* TODO: Add support for DCC MSAA on GFX8-9. */
+ if (pCreateInfo->samples > 1 && !device->physical_device->dcc_msaa_allowed)
+ return false;
+
+ /* TODO: Add support for DCC layers/mipmaps on GFX9. */
+ if ((pCreateInfo->arrayLayers > 1 || pCreateInfo->mipLevels > 1) &&
+ device->physical_device->rad_info.chip_class == GFX9)
+ return false;
+ }
+
+ return radv_are_formats_dcc_compatible(device->physical_device, pCreateInfo->pNext, format,
+ pCreateInfo->flags);
}
/*
@@ -289,18 +269,18 @@ radv_use_dcc_for_image(struct radv_device *device,
*
* This function assumes the image uses DCC compression.
*/
-bool radv_image_use_dcc_image_stores(const struct radv_device *device,
- const struct radv_image *image)
+bool
+radv_image_use_dcc_image_stores(const struct radv_device *device, const struct radv_image *image)
{
- /*
- * TODO: Enable on more HW. DIMGREY and VANGOGH need a workaround and
- * we need more perf analysis.
- * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6796#note_643853
- */
- return device->physical_device->rad_info.chip_class == GFX10 ||
- (device->physical_device->rad_info.chip_class == GFX10_3 &&
- (device->instance->perftest_flags & RADV_PERFTEST_DCC_STORES) &&
- !device->physical_device->use_llvm);
+ /*
+ * TODO: Enable on more HW. DIMGREY and VANGOGH need a workaround and
+ * we need more perf analysis.
+ * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6796#note_643853
+ */
+ return device->physical_device->rad_info.chip_class == GFX10 ||
+ (device->physical_device->rad_info.chip_class == GFX10_3 &&
+ (device->instance->perftest_flags & RADV_PERFTEST_DCC_STORES) &&
+ !device->physical_device->use_llvm);
}
/*
@@ -309,1584 +289,1485 @@ bool radv_image_use_dcc_image_stores(const struct radv_device *device,
*
* This function assumes the image uses DCC compression.
*/
-bool radv_image_use_dcc_predication(const struct radv_device *device,
- const struct radv_image *image)
+bool
+radv_image_use_dcc_predication(const struct radv_device *device, const struct radv_image *image)
{
- return !radv_image_use_dcc_image_stores(device, image);
+ return !radv_image_use_dcc_image_stores(device, image);
}
static inline bool
-radv_use_fmask_for_image(const struct radv_device *device,
- const struct radv_image *image)
+radv_use_fmask_for_image(const struct radv_device *device, const struct radv_image *image)
{
- return image->info.samples > 1 &&
- ((image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) ||
- (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS));
+ return image->info.samples > 1 && ((image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) ||
+ (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS));
}
static inline bool
-radv_use_htile_for_image(const struct radv_device *device,
- const struct radv_image *image)
+radv_use_htile_for_image(const struct radv_device *device, const struct radv_image *image)
{
- /* TODO:
-	 * - Investigate mips+layers.
- * - Enable on other gens.
- */
- bool use_htile_for_mips = image->info.array_size == 1 &&
- device->physical_device->rad_info.chip_class >= GFX10;
-
- return (image->info.levels == 1 || use_htile_for_mips) &&
- !image->shareable &&
- ((image->info.width * image->info.height >= 8 * 8) ||
- (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS));
+ /* TODO:
+    * - Investigate mips+layers.
+ * - Enable on other gens.
+ */
+ bool use_htile_for_mips =
+ image->info.array_size == 1 && device->physical_device->rad_info.chip_class >= GFX10;
+
+ return (image->info.levels == 1 || use_htile_for_mips) && !image->shareable &&
+ ((image->info.width * image->info.height >= 8 * 8) ||
+ (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS));
}
static bool
-radv_use_tc_compat_cmask_for_image(struct radv_device *device,
- struct radv_image *image)
+radv_use_tc_compat_cmask_for_image(struct radv_device *device, struct radv_image *image)
{
- /* TC-compat CMASK is only available for GFX8+. */
- if (device->physical_device->rad_info.chip_class < GFX8)
- return false;
-
- if (device->instance->debug_flags & RADV_DEBUG_NO_TC_COMPAT_CMASK)
- return false;
-
- /* TODO: Enable TC-compat CMASK on GFX8-9. */
- if (device->physical_device->rad_info.chip_class < GFX10 &&
- !(device->instance->perftest_flags & RADV_PERFTEST_TC_COMPAT_CMASK))
- return false;
-
- if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT)
- return false;
-
- /* Do not enable TC-compatible if the image isn't readable by a shader
- * because no texture fetches will happen.
- */
- if (!(image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
- VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT |
- VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
- return false;
-
- /* If the image doesn't have FMASK, it can't be fetchable. */
- if (!radv_image_has_fmask(image))
- return false;
-
- return true;
+ /* TC-compat CMASK is only available for GFX8+. */
+ if (device->physical_device->rad_info.chip_class < GFX8)
+ return false;
+
+ if (device->instance->debug_flags & RADV_DEBUG_NO_TC_COMPAT_CMASK)
+ return false;
+
+ /* TODO: Enable TC-compat CMASK on GFX8-9. */
+ if (device->physical_device->rad_info.chip_class < GFX10 &&
+ !(device->instance->perftest_flags & RADV_PERFTEST_TC_COMPAT_CMASK))
+ return false;
+
+ if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT)
+ return false;
+
+ /* Do not enable TC-compatible if the image isn't readable by a shader
+ * because no texture fetches will happen.
+ */
+ if (!(image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT |
+ VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
+ return false;
+
+ /* If the image doesn't have FMASK, it can't be fetchable. */
+ if (!radv_image_has_fmask(image))
+ return false;
+
+ return true;
}
-static uint32_t si_get_bo_metadata_word1(const struct radv_device *device)
+static uint32_t
+si_get_bo_metadata_word1(const struct radv_device *device)
{
- return (ATI_VENDOR_ID << 16) | device->physical_device->rad_info.pci_id;
+ return (ATI_VENDOR_ID << 16) | device->physical_device->rad_info.pci_id;
}
static bool
-radv_is_valid_opaque_metadata(const struct radv_device *device,
- const struct radeon_bo_metadata *md)
+radv_is_valid_opaque_metadata(const struct radv_device *device, const struct radeon_bo_metadata *md)
{
- if (md->metadata[0] != 1 ||
- md->metadata[1] != si_get_bo_metadata_word1(device))
- return false;
+ if (md->metadata[0] != 1 || md->metadata[1] != si_get_bo_metadata_word1(device))
+ return false;
- if (md->size_metadata < 40)
- return false;
+ if (md->size_metadata < 40)
+ return false;
- return true;
+ return true;
}
static void
-radv_patch_surface_from_metadata(struct radv_device *device,
- struct radeon_surf *surface,
+radv_patch_surface_from_metadata(struct radv_device *device, struct radeon_surf *surface,
const struct radeon_bo_metadata *md)
{
- surface->flags = RADEON_SURF_CLR(surface->flags, MODE);
-
- if (device->physical_device->rad_info.chip_class >= GFX9) {
- if (md->u.gfx9.swizzle_mode > 0)
- surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
- else
- surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);
-
- surface->u.gfx9.surf.swizzle_mode = md->u.gfx9.swizzle_mode;
- } else {
- surface->u.legacy.pipe_config = md->u.legacy.pipe_config;
- surface->u.legacy.bankw = md->u.legacy.bankw;
- surface->u.legacy.bankh = md->u.legacy.bankh;
- surface->u.legacy.tile_split = md->u.legacy.tile_split;
- surface->u.legacy.mtilea = md->u.legacy.mtilea;
- surface->u.legacy.num_banks = md->u.legacy.num_banks;
-
- if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
- surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
- else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
- surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_1D, MODE);
- else
- surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);
-
- }
+ surface->flags = RADEON_SURF_CLR(surface->flags, MODE);
+
+ if (device->physical_device->rad_info.chip_class >= GFX9) {
+ if (md->u.gfx9.swizzle_mode > 0)
+ surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
+ else
+ surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);
+
+ surface->u.gfx9.surf.swizzle_mode = md->u.gfx9.swizzle_mode;
+ } else {
+ surface->u.legacy.pipe_config = md->u.legacy.pipe_config;
+ surface->u.legacy.bankw = md->u.legacy.bankw;
+ surface->u.legacy.bankh = md->u.legacy.bankh;
+ surface->u.legacy.tile_split = md->u.legacy.tile_split;
+ surface->u.legacy.mtilea = md->u.legacy.mtilea;
+ surface->u.legacy.num_banks = md->u.legacy.num_banks;
+
+ if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
+ surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
+ else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
+ surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_1D, MODE);
+ else
+ surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);
+ }
}
static VkResult
-radv_patch_image_dimensions(struct radv_device *device,
- struct radv_image *image,
+radv_patch_image_dimensions(struct radv_device *device, struct radv_image *image,
const struct radv_image_create_info *create_info,
struct ac_surf_info *image_info)
{
- unsigned width = image->info.width;
- unsigned height = image->info.height;
-
- /*
- * minigbm sometimes allocates bigger images which is going to result in
-	 * weird strides and other properties. Let's be lenient where possible and
- * fail it on GFX10 (as we cannot cope there).
- *
- * Example hack: https://chromium-review.googlesource.com/c/chromiumos/platform/minigbm/+/1457777/
- */
- if (create_info->bo_metadata &&
- radv_is_valid_opaque_metadata(device, create_info->bo_metadata)) {
- const struct radeon_bo_metadata *md = create_info->bo_metadata;
-
- if (device->physical_device->rad_info.chip_class >= GFX10) {
- width = G_00A004_WIDTH_LO(md->metadata[3]) +
- (G_00A008_WIDTH_HI(md->metadata[4]) << 2) + 1;
- height = G_00A008_HEIGHT(md->metadata[4]) + 1;
- } else {
- width = G_008F18_WIDTH(md->metadata[4]) + 1;
- height = G_008F18_HEIGHT(md->metadata[4]) + 1;
- }
- }
-
- if (image->info.width == width && image->info.height == height)
- return VK_SUCCESS;
-
- if (width < image->info.width || height < image->info.height) {
- fprintf(stderr,
- "The imported image has smaller dimensions than the internal\n"
- "dimensions. Using it is going to fail badly, so we reject\n"
- "this import.\n"
- "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
- image->info.width, image->info.height, width, height);
- return VK_ERROR_INVALID_EXTERNAL_HANDLE;
- } else if (device->physical_device->rad_info.chip_class >= GFX10) {
- fprintf(stderr,
- "Tried to import an image with inconsistent width on GFX10.\n"
- "As GFX10 has no separate stride fields we cannot cope with\n"
- "an inconsistency in width and will fail this import.\n"
- "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
- image->info.width, image->info.height, width, height);
- return VK_ERROR_INVALID_EXTERNAL_HANDLE;
- } else {
- fprintf(stderr,
- "Tried to import an image with inconsistent width on pre-GFX10.\n"
- "As GFX10 has no separate stride fields we cannot cope with\n"
- "an inconsistency and would fail on GFX10.\n"
- "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
- image->info.width, image->info.height, width, height);
- }
- image_info->width = width;
- image_info->height = height;
-
- return VK_SUCCESS;
+ unsigned width = image->info.width;
+ unsigned height = image->info.height;
+
+ /*
+ * minigbm sometimes allocates bigger images which is going to result in
+    * weird strides and other properties. Let's be lenient where possible and
+ * fail it on GFX10 (as we cannot cope there).
+ *
+ * Example hack: https://chromium-review.googlesource.com/c/chromiumos/platform/minigbm/+/1457777/
+ */
+ if (create_info->bo_metadata &&
+ radv_is_valid_opaque_metadata(device, create_info->bo_metadata)) {
+ const struct radeon_bo_metadata *md = create_info->bo_metadata;
+
+ if (device->physical_device->rad_info.chip_class >= GFX10) {
+ width = G_00A004_WIDTH_LO(md->metadata[3]) + (G_00A008_WIDTH_HI(md->metadata[4]) << 2) + 1;
+ height = G_00A008_HEIGHT(md->metadata[4]) + 1;
+ } else {
+ width = G_008F18_WIDTH(md->metadata[4]) + 1;
+ height = G_008F18_HEIGHT(md->metadata[4]) + 1;
+ }
+ }
+
+ if (image->info.width == width && image->info.height == height)
+ return VK_SUCCESS;
+
+ if (width < image->info.width || height < image->info.height) {
+ fprintf(stderr,
+ "The imported image has smaller dimensions than the internal\n"
+ "dimensions. Using it is going to fail badly, so we reject\n"
+ "this import.\n"
+ "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
+ image->info.width, image->info.height, width, height);
+ return VK_ERROR_INVALID_EXTERNAL_HANDLE;
+ } else if (device->physical_device->rad_info.chip_class >= GFX10) {
+ fprintf(stderr,
+ "Tried to import an image with inconsistent width on GFX10.\n"
+ "As GFX10 has no separate stride fields we cannot cope with\n"
+ "an inconsistency in width and will fail this import.\n"
+ "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
+ image->info.width, image->info.height, width, height);
+ return VK_ERROR_INVALID_EXTERNAL_HANDLE;
+ } else {
+ fprintf(stderr,
+ "Tried to import an image with inconsistent width on pre-GFX10.\n"
+ "As GFX10 has no separate stride fields we cannot cope with\n"
+ "an inconsistency and would fail on GFX10.\n"
+ "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
+ image->info.width, image->info.height, width, height);
+ }
+ image_info->width = width;
+ image_info->height = height;
+
+ return VK_SUCCESS;
}
static VkResult
-radv_patch_image_from_extra_info(struct radv_device *device,
- struct radv_image *image,
+radv_patch_image_from_extra_info(struct radv_device *device, struct radv_image *image,
const struct radv_image_create_info *create_info,
struct ac_surf_info *image_info)
{
- VkResult result = radv_patch_image_dimensions(device, image, create_info, image_info);
- if (result != VK_SUCCESS)
- return result;
-
- for (unsigned plane = 0; plane < image->plane_count; ++plane) {
- if (create_info->bo_metadata) {
- radv_patch_surface_from_metadata(device, &image->planes[plane].surface,
- create_info->bo_metadata);
- }
-
- if (radv_surface_has_scanout(device, create_info)) {
- image->planes[plane].surface.flags |= RADEON_SURF_SCANOUT;
- if (device->instance->debug_flags & RADV_DEBUG_NO_DISPLAY_DCC)
- image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;
-
- image->info.surf_index = NULL;
- }
- }
- return VK_SUCCESS;
+ VkResult result = radv_patch_image_dimensions(device, image, create_info, image_info);
+ if (result != VK_SUCCESS)
+ return result;
+
+ for (unsigned plane = 0; plane < image->plane_count; ++plane) {
+ if (create_info->bo_metadata) {
+ radv_patch_surface_from_metadata(device, &image->planes[plane].surface,
+ create_info->bo_metadata);
+ }
+
+ if (radv_surface_has_scanout(device, create_info)) {
+ image->planes[plane].surface.flags |= RADEON_SURF_SCANOUT;
+ if (device->instance->debug_flags & RADV_DEBUG_NO_DISPLAY_DCC)
+ image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;
+
+ image->info.surf_index = NULL;
+ }
+ }
+ return VK_SUCCESS;
}
static uint64_t
-radv_get_surface_flags(struct radv_device *device,
- const struct radv_image *image,
- unsigned plane_id,
- const VkImageCreateInfo *pCreateInfo,
+radv_get_surface_flags(struct radv_device *device, const struct radv_image *image,
+ unsigned plane_id, const VkImageCreateInfo *pCreateInfo,
VkFormat image_format)
{
- uint64_t flags;
- unsigned array_mode = radv_choose_tiling(device, pCreateInfo, image_format);
- VkFormat format = vk_format_get_plane_format(image_format, plane_id);
- const struct util_format_description *desc = vk_format_description(format);
- bool is_depth, is_stencil;
-
- is_depth = util_format_has_depth(desc);
- is_stencil = util_format_has_stencil(desc);
-
- flags = RADEON_SURF_SET(array_mode, MODE);
-
- switch (pCreateInfo->imageType){
- case VK_IMAGE_TYPE_1D:
- if (pCreateInfo->arrayLayers > 1)
- flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE);
- else
- flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE);
- break;
- case VK_IMAGE_TYPE_2D:
- if (pCreateInfo->arrayLayers > 1)
- flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE);
- else
- flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE);
- break;
- case VK_IMAGE_TYPE_3D:
- flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE);
- break;
- default:
- unreachable("unhandled image type");
- }
-
- /* Required for clearing/initializing a specific layer on GFX8. */
- flags |= RADEON_SURF_CONTIGUOUS_DCC_LAYERS;
-
- if (is_depth) {
- flags |= RADEON_SURF_ZBUFFER;
-
- if (radv_use_htile_for_image(device, image) &&
- !(device->instance->debug_flags & RADV_DEBUG_NO_HIZ)) {
- if (radv_use_tc_compat_htile_for_image(device, pCreateInfo, image_format))
- flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
- } else {
- flags |= RADEON_SURF_NO_HTILE;
- }
- }
-
- if (is_stencil)
- flags |= RADEON_SURF_SBUFFER;
-
- if (device->physical_device->rad_info.chip_class >= GFX9 &&
- pCreateInfo->imageType == VK_IMAGE_TYPE_3D &&
- vk_format_get_blocksizebits(image_format) == 128 &&
- vk_format_is_compressed(image_format))
- flags |= RADEON_SURF_NO_RENDER_TARGET;
-
- if (!radv_use_dcc_for_image(device, image, pCreateInfo, image_format))
- flags |= RADEON_SURF_DISABLE_DCC;
-
- if (!radv_use_fmask_for_image(device, image))
- flags |= RADEON_SURF_NO_FMASK;
-
- if (pCreateInfo->flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT) {
- flags |= RADEON_SURF_PRT |
- RADEON_SURF_NO_FMASK |
- RADEON_SURF_NO_HTILE |
- RADEON_SURF_DISABLE_DCC;
- }
-
- return flags;
+ uint64_t flags;
+ unsigned array_mode = radv_choose_tiling(device, pCreateInfo, image_format);
+ VkFormat format = vk_format_get_plane_format(image_format, plane_id);
+ const struct util_format_description *desc = vk_format_description(format);
+ bool is_depth, is_stencil;
+
+ is_depth = util_format_has_depth(desc);
+ is_stencil = util_format_has_stencil(desc);
+
+ flags = RADEON_SURF_SET(array_mode, MODE);
+
+ switch (pCreateInfo->imageType) {
+ case VK_IMAGE_TYPE_1D:
+ if (pCreateInfo->arrayLayers > 1)
+ flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE);
+ else
+ flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE);
+ break;
+ case VK_IMAGE_TYPE_2D:
+ if (pCreateInfo->arrayLayers > 1)
+ flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE);
+ else
+ flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE);
+ break;
+ case VK_IMAGE_TYPE_3D:
+ flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE);
+ break;
+ default:
+ unreachable("unhandled image type");
+ }
+
+ /* Required for clearing/initializing a specific layer on GFX8. */
+ flags |= RADEON_SURF_CONTIGUOUS_DCC_LAYERS;
+
+ if (is_depth) {
+ flags |= RADEON_SURF_ZBUFFER;
+
+ if (radv_use_htile_for_image(device, image) &&
+ !(device->instance->debug_flags & RADV_DEBUG_NO_HIZ)) {
+ if (radv_use_tc_compat_htile_for_image(device, pCreateInfo, image_format))
+ flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
+ } else {
+ flags |= RADEON_SURF_NO_HTILE;
+ }
+ }
+
+ if (is_stencil)
+ flags |= RADEON_SURF_SBUFFER;
+
+ if (device->physical_device->rad_info.chip_class >= GFX9 &&
+ pCreateInfo->imageType == VK_IMAGE_TYPE_3D &&
+ vk_format_get_blocksizebits(image_format) == 128 && vk_format_is_compressed(image_format))
+ flags |= RADEON_SURF_NO_RENDER_TARGET;
+
+ if (!radv_use_dcc_for_image(device, image, pCreateInfo, image_format))
+ flags |= RADEON_SURF_DISABLE_DCC;
+
+ if (!radv_use_fmask_for_image(device, image))
+ flags |= RADEON_SURF_NO_FMASK;
+
+ if (pCreateInfo->flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT) {
+ flags |=
+ RADEON_SURF_PRT | RADEON_SURF_NO_FMASK | RADEON_SURF_NO_HTILE | RADEON_SURF_DISABLE_DCC;
+ }
+
+ return flags;
}
static inline unsigned
si_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil)
{
- if (stencil)
- return plane->surface.u.legacy.stencil_tiling_index[level];
- else
- return plane->surface.u.legacy.tiling_index[level];
+ if (stencil)
+ return plane->surface.u.legacy.stencil_tiling_index[level];
+ else
+ return plane->surface.u.legacy.tiling_index[level];
}
-static unsigned radv_map_swizzle(unsigned swizzle)
+static unsigned
+radv_map_swizzle(unsigned swizzle)
{
- switch (swizzle) {
- case PIPE_SWIZZLE_Y:
- return V_008F0C_SQ_SEL_Y;
- case PIPE_SWIZZLE_Z:
- return V_008F0C_SQ_SEL_Z;
- case PIPE_SWIZZLE_W:
- return V_008F0C_SQ_SEL_W;
- case PIPE_SWIZZLE_0:
- return V_008F0C_SQ_SEL_0;
- case PIPE_SWIZZLE_1:
- return V_008F0C_SQ_SEL_1;
- default: /* PIPE_SWIZZLE_X */
- return V_008F0C_SQ_SEL_X;
- }
+ switch (swizzle) {
+ case PIPE_SWIZZLE_Y:
+ return V_008F0C_SQ_SEL_Y;
+ case PIPE_SWIZZLE_Z:
+ return V_008F0C_SQ_SEL_Z;
+ case PIPE_SWIZZLE_W:
+ return V_008F0C_SQ_SEL_W;
+ case PIPE_SWIZZLE_0:
+ return V_008F0C_SQ_SEL_0;
+ case PIPE_SWIZZLE_1:
+ return V_008F0C_SQ_SEL_1;
+ default: /* PIPE_SWIZZLE_X */
+ return V_008F0C_SQ_SEL_X;
+ }
}
static void
-radv_compose_swizzle(const struct util_format_description *desc,
- const VkComponentMapping *mapping, enum pipe_swizzle swizzle[4])
+radv_compose_swizzle(const struct util_format_description *desc, const VkComponentMapping *mapping,
+ enum pipe_swizzle swizzle[4])
{
- if (desc->format == PIPE_FORMAT_R64_UINT || desc->format == PIPE_FORMAT_R64_SINT) {
- /* 64-bit formats only support storage images and storage images
- * require identity component mappings. We use 32-bit
- * instructions to access 64-bit images, so we need a special
- * case here.
- *
-	 * The zw components are 1,0 so that they can easily be used
- * by loads to create the w component, which has to be 0 for
- * NULL descriptors.
- */
- swizzle[0] = PIPE_SWIZZLE_X;
- swizzle[1] = PIPE_SWIZZLE_Y;
- swizzle[2] = PIPE_SWIZZLE_1;
- swizzle[3] = PIPE_SWIZZLE_0;
- } else if (!mapping) {
- for (unsigned i = 0; i < 4; i++)
- swizzle[i] = desc->swizzle[i];
- } else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
- const unsigned char swizzle_xxxx[4] = {
- PIPE_SWIZZLE_X, PIPE_SWIZZLE_0, PIPE_SWIZZLE_0, PIPE_SWIZZLE_1
- };
- vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
- } else {
- vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
- }
+ if (desc->format == PIPE_FORMAT_R64_UINT || desc->format == PIPE_FORMAT_R64_SINT) {
+ /* 64-bit formats only support storage images and storage images
+ * require identity component mappings. We use 32-bit
+ * instructions to access 64-bit images, so we need a special
+ * case here.
+ *
+       * The zw components are 1,0 so that they can easily be used
+ * by loads to create the w component, which has to be 0 for
+ * NULL descriptors.
+ */
+ swizzle[0] = PIPE_SWIZZLE_X;
+ swizzle[1] = PIPE_SWIZZLE_Y;
+ swizzle[2] = PIPE_SWIZZLE_1;
+ swizzle[3] = PIPE_SWIZZLE_0;
+ } else if (!mapping) {
+ for (unsigned i = 0; i < 4; i++)
+ swizzle[i] = desc->swizzle[i];
+ } else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
+ const unsigned char swizzle_xxxx[4] = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_0, PIPE_SWIZZLE_0,
+ PIPE_SWIZZLE_1};
+ vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
+ } else {
+ vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
+ }
}
static void
-radv_make_buffer_descriptor(struct radv_device *device,
- struct radv_buffer *buffer,
- VkFormat vk_format,
- unsigned offset,
- unsigned range,
- uint32_t *state)
+radv_make_buffer_descriptor(struct radv_device *device, struct radv_buffer *buffer,
+ VkFormat vk_format, unsigned offset, unsigned range, uint32_t *state)
{
- const struct util_format_description *desc;
- unsigned stride;
- uint64_t gpu_address = radv_buffer_get_va(buffer->bo);
- uint64_t va = gpu_address + buffer->offset;
- unsigned num_format, data_format;
- int first_non_void;
- enum pipe_swizzle swizzle[4];
- desc = vk_format_description(vk_format);
- first_non_void = vk_format_get_first_non_void_channel(vk_format);
- stride = desc->block.bits / 8;
-
- radv_compose_swizzle(desc, NULL, swizzle);
-
- va += offset;
- state[0] = va;
- state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
- S_008F04_STRIDE(stride);
-
- if (device->physical_device->rad_info.chip_class != GFX8 && stride) {
- range /= stride;
- }
-
- state[2] = range;
- state[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
- S_008F0C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
- S_008F0C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
- S_008F0C_DST_SEL_W(radv_map_swizzle(swizzle[3]));
-
- if (device->physical_device->rad_info.chip_class >= GFX10) {
- const struct gfx10_format *fmt = &gfx10_format_table[vk_format_to_pipe_format(vk_format)];
-
- /* OOB_SELECT chooses the out-of-bounds check:
- * - 0: (index >= NUM_RECORDS) || (offset >= STRIDE)
- * - 1: index >= NUM_RECORDS
- * - 2: NUM_RECORDS == 0
- * - 3: if SWIZZLE_ENABLE == 0: offset >= NUM_RECORDS
- * else: swizzle_address >= NUM_RECORDS
- */
- state[3] |= S_008F0C_FORMAT(fmt->img_format) |
- S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) |
- S_008F0C_RESOURCE_LEVEL(1);
- } else {
- num_format = radv_translate_buffer_numformat(desc, first_non_void);
- data_format = radv_translate_buffer_dataformat(desc, first_non_void);
-
- assert(data_format != V_008F0C_BUF_DATA_FORMAT_INVALID);
- assert(num_format != ~0);
-
- state[3] |= S_008F0C_NUM_FORMAT(num_format) |
- S_008F0C_DATA_FORMAT(data_format);
- }
+ const struct util_format_description *desc;
+ unsigned stride;
+ uint64_t gpu_address = radv_buffer_get_va(buffer->bo);
+ uint64_t va = gpu_address + buffer->offset;
+ unsigned num_format, data_format;
+ int first_non_void;
+ enum pipe_swizzle swizzle[4];
+ desc = vk_format_description(vk_format);
+ first_non_void = vk_format_get_first_non_void_channel(vk_format);
+ stride = desc->block.bits / 8;
+
+ radv_compose_swizzle(desc, NULL, swizzle);
+
+ va += offset;
+ state[0] = va;
+ state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride);
+
+ if (device->physical_device->rad_info.chip_class != GFX8 && stride) {
+ range /= stride;
+ }
+
+ state[2] = range;
+ state[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
+ S_008F0C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
+ S_008F0C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
+ S_008F0C_DST_SEL_W(radv_map_swizzle(swizzle[3]));
+
+ if (device->physical_device->rad_info.chip_class >= GFX10) {
+ const struct gfx10_format *fmt = &gfx10_format_table[vk_format_to_pipe_format(vk_format)];
+
+ /* OOB_SELECT chooses the out-of-bounds check:
+ * - 0: (index >= NUM_RECORDS) || (offset >= STRIDE)
+ * - 1: index >= NUM_RECORDS
+ * - 2: NUM_RECORDS == 0
+ * - 3: if SWIZZLE_ENABLE == 0: offset >= NUM_RECORDS
+ * else: swizzle_address >= NUM_RECORDS
+ */
+ state[3] |= S_008F0C_FORMAT(fmt->img_format) |
+ S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) |
+ S_008F0C_RESOURCE_LEVEL(1);
+ } else {
+ num_format = radv_translate_buffer_numformat(desc, first_non_void);
+ data_format = radv_translate_buffer_dataformat(desc, first_non_void);
+
+ assert(data_format != V_008F0C_BUF_DATA_FORMAT_INVALID);
+ assert(num_format != ~0);
+
+ state[3] |= S_008F0C_NUM_FORMAT(num_format) | S_008F0C_DATA_FORMAT(data_format);
+ }
}
static void
-si_set_mutable_tex_desc_fields(struct radv_device *device,
- struct radv_image *image,
- const struct legacy_surf_level *base_level_info,
- unsigned plane_id,
- unsigned base_level, unsigned first_level,
- unsigned block_width, bool is_stencil,
- bool is_storage_image, bool disable_compression, bool enable_write_compression,
- uint32_t *state)
+si_set_mutable_tex_desc_fields(struct radv_device *device, struct radv_image *image,
+ const struct legacy_surf_level *base_level_info, unsigned plane_id,
+ unsigned base_level, unsigned first_level, unsigned block_width,
+ bool is_stencil, bool is_storage_image, bool disable_compression,
+ bool enable_write_compression, uint32_t *state)
{
- struct radv_image_plane *plane = &image->planes[plane_id];
- uint64_t gpu_address = image->bo ? radv_buffer_get_va(image->bo) + image->offset : 0;
- uint64_t va = gpu_address;
- enum chip_class chip_class = device->physical_device->rad_info.chip_class;
- uint64_t meta_va = 0;
- if (chip_class >= GFX9) {
- if (is_stencil)
- va += plane->surface.u.gfx9.stencil_offset;
- else
- va += plane->surface.u.gfx9.surf_offset;
- } else
- va += base_level_info->offset;
-
- state[0] = va >> 8;
- if (chip_class >= GFX9 ||
- base_level_info->mode == RADEON_SURF_MODE_2D)
- state[0] |= plane->surface.tile_swizzle;
- state[1] &= C_008F14_BASE_ADDRESS_HI;
- state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
-
- if (chip_class >= GFX8) {
- state[6] &= C_008F28_COMPRESSION_EN;
- state[7] = 0;
- if (!disable_compression && radv_dcc_enabled(image, first_level)) {
- meta_va = gpu_address + plane->surface.dcc_offset;
- if (chip_class <= GFX8)
- meta_va += base_level_info->dcc_offset;
-
- unsigned dcc_tile_swizzle = plane->surface.tile_swizzle << 8;
- dcc_tile_swizzle &= plane->surface.dcc_alignment - 1;
- meta_va |= dcc_tile_swizzle;
- } else if (!disable_compression &&
- radv_image_is_tc_compat_htile(image)) {
- meta_va = gpu_address + plane->surface.htile_offset;
- }
-
- if (meta_va) {
- state[6] |= S_008F28_COMPRESSION_EN(1);
- if (chip_class <= GFX9)
- state[7] = meta_va >> 8;
- }
- }
-
- if (chip_class >= GFX10) {
- state[3] &= C_00A00C_SW_MODE;
-
- if (is_stencil) {
- state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.stencil.swizzle_mode);
- } else {
- state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.surf.swizzle_mode);
- }
-
- state[6] &= C_00A018_META_DATA_ADDRESS_LO &
- C_00A018_META_PIPE_ALIGNED;
-
- if (meta_va) {
- struct gfx9_surf_meta_flags meta = {
- .rb_aligned = 1,
- .pipe_aligned = 1,
- };
-
- if (plane->surface.dcc_offset)
- meta = plane->surface.u.gfx9.dcc;
-
- if (radv_dcc_enabled(image, first_level) &&
- is_storage_image && enable_write_compression)
- state[6] |= S_00A018_WRITE_COMPRESS_ENABLE(1);
-
- state[6] |= S_00A018_META_PIPE_ALIGNED(meta.pipe_aligned) |
- S_00A018_META_DATA_ADDRESS_LO(meta_va >> 8);
- }
-
- state[7] = meta_va >> 16;
- } else if (chip_class == GFX9) {
- state[3] &= C_008F1C_SW_MODE;
- state[4] &= C_008F20_PITCH;
-
- if (is_stencil) {
- state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.stencil.swizzle_mode);
- state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.stencil.epitch);
- } else {
- state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.surf.swizzle_mode);
- state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.surf.epitch);
- }
-
- state[5] &= C_008F24_META_DATA_ADDRESS &
- C_008F24_META_PIPE_ALIGNED &
- C_008F24_META_RB_ALIGNED;
- if (meta_va) {
- struct gfx9_surf_meta_flags meta = {
- .rb_aligned = 1,
- .pipe_aligned = 1,
- };
-
- if (plane->surface.dcc_offset)
- meta = plane->surface.u.gfx9.dcc;
-
- state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
- S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
- S_008F24_META_RB_ALIGNED(meta.rb_aligned);
- }
- } else {
- /* GFX6-GFX8 */
- unsigned pitch = base_level_info->nblk_x * block_width;
- unsigned index = si_tile_mode_index(plane, base_level, is_stencil);
-
- state[3] &= C_008F1C_TILING_INDEX;
- state[3] |= S_008F1C_TILING_INDEX(index);
- state[4] &= C_008F20_PITCH;
- state[4] |= S_008F20_PITCH(pitch - 1);
- }
+ struct radv_image_plane *plane = &image->planes[plane_id];
+ uint64_t gpu_address = image->bo ? radv_buffer_get_va(image->bo) + image->offset : 0;
+ uint64_t va = gpu_address;
+ enum chip_class chip_class = device->physical_device->rad_info.chip_class;
+ uint64_t meta_va = 0;
+ if (chip_class >= GFX9) {
+ if (is_stencil)
+ va += plane->surface.u.gfx9.stencil_offset;
+ else
+ va += plane->surface.u.gfx9.surf_offset;
+ } else
+ va += base_level_info->offset;
+
+ state[0] = va >> 8;
+ if (chip_class >= GFX9 || base_level_info->mode == RADEON_SURF_MODE_2D)
+ state[0] |= plane->surface.tile_swizzle;
+ state[1] &= C_008F14_BASE_ADDRESS_HI;
+ state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
+
+ if (chip_class >= GFX8) {
+ state[6] &= C_008F28_COMPRESSION_EN;
+ state[7] = 0;
+ if (!disable_compression && radv_dcc_enabled(image, first_level)) {
+ meta_va = gpu_address + plane->surface.dcc_offset;
+ if (chip_class <= GFX8)
+ meta_va += base_level_info->dcc_offset;
+
+ unsigned dcc_tile_swizzle = plane->surface.tile_swizzle << 8;
+ dcc_tile_swizzle &= plane->surface.dcc_alignment - 1;
+ meta_va |= dcc_tile_swizzle;
+ } else if (!disable_compression && radv_image_is_tc_compat_htile(image)) {
+ meta_va = gpu_address + plane->surface.htile_offset;
+ }
+
+ if (meta_va) {
+ state[6] |= S_008F28_COMPRESSION_EN(1);
+ if (chip_class <= GFX9)
+ state[7] = meta_va >> 8;
+ }
+ }
+
+ if (chip_class >= GFX10) {
+ state[3] &= C_00A00C_SW_MODE;
+
+ if (is_stencil) {
+ state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.stencil.swizzle_mode);
+ } else {
+ state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.surf.swizzle_mode);
+ }
+
+ state[6] &= C_00A018_META_DATA_ADDRESS_LO & C_00A018_META_PIPE_ALIGNED;
+
+ if (meta_va) {
+ struct gfx9_surf_meta_flags meta = {
+ .rb_aligned = 1,
+ .pipe_aligned = 1,
+ };
+
+ if (plane->surface.dcc_offset)
+ meta = plane->surface.u.gfx9.dcc;
+
+ if (radv_dcc_enabled(image, first_level) && is_storage_image && enable_write_compression)
+ state[6] |= S_00A018_WRITE_COMPRESS_ENABLE(1);
+
+ state[6] |= S_00A018_META_PIPE_ALIGNED(meta.pipe_aligned) |
+ S_00A018_META_DATA_ADDRESS_LO(meta_va >> 8);
+ }
+
+ state[7] = meta_va >> 16;
+ } else if (chip_class == GFX9) {
+ state[3] &= C_008F1C_SW_MODE;
+ state[4] &= C_008F20_PITCH;
+
+ if (is_stencil) {
+ state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.stencil.swizzle_mode);
+ state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.stencil.epitch);
+ } else {
+ state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.surf.swizzle_mode);
+ state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.surf.epitch);
+ }
+
+ state[5] &=
+ C_008F24_META_DATA_ADDRESS & C_008F24_META_PIPE_ALIGNED & C_008F24_META_RB_ALIGNED;
+ if (meta_va) {
+ struct gfx9_surf_meta_flags meta = {
+ .rb_aligned = 1,
+ .pipe_aligned = 1,
+ };
+
+ if (plane->surface.dcc_offset)
+ meta = plane->surface.u.gfx9.dcc;
+
+ state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
+ S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
+ S_008F24_META_RB_ALIGNED(meta.rb_aligned);
+ }
+ } else {
+ /* GFX6-GFX8 */
+ unsigned pitch = base_level_info->nblk_x * block_width;
+ unsigned index = si_tile_mode_index(plane, base_level, is_stencil);
+
+ state[3] &= C_008F1C_TILING_INDEX;
+ state[3] |= S_008F1C_TILING_INDEX(index);
+ state[4] &= C_008F20_PITCH;
+ state[4] |= S_008F20_PITCH(pitch - 1);
+ }
}
-static unsigned radv_tex_dim(VkImageType image_type, VkImageViewType view_type,
- unsigned nr_layers, unsigned nr_samples, bool is_storage_image, bool gfx9)
+static unsigned
+radv_tex_dim(VkImageType image_type, VkImageViewType view_type, unsigned nr_layers,
+ unsigned nr_samples, bool is_storage_image, bool gfx9)
{
- if (view_type == VK_IMAGE_VIEW_TYPE_CUBE || view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
- return is_storage_image ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_CUBE;
-
- /* GFX9 allocates 1D textures as 2D. */
- if (gfx9 && image_type == VK_IMAGE_TYPE_1D)
- image_type = VK_IMAGE_TYPE_2D;
- switch (image_type) {
- case VK_IMAGE_TYPE_1D:
- return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_1D_ARRAY : V_008F1C_SQ_RSRC_IMG_1D;
- case VK_IMAGE_TYPE_2D:
- if (nr_samples > 1)
- return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : V_008F1C_SQ_RSRC_IMG_2D_MSAA;
- else
- return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_2D;
- case VK_IMAGE_TYPE_3D:
- if (view_type == VK_IMAGE_VIEW_TYPE_3D)
- return V_008F1C_SQ_RSRC_IMG_3D;
- else
- return V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
- default:
- unreachable("illegal image type");
- }
+ if (view_type == VK_IMAGE_VIEW_TYPE_CUBE || view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
+ return is_storage_image ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_CUBE;
+
+ /* GFX9 allocates 1D textures as 2D. */
+ if (gfx9 && image_type == VK_IMAGE_TYPE_1D)
+ image_type = VK_IMAGE_TYPE_2D;
+ switch (image_type) {
+ case VK_IMAGE_TYPE_1D:
+ return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_1D_ARRAY : V_008F1C_SQ_RSRC_IMG_1D;
+ case VK_IMAGE_TYPE_2D:
+ if (nr_samples > 1)
+ return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : V_008F1C_SQ_RSRC_IMG_2D_MSAA;
+ else
+ return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_2D;
+ case VK_IMAGE_TYPE_3D:
+ if (view_type == VK_IMAGE_VIEW_TYPE_3D)
+ return V_008F1C_SQ_RSRC_IMG_3D;
+ else
+ return V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
+ default:
+ unreachable("illegal image type");
+ }
}
-static unsigned gfx9_border_color_swizzle(const enum pipe_swizzle swizzle[4])
+static unsigned
+gfx9_border_color_swizzle(const enum pipe_swizzle swizzle[4])
{
- unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
-
- if (swizzle[3] == PIPE_SWIZZLE_X) {
- /* For the pre-defined border color values (white, opaque
- * black, transparent black), the only thing that matters is
- * that the alpha channel winds up in the correct place
- * (because the RGB channels are all the same) so either of
- * these enumerations will work.
- */
- if (swizzle[2] == PIPE_SWIZZLE_Y)
- bc_swizzle = V_008F20_BC_SWIZZLE_WZYX;
- else
- bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ;
- } else if (swizzle[0] == PIPE_SWIZZLE_X) {
- if (swizzle[1] == PIPE_SWIZZLE_Y)
- bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
- else
- bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ;
- } else if (swizzle[1] == PIPE_SWIZZLE_X) {
- bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ;
- } else if (swizzle[2] == PIPE_SWIZZLE_X) {
- bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW;
- }
-
- return bc_swizzle;
+ unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
+
+ if (swizzle[3] == PIPE_SWIZZLE_X) {
+ /* For the pre-defined border color values (white, opaque
+ * black, transparent black), the only thing that matters is
+ * that the alpha channel winds up in the correct place
+ * (because the RGB channels are all the same) so either of
+ * these enumerations will work.
+ */
+ if (swizzle[2] == PIPE_SWIZZLE_Y)
+ bc_swizzle = V_008F20_BC_SWIZZLE_WZYX;
+ else
+ bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ;
+ } else if (swizzle[0] == PIPE_SWIZZLE_X) {
+ if (swizzle[1] == PIPE_SWIZZLE_Y)
+ bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
+ else
+ bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ;
+ } else if (swizzle[1] == PIPE_SWIZZLE_X) {
+ bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ;
+ } else if (swizzle[2] == PIPE_SWIZZLE_X) {
+ bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW;
+ }
+
+ return bc_swizzle;
}
-bool vi_alpha_is_on_msb(struct radv_device *device, VkFormat format)
+bool
+vi_alpha_is_on_msb(struct radv_device *device, VkFormat format)
{
- const struct util_format_description *desc = vk_format_description(format);
+ const struct util_format_description *desc = vk_format_description(format);
- if (device->physical_device->rad_info.chip_class >= GFX10 && desc->nr_channels == 1)
- return desc->swizzle[3] == PIPE_SWIZZLE_X;
+ if (device->physical_device->rad_info.chip_class >= GFX10 && desc->nr_channels == 1)
+ return desc->swizzle[3] == PIPE_SWIZZLE_X;
- return radv_translate_colorswap(format, false) <= 1;
+ return radv_translate_colorswap(format, false) <= 1;
}
/**
* Build the sampler view descriptor for a texture (GFX10).
*/
static void
-gfx10_make_texture_descriptor(struct radv_device *device,
- struct radv_image *image,
- bool is_storage_image,
- VkImageViewType view_type,
- VkFormat vk_format,
- const VkComponentMapping *mapping,
- unsigned first_level, unsigned last_level,
- unsigned first_layer, unsigned last_layer,
- unsigned width, unsigned height, unsigned depth,
- uint32_t *state,
- uint32_t *fmask_state)
+gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *image,
+ bool is_storage_image, VkImageViewType view_type, VkFormat vk_format,
+ const VkComponentMapping *mapping, unsigned first_level,
+ unsigned last_level, unsigned first_layer, unsigned last_layer,
+ unsigned width, unsigned height, unsigned depth, uint32_t *state,
+ uint32_t *fmask_state)
{
- const struct util_format_description *desc;
- enum pipe_swizzle swizzle[4];
- unsigned img_format;
- unsigned type;
-
- desc = vk_format_description(vk_format);
- img_format = gfx10_format_table[vk_format_to_pipe_format(vk_format)].img_format;
-
- radv_compose_swizzle(desc, mapping, swizzle);
-
- type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
- is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
- if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
- height = 1;
- depth = image->info.array_size;
- } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
- type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
- if (view_type != VK_IMAGE_VIEW_TYPE_3D)
- depth = image->info.array_size;
- } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
- depth = image->info.array_size / 6;
-
- state[0] = 0;
- state[1] = S_00A004_FORMAT(img_format) |
- S_00A004_WIDTH_LO(width - 1);
- state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) |
- S_00A008_HEIGHT(height - 1) |
- S_00A008_RESOURCE_LEVEL(1);
- state[3] = S_00A00C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
- S_00A00C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
- S_00A00C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
- S_00A00C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
- S_00A00C_BASE_LEVEL(image->info.samples > 1 ?
- 0 : first_level) |
- S_00A00C_LAST_LEVEL(image->info.samples > 1 ?
- util_logbase2(image->info.samples) :
- last_level) |
- S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(swizzle)) |
- S_00A00C_TYPE(type);
-	/* Depth is the last accessible layer on gfx9+. The hw doesn't need
- * to know the total number of layers.
- */
- state[4] = S_00A010_DEPTH(type == V_008F1C_SQ_RSRC_IMG_3D ? depth - 1 : last_layer) |
- S_00A010_BASE_ARRAY(first_layer);
- state[5] = S_00A014_ARRAY_PITCH(0) |
- S_00A014_MAX_MIP(image->info.samples > 1 ?
- util_logbase2(image->info.samples) :
- image->info.levels - 1) |
- S_00A014_PERF_MOD(4);
- state[6] = 0;
- state[7] = 0;
-
- if (radv_dcc_enabled(image, first_level)) {
- state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
- S_00A018_MAX_COMPRESSED_BLOCK_SIZE(image->planes[0].surface.u.gfx9.dcc.max_compressed_block_size) |
- S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
- }
-
- /* Initialize the sampler view for FMASK. */
- if (fmask_state) {
- if (radv_image_has_fmask(image)) {
- uint64_t gpu_address = radv_buffer_get_va(image->bo);
- uint32_t format;
- uint64_t va;
-
- assert(image->plane_count == 1);
-
- va = gpu_address + image->offset + image->planes[0].surface.fmask_offset;
-
- switch (image->info.samples) {
- case 2:
- format = V_008F0C_IMG_FORMAT_FMASK8_S2_F2;
- break;
- case 4:
- format = V_008F0C_IMG_FORMAT_FMASK8_S4_F4;
- break;
- case 8:
- format = V_008F0C_IMG_FORMAT_FMASK32_S8_F8;
- break;
- default:
- unreachable("invalid nr_samples");
- }
-
- fmask_state[0] = (va >> 8) | image->planes[0].surface.fmask_tile_swizzle;
- fmask_state[1] = S_00A004_BASE_ADDRESS_HI(va >> 40) |
- S_00A004_FORMAT(format) |
- S_00A004_WIDTH_LO(width - 1);
- fmask_state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) |
- S_00A008_HEIGHT(height - 1) |
- S_00A008_RESOURCE_LEVEL(1);
- fmask_state[3] = S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
- S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
- S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
- S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
- S_00A00C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode) |
- S_00A00C_TYPE(radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
- fmask_state[4] = S_00A010_DEPTH(last_layer) |
- S_00A010_BASE_ARRAY(first_layer);
- fmask_state[5] = 0;
- fmask_state[6] = S_00A018_META_PIPE_ALIGNED(1);
- fmask_state[7] = 0;
-
- if (radv_image_is_tc_compat_cmask(image)) {
- va = gpu_address + image->offset + image->planes[0].surface.cmask_offset;
-
- fmask_state[6] |= S_00A018_COMPRESSION_EN(1);
- fmask_state[6] |= S_00A018_META_DATA_ADDRESS_LO(va >> 8);
- fmask_state[7] |= va >> 16;
- }
- } else
- memset(fmask_state, 0, 8 * 4);
- }
+ const struct util_format_description *desc;
+ enum pipe_swizzle swizzle[4];
+ unsigned img_format;
+ unsigned type;
+
+ desc = vk_format_description(vk_format);
+ img_format = gfx10_format_table[vk_format_to_pipe_format(vk_format)].img_format;
+
+ radv_compose_swizzle(desc, mapping, swizzle);
+
+ type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
+ is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
+ if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
+ height = 1;
+ depth = image->info.array_size;
+ } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY || type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
+ if (view_type != VK_IMAGE_VIEW_TYPE_3D)
+ depth = image->info.array_size;
+ } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
+ depth = image->info.array_size / 6;
+
+ state[0] = 0;
+ state[1] = S_00A004_FORMAT(img_format) | S_00A004_WIDTH_LO(width - 1);
+ state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) |
+ S_00A008_RESOURCE_LEVEL(1);
+ state[3] = S_00A00C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
+ S_00A00C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
+ S_00A00C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
+ S_00A00C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
+ S_00A00C_BASE_LEVEL(image->info.samples > 1 ? 0 : first_level) |
+ S_00A00C_LAST_LEVEL(image->info.samples > 1 ? util_logbase2(image->info.samples)
+ : last_level) |
+ S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(swizzle)) | S_00A00C_TYPE(type);
+   /* Depth is the last accessible layer on gfx9+. The hw doesn't need
+ * to know the total number of layers.
+ */
+ state[4] = S_00A010_DEPTH(type == V_008F1C_SQ_RSRC_IMG_3D ? depth - 1 : last_layer) |
+ S_00A010_BASE_ARRAY(first_layer);
+ state[5] = S_00A014_ARRAY_PITCH(0) |
+ S_00A014_MAX_MIP(image->info.samples > 1 ? util_logbase2(image->info.samples)
+ : image->info.levels - 1) |
+ S_00A014_PERF_MOD(4);
+ state[6] = 0;
+ state[7] = 0;
+
+ if (radv_dcc_enabled(image, first_level)) {
+ state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
+ S_00A018_MAX_COMPRESSED_BLOCK_SIZE(
+ image->planes[0].surface.u.gfx9.dcc.max_compressed_block_size) |
+ S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
+ }
+
+ /* Initialize the sampler view for FMASK. */
+ if (fmask_state) {
+ if (radv_image_has_fmask(image)) {
+ uint64_t gpu_address = radv_buffer_get_va(image->bo);
+ uint32_t format;
+ uint64_t va;
+
+ assert(image->plane_count == 1);
+
+ va = gpu_address + image->offset + image->planes[0].surface.fmask_offset;
+
+ switch (image->info.samples) {
+ case 2:
+ format = V_008F0C_IMG_FORMAT_FMASK8_S2_F2;
+ break;
+ case 4:
+ format = V_008F0C_IMG_FORMAT_FMASK8_S4_F4;
+ break;
+ case 8:
+ format = V_008F0C_IMG_FORMAT_FMASK32_S8_F8;
+ break;
+ default:
+ unreachable("invalid nr_samples");
+ }
+
+ fmask_state[0] = (va >> 8) | image->planes[0].surface.fmask_tile_swizzle;
+ fmask_state[1] = S_00A004_BASE_ADDRESS_HI(va >> 40) | S_00A004_FORMAT(format) |
+ S_00A004_WIDTH_LO(width - 1);
+ fmask_state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) |
+ S_00A008_RESOURCE_LEVEL(1);
+ fmask_state[3] =
+ S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X) | S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
+ S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
+ S_00A00C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode) |
+ S_00A00C_TYPE(
+ radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
+ fmask_state[4] = S_00A010_DEPTH(last_layer) | S_00A010_BASE_ARRAY(first_layer);
+ fmask_state[5] = 0;
+ fmask_state[6] = S_00A018_META_PIPE_ALIGNED(1);
+ fmask_state[7] = 0;
+
+ if (radv_image_is_tc_compat_cmask(image)) {
+ va = gpu_address + image->offset + image->planes[0].surface.cmask_offset;
+
+ fmask_state[6] |= S_00A018_COMPRESSION_EN(1);
+ fmask_state[6] |= S_00A018_META_DATA_ADDRESS_LO(va >> 8);
+ fmask_state[7] |= va >> 16;
+ }
+ } else
+ memset(fmask_state, 0, 8 * 4);
+ }
}
/**
* Build the sampler view descriptor for a texture (SI-GFX9)
*/
static void
-si_make_texture_descriptor(struct radv_device *device,
- struct radv_image *image,
- bool is_storage_image,
- VkImageViewType view_type,
- VkFormat vk_format,
- const VkComponentMapping *mapping,
- unsigned first_level, unsigned last_level,
- unsigned first_layer, unsigned last_layer,
- unsigned width, unsigned height, unsigned depth,
- uint32_t *state,
- uint32_t *fmask_state)
+si_make_texture_descriptor(struct radv_device *device, struct radv_image *image,
+ bool is_storage_image, VkImageViewType view_type, VkFormat vk_format,
+ const VkComponentMapping *mapping, unsigned first_level,
+ unsigned last_level, unsigned first_layer, unsigned last_layer,
+ unsigned width, unsigned height, unsigned depth, uint32_t *state,
+ uint32_t *fmask_state)
{
- const struct util_format_description *desc;
- enum pipe_swizzle swizzle[4];
- int first_non_void;
- unsigned num_format, data_format, type;
-
- desc = vk_format_description(vk_format);
-
- radv_compose_swizzle(desc, mapping, swizzle);
-
- first_non_void = vk_format_get_first_non_void_channel(vk_format);
-
- num_format = radv_translate_tex_numformat(vk_format, desc, first_non_void);
- if (num_format == ~0) {
- num_format = 0;
- }
-
- data_format = radv_translate_tex_dataformat(vk_format, desc, first_non_void);
- if (data_format == ~0) {
- data_format = 0;
- }
-
- /* S8 with either Z16 or Z32 HTILE need a special format. */
- if (device->physical_device->rad_info.chip_class == GFX9 &&
- vk_format == VK_FORMAT_S8_UINT &&
- radv_image_is_tc_compat_htile(image)) {
- if (image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT)
- data_format = V_008F14_IMG_DATA_FORMAT_S8_32;
- else if (image->vk_format == VK_FORMAT_D16_UNORM_S8_UINT)
- data_format = V_008F14_IMG_DATA_FORMAT_S8_16;
- }
- type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
- is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
- if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
- height = 1;
- depth = image->info.array_size;
- } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
- type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
- if (view_type != VK_IMAGE_VIEW_TYPE_3D)
- depth = image->info.array_size;
- } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
- depth = image->info.array_size / 6;
-
- state[0] = 0;
- state[1] = (S_008F14_DATA_FORMAT(data_format) |
- S_008F14_NUM_FORMAT(num_format));
- state[2] = (S_008F18_WIDTH(width - 1) |
- S_008F18_HEIGHT(height - 1) |
- S_008F18_PERF_MOD(4));
- state[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
- S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
- S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
- S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
- S_008F1C_BASE_LEVEL(image->info.samples > 1 ?
- 0 : first_level) |
- S_008F1C_LAST_LEVEL(image->info.samples > 1 ?
- util_logbase2(image->info.samples) :
- last_level) |
- S_008F1C_TYPE(type));
- state[4] = 0;
- state[5] = S_008F24_BASE_ARRAY(first_layer);
- state[6] = 0;
- state[7] = 0;
-
- if (device->physical_device->rad_info.chip_class == GFX9) {
- unsigned bc_swizzle = gfx9_border_color_swizzle(swizzle);
-
- /* Depth is the last accessible layer on Gfx9.
- * The hw doesn't need to know the total number of layers.
- */
- if (type == V_008F1C_SQ_RSRC_IMG_3D)
- state[4] |= S_008F20_DEPTH(depth - 1);
- else
- state[4] |= S_008F20_DEPTH(last_layer);
-
- state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);
- state[5] |= S_008F24_MAX_MIP(image->info.samples > 1 ?
- util_logbase2(image->info.samples) :
- image->info.levels - 1);
- } else {
- state[3] |= S_008F1C_POW2_PAD(image->info.levels > 1);
- state[4] |= S_008F20_DEPTH(depth - 1);
- state[5] |= S_008F24_LAST_ARRAY(last_layer);
- }
- if (image->planes[0].surface.dcc_offset) {
- state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
- } else {
- /* The last dword is unused by hw. The shader uses it to clear
- * bits in the first dword of sampler state.
- */
- if (device->physical_device->rad_info.chip_class <= GFX7 && image->info.samples <= 1) {
- if (first_level == last_level)
- state[7] = C_008F30_MAX_ANISO_RATIO;
- else
- state[7] = 0xffffffff;
- }
- }
-
- /* Initialize the sampler view for FMASK. */
- if (fmask_state) {
- if (radv_image_has_fmask(image)) {
- uint32_t fmask_format;
- uint64_t gpu_address = radv_buffer_get_va(image->bo);
- uint64_t va;
-
- assert(image->plane_count == 1);
-
- va = gpu_address + image->offset + image->planes[0].surface.fmask_offset;
-
- if (device->physical_device->rad_info.chip_class == GFX9) {
- fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK;
- switch (image->info.samples) {
- case 2:
- num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_2_2;
- break;
- case 4:
- num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_4;
- break;
- case 8:
- num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_8_8;
- break;
- default:
- unreachable("invalid nr_samples");
- }
- } else {
- switch (image->info.samples) {
- case 2:
- fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
- break;
- case 4:
- fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
- break;
- case 8:
- fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
- break;
- default:
- assert(0);
- fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
- }
- num_format = V_008F14_IMG_NUM_FORMAT_UINT;
- }
-
- fmask_state[0] = va >> 8;
- fmask_state[0] |= image->planes[0].surface.fmask_tile_swizzle;
- fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
- S_008F14_DATA_FORMAT(fmask_format) |
- S_008F14_NUM_FORMAT(num_format);
- fmask_state[2] = S_008F18_WIDTH(width - 1) |
- S_008F18_HEIGHT(height - 1);
- fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
- S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
- S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
- S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
- S_008F1C_TYPE(radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
- fmask_state[4] = 0;
- fmask_state[5] = S_008F24_BASE_ARRAY(first_layer);
- fmask_state[6] = 0;
- fmask_state[7] = 0;
-
- if (device->physical_device->rad_info.chip_class == GFX9) {
- fmask_state[3] |= S_008F1C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode);
- fmask_state[4] |= S_008F20_DEPTH(last_layer) |
- S_008F20_PITCH(image->planes[0].surface.u.gfx9.fmask.epitch);
- fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(1) |
- S_008F24_META_RB_ALIGNED(1);
-
- if (radv_image_is_tc_compat_cmask(image)) {
- va = gpu_address + image->offset + image->planes[0].surface.cmask_offset;
-
- fmask_state[5] |= S_008F24_META_DATA_ADDRESS(va >> 40);
- fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
- fmask_state[7] |= va >> 8;
- }
- } else {
- fmask_state[3] |= S_008F1C_TILING_INDEX(image->planes[0].surface.u.legacy.fmask.tiling_index);
- fmask_state[4] |= S_008F20_DEPTH(depth - 1) |
- S_008F20_PITCH(image->planes[0].surface.u.legacy.fmask.pitch_in_pixels - 1);
- fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);
-
- if (radv_image_is_tc_compat_cmask(image)) {
- va = gpu_address + image->offset + image->planes[0].surface.cmask_offset;
-
- fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
- fmask_state[7] |= va >> 8;
- }
- }
- } else
- memset(fmask_state, 0, 8 * 4);
- }
+ const struct util_format_description *desc;
+ enum pipe_swizzle swizzle[4];
+ int first_non_void;
+ unsigned num_format, data_format, type;
+
+ desc = vk_format_description(vk_format);
+
+ radv_compose_swizzle(desc, mapping, swizzle);
+
+ first_non_void = vk_format_get_first_non_void_channel(vk_format);
+
+ num_format = radv_translate_tex_numformat(vk_format, desc, first_non_void);
+ if (num_format == ~0) {
+ num_format = 0;
+ }
+
+ data_format = radv_translate_tex_dataformat(vk_format, desc, first_non_void);
+ if (data_format == ~0) {
+ data_format = 0;
+ }
+
+   /* S8 with either Z16 or Z32 HTILE needs a special format. */
+ if (device->physical_device->rad_info.chip_class == GFX9 && vk_format == VK_FORMAT_S8_UINT &&
+ radv_image_is_tc_compat_htile(image)) {
+ if (image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT)
+ data_format = V_008F14_IMG_DATA_FORMAT_S8_32;
+ else if (image->vk_format == VK_FORMAT_D16_UNORM_S8_UINT)
+ data_format = V_008F14_IMG_DATA_FORMAT_S8_16;
+ }
+ type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
+ is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
+ if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
+ height = 1;
+ depth = image->info.array_size;
+ } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY || type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
+ if (view_type != VK_IMAGE_VIEW_TYPE_3D)
+ depth = image->info.array_size;
+ } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
+ depth = image->info.array_size / 6;
+
+ state[0] = 0;
+ state[1] = (S_008F14_DATA_FORMAT(data_format) | S_008F14_NUM_FORMAT(num_format));
+ state[2] = (S_008F18_WIDTH(width - 1) | S_008F18_HEIGHT(height - 1) | S_008F18_PERF_MOD(4));
+ state[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
+ S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
+ S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
+ S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
+ S_008F1C_BASE_LEVEL(image->info.samples > 1 ? 0 : first_level) |
+ S_008F1C_LAST_LEVEL(image->info.samples > 1 ? util_logbase2(image->info.samples)
+ : last_level) |
+ S_008F1C_TYPE(type));
+ state[4] = 0;
+ state[5] = S_008F24_BASE_ARRAY(first_layer);
+ state[6] = 0;
+ state[7] = 0;
+
+ if (device->physical_device->rad_info.chip_class == GFX9) {
+ unsigned bc_swizzle = gfx9_border_color_swizzle(swizzle);
+
+ /* Depth is the last accessible layer on Gfx9.
+ * The hw doesn't need to know the total number of layers.
+ */
+ if (type == V_008F1C_SQ_RSRC_IMG_3D)
+ state[4] |= S_008F20_DEPTH(depth - 1);
+ else
+ state[4] |= S_008F20_DEPTH(last_layer);
+
+ state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);
+ state[5] |= S_008F24_MAX_MIP(image->info.samples > 1 ? util_logbase2(image->info.samples)
+ : image->info.levels - 1);
+ } else {
+ state[3] |= S_008F1C_POW2_PAD(image->info.levels > 1);
+ state[4] |= S_008F20_DEPTH(depth - 1);
+ state[5] |= S_008F24_LAST_ARRAY(last_layer);
+ }
+ if (image->planes[0].surface.dcc_offset) {
+ state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
+ } else {
+ /* The last dword is unused by hw. The shader uses it to clear
+ * bits in the first dword of sampler state.
+ */
+ if (device->physical_device->rad_info.chip_class <= GFX7 && image->info.samples <= 1) {
+ if (first_level == last_level)
+ state[7] = C_008F30_MAX_ANISO_RATIO;
+ else
+ state[7] = 0xffffffff;
+ }
+ }
+
+ /* Initialize the sampler view for FMASK. */
+ if (fmask_state) {
+ if (radv_image_has_fmask(image)) {
+ uint32_t fmask_format;
+ uint64_t gpu_address = radv_buffer_get_va(image->bo);
+ uint64_t va;
+
+ assert(image->plane_count == 1);
+
+ va = gpu_address + image->offset + image->planes[0].surface.fmask_offset;
+
+ if (device->physical_device->rad_info.chip_class == GFX9) {
+ fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK;
+ switch (image->info.samples) {
+ case 2:
+ num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_2_2;
+ break;
+ case 4:
+ num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_4;
+ break;
+ case 8:
+ num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_8_8;
+ break;
+ default:
+ unreachable("invalid nr_samples");
+ }
+ } else {
+ switch (image->info.samples) {
+ case 2:
+ fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
+ break;
+ case 4:
+ fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
+ break;
+ case 8:
+ fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
+ break;
+ default:
+ assert(0);
+ fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
+ }
+ num_format = V_008F14_IMG_NUM_FORMAT_UINT;
+ }
+
+ fmask_state[0] = va >> 8;
+ fmask_state[0] |= image->planes[0].surface.fmask_tile_swizzle;
+ fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) | S_008F14_DATA_FORMAT(fmask_format) |
+ S_008F14_NUM_FORMAT(num_format);
+ fmask_state[2] = S_008F18_WIDTH(width - 1) | S_008F18_HEIGHT(height - 1);
+ fmask_state[3] =
+ S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) | S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
+ S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
+ S_008F1C_TYPE(
+ radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
+ fmask_state[4] = 0;
+ fmask_state[5] = S_008F24_BASE_ARRAY(first_layer);
+ fmask_state[6] = 0;
+ fmask_state[7] = 0;
+
+ if (device->physical_device->rad_info.chip_class == GFX9) {
+ fmask_state[3] |= S_008F1C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode);
+ fmask_state[4] |= S_008F20_DEPTH(last_layer) |
+ S_008F20_PITCH(image->planes[0].surface.u.gfx9.fmask.epitch);
+ fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(1) | S_008F24_META_RB_ALIGNED(1);
+
+ if (radv_image_is_tc_compat_cmask(image)) {
+ va = gpu_address + image->offset + image->planes[0].surface.cmask_offset;
+
+ fmask_state[5] |= S_008F24_META_DATA_ADDRESS(va >> 40);
+ fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
+ fmask_state[7] |= va >> 8;
+ }
+ } else {
+ fmask_state[3] |=
+ S_008F1C_TILING_INDEX(image->planes[0].surface.u.legacy.fmask.tiling_index);
+ fmask_state[4] |=
+ S_008F20_DEPTH(depth - 1) |
+ S_008F20_PITCH(image->planes[0].surface.u.legacy.fmask.pitch_in_pixels - 1);
+ fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);
+
+ if (radv_image_is_tc_compat_cmask(image)) {
+ va = gpu_address + image->offset + image->planes[0].surface.cmask_offset;
+
+ fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
+ fmask_state[7] |= va >> 8;
+ }
+ }
+ } else
+ memset(fmask_state, 0, 8 * 4);
+ }
}
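A minimal sketch of the state[7] convention noted in the descriptor-building code above: on the GFX7-and-earlier single-sample path the last texture dword is populated so a shader can AND it into the first sampler dword, clearing the MAX_ANISO_RATIO field for single-level views. The standalone helper below is purely illustrative; its name and shape are assumptions.

#include <stdint.h>

/* Illustrative only: how a shader prolog could consume tex_desc[7] as a mask
 * for the sampler's first dword.  With first_level == last_level the mask is
 * C_008F30_MAX_ANISO_RATIO, which disables anisotropy; otherwise it is all
 * ones and the sampler state passes through unchanged. */
static inline void
example_apply_aniso_mask(uint32_t sampler_state[4], const uint32_t tex_desc[8])
{
   sampler_state[0] &= tex_desc[7];
}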
static void
-radv_make_texture_descriptor(struct radv_device *device,
- struct radv_image *image,
- bool is_storage_image,
- VkImageViewType view_type,
- VkFormat vk_format,
- const VkComponentMapping *mapping,
- unsigned first_level, unsigned last_level,
- unsigned first_layer, unsigned last_layer,
- unsigned width, unsigned height, unsigned depth,
- uint32_t *state,
- uint32_t *fmask_state)
+radv_make_texture_descriptor(struct radv_device *device, struct radv_image *image,
+ bool is_storage_image, VkImageViewType view_type, VkFormat vk_format,
+ const VkComponentMapping *mapping, unsigned first_level,
+ unsigned last_level, unsigned first_layer, unsigned last_layer,
+ unsigned width, unsigned height, unsigned depth, uint32_t *state,
+ uint32_t *fmask_state)
{
- if (device->physical_device->rad_info.chip_class >= GFX10) {
- gfx10_make_texture_descriptor(device, image, is_storage_image,
- view_type, vk_format, mapping,
- first_level, last_level,
- first_layer, last_layer,
- width, height, depth,
- state, fmask_state);
- } else {
- si_make_texture_descriptor(device, image, is_storage_image,
- view_type, vk_format, mapping,
- first_level, last_level,
- first_layer, last_layer,
- width, height, depth,
- state, fmask_state);
- }
+ if (device->physical_device->rad_info.chip_class >= GFX10) {
+ gfx10_make_texture_descriptor(device, image, is_storage_image, view_type, vk_format, mapping,
+ first_level, last_level, first_layer, last_layer, width, height,
+ depth, state, fmask_state);
+ } else {
+ si_make_texture_descriptor(device, image, is_storage_image, view_type, vk_format, mapping,
+ first_level, last_level, first_layer, last_layer, width, height,
+ depth, state, fmask_state);
+ }
}
static void
-radv_query_opaque_metadata(struct radv_device *device,
- struct radv_image *image,
- struct radeon_bo_metadata *md)
+radv_query_opaque_metadata(struct radv_device *device, struct radv_image *image,
+ struct radeon_bo_metadata *md)
{
- static const VkComponentMapping fixedmapping;
- uint32_t desc[8];
+ static const VkComponentMapping fixedmapping;
+ uint32_t desc[8];
- assert(image->plane_count == 1);
+ assert(image->plane_count == 1);
- radv_make_texture_descriptor(device, image, false,
- (VkImageViewType)image->type, image->vk_format,
- &fixedmapping, 0, image->info.levels - 1, 0,
- image->info.array_size - 1,
- image->info.width, image->info.height,
- image->info.depth,
- desc, NULL);
+ radv_make_texture_descriptor(device, image, false, (VkImageViewType)image->type,
+ image->vk_format, &fixedmapping, 0, image->info.levels - 1, 0,
+ image->info.array_size - 1, image->info.width, image->info.height,
+ image->info.depth, desc, NULL);
- si_set_mutable_tex_desc_fields(device, image, &image->planes[0].surface.u.legacy.level[0], 0, 0, 0,
- image->planes[0].surface.blk_w, false, false, false, false, desc);
+ si_set_mutable_tex_desc_fields(device, image, &image->planes[0].surface.u.legacy.level[0], 0, 0,
+ 0, image->planes[0].surface.blk_w, false, false, false, false,
+ desc);
- ac_surface_get_umd_metadata(&device->physical_device->rad_info, &image->planes[0].surface,
- image->info.levels, desc, &md->size_metadata, md->metadata);
+ ac_surface_get_umd_metadata(&device->physical_device->rad_info, &image->planes[0].surface,
+ image->info.levels, desc, &md->size_metadata, md->metadata);
}
void
-radv_init_metadata(struct radv_device *device,
- struct radv_image *image,
- struct radeon_bo_metadata *metadata)
+radv_init_metadata(struct radv_device *device, struct radv_image *image,
+ struct radeon_bo_metadata *metadata)
{
- struct radeon_surf *surface = &image->planes[0].surface;
-
- memset(metadata, 0, sizeof(*metadata));
-
- if (device->physical_device->rad_info.chip_class >= GFX9) {
- uint64_t dcc_offset = image->offset + (surface->display_dcc_offset ?
- surface->display_dcc_offset : surface->dcc_offset);
- metadata->u.gfx9.swizzle_mode = surface->u.gfx9.surf.swizzle_mode;
- metadata->u.gfx9.dcc_offset_256b = dcc_offset >> 8;
- metadata->u.gfx9.dcc_pitch_max = surface->u.gfx9.display_dcc_pitch_max;
- metadata->u.gfx9.dcc_independent_64b_blocks = surface->u.gfx9.dcc.independent_64B_blocks;
- metadata->u.gfx9.dcc_independent_128b_blocks = surface->u.gfx9.dcc.independent_128B_blocks;
- metadata->u.gfx9.dcc_max_compressed_block_size = surface->u.gfx9.dcc.max_compressed_block_size;
- metadata->u.gfx9.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
- } else {
- metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ?
- RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
- metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ?
- RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
- metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
- metadata->u.legacy.bankw = surface->u.legacy.bankw;
- metadata->u.legacy.bankh = surface->u.legacy.bankh;
- metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
- metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
- metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
- metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
- metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
- }
- radv_query_opaque_metadata(device, image, metadata);
+ struct radeon_surf *surface = &image->planes[0].surface;
+
+ memset(metadata, 0, sizeof(*metadata));
+
+ if (device->physical_device->rad_info.chip_class >= GFX9) {
+ uint64_t dcc_offset =
+ image->offset +
+ (surface->display_dcc_offset ? surface->display_dcc_offset : surface->dcc_offset);
+ metadata->u.gfx9.swizzle_mode = surface->u.gfx9.surf.swizzle_mode;
+ metadata->u.gfx9.dcc_offset_256b = dcc_offset >> 8;
+ metadata->u.gfx9.dcc_pitch_max = surface->u.gfx9.display_dcc_pitch_max;
+ metadata->u.gfx9.dcc_independent_64b_blocks = surface->u.gfx9.dcc.independent_64B_blocks;
+ metadata->u.gfx9.dcc_independent_128b_blocks = surface->u.gfx9.dcc.independent_128B_blocks;
+ metadata->u.gfx9.dcc_max_compressed_block_size =
+ surface->u.gfx9.dcc.max_compressed_block_size;
+ metadata->u.gfx9.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
+ } else {
+ metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D
+ ? RADEON_LAYOUT_TILED
+ : RADEON_LAYOUT_LINEAR;
+ metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D
+ ? RADEON_LAYOUT_TILED
+ : RADEON_LAYOUT_LINEAR;
+ metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
+ metadata->u.legacy.bankw = surface->u.legacy.bankw;
+ metadata->u.legacy.bankh = surface->u.legacy.bankh;
+ metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
+ metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
+ metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
+ metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
+ metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
+ }
+ radv_query_opaque_metadata(device, image, metadata);
}
void
-radv_image_override_offset_stride(struct radv_device *device,
- struct radv_image *image,
+radv_image_override_offset_stride(struct radv_device *device, struct radv_image *image,
uint64_t offset, uint32_t stride)
{
- ac_surface_override_offset_stride(&device->physical_device->rad_info,
- &image->planes[0].surface,
- image->info.levels, offset, stride);
+ ac_surface_override_offset_stride(&device->physical_device->rad_info, &image->planes[0].surface,
+ image->info.levels, offset, stride);
}
static void
radv_image_alloc_single_sample_cmask(const struct radv_device *device,
- const struct radv_image *image,
- struct radeon_surf *surf)
+ const struct radv_image *image, struct radeon_surf *surf)
{
- if (!surf->cmask_size || surf->cmask_offset || surf->bpe > 8 ||
- image->info.levels > 1 || image->info.depth > 1 ||
- radv_image_has_dcc(image) ||
- !radv_image_use_fast_clear_for_image(device, image) ||
- (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT))
- return;
-
- assert(image->info.storage_samples == 1);
-
- surf->cmask_offset = align64(surf->total_size, surf->cmask_alignment);
- surf->total_size = surf->cmask_offset + surf->cmask_size;
- surf->alignment = MAX2(surf->alignment, surf->cmask_alignment);
+ if (!surf->cmask_size || surf->cmask_offset || surf->bpe > 8 || image->info.levels > 1 ||
+ image->info.depth > 1 || radv_image_has_dcc(image) ||
+ !radv_image_use_fast_clear_for_image(device, image) ||
+ (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT))
+ return;
+
+ assert(image->info.storage_samples == 1);
+
+ surf->cmask_offset = align64(surf->total_size, surf->cmask_alignment);
+ surf->total_size = surf->cmask_offset + surf->cmask_size;
+ surf->alignment = MAX2(surf->alignment, surf->cmask_alignment);
}
static void
radv_image_alloc_values(const struct radv_device *device, struct radv_image *image)
{
- /* images with modifiers can be potentially imported */
- if (image->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
- return;
-
- if (radv_image_has_cmask(image) || radv_image_has_dcc(image)) {
- image->fce_pred_offset = image->size;
- image->size += 8 * image->info.levels;
- }
-
- if (radv_image_use_dcc_predication(device, image)) {
- image->dcc_pred_offset = image->size;
- image->size += 8 * image->info.levels;
- }
-
- if (radv_image_has_dcc(image) || radv_image_has_cmask(image) ||
- radv_image_has_htile(image)) {
- image->clear_value_offset = image->size;
- image->size += 8 * image->info.levels;
- }
-
- if (radv_image_is_tc_compat_htile(image) &&
- device->physical_device->rad_info.has_tc_compat_zrange_bug) {
- /* Metadata for the TC-compatible HTILE hardware bug which
-		 * has to be fixed by updating ZRANGE_PRECISION when doing
- * fast depth clears to 0.0f.
- */
- image->tc_compat_zrange_offset = image->size;
- image->size += image->info.levels * 4;
- }
+ /* images with modifiers can be potentially imported */
+ if (image->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
+ return;
+
+ if (radv_image_has_cmask(image) || radv_image_has_dcc(image)) {
+ image->fce_pred_offset = image->size;
+ image->size += 8 * image->info.levels;
+ }
+
+ if (radv_image_use_dcc_predication(device, image)) {
+ image->dcc_pred_offset = image->size;
+ image->size += 8 * image->info.levels;
+ }
+
+ if (radv_image_has_dcc(image) || radv_image_has_cmask(image) || radv_image_has_htile(image)) {
+ image->clear_value_offset = image->size;
+ image->size += 8 * image->info.levels;
+ }
+
+ if (radv_image_is_tc_compat_htile(image) &&
+ device->physical_device->rad_info.has_tc_compat_zrange_bug) {
+ /* Metadata for the TC-compatible HTILE hardware bug which
+       * has to be fixed by updating ZRANGE_PRECISION when doing
+ * fast depth clears to 0.0f.
+ */
+ image->tc_compat_zrange_offset = image->size;
+ image->size += image->info.levels * 4;
+ }
}
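As a worked example of the allocation rule above (one 64-bit slot per mip level for each metadata block, 32-bit for the zrange workaround), a condensed, hypothetical restatement:

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical helper mirroring radv_image_alloc_values(): a 3-level color
 * image with CMASK/DCC (FCE predicate), DCC predication and clear values,
 * but no zrange workaround, appends 3 * (8 + 8 + 8) = 72 bytes. */
static uint64_t
example_image_metadata_size(unsigned levels, bool fce_pred, bool dcc_pred,
                            bool clear_values, bool zrange_workaround)
{
   uint64_t size = 0;
   if (fce_pred)
      size += 8 * levels; /* fast-clear-eliminate predicate */
   if (dcc_pred)
      size += 8 * levels; /* DCC decompression predicate */
   if (clear_values)
      size += 8 * levels; /* fast clear values */
   if (zrange_workaround)
      size += 4 * levels; /* TC-compat zrange metadata */
   return size;
}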
-
static void
radv_image_reset_layout(struct radv_image *image)
{
- image->size = 0;
- image->alignment = 1;
-
- image->tc_compatible_cmask = 0;
- image->fce_pred_offset = image->dcc_pred_offset = 0;
- image->clear_value_offset = image->tc_compat_zrange_offset = 0;
-
- for (unsigned i = 0; i < image->plane_count; ++i) {
- VkFormat format = vk_format_get_plane_format(image->vk_format, i);
-
- uint64_t flags = image->planes[i].surface.flags;
- uint64_t modifier = image->planes[i].surface.modifier;
- memset(image->planes + i, 0, sizeof(image->planes[i]));
-
- image->planes[i].surface.flags = flags;
- image->planes[i].surface.modifier = modifier;
- image->planes[i].surface.blk_w = vk_format_get_blockwidth(format);
- image->planes[i].surface.blk_h = vk_format_get_blockheight(format);
- image->planes[i].surface.bpe = vk_format_get_blocksize(vk_format_depth_only(format));
-
- /* align byte per element on dword */
- if (image->planes[i].surface.bpe == 3) {
- image->planes[i].surface.bpe = 4;
- }
- }
+ image->size = 0;
+ image->alignment = 1;
+
+ image->tc_compatible_cmask = 0;
+ image->fce_pred_offset = image->dcc_pred_offset = 0;
+ image->clear_value_offset = image->tc_compat_zrange_offset = 0;
+
+ for (unsigned i = 0; i < image->plane_count; ++i) {
+ VkFormat format = vk_format_get_plane_format(image->vk_format, i);
+
+ uint64_t flags = image->planes[i].surface.flags;
+ uint64_t modifier = image->planes[i].surface.modifier;
+ memset(image->planes + i, 0, sizeof(image->planes[i]));
+
+ image->planes[i].surface.flags = flags;
+ image->planes[i].surface.modifier = modifier;
+ image->planes[i].surface.blk_w = vk_format_get_blockwidth(format);
+ image->planes[i].surface.blk_h = vk_format_get_blockheight(format);
+ image->planes[i].surface.bpe = vk_format_get_blocksize(vk_format_depth_only(format));
+
+ /* align byte per element on dword */
+ if (image->planes[i].surface.bpe == 3) {
+ image->planes[i].surface.bpe = 4;
+ }
+ }
}
static VkResult
radv_image_init_retile_map(struct radv_device *device, struct radv_image *image)
{
- /* If we do a relayout we have to free the old buffer. */
- if(image->retile_map)
- device->ws->buffer_destroy(device->ws, image->retile_map);
-
- image->retile_map = NULL;
- if (!radv_image_has_dcc(image) || !image->planes[0].surface.display_dcc_offset ||
- image->planes[0].surface.display_dcc_offset == image->planes[0].surface.dcc_offset)
- return VK_SUCCESS;
-
- uint32_t retile_map_size = ac_surface_get_retile_map_size(&image->planes[0].surface);
- image->retile_map = device->ws->buffer_create(device->ws, retile_map_size, 4096,
- RADEON_DOMAIN_VRAM, RADEON_FLAG_READ_ONLY |
- RADEON_FLAG_NO_INTERPROCESS_SHARING,
- RADV_BO_PRIORITY_METADATA);
- if (!image->retile_map) {
- return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
- }
- void *data = device->ws->buffer_map(image->retile_map);
- if (!data) {
- device->ws->buffer_destroy(device->ws, image->retile_map);
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- }
-
- memcpy(data, image->planes[0].surface.u.gfx9.dcc_retile_map, retile_map_size);
- return VK_SUCCESS;
+ /* If we do a relayout we have to free the old buffer. */
+ if (image->retile_map)
+ device->ws->buffer_destroy(device->ws, image->retile_map);
+
+ image->retile_map = NULL;
+ if (!radv_image_has_dcc(image) || !image->planes[0].surface.display_dcc_offset ||
+ image->planes[0].surface.display_dcc_offset == image->planes[0].surface.dcc_offset)
+ return VK_SUCCESS;
+
+ uint32_t retile_map_size = ac_surface_get_retile_map_size(&image->planes[0].surface);
+ image->retile_map = device->ws->buffer_create(
+ device->ws, retile_map_size, 4096, RADEON_DOMAIN_VRAM,
+ RADEON_FLAG_READ_ONLY | RADEON_FLAG_NO_INTERPROCESS_SHARING, RADV_BO_PRIORITY_METADATA);
+ if (!image->retile_map) {
+ return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ }
+ void *data = device->ws->buffer_map(image->retile_map);
+ if (!data) {
+ device->ws->buffer_destroy(device->ws, image->retile_map);
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+
+ memcpy(data, image->planes[0].surface.u.gfx9.dcc_retile_map, retile_map_size);
+ return VK_SUCCESS;
}
VkResult
-radv_image_create_layout(struct radv_device *device,
- struct radv_image_create_info create_info,
+radv_image_create_layout(struct radv_device *device, struct radv_image_create_info create_info,
const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *mod_info,
struct radv_image *image)
{
- /* Clear the pCreateInfo pointer so we catch issues in the delayed case when we test in the
- * common internal case. */
- create_info.vk_info = NULL;
-
- struct ac_surf_info image_info = image->info;
- VkResult result = radv_patch_image_from_extra_info(device, image, &create_info, &image_info);
- if (result != VK_SUCCESS)
- return result;
-
- assert(!mod_info || mod_info->drmFormatModifierPlaneCount >= image->plane_count);
-
- radv_image_reset_layout(image);
-
- for (unsigned plane = 0; plane < image->plane_count; ++plane) {
- struct ac_surf_info info = image_info;
- uint64_t offset;
- unsigned stride;
-
- info.width = vk_format_get_plane_width(image->vk_format, plane, info.width);
- info.height = vk_format_get_plane_height(image->vk_format, plane, info.height);
-
- if (create_info.no_metadata_planes || image->plane_count > 1) {
- image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC |
- RADEON_SURF_NO_FMASK |
- RADEON_SURF_NO_HTILE;
- }
-
- device->ws->surface_init(device->ws, &info, &image->planes[plane].surface);
-
- if (create_info.bo_metadata && !mod_info &&
- !ac_surface_set_umd_metadata(&device->physical_device->rad_info,
- &image->planes[plane].surface,
- image_info.storage_samples, image_info.levels,
- create_info.bo_metadata->size_metadata,
- create_info.bo_metadata->metadata))
- return VK_ERROR_INVALID_EXTERNAL_HANDLE;
-
- if (!create_info.no_metadata_planes && !create_info.bo_metadata &&
- image->plane_count == 1 && !mod_info)
- radv_image_alloc_single_sample_cmask(device, image, &image->planes[plane].surface);
-
- if (mod_info) {
- if (mod_info->pPlaneLayouts[plane].rowPitch % image->planes[plane].surface.bpe ||
- !mod_info->pPlaneLayouts[plane].rowPitch)
- return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
-
- offset = mod_info->pPlaneLayouts[plane].offset;
- stride = mod_info->pPlaneLayouts[plane].rowPitch / image->planes[plane].surface.bpe;
- } else {
- offset = align(image->size, image->planes[plane].surface.alignment);
- stride = 0; /* 0 means no override */
- }
-
- if (!ac_surface_override_offset_stride(&device->physical_device->rad_info,
- &image->planes[plane].surface,
- image->info.levels,
- offset,
- stride))
- return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
-
- /* Validate DCC offsets in modifier layout. */
- if (image->plane_count == 1 && mod_info) {
- unsigned mem_planes = ac_surface_get_nplanes(&image->planes[plane].surface);
- if (mod_info->drmFormatModifierPlaneCount != mem_planes)
- return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
-
- for (unsigned i = 1; i < mem_planes; ++i) {
- if (ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class,
- &image->planes[plane].surface, i, 0) !=
- mod_info->pPlaneLayouts[i].offset)
- return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
- }
- }
-
- image->size = MAX2(image->size, offset + image->planes[plane].surface.total_size);
- image->alignment = MAX2(image->alignment, image->planes[plane].surface.alignment);
-
- image->planes[plane].format = vk_format_get_plane_format(image->vk_format, plane);
- }
-
- image->tc_compatible_cmask = radv_image_has_cmask(image) &&
- radv_use_tc_compat_cmask_for_image(device, image);
-
- radv_image_alloc_values(device, image);
-
- result = radv_image_init_retile_map(device, image);
- if (result != VK_SUCCESS)
- return result;
-
- assert(image->planes[0].surface.surf_size);
- assert(image->planes[0].surface.modifier == DRM_FORMAT_MOD_INVALID ||
- ac_modifier_has_dcc(image->planes[0].surface.modifier) == radv_image_has_dcc(image));
- return VK_SUCCESS;
+ /* Clear the pCreateInfo pointer so we catch issues in the delayed case when we test in the
+ * common internal case. */
+ create_info.vk_info = NULL;
+
+ struct ac_surf_info image_info = image->info;
+ VkResult result = radv_patch_image_from_extra_info(device, image, &create_info, &image_info);
+ if (result != VK_SUCCESS)
+ return result;
+
+ assert(!mod_info || mod_info->drmFormatModifierPlaneCount >= image->plane_count);
+
+ radv_image_reset_layout(image);
+
+ for (unsigned plane = 0; plane < image->plane_count; ++plane) {
+ struct ac_surf_info info = image_info;
+ uint64_t offset;
+ unsigned stride;
+
+ info.width = vk_format_get_plane_width(image->vk_format, plane, info.width);
+ info.height = vk_format_get_plane_height(image->vk_format, plane, info.height);
+
+ if (create_info.no_metadata_planes || image->plane_count > 1) {
+ image->planes[plane].surface.flags |=
+ RADEON_SURF_DISABLE_DCC | RADEON_SURF_NO_FMASK | RADEON_SURF_NO_HTILE;
+ }
+
+ device->ws->surface_init(device->ws, &info, &image->planes[plane].surface);
+
+ if (create_info.bo_metadata && !mod_info &&
+ !ac_surface_set_umd_metadata(&device->physical_device->rad_info,
+ &image->planes[plane].surface, image_info.storage_samples,
+ image_info.levels, create_info.bo_metadata->size_metadata,
+ create_info.bo_metadata->metadata))
+ return VK_ERROR_INVALID_EXTERNAL_HANDLE;
+
+ if (!create_info.no_metadata_planes && !create_info.bo_metadata && image->plane_count == 1 &&
+ !mod_info)
+ radv_image_alloc_single_sample_cmask(device, image, &image->planes[plane].surface);
+
+ if (mod_info) {
+ if (mod_info->pPlaneLayouts[plane].rowPitch % image->planes[plane].surface.bpe ||
+ !mod_info->pPlaneLayouts[plane].rowPitch)
+ return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
+
+ offset = mod_info->pPlaneLayouts[plane].offset;
+ stride = mod_info->pPlaneLayouts[plane].rowPitch / image->planes[plane].surface.bpe;
+ } else {
+ offset = align(image->size, image->planes[plane].surface.alignment);
+ stride = 0; /* 0 means no override */
+ }
+
+ if (!ac_surface_override_offset_stride(&device->physical_device->rad_info,
+ &image->planes[plane].surface, image->info.levels,
+ offset, stride))
+ return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
+
+ /* Validate DCC offsets in modifier layout. */
+ if (image->plane_count == 1 && mod_info) {
+ unsigned mem_planes = ac_surface_get_nplanes(&image->planes[plane].surface);
+ if (mod_info->drmFormatModifierPlaneCount != mem_planes)
+ return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
+
+ for (unsigned i = 1; i < mem_planes; ++i) {
+ if (ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class,
+ &image->planes[plane].surface, i,
+ 0) != mod_info->pPlaneLayouts[i].offset)
+ return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
+ }
+ }
+
+ image->size = MAX2(image->size, offset + image->planes[plane].surface.total_size);
+ image->alignment = MAX2(image->alignment, image->planes[plane].surface.alignment);
+
+ image->planes[plane].format = vk_format_get_plane_format(image->vk_format, plane);
+ }
+
+ image->tc_compatible_cmask =
+ radv_image_has_cmask(image) && radv_use_tc_compat_cmask_for_image(device, image);
+
+ radv_image_alloc_values(device, image);
+
+ result = radv_image_init_retile_map(device, image);
+ if (result != VK_SUCCESS)
+ return result;
+
+ assert(image->planes[0].surface.surf_size);
+ assert(image->planes[0].surface.modifier == DRM_FORMAT_MOD_INVALID ||
+ ac_modifier_has_dcc(image->planes[0].surface.modifier) == radv_image_has_dcc(image));
+ return VK_SUCCESS;
}
static void
-radv_destroy_image(struct radv_device *device,
- const VkAllocationCallbacks *pAllocator,
- struct radv_image *image)
+radv_destroy_image(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
+ struct radv_image *image)
{
- if ((image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) && image->bo)
- device->ws->buffer_destroy(device->ws, image->bo);
+ if ((image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) && image->bo)
+ device->ws->buffer_destroy(device->ws, image->bo);
- if(image->retile_map)
- device->ws->buffer_destroy(device->ws, image->retile_map);
+ if (image->retile_map)
+ device->ws->buffer_destroy(device->ws, image->retile_map);
- if (image->owned_memory != VK_NULL_HANDLE) {
- RADV_FROM_HANDLE(radv_device_memory, mem, image->owned_memory);
- radv_free_memory(device, pAllocator, mem);
- }
+ if (image->owned_memory != VK_NULL_HANDLE) {
+ RADV_FROM_HANDLE(radv_device_memory, mem, image->owned_memory);
+ radv_free_memory(device, pAllocator, mem);
+ }
- vk_object_base_finish(&image->base);
- vk_free2(&device->vk.alloc, pAllocator, image);
+ vk_object_base_finish(&image->base);
+ vk_free2(&device->vk.alloc, pAllocator, image);
}
static void
radv_image_print_info(struct radv_device *device, struct radv_image *image)
{
- fprintf(stderr, "Image:\n");
- fprintf(stderr, " Info: size=%" PRIu64 ", alignment=%" PRIu32 ", "
- "width=%" PRIu32 ", height=%" PRIu32 ", "
- "offset=%" PRIu64 ", array_size=%" PRIu32 "\n",
- image->size, image->alignment, image->info.width,
- image->info.height, image->offset, image->info.array_size);
- for (unsigned i = 0; i < image->plane_count; ++i) {
- const struct radv_image_plane *plane = &image->planes[i];
- const struct radeon_surf *surf = &plane->surface;
- const struct util_format_description *desc =
- vk_format_description(plane->format);
- uint64_t offset = ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class,
- &plane->surface, 0, 0);
-
- fprintf(stderr,
- " Plane[%u]: vkformat=%s, offset=%" PRIu64 "\n",
- i, desc->name, offset);
-
- ac_surface_print_info(stderr,
- &device->physical_device->rad_info,
- surf);
- }
+ fprintf(stderr, "Image:\n");
+ fprintf(stderr,
+ " Info: size=%" PRIu64 ", alignment=%" PRIu32 ", "
+ "width=%" PRIu32 ", height=%" PRIu32 ", "
+ "offset=%" PRIu64 ", array_size=%" PRIu32 "\n",
+ image->size, image->alignment, image->info.width, image->info.height, image->offset,
+ image->info.array_size);
+ for (unsigned i = 0; i < image->plane_count; ++i) {
+ const struct radv_image_plane *plane = &image->planes[i];
+ const struct radeon_surf *surf = &plane->surface;
+ const struct util_format_description *desc = vk_format_description(plane->format);
+ uint64_t offset = ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class,
+ &plane->surface, 0, 0);
+
+ fprintf(stderr, " Plane[%u]: vkformat=%s, offset=%" PRIu64 "\n", i, desc->name, offset);
+
+ ac_surface_print_info(stderr, &device->physical_device->rad_info, surf);
+ }
}
/**
* Determine if the given image can be fast cleared.
*/
static bool
-radv_image_can_fast_clear(const struct radv_device *device,
- const struct radv_image *image)
+radv_image_can_fast_clear(const struct radv_device *device, const struct radv_image *image)
{
- if (device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS)
- return false;
-
- if (vk_format_is_color(image->vk_format)) {
- if (!radv_image_has_cmask(image) && !radv_image_has_dcc(image))
- return false;
-
- /* RB+ doesn't work with CMASK fast clear on Stoney. */
- if (!radv_image_has_dcc(image) &&
- device->physical_device->rad_info.family == CHIP_STONEY)
- return false;
- } else {
- if (!radv_image_has_htile(image))
- return false;
- }
-
-	/* Do not fast clear 3D images. */
- if (image->type == VK_IMAGE_TYPE_3D)
- return false;
-
- return true;
+ if (device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS)
+ return false;
+
+ if (vk_format_is_color(image->vk_format)) {
+ if (!radv_image_has_cmask(image) && !radv_image_has_dcc(image))
+ return false;
+
+ /* RB+ doesn't work with CMASK fast clear on Stoney. */
+ if (!radv_image_has_dcc(image) && device->physical_device->rad_info.family == CHIP_STONEY)
+ return false;
+ } else {
+ if (!radv_image_has_htile(image))
+ return false;
+ }
+
+   /* Do not fast clear 3D images. */
+ if (image->type == VK_IMAGE_TYPE_3D)
+ return false;
+
+ return true;
}
static uint64_t
-radv_select_modifier(const struct radv_device *dev,
- VkFormat format,
+radv_select_modifier(const struct radv_device *dev, VkFormat format,
const struct VkImageDrmFormatModifierListCreateInfoEXT *mod_list)
{
- const struct radv_physical_device *pdev = dev->physical_device;
- unsigned mod_count;
-
- assert(mod_list->drmFormatModifierCount);
-
- /* We can allow everything here as it does not affect order and the application
- * is only allowed to specify modifiers that we support. */
- const struct ac_modifier_options modifier_options = {
- .dcc = true,
- .dcc_retile = true,
- };
-
- ac_get_supported_modifiers(&pdev->rad_info, &modifier_options,
- vk_format_to_pipe_format(format), &mod_count, NULL);
-
- uint64_t *mods = calloc(mod_count, sizeof(*mods));
-
- /* If allocations fail, fall back to a dumber solution. */
- if (!mods)
- return mod_list->pDrmFormatModifiers[0];
-
- ac_get_supported_modifiers(&pdev->rad_info, &modifier_options,
- vk_format_to_pipe_format(format), &mod_count, mods);
-
- for (unsigned i = 0; i < mod_count; ++i) {
- for (uint32_t j = 0; j < mod_list->drmFormatModifierCount; ++j) {
- if (mods[i] == mod_list->pDrmFormatModifiers[j]) {
- free(mods);
- return mod_list->pDrmFormatModifiers[j];
- }
- }
- }
- unreachable("App specified an invalid modifier");
+ const struct radv_physical_device *pdev = dev->physical_device;
+ unsigned mod_count;
+
+ assert(mod_list->drmFormatModifierCount);
+
+ /* We can allow everything here as it does not affect order and the application
+ * is only allowed to specify modifiers that we support. */
+ const struct ac_modifier_options modifier_options = {
+ .dcc = true,
+ .dcc_retile = true,
+ };
+
+ ac_get_supported_modifiers(&pdev->rad_info, &modifier_options, vk_format_to_pipe_format(format),
+ &mod_count, NULL);
+
+ uint64_t *mods = calloc(mod_count, sizeof(*mods));
+
+ /* If allocations fail, fall back to a dumber solution. */
+ if (!mods)
+ return mod_list->pDrmFormatModifiers[0];
+
+ ac_get_supported_modifiers(&pdev->rad_info, &modifier_options, vk_format_to_pipe_format(format),
+ &mod_count, mods);
+
+ for (unsigned i = 0; i < mod_count; ++i) {
+ for (uint32_t j = 0; j < mod_list->drmFormatModifierCount; ++j) {
+ if (mods[i] == mod_list->pDrmFormatModifiers[j]) {
+ free(mods);
+ return mod_list->pDrmFormatModifiers[j];
+ }
+ }
+ }
+ unreachable("App specified an invalid modifier");
}
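For context, a hypothetical caller-side sketch of the input this function selects from; an application would normally fill the list with modifiers previously reported via vkGetPhysicalDeviceFormatProperties2 / VkDrmFormatModifierPropertiesListEXT, and the first driver-preferred modifier that also appears in the list is chosen. The include paths and the single-entry list are assumptions for illustration.

#include <drm_fourcc.h> /* assumes libdrm's header is on the include path */
#include <vulkan/vulkan.h>

/* Illustrative only: one linear modifier; a real application would add
 * whatever modifiers it queried from the driver. */
static const uint64_t app_mods[] = {DRM_FORMAT_MOD_LINEAR};

static const VkImageDrmFormatModifierListCreateInfoEXT example_mod_list = {
   .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT,
   .drmFormatModifierCount = 1,
   .pDrmFormatModifiers = app_mods,
};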
VkResult
-radv_image_create(VkDevice _device,
- const struct radv_image_create_info *create_info,
- const VkAllocationCallbacks* alloc,
- VkImage *pImage)
+radv_image_create(VkDevice _device, const struct radv_image_create_info *create_info,
+ const VkAllocationCallbacks *alloc, VkImage *pImage)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
- uint64_t modifier = DRM_FORMAT_MOD_INVALID;
- struct radv_image *image = NULL;
- VkFormat format = radv_select_android_external_format(pCreateInfo->pNext,
- pCreateInfo->format);
- const struct VkImageDrmFormatModifierListCreateInfoEXT *mod_list =
- vk_find_struct_const(pCreateInfo->pNext, IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT);
- const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *explicit_mod =
- vk_find_struct_const(pCreateInfo->pNext, IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT);
- assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);
-
- const unsigned plane_count = vk_format_get_plane_count(format);
- const size_t image_struct_size = sizeof(*image) + sizeof(struct radv_image_plane) * plane_count;
-
- radv_assert(pCreateInfo->mipLevels > 0);
- radv_assert(pCreateInfo->arrayLayers > 0);
- radv_assert(pCreateInfo->samples > 0);
- radv_assert(pCreateInfo->extent.width > 0);
- radv_assert(pCreateInfo->extent.height > 0);
- radv_assert(pCreateInfo->extent.depth > 0);
-
- image = vk_zalloc2(&device->vk.alloc, alloc, image_struct_size, 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (!image)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
-
- vk_object_base_init(&device->vk, &image->base, VK_OBJECT_TYPE_IMAGE);
-
- image->type = pCreateInfo->imageType;
- image->info.width = pCreateInfo->extent.width;
- image->info.height = pCreateInfo->extent.height;
- image->info.depth = pCreateInfo->extent.depth;
- image->info.samples = pCreateInfo->samples;
- image->info.storage_samples = pCreateInfo->samples;
- image->info.array_size = pCreateInfo->arrayLayers;
- image->info.levels = pCreateInfo->mipLevels;
- image->info.num_channels = vk_format_get_nr_components(format);
-
- image->vk_format = format;
- image->tiling = pCreateInfo->tiling;
- image->usage = pCreateInfo->usage;
- image->flags = pCreateInfo->flags;
- image->plane_count = plane_count;
-
- image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
- if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
- for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i)
- if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL ||
- pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_FOREIGN_EXT)
- image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
- else
- image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i];
- }
-
- const VkExternalMemoryImageCreateInfo *external_info =
- vk_find_struct_const(pCreateInfo->pNext,
- EXTERNAL_MEMORY_IMAGE_CREATE_INFO) ;
-
- image->shareable = external_info;
- if (!vk_format_is_depth_or_stencil(format) && !image->shareable &&
- !(image->flags & VK_IMAGE_CREATE_SPARSE_ALIASED_BIT) &&
- pCreateInfo->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
- image->info.surf_index = &device->image_mrt_offset_counter;
- }
-
- if (mod_list)
- modifier = radv_select_modifier(device, format, mod_list);
- else if (explicit_mod)
- modifier = explicit_mod->drmFormatModifier;
-
- for (unsigned plane = 0; plane < image->plane_count; ++plane) {
- image->planes[plane].surface.flags =
- radv_get_surface_flags(device, image, plane, pCreateInfo, format);
- image->planes[plane].surface.modifier = modifier;
- }
-
- bool delay_layout = external_info &&
- (external_info->handleTypes & VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID);
-
- if (delay_layout) {
- *pImage = radv_image_to_handle(image);
- assert (!(image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT));
- return VK_SUCCESS;
- }
-
- VkResult result = radv_image_create_layout(device, *create_info, explicit_mod, image);
- if (result != VK_SUCCESS) {
- radv_destroy_image(device, alloc, image);
- return result;
- }
-
- if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
- image->alignment = MAX2(image->alignment, 4096);
- image->size = align64(image->size, image->alignment);
- image->offset = 0;
-
- image->bo = device->ws->buffer_create(device->ws, image->size, image->alignment,
- 0, RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL);
- if (!image->bo) {
- radv_destroy_image(device, alloc, image);
- return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
- }
- }
-
- if (device->instance->debug_flags & RADV_DEBUG_IMG) {
- radv_image_print_info(device, image);
- }
-
- *pImage = radv_image_to_handle(image);
-
- return VK_SUCCESS;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
+ uint64_t modifier = DRM_FORMAT_MOD_INVALID;
+ struct radv_image *image = NULL;
+ VkFormat format = radv_select_android_external_format(pCreateInfo->pNext, pCreateInfo->format);
+ const struct VkImageDrmFormatModifierListCreateInfoEXT *mod_list =
+ vk_find_struct_const(pCreateInfo->pNext, IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT);
+ const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *explicit_mod =
+ vk_find_struct_const(pCreateInfo->pNext, IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT);
+ assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);
+
+ const unsigned plane_count = vk_format_get_plane_count(format);
+ const size_t image_struct_size = sizeof(*image) + sizeof(struct radv_image_plane) * plane_count;
+
+ radv_assert(pCreateInfo->mipLevels > 0);
+ radv_assert(pCreateInfo->arrayLayers > 0);
+ radv_assert(pCreateInfo->samples > 0);
+ radv_assert(pCreateInfo->extent.width > 0);
+ radv_assert(pCreateInfo->extent.height > 0);
+ radv_assert(pCreateInfo->extent.depth > 0);
+
+ image =
+ vk_zalloc2(&device->vk.alloc, alloc, image_struct_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (!image)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ vk_object_base_init(&device->vk, &image->base, VK_OBJECT_TYPE_IMAGE);
+
+ image->type = pCreateInfo->imageType;
+ image->info.width = pCreateInfo->extent.width;
+ image->info.height = pCreateInfo->extent.height;
+ image->info.depth = pCreateInfo->extent.depth;
+ image->info.samples = pCreateInfo->samples;
+ image->info.storage_samples = pCreateInfo->samples;
+ image->info.array_size = pCreateInfo->arrayLayers;
+ image->info.levels = pCreateInfo->mipLevels;
+ image->info.num_channels = vk_format_get_nr_components(format);
+
+ image->vk_format = format;
+ image->tiling = pCreateInfo->tiling;
+ image->usage = pCreateInfo->usage;
+ image->flags = pCreateInfo->flags;
+ image->plane_count = plane_count;
+
+ image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
+ if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
+ for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i)
+ if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL ||
+ pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_FOREIGN_EXT)
+ image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
+ else
+ image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i];
+ }
+
+ const VkExternalMemoryImageCreateInfo *external_info =
+ vk_find_struct_const(pCreateInfo->pNext, EXTERNAL_MEMORY_IMAGE_CREATE_INFO);
+
+ image->shareable = external_info;
+ if (!vk_format_is_depth_or_stencil(format) && !image->shareable &&
+ !(image->flags & VK_IMAGE_CREATE_SPARSE_ALIASED_BIT) &&
+ pCreateInfo->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
+ image->info.surf_index = &device->image_mrt_offset_counter;
+ }
+
+ if (mod_list)
+ modifier = radv_select_modifier(device, format, mod_list);
+ else if (explicit_mod)
+ modifier = explicit_mod->drmFormatModifier;
+
+ for (unsigned plane = 0; plane < image->plane_count; ++plane) {
+ image->planes[plane].surface.flags =
+ radv_get_surface_flags(device, image, plane, pCreateInfo, format);
+ image->planes[plane].surface.modifier = modifier;
+ }
+
+ bool delay_layout =
+ external_info && (external_info->handleTypes &
+ VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID);
+
+ if (delay_layout) {
+ *pImage = radv_image_to_handle(image);
+ assert(!(image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT));
+ return VK_SUCCESS;
+ }
+
+ VkResult result = radv_image_create_layout(device, *create_info, explicit_mod, image);
+ if (result != VK_SUCCESS) {
+ radv_destroy_image(device, alloc, image);
+ return result;
+ }
+
+ if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
+ image->alignment = MAX2(image->alignment, 4096);
+ image->size = align64(image->size, image->alignment);
+ image->offset = 0;
+
+ image->bo = device->ws->buffer_create(device->ws, image->size, image->alignment, 0,
+ RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL);
+ if (!image->bo) {
+ radv_destroy_image(device, alloc, image);
+ return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ }
+ }
+
+ if (device->instance->debug_flags & RADV_DEBUG_IMG) {
+ radv_image_print_info(device, image);
+ }
+
+ *pImage = radv_image_to_handle(image);
+
+ return VK_SUCCESS;
}
static void
-radv_image_view_make_descriptor(struct radv_image_view *iview,
- struct radv_device *device,
- VkFormat vk_format,
- const VkComponentMapping *components,
- bool is_storage_image, bool disable_compression,
- unsigned plane_id, unsigned descriptor_plane_id)
+radv_image_view_make_descriptor(struct radv_image_view *iview, struct radv_device *device,
+ VkFormat vk_format, const VkComponentMapping *components,
+ bool is_storage_image, bool disable_compression, unsigned plane_id,
+ unsigned descriptor_plane_id)
{
- struct radv_image *image = iview->image;
- struct radv_image_plane *plane = &image->planes[plane_id];
- bool is_stencil = iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT;
- uint32_t blk_w;
- union radv_descriptor *descriptor;
- uint32_t hw_level = 0;
-
- if (is_storage_image) {
- descriptor = &iview->storage_descriptor;
- } else {
- descriptor = &iview->descriptor;
- }
-
- assert(vk_format_get_plane_count(vk_format) == 1);
- assert(plane->surface.blk_w % vk_format_get_blockwidth(plane->format) == 0);
- blk_w = plane->surface.blk_w / vk_format_get_blockwidth(plane->format) * vk_format_get_blockwidth(vk_format);
-
- if (device->physical_device->rad_info.chip_class >= GFX9)
- hw_level = iview->base_mip;
- radv_make_texture_descriptor(device, image, is_storage_image,
- iview->type,
- vk_format,
- components,
- hw_level, hw_level + iview->level_count - 1,
- iview->base_layer,
- iview->base_layer + iview->layer_count - 1,
- vk_format_get_plane_width(image->vk_format, plane_id, iview->extent.width),
- vk_format_get_plane_height(image->vk_format, plane_id, iview->extent.height),
- iview->extent.depth,
- descriptor->plane_descriptors[descriptor_plane_id],
- descriptor_plane_id ? NULL : descriptor->fmask_descriptor);
-
- const struct legacy_surf_level *base_level_info = NULL;
- if (device->physical_device->rad_info.chip_class <= GFX9) {
- if (is_stencil)
- base_level_info = &plane->surface.u.legacy.stencil_level[iview->base_mip];
- else
- base_level_info = &plane->surface.u.legacy.level[iview->base_mip];
- }
-
- bool enable_write_compression = radv_image_use_dcc_image_stores(device, image);
- if (is_storage_image && !enable_write_compression)
- disable_compression = true;
- si_set_mutable_tex_desc_fields(device, image,
- base_level_info,
- plane_id,
- iview->base_mip,
- iview->base_mip,
- blk_w, is_stencil, is_storage_image,
- disable_compression, enable_write_compression,
- descriptor->plane_descriptors[descriptor_plane_id]);
+ struct radv_image *image = iview->image;
+ struct radv_image_plane *plane = &image->planes[plane_id];
+ bool is_stencil = iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT;
+ uint32_t blk_w;
+ union radv_descriptor *descriptor;
+ uint32_t hw_level = 0;
+
+ if (is_storage_image) {
+ descriptor = &iview->storage_descriptor;
+ } else {
+ descriptor = &iview->descriptor;
+ }
+
+ assert(vk_format_get_plane_count(vk_format) == 1);
+ assert(plane->surface.blk_w % vk_format_get_blockwidth(plane->format) == 0);
+ blk_w = plane->surface.blk_w / vk_format_get_blockwidth(plane->format) *
+ vk_format_get_blockwidth(vk_format);
+
+ if (device->physical_device->rad_info.chip_class >= GFX9)
+ hw_level = iview->base_mip;
+ radv_make_texture_descriptor(
+ device, image, is_storage_image, iview->type, vk_format, components, hw_level,
+ hw_level + iview->level_count - 1, iview->base_layer,
+ iview->base_layer + iview->layer_count - 1,
+ vk_format_get_plane_width(image->vk_format, plane_id, iview->extent.width),
+ vk_format_get_plane_height(image->vk_format, plane_id, iview->extent.height),
+ iview->extent.depth, descriptor->plane_descriptors[descriptor_plane_id],
+ descriptor_plane_id ? NULL : descriptor->fmask_descriptor);
+
+ const struct legacy_surf_level *base_level_info = NULL;
+ if (device->physical_device->rad_info.chip_class <= GFX9) {
+ if (is_stencil)
+ base_level_info = &plane->surface.u.legacy.stencil_level[iview->base_mip];
+ else
+ base_level_info = &plane->surface.u.legacy.level[iview->base_mip];
+ }
+
+ bool enable_write_compression = radv_image_use_dcc_image_stores(device, image);
+ if (is_storage_image && !enable_write_compression)
+ disable_compression = true;
+ si_set_mutable_tex_desc_fields(device, image, base_level_info, plane_id, iview->base_mip,
+ iview->base_mip, blk_w, is_stencil, is_storage_image,
+ disable_compression, enable_write_compression,
+ descriptor->plane_descriptors[descriptor_plane_id]);
}
static unsigned
radv_plane_from_aspect(VkImageAspectFlags mask)
{
- switch(mask) {
- case VK_IMAGE_ASPECT_PLANE_1_BIT:
- case VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT:
- return 1;
- case VK_IMAGE_ASPECT_PLANE_2_BIT:
- case VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT:
- return 2;
- case VK_IMAGE_ASPECT_MEMORY_PLANE_3_BIT_EXT:
- return 3;
- default:
- return 0;
- }
+ switch (mask) {
+ case VK_IMAGE_ASPECT_PLANE_1_BIT:
+ case VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT:
+ return 1;
+ case VK_IMAGE_ASPECT_PLANE_2_BIT:
+ case VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT:
+ return 2;
+ case VK_IMAGE_ASPECT_MEMORY_PLANE_3_BIT_EXT:
+ return 3;
+ default:
+ return 0;
+ }
}
VkFormat
radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask)
{
- switch(mask) {
- case VK_IMAGE_ASPECT_PLANE_0_BIT:
- return image->planes[0].format;
- case VK_IMAGE_ASPECT_PLANE_1_BIT:
- return image->planes[1].format;
- case VK_IMAGE_ASPECT_PLANE_2_BIT:
- return image->planes[2].format;
- case VK_IMAGE_ASPECT_STENCIL_BIT:
- return vk_format_stencil_only(image->vk_format);
- case VK_IMAGE_ASPECT_DEPTH_BIT:
- return vk_format_depth_only(image->vk_format);
- case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
- return vk_format_depth_only(image->vk_format);
- default:
- return image->vk_format;
- }
+ switch (mask) {
+ case VK_IMAGE_ASPECT_PLANE_0_BIT:
+ return image->planes[0].format;
+ case VK_IMAGE_ASPECT_PLANE_1_BIT:
+ return image->planes[1].format;
+ case VK_IMAGE_ASPECT_PLANE_2_BIT:
+ return image->planes[2].format;
+ case VK_IMAGE_ASPECT_STENCIL_BIT:
+ return vk_format_stencil_only(image->vk_format);
+ case VK_IMAGE_ASPECT_DEPTH_BIT:
+ return vk_format_depth_only(image->vk_format);
+ case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
+ return vk_format_depth_only(image->vk_format);
+ default:
+ return image->vk_format;
+ }
}
/**
@@ -1894,495 +1775,458 @@ radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask)
*/
static bool
radv_image_view_can_fast_clear(const struct radv_device *device,
- const struct radv_image_view *iview)
+ const struct radv_image_view *iview)
{
- struct radv_image *image;
+ struct radv_image *image;
- if (!iview)
- return false;
- image = iview->image;
+ if (!iview)
+ return false;
+ image = iview->image;
- /* Only fast clear if the image itself can be fast cleared. */
- if (!radv_image_can_fast_clear(device, image))
- return false;
+ /* Only fast clear if the image itself can be fast cleared. */
+ if (!radv_image_can_fast_clear(device, image))
+ return false;
- /* Only fast clear if all layers are bound. */
- if (iview->base_layer > 0 ||
- iview->layer_count != image->info.array_size)
- return false;
+ /* Only fast clear if all layers are bound. */
+ if (iview->base_layer > 0 || iview->layer_count != image->info.array_size)
+ return false;
- /* Only fast clear if the view covers the whole image. */
- if (!radv_image_extent_compare(image, &iview->extent))
- return false;
+ /* Only fast clear if the view covers the whole image. */
+ if (!radv_image_extent_compare(image, &iview->extent))
+ return false;
- return true;
+ return true;
}
void
-radv_image_view_init(struct radv_image_view *iview,
- struct radv_device *device,
- const VkImageViewCreateInfo* pCreateInfo,
- const struct radv_image_view_extra_create_info* extra_create_info)
+radv_image_view_init(struct radv_image_view *iview, struct radv_device *device,
+ const VkImageViewCreateInfo *pCreateInfo,
+ const struct radv_image_view_extra_create_info *extra_create_info)
{
- RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
- const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;
-
- switch (image->type) {
- case VK_IMAGE_TYPE_1D:
- case VK_IMAGE_TYPE_2D:
- assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <= image->info.array_size);
- break;
- case VK_IMAGE_TYPE_3D:
- assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1
- <= radv_minify(image->info.depth, range->baseMipLevel));
- break;
- default:
- unreachable("bad VkImageType");
- }
- iview->image = image;
- iview->bo = image->bo;
- iview->type = pCreateInfo->viewType;
- iview->plane_id = radv_plane_from_aspect(pCreateInfo->subresourceRange.aspectMask);
- iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask;
- iview->multiple_planes = vk_format_get_plane_count(image->vk_format) > 1 && iview->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT;
-
- iview->base_layer = range->baseArrayLayer;
- iview->layer_count = radv_get_layerCount(image, range);
- iview->base_mip = range->baseMipLevel;
- iview->level_count = radv_get_levelCount(image, range);
-
- iview->vk_format = pCreateInfo->format;
-
- /* If the image has an Android external format, pCreateInfo->format will be
- * VK_FORMAT_UNDEFINED. */
- if (iview->vk_format == VK_FORMAT_UNDEFINED)
- iview->vk_format = image->vk_format;
-
- if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
- iview->vk_format = vk_format_stencil_only(iview->vk_format);
- } else if (iview->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
- iview->vk_format = vk_format_depth_only(iview->vk_format);
- }
-
- if (device->physical_device->rad_info.chip_class >= GFX9) {
- iview->extent = (VkExtent3D) {
- .width = image->info.width,
- .height = image->info.height,
- .depth = image->info.depth,
- };
- } else {
- iview->extent = (VkExtent3D) {
- .width = radv_minify(image->info.width , range->baseMipLevel),
- .height = radv_minify(image->info.height, range->baseMipLevel),
- .depth = radv_minify(image->info.depth , range->baseMipLevel),
- };
- }
-
- if (iview->vk_format != image->planes[iview->plane_id].format) {
- unsigned view_bw = vk_format_get_blockwidth(iview->vk_format);
- unsigned view_bh = vk_format_get_blockheight(iview->vk_format);
- unsigned img_bw = vk_format_get_blockwidth(image->vk_format);
- unsigned img_bh = vk_format_get_blockheight(image->vk_format);
-
- iview->extent.width = round_up_u32(iview->extent.width * view_bw, img_bw);
- iview->extent.height = round_up_u32(iview->extent.height * view_bh, img_bh);
-
- /* Comment ported from amdvlk -
- * If we have the following image:
- * Uncompressed pixels Compressed block sizes (4x4)
- * mip0: 22 x 22 6 x 6
- * mip1: 11 x 11 3 x 3
- * mip2: 5 x 5 2 x 2
- * mip3: 2 x 2 1 x 1
- * mip4: 1 x 1 1 x 1
- *
- * On GFX9 the descriptor is always programmed with the WIDTH and HEIGHT of the base level and the HW is
- * calculating the degradation of the block sizes down the mip-chain as follows (straight-up
- * divide-by-two integer math):
- * mip0: 6x6
- * mip1: 3x3
- * mip2: 1x1
- * mip3: 1x1
- *
- * This means that mip2 will be missing texels.
- *
- * Fix this by calculating the base mip's width and height, then convert
- * that, and round it back up to get the level 0 size. Clamp the
- * converted size between the original values, and the physical extent
- * of the base mipmap.
- *
- * On GFX10 we have to take care to not go over the physical extent
- * of the base mipmap as otherwise the GPU computes a different layout.
- * Note that the GPU does use the same base-mip dimensions for both a
- * block compatible format and the compressed format, so even if we take
- * the plain converted dimensions the physical layout is correct.
- */
- if (device->physical_device->rad_info.chip_class >= GFX9 &&
- vk_format_is_compressed(image->vk_format) &&
- !vk_format_is_compressed(iview->vk_format)) {
- /* If we have multiple levels in the view we should ideally take the last level,
- * but the mip calculation has a max(..., 1) so walking back to the base mip in an
- * useful way is hard. */
- if (iview->level_count > 1) {
- iview->extent.width = iview->image->planes[0].surface.u.gfx9.base_mip_width;
- iview->extent.height = iview->image->planes[0].surface.u.gfx9.base_mip_height;
- } else {
- unsigned lvl_width = radv_minify(image->info.width , range->baseMipLevel);
- unsigned lvl_height = radv_minify(image->info.height, range->baseMipLevel);
-
- lvl_width = round_up_u32(lvl_width * view_bw, img_bw);
- lvl_height = round_up_u32(lvl_height * view_bh, img_bh);
-
- lvl_width <<= range->baseMipLevel;
- lvl_height <<= range->baseMipLevel;
-
- iview->extent.width = CLAMP(lvl_width, iview->extent.width,
- iview->image->planes[0].surface.u.gfx9.base_mip_width);
- iview->extent.height = CLAMP(lvl_height, iview->extent.height,
- iview->image->planes[0].surface.u.gfx9.base_mip_height);
- }
- }
- }
-
- iview->support_fast_clear =
- radv_image_view_can_fast_clear(device, iview);
-
- bool disable_compression = extra_create_info ? extra_create_info->disable_compression: false;
- for (unsigned i = 0; i < (iview->multiple_planes ? vk_format_get_plane_count(image->vk_format) : 1); ++i) {
- VkFormat format = vk_format_get_plane_format(iview->vk_format, i);
- radv_image_view_make_descriptor(iview, device, format,
- &pCreateInfo->components,
- false, disable_compression,
- iview->plane_id + i, i);
- radv_image_view_make_descriptor(iview, device,
- format, &pCreateInfo->components,
- true, disable_compression,
- iview->plane_id + i, i);
- }
+ RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
+ const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;
+
+ switch (image->type) {
+ case VK_IMAGE_TYPE_1D:
+ case VK_IMAGE_TYPE_2D:
+ assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <=
+ image->info.array_size);
+ break;
+ case VK_IMAGE_TYPE_3D:
+ assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <=
+ radv_minify(image->info.depth, range->baseMipLevel));
+ break;
+ default:
+ unreachable("bad VkImageType");
+ }
+ iview->image = image;
+ iview->bo = image->bo;
+ iview->type = pCreateInfo->viewType;
+ iview->plane_id = radv_plane_from_aspect(pCreateInfo->subresourceRange.aspectMask);
+ iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask;
+ iview->multiple_planes = vk_format_get_plane_count(image->vk_format) > 1 &&
+ iview->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT;
+
+ iview->base_layer = range->baseArrayLayer;
+ iview->layer_count = radv_get_layerCount(image, range);
+ iview->base_mip = range->baseMipLevel;
+ iview->level_count = radv_get_levelCount(image, range);
+
+ iview->vk_format = pCreateInfo->format;
+
+ /* If the image has an Android external format, pCreateInfo->format will be
+ * VK_FORMAT_UNDEFINED. */
+ if (iview->vk_format == VK_FORMAT_UNDEFINED)
+ iview->vk_format = image->vk_format;
+
+ if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
+ iview->vk_format = vk_format_stencil_only(iview->vk_format);
+ } else if (iview->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
+ iview->vk_format = vk_format_depth_only(iview->vk_format);
+ }
+
+ if (device->physical_device->rad_info.chip_class >= GFX9) {
+ iview->extent = (VkExtent3D){
+ .width = image->info.width,
+ .height = image->info.height,
+ .depth = image->info.depth,
+ };
+ } else {
+ iview->extent = (VkExtent3D){
+ .width = radv_minify(image->info.width, range->baseMipLevel),
+ .height = radv_minify(image->info.height, range->baseMipLevel),
+ .depth = radv_minify(image->info.depth, range->baseMipLevel),
+ };
+ }
+
+ if (iview->vk_format != image->planes[iview->plane_id].format) {
+ unsigned view_bw = vk_format_get_blockwidth(iview->vk_format);
+ unsigned view_bh = vk_format_get_blockheight(iview->vk_format);
+ unsigned img_bw = vk_format_get_blockwidth(image->vk_format);
+ unsigned img_bh = vk_format_get_blockheight(image->vk_format);
+
+ iview->extent.width = round_up_u32(iview->extent.width * view_bw, img_bw);
+ iview->extent.height = round_up_u32(iview->extent.height * view_bh, img_bh);
+
+ /* Comment ported from amdvlk -
+ * If we have the following image:
+ * Uncompressed pixels Compressed block sizes (4x4)
+ * mip0: 22 x 22 6 x 6
+ * mip1: 11 x 11 3 x 3
+ * mip2: 5 x 5 2 x 2
+ * mip3: 2 x 2 1 x 1
+ * mip4: 1 x 1 1 x 1
+ *
+ * On GFX9 the descriptor is always programmed with the WIDTH and HEIGHT of the base level and
+ * the HW is calculating the degradation of the block sizes down the mip-chain as follows
+    * (straight-up divide-by-two integer math):
+    *   mip0: 6x6
+    *   mip1: 3x3
+    *   mip2: 1x1
+    *   mip3: 1x1
+ *
+ * This means that mip2 will be missing texels.
+ *
+ * Fix this by calculating the base mip's width and height, then convert
+ * that, and round it back up to get the level 0 size. Clamp the
+ * converted size between the original values, and the physical extent
+ * of the base mipmap.
+ *
+ * On GFX10 we have to take care to not go over the physical extent
+ * of the base mipmap as otherwise the GPU computes a different layout.
+ * Note that the GPU does use the same base-mip dimensions for both a
+ * block compatible format and the compressed format, so even if we take
+ * the plain converted dimensions the physical layout is correct.
+ */
+ if (device->physical_device->rad_info.chip_class >= GFX9 &&
+ vk_format_is_compressed(image->vk_format) && !vk_format_is_compressed(iview->vk_format)) {
+ /* If we have multiple levels in the view we should ideally take the last level,
+       * but the mip calculation has a max(..., 1) so walking back to the base mip in a
+       * useful way is hard. */
+ if (iview->level_count > 1) {
+ iview->extent.width = iview->image->planes[0].surface.u.gfx9.base_mip_width;
+ iview->extent.height = iview->image->planes[0].surface.u.gfx9.base_mip_height;
+ } else {
+ unsigned lvl_width = radv_minify(image->info.width, range->baseMipLevel);
+ unsigned lvl_height = radv_minify(image->info.height, range->baseMipLevel);
+
+ lvl_width = round_up_u32(lvl_width * view_bw, img_bw);
+ lvl_height = round_up_u32(lvl_height * view_bh, img_bh);
+
+ lvl_width <<= range->baseMipLevel;
+ lvl_height <<= range->baseMipLevel;
+
+ iview->extent.width = CLAMP(lvl_width, iview->extent.width,
+ iview->image->planes[0].surface.u.gfx9.base_mip_width);
+ iview->extent.height = CLAMP(lvl_height, iview->extent.height,
+ iview->image->planes[0].surface.u.gfx9.base_mip_height);
+ }
+ }
+ }
+
+ iview->support_fast_clear = radv_image_view_can_fast_clear(device, iview);
+
+ bool disable_compression = extra_create_info ? extra_create_info->disable_compression : false;
+ for (unsigned i = 0;
+ i < (iview->multiple_planes ? vk_format_get_plane_count(image->vk_format) : 1); ++i) {
+ VkFormat format = vk_format_get_plane_format(iview->vk_format, i);
+ radv_image_view_make_descriptor(iview, device, format, &pCreateInfo->components, false,
+ disable_compression, iview->plane_id + i, i);
+ radv_image_view_make_descriptor(iview, device, format, &pCreateInfo->components, true,
+ disable_compression, iview->plane_id + i, i);
+ }
}
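The amdvlk comment above is easier to follow with the numbers worked out. The standalone sketch below is not driver code: minify() and round_up() are local stand-ins assumed to match what radv_minify() and round_up_u32() do. For the 22x22 image with 4x4 blocks it prints the block count a correct per-level calculation gives next to the count GFX9 derives by halving the base level's block size, reproducing the missing-texels case at mip2 that the CLAMP above works around.

#include <stdio.h>

/* Assumed equivalents of radv_minify()/round_up_u32(): mip minification
 * clamped to 1, and a round-up (ceiling) division. */
static unsigned minify(unsigned v, unsigned level) { v >>= level; return v ? v : 1; }
static unsigned round_up(unsigned v, unsigned a) { return (v + a - 1) / a; }

int main(void)
{
   const unsigned base = 22, block = 4, levels = 5;

   for (unsigned l = 0; l < levels; l++) {
      unsigned texels = minify(base, l);              /* 22, 11, 5, 2, 1 */
      unsigned blocks = round_up(texels, block);      /*  6,  3, 2, 1, 1 */
      unsigned hw = minify(round_up(base, block), l); /*  6,  3, 1, 1, 1 */
      printf("mip%u: %2ux%-2u texels -> %u blocks, HW derives %u%s\n", l, texels, texels,
             blocks, hw, blocks != hw ? "  <-- missing texels" : "");
   }
   return 0;
}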
-bool radv_layout_is_htile_compressed(const struct radv_device *device,
- const struct radv_image *image,
- VkImageLayout layout,
- bool in_render_loop,
- unsigned queue_mask)
+bool
+radv_layout_is_htile_compressed(const struct radv_device *device, const struct radv_image *image,
+ VkImageLayout layout, bool in_render_loop, unsigned queue_mask)
{
- switch (layout) {
- case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
- case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL_KHR:
- case VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL_KHR:
- return radv_image_has_htile(image);
- case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
- return radv_image_has_htile(image) && queue_mask == (1u << RADV_QUEUE_GENERAL);
- case VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR:
- case VK_IMAGE_LAYOUT_GENERAL:
- /* It should be safe to enable TC-compat HTILE with
- * VK_IMAGE_LAYOUT_GENERAL if we are not in a render loop and
- * if the image doesn't have the storage bit set. This
- * improves performance for apps that use GENERAL for the main
- * depth pass because this allows compression and this reduces
- * the number of decompressions from/to GENERAL.
- */
- /* FIXME: Enabling TC-compat HTILE in GENERAL on the compute
- * queue is likely broken for eg. depth/stencil copies.
- */
- if (radv_image_is_tc_compat_htile(image) &&
- queue_mask & (1u << RADV_QUEUE_GENERAL) &&
- !in_render_loop &&
- !device->instance->disable_tc_compat_htile_in_general) {
- /* GFX10+ supports compressed writes to HTILE. */
- return device->physical_device->rad_info.chip_class >= GFX10 ||
- !(image->usage & VK_IMAGE_USAGE_STORAGE_BIT);
- } else {
- return false;
- }
- case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
- if (radv_image_is_tc_compat_htile(image) ||
- (radv_image_has_htile(image) &&
- !(image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
- VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)))) {
- /* Keep HTILE compressed if the image is only going to
- * be used as a depth/stencil read-only attachment.
- */
- return true;
- } else {
- return false;
- }
- break;
- default:
- return radv_image_is_tc_compat_htile(image);
- }
+ switch (layout) {
+ case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
+ case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL_KHR:
+ case VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL_KHR:
+ return radv_image_has_htile(image);
+ case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
+ return radv_image_has_htile(image) && queue_mask == (1u << RADV_QUEUE_GENERAL);
+ case VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR:
+ case VK_IMAGE_LAYOUT_GENERAL:
+ /* It should be safe to enable TC-compat HTILE with
+ * VK_IMAGE_LAYOUT_GENERAL if we are not in a render loop and
+ * if the image doesn't have the storage bit set. This
+ * improves performance for apps that use GENERAL for the main
+ * depth pass because this allows compression and this reduces
+ * the number of decompressions from/to GENERAL.
+ */
+ /* FIXME: Enabling TC-compat HTILE in GENERAL on the compute
+ * queue is likely broken for eg. depth/stencil copies.
+ */
+ if (radv_image_is_tc_compat_htile(image) && queue_mask & (1u << RADV_QUEUE_GENERAL) &&
+ !in_render_loop && !device->instance->disable_tc_compat_htile_in_general) {
+ /* GFX10+ supports compressed writes to HTILE. */
+ return device->physical_device->rad_info.chip_class >= GFX10 ||
+ !(image->usage & VK_IMAGE_USAGE_STORAGE_BIT);
+ } else {
+ return false;
+ }
+ case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
+ if (radv_image_is_tc_compat_htile(image) ||
+ (radv_image_has_htile(image) &&
+ !(image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)))) {
+ /* Keep HTILE compressed if the image is only going to
+ * be used as a depth/stencil read-only attachment.
+ */
+ return true;
+ } else {
+ return false;
+ }
+ break;
+ default:
+ return radv_image_is_tc_compat_htile(image);
+ }
}
-bool radv_layout_can_fast_clear(const struct radv_device *device,
- const struct radv_image *image,
- VkImageLayout layout,
- bool in_render_loop,
- unsigned queue_mask)
+bool
+radv_layout_can_fast_clear(const struct radv_device *device, const struct radv_image *image,
+ VkImageLayout layout, bool in_render_loop, unsigned queue_mask)
{
- if (radv_image_has_dcc(image) &&
- !radv_layout_dcc_compressed(device, image, layout, in_render_loop, queue_mask))
- return false;
+ if (radv_image_has_dcc(image) &&
+ !radv_layout_dcc_compressed(device, image, layout, in_render_loop, queue_mask))
+ return false;
- if (!(image->usage & RADV_IMAGE_USAGE_WRITE_BITS))
- return false;
+ if (!(image->usage & RADV_IMAGE_USAGE_WRITE_BITS))
+ return false;
- return layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL &&
- queue_mask == (1u << RADV_QUEUE_GENERAL);
+ return layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL &&
+ queue_mask == (1u << RADV_QUEUE_GENERAL);
}
-bool radv_layout_dcc_compressed(const struct radv_device *device,
- const struct radv_image *image,
- VkImageLayout layout,
- bool in_render_loop,
- unsigned queue_mask)
+bool
+radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_image *image,
+ VkImageLayout layout, bool in_render_loop, unsigned queue_mask)
{
- /* If the image is read-only, we can always just keep it compressed */
- if (!(image->usage & RADV_IMAGE_USAGE_WRITE_BITS) &&
- radv_image_has_dcc(image))
- return false;
-
- /* Don't compress compute transfer dst when image stores are not supported. */
- if ((layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL ||
- layout == VK_IMAGE_LAYOUT_GENERAL) &&
- (queue_mask & (1u << RADV_QUEUE_COMPUTE)) &&
- !radv_image_use_dcc_image_stores(device, image))
- return false;
-
- return radv_image_has_dcc(image) &&
- (device->physical_device->rad_info.chip_class >= GFX10 ||
- layout != VK_IMAGE_LAYOUT_GENERAL);
+ /* If the image is read-only, we can always just keep it compressed */
+ if (!(image->usage & RADV_IMAGE_USAGE_WRITE_BITS) && radv_image_has_dcc(image))
+ return false;
+
+ /* Don't compress compute transfer dst when image stores are not supported. */
+ if ((layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL || layout == VK_IMAGE_LAYOUT_GENERAL) &&
+ (queue_mask & (1u << RADV_QUEUE_COMPUTE)) && !radv_image_use_dcc_image_stores(device, image))
+ return false;
+
+ return radv_image_has_dcc(image) && (device->physical_device->rad_info.chip_class >= GFX10 ||
+ layout != VK_IMAGE_LAYOUT_GENERAL);
}
-bool radv_layout_fmask_compressed(const struct radv_device *device,
- const struct radv_image *image,
- VkImageLayout layout,
- unsigned queue_mask)
+bool
+radv_layout_fmask_compressed(const struct radv_device *device, const struct radv_image *image,
+ VkImageLayout layout, unsigned queue_mask)
{
- return radv_image_has_fmask(image) &&
- layout != VK_IMAGE_LAYOUT_GENERAL &&
- queue_mask == (1u << RADV_QUEUE_GENERAL);
+ return radv_image_has_fmask(image) && layout != VK_IMAGE_LAYOUT_GENERAL &&
+ queue_mask == (1u << RADV_QUEUE_GENERAL);
}
-unsigned radv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family)
+unsigned
+radv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family)
{
- if (!image->exclusive)
- return image->queue_family_mask;
- if (family == VK_QUEUE_FAMILY_EXTERNAL ||
- family == VK_QUEUE_FAMILY_FOREIGN_EXT)
- return ((1u << RADV_MAX_QUEUE_FAMILIES) - 1u) |
- (1u << RADV_QUEUE_FOREIGN);
- if (family == VK_QUEUE_FAMILY_IGNORED)
- return 1u << queue_family;
- return 1u << family;
+ if (!image->exclusive)
+ return image->queue_family_mask;
+ if (family == VK_QUEUE_FAMILY_EXTERNAL || family == VK_QUEUE_FAMILY_FOREIGN_EXT)
+ return ((1u << RADV_MAX_QUEUE_FAMILIES) - 1u) | (1u << RADV_QUEUE_FOREIGN);
+ if (family == VK_QUEUE_FAMILY_IGNORED)
+ return 1u << queue_family;
+ return 1u << family;
}
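For orientation, the masks this helper returns feed the queue_mask checks used throughout this file: radv_layout_can_fast_clear() and radv_layout_fmask_compressed() above both require exactly (1u << RADV_QUEUE_GENERAL). A tiny sketch with an assumed queue-family numbering (the enum values below are illustrative, not copied from the driver):

#include <stdio.h>

/* Illustrative numbering only. */
enum { Q_GENERAL = 0, Q_COMPUTE = 1, Q_TRANSFER = 2, MAX_QUEUE_FAMILIES = 3, Q_FOREIGN = 3 };

int main(void)
{
   /* Exclusive image shared with EXTERNAL/FOREIGN: every family plus the foreign bit. */
   unsigned external_mask = ((1u << MAX_QUEUE_FAMILIES) - 1u) | (1u << Q_FOREIGN);
   /* Exclusive image used on its own family (VK_QUEUE_FAMILY_IGNORED case). */
   unsigned general_mask = 1u << Q_GENERAL;

   printf("external: 0x%x, general-only: 0x%x\n", external_mask, general_mask); /* 0xf, 0x1 */
   return 0;
}

An externally shared image therefore never satisfies the general-queue-only checks, which keeps it out of the fast-clear and FMASK-compressed paths.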
VkResult
-radv_CreateImage(VkDevice device,
- const VkImageCreateInfo *pCreateInfo,
- const VkAllocationCallbacks *pAllocator,
- VkImage *pImage)
+radv_CreateImage(VkDevice device, const VkImageCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkImage *pImage)
{
#ifdef ANDROID
- const VkNativeBufferANDROID *gralloc_info =
- vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);
+ const VkNativeBufferANDROID *gralloc_info =
+ vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);
- if (gralloc_info)
- return radv_image_from_gralloc(device, pCreateInfo, gralloc_info,
- pAllocator, pImage);
+ if (gralloc_info)
+ return radv_image_from_gralloc(device, pCreateInfo, gralloc_info, pAllocator, pImage);
#endif
- const struct wsi_image_create_info *wsi_info =
- vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
- bool scanout = wsi_info && wsi_info->scanout;
-
- return radv_image_create(device,
- &(struct radv_image_create_info) {
- .vk_info = pCreateInfo,
- .scanout = scanout,
- },
- pAllocator,
- pImage);
+ const struct wsi_image_create_info *wsi_info =
+ vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
+ bool scanout = wsi_info && wsi_info->scanout;
+
+ return radv_image_create(device,
+ &(struct radv_image_create_info){
+ .vk_info = pCreateInfo,
+ .scanout = scanout,
+ },
+ pAllocator, pImage);
}
void
-radv_DestroyImage(VkDevice _device, VkImage _image,
- const VkAllocationCallbacks *pAllocator)
+radv_DestroyImage(VkDevice _device, VkImage _image, const VkAllocationCallbacks *pAllocator)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_image, image, _image);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_image, image, _image);
- if (!image)
- return;
+ if (!image)
+ return;
- radv_destroy_image(device, pAllocator, image);
+ radv_destroy_image(device, pAllocator, image);
}
-void radv_GetImageSubresourceLayout(
- VkDevice _device,
- VkImage _image,
- const VkImageSubresource* pSubresource,
- VkSubresourceLayout* pLayout)
+void
+radv_GetImageSubresourceLayout(VkDevice _device, VkImage _image,
+ const VkImageSubresource *pSubresource, VkSubresourceLayout *pLayout)
{
- RADV_FROM_HANDLE(radv_image, image, _image);
- RADV_FROM_HANDLE(radv_device, device, _device);
- int level = pSubresource->mipLevel;
- int layer = pSubresource->arrayLayer;
-
- unsigned plane_id = 0;
- if (vk_format_get_plane_count(image->vk_format) > 1)
- plane_id = radv_plane_from_aspect(pSubresource->aspectMask);
-
- struct radv_image_plane *plane = &image->planes[plane_id];
- struct radeon_surf *surface = &plane->surface;
-
- if (image->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
- unsigned mem_plane_id = radv_plane_from_aspect(pSubresource->aspectMask);
-
- assert(level == 0);
- assert(layer == 0);
-
- pLayout->offset = ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class,
- surface, mem_plane_id, 0);
- pLayout->rowPitch = ac_surface_get_plane_stride(device->physical_device->rad_info.chip_class,
- surface, mem_plane_id);
- pLayout->arrayPitch = 0;
- pLayout->depthPitch = 0;
- pLayout->size = ac_surface_get_plane_size(surface, mem_plane_id);
- } else if (device->physical_device->rad_info.chip_class >= GFX9) {
- uint64_t level_offset = surface->is_linear ? surface->u.gfx9.offset[level] : 0;
-
- pLayout->offset = ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class,
- &plane->surface, 0, layer) + level_offset;
- if (image->vk_format == VK_FORMAT_R32G32B32_UINT ||
- image->vk_format == VK_FORMAT_R32G32B32_SINT ||
- image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
- /* Adjust the number of bytes between each row because
- * the pitch is actually the number of components per
- * row.
- */
- pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe / 3;
- } else {
- uint32_t pitch = surface->is_linear ? surface->u.gfx9.pitch[level] : surface->u.gfx9.surf_pitch;
-
- assert(util_is_power_of_two_nonzero(surface->bpe));
- pLayout->rowPitch = pitch * surface->bpe;
- }
-
- pLayout->arrayPitch = surface->u.gfx9.surf_slice_size;
- pLayout->depthPitch = surface->u.gfx9.surf_slice_size;
- pLayout->size = surface->u.gfx9.surf_slice_size;
- if (image->type == VK_IMAGE_TYPE_3D)
- pLayout->size *= u_minify(image->info.depth, level);
- } else {
- pLayout->offset = surface->u.legacy.level[level].offset + (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4 * layer;
- pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
- pLayout->arrayPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
- pLayout->depthPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
- pLayout->size = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
- if (image->type == VK_IMAGE_TYPE_3D)
- pLayout->size *= u_minify(image->info.depth, level);
- }
+ RADV_FROM_HANDLE(radv_image, image, _image);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ int level = pSubresource->mipLevel;
+ int layer = pSubresource->arrayLayer;
+
+ unsigned plane_id = 0;
+ if (vk_format_get_plane_count(image->vk_format) > 1)
+ plane_id = radv_plane_from_aspect(pSubresource->aspectMask);
+
+ struct radv_image_plane *plane = &image->planes[plane_id];
+ struct radeon_surf *surface = &plane->surface;
+
+ if (image->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
+ unsigned mem_plane_id = radv_plane_from_aspect(pSubresource->aspectMask);
+
+ assert(level == 0);
+ assert(layer == 0);
+
+ pLayout->offset = ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class,
+ surface, mem_plane_id, 0);
+ pLayout->rowPitch = ac_surface_get_plane_stride(device->physical_device->rad_info.chip_class,
+ surface, mem_plane_id);
+ pLayout->arrayPitch = 0;
+ pLayout->depthPitch = 0;
+ pLayout->size = ac_surface_get_plane_size(surface, mem_plane_id);
+ } else if (device->physical_device->rad_info.chip_class >= GFX9) {
+ uint64_t level_offset = surface->is_linear ? surface->u.gfx9.offset[level] : 0;
+
+ pLayout->offset = ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class,
+ &plane->surface, 0, layer) +
+ level_offset;
+ if (image->vk_format == VK_FORMAT_R32G32B32_UINT ||
+ image->vk_format == VK_FORMAT_R32G32B32_SINT ||
+ image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
+ /* Adjust the number of bytes between each row because
+ * the pitch is actually the number of components per
+ * row.
+ */
+ pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe / 3;
+ } else {
+ uint32_t pitch =
+ surface->is_linear ? surface->u.gfx9.pitch[level] : surface->u.gfx9.surf_pitch;
+
+ assert(util_is_power_of_two_nonzero(surface->bpe));
+ pLayout->rowPitch = pitch * surface->bpe;
+ }
+
+ pLayout->arrayPitch = surface->u.gfx9.surf_slice_size;
+ pLayout->depthPitch = surface->u.gfx9.surf_slice_size;
+ pLayout->size = surface->u.gfx9.surf_slice_size;
+ if (image->type == VK_IMAGE_TYPE_3D)
+ pLayout->size *= u_minify(image->info.depth, level);
+ } else {
+ pLayout->offset = surface->u.legacy.level[level].offset +
+ (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4 * layer;
+ pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
+ pLayout->arrayPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
+ pLayout->depthPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
+ pLayout->size = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
+ if (image->type == VK_IMAGE_TYPE_3D)
+ pLayout->size *= u_minify(image->info.depth, level);
+ }
}
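From the application side, the fields filled in above are consumed through the standard vkGetImageSubresourceLayout() entry point. A minimal sketch, assuming a linearly tiled color image created elsewhere and a caller-supplied bytes-per-texel value:

#include <stdint.h>
#include <vulkan/vulkan.h>

/* Byte offset of texel (x, y) in array layer `layer`, mip 0, of a
 * VK_IMAGE_TILING_LINEAR image. */
static VkDeviceSize
linear_texel_offset(VkDevice device, VkImage image, uint32_t x, uint32_t y, uint32_t layer,
                    VkDeviceSize texel_size)
{
   const VkImageSubresource subresource = {
      .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
      .mipLevel = 0,
      .arrayLayer = 0, /* query once; arrayPitch is the stride to the other layers */
   };
   VkSubresourceLayout layout;

   vkGetImageSubresourceLayout(device, image, &subresource, &layout);

   return layout.offset + layer * layout.arrayPitch + (VkDeviceSize)y * layout.rowPitch +
          (VkDeviceSize)x * texel_size;
}

Note that even in the R32G32B32 special case above, the rowPitch the application sees is already expressed in bytes, so the same addressing formula applies.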
-VkResult radv_GetImageDrmFormatModifierPropertiesEXT(VkDevice _device,
- VkImage _image,
- VkImageDrmFormatModifierPropertiesEXT* pProperties)
+VkResult
+radv_GetImageDrmFormatModifierPropertiesEXT(VkDevice _device, VkImage _image,
+ VkImageDrmFormatModifierPropertiesEXT *pProperties)
{
- RADV_FROM_HANDLE(radv_image, image, _image);
+ RADV_FROM_HANDLE(radv_image, image, _image);
- pProperties->drmFormatModifier = image->planes[0].surface.modifier;
- return VK_SUCCESS;
+ pProperties->drmFormatModifier = image->planes[0].surface.modifier;
+ return VK_SUCCESS;
}
-
VkResult
-radv_CreateImageView(VkDevice _device,
- const VkImageViewCreateInfo *pCreateInfo,
- const VkAllocationCallbacks *pAllocator,
- VkImageView *pView)
+radv_CreateImageView(VkDevice _device, const VkImageViewCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkImageView *pView)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- struct radv_image_view *view;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ struct radv_image_view *view;
- view = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (view == NULL)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ view =
+ vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (view == NULL)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- vk_object_base_init(&device->vk, &view->base,
- VK_OBJECT_TYPE_IMAGE_VIEW);
+ vk_object_base_init(&device->vk, &view->base, VK_OBJECT_TYPE_IMAGE_VIEW);
- radv_image_view_init(view, device, pCreateInfo, NULL);
+ radv_image_view_init(view, device, pCreateInfo, NULL);
- *pView = radv_image_view_to_handle(view);
+ *pView = radv_image_view_to_handle(view);
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
void
-radv_DestroyImageView(VkDevice _device, VkImageView _iview,
- const VkAllocationCallbacks *pAllocator)
+radv_DestroyImageView(VkDevice _device, VkImageView _iview, const VkAllocationCallbacks *pAllocator)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_image_view, iview, _iview);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_image_view, iview, _iview);
- if (!iview)
- return;
+ if (!iview)
+ return;
- vk_object_base_finish(&iview->base);
- vk_free2(&device->vk.alloc, pAllocator, iview);
+ vk_object_base_finish(&iview->base);
+ vk_free2(&device->vk.alloc, pAllocator, iview);
}
-void radv_buffer_view_init(struct radv_buffer_view *view,
- struct radv_device *device,
- const VkBufferViewCreateInfo* pCreateInfo)
+void
+radv_buffer_view_init(struct radv_buffer_view *view, struct radv_device *device,
+ const VkBufferViewCreateInfo *pCreateInfo)
{
- RADV_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer);
+ RADV_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer);
- view->bo = buffer->bo;
- view->range = pCreateInfo->range == VK_WHOLE_SIZE ?
- buffer->size - pCreateInfo->offset : pCreateInfo->range;
- view->vk_format = pCreateInfo->format;
+ view->bo = buffer->bo;
+ view->range =
+ pCreateInfo->range == VK_WHOLE_SIZE ? buffer->size - pCreateInfo->offset : pCreateInfo->range;
+ view->vk_format = pCreateInfo->format;
- radv_make_buffer_descriptor(device, buffer, view->vk_format,
- pCreateInfo->offset, view->range, view->state);
+ radv_make_buffer_descriptor(device, buffer, view->vk_format, pCreateInfo->offset, view->range,
+ view->state);
}
VkResult
-radv_CreateBufferView(VkDevice _device,
- const VkBufferViewCreateInfo *pCreateInfo,
- const VkAllocationCallbacks *pAllocator,
- VkBufferView *pView)
+radv_CreateBufferView(VkDevice _device, const VkBufferViewCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkBufferView *pView)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- struct radv_buffer_view *view;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ struct radv_buffer_view *view;
- view = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (!view)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ view =
+ vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (!view)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- vk_object_base_init(&device->vk, &view->base,
- VK_OBJECT_TYPE_BUFFER_VIEW);
+ vk_object_base_init(&device->vk, &view->base, VK_OBJECT_TYPE_BUFFER_VIEW);
- radv_buffer_view_init(view, device, pCreateInfo);
+ radv_buffer_view_init(view, device, pCreateInfo);
- *pView = radv_buffer_view_to_handle(view);
+ *pView = radv_buffer_view_to_handle(view);
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
void
radv_DestroyBufferView(VkDevice _device, VkBufferView bufferView,
- const VkAllocationCallbacks *pAllocator)
+ const VkAllocationCallbacks *pAllocator)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_buffer_view, view, bufferView);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_buffer_view, view, bufferView);
- if (!view)
- return;
+ if (!view)
+ return;
- vk_object_base_finish(&view->base);
- vk_free2(&device->vk.alloc, pAllocator, view);
+ vk_object_base_finish(&view->base);
+ vk_free2(&device->vk.alloc, pAllocator, view);
}
diff --git a/src/amd/vulkan/radv_llvm_helper.cpp b/src/amd/vulkan/radv_llvm_helper.cpp
index 612548e4219..f5eed3545e4 100644
--- a/src/amd/vulkan/radv_llvm_helper.cpp
+++ b/src/amd/vulkan/radv_llvm_helper.cpp
@@ -20,131 +20,125 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
-#include "ac_llvm_util.h"
#include "ac_llvm_build.h"
+#include "ac_llvm_util.h"
#include "radv_shader_helper.h"
#include <list>
class radv_llvm_per_thread_info {
-public:
- radv_llvm_per_thread_info(enum radeon_family arg_family,
- enum ac_target_machine_options arg_tm_options,
- unsigned arg_wave_size)
- : family(arg_family), tm_options(arg_tm_options),
- wave_size(arg_wave_size), passes(NULL), passes_wave32(NULL) {}
-
- ~radv_llvm_per_thread_info()
- {
- ac_destroy_llvm_compiler(&llvm_info);
- }
-
- bool init(void)
- {
- if (!ac_init_llvm_compiler(&llvm_info,
- family,
- tm_options))
- return false;
-
- passes = ac_create_llvm_passes(llvm_info.tm);
- if (!passes)
- return false;
-
- if (llvm_info.tm_wave32) {
- passes_wave32 = ac_create_llvm_passes(llvm_info.tm_wave32);
- if (!passes_wave32)
- return false;
- }
-
- return true;
- }
-
- bool compile_to_memory_buffer(LLVMModuleRef module,
- char **pelf_buffer, size_t *pelf_size)
- {
- struct ac_compiler_passes *p = wave_size == 32 ? passes_wave32 : passes;
- return ac_compile_module_to_elf(p, module, pelf_buffer, pelf_size);
- }
-
- bool is_same(enum radeon_family arg_family,
- enum ac_target_machine_options arg_tm_options,
- unsigned arg_wave_size) {
- if (arg_family == family &&
- arg_tm_options == tm_options &&
- arg_wave_size == wave_size)
- return true;
- return false;
- }
- struct ac_llvm_compiler llvm_info;
-private:
- enum radeon_family family;
- enum ac_target_machine_options tm_options;
- unsigned wave_size;
- struct ac_compiler_passes *passes;
- struct ac_compiler_passes *passes_wave32;
+ public:
+ radv_llvm_per_thread_info(enum radeon_family arg_family,
+ enum ac_target_machine_options arg_tm_options, unsigned arg_wave_size)
+ : family(arg_family), tm_options(arg_tm_options), wave_size(arg_wave_size), passes(NULL),
+ passes_wave32(NULL)
+ {
+ }
+
+ ~radv_llvm_per_thread_info()
+ {
+ ac_destroy_llvm_compiler(&llvm_info);
+ }
+
+ bool init(void)
+ {
+ if (!ac_init_llvm_compiler(&llvm_info, family, tm_options))
+ return false;
+
+ passes = ac_create_llvm_passes(llvm_info.tm);
+ if (!passes)
+ return false;
+
+ if (llvm_info.tm_wave32) {
+ passes_wave32 = ac_create_llvm_passes(llvm_info.tm_wave32);
+ if (!passes_wave32)
+ return false;
+ }
+
+ return true;
+ }
+
+ bool compile_to_memory_buffer(LLVMModuleRef module, char **pelf_buffer, size_t *pelf_size)
+ {
+ struct ac_compiler_passes *p = wave_size == 32 ? passes_wave32 : passes;
+ return ac_compile_module_to_elf(p, module, pelf_buffer, pelf_size);
+ }
+
+ bool is_same(enum radeon_family arg_family, enum ac_target_machine_options arg_tm_options,
+ unsigned arg_wave_size)
+ {
+ if (arg_family == family && arg_tm_options == tm_options && arg_wave_size == wave_size)
+ return true;
+ return false;
+ }
+ struct ac_llvm_compiler llvm_info;
+
+ private:
+ enum radeon_family family;
+ enum ac_target_machine_options tm_options;
+ unsigned wave_size;
+ struct ac_compiler_passes *passes;
+ struct ac_compiler_passes *passes_wave32;
};
/* we have to store a linked list per thread due to the possibility of multiple GPUs being required */
static thread_local std::list<radv_llvm_per_thread_info> radv_llvm_per_thread_list;
-bool radv_compile_to_elf(struct ac_llvm_compiler *info,
- LLVMModuleRef module,
- char **pelf_buffer, size_t *pelf_size)
+bool
+radv_compile_to_elf(struct ac_llvm_compiler *info, LLVMModuleRef module, char **pelf_buffer,
+ size_t *pelf_size)
{
- radv_llvm_per_thread_info *thread_info = nullptr;
-
- for (auto &I : radv_llvm_per_thread_list) {
- if (I.llvm_info.tm == info->tm) {
- thread_info = &I;
- break;
- }
- }
-
- if (!thread_info) {
- struct ac_compiler_passes *passes = ac_create_llvm_passes(info->tm);
- bool ret = ac_compile_module_to_elf(passes, module, pelf_buffer, pelf_size);
- ac_destroy_llvm_passes(passes);
- return ret;
- }
-
- return thread_info->compile_to_memory_buffer(module, pelf_buffer, pelf_size);
+ radv_llvm_per_thread_info *thread_info = nullptr;
+
+ for (auto &I : radv_llvm_per_thread_list) {
+ if (I.llvm_info.tm == info->tm) {
+ thread_info = &I;
+ break;
+ }
+ }
+
+ if (!thread_info) {
+ struct ac_compiler_passes *passes = ac_create_llvm_passes(info->tm);
+ bool ret = ac_compile_module_to_elf(passes, module, pelf_buffer, pelf_size);
+ ac_destroy_llvm_passes(passes);
+ return ret;
+ }
+
+ return thread_info->compile_to_memory_buffer(module, pelf_buffer, pelf_size);
}
-bool radv_init_llvm_compiler(struct ac_llvm_compiler *info,
- bool thread_compiler,
- enum radeon_family family,
- enum ac_target_machine_options tm_options,
- unsigned wave_size)
+bool
+radv_init_llvm_compiler(struct ac_llvm_compiler *info, bool thread_compiler,
+ enum radeon_family family, enum ac_target_machine_options tm_options,
+ unsigned wave_size)
{
- if (thread_compiler) {
- for (auto &I : radv_llvm_per_thread_list) {
- if (I.is_same(family, tm_options, wave_size)) {
- *info = I.llvm_info;
- return true;
- }
- }
-
- radv_llvm_per_thread_list.emplace_back(family, tm_options, wave_size);
- radv_llvm_per_thread_info &tinfo = radv_llvm_per_thread_list.back();
-
- if (!tinfo.init()) {
- radv_llvm_per_thread_list.pop_back();
- return false;
- }
-
- *info = tinfo.llvm_info;
- return true;
- }
-
- if (!ac_init_llvm_compiler(info,
- family,
- tm_options))
- return false;
- return true;
+ if (thread_compiler) {
+ for (auto &I : radv_llvm_per_thread_list) {
+ if (I.is_same(family, tm_options, wave_size)) {
+ *info = I.llvm_info;
+ return true;
+ }
+ }
+
+ radv_llvm_per_thread_list.emplace_back(family, tm_options, wave_size);
+ radv_llvm_per_thread_info &tinfo = radv_llvm_per_thread_list.back();
+
+ if (!tinfo.init()) {
+ radv_llvm_per_thread_list.pop_back();
+ return false;
+ }
+
+ *info = tinfo.llvm_info;
+ return true;
+ }
+
+ if (!ac_init_llvm_compiler(info, family, tm_options))
+ return false;
+ return true;
}
-void radv_destroy_llvm_compiler(struct ac_llvm_compiler *info,
- bool thread_compiler)
+void
+radv_destroy_llvm_compiler(struct ac_llvm_compiler *info, bool thread_compiler)
{
- if (!thread_compiler)
- ac_destroy_llvm_compiler(info);
+ if (!thread_compiler)
+ ac_destroy_llvm_compiler(info);
}
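The helper above caches one initialized LLVM compiler per (family, target-machine options, wave size) tuple in a thread-local list, so each thread reuses its own target machines across compiles instead of re-creating them. Below is a standalone C analogue of that lookup-or-create pattern; all names and the key fields are invented for illustration.

#include <stdlib.h>

struct compiler_key {
   int family;
   int tm_options;
   unsigned wave_size;
};

struct compiler_entry {
   struct compiler_key key;
   void *compiler; /* stands in for struct ac_llvm_compiler */
   struct compiler_entry *next;
};

/* One list per thread, mirroring radv_llvm_per_thread_list. */
static _Thread_local struct compiler_entry *thread_compilers;

static void *
create_compiler(const struct compiler_key *key)
{
   (void)key;
   return malloc(1); /* placeholder for ac_init_llvm_compiler() + pass setup */
}

/* Return this thread's compiler for the given configuration, creating and
 * caching it on the first miss. */
static void *
get_thread_compiler(struct compiler_key key)
{
   for (struct compiler_entry *e = thread_compilers; e; e = e->next) {
      if (e->key.family == key.family && e->key.tm_options == key.tm_options &&
          e->key.wave_size == key.wave_size)
         return e->compiler;
   }

   struct compiler_entry *e = malloc(sizeof(*e));
   if (!e)
      return NULL;
   e->key = key;
   e->compiler = create_compiler(&key);
   e->next = thread_compilers;
   thread_compilers = e;
   return e->compiler;
}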
diff --git a/src/amd/vulkan/radv_meta.c b/src/amd/vulkan/radv_meta.c
index a09bfa1755c..a38c7911601 100644
--- a/src/amd/vulkan/radv_meta.c
+++ b/src/amd/vulkan/radv_meta.c
@@ -35,213 +35,205 @@
#include <sys/stat.h>
void
-radv_meta_save(struct radv_meta_saved_state *state,
- struct radv_cmd_buffer *cmd_buffer, uint32_t flags)
+radv_meta_save(struct radv_meta_saved_state *state, struct radv_cmd_buffer *cmd_buffer,
+ uint32_t flags)
{
- VkPipelineBindPoint bind_point =
- flags & RADV_META_SAVE_GRAPHICS_PIPELINE ?
- VK_PIPELINE_BIND_POINT_GRAPHICS :
- VK_PIPELINE_BIND_POINT_COMPUTE;
- struct radv_descriptor_state *descriptors_state =
- radv_get_descriptors_state(cmd_buffer, bind_point);
-
- assert(flags & (RADV_META_SAVE_GRAPHICS_PIPELINE |
- RADV_META_SAVE_COMPUTE_PIPELINE));
-
- state->flags = flags;
-
- if (state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE) {
- assert(!(state->flags & RADV_META_SAVE_COMPUTE_PIPELINE));
-
- state->old_pipeline = cmd_buffer->state.pipeline;
-
- /* Save all viewports. */
- state->viewport.count = cmd_buffer->state.dynamic.viewport.count;
- typed_memcpy(state->viewport.viewports,
- cmd_buffer->state.dynamic.viewport.viewports,
- MAX_VIEWPORTS);
-
- /* Save all scissors. */
- state->scissor.count = cmd_buffer->state.dynamic.scissor.count;
- typed_memcpy(state->scissor.scissors,
- cmd_buffer->state.dynamic.scissor.scissors,
- MAX_SCISSORS);
-
- state->cull_mode = cmd_buffer->state.dynamic.cull_mode;
- state->front_face = cmd_buffer->state.dynamic.front_face;
-
- state->primitive_topology = cmd_buffer->state.dynamic.primitive_topology;
-
- state->depth_test_enable = cmd_buffer->state.dynamic.depth_test_enable;
- state->depth_write_enable = cmd_buffer->state.dynamic.depth_write_enable;
- state->depth_compare_op = cmd_buffer->state.dynamic.depth_compare_op;
- state->depth_bounds_test_enable = cmd_buffer->state.dynamic.depth_bounds_test_enable;
- state->stencil_test_enable = cmd_buffer->state.dynamic.stencil_test_enable;
-
- state->stencil_op.front.compare_op = cmd_buffer->state.dynamic.stencil_op.front.compare_op;
- state->stencil_op.front.fail_op = cmd_buffer->state.dynamic.stencil_op.front.fail_op;
- state->stencil_op.front.pass_op = cmd_buffer->state.dynamic.stencil_op.front.pass_op;
- state->stencil_op.front.depth_fail_op = cmd_buffer->state.dynamic.stencil_op.front.depth_fail_op;
-
- state->stencil_op.back.compare_op = cmd_buffer->state.dynamic.stencil_op.back.compare_op;
- state->stencil_op.back.fail_op = cmd_buffer->state.dynamic.stencil_op.back.fail_op;
- state->stencil_op.back.pass_op = cmd_buffer->state.dynamic.stencil_op.back.pass_op;
- state->stencil_op.back.depth_fail_op = cmd_buffer->state.dynamic.stencil_op.back.depth_fail_op;
-
- state->fragment_shading_rate.size = cmd_buffer->state.dynamic.fragment_shading_rate.size;
- state->fragment_shading_rate.combiner_ops[0] = cmd_buffer->state.dynamic.fragment_shading_rate.combiner_ops[0];
- state->fragment_shading_rate.combiner_ops[1] = cmd_buffer->state.dynamic.fragment_shading_rate.combiner_ops[1];
- }
-
- if (state->flags & RADV_META_SAVE_SAMPLE_LOCATIONS) {
- typed_memcpy(&state->sample_location,
- &cmd_buffer->state.dynamic.sample_location, 1);
- }
-
- if (state->flags & RADV_META_SAVE_COMPUTE_PIPELINE) {
- assert(!(state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE));
-
- state->old_pipeline = cmd_buffer->state.compute_pipeline;
- }
-
- if (state->flags & RADV_META_SAVE_DESCRIPTORS) {
- state->old_descriptor_set0 = descriptors_state->sets[0];
- if (!(descriptors_state->valid & 1) || !state->old_descriptor_set0)
- state->flags &= ~RADV_META_SAVE_DESCRIPTORS;
- }
-
- if (state->flags & RADV_META_SAVE_CONSTANTS) {
- memcpy(state->push_constants, cmd_buffer->push_constants,
- MAX_PUSH_CONSTANTS_SIZE);
- }
-
- if (state->flags & RADV_META_SAVE_PASS) {
- state->pass = cmd_buffer->state.pass;
- state->subpass = cmd_buffer->state.subpass;
- state->framebuffer = cmd_buffer->state.framebuffer;
- state->attachments = cmd_buffer->state.attachments;
- state->render_area = cmd_buffer->state.render_area;
- }
+ VkPipelineBindPoint bind_point = flags & RADV_META_SAVE_GRAPHICS_PIPELINE
+ ? VK_PIPELINE_BIND_POINT_GRAPHICS
+ : VK_PIPELINE_BIND_POINT_COMPUTE;
+ struct radv_descriptor_state *descriptors_state =
+ radv_get_descriptors_state(cmd_buffer, bind_point);
+
+ assert(flags & (RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_COMPUTE_PIPELINE));
+
+ state->flags = flags;
+
+ if (state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE) {
+ assert(!(state->flags & RADV_META_SAVE_COMPUTE_PIPELINE));
+
+ state->old_pipeline = cmd_buffer->state.pipeline;
+
+ /* Save all viewports. */
+ state->viewport.count = cmd_buffer->state.dynamic.viewport.count;
+ typed_memcpy(state->viewport.viewports, cmd_buffer->state.dynamic.viewport.viewports,
+ MAX_VIEWPORTS);
+
+ /* Save all scissors. */
+ state->scissor.count = cmd_buffer->state.dynamic.scissor.count;
+ typed_memcpy(state->scissor.scissors, cmd_buffer->state.dynamic.scissor.scissors,
+ MAX_SCISSORS);
+
+ state->cull_mode = cmd_buffer->state.dynamic.cull_mode;
+ state->front_face = cmd_buffer->state.dynamic.front_face;
+
+ state->primitive_topology = cmd_buffer->state.dynamic.primitive_topology;
+
+ state->depth_test_enable = cmd_buffer->state.dynamic.depth_test_enable;
+ state->depth_write_enable = cmd_buffer->state.dynamic.depth_write_enable;
+ state->depth_compare_op = cmd_buffer->state.dynamic.depth_compare_op;
+ state->depth_bounds_test_enable = cmd_buffer->state.dynamic.depth_bounds_test_enable;
+ state->stencil_test_enable = cmd_buffer->state.dynamic.stencil_test_enable;
+
+ state->stencil_op.front.compare_op = cmd_buffer->state.dynamic.stencil_op.front.compare_op;
+ state->stencil_op.front.fail_op = cmd_buffer->state.dynamic.stencil_op.front.fail_op;
+ state->stencil_op.front.pass_op = cmd_buffer->state.dynamic.stencil_op.front.pass_op;
+ state->stencil_op.front.depth_fail_op =
+ cmd_buffer->state.dynamic.stencil_op.front.depth_fail_op;
+
+ state->stencil_op.back.compare_op = cmd_buffer->state.dynamic.stencil_op.back.compare_op;
+ state->stencil_op.back.fail_op = cmd_buffer->state.dynamic.stencil_op.back.fail_op;
+ state->stencil_op.back.pass_op = cmd_buffer->state.dynamic.stencil_op.back.pass_op;
+ state->stencil_op.back.depth_fail_op =
+ cmd_buffer->state.dynamic.stencil_op.back.depth_fail_op;
+
+ state->fragment_shading_rate.size = cmd_buffer->state.dynamic.fragment_shading_rate.size;
+ state->fragment_shading_rate.combiner_ops[0] =
+ cmd_buffer->state.dynamic.fragment_shading_rate.combiner_ops[0];
+ state->fragment_shading_rate.combiner_ops[1] =
+ cmd_buffer->state.dynamic.fragment_shading_rate.combiner_ops[1];
+ }
+
+ if (state->flags & RADV_META_SAVE_SAMPLE_LOCATIONS) {
+ typed_memcpy(&state->sample_location, &cmd_buffer->state.dynamic.sample_location, 1);
+ }
+
+ if (state->flags & RADV_META_SAVE_COMPUTE_PIPELINE) {
+ assert(!(state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE));
+
+ state->old_pipeline = cmd_buffer->state.compute_pipeline;
+ }
+
+ if (state->flags & RADV_META_SAVE_DESCRIPTORS) {
+ state->old_descriptor_set0 = descriptors_state->sets[0];
+ if (!(descriptors_state->valid & 1) || !state->old_descriptor_set0)
+ state->flags &= ~RADV_META_SAVE_DESCRIPTORS;
+ }
+
+ if (state->flags & RADV_META_SAVE_CONSTANTS) {
+ memcpy(state->push_constants, cmd_buffer->push_constants, MAX_PUSH_CONSTANTS_SIZE);
+ }
+
+ if (state->flags & RADV_META_SAVE_PASS) {
+ state->pass = cmd_buffer->state.pass;
+ state->subpass = cmd_buffer->state.subpass;
+ state->framebuffer = cmd_buffer->state.framebuffer;
+ state->attachments = cmd_buffer->state.attachments;
+ state->render_area = cmd_buffer->state.render_area;
+ }
}
void
-radv_meta_restore(const struct radv_meta_saved_state *state,
- struct radv_cmd_buffer *cmd_buffer)
+radv_meta_restore(const struct radv_meta_saved_state *state, struct radv_cmd_buffer *cmd_buffer)
{
- VkPipelineBindPoint bind_point =
- state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE ?
- VK_PIPELINE_BIND_POINT_GRAPHICS :
- VK_PIPELINE_BIND_POINT_COMPUTE;
-
- if (state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE) {
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_GRAPHICS,
- radv_pipeline_to_handle(state->old_pipeline));
-
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_PIPELINE;
-
- /* Restore all viewports. */
- cmd_buffer->state.dynamic.viewport.count = state->viewport.count;
- typed_memcpy(cmd_buffer->state.dynamic.viewport.viewports,
- state->viewport.viewports,
- MAX_VIEWPORTS);
-
- /* Restore all scissors. */
- cmd_buffer->state.dynamic.scissor.count = state->scissor.count;
- typed_memcpy(cmd_buffer->state.dynamic.scissor.scissors,
- state->scissor.scissors,
- MAX_SCISSORS);
-
- cmd_buffer->state.dynamic.cull_mode = state->cull_mode;
- cmd_buffer->state.dynamic.front_face = state->front_face;
-
- cmd_buffer->state.dynamic.primitive_topology = state->primitive_topology;
-
- cmd_buffer->state.dynamic.depth_test_enable = state->depth_test_enable;
- cmd_buffer->state.dynamic.depth_write_enable = state->depth_write_enable;
- cmd_buffer->state.dynamic.depth_compare_op = state->depth_compare_op;
- cmd_buffer->state.dynamic.depth_bounds_test_enable = state->depth_bounds_test_enable;
- cmd_buffer->state.dynamic.stencil_test_enable = state->stencil_test_enable;
-
- cmd_buffer->state.dynamic.stencil_op.front.compare_op = state->stencil_op.front.compare_op;
- cmd_buffer->state.dynamic.stencil_op.front.fail_op = state->stencil_op.front.fail_op;
- cmd_buffer->state.dynamic.stencil_op.front.pass_op = state->stencil_op.front.pass_op;
- cmd_buffer->state.dynamic.stencil_op.front.depth_fail_op = state->stencil_op.front.depth_fail_op;
-
- cmd_buffer->state.dynamic.stencil_op.back.compare_op = state->stencil_op.back.compare_op;
- cmd_buffer->state.dynamic.stencil_op.back.fail_op = state->stencil_op.back.fail_op;
- cmd_buffer->state.dynamic.stencil_op.back.pass_op = state->stencil_op.back.pass_op;
- cmd_buffer->state.dynamic.stencil_op.back.depth_fail_op = state->stencil_op.back.depth_fail_op;
-
- cmd_buffer->state.dynamic.fragment_shading_rate.size = state->fragment_shading_rate.size;
- cmd_buffer->state.dynamic.fragment_shading_rate.combiner_ops[0] = state->fragment_shading_rate.combiner_ops[0];
- cmd_buffer->state.dynamic.fragment_shading_rate.combiner_ops[1] = state->fragment_shading_rate.combiner_ops[1];
-
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_VIEWPORT |
- RADV_CMD_DIRTY_DYNAMIC_SCISSOR |
- RADV_CMD_DIRTY_DYNAMIC_CULL_MODE |
- RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE |
- RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY |
- RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE |
- RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE |
- RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP |
- RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE |
- RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE |
- RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP |
- RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE;
- }
-
- if (state->flags & RADV_META_SAVE_SAMPLE_LOCATIONS) {
- typed_memcpy(&cmd_buffer->state.dynamic.sample_location.locations,
- &state->sample_location.locations, 1);
-
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS;
- }
-
- if (state->flags & RADV_META_SAVE_COMPUTE_PIPELINE) {
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_COMPUTE,
- radv_pipeline_to_handle(state->old_pipeline));
- }
-
- if (state->flags & RADV_META_SAVE_DESCRIPTORS) {
- radv_set_descriptor_set(cmd_buffer, bind_point,
- state->old_descriptor_set0, 0);
- }
-
- if (state->flags & RADV_META_SAVE_CONSTANTS) {
- VkShaderStageFlags stages = VK_SHADER_STAGE_COMPUTE_BIT;
-
- if (state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE)
- stages |= VK_SHADER_STAGE_ALL_GRAPHICS;
-
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_NULL_HANDLE, stages, 0,
- MAX_PUSH_CONSTANTS_SIZE,
- state->push_constants);
- }
-
- if (state->flags & RADV_META_SAVE_PASS) {
- cmd_buffer->state.pass = state->pass;
- cmd_buffer->state.subpass = state->subpass;
- cmd_buffer->state.framebuffer = state->framebuffer;
- cmd_buffer->state.attachments = state->attachments;
- cmd_buffer->state.render_area = state->render_area;
- if (state->subpass)
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FRAMEBUFFER;
- }
+ VkPipelineBindPoint bind_point = state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE
+ ? VK_PIPELINE_BIND_POINT_GRAPHICS
+ : VK_PIPELINE_BIND_POINT_COMPUTE;
+
+ if (state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE) {
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS,
+ radv_pipeline_to_handle(state->old_pipeline));
+
+ cmd_buffer->state.dirty |= RADV_CMD_DIRTY_PIPELINE;
+
+ /* Restore all viewports. */
+ cmd_buffer->state.dynamic.viewport.count = state->viewport.count;
+ typed_memcpy(cmd_buffer->state.dynamic.viewport.viewports, state->viewport.viewports,
+ MAX_VIEWPORTS);
+
+ /* Restore all scissors. */
+ cmd_buffer->state.dynamic.scissor.count = state->scissor.count;
+ typed_memcpy(cmd_buffer->state.dynamic.scissor.scissors, state->scissor.scissors,
+ MAX_SCISSORS);
+
+ cmd_buffer->state.dynamic.cull_mode = state->cull_mode;
+ cmd_buffer->state.dynamic.front_face = state->front_face;
+
+ cmd_buffer->state.dynamic.primitive_topology = state->primitive_topology;
+
+ cmd_buffer->state.dynamic.depth_test_enable = state->depth_test_enable;
+ cmd_buffer->state.dynamic.depth_write_enable = state->depth_write_enable;
+ cmd_buffer->state.dynamic.depth_compare_op = state->depth_compare_op;
+ cmd_buffer->state.dynamic.depth_bounds_test_enable = state->depth_bounds_test_enable;
+ cmd_buffer->state.dynamic.stencil_test_enable = state->stencil_test_enable;
+
+ cmd_buffer->state.dynamic.stencil_op.front.compare_op = state->stencil_op.front.compare_op;
+ cmd_buffer->state.dynamic.stencil_op.front.fail_op = state->stencil_op.front.fail_op;
+ cmd_buffer->state.dynamic.stencil_op.front.pass_op = state->stencil_op.front.pass_op;
+ cmd_buffer->state.dynamic.stencil_op.front.depth_fail_op =
+ state->stencil_op.front.depth_fail_op;
+
+ cmd_buffer->state.dynamic.stencil_op.back.compare_op = state->stencil_op.back.compare_op;
+ cmd_buffer->state.dynamic.stencil_op.back.fail_op = state->stencil_op.back.fail_op;
+ cmd_buffer->state.dynamic.stencil_op.back.pass_op = state->stencil_op.back.pass_op;
+ cmd_buffer->state.dynamic.stencil_op.back.depth_fail_op =
+ state->stencil_op.back.depth_fail_op;
+
+ cmd_buffer->state.dynamic.fragment_shading_rate.size = state->fragment_shading_rate.size;
+ cmd_buffer->state.dynamic.fragment_shading_rate.combiner_ops[0] =
+ state->fragment_shading_rate.combiner_ops[0];
+ cmd_buffer->state.dynamic.fragment_shading_rate.combiner_ops[1] =
+ state->fragment_shading_rate.combiner_ops[1];
+
+ cmd_buffer->state.dirty |=
+ RADV_CMD_DIRTY_DYNAMIC_VIEWPORT | RADV_CMD_DIRTY_DYNAMIC_SCISSOR |
+ RADV_CMD_DIRTY_DYNAMIC_CULL_MODE | RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE |
+ RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY | RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE |
+ RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE | RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP |
+ RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE |
+ RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE | RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP |
+ RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE;
+ }
+
+ if (state->flags & RADV_META_SAVE_SAMPLE_LOCATIONS) {
+ typed_memcpy(&cmd_buffer->state.dynamic.sample_location.locations,
+ &state->sample_location.locations, 1);
+
+ cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS;
+ }
+
+ if (state->flags & RADV_META_SAVE_COMPUTE_PIPELINE) {
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
+ radv_pipeline_to_handle(state->old_pipeline));
+ }
+
+ if (state->flags & RADV_META_SAVE_DESCRIPTORS) {
+ radv_set_descriptor_set(cmd_buffer, bind_point, state->old_descriptor_set0, 0);
+ }
+
+ if (state->flags & RADV_META_SAVE_CONSTANTS) {
+ VkShaderStageFlags stages = VK_SHADER_STAGE_COMPUTE_BIT;
+
+ if (state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE)
+ stages |= VK_SHADER_STAGE_ALL_GRAPHICS;
+
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), VK_NULL_HANDLE, stages, 0,
+ MAX_PUSH_CONSTANTS_SIZE, state->push_constants);
+ }
+
+ if (state->flags & RADV_META_SAVE_PASS) {
+ cmd_buffer->state.pass = state->pass;
+ cmd_buffer->state.subpass = state->subpass;
+ cmd_buffer->state.framebuffer = state->framebuffer;
+ cmd_buffer->state.attachments = state->attachments;
+ cmd_buffer->state.render_area = state->render_area;
+ if (state->subpass)
+ cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FRAMEBUFFER;
+ }
}
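The save/restore pair above is the bracketing every meta operation uses around its own pipeline, descriptor and push-constant setup. Illustrative only: the function name and the elided body below are placeholders, not real radv code; the flags and types are the ones used above.

static void
radv_meta_do_something(struct radv_cmd_buffer *cmd_buffer)
{
   struct radv_meta_saved_state saved_state;

   /* Snapshot only what the meta dispatch will clobber. */
   radv_meta_save(&saved_state, cmd_buffer,
                  RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS |
                     RADV_META_SAVE_CONSTANTS);

   /* ... bind the meta compute pipeline, push descriptors/constants, dispatch ... */

   /* Put the application's state back exactly as it was. */
   radv_meta_restore(&saved_state, cmd_buffer);
}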
VkImageViewType
radv_meta_get_view_type(const struct radv_image *image)
{
- switch (image->type) {
- case VK_IMAGE_TYPE_1D: return VK_IMAGE_VIEW_TYPE_1D;
- case VK_IMAGE_TYPE_2D: return VK_IMAGE_VIEW_TYPE_2D;
- case VK_IMAGE_TYPE_3D: return VK_IMAGE_VIEW_TYPE_3D;
- default:
- unreachable("bad VkImageViewType");
- }
+ switch (image->type) {
+ case VK_IMAGE_TYPE_1D:
+ return VK_IMAGE_VIEW_TYPE_1D;
+ case VK_IMAGE_TYPE_2D:
+ return VK_IMAGE_VIEW_TYPE_2D;
+ case VK_IMAGE_TYPE_3D:
+ return VK_IMAGE_VIEW_TYPE_3D;
+ default:
+ unreachable("bad VkImageViewType");
+ }
}
/**
@@ -250,80 +242,76 @@ radv_meta_get_view_type(const struct radv_image *image)
*/
uint32_t
radv_meta_get_iview_layer(const struct radv_image *dest_image,
- const VkImageSubresourceLayers *dest_subresource,
- const VkOffset3D *dest_offset)
+ const VkImageSubresourceLayers *dest_subresource,
+ const VkOffset3D *dest_offset)
{
- switch (dest_image->type) {
- case VK_IMAGE_TYPE_1D:
- case VK_IMAGE_TYPE_2D:
- return dest_subresource->baseArrayLayer;
- case VK_IMAGE_TYPE_3D:
- /* HACK: Vulkan does not allow attaching a 3D image to a framebuffer,
- * but meta does it anyway. When doing so, we translate the
- * destination's z offset into an array offset.
- */
- return dest_offset->z;
- default:
- assert(!"bad VkImageType");
- return 0;
- }
+ switch (dest_image->type) {
+ case VK_IMAGE_TYPE_1D:
+ case VK_IMAGE_TYPE_2D:
+ return dest_subresource->baseArrayLayer;
+ case VK_IMAGE_TYPE_3D:
+ /* HACK: Vulkan does not allow attaching a 3D image to a framebuffer,
+ * but meta does it anyway. When doing so, we translate the
+ * destination's z offset into an array offset.
+ */
+ return dest_offset->z;
+ default:
+ assert(!"bad VkImageType");
+ return 0;
+ }
}
static void *
-meta_alloc(void* _device, size_t size, size_t alignment,
- VkSystemAllocationScope allocationScope)
+meta_alloc(void *_device, size_t size, size_t alignment, VkSystemAllocationScope allocationScope)
{
- struct radv_device *device = _device;
- return device->vk.alloc.pfnAllocation(device->vk.alloc.pUserData, size, alignment,
- VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+ struct radv_device *device = _device;
+ return device->vk.alloc.pfnAllocation(device->vk.alloc.pUserData, size, alignment,
+ VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
}
static void *
-meta_realloc(void* _device, void *original, size_t size, size_t alignment,
+meta_realloc(void *_device, void *original, size_t size, size_t alignment,
VkSystemAllocationScope allocationScope)
{
- struct radv_device *device = _device;
- return device->vk.alloc.pfnReallocation(device->vk.alloc.pUserData, original,
- size, alignment,
- VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+ struct radv_device *device = _device;
+ return device->vk.alloc.pfnReallocation(device->vk.alloc.pUserData, original, size, alignment,
+ VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
}
static void
-meta_free(void* _device, void *data)
+meta_free(void *_device, void *data)
{
- struct radv_device *device = _device;
- device->vk.alloc.pfnFree(device->vk.alloc.pUserData, data);
+ struct radv_device *device = _device;
+ device->vk.alloc.pfnFree(device->vk.alloc.pUserData, data);
}
#ifndef _WIN32
static bool
radv_builtin_cache_path(char *path)
{
- char *xdg_cache_home = getenv("XDG_CACHE_HOME");
- const char *suffix = "/radv_builtin_shaders";
- const char *suffix2 = "/.cache/radv_builtin_shaders";
- struct passwd pwd, *result;
- char path2[PATH_MAX + 1]; /* PATH_MAX is not a real max,but suffices here. */
- int ret;
-
- if (xdg_cache_home) {
- ret = snprintf(path, PATH_MAX + 1, "%s%s%zd",
- xdg_cache_home, suffix, sizeof(void *) * 8);
- return ret > 0 && ret < PATH_MAX + 1;
- }
-
- getpwuid_r(getuid(), &pwd, path2, PATH_MAX - strlen(suffix2), &result);
- if (!result)
- return false;
-
- strcpy(path, pwd.pw_dir);
- strcat(path, "/.cache");
- if (mkdir(path, 0755) && errno != EEXIST)
- return false;
-
- ret = snprintf(path, PATH_MAX + 1, "%s%s%zd",
- pwd.pw_dir, suffix2, sizeof(void *) * 8);
- return ret > 0 && ret < PATH_MAX + 1;
+ char *xdg_cache_home = getenv("XDG_CACHE_HOME");
+ const char *suffix = "/radv_builtin_shaders";
+ const char *suffix2 = "/.cache/radv_builtin_shaders";
+ struct passwd pwd, *result;
+   char path2[PATH_MAX + 1]; /* PATH_MAX is not a real max, but suffices here. */
+ int ret;
+
+ if (xdg_cache_home) {
+ ret = snprintf(path, PATH_MAX + 1, "%s%s%zd", xdg_cache_home, suffix, sizeof(void *) * 8);
+ return ret > 0 && ret < PATH_MAX + 1;
+ }
+
+ getpwuid_r(getuid(), &pwd, path2, PATH_MAX - strlen(suffix2), &result);
+ if (!result)
+ return false;
+
+ strcpy(path, pwd.pw_dir);
+ strcat(path, "/.cache");
+ if (mkdir(path, 0755) && errno != EEXIST)
+ return false;
+
+ ret = snprintf(path, PATH_MAX + 1, "%s%s%zd", pwd.pw_dir, suffix2, sizeof(void *) * 8);
+ return ret > 0 && ret < PATH_MAX + 1;
}
#endif
@@ -331,32 +319,32 @@ static bool
radv_load_meta_pipeline(struct radv_device *device)
{
#ifdef _WIN32
- return false;
+ return false;
#else
- char path[PATH_MAX + 1];
- struct stat st;
- void *data = NULL;
- bool ret = false;
-
- if (!radv_builtin_cache_path(path))
- return false;
-
- int fd = open(path, O_RDONLY);
- if (fd < 0)
- return false;
- if (fstat(fd, &st))
- goto fail;
- data = malloc(st.st_size);
- if (!data)
- goto fail;
- if(read(fd, data, st.st_size) == -1)
- goto fail;
-
- ret = radv_pipeline_cache_load(&device->meta_state.cache, data, st.st_size);
+ char path[PATH_MAX + 1];
+ struct stat st;
+ void *data = NULL;
+ bool ret = false;
+
+ if (!radv_builtin_cache_path(path))
+ return false;
+
+ int fd = open(path, O_RDONLY);
+ if (fd < 0)
+ return false;
+ if (fstat(fd, &st))
+ goto fail;
+ data = malloc(st.st_size);
+ if (!data)
+ goto fail;
+ if (read(fd, data, st.st_size) == -1)
+ goto fail;
+
+ ret = radv_pipeline_cache_load(&device->meta_state.cache, data, st.st_size);
fail:
- free(data);
- close(fd);
- return ret;
+ free(data);
+ close(fd);
+ return ret;
#endif
}
@@ -364,320 +352,315 @@ static void
radv_store_meta_pipeline(struct radv_device *device)
{
#ifndef _WIN32
- char path[PATH_MAX + 1], path2[PATH_MAX + 7];
- size_t size;
- void *data = NULL;
-
- if (!device->meta_state.cache.modified)
- return;
-
- if (radv_GetPipelineCacheData(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- &size, NULL))
- return;
-
- if (!radv_builtin_cache_path(path))
- return;
-
- strcpy(path2, path);
- strcat(path2, "XXXXXX");
- int fd = mkstemp(path2);//open(path, O_WRONLY | O_CREAT, 0600);
- if (fd < 0)
- return;
- data = malloc(size);
- if (!data)
- goto fail;
-
- if (radv_GetPipelineCacheData(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- &size, data))
- goto fail;
- if(write(fd, data, size) == -1)
- goto fail;
-
- rename(path2, path);
+ char path[PATH_MAX + 1], path2[PATH_MAX + 7];
+ size_t size;
+ void *data = NULL;
+
+ if (!device->meta_state.cache.modified)
+ return;
+
+ if (radv_GetPipelineCacheData(radv_device_to_handle(device),
+ radv_pipeline_cache_to_handle(&device->meta_state.cache), &size,
+ NULL))
+ return;
+
+ if (!radv_builtin_cache_path(path))
+ return;
+
+ strcpy(path2, path);
+ strcat(path2, "XXXXXX");
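+   /* Write the cache to a unique temp file and rename() it into place, so a
+    * concurrent reader never sees a partially written file. */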
+ int fd = mkstemp(path2); // open(path, O_WRONLY | O_CREAT, 0600);
+ if (fd < 0)
+ return;
+ data = malloc(size);
+ if (!data)
+ goto fail;
+
+ if (radv_GetPipelineCacheData(radv_device_to_handle(device),
+ radv_pipeline_cache_to_handle(&device->meta_state.cache), &size,
+ data))
+ goto fail;
+ if (write(fd, data, size) == -1)
+ goto fail;
+
+ rename(path2, path);
fail:
- free(data);
- close(fd);
- unlink(path2);
+ free(data);
+ close(fd);
+ unlink(path2);
#endif
}
VkResult
radv_device_init_meta(struct radv_device *device)
{
- VkResult result;
+ VkResult result;
- memset(&device->meta_state, 0, sizeof(device->meta_state));
+ memset(&device->meta_state, 0, sizeof(device->meta_state));
- device->meta_state.alloc = (VkAllocationCallbacks) {
- .pUserData = device,
- .pfnAllocation = meta_alloc,
- .pfnReallocation = meta_realloc,
- .pfnFree = meta_free,
- };
+ device->meta_state.alloc = (VkAllocationCallbacks){
+ .pUserData = device,
+ .pfnAllocation = meta_alloc,
+ .pfnReallocation = meta_realloc,
+ .pfnFree = meta_free,
+ };
- device->meta_state.cache.alloc = device->meta_state.alloc;
- radv_pipeline_cache_init(&device->meta_state.cache, device);
- bool loaded_cache = radv_load_meta_pipeline(device);
- bool on_demand = !loaded_cache;
+ device->meta_state.cache.alloc = device->meta_state.alloc;
+ radv_pipeline_cache_init(&device->meta_state.cache, device);
+ bool loaded_cache = radv_load_meta_pipeline(device);
+ bool on_demand = !loaded_cache;
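+   /* With a warm on-disk cache, creating all meta pipelines up front is cheap;
+    * without one, the init helpers that take on_demand defer pipeline creation
+    * to first use. */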
- mtx_init(&device->meta_state.mtx, mtx_plain);
+ mtx_init(&device->meta_state.mtx, mtx_plain);
- result = radv_device_init_meta_clear_state(device, on_demand);
- if (result != VK_SUCCESS)
- goto fail_clear;
+ result = radv_device_init_meta_clear_state(device, on_demand);
+ if (result != VK_SUCCESS)
+ goto fail_clear;
- result = radv_device_init_meta_resolve_state(device, on_demand);
- if (result != VK_SUCCESS)
- goto fail_resolve;
+ result = radv_device_init_meta_resolve_state(device, on_demand);
+ if (result != VK_SUCCESS)
+ goto fail_resolve;
- result = radv_device_init_meta_blit_state(device, on_demand);
- if (result != VK_SUCCESS)
- goto fail_blit;
+ result = radv_device_init_meta_blit_state(device, on_demand);
+ if (result != VK_SUCCESS)
+ goto fail_blit;
- result = radv_device_init_meta_blit2d_state(device, on_demand);
- if (result != VK_SUCCESS)
- goto fail_blit2d;
+ result = radv_device_init_meta_blit2d_state(device, on_demand);
+ if (result != VK_SUCCESS)
+ goto fail_blit2d;
- result = radv_device_init_meta_bufimage_state(device);
- if (result != VK_SUCCESS)
- goto fail_bufimage;
+ result = radv_device_init_meta_bufimage_state(device);
+ if (result != VK_SUCCESS)
+ goto fail_bufimage;
- result = radv_device_init_meta_depth_decomp_state(device, on_demand);
- if (result != VK_SUCCESS)
- goto fail_depth_decomp;
+ result = radv_device_init_meta_depth_decomp_state(device, on_demand);
+ if (result != VK_SUCCESS)
+ goto fail_depth_decomp;
- result = radv_device_init_meta_buffer_state(device);
- if (result != VK_SUCCESS)
- goto fail_buffer;
+ result = radv_device_init_meta_buffer_state(device);
+ if (result != VK_SUCCESS)
+ goto fail_buffer;
- result = radv_device_init_meta_query_state(device, on_demand);
- if (result != VK_SUCCESS)
- goto fail_query;
+ result = radv_device_init_meta_query_state(device, on_demand);
+ if (result != VK_SUCCESS)
+ goto fail_query;
- result = radv_device_init_meta_fast_clear_flush_state(device, on_demand);
- if (result != VK_SUCCESS)
- goto fail_fast_clear;
+ result = radv_device_init_meta_fast_clear_flush_state(device, on_demand);
+ if (result != VK_SUCCESS)
+ goto fail_fast_clear;
- result = radv_device_init_meta_resolve_compute_state(device, on_demand);
- if (result != VK_SUCCESS)
- goto fail_resolve_compute;
+ result = radv_device_init_meta_resolve_compute_state(device, on_demand);
+ if (result != VK_SUCCESS)
+ goto fail_resolve_compute;
- result = radv_device_init_meta_resolve_fragment_state(device, on_demand);
- if (result != VK_SUCCESS)
- goto fail_resolve_fragment;
+ result = radv_device_init_meta_resolve_fragment_state(device, on_demand);
+ if (result != VK_SUCCESS)
+ goto fail_resolve_fragment;
- result = radv_device_init_meta_fmask_expand_state(device);
- if (result != VK_SUCCESS)
- goto fail_fmask_expand;
+ result = radv_device_init_meta_fmask_expand_state(device);
+ if (result != VK_SUCCESS)
+ goto fail_fmask_expand;
- if (!on_demand) {
- result = radv_device_init_meta_dcc_retile_state(device);
- if (result != VK_SUCCESS)
- goto fail_dcc_retile;
- }
+ if (!on_demand) {
+ result = radv_device_init_meta_dcc_retile_state(device);
+ if (result != VK_SUCCESS)
+ goto fail_dcc_retile;
+ }
- return VK_SUCCESS;
+ return VK_SUCCESS;
fail_dcc_retile:
- radv_device_finish_meta_fmask_expand_state(device);
+ radv_device_finish_meta_fmask_expand_state(device);
fail_fmask_expand:
- radv_device_finish_meta_resolve_fragment_state(device);
+ radv_device_finish_meta_resolve_fragment_state(device);
fail_resolve_fragment:
- radv_device_finish_meta_resolve_compute_state(device);
+ radv_device_finish_meta_resolve_compute_state(device);
fail_resolve_compute:
- radv_device_finish_meta_fast_clear_flush_state(device);
+ radv_device_finish_meta_fast_clear_flush_state(device);
fail_fast_clear:
- radv_device_finish_meta_query_state(device);
+ radv_device_finish_meta_query_state(device);
fail_query:
- radv_device_finish_meta_buffer_state(device);
+ radv_device_finish_meta_buffer_state(device);
fail_buffer:
- radv_device_finish_meta_depth_decomp_state(device);
+ radv_device_finish_meta_depth_decomp_state(device);
fail_depth_decomp:
- radv_device_finish_meta_bufimage_state(device);
+ radv_device_finish_meta_bufimage_state(device);
fail_bufimage:
- radv_device_finish_meta_blit2d_state(device);
+ radv_device_finish_meta_blit2d_state(device);
fail_blit2d:
- radv_device_finish_meta_blit_state(device);
+ radv_device_finish_meta_blit_state(device);
fail_blit:
- radv_device_finish_meta_resolve_state(device);
+ radv_device_finish_meta_resolve_state(device);
fail_resolve:
- radv_device_finish_meta_clear_state(device);
+ radv_device_finish_meta_clear_state(device);
fail_clear:
- mtx_destroy(&device->meta_state.mtx);
- radv_pipeline_cache_finish(&device->meta_state.cache);
- return result;
+ mtx_destroy(&device->meta_state.mtx);
+ radv_pipeline_cache_finish(&device->meta_state.cache);
+ return result;
}
void
radv_device_finish_meta(struct radv_device *device)
{
- radv_device_finish_meta_clear_state(device);
- radv_device_finish_meta_resolve_state(device);
- radv_device_finish_meta_blit_state(device);
- radv_device_finish_meta_blit2d_state(device);
- radv_device_finish_meta_bufimage_state(device);
- radv_device_finish_meta_depth_decomp_state(device);
- radv_device_finish_meta_query_state(device);
- radv_device_finish_meta_buffer_state(device);
- radv_device_finish_meta_fast_clear_flush_state(device);
- radv_device_finish_meta_resolve_compute_state(device);
- radv_device_finish_meta_resolve_fragment_state(device);
- radv_device_finish_meta_fmask_expand_state(device);
- radv_device_finish_meta_dcc_retile_state(device);
-
- radv_store_meta_pipeline(device);
- radv_pipeline_cache_finish(&device->meta_state.cache);
- mtx_destroy(&device->meta_state.mtx);
+ radv_device_finish_meta_clear_state(device);
+ radv_device_finish_meta_resolve_state(device);
+ radv_device_finish_meta_blit_state(device);
+ radv_device_finish_meta_blit2d_state(device);
+ radv_device_finish_meta_bufimage_state(device);
+ radv_device_finish_meta_depth_decomp_state(device);
+ radv_device_finish_meta_query_state(device);
+ radv_device_finish_meta_buffer_state(device);
+ radv_device_finish_meta_fast_clear_flush_state(device);
+ radv_device_finish_meta_resolve_compute_state(device);
+ radv_device_finish_meta_resolve_fragment_state(device);
+ radv_device_finish_meta_fmask_expand_state(device);
+ radv_device_finish_meta_dcc_retile_state(device);
+
+ radv_store_meta_pipeline(device);
+ radv_pipeline_cache_finish(&device->meta_state.cache);
+ mtx_destroy(&device->meta_state.mtx);
}
-nir_ssa_def *radv_meta_gen_rect_vertices_comp2(nir_builder *vs_b, nir_ssa_def *comp2)
+nir_ssa_def *
+radv_meta_gen_rect_vertices_comp2(nir_builder *vs_b, nir_ssa_def *comp2)
{
- nir_ssa_def *vertex_id = nir_load_vertex_id_zero_base(vs_b);
+ nir_ssa_def *vertex_id = nir_load_vertex_id_zero_base(vs_b);
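+   /* Meta draws use a rect-list primitive, so three corner vertices are enough
+    * to cover the whole rectangle: */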
- /* vertex 0 - -1.0, -1.0 */
- /* vertex 1 - -1.0, 1.0 */
- /* vertex 2 - 1.0, -1.0 */
- /* so channel 0 is vertex_id != 2 ? -1.0 : 1.0
- channel 1 is vertex id != 1 ? -1.0 : 1.0 */
+ /* vertex 0 - -1.0, -1.0 */
+ /* vertex 1 - -1.0, 1.0 */
+ /* vertex 2 - 1.0, -1.0 */
+ /* so channel 0 is vertex_id != 2 ? -1.0 : 1.0
+ channel 1 is vertex id != 1 ? -1.0 : 1.0 */
- nir_ssa_def *c0cmp = nir_ine(vs_b, vertex_id, nir_imm_int(vs_b, 2));
- nir_ssa_def *c1cmp = nir_ine(vs_b, vertex_id, nir_imm_int(vs_b, 1));
+ nir_ssa_def *c0cmp = nir_ine(vs_b, vertex_id, nir_imm_int(vs_b, 2));
+ nir_ssa_def *c1cmp = nir_ine(vs_b, vertex_id, nir_imm_int(vs_b, 1));
- nir_ssa_def *comp[4];
- comp[0] = nir_bcsel(vs_b, c0cmp,
- nir_imm_float(vs_b, -1.0),
- nir_imm_float(vs_b, 1.0));
+ nir_ssa_def *comp[4];
+ comp[0] = nir_bcsel(vs_b, c0cmp, nir_imm_float(vs_b, -1.0), nir_imm_float(vs_b, 1.0));
- comp[1] = nir_bcsel(vs_b, c1cmp,
- nir_imm_float(vs_b, -1.0),
- nir_imm_float(vs_b, 1.0));
- comp[2] = comp2;
- comp[3] = nir_imm_float(vs_b, 1.0);
- nir_ssa_def *outvec = nir_vec(vs_b, comp, 4);
+ comp[1] = nir_bcsel(vs_b, c1cmp, nir_imm_float(vs_b, -1.0), nir_imm_float(vs_b, 1.0));
+ comp[2] = comp2;
+ comp[3] = nir_imm_float(vs_b, 1.0);
+ nir_ssa_def *outvec = nir_vec(vs_b, comp, 4);
- return outvec;
+ return outvec;
}
-nir_ssa_def *radv_meta_gen_rect_vertices(nir_builder *vs_b)
+nir_ssa_def *
+radv_meta_gen_rect_vertices(nir_builder *vs_b)
{
- return radv_meta_gen_rect_vertices_comp2(vs_b, nir_imm_float(vs_b, 0.0));
+ return radv_meta_gen_rect_vertices_comp2(vs_b, nir_imm_float(vs_b, 0.0));
}
/* vertex shader that generates vertices */
nir_shader *
radv_meta_build_nir_vs_generate_vertices(void)
{
- const struct glsl_type *vec4 = glsl_vec4_type();
+ const struct glsl_type *vec4 = glsl_vec4_type();
- nir_variable *v_position;
+ nir_variable *v_position;
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, NULL, "meta_vs_gen_verts");
+ nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, NULL, "meta_vs_gen_verts");
- nir_ssa_def *outvec = radv_meta_gen_rect_vertices(&b);
+ nir_ssa_def *outvec = radv_meta_gen_rect_vertices(&b);
- v_position = nir_variable_create(b.shader, nir_var_shader_out, vec4,
- "gl_Position");
- v_position->data.location = VARYING_SLOT_POS;
+ v_position = nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position");
+ v_position->data.location = VARYING_SLOT_POS;
- nir_store_var(&b, v_position, outvec, 0xf);
+ nir_store_var(&b, v_position, outvec, 0xf);
- return b.shader;
+ return b.shader;
}
nir_shader *
radv_meta_build_nir_fs_noop(void)
{
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, "meta_noop_fs");
+ nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, "meta_noop_fs");
- return b.shader;
+ return b.shader;
}
-void radv_meta_build_resolve_shader_core(nir_builder *b,
- bool is_integer,
- int samples,
- nir_variable *input_img,
- nir_variable *color,
- nir_ssa_def *img_coord)
+void
+radv_meta_build_resolve_shader_core(nir_builder *b, bool is_integer, int samples,
+ nir_variable *input_img, nir_variable *color,
+ nir_ssa_def *img_coord)
{
- /* do a txf_ms on each sample */
- nir_ssa_def *tmp;
- bool inserted_if = false;
-
- nir_ssa_def *input_img_deref = &nir_build_deref_var(b, input_img)->dest.ssa;
-
- nir_tex_instr *tex = nir_tex_instr_create(b->shader, 3);
- tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
- tex->op = nir_texop_txf_ms;
- tex->src[0].src_type = nir_tex_src_coord;
- tex->src[0].src = nir_src_for_ssa(img_coord);
- tex->src[1].src_type = nir_tex_src_ms_index;
- tex->src[1].src = nir_src_for_ssa(nir_imm_int(b, 0));
- tex->src[2].src_type = nir_tex_src_texture_deref;
- tex->src[2].src = nir_src_for_ssa(input_img_deref);
- tex->dest_type = nir_type_float32;
- tex->is_array = false;
- tex->coord_components = 2;
-
- nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
- nir_builder_instr_insert(b, &tex->instr);
-
- tmp = &tex->dest.ssa;
-
- if (!is_integer && samples > 1) {
- nir_tex_instr *tex_all_same = nir_tex_instr_create(b->shader, 2);
- tex_all_same->sampler_dim = GLSL_SAMPLER_DIM_MS;
- tex_all_same->op = nir_texop_samples_identical;
- tex_all_same->src[0].src_type = nir_tex_src_coord;
- tex_all_same->src[0].src = nir_src_for_ssa(img_coord);
- tex_all_same->src[1].src_type = nir_tex_src_texture_deref;
- tex_all_same->src[1].src = nir_src_for_ssa(input_img_deref);
- tex_all_same->dest_type = nir_type_bool1;
- tex_all_same->is_array = false;
- tex_all_same->coord_components = 2;
-
- nir_ssa_dest_init(&tex_all_same->instr, &tex_all_same->dest, 1, 1, "tex");
- nir_builder_instr_insert(b, &tex_all_same->instr);
-
- nir_ssa_def *all_same = nir_ieq(b, &tex_all_same->dest.ssa, nir_imm_bool(b, false));
- nir_push_if(b, all_same);
- for (int i = 1; i < samples; i++) {
- nir_tex_instr *tex_add = nir_tex_instr_create(b->shader, 3);
- tex_add->sampler_dim = GLSL_SAMPLER_DIM_MS;
- tex_add->op = nir_texop_txf_ms;
- tex_add->src[0].src_type = nir_tex_src_coord;
- tex_add->src[0].src = nir_src_for_ssa(img_coord);
- tex_add->src[1].src_type = nir_tex_src_ms_index;
- tex_add->src[1].src = nir_src_for_ssa(nir_imm_int(b, i));
- tex_add->src[2].src_type = nir_tex_src_texture_deref;
- tex_add->src[2].src = nir_src_for_ssa(input_img_deref);
- tex_add->dest_type = nir_type_float32;
- tex_add->is_array = false;
- tex_add->coord_components = 2;
-
- nir_ssa_dest_init(&tex_add->instr, &tex_add->dest, 4, 32, "tex");
- nir_builder_instr_insert(b, &tex_add->instr);
-
- tmp = nir_fadd(b, tmp, &tex_add->dest.ssa);
- }
-
- tmp = nir_fdiv(b, tmp, nir_imm_float(b, samples));
- nir_store_var(b, color, tmp, 0xf);
- nir_push_else(b, NULL);
- inserted_if = true;
- }
- nir_store_var(b, color, &tex->dest.ssa, 0xf);
-
- if (inserted_if)
- nir_pop_if(b, NULL);
+ /* do a txf_ms on each sample */
+ nir_ssa_def *tmp;
+ bool inserted_if = false;
+
+ nir_ssa_def *input_img_deref = &nir_build_deref_var(b, input_img)->dest.ssa;
+
+ nir_tex_instr *tex = nir_tex_instr_create(b->shader, 3);
+ tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
+ tex->op = nir_texop_txf_ms;
+ tex->src[0].src_type = nir_tex_src_coord;
+ tex->src[0].src = nir_src_for_ssa(img_coord);
+ tex->src[1].src_type = nir_tex_src_ms_index;
+ tex->src[1].src = nir_src_for_ssa(nir_imm_int(b, 0));
+ tex->src[2].src_type = nir_tex_src_texture_deref;
+ tex->src[2].src = nir_src_for_ssa(input_img_deref);
+ tex->dest_type = nir_type_float32;
+ tex->is_array = false;
+ tex->coord_components = 2;
+
+ nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+ nir_builder_instr_insert(b, &tex->instr);
+
+ tmp = &tex->dest.ssa;
+
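+   /* For non-integer MSAA sources, nir_texop_samples_identical lets the shader
+    * skip the per-sample loop: only when the samples differ does it fetch the
+    * remaining samples and average them; otherwise sample 0 already holds the
+    * resolved value. */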
+ if (!is_integer && samples > 1) {
+ nir_tex_instr *tex_all_same = nir_tex_instr_create(b->shader, 2);
+ tex_all_same->sampler_dim = GLSL_SAMPLER_DIM_MS;
+ tex_all_same->op = nir_texop_samples_identical;
+ tex_all_same->src[0].src_type = nir_tex_src_coord;
+ tex_all_same->src[0].src = nir_src_for_ssa(img_coord);
+ tex_all_same->src[1].src_type = nir_tex_src_texture_deref;
+ tex_all_same->src[1].src = nir_src_for_ssa(input_img_deref);
+ tex_all_same->dest_type = nir_type_bool1;
+ tex_all_same->is_array = false;
+ tex_all_same->coord_components = 2;
+
+ nir_ssa_dest_init(&tex_all_same->instr, &tex_all_same->dest, 1, 1, "tex");
+ nir_builder_instr_insert(b, &tex_all_same->instr);
+
+ nir_ssa_def *all_same = nir_ieq(b, &tex_all_same->dest.ssa, nir_imm_bool(b, false));
+ nir_push_if(b, all_same);
+ for (int i = 1; i < samples; i++) {
+ nir_tex_instr *tex_add = nir_tex_instr_create(b->shader, 3);
+ tex_add->sampler_dim = GLSL_SAMPLER_DIM_MS;
+ tex_add->op = nir_texop_txf_ms;
+ tex_add->src[0].src_type = nir_tex_src_coord;
+ tex_add->src[0].src = nir_src_for_ssa(img_coord);
+ tex_add->src[1].src_type = nir_tex_src_ms_index;
+ tex_add->src[1].src = nir_src_for_ssa(nir_imm_int(b, i));
+ tex_add->src[2].src_type = nir_tex_src_texture_deref;
+ tex_add->src[2].src = nir_src_for_ssa(input_img_deref);
+ tex_add->dest_type = nir_type_float32;
+ tex_add->is_array = false;
+ tex_add->coord_components = 2;
+
+ nir_ssa_dest_init(&tex_add->instr, &tex_add->dest, 4, 32, "tex");
+ nir_builder_instr_insert(b, &tex_add->instr);
+
+ tmp = nir_fadd(b, tmp, &tex_add->dest.ssa);
+ }
+
+ tmp = nir_fdiv(b, tmp, nir_imm_float(b, samples));
+ nir_store_var(b, color, tmp, 0xf);
+ nir_push_else(b, NULL);
+ inserted_if = true;
+ }
+ nir_store_var(b, color, &tex->dest.ssa, 0xf);
+
+ if (inserted_if)
+ nir_pop_if(b, NULL);
}
nir_ssa_def *
radv_meta_load_descriptor(nir_builder *b, unsigned desc_set, unsigned binding)
{
- nir_ssa_def *rsrc = nir_vulkan_resource_index(
- b, 2, 32, nir_imm_int(b, 0), .desc_set=desc_set, .binding=binding);
- return nir_channel(b, rsrc, 0);
+ nir_ssa_def *rsrc = nir_vulkan_resource_index(b, 2, 32, nir_imm_int(b, 0), .desc_set = desc_set,
+ .binding = binding);
+ return nir_channel(b, rsrc, 0);
}
diff --git a/src/amd/vulkan/radv_meta.h b/src/amd/vulkan/radv_meta.h
index ce2c25f5850..f20e0b07a02 100644
--- a/src/amd/vulkan/radv_meta.h
+++ b/src/amd/vulkan/radv_meta.h
@@ -34,62 +34,62 @@ extern "C" {
#endif
enum radv_meta_save_flags {
- RADV_META_SAVE_PASS = (1 << 0),
- RADV_META_SAVE_CONSTANTS = (1 << 1),
- RADV_META_SAVE_DESCRIPTORS = (1 << 2),
- RADV_META_SAVE_GRAPHICS_PIPELINE = (1 << 3),
- RADV_META_SAVE_COMPUTE_PIPELINE = (1 << 4),
- RADV_META_SAVE_SAMPLE_LOCATIONS = (1 << 5),
+ RADV_META_SAVE_PASS = (1 << 0),
+ RADV_META_SAVE_CONSTANTS = (1 << 1),
+ RADV_META_SAVE_DESCRIPTORS = (1 << 2),
+ RADV_META_SAVE_GRAPHICS_PIPELINE = (1 << 3),
+ RADV_META_SAVE_COMPUTE_PIPELINE = (1 << 4),
+ RADV_META_SAVE_SAMPLE_LOCATIONS = (1 << 5),
};
struct radv_meta_saved_state {
- uint32_t flags;
-
- struct radv_descriptor_set *old_descriptor_set0;
- struct radv_pipeline *old_pipeline;
- struct radv_viewport_state viewport;
- struct radv_scissor_state scissor;
- struct radv_sample_locations_state sample_location;
-
- char push_constants[128];
-
- struct radv_render_pass *pass;
- const struct radv_subpass *subpass;
- struct radv_attachment_state *attachments;
- struct radv_framebuffer *framebuffer;
- VkRect2D render_area;
-
- VkCullModeFlags cull_mode;
- VkFrontFace front_face;
-
- unsigned primitive_topology;
-
- bool depth_test_enable;
- bool depth_write_enable;
- unsigned depth_compare_op;
- bool depth_bounds_test_enable;
- bool stencil_test_enable;
-
- struct {
- struct {
- VkStencilOp fail_op;
- VkStencilOp pass_op;
- VkStencilOp depth_fail_op;
- VkCompareOp compare_op;
- } front;
-
- struct {
- VkStencilOp fail_op;
- VkStencilOp pass_op;
- VkStencilOp depth_fail_op;
- VkCompareOp compare_op;
- } back;
- } stencil_op;
-
- struct {
- VkExtent2D size;
- VkFragmentShadingRateCombinerOpKHR combiner_ops[2];
- } fragment_shading_rate;
+ uint32_t flags;
+
+ struct radv_descriptor_set *old_descriptor_set0;
+ struct radv_pipeline *old_pipeline;
+ struct radv_viewport_state viewport;
+ struct radv_scissor_state scissor;
+ struct radv_sample_locations_state sample_location;
+
+ char push_constants[128];
+
+ struct radv_render_pass *pass;
+ const struct radv_subpass *subpass;
+ struct radv_attachment_state *attachments;
+ struct radv_framebuffer *framebuffer;
+ VkRect2D render_area;
+
+ VkCullModeFlags cull_mode;
+ VkFrontFace front_face;
+
+ unsigned primitive_topology;
+
+ bool depth_test_enable;
+ bool depth_write_enable;
+ unsigned depth_compare_op;
+ bool depth_bounds_test_enable;
+ bool stencil_test_enable;
+
+ struct {
+ struct {
+ VkStencilOp fail_op;
+ VkStencilOp pass_op;
+ VkStencilOp depth_fail_op;
+ VkCompareOp compare_op;
+ } front;
+
+ struct {
+ VkStencilOp fail_op;
+ VkStencilOp pass_op;
+ VkStencilOp depth_fail_op;
+ VkCompareOp compare_op;
+ } back;
+ } stencil_op;
+
+ struct {
+ VkExtent2D size;
+ VkFragmentShadingRateCombinerOpKHR combiner_ops[2];
+ } fragment_shading_rate;
};
VkResult radv_device_init_meta_clear_state(struct radv_device *device, bool on_demand);
@@ -128,135 +128,111 @@ void radv_device_finish_meta_fmask_expand_state(struct radv_device *device);
VkResult radv_device_init_meta_dcc_retile_state(struct radv_device *device);
void radv_device_finish_meta_dcc_retile_state(struct radv_device *device);
-void radv_meta_save(struct radv_meta_saved_state *saved_state,
- struct radv_cmd_buffer *cmd_buffer, uint32_t flags);
+void radv_meta_save(struct radv_meta_saved_state *saved_state, struct radv_cmd_buffer *cmd_buffer,
+ uint32_t flags);
void radv_meta_restore(const struct radv_meta_saved_state *state,
- struct radv_cmd_buffer *cmd_buffer);
+ struct radv_cmd_buffer *cmd_buffer);
VkImageViewType radv_meta_get_view_type(const struct radv_image *image);
uint32_t radv_meta_get_iview_layer(const struct radv_image *dest_image,
- const VkImageSubresourceLayers *dest_subresource,
- const VkOffset3D *dest_offset);
+ const VkImageSubresourceLayers *dest_subresource,
+ const VkOffset3D *dest_offset);
struct radv_meta_blit2d_surf {
- /** The size of an element in bytes. */
- uint8_t bs;
- VkFormat format;
-
- struct radv_image *image;
- unsigned level;
- unsigned layer;
- VkImageAspectFlags aspect_mask;
- VkImageLayout current_layout;
- bool disable_compression;
+ /** The size of an element in bytes. */
+ uint8_t bs;
+ VkFormat format;
+
+ struct radv_image *image;
+ unsigned level;
+ unsigned layer;
+ VkImageAspectFlags aspect_mask;
+ VkImageLayout current_layout;
+ bool disable_compression;
};
struct radv_meta_blit2d_buffer {
- struct radv_buffer *buffer;
- uint32_t offset;
- uint32_t pitch;
- uint8_t bs;
- VkFormat format;
+ struct radv_buffer *buffer;
+ uint32_t offset;
+ uint32_t pitch;
+ uint8_t bs;
+ VkFormat format;
};
struct radv_meta_blit2d_rect {
- uint32_t src_x, src_y;
- uint32_t dst_x, dst_y;
- uint32_t width, height;
+ uint32_t src_x, src_y;
+ uint32_t dst_x, dst_y;
+ uint32_t width, height;
};
-void radv_meta_begin_blit2d(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_saved_state *save);
+void radv_meta_begin_blit2d(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_saved_state *save);
-void radv_meta_blit2d(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_surf *src_img,
- struct radv_meta_blit2d_buffer *src_buf,
- struct radv_meta_blit2d_surf *dst,
- unsigned num_rects,
- struct radv_meta_blit2d_rect *rects);
-
-void radv_meta_end_blit2d(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_saved_state *save);
+void radv_meta_blit2d(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src_img,
+ struct radv_meta_blit2d_buffer *src_buf, struct radv_meta_blit2d_surf *dst,
+ unsigned num_rects, struct radv_meta_blit2d_rect *rects);
+void radv_meta_end_blit2d(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_saved_state *save);
VkResult radv_device_init_meta_bufimage_state(struct radv_device *device);
void radv_device_finish_meta_bufimage_state(struct radv_device *device);
void radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_surf *src,
- struct radv_meta_blit2d_buffer *dst,
- unsigned num_rects,
- struct radv_meta_blit2d_rect *rects);
+ struct radv_meta_blit2d_surf *src,
+ struct radv_meta_blit2d_buffer *dst, unsigned num_rects,
+ struct radv_meta_blit2d_rect *rects);
void radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_buffer *src,
- struct radv_meta_blit2d_surf *dst,
- unsigned num_rects,
- struct radv_meta_blit2d_rect *rects);
+ struct radv_meta_blit2d_buffer *src,
+ struct radv_meta_blit2d_surf *dst, unsigned num_rects,
+ struct radv_meta_blit2d_rect *rects);
void radv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_surf *src,
- struct radv_meta_blit2d_surf *dst,
- unsigned num_rects,
- struct radv_meta_blit2d_rect *rects);
-void radv_meta_clear_image_cs(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_surf *dst,
- const VkClearColorValue *clear_color);
-
-void radv_decompress_depth_stencil(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *subresourceRange,
- struct radv_sample_locations_state *sample_locs);
-void radv_resummarize_depth_stencil(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *subresourceRange,
- struct radv_sample_locations_state *sample_locs);
+ struct radv_meta_blit2d_surf *src,
+ struct radv_meta_blit2d_surf *dst, unsigned num_rects,
+ struct radv_meta_blit2d_rect *rects);
+void radv_meta_clear_image_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *dst,
+ const VkClearColorValue *clear_color);
+
+void radv_decompress_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *subresourceRange,
+ struct radv_sample_locations_state *sample_locs);
+void radv_resummarize_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *subresourceRange,
+ struct radv_sample_locations_state *sample_locs);
void radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *subresourceRange);
-void radv_decompress_dcc(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *subresourceRange);
+ struct radv_image *image,
+ const VkImageSubresourceRange *subresourceRange);
+void radv_decompress_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *subresourceRange);
void radv_retile_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image);
-void radv_expand_fmask_image_inplace(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *subresourceRange);
+void radv_expand_fmask_image_inplace(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *subresourceRange);
void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *src_image,
- VkFormat src_format,
- VkImageLayout src_image_layout,
- struct radv_image *dest_image,
- VkFormat dest_format,
- VkImageLayout dest_image_layout,
- const VkImageResolve2KHR *region);
+ struct radv_image *src_image, VkFormat src_format,
+ VkImageLayout src_image_layout, struct radv_image *dest_image,
+ VkFormat dest_format, VkImageLayout dest_image_layout,
+ const VkImageResolve2KHR *region);
void radv_meta_resolve_fragment_image(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *src_image,
- VkImageLayout src_image_layout,
- struct radv_image *dest_image,
- VkImageLayout dest_image_layout,
- const VkImageResolve2KHR *region);
+ struct radv_image *src_image, VkImageLayout src_image_layout,
+ struct radv_image *dest_image,
+ VkImageLayout dest_image_layout,
+ const VkImageResolve2KHR *region);
void radv_decompress_resolve_subpass_src(struct radv_cmd_buffer *cmd_buffer);
-void radv_decompress_resolve_src(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *src_image,
- VkImageLayout src_image_layout,
- const VkImageResolve2KHR *region);
-
-uint32_t radv_clear_cmask(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *range, uint32_t value);
-uint32_t radv_clear_fmask(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *range, uint32_t value);
-uint32_t radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *range, uint32_t value);
-uint32_t radv_clear_htile(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_image *image,
- const VkImageSubresourceRange *range, uint32_t value);
+void radv_decompress_resolve_src(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,
+ VkImageLayout src_image_layout, const VkImageResolve2KHR *region);
+
+uint32_t radv_clear_cmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *range, uint32_t value);
+uint32_t radv_clear_fmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *range, uint32_t value);
+uint32_t radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *range, uint32_t value);
+uint32_t radv_clear_htile(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image,
+ const VkImageSubresourceRange *range, uint32_t value);
/**
* Return whether the bound pipeline is the FMASK decompress pass.
@@ -264,11 +240,11 @@ uint32_t radv_clear_htile(struct radv_cmd_buffer *cmd_buffer,
static inline bool
radv_is_fmask_decompress_pipeline(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_meta_state *meta_state = &cmd_buffer->device->meta_state;
- struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
+ struct radv_meta_state *meta_state = &cmd_buffer->device->meta_state;
+ struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
- return radv_pipeline_to_handle(pipeline) ==
- meta_state->fast_clear_flush.fmask_decompress_pipeline;
+ return radv_pipeline_to_handle(pipeline) ==
+ meta_state->fast_clear_flush.fmask_decompress_pipeline;
}
/**
@@ -277,11 +253,10 @@ radv_is_fmask_decompress_pipeline(struct radv_cmd_buffer *cmd_buffer)
static inline bool
radv_is_dcc_decompress_pipeline(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_meta_state *meta_state = &cmd_buffer->device->meta_state;
- struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
+ struct radv_meta_state *meta_state = &cmd_buffer->device->meta_state;
+ struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
- return radv_pipeline_to_handle(pipeline) ==
- meta_state->fast_clear_flush.dcc_decompress_pipeline;
+ return radv_pipeline_to_handle(pipeline) == meta_state->fast_clear_flush.dcc_decompress_pipeline;
}
/* common nir builder helpers */
@@ -292,15 +267,11 @@ nir_ssa_def *radv_meta_gen_rect_vertices_comp2(nir_builder *vs_b, nir_ssa_def *c
nir_shader *radv_meta_build_nir_vs_generate_vertices(void);
nir_shader *radv_meta_build_nir_fs_noop(void);
-void radv_meta_build_resolve_shader_core(nir_builder *b,
- bool is_integer,
- int samples,
- nir_variable *input_img,
- nir_variable *color,
- nir_ssa_def *img_coord);
+void radv_meta_build_resolve_shader_core(nir_builder *b, bool is_integer, int samples,
+ nir_variable *input_img, nir_variable *color,
+ nir_ssa_def *img_coord);
-nir_ssa_def *radv_meta_load_descriptor(nir_builder *b, unsigned desc_set,
- unsigned binding);
+nir_ssa_def *radv_meta_load_descriptor(nir_builder *b, unsigned desc_set, unsigned binding);
#ifdef __cplusplus
}
diff --git a/src/amd/vulkan/radv_meta_blit.c b/src/amd/vulkan/radv_meta_blit.c
index ff573ad7f04..03a4912f033 100644
--- a/src/amd/vulkan/radv_meta_blit.c
+++ b/src/amd/vulkan/radv_meta_blit.c
@@ -21,1198 +21,1147 @@
* IN THE SOFTWARE.
*/
-#include "radv_meta.h"
#include "nir/nir_builder.h"
+#include "radv_meta.h"
struct blit_region {
- VkOffset3D src_offset;
- VkExtent3D src_extent;
- VkOffset3D dest_offset;
- VkExtent3D dest_extent;
+ VkOffset3D src_offset;
+ VkExtent3D src_extent;
+ VkOffset3D dest_offset;
+ VkExtent3D dest_extent;
};
-static VkResult
-build_pipeline(struct radv_device *device,
- VkImageAspectFlagBits aspect,
- enum glsl_sampler_dim tex_dim,
- unsigned fs_key,
- VkPipeline *pipeline);
+static VkResult build_pipeline(struct radv_device *device, VkImageAspectFlagBits aspect,
+ enum glsl_sampler_dim tex_dim, unsigned fs_key,
+ VkPipeline *pipeline);
static nir_shader *
build_nir_vertex_shader(void)
{
- const struct glsl_type *vec4 = glsl_vec4_type();
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, NULL, "meta_blit_vs");
+ const struct glsl_type *vec4 = glsl_vec4_type();
+ nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, NULL, "meta_blit_vs");
- nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out,
- vec4, "gl_Position");
- pos_out->data.location = VARYING_SLOT_POS;
+ nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position");
+ pos_out->data.location = VARYING_SLOT_POS;
- nir_variable *tex_pos_out = nir_variable_create(b.shader, nir_var_shader_out,
- vec4, "v_tex_pos");
- tex_pos_out->data.location = VARYING_SLOT_VAR0;
- tex_pos_out->data.interpolation = INTERP_MODE_SMOOTH;
+ nir_variable *tex_pos_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "v_tex_pos");
+ tex_pos_out->data.location = VARYING_SLOT_VAR0;
+ tex_pos_out->data.interpolation = INTERP_MODE_SMOOTH;
- nir_ssa_def *outvec = radv_meta_gen_rect_vertices(&b);
+ nir_ssa_def *outvec = radv_meta_gen_rect_vertices(&b);
- nir_store_var(&b, pos_out, outvec, 0xf);
+ nir_store_var(&b, pos_out, outvec, 0xf);
- nir_ssa_def *src_box = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range=16);
- nir_ssa_def *src0_z = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base=16, .range=4);
+ nir_ssa_def *src_box = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
+ nir_ssa_def *src0_z =
+ nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 16, .range = 4);
- nir_ssa_def *vertex_id = nir_load_vertex_id_zero_base(&b);
+ nir_ssa_def *vertex_id = nir_load_vertex_id_zero_base(&b);
- /* vertex 0 - src0_x, src0_y, src0_z */
- /* vertex 1 - src0_x, src1_y, src0_z*/
- /* vertex 2 - src1_x, src0_y, src0_z */
- /* so channel 0 is vertex_id != 2 ? src_x : src_x + w
- channel 1 is vertex id != 1 ? src_y : src_y + w */
+ /* vertex 0 - src0_x, src0_y, src0_z */
+   /* vertex 1 - src0_x, src1_y, src0_z */
+   /* vertex 2 - src1_x, src0_y, src0_z */
+   /* so channel 0 is vertex_id != 2 ? src_x : src_x + w
+      channel 1 is vertex_id != 1 ? src_y : src_y + h */
- nir_ssa_def *c0cmp = nir_ine(&b, vertex_id, nir_imm_int(&b, 2));
- nir_ssa_def *c1cmp = nir_ine(&b, vertex_id, nir_imm_int(&b, 1));
+ nir_ssa_def *c0cmp = nir_ine(&b, vertex_id, nir_imm_int(&b, 2));
+ nir_ssa_def *c1cmp = nir_ine(&b, vertex_id, nir_imm_int(&b, 1));
- nir_ssa_def *comp[4];
- comp[0] = nir_bcsel(&b, c0cmp,
- nir_channel(&b, src_box, 0),
- nir_channel(&b, src_box, 2));
+ nir_ssa_def *comp[4];
+ comp[0] = nir_bcsel(&b, c0cmp, nir_channel(&b, src_box, 0), nir_channel(&b, src_box, 2));
- comp[1] = nir_bcsel(&b, c1cmp,
- nir_channel(&b, src_box, 1),
- nir_channel(&b, src_box, 3));
- comp[2] = src0_z;
- comp[3] = nir_imm_float(&b, 1.0);
- nir_ssa_def *out_tex_vec = nir_vec(&b, comp, 4);
- nir_store_var(&b, tex_pos_out, out_tex_vec, 0xf);
- return b.shader;
+ comp[1] = nir_bcsel(&b, c1cmp, nir_channel(&b, src_box, 1), nir_channel(&b, src_box, 3));
+ comp[2] = src0_z;
+ comp[3] = nir_imm_float(&b, 1.0);
+ nir_ssa_def *out_tex_vec = nir_vec(&b, comp, 4);
+ nir_store_var(&b, tex_pos_out, out_tex_vec, 0xf);
+ return b.shader;
}
static nir_shader *
build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim)
{
- const struct glsl_type *vec4 = glsl_vec4_type();
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, "meta_blit_fs.%d", tex_dim);
-
- nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
- vec4, "v_tex_pos");
- tex_pos_in->data.location = VARYING_SLOT_VAR0;
-
- /* Swizzle the array index which comes in as Z coordinate into the right
- * position.
- */
- unsigned swz[] = { 0, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 1), 2 };
- nir_ssa_def *const tex_pos =
- nir_swizzle(&b, nir_load_var(&b, tex_pos_in), swz,
- (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3));
-
- const struct glsl_type *sampler_type =
- glsl_sampler_type(tex_dim, false, tex_dim != GLSL_SAMPLER_DIM_3D,
- glsl_get_base_type(vec4));
- nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform,
- sampler_type, "s_tex");
- sampler->data.descriptor_set = 0;
- sampler->data.binding = 0;
-
- nir_ssa_def *tex_deref = &nir_build_deref_var(&b, sampler)->dest.ssa;
-
- nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
- tex->sampler_dim = tex_dim;
- tex->op = nir_texop_tex;
- tex->src[0].src_type = nir_tex_src_coord;
- tex->src[0].src = nir_src_for_ssa(tex_pos);
- tex->src[1].src_type = nir_tex_src_texture_deref;
- tex->src[1].src = nir_src_for_ssa(tex_deref);
- tex->src[2].src_type = nir_tex_src_sampler_deref;
- tex->src[2].src = nir_src_for_ssa(tex_deref);
- tex->dest_type = nir_type_float32; /* TODO */
- tex->is_array = glsl_sampler_type_is_array(sampler_type);
- tex->coord_components = tex_pos->num_components;
-
- nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
- nir_builder_instr_insert(&b, &tex->instr);
-
- nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
- vec4, "f_color");
- color_out->data.location = FRAG_RESULT_DATA0;
- nir_store_var(&b, color_out, &tex->dest.ssa, 0xf);
-
- return b.shader;
+ const struct glsl_type *vec4 = glsl_vec4_type();
+ nir_builder b =
+ nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, "meta_blit_fs.%d", tex_dim);
+
+ nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, vec4, "v_tex_pos");
+ tex_pos_in->data.location = VARYING_SLOT_VAR0;
+
+ /* Swizzle the array index which comes in as Z coordinate into the right
+ * position.
+ */
+ unsigned swz[] = {0, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 1), 2};
+ nir_ssa_def *const tex_pos =
+ nir_swizzle(&b, nir_load_var(&b, tex_pos_in), swz, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3));
+
+ const struct glsl_type *sampler_type =
+ glsl_sampler_type(tex_dim, false, tex_dim != GLSL_SAMPLER_DIM_3D, glsl_get_base_type(vec4));
+ nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
+ sampler->data.descriptor_set = 0;
+ sampler->data.binding = 0;
+
+ nir_ssa_def *tex_deref = &nir_build_deref_var(&b, sampler)->dest.ssa;
+
+ nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
+ tex->sampler_dim = tex_dim;
+ tex->op = nir_texop_tex;
+ tex->src[0].src_type = nir_tex_src_coord;
+ tex->src[0].src = nir_src_for_ssa(tex_pos);
+ tex->src[1].src_type = nir_tex_src_texture_deref;
+ tex->src[1].src = nir_src_for_ssa(tex_deref);
+ tex->src[2].src_type = nir_tex_src_sampler_deref;
+ tex->src[2].src = nir_src_for_ssa(tex_deref);
+ tex->dest_type = nir_type_float32; /* TODO */
+ tex->is_array = glsl_sampler_type_is_array(sampler_type);
+ tex->coord_components = tex_pos->num_components;
+
+ nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+ nir_builder_instr_insert(&b, &tex->instr);
+
+ nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color");
+ color_out->data.location = FRAG_RESULT_DATA0;
+ nir_store_var(&b, color_out, &tex->dest.ssa, 0xf);
+
+ return b.shader;
}
static nir_shader *
build_nir_copy_fragment_shader_depth(enum glsl_sampler_dim tex_dim)
{
- const struct glsl_type *vec4 = glsl_vec4_type();
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, "meta_blit_depth_fs.%d", tex_dim);
-
- nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
- vec4, "v_tex_pos");
- tex_pos_in->data.location = VARYING_SLOT_VAR0;
-
- /* Swizzle the array index which comes in as Z coordinate into the right
- * position.
- */
- unsigned swz[] = { 0, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 1), 2 };
- nir_ssa_def *const tex_pos =
- nir_swizzle(&b, nir_load_var(&b, tex_pos_in), swz,
- (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3));
-
- const struct glsl_type *sampler_type =
- glsl_sampler_type(tex_dim, false, tex_dim != GLSL_SAMPLER_DIM_3D,
- glsl_get_base_type(vec4));
- nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform,
- sampler_type, "s_tex");
- sampler->data.descriptor_set = 0;
- sampler->data.binding = 0;
-
- nir_ssa_def *tex_deref = &nir_build_deref_var(&b, sampler)->dest.ssa;
-
- nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
- tex->sampler_dim = tex_dim;
- tex->op = nir_texop_tex;
- tex->src[0].src_type = nir_tex_src_coord;
- tex->src[0].src = nir_src_for_ssa(tex_pos);
- tex->src[1].src_type = nir_tex_src_texture_deref;
- tex->src[1].src = nir_src_for_ssa(tex_deref);
- tex->src[2].src_type = nir_tex_src_sampler_deref;
- tex->src[2].src = nir_src_for_ssa(tex_deref);
- tex->dest_type = nir_type_float32; /* TODO */
- tex->is_array = glsl_sampler_type_is_array(sampler_type);
- tex->coord_components = tex_pos->num_components;
-
- nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
- nir_builder_instr_insert(&b, &tex->instr);
-
- nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
- vec4, "f_color");
- color_out->data.location = FRAG_RESULT_DEPTH;
- nir_store_var(&b, color_out, &tex->dest.ssa, 0x1);
-
- return b.shader;
+ const struct glsl_type *vec4 = glsl_vec4_type();
+ nir_builder b =
+ nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, "meta_blit_depth_fs.%d", tex_dim);
+
+ nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, vec4, "v_tex_pos");
+ tex_pos_in->data.location = VARYING_SLOT_VAR0;
+
+ /* Swizzle the array index which comes in as Z coordinate into the right
+ * position.
+ */
+ unsigned swz[] = {0, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 1), 2};
+ nir_ssa_def *const tex_pos =
+ nir_swizzle(&b, nir_load_var(&b, tex_pos_in), swz, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3));
+
+ const struct glsl_type *sampler_type =
+ glsl_sampler_type(tex_dim, false, tex_dim != GLSL_SAMPLER_DIM_3D, glsl_get_base_type(vec4));
+ nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
+ sampler->data.descriptor_set = 0;
+ sampler->data.binding = 0;
+
+ nir_ssa_def *tex_deref = &nir_build_deref_var(&b, sampler)->dest.ssa;
+
+ nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
+ tex->sampler_dim = tex_dim;
+ tex->op = nir_texop_tex;
+ tex->src[0].src_type = nir_tex_src_coord;
+ tex->src[0].src = nir_src_for_ssa(tex_pos);
+ tex->src[1].src_type = nir_tex_src_texture_deref;
+ tex->src[1].src = nir_src_for_ssa(tex_deref);
+ tex->src[2].src_type = nir_tex_src_sampler_deref;
+ tex->src[2].src = nir_src_for_ssa(tex_deref);
+ tex->dest_type = nir_type_float32; /* TODO */
+ tex->is_array = glsl_sampler_type_is_array(sampler_type);
+ tex->coord_components = tex_pos->num_components;
+
+ nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+ nir_builder_instr_insert(&b, &tex->instr);
+
+ nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color");
+ color_out->data.location = FRAG_RESULT_DEPTH;
+ nir_store_var(&b, color_out, &tex->dest.ssa, 0x1);
+
+ return b.shader;
}
static nir_shader *
build_nir_copy_fragment_shader_stencil(enum glsl_sampler_dim tex_dim)
{
- const struct glsl_type *vec4 = glsl_vec4_type();
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, "meta_blit_stencil_fs.%d", tex_dim);
-
- nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
- vec4, "v_tex_pos");
- tex_pos_in->data.location = VARYING_SLOT_VAR0;
-
- /* Swizzle the array index which comes in as Z coordinate into the right
- * position.
- */
- unsigned swz[] = { 0, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 1), 2 };
- nir_ssa_def *const tex_pos =
- nir_swizzle(&b, nir_load_var(&b, tex_pos_in), swz,
- (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3));
-
- const struct glsl_type *sampler_type =
- glsl_sampler_type(tex_dim, false, tex_dim != GLSL_SAMPLER_DIM_3D,
- glsl_get_base_type(vec4));
- nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform,
- sampler_type, "s_tex");
- sampler->data.descriptor_set = 0;
- sampler->data.binding = 0;
-
- nir_ssa_def *tex_deref = &nir_build_deref_var(&b, sampler)->dest.ssa;
-
- nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
- tex->sampler_dim = tex_dim;
- tex->op = nir_texop_tex;
- tex->src[0].src_type = nir_tex_src_coord;
- tex->src[0].src = nir_src_for_ssa(tex_pos);
- tex->src[1].src_type = nir_tex_src_texture_deref;
- tex->src[1].src = nir_src_for_ssa(tex_deref);
- tex->src[2].src_type = nir_tex_src_sampler_deref;
- tex->src[2].src = nir_src_for_ssa(tex_deref);
- tex->dest_type = nir_type_float32; /* TODO */
- tex->is_array = glsl_sampler_type_is_array(sampler_type);
- tex->coord_components = tex_pos->num_components;
-
- nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
- nir_builder_instr_insert(&b, &tex->instr);
-
- nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
- vec4, "f_color");
- color_out->data.location = FRAG_RESULT_STENCIL;
- nir_store_var(&b, color_out, &tex->dest.ssa, 0x1);
-
- return b.shader;
+ const struct glsl_type *vec4 = glsl_vec4_type();
+ nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL,
+ "meta_blit_stencil_fs.%d", tex_dim);
+
+ nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, vec4, "v_tex_pos");
+ tex_pos_in->data.location = VARYING_SLOT_VAR0;
+
+ /* Swizzle the array index which comes in as Z coordinate into the right
+ * position.
+ */
+ unsigned swz[] = {0, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 1), 2};
+ nir_ssa_def *const tex_pos =
+ nir_swizzle(&b, nir_load_var(&b, tex_pos_in), swz, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3));
+
+ const struct glsl_type *sampler_type =
+ glsl_sampler_type(tex_dim, false, tex_dim != GLSL_SAMPLER_DIM_3D, glsl_get_base_type(vec4));
+ nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
+ sampler->data.descriptor_set = 0;
+ sampler->data.binding = 0;
+
+ nir_ssa_def *tex_deref = &nir_build_deref_var(&b, sampler)->dest.ssa;
+
+ nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
+ tex->sampler_dim = tex_dim;
+ tex->op = nir_texop_tex;
+ tex->src[0].src_type = nir_tex_src_coord;
+ tex->src[0].src = nir_src_for_ssa(tex_pos);
+ tex->src[1].src_type = nir_tex_src_texture_deref;
+ tex->src[1].src = nir_src_for_ssa(tex_deref);
+ tex->src[2].src_type = nir_tex_src_sampler_deref;
+ tex->src[2].src = nir_src_for_ssa(tex_deref);
+ tex->dest_type = nir_type_float32; /* TODO */
+ tex->is_array = glsl_sampler_type_is_array(sampler_type);
+ tex->coord_components = tex_pos->num_components;
+
+ nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+ nir_builder_instr_insert(&b, &tex->instr);
+
+ nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color");
+ color_out->data.location = FRAG_RESULT_STENCIL;
+ nir_store_var(&b, color_out, &tex->dest.ssa, 0x1);
+
+ return b.shader;
}
static enum glsl_sampler_dim
-translate_sampler_dim(VkImageType type) {
- switch(type) {
- case VK_IMAGE_TYPE_1D:
- return GLSL_SAMPLER_DIM_1D;
- case VK_IMAGE_TYPE_2D:
- return GLSL_SAMPLER_DIM_2D;
- case VK_IMAGE_TYPE_3D:
- return GLSL_SAMPLER_DIM_3D;
- default:
- unreachable("Unhandled image type");
- }
+translate_sampler_dim(VkImageType type)
+{
+ switch (type) {
+ case VK_IMAGE_TYPE_1D:
+ return GLSL_SAMPLER_DIM_1D;
+ case VK_IMAGE_TYPE_2D:
+ return GLSL_SAMPLER_DIM_2D;
+ case VK_IMAGE_TYPE_3D:
+ return GLSL_SAMPLER_DIM_3D;
+ default:
+ unreachable("Unhandled image type");
+ }
}
static void
-meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *src_image,
- struct radv_image_view *src_iview,
- VkImageLayout src_image_layout,
- float src_offset_0[3],
- float src_offset_1[3],
- struct radv_image *dest_image,
- struct radv_image_view *dest_iview,
- VkImageLayout dest_image_layout,
- VkOffset2D dest_offset_0,
- VkOffset2D dest_offset_1,
- VkRect2D dest_box,
+meta_emit_blit(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,
+ struct radv_image_view *src_iview, VkImageLayout src_image_layout,
+ float src_offset_0[3], float src_offset_1[3], struct radv_image *dest_image,
+ struct radv_image_view *dest_iview, VkImageLayout dest_image_layout,
+ VkOffset2D dest_offset_0, VkOffset2D dest_offset_1, VkRect2D dest_box,
VkSampler sampler)
{
- struct radv_device *device = cmd_buffer->device;
- uint32_t src_width = radv_minify(src_iview->image->info.width, src_iview->base_mip);
- uint32_t src_height = radv_minify(src_iview->image->info.height, src_iview->base_mip);
- uint32_t src_depth = radv_minify(src_iview->image->info.depth, src_iview->base_mip);
- uint32_t dst_width = radv_minify(dest_iview->image->info.width, dest_iview->base_mip);
- uint32_t dst_height = radv_minify(dest_iview->image->info.height, dest_iview->base_mip);
-
- assert(src_image->info.samples == dest_image->info.samples);
-
- float vertex_push_constants[5] = {
- src_offset_0[0] / (float)src_width,
- src_offset_0[1] / (float)src_height,
- src_offset_1[0] / (float)src_width,
- src_offset_1[1] / (float)src_height,
- src_offset_0[2] / (float)src_depth,
- };
-
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.blit.pipeline_layout,
- VK_SHADER_STAGE_VERTEX_BIT, 0, 20,
- vertex_push_constants);
-
- VkFramebuffer fb;
- radv_CreateFramebuffer(radv_device_to_handle(device),
- &(VkFramebufferCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
- .attachmentCount = 1,
- .pAttachments = (VkImageView[]) {
- radv_image_view_to_handle(dest_iview),
- },
- .width = dst_width,
- .height = dst_height,
- .layers = 1,
- }, &cmd_buffer->pool->alloc, &fb);
- VkPipeline* pipeline = NULL;
- unsigned fs_key = 0;
- switch (src_iview->aspect_mask) {
- case VK_IMAGE_ASPECT_COLOR_BIT: {
- unsigned dst_layout = radv_meta_dst_layout_from_layout(dest_image_layout);
- fs_key = radv_format_meta_fs_key(device, dest_image->vk_format);
-
- radv_cmd_buffer_begin_render_pass(cmd_buffer,
- &(VkRenderPassBeginInfo) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
- .renderPass = device->meta_state.blit.render_pass[fs_key][dst_layout],
- .framebuffer = fb,
- .renderArea = {
- .offset = { dest_box.offset.x, dest_box.offset.y },
- .extent = { dest_box.extent.width, dest_box.extent.height },
- },
- .clearValueCount = 0,
- .pClearValues = NULL,
- }, NULL);
- switch (src_image->type) {
- case VK_IMAGE_TYPE_1D:
- pipeline = &device->meta_state.blit.pipeline_1d_src[fs_key];
- break;
- case VK_IMAGE_TYPE_2D:
- pipeline = &device->meta_state.blit.pipeline_2d_src[fs_key];
- break;
- case VK_IMAGE_TYPE_3D:
- pipeline = &device->meta_state.blit.pipeline_3d_src[fs_key];
- break;
- default:
- unreachable("bad VkImageType");
- }
- break;
- }
- case VK_IMAGE_ASPECT_DEPTH_BIT: {
- enum radv_blit_ds_layout ds_layout = radv_meta_blit_ds_to_type(dest_image_layout);
- radv_cmd_buffer_begin_render_pass(cmd_buffer,
- &(VkRenderPassBeginInfo) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
- .renderPass = device->meta_state.blit.depth_only_rp[ds_layout],
- .framebuffer = fb,
- .renderArea = {
- .offset = { dest_box.offset.x, dest_box.offset.y },
- .extent = { dest_box.extent.width, dest_box.extent.height },
- },
- .clearValueCount = 0,
- .pClearValues = NULL,
- }, NULL);
- switch (src_image->type) {
- case VK_IMAGE_TYPE_1D:
- pipeline = &device->meta_state.blit.depth_only_1d_pipeline;
- break;
- case VK_IMAGE_TYPE_2D:
- pipeline = &device->meta_state.blit.depth_only_2d_pipeline;
- break;
- case VK_IMAGE_TYPE_3D:
- pipeline = &device->meta_state.blit.depth_only_3d_pipeline;
- break;
- default:
- unreachable("bad VkImageType");
- }
- break;
- }
- case VK_IMAGE_ASPECT_STENCIL_BIT: {
- enum radv_blit_ds_layout ds_layout = radv_meta_blit_ds_to_type(dest_image_layout);
- radv_cmd_buffer_begin_render_pass(cmd_buffer,
- &(VkRenderPassBeginInfo) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
- .renderPass = device->meta_state.blit.stencil_only_rp[ds_layout],
- .framebuffer = fb,
- .renderArea = {
- .offset = { dest_box.offset.x, dest_box.offset.y },
- .extent = { dest_box.extent.width, dest_box.extent.height },
- },
- .clearValueCount = 0,
- .pClearValues = NULL,
- }, NULL);
- switch (src_image->type) {
- case VK_IMAGE_TYPE_1D:
- pipeline = &device->meta_state.blit.stencil_only_1d_pipeline;
- break;
- case VK_IMAGE_TYPE_2D:
- pipeline = &device->meta_state.blit.stencil_only_2d_pipeline;
- break;
- case VK_IMAGE_TYPE_3D:
- pipeline = &device->meta_state.blit.stencil_only_3d_pipeline;
- break;
- default:
- unreachable("bad VkImageType");
- }
- break;
- }
- default:
- unreachable("bad VkImageType");
- }
-
- radv_cmd_buffer_set_subpass(cmd_buffer,
- &cmd_buffer->state.pass->subpasses[0]);
-
- if (!*pipeline) {
- VkResult ret = build_pipeline(device, src_iview->aspect_mask, translate_sampler_dim(src_image->type), fs_key, pipeline);
- if (ret != VK_SUCCESS) {
- cmd_buffer->record_result = ret;
- goto fail_pipeline;
- }
- }
-
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
-
- radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
- device->meta_state.blit.pipeline_layout,
- 0, /* set */
- 1, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
- .pImageInfo = (VkDescriptorImageInfo[]) {
- {
- .sampler = sampler,
- .imageView = radv_image_view_to_handle(src_iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- }
- }
- });
-
- radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
- .x = dest_offset_0.x,
- .y = dest_offset_0.y,
- .width = dest_offset_1.x - dest_offset_0.x,
- .height = dest_offset_1.y - dest_offset_0.y,
- .minDepth = 0.0f,
- .maxDepth = 1.0f
- });
-
- radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkRect2D) {
- .offset = (VkOffset2D) { MIN2(dest_offset_0.x, dest_offset_1.x), MIN2(dest_offset_0.y, dest_offset_1.y) },
- .extent = (VkExtent2D) {
- abs(dest_offset_1.x - dest_offset_0.x),
- abs(dest_offset_1.y - dest_offset_0.y)
- },
- });
-
- radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
+ struct radv_device *device = cmd_buffer->device;
+ uint32_t src_width = radv_minify(src_iview->image->info.width, src_iview->base_mip);
+ uint32_t src_height = radv_minify(src_iview->image->info.height, src_iview->base_mip);
+ uint32_t src_depth = radv_minify(src_iview->image->info.depth, src_iview->base_mip);
+ uint32_t dst_width = radv_minify(dest_iview->image->info.width, dest_iview->base_mip);
+ uint32_t dst_height = radv_minify(dest_iview->image->info.height, dest_iview->base_mip);
+
+ assert(src_image->info.samples == dest_image->info.samples);
+
+ float vertex_push_constants[5] = {
+ src_offset_0[0] / (float)src_width, src_offset_0[1] / (float)src_height,
+ src_offset_1[0] / (float)src_width, src_offset_1[1] / (float)src_height,
+ src_offset_0[2] / (float)src_depth,
+ };
+
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+ device->meta_state.blit.pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT, 0, 20,
+ vertex_push_constants);
+
+ VkFramebuffer fb;
+ radv_CreateFramebuffer(radv_device_to_handle(device),
+ &(VkFramebufferCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments =
+ (VkImageView[]){
+ radv_image_view_to_handle(dest_iview),
+ },
+ .width = dst_width,
+ .height = dst_height,
+ .layers = 1,
+ },
+ &cmd_buffer->pool->alloc, &fb);
+ VkPipeline *pipeline = NULL;
+ unsigned fs_key = 0;
+ switch (src_iview->aspect_mask) {
+ case VK_IMAGE_ASPECT_COLOR_BIT: {
+ unsigned dst_layout = radv_meta_dst_layout_from_layout(dest_image_layout);
+ fs_key = radv_format_meta_fs_key(device, dest_image->vk_format);
+
+ radv_cmd_buffer_begin_render_pass(
+ cmd_buffer,
+ &(VkRenderPassBeginInfo){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .renderPass = device->meta_state.blit.render_pass[fs_key][dst_layout],
+ .framebuffer = fb,
+ .renderArea =
+ {
+ .offset = {dest_box.offset.x, dest_box.offset.y},
+ .extent = {dest_box.extent.width, dest_box.extent.height},
+ },
+ .clearValueCount = 0,
+ .pClearValues = NULL,
+ },
+ NULL);
+ switch (src_image->type) {
+ case VK_IMAGE_TYPE_1D:
+ pipeline = &device->meta_state.blit.pipeline_1d_src[fs_key];
+ break;
+ case VK_IMAGE_TYPE_2D:
+ pipeline = &device->meta_state.blit.pipeline_2d_src[fs_key];
+ break;
+ case VK_IMAGE_TYPE_3D:
+ pipeline = &device->meta_state.blit.pipeline_3d_src[fs_key];
+ break;
+ default:
+ unreachable("bad VkImageType");
+ }
+ break;
+ }
+ case VK_IMAGE_ASPECT_DEPTH_BIT: {
+ enum radv_blit_ds_layout ds_layout = radv_meta_blit_ds_to_type(dest_image_layout);
+ radv_cmd_buffer_begin_render_pass(
+ cmd_buffer,
+ &(VkRenderPassBeginInfo){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .renderPass = device->meta_state.blit.depth_only_rp[ds_layout],
+ .framebuffer = fb,
+ .renderArea =
+ {
+ .offset = {dest_box.offset.x, dest_box.offset.y},
+ .extent = {dest_box.extent.width, dest_box.extent.height},
+ },
+ .clearValueCount = 0,
+ .pClearValues = NULL,
+ },
+ NULL);
+ switch (src_image->type) {
+ case VK_IMAGE_TYPE_1D:
+ pipeline = &device->meta_state.blit.depth_only_1d_pipeline;
+ break;
+ case VK_IMAGE_TYPE_2D:
+ pipeline = &device->meta_state.blit.depth_only_2d_pipeline;
+ break;
+ case VK_IMAGE_TYPE_3D:
+ pipeline = &device->meta_state.blit.depth_only_3d_pipeline;
+ break;
+ default:
+ unreachable("bad VkImageType");
+ }
+ break;
+ }
+ case VK_IMAGE_ASPECT_STENCIL_BIT: {
+ enum radv_blit_ds_layout ds_layout = radv_meta_blit_ds_to_type(dest_image_layout);
+ radv_cmd_buffer_begin_render_pass(
+ cmd_buffer,
+ &(VkRenderPassBeginInfo){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .renderPass = device->meta_state.blit.stencil_only_rp[ds_layout],
+ .framebuffer = fb,
+ .renderArea =
+ {
+ .offset = {dest_box.offset.x, dest_box.offset.y},
+ .extent = {dest_box.extent.width, dest_box.extent.height},
+ },
+ .clearValueCount = 0,
+ .pClearValues = NULL,
+ },
+ NULL);
+ switch (src_image->type) {
+ case VK_IMAGE_TYPE_1D:
+ pipeline = &device->meta_state.blit.stencil_only_1d_pipeline;
+ break;
+ case VK_IMAGE_TYPE_2D:
+ pipeline = &device->meta_state.blit.stencil_only_2d_pipeline;
+ break;
+ case VK_IMAGE_TYPE_3D:
+ pipeline = &device->meta_state.blit.stencil_only_3d_pipeline;
+ break;
+ default:
+ unreachable("bad VkImageType");
+ }
+ break;
+ }
+ default:
+      unreachable("bad aspect mask");
+ }
+
+ radv_cmd_buffer_set_subpass(cmd_buffer, &cmd_buffer->state.pass->subpasses[0]);
+
+ if (!*pipeline) {
+ VkResult ret = build_pipeline(device, src_iview->aspect_mask,
+ translate_sampler_dim(src_image->type), fs_key, pipeline);
+ if (ret != VK_SUCCESS) {
+ cmd_buffer->record_result = ret;
+ goto fail_pipeline;
+ }
+ }
+
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS,
+ *pipeline);
+
+ radv_meta_push_descriptor_set(
+ cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, device->meta_state.blit.pipeline_layout,
+ 0, /* set */
+ 1, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .pImageInfo = (VkDescriptorImageInfo[]){
+ {
+ .sampler = sampler,
+ .imageView = radv_image_view_to_handle(src_iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }}});
+
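+   /* The destination rectangle is applied as-is, so a flipped blit yields a
+    * negative viewport width/height here; the scissor below is normalized to
+    * a non-negative extent. */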
+ radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
+ &(VkViewport){.x = dest_offset_0.x,
+ .y = dest_offset_0.y,
+ .width = dest_offset_1.x - dest_offset_0.x,
+ .height = dest_offset_1.y - dest_offset_0.y,
+ .minDepth = 0.0f,
+ .maxDepth = 1.0f});
+
+ radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
+ &(VkRect2D){
+ .offset = (VkOffset2D){MIN2(dest_offset_0.x, dest_offset_1.x),
+ MIN2(dest_offset_0.y, dest_offset_1.y)},
+ .extent = (VkExtent2D){abs(dest_offset_1.x - dest_offset_0.x),
+ abs(dest_offset_1.y - dest_offset_0.y)},
+ });
+
+ radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
fail_pipeline:
- radv_cmd_buffer_end_render_pass(cmd_buffer);
+ radv_cmd_buffer_end_render_pass(cmd_buffer);
- /* At the point where we emit the draw call, all data from the
- * descriptor sets, etc. has been used. We are free to delete it.
- */
- /* TODO: above comment is not valid for at least descriptor sets/pools,
- * as we may not free them till after execution finishes. Check others. */
+ /* At the point where we emit the draw call, all data from the
+ * descriptor sets, etc. has been used. We are free to delete it.
+ */
+ /* TODO: above comment is not valid for at least descriptor sets/pools,
+ * as we may not free them till after execution finishes. Check others. */
- radv_DestroyFramebuffer(radv_device_to_handle(device), fb,
- &cmd_buffer->pool->alloc);
+ radv_DestroyFramebuffer(radv_device_to_handle(device), fb, &cmd_buffer->pool->alloc);
}
static bool
flip_coords(unsigned *src0, unsigned *src1, unsigned *dst0, unsigned *dst1)
{
- bool flip = false;
- if (*src0 > *src1) {
- unsigned tmp = *src0;
- *src0 = *src1;
- *src1 = tmp;
- flip = !flip;
- }
-
- if (*dst0 > *dst1) {
- unsigned tmp = *dst0;
- *dst0 = *dst1;
- *dst1 = tmp;
- flip = !flip;
- }
- return flip;
+ bool flip = false;
+ if (*src0 > *src1) {
+ unsigned tmp = *src0;
+ *src0 = *src1;
+ *src1 = tmp;
+ flip = !flip;
+ }
+
+ if (*dst0 > *dst1) {
+ unsigned tmp = *dst0;
+ *dst0 = *dst1;
+ *dst1 = tmp;
+ flip = !flip;
+ }
+ return flip;
}
static void
-blit_image(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *src_image,
- VkImageLayout src_image_layout,
- struct radv_image *dst_image,
- VkImageLayout dst_image_layout,
- const VkImageBlit2KHR *region,
- VkFilter filter)
+blit_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,
+ VkImageLayout src_image_layout, struct radv_image *dst_image,
+ VkImageLayout dst_image_layout, const VkImageBlit2KHR *region, VkFilter filter)
{
- const VkImageSubresourceLayers *src_res = &region->srcSubresource;
- const VkImageSubresourceLayers *dst_res = &region->dstSubresource;
- struct radv_device *device = cmd_buffer->device;
- struct radv_meta_saved_state saved_state;
- bool old_predicating;
- VkSampler sampler;
-
- /* From the Vulkan 1.0 spec:
- *
- * vkCmdBlitImage must not be used for multisampled source or
- * destination images. Use vkCmdResolveImage for this purpose.
- */
- assert(src_image->info.samples == 1);
- assert(dst_image->info.samples == 1);
-
- radv_CreateSampler(radv_device_to_handle(device),
- &(VkSamplerCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
- .magFilter = filter,
- .minFilter = filter,
- .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
- .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
- .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
- }, &cmd_buffer->pool->alloc, &sampler);
-
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_GRAPHICS_PIPELINE |
- RADV_META_SAVE_CONSTANTS |
- RADV_META_SAVE_DESCRIPTORS);
-
- /* VK_EXT_conditional_rendering says that blit commands should not be
- * affected by conditional rendering.
- */
- old_predicating = cmd_buffer->state.predicating;
- cmd_buffer->state.predicating = false;
-
- unsigned dst_start, dst_end;
- if (dst_image->type == VK_IMAGE_TYPE_3D) {
- assert(dst_res->baseArrayLayer == 0);
- dst_start = region->dstOffsets[0].z;
- dst_end = region->dstOffsets[1].z;
- } else {
- dst_start = dst_res->baseArrayLayer;
- dst_end = dst_start + dst_res->layerCount;
- }
-
- unsigned src_start, src_end;
- if (src_image->type == VK_IMAGE_TYPE_3D) {
- assert(src_res->baseArrayLayer == 0);
- src_start = region->srcOffsets[0].z;
- src_end = region->srcOffsets[1].z;
- } else {
- src_start = src_res->baseArrayLayer;
- src_end = src_start + src_res->layerCount;
- }
-
- bool flip_z = flip_coords(&src_start, &src_end, &dst_start, &dst_end);
- float src_z_step = (float)(src_end - src_start) /
- (float)(dst_end - dst_start);
-
- /* There is no interpolation to the pixel center during
- * rendering, so add the 0.5 offset ourselves here. */
- float depth_center_offset = 0;
- if (src_image->type == VK_IMAGE_TYPE_3D)
- depth_center_offset = 0.5 / (dst_end - dst_start) * (src_end - src_start);
-
- if (flip_z) {
- src_start = src_end;
- src_z_step *= -1;
- depth_center_offset *= -1;
- }
-
- unsigned src_x0 = region->srcOffsets[0].x;
- unsigned src_x1 = region->srcOffsets[1].x;
- unsigned dst_x0 = region->dstOffsets[0].x;
- unsigned dst_x1 = region->dstOffsets[1].x;
-
- unsigned src_y0 = region->srcOffsets[0].y;
- unsigned src_y1 = region->srcOffsets[1].y;
- unsigned dst_y0 = region->dstOffsets[0].y;
- unsigned dst_y1 = region->dstOffsets[1].y;
-
- VkRect2D dst_box;
- dst_box.offset.x = MIN2(dst_x0, dst_x1);
- dst_box.offset.y = MIN2(dst_y0, dst_y1);
- dst_box.extent.width = dst_x1 - dst_x0;
- dst_box.extent.height = dst_y1 - dst_y0;
-
- const unsigned num_layers = dst_end - dst_start;
- for (unsigned i = 0; i < num_layers; i++) {
- struct radv_image_view dst_iview, src_iview;
-
- const VkOffset2D dst_offset_0 = {
- .x = dst_x0,
- .y = dst_y0,
- };
- const VkOffset2D dst_offset_1 = {
- .x = dst_x1,
- .y = dst_y1,
- };
-
- float src_offset_0[3] = {
- src_x0,
- src_y0,
- src_start + i * src_z_step + depth_center_offset,
- };
- float src_offset_1[3] = {
- src_x1,
- src_y1,
- src_start + i * src_z_step + depth_center_offset,
- };
- const uint32_t dst_array_slice = dst_start + i;
-
- /* 3D images have just 1 layer */
- const uint32_t src_array_slice = src_image->type == VK_IMAGE_TYPE_3D ? 0 : src_start + i;
-
- radv_image_view_init(&dst_iview, cmd_buffer->device,
- &(VkImageViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(dst_image),
- .viewType = radv_meta_get_view_type(dst_image),
- .format = dst_image->vk_format,
- .subresourceRange = {
- .aspectMask = dst_res->aspectMask,
- .baseMipLevel = dst_res->mipLevel,
- .levelCount = 1,
- .baseArrayLayer = dst_array_slice,
- .layerCount = 1
- },
- }, NULL);
- radv_image_view_init(&src_iview, cmd_buffer->device,
- &(VkImageViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(src_image),
- .viewType = radv_meta_get_view_type(src_image),
- .format = src_image->vk_format,
- .subresourceRange = {
- .aspectMask = src_res->aspectMask,
- .baseMipLevel = src_res->mipLevel,
- .levelCount = 1,
- .baseArrayLayer = src_array_slice,
- .layerCount = 1
- },
- }, NULL);
- meta_emit_blit(cmd_buffer,
- src_image, &src_iview, src_image_layout,
- src_offset_0, src_offset_1,
- dst_image, &dst_iview, dst_image_layout,
- dst_offset_0, dst_offset_1,
- dst_box,
- sampler);
- }
-
- /* Restore conditional rendering. */
- cmd_buffer->state.predicating = old_predicating;
-
- radv_meta_restore(&saved_state, cmd_buffer);
-
- radv_DestroySampler(radv_device_to_handle(device), sampler,
- &cmd_buffer->pool->alloc);
+ const VkImageSubresourceLayers *src_res = &region->srcSubresource;
+ const VkImageSubresourceLayers *dst_res = &region->dstSubresource;
+ struct radv_device *device = cmd_buffer->device;
+ struct radv_meta_saved_state saved_state;
+ bool old_predicating;
+ VkSampler sampler;
+
+ /* From the Vulkan 1.0 spec:
+ *
+ * vkCmdBlitImage must not be used for multisampled source or
+ * destination images. Use vkCmdResolveImage for this purpose.
+ */
+ assert(src_image->info.samples == 1);
+ assert(dst_image->info.samples == 1);
+
+ radv_CreateSampler(radv_device_to_handle(device),
+ &(VkSamplerCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
+ .magFilter = filter,
+ .minFilter = filter,
+ .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
+ .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
+ .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
+ },
+ &cmd_buffer->pool->alloc, &sampler);
+
+ radv_meta_save(
+ &saved_state, cmd_buffer,
+ RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
+
+ /* VK_EXT_conditional_rendering says that blit commands should not be
+ * affected by conditional rendering.
+ */
+ old_predicating = cmd_buffer->state.predicating;
+ cmd_buffer->state.predicating = false;
+
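+   /* For 3D destinations, iterate over the destination z range rather than
+    * array layers. */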
+ unsigned dst_start, dst_end;
+ if (dst_image->type == VK_IMAGE_TYPE_3D) {
+ assert(dst_res->baseArrayLayer == 0);
+ dst_start = region->dstOffsets[0].z;
+ dst_end = region->dstOffsets[1].z;
+ } else {
+ dst_start = dst_res->baseArrayLayer;
+ dst_end = dst_start + dst_res->layerCount;
+ }
+
+ unsigned src_start, src_end;
+ if (src_image->type == VK_IMAGE_TYPE_3D) {
+ assert(src_res->baseArrayLayer == 0);
+ src_start = region->srcOffsets[0].z;
+ src_end = region->srcOffsets[1].z;
+ } else {
+ src_start = src_res->baseArrayLayer;
+ src_end = src_start + src_res->layerCount;
+ }
+
+ bool flip_z = flip_coords(&src_start, &src_end, &dst_start, &dst_end);
+ float src_z_step = (float)(src_end - src_start) / (float)(dst_end - dst_start);
+
+ /* There is no interpolation to the pixel center during
+ * rendering, so add the 0.5 offset ourselves here. */
+ float depth_center_offset = 0;
+ if (src_image->type == VK_IMAGE_TYPE_3D)
+ depth_center_offset = 0.5 / (dst_end - dst_start) * (src_end - src_start);
+
+ if (flip_z) {
+ src_start = src_end;
+ src_z_step *= -1;
+ depth_center_offset *= -1;
+ }
+
+ unsigned src_x0 = region->srcOffsets[0].x;
+ unsigned src_x1 = region->srcOffsets[1].x;
+ unsigned dst_x0 = region->dstOffsets[0].x;
+ unsigned dst_x1 = region->dstOffsets[1].x;
+
+ unsigned src_y0 = region->srcOffsets[0].y;
+ unsigned src_y1 = region->srcOffsets[1].y;
+ unsigned dst_y0 = region->dstOffsets[0].y;
+ unsigned dst_y1 = region->dstOffsets[1].y;
+
+ VkRect2D dst_box;
+ dst_box.offset.x = MIN2(dst_x0, dst_x1);
+ dst_box.offset.y = MIN2(dst_y0, dst_y1);
+ dst_box.extent.width = dst_x1 - dst_x0;
+ dst_box.extent.height = dst_y1 - dst_y0;
+
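+   /* Emit one blit per destination slice, each with its own single-layer
+    * source and destination image views. */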
+ const unsigned num_layers = dst_end - dst_start;
+ for (unsigned i = 0; i < num_layers; i++) {
+ struct radv_image_view dst_iview, src_iview;
+
+ const VkOffset2D dst_offset_0 = {
+ .x = dst_x0,
+ .y = dst_y0,
+ };
+ const VkOffset2D dst_offset_1 = {
+ .x = dst_x1,
+ .y = dst_y1,
+ };
+
+ float src_offset_0[3] = {
+ src_x0,
+ src_y0,
+ src_start + i * src_z_step + depth_center_offset,
+ };
+ float src_offset_1[3] = {
+ src_x1,
+ src_y1,
+ src_start + i * src_z_step + depth_center_offset,
+ };
+ const uint32_t dst_array_slice = dst_start + i;
+
+ /* 3D images have just 1 layer */
+ const uint32_t src_array_slice = src_image->type == VK_IMAGE_TYPE_3D ? 0 : src_start + i;
+
+ radv_image_view_init(&dst_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(dst_image),
+ .viewType = radv_meta_get_view_type(dst_image),
+ .format = dst_image->vk_format,
+ .subresourceRange = {.aspectMask = dst_res->aspectMask,
+ .baseMipLevel = dst_res->mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = dst_array_slice,
+ .layerCount = 1},
+ },
+ NULL);
+ radv_image_view_init(&src_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(src_image),
+ .viewType = radv_meta_get_view_type(src_image),
+ .format = src_image->vk_format,
+ .subresourceRange = {.aspectMask = src_res->aspectMask,
+ .baseMipLevel = src_res->mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = src_array_slice,
+ .layerCount = 1},
+ },
+ NULL);
+ meta_emit_blit(cmd_buffer, src_image, &src_iview, src_image_layout, src_offset_0,
+ src_offset_1, dst_image, &dst_iview, dst_image_layout, dst_offset_0,
+ dst_offset_1, dst_box, sampler);
+ }
+
+ /* Restore conditional rendering. */
+ cmd_buffer->state.predicating = old_predicating;
+
+ radv_meta_restore(&saved_state, cmd_buffer);
+
+ radv_DestroySampler(radv_device_to_handle(device), sampler, &cmd_buffer->pool->alloc);
}
-void radv_CmdBlitImage2KHR(
- VkCommandBuffer commandBuffer,
- const VkBlitImageInfo2KHR* pBlitImageInfo)
+void
+radv_CmdBlitImage2KHR(VkCommandBuffer commandBuffer, const VkBlitImageInfo2KHR *pBlitImageInfo)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_image, src_image, pBlitImageInfo->srcImage);
- RADV_FROM_HANDLE(radv_image, dst_image, pBlitImageInfo->dstImage);
-
- for (unsigned r = 0; r < pBlitImageInfo->regionCount; r++) {
- blit_image(cmd_buffer,
- src_image, pBlitImageInfo->srcImageLayout,
- dst_image, pBlitImageInfo->dstImageLayout,
- &pBlitImageInfo->pRegions[r],
- pBlitImageInfo->filter);
- }
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_image, src_image, pBlitImageInfo->srcImage);
+ RADV_FROM_HANDLE(radv_image, dst_image, pBlitImageInfo->dstImage);
+
+ for (unsigned r = 0; r < pBlitImageInfo->regionCount; r++) {
+ blit_image(cmd_buffer, src_image, pBlitImageInfo->srcImageLayout, dst_image,
+ pBlitImageInfo->dstImageLayout, &pBlitImageInfo->pRegions[r],
+ pBlitImageInfo->filter);
+ }
}
void
radv_device_finish_meta_blit_state(struct radv_device *device)
{
- struct radv_meta_state *state = &device->meta_state;
-
- for (unsigned i = 0; i < NUM_META_FS_KEYS; ++i) {
- for (unsigned j = 0; j < RADV_META_DST_LAYOUT_COUNT; ++j) {
- radv_DestroyRenderPass(radv_device_to_handle(device),
- state->blit.render_pass[i][j],
- &state->alloc);
- }
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->blit.pipeline_1d_src[i],
- &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->blit.pipeline_2d_src[i],
- &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->blit.pipeline_3d_src[i],
- &state->alloc);
- }
-
- for (enum radv_blit_ds_layout i = RADV_BLIT_DS_LAYOUT_TILE_ENABLE; i < RADV_BLIT_DS_LAYOUT_COUNT; i++) {
- radv_DestroyRenderPass(radv_device_to_handle(device),
- state->blit.depth_only_rp[i], &state->alloc);
- radv_DestroyRenderPass(radv_device_to_handle(device),
- state->blit.stencil_only_rp[i], &state->alloc);
- }
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->blit.depth_only_1d_pipeline, &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->blit.depth_only_2d_pipeline, &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->blit.depth_only_3d_pipeline, &state->alloc);
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->blit.stencil_only_1d_pipeline,
- &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->blit.stencil_only_2d_pipeline,
- &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->blit.stencil_only_3d_pipeline,
- &state->alloc);
-
-
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->blit.pipeline_layout, &state->alloc);
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- state->blit.ds_layout, &state->alloc);
+ struct radv_meta_state *state = &device->meta_state;
+
+ for (unsigned i = 0; i < NUM_META_FS_KEYS; ++i) {
+ for (unsigned j = 0; j < RADV_META_DST_LAYOUT_COUNT; ++j) {
+ radv_DestroyRenderPass(radv_device_to_handle(device), state->blit.render_pass[i][j],
+ &state->alloc);
+ }
+ radv_DestroyPipeline(radv_device_to_handle(device), state->blit.pipeline_1d_src[i],
+ &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->blit.pipeline_2d_src[i],
+ &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->blit.pipeline_3d_src[i],
+ &state->alloc);
+ }
+
+ for (enum radv_blit_ds_layout i = RADV_BLIT_DS_LAYOUT_TILE_ENABLE; i < RADV_BLIT_DS_LAYOUT_COUNT;
+ i++) {
+ radv_DestroyRenderPass(radv_device_to_handle(device), state->blit.depth_only_rp[i],
+ &state->alloc);
+ radv_DestroyRenderPass(radv_device_to_handle(device), state->blit.stencil_only_rp[i],
+ &state->alloc);
+ }
+
+ radv_DestroyPipeline(radv_device_to_handle(device), state->blit.depth_only_1d_pipeline,
+ &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->blit.depth_only_2d_pipeline,
+ &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->blit.depth_only_3d_pipeline,
+ &state->alloc);
+
+ radv_DestroyPipeline(radv_device_to_handle(device), state->blit.stencil_only_1d_pipeline,
+ &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->blit.stencil_only_2d_pipeline,
+ &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->blit.stencil_only_3d_pipeline,
+ &state->alloc);
+
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->blit.pipeline_layout,
+ &state->alloc);
+ radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->blit.ds_layout,
+ &state->alloc);
}
static VkResult
-build_pipeline(struct radv_device *device,
- VkImageAspectFlagBits aspect,
- enum glsl_sampler_dim tex_dim,
- unsigned fs_key,
- VkPipeline *pipeline)
+build_pipeline(struct radv_device *device, VkImageAspectFlagBits aspect,
+ enum glsl_sampler_dim tex_dim, unsigned fs_key, VkPipeline *pipeline)
{
- VkResult result = VK_SUCCESS;
-
- mtx_lock(&device->meta_state.mtx);
-
- if (*pipeline) {
- mtx_unlock(&device->meta_state.mtx);
- return VK_SUCCESS;
- }
-
- nir_shader *fs;
- nir_shader *vs = build_nir_vertex_shader();
- VkRenderPass rp;
-
- switch(aspect) {
- case VK_IMAGE_ASPECT_COLOR_BIT:
- fs = build_nir_copy_fragment_shader(tex_dim);
- rp = device->meta_state.blit.render_pass[fs_key][0];
- break;
- case VK_IMAGE_ASPECT_DEPTH_BIT:
- fs = build_nir_copy_fragment_shader_depth(tex_dim);
- rp = device->meta_state.blit.depth_only_rp[0];
- break;
- case VK_IMAGE_ASPECT_STENCIL_BIT:
- fs = build_nir_copy_fragment_shader_stencil(tex_dim);
- rp = device->meta_state.blit.stencil_only_rp[0];
- break;
- default:
- unreachable("Unhandled aspect");
- }
- VkPipelineVertexInputStateCreateInfo vi_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
- .vertexBindingDescriptionCount = 0,
- .vertexAttributeDescriptionCount = 0,
- };
-
- VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
- {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_VERTEX_BIT,
- .module = vk_shader_module_handle_from_nir(vs),
- .pName = "main",
- .pSpecializationInfo = NULL
- }, {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
- .module = vk_shader_module_handle_from_nir(fs),
- .pName = "main",
- .pSpecializationInfo = NULL
- },
- };
-
- VkGraphicsPipelineCreateInfo vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
- .stageCount = ARRAY_SIZE(pipeline_shader_stages),
- .pStages = pipeline_shader_stages,
- .pVertexInputState = &vi_create_info,
- .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
- .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
- .primitiveRestartEnable = false,
- },
- .pViewportState = &(VkPipelineViewportStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
- .viewportCount = 1,
- .scissorCount = 1,
- },
- .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
- .rasterizerDiscardEnable = false,
- .polygonMode = VK_POLYGON_MODE_FILL,
- .cullMode = VK_CULL_MODE_NONE,
- .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE
- },
- .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
- .rasterizationSamples = 1,
- .sampleShadingEnable = false,
- .pSampleMask = (VkSampleMask[]) { UINT32_MAX },
- },
- .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
- .dynamicStateCount = 4,
- .pDynamicStates = (VkDynamicState[]) {
- VK_DYNAMIC_STATE_VIEWPORT,
- VK_DYNAMIC_STATE_SCISSOR,
- VK_DYNAMIC_STATE_LINE_WIDTH,
- VK_DYNAMIC_STATE_BLEND_CONSTANTS,
- },
- },
- .flags = 0,
- .layout = device->meta_state.blit.pipeline_layout,
- .renderPass = rp,
- .subpass = 0,
- };
-
- VkPipelineColorBlendStateCreateInfo color_blend_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
- .attachmentCount = 1,
- .pAttachments = (VkPipelineColorBlendAttachmentState []) {
- {
- .colorWriteMask = VK_COLOR_COMPONENT_A_BIT |
- VK_COLOR_COMPONENT_R_BIT |
- VK_COLOR_COMPONENT_G_BIT |
- VK_COLOR_COMPONENT_B_BIT },
- }
- };
-
- VkPipelineDepthStencilStateCreateInfo depth_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
- .depthTestEnable = true,
- .depthWriteEnable = true,
- .depthCompareOp = VK_COMPARE_OP_ALWAYS,
- };
-
- VkPipelineDepthStencilStateCreateInfo stencil_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
- .depthTestEnable = false,
- .depthWriteEnable = false,
- .stencilTestEnable = true,
- .front = {
- .failOp = VK_STENCIL_OP_REPLACE,
- .passOp = VK_STENCIL_OP_REPLACE,
- .depthFailOp = VK_STENCIL_OP_REPLACE,
- .compareOp = VK_COMPARE_OP_ALWAYS,
- .compareMask = 0xff,
- .writeMask = 0xff,
- .reference = 0
- },
- .back = {
- .failOp = VK_STENCIL_OP_REPLACE,
- .passOp = VK_STENCIL_OP_REPLACE,
- .depthFailOp = VK_STENCIL_OP_REPLACE,
- .compareOp = VK_COMPARE_OP_ALWAYS,
- .compareMask = 0xff,
- .writeMask = 0xff,
- .reference = 0
- },
- .depthCompareOp = VK_COMPARE_OP_ALWAYS,
- };
-
- switch(aspect) {
- case VK_IMAGE_ASPECT_COLOR_BIT:
- vk_pipeline_info.pColorBlendState = &color_blend_info;
- break;
- case VK_IMAGE_ASPECT_DEPTH_BIT:
- vk_pipeline_info.pDepthStencilState = &depth_info;
- break;
- case VK_IMAGE_ASPECT_STENCIL_BIT:
- vk_pipeline_info.pDepthStencilState = &stencil_info;
- break;
- default:
- unreachable("Unhandled aspect");
- }
-
- const struct radv_graphics_pipeline_create_info radv_pipeline_info = {
- .use_rectlist = true
- };
-
- result = radv_graphics_pipeline_create(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- &vk_pipeline_info, &radv_pipeline_info,
- &device->meta_state.alloc, pipeline);
- ralloc_free(vs);
- ralloc_free(fs);
- mtx_unlock(&device->meta_state.mtx);
- return result;
+ VkResult result = VK_SUCCESS;
+
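+   /* Meta pipelines are compiled lazily: take the meta-state mutex and
+    * re-check so that concurrent first users only build the pipeline once. */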
+ mtx_lock(&device->meta_state.mtx);
+
+ if (*pipeline) {
+ mtx_unlock(&device->meta_state.mtx);
+ return VK_SUCCESS;
+ }
+
+ nir_shader *fs;
+ nir_shader *vs = build_nir_vertex_shader();
+ VkRenderPass rp;
+
+ switch (aspect) {
+ case VK_IMAGE_ASPECT_COLOR_BIT:
+ fs = build_nir_copy_fragment_shader(tex_dim);
+ rp = device->meta_state.blit.render_pass[fs_key][0];
+ break;
+ case VK_IMAGE_ASPECT_DEPTH_BIT:
+ fs = build_nir_copy_fragment_shader_depth(tex_dim);
+ rp = device->meta_state.blit.depth_only_rp[0];
+ break;
+ case VK_IMAGE_ASPECT_STENCIL_BIT:
+ fs = build_nir_copy_fragment_shader_stencil(tex_dim);
+ rp = device->meta_state.blit.stencil_only_rp[0];
+ break;
+ default:
+ unreachable("Unhandled aspect");
+ }
+ VkPipelineVertexInputStateCreateInfo vi_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .vertexBindingDescriptionCount = 0,
+ .vertexAttributeDescriptionCount = 0,
+ };
+
+ VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
+ {.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_VERTEX_BIT,
+ .module = vk_shader_module_handle_from_nir(vs),
+ .pName = "main",
+ .pSpecializationInfo = NULL},
+ {.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .module = vk_shader_module_handle_from_nir(fs),
+ .pName = "main",
+ .pSpecializationInfo = NULL},
+ };
+
+ VkGraphicsPipelineCreateInfo vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .stageCount = ARRAY_SIZE(pipeline_shader_stages),
+ .pStages = pipeline_shader_stages,
+ .pVertexInputState = &vi_create_info,
+ .pInputAssemblyState =
+ &(VkPipelineInputAssemblyStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
+ .primitiveRestartEnable = false,
+ },
+ .pViewportState =
+ &(VkPipelineViewportStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .viewportCount = 1,
+ .scissorCount = 1,
+ },
+ .pRasterizationState =
+ &(VkPipelineRasterizationStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .rasterizerDiscardEnable = false,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .cullMode = VK_CULL_MODE_NONE,
+ .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE},
+ .pMultisampleState =
+ &(VkPipelineMultisampleStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .rasterizationSamples = 1,
+ .sampleShadingEnable = false,
+ .pSampleMask = (VkSampleMask[]){UINT32_MAX},
+ },
+ .pDynamicState =
+ &(VkPipelineDynamicStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .dynamicStateCount = 4,
+ .pDynamicStates =
+ (VkDynamicState[]){
+ VK_DYNAMIC_STATE_VIEWPORT,
+ VK_DYNAMIC_STATE_SCISSOR,
+ VK_DYNAMIC_STATE_LINE_WIDTH,
+ VK_DYNAMIC_STATE_BLEND_CONSTANTS,
+ },
+ },
+ .flags = 0,
+ .layout = device->meta_state.blit.pipeline_layout,
+ .renderPass = rp,
+ .subpass = 0,
+ };
+
+ VkPipelineColorBlendStateCreateInfo color_blend_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments = (VkPipelineColorBlendAttachmentState[]){
+ {.colorWriteMask = VK_COLOR_COMPONENT_A_BIT | VK_COLOR_COMPONENT_R_BIT |
+ VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT},
+ }};
+
+ VkPipelineDepthStencilStateCreateInfo depth_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
+ .depthTestEnable = true,
+ .depthWriteEnable = true,
+ .depthCompareOp = VK_COMPARE_OP_ALWAYS,
+ };
+
+ VkPipelineDepthStencilStateCreateInfo stencil_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
+ .depthTestEnable = false,
+ .depthWriteEnable = false,
+ .stencilTestEnable = true,
+ .front = {.failOp = VK_STENCIL_OP_REPLACE,
+ .passOp = VK_STENCIL_OP_REPLACE,
+ .depthFailOp = VK_STENCIL_OP_REPLACE,
+ .compareOp = VK_COMPARE_OP_ALWAYS,
+ .compareMask = 0xff,
+ .writeMask = 0xff,
+ .reference = 0},
+ .back = {.failOp = VK_STENCIL_OP_REPLACE,
+ .passOp = VK_STENCIL_OP_REPLACE,
+ .depthFailOp = VK_STENCIL_OP_REPLACE,
+ .compareOp = VK_COMPARE_OP_ALWAYS,
+ .compareMask = 0xff,
+ .writeMask = 0xff,
+ .reference = 0},
+ .depthCompareOp = VK_COMPARE_OP_ALWAYS,
+ };
+
+ switch (aspect) {
+ case VK_IMAGE_ASPECT_COLOR_BIT:
+ vk_pipeline_info.pColorBlendState = &color_blend_info;
+ break;
+ case VK_IMAGE_ASPECT_DEPTH_BIT:
+ vk_pipeline_info.pDepthStencilState = &depth_info;
+ break;
+ case VK_IMAGE_ASPECT_STENCIL_BIT:
+ vk_pipeline_info.pDepthStencilState = &stencil_info;
+ break;
+ default:
+ unreachable("Unhandled aspect");
+ }
+
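+   /* Meta blits draw a single rect-list primitive (the 3-vertex draw in
+    * meta_emit_blit()) instead of a full quad. */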
+ const struct radv_graphics_pipeline_create_info radv_pipeline_info = {.use_rectlist = true};
+
+ result = radv_graphics_pipeline_create(
+ radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache),
+ &vk_pipeline_info, &radv_pipeline_info, &device->meta_state.alloc, pipeline);
+ ralloc_free(vs);
+ ralloc_free(fs);
+ mtx_unlock(&device->meta_state.mtx);
+ return result;
}
static VkResult
radv_device_init_meta_blit_color(struct radv_device *device, bool on_demand)
{
- VkResult result;
-
- for (unsigned i = 0; i < NUM_META_FS_KEYS; ++i) {
- unsigned key = radv_format_meta_fs_key(device, radv_fs_key_format_exemplars[i]);
- for(unsigned j = 0; j < RADV_META_DST_LAYOUT_COUNT; ++j) {
- VkImageLayout layout = radv_meta_dst_layout_to_layout(j);
- result = radv_CreateRenderPass2(radv_device_to_handle(device),
- &(VkRenderPassCreateInfo2) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
- .attachmentCount = 1,
- .pAttachments = &(VkAttachmentDescription2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
- .format = radv_fs_key_format_exemplars[i],
- .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
- .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
- .initialLayout = layout,
- .finalLayout = layout,
- },
- .subpassCount = 1,
- .pSubpasses = &(VkSubpassDescription2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
- .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
- .inputAttachmentCount = 0,
- .colorAttachmentCount = 1,
- .pColorAttachments = &(VkAttachmentReference2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
- .attachment = 0,
- .layout = layout,
- },
- .pResolveAttachments = NULL,
- .pDepthStencilAttachment = &(VkAttachmentReference2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
- .attachment = VK_ATTACHMENT_UNUSED,
- .layout = VK_IMAGE_LAYOUT_GENERAL,
- },
- .preserveAttachmentCount = 0,
- .pPreserveAttachments = NULL,
- },
- .dependencyCount = 2,
- .pDependencies = (VkSubpassDependency2[]) {
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = VK_SUBPASS_EXTERNAL,
- .dstSubpass = 0,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- },
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = 0,
- .dstSubpass = VK_SUBPASS_EXTERNAL,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- }
- },
- }, &device->meta_state.alloc, &device->meta_state.blit.render_pass[key][j]);
- if (result != VK_SUCCESS)
- goto fail;
- }
-
- if (on_demand)
- continue;
-
- result = build_pipeline(device, VK_IMAGE_ASPECT_COLOR_BIT, GLSL_SAMPLER_DIM_1D, key, &device->meta_state.blit.pipeline_1d_src[key]);
- if (result != VK_SUCCESS)
- goto fail;
-
- result = build_pipeline(device, VK_IMAGE_ASPECT_COLOR_BIT, GLSL_SAMPLER_DIM_2D, key, &device->meta_state.blit.pipeline_2d_src[key]);
- if (result != VK_SUCCESS)
- goto fail;
-
- result = build_pipeline(device, VK_IMAGE_ASPECT_COLOR_BIT, GLSL_SAMPLER_DIM_3D, key, &device->meta_state.blit.pipeline_3d_src[key]);
- if (result != VK_SUCCESS)
- goto fail;
-
- }
-
- result = VK_SUCCESS;
+ VkResult result;
+
+ for (unsigned i = 0; i < NUM_META_FS_KEYS; ++i) {
+ unsigned key = radv_format_meta_fs_key(device, radv_fs_key_format_exemplars[i]);
+ for (unsigned j = 0; j < RADV_META_DST_LAYOUT_COUNT; ++j) {
+ VkImageLayout layout = radv_meta_dst_layout_to_layout(j);
+ result = radv_CreateRenderPass2(
+ radv_device_to_handle(device),
+ &(VkRenderPassCreateInfo2){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
+ .attachmentCount = 1,
+ .pAttachments =
+ &(VkAttachmentDescription2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
+ .format = radv_fs_key_format_exemplars[i],
+ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = layout,
+ .finalLayout = layout,
+ },
+ .subpassCount = 1,
+ .pSubpasses =
+ &(VkSubpassDescription2){
+                     .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 1,
+ .pColorAttachments =
+ &(VkAttachmentReference2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
+ .attachment = 0,
+ .layout = layout,
+ },
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment =
+ &(VkAttachmentReference2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
+ .attachment = VK_ATTACHMENT_UNUSED,
+ .layout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ .preserveAttachmentCount = 0,
+ .pPreserveAttachments = NULL,
+ },
+ .dependencyCount = 2,
+ .pDependencies =
+ (VkSubpassDependency2[]){{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = VK_SUBPASS_EXTERNAL,
+ .dstSubpass = 0,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0},
+ {.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = 0,
+ .dstSubpass = VK_SUBPASS_EXTERNAL,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0}},
+ },
+ &device->meta_state.alloc, &device->meta_state.blit.render_pass[key][j]);
+ if (result != VK_SUCCESS)
+ goto fail;
+ }
+
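+      /* With on-demand compilation only the render passes are created up
+       * front; the pipelines are built on first use via build_pipeline(). */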
+ if (on_demand)
+ continue;
+
+ result = build_pipeline(device, VK_IMAGE_ASPECT_COLOR_BIT, GLSL_SAMPLER_DIM_1D, key,
+ &device->meta_state.blit.pipeline_1d_src[key]);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ result = build_pipeline(device, VK_IMAGE_ASPECT_COLOR_BIT, GLSL_SAMPLER_DIM_2D, key,
+ &device->meta_state.blit.pipeline_2d_src[key]);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ result = build_pipeline(device, VK_IMAGE_ASPECT_COLOR_BIT, GLSL_SAMPLER_DIM_3D, key,
+ &device->meta_state.blit.pipeline_3d_src[key]);
+ if (result != VK_SUCCESS)
+ goto fail;
+ }
+
+ result = VK_SUCCESS;
fail:
- return result;
+ return result;
}
static VkResult
radv_device_init_meta_blit_depth(struct radv_device *device, bool on_demand)
{
- VkResult result;
-
- for (enum radv_blit_ds_layout ds_layout = RADV_BLIT_DS_LAYOUT_TILE_ENABLE; ds_layout < RADV_BLIT_DS_LAYOUT_COUNT; ds_layout++) {
- VkImageLayout layout = radv_meta_blit_ds_to_layout(ds_layout);
- result = radv_CreateRenderPass2(radv_device_to_handle(device),
- &(VkRenderPassCreateInfo2) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
- .attachmentCount = 1,
- .pAttachments = &(VkAttachmentDescription2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
- .format = VK_FORMAT_D32_SFLOAT,
- .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
- .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
- .initialLayout = layout,
- .finalLayout = layout,
- },
- .subpassCount = 1,
- .pSubpasses = &(VkSubpassDescription2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
- .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
- .inputAttachmentCount = 0,
- .colorAttachmentCount = 0,
- .pColorAttachments = NULL,
- .pResolveAttachments = NULL,
- .pDepthStencilAttachment = &(VkAttachmentReference2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
- .attachment = 0,
- .layout = layout,
- },
- .preserveAttachmentCount = 0,
- .pPreserveAttachments = NULL,
- },
- .dependencyCount = 2,
- .pDependencies = (VkSubpassDependency2[]) {
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = VK_SUBPASS_EXTERNAL,
- .dstSubpass = 0,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- },
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = 0,
- .dstSubpass = VK_SUBPASS_EXTERNAL,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- }
- },
- }, &device->meta_state.alloc, &device->meta_state.blit.depth_only_rp[ds_layout]);
- if (result != VK_SUCCESS)
- goto fail;
- }
-
- if (on_demand)
- return VK_SUCCESS;
-
- result = build_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, GLSL_SAMPLER_DIM_1D, 0, &device->meta_state.blit.depth_only_1d_pipeline);
- if (result != VK_SUCCESS)
- goto fail;
-
- result = build_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, GLSL_SAMPLER_DIM_2D, 0, &device->meta_state.blit.depth_only_2d_pipeline);
- if (result != VK_SUCCESS)
- goto fail;
-
- result = build_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, GLSL_SAMPLER_DIM_3D, 0, &device->meta_state.blit.depth_only_3d_pipeline);
- if (result != VK_SUCCESS)
- goto fail;
+ VkResult result;
+
+ for (enum radv_blit_ds_layout ds_layout = RADV_BLIT_DS_LAYOUT_TILE_ENABLE;
+ ds_layout < RADV_BLIT_DS_LAYOUT_COUNT; ds_layout++) {
+ VkImageLayout layout = radv_meta_blit_ds_to_layout(ds_layout);
+ result = radv_CreateRenderPass2(
+ radv_device_to_handle(device),
+ &(VkRenderPassCreateInfo2){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
+ .attachmentCount = 1,
+ .pAttachments =
+ &(VkAttachmentDescription2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
+ .format = VK_FORMAT_D32_SFLOAT,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = layout,
+ .finalLayout = layout,
+ },
+ .subpassCount = 1,
+ .pSubpasses =
+ &(VkSubpassDescription2){
+                  .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 0,
+ .pColorAttachments = NULL,
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment =
+ &(VkAttachmentReference2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
+ .attachment = 0,
+ .layout = layout,
+ },
+ .preserveAttachmentCount = 0,
+ .pPreserveAttachments = NULL,
+ },
+ .dependencyCount = 2,
+ .pDependencies =
+ (VkSubpassDependency2[]){{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = VK_SUBPASS_EXTERNAL,
+ .dstSubpass = 0,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0},
+ {.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = 0,
+ .dstSubpass = VK_SUBPASS_EXTERNAL,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0}},
+ },
+ &device->meta_state.alloc, &device->meta_state.blit.depth_only_rp[ds_layout]);
+ if (result != VK_SUCCESS)
+ goto fail;
+ }
+
+ if (on_demand)
+ return VK_SUCCESS;
+
+ result = build_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, GLSL_SAMPLER_DIM_1D, 0,
+ &device->meta_state.blit.depth_only_1d_pipeline);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ result = build_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, GLSL_SAMPLER_DIM_2D, 0,
+ &device->meta_state.blit.depth_only_2d_pipeline);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ result = build_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, GLSL_SAMPLER_DIM_3D, 0,
+ &device->meta_state.blit.depth_only_3d_pipeline);
+ if (result != VK_SUCCESS)
+ goto fail;
fail:
- return result;
+ return result;
}
static VkResult
radv_device_init_meta_blit_stencil(struct radv_device *device, bool on_demand)
{
- VkResult result;
-
- for (enum radv_blit_ds_layout ds_layout = RADV_BLIT_DS_LAYOUT_TILE_ENABLE; ds_layout < RADV_BLIT_DS_LAYOUT_COUNT; ds_layout++) {
- VkImageLayout layout = radv_meta_blit_ds_to_layout(ds_layout);
- result = radv_CreateRenderPass2(radv_device_to_handle(device),
- &(VkRenderPassCreateInfo2) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
- .attachmentCount = 1,
- .pAttachments = &(VkAttachmentDescription2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
- .format = VK_FORMAT_S8_UINT,
- .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
- .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
- .initialLayout = layout,
- .finalLayout = layout,
- },
- .subpassCount = 1,
- .pSubpasses = &(VkSubpassDescription2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
- .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
- .inputAttachmentCount = 0,
- .colorAttachmentCount = 0,
- .pColorAttachments = NULL,
- .pResolveAttachments = NULL,
- .pDepthStencilAttachment = &(VkAttachmentReference2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
- .attachment = 0,
- .layout = layout,
- },
- .preserveAttachmentCount = 0,
- .pPreserveAttachments = NULL,
- },
- .dependencyCount = 2,
- .pDependencies = (VkSubpassDependency2[]) {
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = VK_SUBPASS_EXTERNAL,
- .dstSubpass = 0,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- },
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = 0,
- .dstSubpass = VK_SUBPASS_EXTERNAL,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- }
- },
-
- }, &device->meta_state.alloc, &device->meta_state.blit.stencil_only_rp[ds_layout]);
- }
- if (result != VK_SUCCESS)
- goto fail;
-
- if (on_demand)
- return VK_SUCCESS;
-
- result = build_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, GLSL_SAMPLER_DIM_1D, 0, &device->meta_state.blit.stencil_only_1d_pipeline);
- if (result != VK_SUCCESS)
- goto fail;
-
- result = build_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, GLSL_SAMPLER_DIM_2D, 0, &device->meta_state.blit.stencil_only_2d_pipeline);
- if (result != VK_SUCCESS)
- goto fail;
-
- result = build_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, GLSL_SAMPLER_DIM_3D, 0, &device->meta_state.blit.stencil_only_3d_pipeline);
- if (result != VK_SUCCESS)
- goto fail;
-
+ VkResult result;
+
+ for (enum radv_blit_ds_layout ds_layout = RADV_BLIT_DS_LAYOUT_TILE_ENABLE;
+ ds_layout < RADV_BLIT_DS_LAYOUT_COUNT; ds_layout++) {
+ VkImageLayout layout = radv_meta_blit_ds_to_layout(ds_layout);
+ result = radv_CreateRenderPass2(
+ radv_device_to_handle(device),
+ &(VkRenderPassCreateInfo2){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
+ .attachmentCount = 1,
+ .pAttachments =
+ &(VkAttachmentDescription2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
+ .format = VK_FORMAT_S8_UINT,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = layout,
+ .finalLayout = layout,
+ },
+ .subpassCount = 1,
+ .pSubpasses =
+ &(VkSubpassDescription2){
+                  .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 0,
+ .pColorAttachments = NULL,
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment =
+ &(VkAttachmentReference2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
+ .attachment = 0,
+ .layout = layout,
+ },
+ .preserveAttachmentCount = 0,
+ .pPreserveAttachments = NULL,
+ },
+ .dependencyCount = 2,
+ .pDependencies =
+ (VkSubpassDependency2[]){{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = VK_SUBPASS_EXTERNAL,
+ .dstSubpass = 0,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0},
+ {.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = 0,
+ .dstSubpass = VK_SUBPASS_EXTERNAL,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0}},
+ },
+ &device->meta_state.alloc, &device->meta_state.blit.stencil_only_rp[ds_layout]);
+ }
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ if (on_demand)
+ return VK_SUCCESS;
+
+ result = build_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, GLSL_SAMPLER_DIM_1D, 0,
+ &device->meta_state.blit.stencil_only_1d_pipeline);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ result = build_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, GLSL_SAMPLER_DIM_2D, 0,
+ &device->meta_state.blit.stencil_only_2d_pipeline);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ result = build_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, GLSL_SAMPLER_DIM_3D, 0,
+ &device->meta_state.blit.stencil_only_3d_pipeline);
+ if (result != VK_SUCCESS)
+ goto fail;
fail:
- return result;
+ return result;
}
VkResult
radv_device_init_meta_blit_state(struct radv_device *device, bool on_demand)
{
- VkResult result;
-
- VkDescriptorSetLayoutCreateInfo ds_layout_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 1,
- .pBindings = (VkDescriptorSetLayoutBinding[]) {
- {
- .binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
- .pImmutableSamplers = NULL
- },
- }
- };
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
- &ds_layout_info,
- &device->meta_state.alloc,
- &device->meta_state.blit.ds_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
- const VkPushConstantRange push_constant_range = {VK_SHADER_STAGE_VERTEX_BIT, 0, 20};
-
- result = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &(VkPipelineLayoutCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 1,
- .pSetLayouts = &device->meta_state.blit.ds_layout,
- .pushConstantRangeCount = 1,
- .pPushConstantRanges = &push_constant_range,
- },
- &device->meta_state.alloc, &device->meta_state.blit.pipeline_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
- result = radv_device_init_meta_blit_color(device, on_demand);
- if (result != VK_SUCCESS)
- goto fail;
-
- result = radv_device_init_meta_blit_depth(device, on_demand);
- if (result != VK_SUCCESS)
- goto fail;
-
- result = radv_device_init_meta_blit_stencil(device, on_demand);
+ VkResult result;
+
+ VkDescriptorSetLayoutCreateInfo ds_layout_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 1,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+ result =
+ radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_layout_info,
+ &device->meta_state.alloc, &device->meta_state.blit.ds_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
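+   /* 20 bytes of vertex push constants: the five floats for the normalized
+    * source rectangle set up in meta_emit_blit(). */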
+ const VkPushConstantRange push_constant_range = {VK_SHADER_STAGE_VERTEX_BIT, 0, 20};
+
+ result = radv_CreatePipelineLayout(radv_device_to_handle(device),
+ &(VkPipelineLayoutCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &device->meta_state.blit.ds_layout,
+ .pushConstantRangeCount = 1,
+ .pPushConstantRanges = &push_constant_range,
+ },
+ &device->meta_state.alloc,
+ &device->meta_state.blit.pipeline_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ result = radv_device_init_meta_blit_color(device, on_demand);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ result = radv_device_init_meta_blit_depth(device, on_demand);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ result = radv_device_init_meta_blit_stencil(device, on_demand);
fail:
- if (result != VK_SUCCESS)
- radv_device_finish_meta_blit_state(device);
- return result;
+ if (result != VK_SUCCESS)
+ radv_device_finish_meta_blit_state(device);
+ return result;
}
diff --git a/src/amd/vulkan/radv_meta_blit2d.c b/src/amd/vulkan/radv_meta_blit2d.c
index d2bd95bf757..b6ac95be413 100644
--- a/src/amd/vulkan/radv_meta_blit2d.c
+++ b/src/amd/vulkan/radv_meta_blit2d.c
@@ -24,1360 +24,1323 @@
* IN THE SOFTWARE.
*/
-#include "radv_meta.h"
#include "nir/nir_builder.h"
+#include "radv_meta.h"
#include "vk_format.h"
enum blit2d_src_type {
- BLIT2D_SRC_TYPE_IMAGE,
- BLIT2D_SRC_TYPE_IMAGE_3D,
- BLIT2D_SRC_TYPE_BUFFER,
- BLIT2D_NUM_SRC_TYPES,
+ BLIT2D_SRC_TYPE_IMAGE,
+ BLIT2D_SRC_TYPE_IMAGE_3D,
+ BLIT2D_SRC_TYPE_BUFFER,
+ BLIT2D_NUM_SRC_TYPES,
};
-static VkResult
-blit2d_init_color_pipeline(struct radv_device *device,
- enum blit2d_src_type src_type,
- VkFormat format,
- uint32_t log2_samples);
+static VkResult blit2d_init_color_pipeline(struct radv_device *device,
+ enum blit2d_src_type src_type, VkFormat format,
+ uint32_t log2_samples);
-static VkResult
-blit2d_init_depth_only_pipeline(struct radv_device *device,
- enum blit2d_src_type src_type,
- uint32_t log2_samples);
+static VkResult blit2d_init_depth_only_pipeline(struct radv_device *device,
+ enum blit2d_src_type src_type,
+ uint32_t log2_samples);
-static VkResult
-blit2d_init_stencil_only_pipeline(struct radv_device *device,
- enum blit2d_src_type src_type,
- uint32_t log2_samples);
+static VkResult blit2d_init_stencil_only_pipeline(struct radv_device *device,
+ enum blit2d_src_type src_type,
+ uint32_t log2_samples);
static void
-create_iview(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_surf *surf,
- struct radv_image_view *iview, VkFormat depth_format,
- VkImageAspectFlagBits aspects)
+create_iview(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *surf,
+ struct radv_image_view *iview, VkFormat depth_format, VkImageAspectFlagBits aspects)
{
- VkFormat format;
- VkImageViewType view_type = cmd_buffer->device->physical_device->rad_info.chip_class < GFX9 ? VK_IMAGE_VIEW_TYPE_2D :
- radv_meta_get_view_type(surf->image);
-
- if (depth_format)
- format = depth_format;
- else
- format = surf->format;
-
- radv_image_view_init(iview, cmd_buffer->device,
- &(VkImageViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(surf->image),
- .viewType = view_type,
- .format = format,
- .subresourceRange = {
- .aspectMask = aspects,
- .baseMipLevel = surf->level,
- .levelCount = 1,
- .baseArrayLayer = surf->layer,
- .layerCount = 1
- },
- }, NULL);
+ VkFormat format;
+ VkImageViewType view_type = cmd_buffer->device->physical_device->rad_info.chip_class < GFX9
+ ? VK_IMAGE_VIEW_TYPE_2D
+ : radv_meta_get_view_type(surf->image);
+
+ if (depth_format)
+ format = depth_format;
+ else
+ format = surf->format;
+
+ radv_image_view_init(iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(surf->image),
+ .viewType = view_type,
+ .format = format,
+ .subresourceRange = {.aspectMask = aspects,
+ .baseMipLevel = surf->level,
+ .levelCount = 1,
+ .baseArrayLayer = surf->layer,
+ .layerCount = 1},
+ },
+ NULL);
}
static void
-create_bview(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_buffer *src,
- struct radv_buffer_view *bview, VkFormat depth_format)
+create_bview(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_buffer *src,
+ struct radv_buffer_view *bview, VkFormat depth_format)
{
- VkFormat format;
-
- if (depth_format)
- format = depth_format;
- else
- format = src->format;
- radv_buffer_view_init(bview, cmd_buffer->device,
- &(VkBufferViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
- .flags = 0,
- .buffer = radv_buffer_to_handle(src->buffer),
- .format = format,
- .offset = src->offset,
- .range = VK_WHOLE_SIZE,
- });
-
+ VkFormat format;
+
+ if (depth_format)
+ format = depth_format;
+ else
+ format = src->format;
+ radv_buffer_view_init(bview, cmd_buffer->device,
+ &(VkBufferViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
+ .flags = 0,
+ .buffer = radv_buffer_to_handle(src->buffer),
+ .format = format,
+ .offset = src->offset,
+ .range = VK_WHOLE_SIZE,
+ });
}
struct blit2d_src_temps {
- struct radv_image_view iview;
- struct radv_buffer_view bview;
+ struct radv_image_view iview;
+ struct radv_buffer_view bview;
};
static void
-blit2d_bind_src(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_surf *src_img,
- struct radv_meta_blit2d_buffer *src_buf,
- struct blit2d_src_temps *tmp,
- enum blit2d_src_type src_type, VkFormat depth_format,
- VkImageAspectFlagBits aspects,
+blit2d_bind_src(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src_img,
+ struct radv_meta_blit2d_buffer *src_buf, struct blit2d_src_temps *tmp,
+ enum blit2d_src_type src_type, VkFormat depth_format, VkImageAspectFlagBits aspects,
uint32_t log2_samples)
{
- struct radv_device *device = cmd_buffer->device;
-
- if (src_type == BLIT2D_SRC_TYPE_BUFFER) {
- create_bview(cmd_buffer, src_buf, &tmp->bview, depth_format);
-
- radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
- device->meta_state.blit2d[log2_samples].p_layouts[src_type],
- 0, /* set */
- 1, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
- .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(&tmp->bview) }
- }
- });
-
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.blit2d[log2_samples].p_layouts[src_type],
- VK_SHADER_STAGE_FRAGMENT_BIT, 16, 4,
- &src_buf->pitch);
- } else {
- create_iview(cmd_buffer, src_img, &tmp->iview, depth_format, aspects);
-
- if (src_type == BLIT2D_SRC_TYPE_IMAGE_3D)
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.blit2d[log2_samples].p_layouts[src_type],
- VK_SHADER_STAGE_FRAGMENT_BIT, 16, 4,
- &src_img->layer);
-
- radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
- device->meta_state.blit2d[log2_samples].p_layouts[src_type],
- 0, /* set */
- 1, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]) {
- {
- .sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(&tmp->iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- }
- }
- });
- }
+ struct radv_device *device = cmd_buffer->device;
+
+ if (src_type == BLIT2D_SRC_TYPE_BUFFER) {
+ create_bview(cmd_buffer, src_buf, &tmp->bview, depth_format);
+
+ radv_meta_push_descriptor_set(
+ cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
+ device->meta_state.blit2d[log2_samples].p_layouts[src_type], 0, /* set */
+ 1, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){
+ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
+ .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(&tmp->bview)}}});
+
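+      /* The buffer source path passes src_buf->pitch (the row pitch) at push-constant
+       * offset 16; build_nir_buffer_fetch() reads it from that offset to linearize the
+       * 2D texel coordinate. */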
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+ device->meta_state.blit2d[log2_samples].p_layouts[src_type],
+ VK_SHADER_STAGE_FRAGMENT_BIT, 16, 4, &src_buf->pitch);
+ } else {
+ create_iview(cmd_buffer, src_img, &tmp->iview, depth_format, aspects);
+
+ if (src_type == BLIT2D_SRC_TYPE_IMAGE_3D)
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+ device->meta_state.blit2d[log2_samples].p_layouts[src_type],
+ VK_SHADER_STAGE_FRAGMENT_BIT, 16, 4, &src_img->layer);
+
+ radv_meta_push_descriptor_set(
+ cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
+ device->meta_state.blit2d[log2_samples].p_layouts[src_type], 0, /* set */
+ 1, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .pImageInfo = (VkDescriptorImageInfo[]){
+ {
+ .sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(&tmp->iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }}});
+ }
}
struct blit2d_dst_temps {
- VkImage image;
- struct radv_image_view iview;
- VkFramebuffer fb;
+ VkImage image;
+ struct radv_image_view iview;
+ VkFramebuffer fb;
};
static void
-blit2d_bind_dst(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_surf *dst,
- uint32_t width,
- uint32_t height,
- VkFormat depth_format,
- struct blit2d_dst_temps *tmp,
- VkImageAspectFlagBits aspects)
+blit2d_bind_dst(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *dst,
+ uint32_t width, uint32_t height, VkFormat depth_format,
+ struct blit2d_dst_temps *tmp, VkImageAspectFlagBits aspects)
{
- create_iview(cmd_buffer, dst, &tmp->iview, depth_format, aspects);
-
- radv_CreateFramebuffer(radv_device_to_handle(cmd_buffer->device),
- &(VkFramebufferCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
- .attachmentCount = 1,
- .pAttachments = (VkImageView[]) {
- radv_image_view_to_handle(&tmp->iview),
- },
- .width = width,
- .height = height,
- .layers = 1
- }, &cmd_buffer->pool->alloc, &tmp->fb);
+ create_iview(cmd_buffer, dst, &tmp->iview, depth_format, aspects);
+
+ radv_CreateFramebuffer(
+ radv_device_to_handle(cmd_buffer->device),
+ &(VkFramebufferCreateInfo){.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments =
+ (VkImageView[]){
+ radv_image_view_to_handle(&tmp->iview),
+ },
+ .width = width,
+ .height = height,
+ .layers = 1},
+ &cmd_buffer->pool->alloc, &tmp->fb);
}
static void
-bind_pipeline(struct radv_cmd_buffer *cmd_buffer,
- enum blit2d_src_type src_type, unsigned fs_key,
+bind_pipeline(struct radv_cmd_buffer *cmd_buffer, enum blit2d_src_type src_type, unsigned fs_key,
uint32_t log2_samples)
{
- VkPipeline pipeline =
- cmd_buffer->device->meta_state.blit2d[log2_samples].pipelines[src_type][fs_key];
+ VkPipeline pipeline =
+ cmd_buffer->device->meta_state.blit2d[log2_samples].pipelines[src_type][fs_key];
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS,
+ pipeline);
}
static void
-bind_depth_pipeline(struct radv_cmd_buffer *cmd_buffer,
- enum blit2d_src_type src_type,
- uint32_t log2_samples)
+bind_depth_pipeline(struct radv_cmd_buffer *cmd_buffer, enum blit2d_src_type src_type,
+ uint32_t log2_samples)
{
- VkPipeline pipeline =
- cmd_buffer->device->meta_state.blit2d[log2_samples].depth_only_pipeline[src_type];
+ VkPipeline pipeline =
+ cmd_buffer->device->meta_state.blit2d[log2_samples].depth_only_pipeline[src_type];
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS,
+ pipeline);
}
static void
-bind_stencil_pipeline(struct radv_cmd_buffer *cmd_buffer,
- enum blit2d_src_type src_type,
- uint32_t log2_samples)
+bind_stencil_pipeline(struct radv_cmd_buffer *cmd_buffer, enum blit2d_src_type src_type,
+ uint32_t log2_samples)
{
- VkPipeline pipeline =
- cmd_buffer->device->meta_state.blit2d[log2_samples].stencil_only_pipeline[src_type];
+ VkPipeline pipeline =
+ cmd_buffer->device->meta_state.blit2d[log2_samples].stencil_only_pipeline[src_type];
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS,
+ pipeline);
}
static void
radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_surf *src_img,
- struct radv_meta_blit2d_buffer *src_buf,
- struct radv_meta_blit2d_surf *dst,
- unsigned num_rects,
- struct radv_meta_blit2d_rect *rects, enum blit2d_src_type src_type,
- uint32_t log2_samples)
+ struct radv_meta_blit2d_surf *src_img,
+ struct radv_meta_blit2d_buffer *src_buf,
+ struct radv_meta_blit2d_surf *dst, unsigned num_rects,
+ struct radv_meta_blit2d_rect *rects, enum blit2d_src_type src_type,
+ uint32_t log2_samples)
{
- struct radv_device *device = cmd_buffer->device;
-
- for (unsigned r = 0; r < num_rects; ++r) {
- u_foreach_bit(i, dst->aspect_mask) {
- unsigned aspect_mask = 1u << i;
- unsigned src_aspect_mask = aspect_mask;
- VkFormat depth_format = 0;
- if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
- depth_format = vk_format_stencil_only(dst->image->vk_format);
- else if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT)
- depth_format = vk_format_depth_only(dst->image->vk_format);
- else if (src_img)
- src_aspect_mask = src_img->aspect_mask;
-
- struct blit2d_src_temps src_temps;
- blit2d_bind_src(cmd_buffer, src_img, src_buf, &src_temps, src_type, depth_format, src_aspect_mask, log2_samples);
-
- struct blit2d_dst_temps dst_temps;
- blit2d_bind_dst(cmd_buffer, dst, rects[r].dst_x + rects[r].width,
- rects[r].dst_y + rects[r].height, depth_format, &dst_temps, aspect_mask);
-
- float vertex_push_constants[4] = {
- rects[r].src_x,
- rects[r].src_y,
- rects[r].src_x + rects[r].width,
- rects[r].src_y + rects[r].height,
- };
-
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.blit2d[log2_samples].p_layouts[src_type],
- VK_SHADER_STAGE_VERTEX_BIT, 0, 16,
- vertex_push_constants);
-
- if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT ||
- aspect_mask == VK_IMAGE_ASPECT_PLANE_0_BIT ||
- aspect_mask == VK_IMAGE_ASPECT_PLANE_1_BIT ||
- aspect_mask == VK_IMAGE_ASPECT_PLANE_2_BIT) {
- unsigned fs_key = radv_format_meta_fs_key(device, dst_temps.iview.vk_format);
- unsigned dst_layout = radv_meta_dst_layout_from_layout(dst->current_layout);
-
- if (device->meta_state.blit2d[log2_samples].pipelines[src_type][fs_key] == VK_NULL_HANDLE) {
- VkResult ret = blit2d_init_color_pipeline(device, src_type, radv_fs_key_format_exemplars[fs_key], log2_samples);
- if (ret != VK_SUCCESS) {
- cmd_buffer->record_result = ret;
- goto fail_pipeline;
- }
- }
-
- radv_cmd_buffer_begin_render_pass(cmd_buffer,
- &(VkRenderPassBeginInfo) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
- .renderPass = device->meta_state.blit2d_render_passes[fs_key][dst_layout],
- .framebuffer = dst_temps.fb,
- .renderArea = {
- .offset = { rects[r].dst_x, rects[r].dst_y, },
- .extent = { rects[r].width, rects[r].height },
- },
- .clearValueCount = 0,
- .pClearValues = NULL,
- }, &(struct radv_extra_render_pass_begin_info) {
- .disable_dcc = dst->disable_compression
- });
-
- radv_cmd_buffer_set_subpass(cmd_buffer,
- &cmd_buffer->state.pass->subpasses[0]);
-
- bind_pipeline(cmd_buffer, src_type, fs_key, log2_samples);
- } else if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
- enum radv_blit_ds_layout ds_layout = radv_meta_blit_ds_to_type(dst->current_layout);
-
- if (device->meta_state.blit2d[log2_samples].depth_only_pipeline[src_type] == VK_NULL_HANDLE) {
- VkResult ret = blit2d_init_depth_only_pipeline(device, src_type, log2_samples);
- if (ret != VK_SUCCESS) {
- cmd_buffer->record_result = ret;
- goto fail_pipeline;
- }
- }
-
- radv_cmd_buffer_begin_render_pass(cmd_buffer,
- &(VkRenderPassBeginInfo) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
- .renderPass = device->meta_state.blit2d_depth_only_rp[ds_layout],
- .framebuffer = dst_temps.fb,
- .renderArea = {
- .offset = { rects[r].dst_x, rects[r].dst_y, },
- .extent = { rects[r].width, rects[r].height },
- },
- .clearValueCount = 0,
- .pClearValues = NULL,
- }, NULL);
-
- radv_cmd_buffer_set_subpass(cmd_buffer,
- &cmd_buffer->state.pass->subpasses[0]);
-
- bind_depth_pipeline(cmd_buffer, src_type, log2_samples);
-
- } else if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
- enum radv_blit_ds_layout ds_layout = radv_meta_blit_ds_to_type(dst->current_layout);
-
- if (device->meta_state.blit2d[log2_samples].stencil_only_pipeline[src_type] == VK_NULL_HANDLE) {
- VkResult ret = blit2d_init_stencil_only_pipeline(device, src_type, log2_samples);
- if (ret != VK_SUCCESS) {
- cmd_buffer->record_result = ret;
- goto fail_pipeline;
- }
- }
-
- radv_cmd_buffer_begin_render_pass(cmd_buffer,
- &(VkRenderPassBeginInfo) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
- .renderPass = device->meta_state.blit2d_stencil_only_rp[ds_layout],
- .framebuffer = dst_temps.fb,
- .renderArea = {
- .offset = { rects[r].dst_x, rects[r].dst_y, },
- .extent = { rects[r].width, rects[r].height },
- },
- .clearValueCount = 0,
- .pClearValues = NULL,
- }, NULL);
-
- radv_cmd_buffer_set_subpass(cmd_buffer,
- &cmd_buffer->state.pass->subpasses[0]);
-
- bind_stencil_pipeline(cmd_buffer, src_type, log2_samples);
- } else
- unreachable("Processing blit2d with multiple aspects.");
-
- radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
- .x = rects[r].dst_x,
- .y = rects[r].dst_y,
- .width = rects[r].width,
- .height = rects[r].height,
- .minDepth = 0.0f,
- .maxDepth = 1.0f
- });
-
- radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkRect2D) {
- .offset = (VkOffset2D) { rects[r].dst_x, rects[r].dst_y },
- .extent = (VkExtent2D) { rects[r].width, rects[r].height },
- });
-
-
-
- radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
- radv_cmd_buffer_end_render_pass(cmd_buffer);
-
-fail_pipeline:
- /* At the point where we emit the draw call, all data from the
- * descriptor sets, etc. has been used. We are free to delete it.
- */
- radv_DestroyFramebuffer(radv_device_to_handle(device),
- dst_temps.fb,
- &cmd_buffer->pool->alloc);
- }
- }
+ struct radv_device *device = cmd_buffer->device;
+
+ for (unsigned r = 0; r < num_rects; ++r) {
+ u_foreach_bit(i, dst->aspect_mask)
+ {
+ unsigned aspect_mask = 1u << i;
+ unsigned src_aspect_mask = aspect_mask;
+ VkFormat depth_format = 0;
+ if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
+ depth_format = vk_format_stencil_only(dst->image->vk_format);
+ else if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT)
+ depth_format = vk_format_depth_only(dst->image->vk_format);
+ else if (src_img)
+ src_aspect_mask = src_img->aspect_mask;
+
+ struct blit2d_src_temps src_temps;
+ blit2d_bind_src(cmd_buffer, src_img, src_buf, &src_temps, src_type, depth_format,
+ src_aspect_mask, log2_samples);
+
+ struct blit2d_dst_temps dst_temps;
+ blit2d_bind_dst(cmd_buffer, dst, rects[r].dst_x + rects[r].width,
+ rects[r].dst_y + rects[r].height, depth_format, &dst_temps, aspect_mask);
+
+ float vertex_push_constants[4] = {
+ rects[r].src_x,
+ rects[r].src_y,
+ rects[r].src_x + rects[r].width,
+ rects[r].src_y + rects[r].height,
+ };
+
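+         /* Pass the source rectangle (x0, y0, x1, y1) to the vertex shader, which expands
+          * it into per-vertex texture coordinates (see build_nir_vertex_shader()). */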
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+ device->meta_state.blit2d[log2_samples].p_layouts[src_type],
+ VK_SHADER_STAGE_VERTEX_BIT, 0, 16, vertex_push_constants);
+
+ if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT ||
+ aspect_mask == VK_IMAGE_ASPECT_PLANE_0_BIT ||
+ aspect_mask == VK_IMAGE_ASPECT_PLANE_1_BIT ||
+ aspect_mask == VK_IMAGE_ASPECT_PLANE_2_BIT) {
+ unsigned fs_key = radv_format_meta_fs_key(device, dst_temps.iview.vk_format);
+ unsigned dst_layout = radv_meta_dst_layout_from_layout(dst->current_layout);
+
+ if (device->meta_state.blit2d[log2_samples].pipelines[src_type][fs_key] ==
+ VK_NULL_HANDLE) {
+ VkResult ret = blit2d_init_color_pipeline(
+ device, src_type, radv_fs_key_format_exemplars[fs_key], log2_samples);
+ if (ret != VK_SUCCESS) {
+ cmd_buffer->record_result = ret;
+ goto fail_pipeline;
+ }
+ }
+
+ radv_cmd_buffer_begin_render_pass(
+ cmd_buffer,
+ &(VkRenderPassBeginInfo){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .renderPass = device->meta_state.blit2d_render_passes[fs_key][dst_layout],
+ .framebuffer = dst_temps.fb,
+ .renderArea =
+ {
+ .offset =
+ {
+ rects[r].dst_x,
+ rects[r].dst_y,
+ },
+ .extent = {rects[r].width, rects[r].height},
+ },
+ .clearValueCount = 0,
+ .pClearValues = NULL,
+ },
+ &(struct radv_extra_render_pass_begin_info){.disable_dcc =
+ dst->disable_compression});
+
+ radv_cmd_buffer_set_subpass(cmd_buffer, &cmd_buffer->state.pass->subpasses[0]);
+
+ bind_pipeline(cmd_buffer, src_type, fs_key, log2_samples);
+ } else if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
+ enum radv_blit_ds_layout ds_layout = radv_meta_blit_ds_to_type(dst->current_layout);
+
+ if (device->meta_state.blit2d[log2_samples].depth_only_pipeline[src_type] ==
+ VK_NULL_HANDLE) {
+ VkResult ret = blit2d_init_depth_only_pipeline(device, src_type, log2_samples);
+ if (ret != VK_SUCCESS) {
+ cmd_buffer->record_result = ret;
+ goto fail_pipeline;
+ }
+ }
+
+ radv_cmd_buffer_begin_render_pass(
+ cmd_buffer,
+ &(VkRenderPassBeginInfo){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .renderPass = device->meta_state.blit2d_depth_only_rp[ds_layout],
+ .framebuffer = dst_temps.fb,
+ .renderArea =
+ {
+ .offset =
+ {
+ rects[r].dst_x,
+ rects[r].dst_y,
+ },
+ .extent = {rects[r].width, rects[r].height},
+ },
+ .clearValueCount = 0,
+ .pClearValues = NULL,
+ },
+ NULL);
+
+ radv_cmd_buffer_set_subpass(cmd_buffer, &cmd_buffer->state.pass->subpasses[0]);
+
+ bind_depth_pipeline(cmd_buffer, src_type, log2_samples);
+
+ } else if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
+ enum radv_blit_ds_layout ds_layout = radv_meta_blit_ds_to_type(dst->current_layout);
+
+ if (device->meta_state.blit2d[log2_samples].stencil_only_pipeline[src_type] ==
+ VK_NULL_HANDLE) {
+ VkResult ret = blit2d_init_stencil_only_pipeline(device, src_type, log2_samples);
+ if (ret != VK_SUCCESS) {
+ cmd_buffer->record_result = ret;
+ goto fail_pipeline;
+ }
+ }
+
+ radv_cmd_buffer_begin_render_pass(
+ cmd_buffer,
+ &(VkRenderPassBeginInfo){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .renderPass = device->meta_state.blit2d_stencil_only_rp[ds_layout],
+ .framebuffer = dst_temps.fb,
+ .renderArea =
+ {
+ .offset =
+ {
+ rects[r].dst_x,
+ rects[r].dst_y,
+ },
+ .extent = {rects[r].width, rects[r].height},
+ },
+ .clearValueCount = 0,
+ .pClearValues = NULL,
+ },
+ NULL);
+
+ radv_cmd_buffer_set_subpass(cmd_buffer, &cmd_buffer->state.pass->subpasses[0]);
+
+ bind_stencil_pipeline(cmd_buffer, src_type, log2_samples);
+ } else
+ unreachable("Processing blit2d with multiple aspects.");
+
+ radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
+ &(VkViewport){.x = rects[r].dst_x,
+ .y = rects[r].dst_y,
+ .width = rects[r].width,
+ .height = rects[r].height,
+ .minDepth = 0.0f,
+ .maxDepth = 1.0f});
+
+ radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
+ &(VkRect2D){
+ .offset = (VkOffset2D){rects[r].dst_x, rects[r].dst_y},
+ .extent = (VkExtent2D){rects[r].width, rects[r].height},
+ });
+
+ radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
+ radv_cmd_buffer_end_render_pass(cmd_buffer);
+
+ fail_pipeline:
+ /* At the point where we emit the draw call, all data from the
+ * descriptor sets, etc. has been used. We are free to delete it.
+ */
+ radv_DestroyFramebuffer(radv_device_to_handle(device), dst_temps.fb,
+ &cmd_buffer->pool->alloc);
+ }
+ }
}
void
-radv_meta_blit2d(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_surf *src_img,
- struct radv_meta_blit2d_buffer *src_buf,
- struct radv_meta_blit2d_surf *dst,
- unsigned num_rects,
- struct radv_meta_blit2d_rect *rects)
+radv_meta_blit2d(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src_img,
+ struct radv_meta_blit2d_buffer *src_buf, struct radv_meta_blit2d_surf *dst,
+ unsigned num_rects, struct radv_meta_blit2d_rect *rects)
{
- bool use_3d = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9 &&
- (src_img && src_img->image->type == VK_IMAGE_TYPE_3D);
- enum blit2d_src_type src_type = src_buf ? BLIT2D_SRC_TYPE_BUFFER :
- use_3d ? BLIT2D_SRC_TYPE_IMAGE_3D : BLIT2D_SRC_TYPE_IMAGE;
- radv_meta_blit2d_normal_dst(cmd_buffer, src_img, src_buf, dst,
- num_rects, rects, src_type,
- src_img ? util_logbase2(src_img->image->info.samples) : 0);
+ bool use_3d = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9 &&
+ (src_img && src_img->image->type == VK_IMAGE_TYPE_3D);
+ enum blit2d_src_type src_type = src_buf ? BLIT2D_SRC_TYPE_BUFFER
+ : use_3d ? BLIT2D_SRC_TYPE_IMAGE_3D
+ : BLIT2D_SRC_TYPE_IMAGE;
+ radv_meta_blit2d_normal_dst(cmd_buffer, src_img, src_buf, dst, num_rects, rects, src_type,
+ src_img ? util_logbase2(src_img->image->info.samples) : 0);
}
static nir_shader *
build_nir_vertex_shader(void)
{
- const struct glsl_type *vec4 = glsl_vec4_type();
- const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, NULL, "meta_blit2d_vs");
-
- nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out,
- vec4, "gl_Position");
- pos_out->data.location = VARYING_SLOT_POS;
-
- nir_variable *tex_pos_out = nir_variable_create(b.shader, nir_var_shader_out,
- vec2, "v_tex_pos");
- tex_pos_out->data.location = VARYING_SLOT_VAR0;
- tex_pos_out->data.interpolation = INTERP_MODE_SMOOTH;
-
- nir_ssa_def *outvec = radv_meta_gen_rect_vertices(&b);
- nir_store_var(&b, pos_out, outvec, 0xf);
-
- nir_ssa_def *src_box = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range=16);
- nir_ssa_def *vertex_id = nir_load_vertex_id_zero_base(&b);
-
- /* vertex 0 - src_x, src_y */
- /* vertex 1 - src_x, src_y+h */
- /* vertex 2 - src_x+w, src_y */
- /* so channel 0 is vertex_id != 2 ? src_x : src_x + w
-    channel 1 is vertex_id != 1 ? src_y : src_y + h */
-
- nir_ssa_def *c0cmp = nir_ine(&b, vertex_id,
- nir_imm_int(&b, 2));
- nir_ssa_def *c1cmp = nir_ine(&b, vertex_id,
- nir_imm_int(&b, 1));
-
- nir_ssa_def *comp[2];
- comp[0] = nir_bcsel(&b, c0cmp,
- nir_channel(&b, src_box, 0),
- nir_channel(&b, src_box, 2));
-
- comp[1] = nir_bcsel(&b, c1cmp,
- nir_channel(&b, src_box, 1),
- nir_channel(&b, src_box, 3));
- nir_ssa_def *out_tex_vec = nir_vec(&b, comp, 2);
- nir_store_var(&b, tex_pos_out, out_tex_vec, 0x3);
- return b.shader;
+ const struct glsl_type *vec4 = glsl_vec4_type();
+ const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
+ nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, NULL, "meta_blit2d_vs");
+
+ nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position");
+ pos_out->data.location = VARYING_SLOT_POS;
+
+ nir_variable *tex_pos_out = nir_variable_create(b.shader, nir_var_shader_out, vec2, "v_tex_pos");
+ tex_pos_out->data.location = VARYING_SLOT_VAR0;
+ tex_pos_out->data.interpolation = INTERP_MODE_SMOOTH;
+
+ nir_ssa_def *outvec = radv_meta_gen_rect_vertices(&b);
+ nir_store_var(&b, pos_out, outvec, 0xf);
+
+ nir_ssa_def *src_box = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
+ nir_ssa_def *vertex_id = nir_load_vertex_id_zero_base(&b);
+
+ /* vertex 0 - src_x, src_y */
+ /* vertex 1 - src_x, src_y+h */
+ /* vertex 2 - src_x+w, src_y */
+   /* so channel 0 is vertex_id != 2 ? src_x : src_x + w
+      channel 1 is vertex_id != 1 ? src_y : src_y + h */
+
+ nir_ssa_def *c0cmp = nir_ine(&b, vertex_id, nir_imm_int(&b, 2));
+ nir_ssa_def *c1cmp = nir_ine(&b, vertex_id, nir_imm_int(&b, 1));
+
+ nir_ssa_def *comp[2];
+ comp[0] = nir_bcsel(&b, c0cmp, nir_channel(&b, src_box, 0), nir_channel(&b, src_box, 2));
+
+ comp[1] = nir_bcsel(&b, c1cmp, nir_channel(&b, src_box, 1), nir_channel(&b, src_box, 3));
+ nir_ssa_def *out_tex_vec = nir_vec(&b, comp, 2);
+ nir_store_var(&b, tex_pos_out, out_tex_vec, 0x3);
+ return b.shader;
}
-typedef nir_ssa_def* (*texel_fetch_build_func)(struct nir_builder *,
- struct radv_device *,
+typedef nir_ssa_def *(*texel_fetch_build_func)(struct nir_builder *, struct radv_device *,
nir_ssa_def *, bool, bool);
static nir_ssa_def *
-build_nir_texel_fetch(struct nir_builder *b, struct radv_device *device,
- nir_ssa_def *tex_pos, bool is_3d, bool is_multisampled)
+build_nir_texel_fetch(struct nir_builder *b, struct radv_device *device, nir_ssa_def *tex_pos,
+ bool is_3d, bool is_multisampled)
{
- enum glsl_sampler_dim dim =
- is_3d ? GLSL_SAMPLER_DIM_3D : is_multisampled ? GLSL_SAMPLER_DIM_MS : GLSL_SAMPLER_DIM_2D;
- const struct glsl_type *sampler_type =
- glsl_sampler_type(dim, false, false, GLSL_TYPE_UINT);
- nir_variable *sampler = nir_variable_create(b->shader, nir_var_uniform,
- sampler_type, "s_tex");
- sampler->data.descriptor_set = 0;
- sampler->data.binding = 0;
-
- nir_ssa_def *tex_pos_3d = NULL;
- nir_ssa_def *sample_idx = NULL;
- if (is_3d) {
- nir_ssa_def *layer = nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0), .base=16, .range=4);
-
- nir_ssa_def *chans[3];
- chans[0] = nir_channel(b, tex_pos, 0);
- chans[1] = nir_channel(b, tex_pos, 1);
- chans[2] = layer;
- tex_pos_3d = nir_vec(b, chans, 3);
- }
- if (is_multisampled) {
- sample_idx = nir_load_sample_id(b);
- }
-
- nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
-
- nir_tex_instr *tex = nir_tex_instr_create(b->shader, is_multisampled ? 4 : 3);
- tex->sampler_dim = dim;
- tex->op = is_multisampled ? nir_texop_txf_ms : nir_texop_txf;
- tex->src[0].src_type = nir_tex_src_coord;
- tex->src[0].src = nir_src_for_ssa(is_3d ? tex_pos_3d : tex_pos);
- tex->src[1].src_type = is_multisampled ? nir_tex_src_ms_index : nir_tex_src_lod;
- tex->src[1].src = nir_src_for_ssa(is_multisampled ? sample_idx : nir_imm_int(b, 0));
- tex->src[2].src_type = nir_tex_src_texture_deref;
- tex->src[2].src = nir_src_for_ssa(tex_deref);
- if (is_multisampled) {
- tex->src[3].src_type = nir_tex_src_lod;
- tex->src[3].src = nir_src_for_ssa(nir_imm_int(b, 0));
- }
- tex->dest_type = nir_type_uint32;
- tex->is_array = false;
- tex->coord_components = is_3d ? 3 : 2;
-
- nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
- nir_builder_instr_insert(b, &tex->instr);
-
- return &tex->dest.ssa;
+ enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D
+ : is_multisampled ? GLSL_SAMPLER_DIM_MS
+ : GLSL_SAMPLER_DIM_2D;
+ const struct glsl_type *sampler_type = glsl_sampler_type(dim, false, false, GLSL_TYPE_UINT);
+ nir_variable *sampler = nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex");
+ sampler->data.descriptor_set = 0;
+ sampler->data.binding = 0;
+
+ nir_ssa_def *tex_pos_3d = NULL;
+ nir_ssa_def *sample_idx = NULL;
+ if (is_3d) {
+ nir_ssa_def *layer =
+ nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0), .base = 16, .range = 4);
+
+ nir_ssa_def *chans[3];
+ chans[0] = nir_channel(b, tex_pos, 0);
+ chans[1] = nir_channel(b, tex_pos, 1);
+ chans[2] = layer;
+ tex_pos_3d = nir_vec(b, chans, 3);
+ }
+ if (is_multisampled) {
+ sample_idx = nir_load_sample_id(b);
+ }
+
+ nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
+
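+   /* txf takes coord, lod and the texture deref (3 sources); the multisampled txf_ms
+    * variant replaces the lod source with the sample index and appends an explicit lod,
+    * hence 4 sources. */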
+ nir_tex_instr *tex = nir_tex_instr_create(b->shader, is_multisampled ? 4 : 3);
+ tex->sampler_dim = dim;
+ tex->op = is_multisampled ? nir_texop_txf_ms : nir_texop_txf;
+ tex->src[0].src_type = nir_tex_src_coord;
+ tex->src[0].src = nir_src_for_ssa(is_3d ? tex_pos_3d : tex_pos);
+ tex->src[1].src_type = is_multisampled ? nir_tex_src_ms_index : nir_tex_src_lod;
+ tex->src[1].src = nir_src_for_ssa(is_multisampled ? sample_idx : nir_imm_int(b, 0));
+ tex->src[2].src_type = nir_tex_src_texture_deref;
+ tex->src[2].src = nir_src_for_ssa(tex_deref);
+ if (is_multisampled) {
+ tex->src[3].src_type = nir_tex_src_lod;
+ tex->src[3].src = nir_src_for_ssa(nir_imm_int(b, 0));
+ }
+ tex->dest_type = nir_type_uint32;
+ tex->is_array = false;
+ tex->coord_components = is_3d ? 3 : 2;
+
+ nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+ nir_builder_instr_insert(b, &tex->instr);
+
+ return &tex->dest.ssa;
}
-
static nir_ssa_def *
-build_nir_buffer_fetch(struct nir_builder *b, struct radv_device *device,
- nir_ssa_def *tex_pos, bool is_3d, bool is_multisampled)
+build_nir_buffer_fetch(struct nir_builder *b, struct radv_device *device, nir_ssa_def *tex_pos,
+ bool is_3d, bool is_multisampled)
{
- const struct glsl_type *sampler_type =
- glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_UINT);
- nir_variable *sampler = nir_variable_create(b->shader, nir_var_uniform,
- sampler_type, "s_tex");
- sampler->data.descriptor_set = 0;
- sampler->data.binding = 0;
-
- nir_ssa_def *width = nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0), .base=16, .range=4);
-
- nir_ssa_def *pos_x = nir_channel(b, tex_pos, 0);
- nir_ssa_def *pos_y = nir_channel(b, tex_pos, 1);
- pos_y = nir_imul(b, pos_y, width);
- pos_x = nir_iadd(b, pos_x, pos_y);
-
- nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
-
- nir_tex_instr *tex = nir_tex_instr_create(b->shader, 2);
- tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
- tex->op = nir_texop_txf;
- tex->src[0].src_type = nir_tex_src_coord;
- tex->src[0].src = nir_src_for_ssa(pos_x);
- tex->src[1].src_type = nir_tex_src_texture_deref;
- tex->src[1].src = nir_src_for_ssa(tex_deref);
- tex->dest_type = nir_type_uint32;
- tex->is_array = false;
- tex->coord_components = 1;
-
- nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
- nir_builder_instr_insert(b, &tex->instr);
-
- return &tex->dest.ssa;
+ const struct glsl_type *sampler_type =
+ glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_UINT);
+ nir_variable *sampler = nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex");
+ sampler->data.descriptor_set = 0;
+ sampler->data.binding = 0;
+
+ nir_ssa_def *width = nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0), .base = 16, .range = 4);
+
+ nir_ssa_def *pos_x = nir_channel(b, tex_pos, 0);
+ nir_ssa_def *pos_y = nir_channel(b, tex_pos, 1);
+ pos_y = nir_imul(b, pos_y, width);
+ pos_x = nir_iadd(b, pos_x, pos_y);
+
+ nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
+
+ nir_tex_instr *tex = nir_tex_instr_create(b->shader, 2);
+ tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
+ tex->op = nir_texop_txf;
+ tex->src[0].src_type = nir_tex_src_coord;
+ tex->src[0].src = nir_src_for_ssa(pos_x);
+ tex->src[1].src_type = nir_tex_src_texture_deref;
+ tex->src[1].src = nir_src_for_ssa(tex_deref);
+ tex->dest_type = nir_type_uint32;
+ tex->is_array = false;
+ tex->coord_components = 1;
+
+ nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+ nir_builder_instr_insert(b, &tex->instr);
+
+ return &tex->dest.ssa;
}
static const VkPipelineVertexInputStateCreateInfo normal_vi_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
- .vertexBindingDescriptionCount = 0,
- .vertexAttributeDescriptionCount = 0,
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .vertexBindingDescriptionCount = 0,
+ .vertexAttributeDescriptionCount = 0,
};
static nir_shader *
-build_nir_copy_fragment_shader(struct radv_device *device,
- texel_fetch_build_func txf_func, const char* name, bool is_3d,
- bool is_multisampled)
+build_nir_copy_fragment_shader(struct radv_device *device, texel_fetch_build_func txf_func,
+ const char *name, bool is_3d, bool is_multisampled)
{
- const struct glsl_type *vec4 = glsl_vec4_type();
- const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, "%s", name);
+ const struct glsl_type *vec4 = glsl_vec4_type();
+ const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
+ nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, "%s", name);
- nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
- vec2, "v_tex_pos");
- tex_pos_in->data.location = VARYING_SLOT_VAR0;
+ nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, vec2, "v_tex_pos");
+ tex_pos_in->data.location = VARYING_SLOT_VAR0;
- nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
- vec4, "f_color");
- color_out->data.location = FRAG_RESULT_DATA0;
+ nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color");
+ color_out->data.location = FRAG_RESULT_DATA0;
- nir_ssa_def *pos_int = nir_f2i32(&b, nir_load_var(&b, tex_pos_in));
- nir_ssa_def *tex_pos = nir_channels(&b, pos_int, 0x3);
+ nir_ssa_def *pos_int = nir_f2i32(&b, nir_load_var(&b, tex_pos_in));
+ nir_ssa_def *tex_pos = nir_channels(&b, pos_int, 0x3);
- nir_ssa_def *color = txf_func(&b, device, tex_pos, is_3d, is_multisampled);
- nir_store_var(&b, color_out, color, 0xf);
+ nir_ssa_def *color = txf_func(&b, device, tex_pos, is_3d, is_multisampled);
+ nir_store_var(&b, color_out, color, 0xf);
- return b.shader;
+ return b.shader;
}
static nir_shader *
-build_nir_copy_fragment_shader_depth(struct radv_device *device,
- texel_fetch_build_func txf_func, const char* name, bool is_3d,
- bool is_multisampled)
+build_nir_copy_fragment_shader_depth(struct radv_device *device, texel_fetch_build_func txf_func,
+ const char *name, bool is_3d, bool is_multisampled)
{
- const struct glsl_type *vec4 = glsl_vec4_type();
- const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, "%s", name);
+ const struct glsl_type *vec4 = glsl_vec4_type();
+ const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
+ nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, "%s", name);
- nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
- vec2, "v_tex_pos");
- tex_pos_in->data.location = VARYING_SLOT_VAR0;
+ nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, vec2, "v_tex_pos");
+ tex_pos_in->data.location = VARYING_SLOT_VAR0;
- nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
- vec4, "f_color");
- color_out->data.location = FRAG_RESULT_DEPTH;
+ nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color");
+ color_out->data.location = FRAG_RESULT_DEPTH;
- nir_ssa_def *pos_int = nir_f2i32(&b, nir_load_var(&b, tex_pos_in));
- nir_ssa_def *tex_pos = nir_channels(&b, pos_int, 0x3);
+ nir_ssa_def *pos_int = nir_f2i32(&b, nir_load_var(&b, tex_pos_in));
+ nir_ssa_def *tex_pos = nir_channels(&b, pos_int, 0x3);
- nir_ssa_def *color = txf_func(&b, device, tex_pos, is_3d, is_multisampled);
- nir_store_var(&b, color_out, color, 0x1);
+ nir_ssa_def *color = txf_func(&b, device, tex_pos, is_3d, is_multisampled);
+ nir_store_var(&b, color_out, color, 0x1);
- return b.shader;
+ return b.shader;
}
static nir_shader *
-build_nir_copy_fragment_shader_stencil(struct radv_device *device,
- texel_fetch_build_func txf_func, const char* name, bool is_3d,
- bool is_multisampled)
+build_nir_copy_fragment_shader_stencil(struct radv_device *device, texel_fetch_build_func txf_func,
+ const char *name, bool is_3d, bool is_multisampled)
{
- const struct glsl_type *vec4 = glsl_vec4_type();
- const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, "%s", name);
+ const struct glsl_type *vec4 = glsl_vec4_type();
+ const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
+ nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, "%s", name);
- nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
- vec2, "v_tex_pos");
- tex_pos_in->data.location = VARYING_SLOT_VAR0;
+ nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, vec2, "v_tex_pos");
+ tex_pos_in->data.location = VARYING_SLOT_VAR0;
- nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
- vec4, "f_color");
- color_out->data.location = FRAG_RESULT_STENCIL;
+ nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color");
+ color_out->data.location = FRAG_RESULT_STENCIL;
- nir_ssa_def *pos_int = nir_f2i32(&b, nir_load_var(&b, tex_pos_in));
- nir_ssa_def *tex_pos = nir_channels(&b, pos_int, 0x3);
+ nir_ssa_def *pos_int = nir_f2i32(&b, nir_load_var(&b, tex_pos_in));
+ nir_ssa_def *tex_pos = nir_channels(&b, pos_int, 0x3);
- nir_ssa_def *color = txf_func(&b, device, tex_pos, is_3d, is_multisampled);
- nir_store_var(&b, color_out, color, 0x1);
+ nir_ssa_def *color = txf_func(&b, device, tex_pos, is_3d, is_multisampled);
+ nir_store_var(&b, color_out, color, 0x1);
- return b.shader;
+ return b.shader;
}
void
radv_device_finish_meta_blit2d_state(struct radv_device *device)
{
- struct radv_meta_state *state = &device->meta_state;
-
- for(unsigned j = 0; j < NUM_META_FS_KEYS; ++j) {
- for (unsigned k = 0; k < RADV_META_DST_LAYOUT_COUNT; ++k) {
- radv_DestroyRenderPass(radv_device_to_handle(device),
- state->blit2d_render_passes[j][k],
- &state->alloc);
- }
- }
-
- for (enum radv_blit_ds_layout j = RADV_BLIT_DS_LAYOUT_TILE_ENABLE; j < RADV_BLIT_DS_LAYOUT_COUNT; j++) {
- radv_DestroyRenderPass(radv_device_to_handle(device),
- state->blit2d_depth_only_rp[j], &state->alloc);
- radv_DestroyRenderPass(radv_device_to_handle(device),
- state->blit2d_stencil_only_rp[j], &state->alloc);
- }
-
- for (unsigned log2_samples = 0; log2_samples < MAX_SAMPLES_LOG2; ++log2_samples) {
- for (unsigned src = 0; src < BLIT2D_NUM_SRC_TYPES; src++) {
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->blit2d[log2_samples].p_layouts[src],
- &state->alloc);
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- state->blit2d[log2_samples].ds_layouts[src],
- &state->alloc);
-
- for (unsigned j = 0; j < NUM_META_FS_KEYS; ++j) {
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->blit2d[log2_samples].pipelines[src][j],
- &state->alloc);
- }
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->blit2d[log2_samples].depth_only_pipeline[src],
- &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->blit2d[log2_samples].stencil_only_pipeline[src],
- &state->alloc);
- }
- }
+ struct radv_meta_state *state = &device->meta_state;
+
+ for (unsigned j = 0; j < NUM_META_FS_KEYS; ++j) {
+ for (unsigned k = 0; k < RADV_META_DST_LAYOUT_COUNT; ++k) {
+ radv_DestroyRenderPass(radv_device_to_handle(device), state->blit2d_render_passes[j][k],
+ &state->alloc);
+ }
+ }
+
+ for (enum radv_blit_ds_layout j = RADV_BLIT_DS_LAYOUT_TILE_ENABLE; j < RADV_BLIT_DS_LAYOUT_COUNT;
+ j++) {
+ radv_DestroyRenderPass(radv_device_to_handle(device), state->blit2d_depth_only_rp[j],
+ &state->alloc);
+ radv_DestroyRenderPass(radv_device_to_handle(device), state->blit2d_stencil_only_rp[j],
+ &state->alloc);
+ }
+
+ for (unsigned log2_samples = 0; log2_samples < MAX_SAMPLES_LOG2; ++log2_samples) {
+ for (unsigned src = 0; src < BLIT2D_NUM_SRC_TYPES; src++) {
+ radv_DestroyPipelineLayout(radv_device_to_handle(device),
+ state->blit2d[log2_samples].p_layouts[src], &state->alloc);
+ radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
+ state->blit2d[log2_samples].ds_layouts[src],
+ &state->alloc);
+
+ for (unsigned j = 0; j < NUM_META_FS_KEYS; ++j) {
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->blit2d[log2_samples].pipelines[src][j], &state->alloc);
+ }
+
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->blit2d[log2_samples].depth_only_pipeline[src], &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->blit2d[log2_samples].stencil_only_pipeline[src],
+ &state->alloc);
+ }
+ }
}
static VkResult
-blit2d_init_color_pipeline(struct radv_device *device,
- enum blit2d_src_type src_type,
- VkFormat format,
- uint32_t log2_samples)
+blit2d_init_color_pipeline(struct radv_device *device, enum blit2d_src_type src_type,
+ VkFormat format, uint32_t log2_samples)
{
- VkResult result;
- unsigned fs_key = radv_format_meta_fs_key(device, format);
- const char *name;
-
- mtx_lock(&device->meta_state.mtx);
- if (device->meta_state.blit2d[log2_samples].pipelines[src_type][fs_key]) {
- mtx_unlock(&device->meta_state.mtx);
- return VK_SUCCESS;
- }
-
- texel_fetch_build_func src_func;
- switch(src_type) {
- case BLIT2D_SRC_TYPE_IMAGE:
- src_func = build_nir_texel_fetch;
- name = "meta_blit2d_image_fs";
- break;
- case BLIT2D_SRC_TYPE_IMAGE_3D:
- src_func = build_nir_texel_fetch;
- name = "meta_blit3d_image_fs";
- break;
- case BLIT2D_SRC_TYPE_BUFFER:
- src_func = build_nir_buffer_fetch;
- name = "meta_blit2d_buffer_fs";
- break;
- default:
- unreachable("unknown blit src type\n");
- break;
- }
-
- const VkPipelineVertexInputStateCreateInfo *vi_create_info;
- nir_shader *fs = build_nir_copy_fragment_shader(device, src_func, name, src_type == BLIT2D_SRC_TYPE_IMAGE_3D, log2_samples > 0);
- nir_shader *vs = build_nir_vertex_shader();
-
- vi_create_info = &normal_vi_create_info;
-
- VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
- {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_VERTEX_BIT,
- .module = vk_shader_module_handle_from_nir(vs),
- .pName = "main",
- .pSpecializationInfo = NULL
- }, {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
- .module = vk_shader_module_handle_from_nir(fs),
- .pName = "main",
- .pSpecializationInfo = NULL
- },
- };
-
- for (unsigned dst_layout = 0; dst_layout < RADV_META_DST_LAYOUT_COUNT; ++dst_layout) {
- if (!device->meta_state.blit2d_render_passes[fs_key][dst_layout]) {
- VkImageLayout layout = radv_meta_dst_layout_to_layout(dst_layout);
-
- result = radv_CreateRenderPass2(radv_device_to_handle(device),
- &(VkRenderPassCreateInfo2) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
- .attachmentCount = 1,
- .pAttachments = &(VkAttachmentDescription2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
- .format = format,
- .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
- .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
- .initialLayout = layout,
- .finalLayout = layout,
- },
- .subpassCount = 1,
- .pSubpasses = &(VkSubpassDescription2) {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
- .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
- .inputAttachmentCount = 0,
- .colorAttachmentCount = 1,
- .pColorAttachments = &(VkAttachmentReference2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
- .attachment = 0,
- .layout = layout,
- },
- .pResolveAttachments = NULL,
- .pDepthStencilAttachment = &(VkAttachmentReference2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
- .attachment = VK_ATTACHMENT_UNUSED,
- .layout = layout,
- },
- .preserveAttachmentCount = 0,
- .pPreserveAttachments = NULL,
- },
- .dependencyCount = 2,
- .pDependencies = (VkSubpassDependency2[]) {
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = VK_SUBPASS_EXTERNAL,
- .dstSubpass = 0,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- },
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = 0,
- .dstSubpass = VK_SUBPASS_EXTERNAL,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- }
- },
- }, &device->meta_state.alloc, &device->meta_state.blit2d_render_passes[fs_key][dst_layout]);
- }
- }
-
- const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
- .stageCount = ARRAY_SIZE(pipeline_shader_stages),
- .pStages = pipeline_shader_stages,
- .pVertexInputState = vi_create_info,
- .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
- .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
- .primitiveRestartEnable = false,
- },
- .pViewportState = &(VkPipelineViewportStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
- .viewportCount = 1,
- .scissorCount = 1,
- },
- .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
- .rasterizerDiscardEnable = false,
- .polygonMode = VK_POLYGON_MODE_FILL,
- .cullMode = VK_CULL_MODE_NONE,
- .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE
- },
- .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
- .rasterizationSamples = 1 << log2_samples,
- .sampleShadingEnable = log2_samples > 1,
- .minSampleShading = 1.0,
- .pSampleMask = (VkSampleMask[]) { UINT32_MAX },
- },
- .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
- .attachmentCount = 1,
- .pAttachments = (VkPipelineColorBlendAttachmentState []) {
- { .colorWriteMask =
- VK_COLOR_COMPONENT_A_BIT |
- VK_COLOR_COMPONENT_R_BIT |
- VK_COLOR_COMPONENT_G_BIT |
- VK_COLOR_COMPONENT_B_BIT },
- }
- },
- .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
- .dynamicStateCount = 9,
- .pDynamicStates = (VkDynamicState[]) {
- VK_DYNAMIC_STATE_VIEWPORT,
- VK_DYNAMIC_STATE_SCISSOR,
- VK_DYNAMIC_STATE_LINE_WIDTH,
- VK_DYNAMIC_STATE_DEPTH_BIAS,
- VK_DYNAMIC_STATE_BLEND_CONSTANTS,
- VK_DYNAMIC_STATE_DEPTH_BOUNDS,
- VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
- VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
- VK_DYNAMIC_STATE_STENCIL_REFERENCE,
- },
- },
- .flags = 0,
- .layout = device->meta_state.blit2d[log2_samples].p_layouts[src_type],
- .renderPass = device->meta_state.blit2d_render_passes[fs_key][0],
- .subpass = 0,
- };
-
- const struct radv_graphics_pipeline_create_info radv_pipeline_info = {
- .use_rectlist = true
- };
-
- result = radv_graphics_pipeline_create(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- &vk_pipeline_info, &radv_pipeline_info,
- &device->meta_state.alloc,
- &device->meta_state.blit2d[log2_samples].pipelines[src_type][fs_key]);
-
-
- ralloc_free(vs);
- ralloc_free(fs);
-
- mtx_unlock(&device->meta_state.mtx);
- return result;
+ VkResult result;
+ unsigned fs_key = radv_format_meta_fs_key(device, format);
+ const char *name;
+
+ mtx_lock(&device->meta_state.mtx);
+ if (device->meta_state.blit2d[log2_samples].pipelines[src_type][fs_key]) {
+ mtx_unlock(&device->meta_state.mtx);
+ return VK_SUCCESS;
+ }
+
+ texel_fetch_build_func src_func;
+ switch (src_type) {
+ case BLIT2D_SRC_TYPE_IMAGE:
+ src_func = build_nir_texel_fetch;
+ name = "meta_blit2d_image_fs";
+ break;
+ case BLIT2D_SRC_TYPE_IMAGE_3D:
+ src_func = build_nir_texel_fetch;
+ name = "meta_blit3d_image_fs";
+ break;
+ case BLIT2D_SRC_TYPE_BUFFER:
+ src_func = build_nir_buffer_fetch;
+ name = "meta_blit2d_buffer_fs";
+ break;
+ default:
+ unreachable("unknown blit src type\n");
+ break;
+ }
+
+ const VkPipelineVertexInputStateCreateInfo *vi_create_info;
+ nir_shader *fs = build_nir_copy_fragment_shader(
+ device, src_func, name, src_type == BLIT2D_SRC_TYPE_IMAGE_3D, log2_samples > 0);
+ nir_shader *vs = build_nir_vertex_shader();
+
+ vi_create_info = &normal_vi_create_info;
+
+ VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
+ {.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_VERTEX_BIT,
+ .module = vk_shader_module_handle_from_nir(vs),
+ .pName = "main",
+ .pSpecializationInfo = NULL},
+ {.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .module = vk_shader_module_handle_from_nir(fs),
+ .pName = "main",
+ .pSpecializationInfo = NULL},
+ };
+
+ for (unsigned dst_layout = 0; dst_layout < RADV_META_DST_LAYOUT_COUNT; ++dst_layout) {
+ if (!device->meta_state.blit2d_render_passes[fs_key][dst_layout]) {
+ VkImageLayout layout = radv_meta_dst_layout_to_layout(dst_layout);
+
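+         /* Render passes are created lazily, one per destination layout; the attachment
+          * is loaded and stored in place (initial layout == final layout), so no layout
+          * transition occurs. */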
+ result = radv_CreateRenderPass2(
+ radv_device_to_handle(device),
+ &(VkRenderPassCreateInfo2){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
+ .attachmentCount = 1,
+ .pAttachments =
+ &(VkAttachmentDescription2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
+ .format = format,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = layout,
+ .finalLayout = layout,
+ },
+ .subpassCount = 1,
+ .pSubpasses =
+ &(VkSubpassDescription2){
+ .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 1,
+ .pColorAttachments =
+ &(VkAttachmentReference2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
+ .attachment = 0,
+ .layout = layout,
+ },
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment =
+ &(VkAttachmentReference2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
+ .attachment = VK_ATTACHMENT_UNUSED,
+ .layout = layout,
+ },
+ .preserveAttachmentCount = 0,
+ .pPreserveAttachments = NULL,
+ },
+ .dependencyCount = 2,
+ .pDependencies =
+ (VkSubpassDependency2[]){{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = VK_SUBPASS_EXTERNAL,
+ .dstSubpass = 0,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0},
+ {.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = 0,
+ .dstSubpass = VK_SUBPASS_EXTERNAL,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0}},
+ },
+ &device->meta_state.alloc,
+ &device->meta_state.blit2d_render_passes[fs_key][dst_layout]);
+ }
+ }
+
+ const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .stageCount = ARRAY_SIZE(pipeline_shader_stages),
+ .pStages = pipeline_shader_stages,
+ .pVertexInputState = vi_create_info,
+ .pInputAssemblyState =
+ &(VkPipelineInputAssemblyStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
+ .primitiveRestartEnable = false,
+ },
+ .pViewportState =
+ &(VkPipelineViewportStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .viewportCount = 1,
+ .scissorCount = 1,
+ },
+ .pRasterizationState =
+ &(VkPipelineRasterizationStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .rasterizerDiscardEnable = false,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .cullMode = VK_CULL_MODE_NONE,
+ .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE},
+ .pMultisampleState =
+ &(VkPipelineMultisampleStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .rasterizationSamples = 1 << log2_samples,
+ .sampleShadingEnable = log2_samples > 1,
+ .minSampleShading = 1.0,
+ .pSampleMask = (VkSampleMask[]){UINT32_MAX},
+ },
+ .pColorBlendState =
+ &(VkPipelineColorBlendStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments =
+ (VkPipelineColorBlendAttachmentState[]){
+ {.colorWriteMask = VK_COLOR_COMPONENT_A_BIT | VK_COLOR_COMPONENT_R_BIT |
+ VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT},
+ }},
+ .pDynamicState =
+ &(VkPipelineDynamicStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .dynamicStateCount = 9,
+ .pDynamicStates =
+ (VkDynamicState[]){
+ VK_DYNAMIC_STATE_VIEWPORT,
+ VK_DYNAMIC_STATE_SCISSOR,
+ VK_DYNAMIC_STATE_LINE_WIDTH,
+ VK_DYNAMIC_STATE_DEPTH_BIAS,
+ VK_DYNAMIC_STATE_BLEND_CONSTANTS,
+ VK_DYNAMIC_STATE_DEPTH_BOUNDS,
+ VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
+ VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
+ VK_DYNAMIC_STATE_STENCIL_REFERENCE,
+ },
+ },
+ .flags = 0,
+ .layout = device->meta_state.blit2d[log2_samples].p_layouts[src_type],
+ .renderPass = device->meta_state.blit2d_render_passes[fs_key][0],
+ .subpass = 0,
+ };
+
+ const struct radv_graphics_pipeline_create_info radv_pipeline_info = {.use_rectlist = true};
+
+ result = radv_graphics_pipeline_create(
+ radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache),
+ &vk_pipeline_info, &radv_pipeline_info, &device->meta_state.alloc,
+ &device->meta_state.blit2d[log2_samples].pipelines[src_type][fs_key]);
+
+ ralloc_free(vs);
+ ralloc_free(fs);
+
+ mtx_unlock(&device->meta_state.mtx);
+ return result;
}
static VkResult
-blit2d_init_depth_only_pipeline(struct radv_device *device,
- enum blit2d_src_type src_type,
- uint32_t log2_samples)
+blit2d_init_depth_only_pipeline(struct radv_device *device, enum blit2d_src_type src_type,
+ uint32_t log2_samples)
{
- VkResult result;
- const char *name;
-
- mtx_lock(&device->meta_state.mtx);
- if (device->meta_state.blit2d[log2_samples].depth_only_pipeline[src_type]) {
- mtx_unlock(&device->meta_state.mtx);
- return VK_SUCCESS;
- }
-
- texel_fetch_build_func src_func;
- switch(src_type) {
- case BLIT2D_SRC_TYPE_IMAGE:
- src_func = build_nir_texel_fetch;
- name = "meta_blit2d_depth_image_fs";
- break;
- case BLIT2D_SRC_TYPE_IMAGE_3D:
- src_func = build_nir_texel_fetch;
- name = "meta_blit3d_depth_image_fs";
- break;
- case BLIT2D_SRC_TYPE_BUFFER:
- src_func = build_nir_buffer_fetch;
- name = "meta_blit2d_depth_buffer_fs";
- break;
- default:
- unreachable("unknown blit src type\n");
- break;
- }
-
- const VkPipelineVertexInputStateCreateInfo *vi_create_info;
- nir_shader *fs = build_nir_copy_fragment_shader_depth(device, src_func, name, src_type == BLIT2D_SRC_TYPE_IMAGE_3D, log2_samples > 0);
- nir_shader *vs = build_nir_vertex_shader();
-
- vi_create_info = &normal_vi_create_info;
-
- VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
- {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_VERTEX_BIT,
- .module = vk_shader_module_handle_from_nir(vs),
- .pName = "main",
- .pSpecializationInfo = NULL
- }, {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
- .module = vk_shader_module_handle_from_nir(fs),
- .pName = "main",
- .pSpecializationInfo = NULL
- },
- };
-
- for (enum radv_blit_ds_layout ds_layout = RADV_BLIT_DS_LAYOUT_TILE_ENABLE; ds_layout < RADV_BLIT_DS_LAYOUT_COUNT; ds_layout++) {
- if (!device->meta_state.blit2d_depth_only_rp[ds_layout]) {
- VkImageLayout layout = radv_meta_blit_ds_to_layout(ds_layout);
- result = radv_CreateRenderPass2(radv_device_to_handle(device),
- &(VkRenderPassCreateInfo2) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
- .attachmentCount = 1,
- .pAttachments = &(VkAttachmentDescription2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
- .format = VK_FORMAT_D32_SFLOAT,
- .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
- .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
- .initialLayout = layout,
- .finalLayout = layout,
- },
- .subpassCount = 1,
- .pSubpasses = &(VkSubpassDescription2) {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
- .inputAttachmentCount = 0,
- .colorAttachmentCount = 0,
- .pColorAttachments = NULL,
- .pResolveAttachments = NULL,
- .pDepthStencilAttachment = &(VkAttachmentReference2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
- .attachment = 0,
- .layout = layout,
- },
- .preserveAttachmentCount = 0,
- .pPreserveAttachments = NULL,
- },
- .dependencyCount = 2,
- .pDependencies = (VkSubpassDependency2[]) {
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = VK_SUBPASS_EXTERNAL,
- .dstSubpass = 0,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- },
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = 0,
- .dstSubpass = VK_SUBPASS_EXTERNAL,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- }
- },
- }, &device->meta_state.alloc, &device->meta_state.blit2d_depth_only_rp[ds_layout]);
- }
- }
-
- const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
- .stageCount = ARRAY_SIZE(pipeline_shader_stages),
- .pStages = pipeline_shader_stages,
- .pVertexInputState = vi_create_info,
- .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
- .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
- .primitiveRestartEnable = false,
- },
- .pViewportState = &(VkPipelineViewportStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
- .viewportCount = 1,
- .scissorCount = 1,
- },
- .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
- .rasterizerDiscardEnable = false,
- .polygonMode = VK_POLYGON_MODE_FILL,
- .cullMode = VK_CULL_MODE_NONE,
- .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE
- },
- .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
- .rasterizationSamples = 1 << log2_samples,
- .sampleShadingEnable = false,
- .pSampleMask = (VkSampleMask[]) { UINT32_MAX },
- },
- .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
- .attachmentCount = 0,
- .pAttachments = NULL,
- },
- .pDepthStencilState = &(VkPipelineDepthStencilStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
- .depthTestEnable = true,
- .depthWriteEnable = true,
- .depthCompareOp = VK_COMPARE_OP_ALWAYS,
- },
- .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
- .dynamicStateCount = 9,
- .pDynamicStates = (VkDynamicState[]) {
- VK_DYNAMIC_STATE_VIEWPORT,
- VK_DYNAMIC_STATE_SCISSOR,
- VK_DYNAMIC_STATE_LINE_WIDTH,
- VK_DYNAMIC_STATE_DEPTH_BIAS,
- VK_DYNAMIC_STATE_BLEND_CONSTANTS,
- VK_DYNAMIC_STATE_DEPTH_BOUNDS,
- VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
- VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
- VK_DYNAMIC_STATE_STENCIL_REFERENCE,
- },
- },
- .flags = 0,
- .layout = device->meta_state.blit2d[log2_samples].p_layouts[src_type],
- .renderPass = device->meta_state.blit2d_depth_only_rp[0],
- .subpass = 0,
- };
-
- const struct radv_graphics_pipeline_create_info radv_pipeline_info = {
- .use_rectlist = true
- };
-
- result = radv_graphics_pipeline_create(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- &vk_pipeline_info, &radv_pipeline_info,
- &device->meta_state.alloc,
- &device->meta_state.blit2d[log2_samples].depth_only_pipeline[src_type]);
-
-
- ralloc_free(vs);
- ralloc_free(fs);
-
- mtx_unlock(&device->meta_state.mtx);
- return result;
+ VkResult result;
+ const char *name;
+
+ mtx_lock(&device->meta_state.mtx);
+ if (device->meta_state.blit2d[log2_samples].depth_only_pipeline[src_type]) {
+ mtx_unlock(&device->meta_state.mtx);
+ return VK_SUCCESS;
+ }
+
+ texel_fetch_build_func src_func;
+ switch (src_type) {
+ case BLIT2D_SRC_TYPE_IMAGE:
+ src_func = build_nir_texel_fetch;
+ name = "meta_blit2d_depth_image_fs";
+ break;
+ case BLIT2D_SRC_TYPE_IMAGE_3D:
+ src_func = build_nir_texel_fetch;
+ name = "meta_blit3d_depth_image_fs";
+ break;
+ case BLIT2D_SRC_TYPE_BUFFER:
+ src_func = build_nir_buffer_fetch;
+ name = "meta_blit2d_depth_buffer_fs";
+ break;
+ default:
+ unreachable("unknown blit src type\n");
+ break;
+ }
+
+ const VkPipelineVertexInputStateCreateInfo *vi_create_info;
+ nir_shader *fs = build_nir_copy_fragment_shader_depth(
+ device, src_func, name, src_type == BLIT2D_SRC_TYPE_IMAGE_3D, log2_samples > 0);
+ nir_shader *vs = build_nir_vertex_shader();
+
+ vi_create_info = &normal_vi_create_info;
+
+ VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
+ {.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_VERTEX_BIT,
+ .module = vk_shader_module_handle_from_nir(vs),
+ .pName = "main",
+ .pSpecializationInfo = NULL},
+ {.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .module = vk_shader_module_handle_from_nir(fs),
+ .pName = "main",
+ .pSpecializationInfo = NULL},
+ };
+
+ for (enum radv_blit_ds_layout ds_layout = RADV_BLIT_DS_LAYOUT_TILE_ENABLE;
+ ds_layout < RADV_BLIT_DS_LAYOUT_COUNT; ds_layout++) {
+ if (!device->meta_state.blit2d_depth_only_rp[ds_layout]) {
+ VkImageLayout layout = radv_meta_blit_ds_to_layout(ds_layout);
+ result = radv_CreateRenderPass2(
+ radv_device_to_handle(device),
+ &(VkRenderPassCreateInfo2){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
+ .attachmentCount = 1,
+ .pAttachments =
+ &(VkAttachmentDescription2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
+ .format = VK_FORMAT_D32_SFLOAT,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = layout,
+ .finalLayout = layout,
+ },
+ .subpassCount = 1,
+ .pSubpasses =
+ &(VkSubpassDescription2){
+ .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 0,
+ .pColorAttachments = NULL,
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment =
+ &(VkAttachmentReference2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
+ .attachment = 0,
+ .layout = layout,
+ },
+ .preserveAttachmentCount = 0,
+ .pPreserveAttachments = NULL,
+ },
+ .dependencyCount = 2,
+ .pDependencies =
+ (VkSubpassDependency2[]){{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = VK_SUBPASS_EXTERNAL,
+ .dstSubpass = 0,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0},
+ {.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = 0,
+ .dstSubpass = VK_SUBPASS_EXTERNAL,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0}},
+ },
+ &device->meta_state.alloc, &device->meta_state.blit2d_depth_only_rp[ds_layout]);
+ }
+ }
+
+ const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .stageCount = ARRAY_SIZE(pipeline_shader_stages),
+ .pStages = pipeline_shader_stages,
+ .pVertexInputState = vi_create_info,
+ .pInputAssemblyState =
+ &(VkPipelineInputAssemblyStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
+ .primitiveRestartEnable = false,
+ },
+ .pViewportState =
+ &(VkPipelineViewportStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .viewportCount = 1,
+ .scissorCount = 1,
+ },
+ .pRasterizationState =
+ &(VkPipelineRasterizationStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .rasterizerDiscardEnable = false,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .cullMode = VK_CULL_MODE_NONE,
+ .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE},
+ .pMultisampleState =
+ &(VkPipelineMultisampleStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .rasterizationSamples = 1 << log2_samples,
+ .sampleShadingEnable = false,
+ .pSampleMask = (VkSampleMask[]){UINT32_MAX},
+ },
+ .pColorBlendState =
+ &(VkPipelineColorBlendStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .attachmentCount = 0,
+ .pAttachments = NULL,
+ },
+ .pDepthStencilState =
+ &(VkPipelineDepthStencilStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
+ .depthTestEnable = true,
+ .depthWriteEnable = true,
+ .depthCompareOp = VK_COMPARE_OP_ALWAYS,
+ },
+ .pDynamicState =
+ &(VkPipelineDynamicStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .dynamicStateCount = 9,
+ .pDynamicStates =
+ (VkDynamicState[]){
+ VK_DYNAMIC_STATE_VIEWPORT,
+ VK_DYNAMIC_STATE_SCISSOR,
+ VK_DYNAMIC_STATE_LINE_WIDTH,
+ VK_DYNAMIC_STATE_DEPTH_BIAS,
+ VK_DYNAMIC_STATE_BLEND_CONSTANTS,
+ VK_DYNAMIC_STATE_DEPTH_BOUNDS,
+ VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
+ VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
+ VK_DYNAMIC_STATE_STENCIL_REFERENCE,
+ },
+ },
+ .flags = 0,
+ .layout = device->meta_state.blit2d[log2_samples].p_layouts[src_type],
+ .renderPass = device->meta_state.blit2d_depth_only_rp[0],
+ .subpass = 0,
+ };
+
+ const struct radv_graphics_pipeline_create_info radv_pipeline_info = {.use_rectlist = true};
+
+ result = radv_graphics_pipeline_create(
+ radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache),
+ &vk_pipeline_info, &radv_pipeline_info, &device->meta_state.alloc,
+ &device->meta_state.blit2d[log2_samples].depth_only_pipeline[src_type]);
+
+ ralloc_free(vs);
+ ralloc_free(fs);
+
+ mtx_unlock(&device->meta_state.mtx);
+ return result;
}
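The depth-only pipeline above (and the stencil-only one below) is created on demand: the handle is re-checked under meta_state.mtx so that concurrent first uses build it at most once. A minimal sketch of that pattern (illustrative helper using C11 threads, not radv API and not part of the patch):

#include <threads.h>
#include <vulkan/vulkan.h>

static VkResult
lazy_create_pipeline_example(mtx_t *mtx, VkPipeline *pipeline,
                             VkResult (*create)(VkPipeline *out))
{
   VkResult result = VK_SUCCESS;

   mtx_lock(mtx);
   if (*pipeline == VK_NULL_HANDLE) /* another thread may have created it already */
      result = create(pipeline);
   mtx_unlock(mtx);

   return result;
}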
static VkResult
-blit2d_init_stencil_only_pipeline(struct radv_device *device,
- enum blit2d_src_type src_type,
- uint32_t log2_samples)
+blit2d_init_stencil_only_pipeline(struct radv_device *device, enum blit2d_src_type src_type,
+ uint32_t log2_samples)
{
- VkResult result;
- const char *name;
-
- mtx_lock(&device->meta_state.mtx);
- if (device->meta_state.blit2d[log2_samples].stencil_only_pipeline[src_type]) {
- mtx_unlock(&device->meta_state.mtx);
- return VK_SUCCESS;
- }
-
- texel_fetch_build_func src_func;
- switch(src_type) {
- case BLIT2D_SRC_TYPE_IMAGE:
- src_func = build_nir_texel_fetch;
- name = "meta_blit2d_stencil_image_fs";
- break;
- case BLIT2D_SRC_TYPE_IMAGE_3D:
- src_func = build_nir_texel_fetch;
- name = "meta_blit3d_stencil_image_fs";
- break;
- case BLIT2D_SRC_TYPE_BUFFER:
- src_func = build_nir_buffer_fetch;
- name = "meta_blit2d_stencil_buffer_fs";
- break;
- default:
- unreachable("unknown blit src type\n");
- break;
- }
-
- const VkPipelineVertexInputStateCreateInfo *vi_create_info;
- nir_shader *fs = build_nir_copy_fragment_shader_stencil(device, src_func, name, src_type == BLIT2D_SRC_TYPE_IMAGE_3D, log2_samples > 0);
- nir_shader *vs = build_nir_vertex_shader();
-
- vi_create_info = &normal_vi_create_info;
-
- VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
- {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_VERTEX_BIT,
- .module = vk_shader_module_handle_from_nir(vs),
- .pName = "main",
- .pSpecializationInfo = NULL
- }, {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
- .module = vk_shader_module_handle_from_nir(fs),
- .pName = "main",
- .pSpecializationInfo = NULL
- },
- };
-
- for (enum radv_blit_ds_layout ds_layout = RADV_BLIT_DS_LAYOUT_TILE_ENABLE; ds_layout < RADV_BLIT_DS_LAYOUT_COUNT; ds_layout++) {
- if (!device->meta_state.blit2d_stencil_only_rp[ds_layout]) {
- VkImageLayout layout = radv_meta_blit_ds_to_layout(ds_layout);
- result = radv_CreateRenderPass2(radv_device_to_handle(device),
- &(VkRenderPassCreateInfo2) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
- .attachmentCount = 1,
- .pAttachments = &(VkAttachmentDescription2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
- .format = VK_FORMAT_S8_UINT,
- .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
- .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
- .initialLayout = layout,
- .finalLayout = layout,
- },
- .subpassCount = 1,
- .pSubpasses = &(VkSubpassDescription2) {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
- .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
- .inputAttachmentCount = 0,
- .colorAttachmentCount = 0,
- .pColorAttachments = NULL,
- .pResolveAttachments = NULL,
- .pDepthStencilAttachment = &(VkAttachmentReference2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
- .attachment = 0,
- .layout = layout,
- },
- .preserveAttachmentCount = 0,
- .pPreserveAttachments = NULL,
- },
- .dependencyCount = 2,
- .pDependencies = (VkSubpassDependency2[]) {
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = VK_SUBPASS_EXTERNAL,
- .dstSubpass = 0,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- },
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = 0,
- .dstSubpass = VK_SUBPASS_EXTERNAL,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- }
- },
- }, &device->meta_state.alloc, &device->meta_state.blit2d_stencil_only_rp[ds_layout]);
- }
- }
-
- const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
- .stageCount = ARRAY_SIZE(pipeline_shader_stages),
- .pStages = pipeline_shader_stages,
- .pVertexInputState = vi_create_info,
- .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
- .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
- .primitiveRestartEnable = false,
- },
- .pViewportState = &(VkPipelineViewportStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
- .viewportCount = 1,
- .scissorCount = 1,
- },
- .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
- .rasterizerDiscardEnable = false,
- .polygonMode = VK_POLYGON_MODE_FILL,
- .cullMode = VK_CULL_MODE_NONE,
- .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE
- },
- .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
- .rasterizationSamples = 1 << log2_samples,
- .sampleShadingEnable = false,
- .pSampleMask = (VkSampleMask[]) { UINT32_MAX },
- },
- .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
- .attachmentCount = 0,
- .pAttachments = NULL,
- },
- .pDepthStencilState = &(VkPipelineDepthStencilStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
- .depthTestEnable = false,
- .depthWriteEnable = false,
- .stencilTestEnable = true,
- .front = {
- .failOp = VK_STENCIL_OP_REPLACE,
- .passOp = VK_STENCIL_OP_REPLACE,
- .depthFailOp = VK_STENCIL_OP_REPLACE,
- .compareOp = VK_COMPARE_OP_ALWAYS,
- .compareMask = 0xff,
- .writeMask = 0xff,
- .reference = 0
- },
- .back = {
- .failOp = VK_STENCIL_OP_REPLACE,
- .passOp = VK_STENCIL_OP_REPLACE,
- .depthFailOp = VK_STENCIL_OP_REPLACE,
- .compareOp = VK_COMPARE_OP_ALWAYS,
- .compareMask = 0xff,
- .writeMask = 0xff,
- .reference = 0
- },
- .depthCompareOp = VK_COMPARE_OP_ALWAYS,
- },
- .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
- .dynamicStateCount = 6,
- .pDynamicStates = (VkDynamicState[]) {
- VK_DYNAMIC_STATE_VIEWPORT,
- VK_DYNAMIC_STATE_SCISSOR,
- VK_DYNAMIC_STATE_LINE_WIDTH,
- VK_DYNAMIC_STATE_DEPTH_BIAS,
- VK_DYNAMIC_STATE_BLEND_CONSTANTS,
- VK_DYNAMIC_STATE_DEPTH_BOUNDS,
- },
- },
- .flags = 0,
- .layout = device->meta_state.blit2d[log2_samples].p_layouts[src_type],
- .renderPass = device->meta_state.blit2d_stencil_only_rp[0],
- .subpass = 0,
- };
-
- const struct radv_graphics_pipeline_create_info radv_pipeline_info = {
- .use_rectlist = true
- };
-
- result = radv_graphics_pipeline_create(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- &vk_pipeline_info, &radv_pipeline_info,
- &device->meta_state.alloc,
- &device->meta_state.blit2d[log2_samples].stencil_only_pipeline[src_type]);
-
-
- ralloc_free(vs);
- ralloc_free(fs);
-
- mtx_unlock(&device->meta_state.mtx);
- return result;
+ VkResult result;
+ const char *name;
+
+ mtx_lock(&device->meta_state.mtx);
+ if (device->meta_state.blit2d[log2_samples].stencil_only_pipeline[src_type]) {
+ mtx_unlock(&device->meta_state.mtx);
+ return VK_SUCCESS;
+ }
+
+ texel_fetch_build_func src_func;
+ switch (src_type) {
+ case BLIT2D_SRC_TYPE_IMAGE:
+ src_func = build_nir_texel_fetch;
+ name = "meta_blit2d_stencil_image_fs";
+ break;
+ case BLIT2D_SRC_TYPE_IMAGE_3D:
+ src_func = build_nir_texel_fetch;
+ name = "meta_blit3d_stencil_image_fs";
+ break;
+ case BLIT2D_SRC_TYPE_BUFFER:
+ src_func = build_nir_buffer_fetch;
+ name = "meta_blit2d_stencil_buffer_fs";
+ break;
+ default:
+ unreachable("unknown blit src type\n");
+ break;
+ }
+
+ const VkPipelineVertexInputStateCreateInfo *vi_create_info;
+ nir_shader *fs = build_nir_copy_fragment_shader_stencil(
+ device, src_func, name, src_type == BLIT2D_SRC_TYPE_IMAGE_3D, log2_samples > 0);
+ nir_shader *vs = build_nir_vertex_shader();
+
+ vi_create_info = &normal_vi_create_info;
+
+ VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
+ {.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_VERTEX_BIT,
+ .module = vk_shader_module_handle_from_nir(vs),
+ .pName = "main",
+ .pSpecializationInfo = NULL},
+ {.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .module = vk_shader_module_handle_from_nir(fs),
+ .pName = "main",
+ .pSpecializationInfo = NULL},
+ };
+
+ for (enum radv_blit_ds_layout ds_layout = RADV_BLIT_DS_LAYOUT_TILE_ENABLE;
+ ds_layout < RADV_BLIT_DS_LAYOUT_COUNT; ds_layout++) {
+ if (!device->meta_state.blit2d_stencil_only_rp[ds_layout]) {
+ VkImageLayout layout = radv_meta_blit_ds_to_layout(ds_layout);
+ result = radv_CreateRenderPass2(
+ radv_device_to_handle(device),
+ &(VkRenderPassCreateInfo2){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
+ .attachmentCount = 1,
+ .pAttachments =
+ &(VkAttachmentDescription2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
+ .format = VK_FORMAT_S8_UINT,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = layout,
+ .finalLayout = layout,
+ },
+ .subpassCount = 1,
+ .pSubpasses =
+ &(VkSubpassDescription2){
+ .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 0,
+ .pColorAttachments = NULL,
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment =
+ &(VkAttachmentReference2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
+ .attachment = 0,
+ .layout = layout,
+ },
+ .preserveAttachmentCount = 0,
+ .pPreserveAttachments = NULL,
+ },
+ .dependencyCount = 2,
+ .pDependencies =
+ (VkSubpassDependency2[]){{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = VK_SUBPASS_EXTERNAL,
+ .dstSubpass = 0,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0},
+ {.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = 0,
+ .dstSubpass = VK_SUBPASS_EXTERNAL,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0}},
+ },
+ &device->meta_state.alloc, &device->meta_state.blit2d_stencil_only_rp[ds_layout]);
+ }
+ }
+
+ const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .stageCount = ARRAY_SIZE(pipeline_shader_stages),
+ .pStages = pipeline_shader_stages,
+ .pVertexInputState = vi_create_info,
+ .pInputAssemblyState =
+ &(VkPipelineInputAssemblyStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
+ .primitiveRestartEnable = false,
+ },
+ .pViewportState =
+ &(VkPipelineViewportStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .viewportCount = 1,
+ .scissorCount = 1,
+ },
+ .pRasterizationState =
+ &(VkPipelineRasterizationStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .rasterizerDiscardEnable = false,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .cullMode = VK_CULL_MODE_NONE,
+ .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE},
+ .pMultisampleState =
+ &(VkPipelineMultisampleStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .rasterizationSamples = 1 << log2_samples,
+ .sampleShadingEnable = false,
+ .pSampleMask = (VkSampleMask[]){UINT32_MAX},
+ },
+ .pColorBlendState =
+ &(VkPipelineColorBlendStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .attachmentCount = 0,
+ .pAttachments = NULL,
+ },
+ .pDepthStencilState =
+ &(VkPipelineDepthStencilStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
+ .depthTestEnable = false,
+ .depthWriteEnable = false,
+ .stencilTestEnable = true,
+ .front = {.failOp = VK_STENCIL_OP_REPLACE,
+ .passOp = VK_STENCIL_OP_REPLACE,
+ .depthFailOp = VK_STENCIL_OP_REPLACE,
+ .compareOp = VK_COMPARE_OP_ALWAYS,
+ .compareMask = 0xff,
+ .writeMask = 0xff,
+ .reference = 0},
+ .back = {.failOp = VK_STENCIL_OP_REPLACE,
+ .passOp = VK_STENCIL_OP_REPLACE,
+ .depthFailOp = VK_STENCIL_OP_REPLACE,
+ .compareOp = VK_COMPARE_OP_ALWAYS,
+ .compareMask = 0xff,
+ .writeMask = 0xff,
+ .reference = 0},
+ .depthCompareOp = VK_COMPARE_OP_ALWAYS,
+ },
+ .pDynamicState =
+ &(VkPipelineDynamicStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .dynamicStateCount = 6,
+ .pDynamicStates =
+ (VkDynamicState[]){
+ VK_DYNAMIC_STATE_VIEWPORT,
+ VK_DYNAMIC_STATE_SCISSOR,
+ VK_DYNAMIC_STATE_LINE_WIDTH,
+ VK_DYNAMIC_STATE_DEPTH_BIAS,
+ VK_DYNAMIC_STATE_BLEND_CONSTANTS,
+ VK_DYNAMIC_STATE_DEPTH_BOUNDS,
+ },
+ },
+ .flags = 0,
+ .layout = device->meta_state.blit2d[log2_samples].p_layouts[src_type],
+ .renderPass = device->meta_state.blit2d_stencil_only_rp[0],
+ .subpass = 0,
+ };
+
+ const struct radv_graphics_pipeline_create_info radv_pipeline_info = {.use_rectlist = true};
+
+ result = radv_graphics_pipeline_create(
+ radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache),
+ &vk_pipeline_info, &radv_pipeline_info, &device->meta_state.alloc,
+ &device->meta_state.blit2d[log2_samples].stencil_only_pipeline[src_type]);
+
+ ralloc_free(vs);
+ ralloc_free(fs);
+
+ mtx_unlock(&device->meta_state.mtx);
+ return result;
}
static VkResult
-meta_blit2d_create_pipe_layout(struct radv_device *device,
- int idx,
- uint32_t log2_samples)
+meta_blit2d_create_pipe_layout(struct radv_device *device, int idx, uint32_t log2_samples)
{
- VkResult result;
- VkDescriptorType desc_type = (idx == BLIT2D_SRC_TYPE_BUFFER) ? VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
- const VkPushConstantRange push_constant_ranges[] = {
- {VK_SHADER_STAGE_VERTEX_BIT, 0, 16},
- {VK_SHADER_STAGE_FRAGMENT_BIT, 16, 4},
- };
- int num_push_constant_range = (idx != BLIT2D_SRC_TYPE_IMAGE || log2_samples > 0) ? 2 : 1;
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
- &(VkDescriptorSetLayoutCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 1,
- .pBindings = (VkDescriptorSetLayoutBinding[]) {
- {
- .binding = 0,
- .descriptorType = desc_type,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
- .pImmutableSamplers = NULL
- },
- }
- }, &device->meta_state.alloc, &device->meta_state.blit2d[log2_samples].ds_layouts[idx]);
- if (result != VK_SUCCESS)
- goto fail;
-
- result = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &(VkPipelineLayoutCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 1,
- .pSetLayouts = &device->meta_state.blit2d[log2_samples].ds_layouts[idx],
- .pushConstantRangeCount = num_push_constant_range,
- .pPushConstantRanges = push_constant_ranges,
- },
- &device->meta_state.alloc, &device->meta_state.blit2d[log2_samples].p_layouts[idx]);
- if (result != VK_SUCCESS)
- goto fail;
- return VK_SUCCESS;
+ VkResult result;
+ VkDescriptorType desc_type = (idx == BLIT2D_SRC_TYPE_BUFFER)
+ ? VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER
+ : VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
+ const VkPushConstantRange push_constant_ranges[] = {
+ {VK_SHADER_STAGE_VERTEX_BIT, 0, 16},
+ {VK_SHADER_STAGE_FRAGMENT_BIT, 16, 4},
+ };
+ int num_push_constant_range = (idx != BLIT2D_SRC_TYPE_IMAGE || log2_samples > 0) ? 2 : 1;
+
+ result = radv_CreateDescriptorSetLayout(
+ radv_device_to_handle(device),
+ &(VkDescriptorSetLayoutCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 1,
+ .pBindings =
+ (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = desc_type,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .pImmutableSamplers = NULL},
+ }},
+ &device->meta_state.alloc, &device->meta_state.blit2d[log2_samples].ds_layouts[idx]);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ result = radv_CreatePipelineLayout(
+ radv_device_to_handle(device),
+ &(VkPipelineLayoutCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &device->meta_state.blit2d[log2_samples].ds_layouts[idx],
+ .pushConstantRangeCount = num_push_constant_range,
+ .pPushConstantRanges = push_constant_ranges,
+ },
+ &device->meta_state.alloc, &device->meta_state.blit2d[log2_samples].p_layouts[idx]);
+ if (result != VK_SUCCESS)
+ goto fail;
+ return VK_SUCCESS;
fail:
- return result;
+ return result;
}
VkResult
radv_device_init_meta_blit2d_state(struct radv_device *device, bool on_demand)
{
- VkResult result;
- bool create_3d = device->physical_device->rad_info.chip_class >= GFX9;
+ VkResult result;
+ bool create_3d = device->physical_device->rad_info.chip_class >= GFX9;
- for (unsigned log2_samples = 0; log2_samples < MAX_SAMPLES_LOG2; log2_samples++) {
- for (unsigned src = 0; src < BLIT2D_NUM_SRC_TYPES; src++) {
- if (src == BLIT2D_SRC_TYPE_IMAGE_3D && !create_3d)
- continue;
+ for (unsigned log2_samples = 0; log2_samples < MAX_SAMPLES_LOG2; log2_samples++) {
+ for (unsigned src = 0; src < BLIT2D_NUM_SRC_TYPES; src++) {
+ if (src == BLIT2D_SRC_TYPE_IMAGE_3D && !create_3d)
+ continue;
- /* Don't need to handle copies between buffers and multisample images. */
- if (src == BLIT2D_SRC_TYPE_BUFFER && log2_samples > 0)
- continue;
+ /* Don't need to handle copies between buffers and multisample images. */
+ if (src == BLIT2D_SRC_TYPE_BUFFER && log2_samples > 0)
+ continue;
- result = meta_blit2d_create_pipe_layout(device, src, log2_samples);
- if (result != VK_SUCCESS)
- goto fail;
+ result = meta_blit2d_create_pipe_layout(device, src, log2_samples);
+ if (result != VK_SUCCESS)
+ goto fail;
- if (on_demand)
- continue;
+ if (on_demand)
+ continue;
- for (unsigned j = 0; j < NUM_META_FS_KEYS; ++j) {
- result = blit2d_init_color_pipeline(device, src, radv_fs_key_format_exemplars[j], log2_samples);
- if (result != VK_SUCCESS)
- goto fail;
- }
+ for (unsigned j = 0; j < NUM_META_FS_KEYS; ++j) {
+ result = blit2d_init_color_pipeline(device, src, radv_fs_key_format_exemplars[j],
+ log2_samples);
+ if (result != VK_SUCCESS)
+ goto fail;
+ }
- result = blit2d_init_depth_only_pipeline(device, src, log2_samples);
- if (result != VK_SUCCESS)
- goto fail;
+ result = blit2d_init_depth_only_pipeline(device, src, log2_samples);
+ if (result != VK_SUCCESS)
+ goto fail;
- result = blit2d_init_stencil_only_pipeline(device, src, log2_samples);
- if (result != VK_SUCCESS)
- goto fail;
- }
- }
+ result = blit2d_init_stencil_only_pipeline(device, src, log2_samples);
+ if (result != VK_SUCCESS)
+ goto fail;
+ }
+ }
- return VK_SUCCESS;
+ return VK_SUCCESS;
fail:
- radv_device_finish_meta_blit2d_state(device);
- return result;
+ radv_device_finish_meta_blit2d_state(device);
+ return result;
}
diff --git a/src/amd/vulkan/radv_meta_buffer.c b/src/amd/vulkan/radv_meta_buffer.c
index 1bfc15064f0..9c33dfe64ad 100644
--- a/src/amd/vulkan/radv_meta_buffer.c
+++ b/src/amd/vulkan/radv_meta_buffer.c
@@ -1,527 +1,450 @@
-#include "radv_meta.h"
#include "nir/nir_builder.h"
+#include "radv_meta.h"
-#include "sid.h"
#include "radv_cs.h"
+#include "sid.h"
static nir_shader *
build_buffer_fill_shader(struct radv_device *dev)
{
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL,
- "meta_buffer_fill");
- b.shader->info.cs.local_size[0] = 64;
- b.shader->info.cs.local_size[1] = 1;
- b.shader->info.cs.local_size[2] = 1;
+ nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_buffer_fill");
+ b.shader->info.cs.local_size[0] = 64;
+ b.shader->info.cs.local_size[1] = 1;
+ b.shader->info.cs.local_size[2] = 1;
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
- nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
- nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info.cs.local_size[0],
- b.shader->info.cs.local_size[1],
- b.shader->info.cs.local_size[2], 0);
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
+ nir_ssa_def *block_size =
+ nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1],
+ b.shader->info.cs.local_size[2], 0);
- nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+ nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
- nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
- offset = nir_channel(&b, offset, 0);
+ nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
+ offset = nir_channel(&b, offset, 0);
- nir_ssa_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
+ nir_ssa_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
- nir_ssa_def *load = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range=4);
- nir_ssa_def *swizzled_load = nir_swizzle(&b, load, (unsigned[]) { 0, 0, 0, 0}, 4);
+ nir_ssa_def *load = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range = 4);
+ nir_ssa_def *swizzled_load = nir_swizzle(&b, load, (unsigned[]){0, 0, 0, 0}, 4);
- nir_store_ssbo(&b, swizzled_load, dst_buf, offset, .write_mask=0xf,
- .access=ACCESS_NON_READABLE, .align_mul=16);
+ nir_store_ssbo(&b, swizzled_load, dst_buf, offset, .write_mask = 0xf,
+ .access = ACCESS_NON_READABLE, .align_mul = 16);
- return b.shader;
+ return b.shader;
}
static nir_shader *
build_buffer_copy_shader(struct radv_device *dev)
{
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL,
- "meta_buffer_copy");
- b.shader->info.cs.local_size[0] = 64;
- b.shader->info.cs.local_size[1] = 1;
- b.shader->info.cs.local_size[2] = 1;
+ nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_buffer_copy");
+ b.shader->info.cs.local_size[0] = 64;
+ b.shader->info.cs.local_size[1] = 1;
+ b.shader->info.cs.local_size[2] = 1;
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
- nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
- nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info.cs.local_size[0],
- b.shader->info.cs.local_size[1],
- b.shader->info.cs.local_size[2], 0);
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
+ nir_ssa_def *block_size =
+ nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1],
+ b.shader->info.cs.local_size[2], 0);
- nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+ nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
- nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
- offset = nir_channel(&b, offset, 0);
+ nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
+ offset = nir_channel(&b, offset, 0);
- nir_ssa_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
- nir_ssa_def *src_buf = radv_meta_load_descriptor(&b, 0, 1);
+ nir_ssa_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
+ nir_ssa_def *src_buf = radv_meta_load_descriptor(&b, 0, 1);
- nir_ssa_def *load = nir_load_ssbo(&b, 4, 32, src_buf, offset, .align_mul=16);
- nir_store_ssbo(&b, load, dst_buf, offset, .write_mask=0xf,
- .access=ACCESS_NON_READABLE, .align_mul=16);
+ nir_ssa_def *load = nir_load_ssbo(&b, 4, 32, src_buf, offset, .align_mul = 16);
+ nir_store_ssbo(&b, load, dst_buf, offset, .write_mask = 0xf, .access = ACCESS_NON_READABLE,
+ .align_mul = 16);
- return b.shader;
+ return b.shader;
}
-
-
-VkResult radv_device_init_meta_buffer_state(struct radv_device *device)
+VkResult
+radv_device_init_meta_buffer_state(struct radv_device *device)
{
- VkResult result;
- nir_shader *fill_cs = build_buffer_fill_shader(device);
- nir_shader *copy_cs = build_buffer_copy_shader(device);
-
- VkDescriptorSetLayoutCreateInfo fill_ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 1,
- .pBindings = (VkDescriptorSetLayoutBinding[]) {
- {
- .binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- }
- };
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
- &fill_ds_create_info,
- &device->meta_state.alloc,
- &device->meta_state.buffer.fill_ds_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
- VkDescriptorSetLayoutCreateInfo copy_ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 2,
- .pBindings = (VkDescriptorSetLayoutBinding[]) {
- {
- .binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- {
- .binding = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- }
- };
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
- &copy_ds_create_info,
- &device->meta_state.alloc,
- &device->meta_state.buffer.copy_ds_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
-
- VkPipelineLayoutCreateInfo fill_pl_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 1,
- .pSetLayouts = &device->meta_state.buffer.fill_ds_layout,
- .pushConstantRangeCount = 1,
- .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 4},
- };
-
- result = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &fill_pl_create_info,
- &device->meta_state.alloc,
- &device->meta_state.buffer.fill_p_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
- VkPipelineLayoutCreateInfo copy_pl_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 1,
- .pSetLayouts = &device->meta_state.buffer.copy_ds_layout,
- .pushConstantRangeCount = 0,
- };
-
- result = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &copy_pl_create_info,
- &device->meta_state.alloc,
- &device->meta_state.buffer.copy_p_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
- VkPipelineShaderStageCreateInfo fill_pipeline_shader_stage = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = vk_shader_module_handle_from_nir(fill_cs),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo fill_vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = fill_pipeline_shader_stage,
- .flags = 0,
- .layout = device->meta_state.buffer.fill_p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- 1, &fill_vk_pipeline_info, NULL,
- &device->meta_state.buffer.fill_pipeline);
- if (result != VK_SUCCESS)
- goto fail;
-
- VkPipelineShaderStageCreateInfo copy_pipeline_shader_stage = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = vk_shader_module_handle_from_nir(copy_cs),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo copy_vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = copy_pipeline_shader_stage,
- .flags = 0,
- .layout = device->meta_state.buffer.copy_p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- 1, &copy_vk_pipeline_info, NULL,
- &device->meta_state.buffer.copy_pipeline);
- if (result != VK_SUCCESS)
- goto fail;
-
- ralloc_free(fill_cs);
- ralloc_free(copy_cs);
- return VK_SUCCESS;
+ VkResult result;
+ nir_shader *fill_cs = build_buffer_fill_shader(device);
+ nir_shader *copy_cs = build_buffer_copy_shader(device);
+
+ VkDescriptorSetLayoutCreateInfo fill_ds_create_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 1,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &fill_ds_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.buffer.fill_ds_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VkDescriptorSetLayoutCreateInfo copy_ds_create_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 2,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ {.binding = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &copy_ds_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.buffer.copy_ds_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineLayoutCreateInfo fill_pl_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &device->meta_state.buffer.fill_ds_layout,
+ .pushConstantRangeCount = 1,
+ .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 4},
+ };
+
+ result = radv_CreatePipelineLayout(radv_device_to_handle(device), &fill_pl_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.buffer.fill_p_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineLayoutCreateInfo copy_pl_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &device->meta_state.buffer.copy_ds_layout,
+ .pushConstantRangeCount = 0,
+ };
+
+ result = radv_CreatePipelineLayout(radv_device_to_handle(device), &copy_pl_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.buffer.copy_p_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineShaderStageCreateInfo fill_pipeline_shader_stage = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(fill_cs),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo fill_vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = fill_pipeline_shader_stage,
+ .flags = 0,
+ .layout = device->meta_state.buffer.fill_p_layout,
+ };
+
+ result = radv_CreateComputePipelines(
+ radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
+ &fill_vk_pipeline_info, NULL, &device->meta_state.buffer.fill_pipeline);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineShaderStageCreateInfo copy_pipeline_shader_stage = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(copy_cs),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo copy_vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = copy_pipeline_shader_stage,
+ .flags = 0,
+ .layout = device->meta_state.buffer.copy_p_layout,
+ };
+
+ result = radv_CreateComputePipelines(
+ radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
+ &copy_vk_pipeline_info, NULL, &device->meta_state.buffer.copy_pipeline);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ ralloc_free(fill_cs);
+ ralloc_free(copy_cs);
+ return VK_SUCCESS;
fail:
- radv_device_finish_meta_buffer_state(device);
- ralloc_free(fill_cs);
- ralloc_free(copy_cs);
- return result;
+ radv_device_finish_meta_buffer_state(device);
+ ralloc_free(fill_cs);
+ ralloc_free(copy_cs);
+ return result;
}
-void radv_device_finish_meta_buffer_state(struct radv_device *device)
+void
+radv_device_finish_meta_buffer_state(struct radv_device *device)
{
- struct radv_meta_state *state = &device->meta_state;
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->buffer.copy_pipeline, &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->buffer.fill_pipeline, &state->alloc);
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->buffer.copy_p_layout, &state->alloc);
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->buffer.fill_p_layout, &state->alloc);
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- state->buffer.copy_ds_layout,
- &state->alloc);
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- state->buffer.fill_ds_layout,
- &state->alloc);
+ struct radv_meta_state *state = &device->meta_state;
+
+ radv_DestroyPipeline(radv_device_to_handle(device), state->buffer.copy_pipeline, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->buffer.fill_pipeline, &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->buffer.copy_p_layout,
+ &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->buffer.fill_p_layout,
+ &state->alloc);
+ radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->buffer.copy_ds_layout,
+ &state->alloc);
+ radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->buffer.fill_ds_layout,
+ &state->alloc);
}
-static void fill_buffer_shader(struct radv_cmd_buffer *cmd_buffer,
- struct radeon_winsys_bo *bo,
- uint64_t offset, uint64_t size, uint32_t value)
+static void
+fill_buffer_shader(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *bo, uint64_t offset,
+ uint64_t size, uint32_t value)
{
- struct radv_device *device = cmd_buffer->device;
- uint64_t block_count = round_up_u64(size, 1024);
- struct radv_meta_saved_state saved_state;
-
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_COMPUTE_PIPELINE |
- RADV_META_SAVE_CONSTANTS |
- RADV_META_SAVE_DESCRIPTORS);
-
- struct radv_buffer dst_buffer = {
- .bo = bo,
- .offset = offset,
- .size = size
- };
-
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.buffer.fill_pipeline);
-
- radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.buffer.fill_p_layout,
- 0, /* set */
- 1, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .pBufferInfo = &(VkDescriptorBufferInfo) {
- .buffer = radv_buffer_to_handle(&dst_buffer),
- .offset = 0,
- .range = size
- }
- }
- });
-
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.buffer.fill_p_layout,
- VK_SHADER_STAGE_COMPUTE_BIT, 0, 4,
- &value);
-
- radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);
-
- radv_meta_restore(&saved_state, cmd_buffer);
+ struct radv_device *device = cmd_buffer->device;
+ uint64_t block_count = round_up_u64(size, 1024);
+ struct radv_meta_saved_state saved_state;
+
+ radv_meta_save(
+ &saved_state, cmd_buffer,
+ RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
+
+ struct radv_buffer dst_buffer = {.bo = bo, .offset = offset, .size = size};
+
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
+ device->meta_state.buffer.fill_pipeline);
+
+ radv_meta_push_descriptor_set(
+ cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.buffer.fill_p_layout,
+ 0, /* set */
+ 1, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){
+ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .pBufferInfo = &(VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(&dst_buffer),
+ .offset = 0,
+ .range = size}}});
+
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+ device->meta_state.buffer.fill_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 4,
+ &value);
+
+ radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);
+
+ radv_meta_restore(&saved_state, cmd_buffer);
}
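The dispatch size above is simple arithmetic: each invocation of the fill shader stores one 16-byte vec4 and a workgroup is 64 invocations wide, so one group covers 64 * 16 = 1024 bytes and round_up_u64(size, 1024) is the resulting group count. A small illustrative helper (not part of the patch):

#include <stdint.h>

static uint64_t
fill_group_count_example(uint64_t size_bytes)
{
   const uint64_t bytes_per_invocation = 16;  /* one vec4 store per thread */
   const uint64_t invocations_per_group = 64; /* local_size[0] of the fill shader */
   const uint64_t bytes_per_group = bytes_per_invocation * invocations_per_group;

   /* e.g. 10000 bytes -> ceil(10000 / 1024) = 10 workgroups */
   return (size_bytes + bytes_per_group - 1) / bytes_per_group;
}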
-static void copy_buffer_shader(struct radv_cmd_buffer *cmd_buffer,
- struct radeon_winsys_bo *src_bo,
- struct radeon_winsys_bo *dst_bo,
- uint64_t src_offset, uint64_t dst_offset,
- uint64_t size)
+static void
+copy_buffer_shader(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *src_bo,
+ struct radeon_winsys_bo *dst_bo, uint64_t src_offset, uint64_t dst_offset,
+ uint64_t size)
{
- struct radv_device *device = cmd_buffer->device;
- uint64_t block_count = round_up_u64(size, 1024);
- struct radv_meta_saved_state saved_state;
-
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_COMPUTE_PIPELINE |
- RADV_META_SAVE_DESCRIPTORS);
-
- struct radv_buffer dst_buffer = {
- .bo = dst_bo,
- .offset = dst_offset,
- .size = size
- };
-
- struct radv_buffer src_buffer = {
- .bo = src_bo,
- .offset = src_offset,
- .size = size
- };
-
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.buffer.copy_pipeline);
-
- radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.buffer.copy_p_layout,
- 0, /* set */
- 2, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .pBufferInfo = &(VkDescriptorBufferInfo) {
- .buffer = radv_buffer_to_handle(&dst_buffer),
- .offset = 0,
- .range = size
- }
- },
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 1,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .pBufferInfo = &(VkDescriptorBufferInfo) {
- .buffer = radv_buffer_to_handle(&src_buffer),
- .offset = 0,
- .range = size
- }
- }
- });
-
- radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);
-
- radv_meta_restore(&saved_state, cmd_buffer);
+ struct radv_device *device = cmd_buffer->device;
+ uint64_t block_count = round_up_u64(size, 1024);
+ struct radv_meta_saved_state saved_state;
+
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS);
+
+ struct radv_buffer dst_buffer = {.bo = dst_bo, .offset = dst_offset, .size = size};
+
+ struct radv_buffer src_buffer = {.bo = src_bo, .offset = src_offset, .size = size};
+
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
+ device->meta_state.buffer.copy_pipeline);
+
+ radv_meta_push_descriptor_set(
+ cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.buffer.copy_p_layout,
+ 0, /* set */
+ 2, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){
+ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .pBufferInfo = &(VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(&dst_buffer),
+ .offset = 0,
+ .range = size}},
+ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 1,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .pBufferInfo = &(VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(&src_buffer),
+ .offset = 0,
+ .range = size}}});
+
+ radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);
+
+ radv_meta_restore(&saved_state, cmd_buffer);
}
static bool
-radv_prefer_compute_dma(const struct radv_device *device,
- uint64_t size,
- struct radeon_winsys_bo *src_bo,
- struct radeon_winsys_bo *dst_bo)
+radv_prefer_compute_dma(const struct radv_device *device, uint64_t size,
+ struct radeon_winsys_bo *src_bo, struct radeon_winsys_bo *dst_bo)
{
- bool use_compute = size >= RADV_BUFFER_OPS_CS_THRESHOLD;
-
- if (device->physical_device->rad_info.chip_class >= GFX10 &&
- device->physical_device->rad_info.has_dedicated_vram) {
- if ((src_bo && !(src_bo->initial_domain & RADEON_DOMAIN_VRAM)) ||
- !(dst_bo->initial_domain & RADEON_DOMAIN_VRAM)) {
-			/* Prefer CP DMA for GTT on dGPUs due to slow PCIe. */
- use_compute = false;
- }
- }
-
- return use_compute;
+ bool use_compute = size >= RADV_BUFFER_OPS_CS_THRESHOLD;
+
+ if (device->physical_device->rad_info.chip_class >= GFX10 &&
+ device->physical_device->rad_info.has_dedicated_vram) {
+ if ((src_bo && !(src_bo->initial_domain & RADEON_DOMAIN_VRAM)) ||
+ !(dst_bo->initial_domain & RADEON_DOMAIN_VRAM)) {
+         /* Prefer CP DMA for GTT on dGPUs due to slow PCIe. */
+ use_compute = false;
+ }
+ }
+
+ return use_compute;
}
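radv_prefer_compute_dma picks the compute path only for operations of at least RADV_BUFFER_OPS_CS_THRESHOLD bytes, and even then falls back to CP DMA when a GFX10+ dGPU would have to reach GTT memory over PCIe. A standalone sketch of that decision (illustrative only, not radv API; the numeric threshold is a placeholder for the real constant):

#include <stdbool.h>
#include <stdint.h>

static bool
prefer_compute_dma_example(uint64_t size, bool gfx10_plus_dgpu,
                           bool src_in_gtt, bool dst_in_gtt)
{
   /* Small copies/fills are cheaper on the CP DMA engine. */
   bool use_compute = size >= 4096; /* placeholder for RADV_BUFFER_OPS_CS_THRESHOLD */

   /* Shader access to GTT crosses PCIe on dGPUs, so keep those on CP DMA. */
   if (gfx10_plus_dgpu && (src_in_gtt || dst_in_gtt))
      use_compute = false;

   return use_compute;
}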
-uint32_t radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_image *image,
- struct radeon_winsys_bo *bo,
- uint64_t offset, uint64_t size, uint32_t value)
+uint32_t
+radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image,
+ struct radeon_winsys_bo *bo, uint64_t offset, uint64_t size, uint32_t value)
{
- bool use_compute = radv_prefer_compute_dma(cmd_buffer->device, size, NULL, bo);
- uint32_t flush_bits = 0;
+ bool use_compute = radv_prefer_compute_dma(cmd_buffer->device, size, NULL, bo);
+ uint32_t flush_bits = 0;
- assert(!(offset & 3));
- assert(!(size & 3));
+ assert(!(offset & 3));
+ assert(!(size & 3));
- if (use_compute) {
- cmd_buffer->state.flush_bits |=
- radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);
+ if (use_compute) {
+ cmd_buffer->state.flush_bits |=
+ radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);
- fill_buffer_shader(cmd_buffer, bo, offset, size, value);
+ fill_buffer_shader(cmd_buffer, bo, offset, size, value);
- flush_bits = RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_INV_VCACHE |
- radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);
- } else if (size) {
- uint64_t va = radv_buffer_get_va(bo);
- va += offset;
- radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, bo);
- si_cp_dma_clear_buffer(cmd_buffer, va, size, value);
- }
+ flush_bits = RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
+ radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);
+ } else if (size) {
+ uint64_t va = radv_buffer_get_va(bo);
+ va += offset;
+ radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, bo);
+ si_cp_dma_clear_buffer(cmd_buffer, va, size, value);
+ }
- return flush_bits;
+ return flush_bits;
}
-static
-void radv_copy_buffer(struct radv_cmd_buffer *cmd_buffer,
- struct radeon_winsys_bo *src_bo,
- struct radeon_winsys_bo *dst_bo,
- uint64_t src_offset, uint64_t dst_offset,
- uint64_t size)
+static void
+radv_copy_buffer(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *src_bo,
+ struct radeon_winsys_bo *dst_bo, uint64_t src_offset, uint64_t dst_offset,
+ uint64_t size)
{
- bool use_compute = !(size & 3) && !(src_offset & 3) && !(dst_offset & 3) &&
- radv_prefer_compute_dma(cmd_buffer->device, size, src_bo, dst_bo);
-
- if (use_compute)
- copy_buffer_shader(cmd_buffer, src_bo, dst_bo,
- src_offset, dst_offset, size);
- else if (size) {
- uint64_t src_va = radv_buffer_get_va(src_bo);
- uint64_t dst_va = radv_buffer_get_va(dst_bo);
- src_va += src_offset;
- dst_va += dst_offset;
-
- radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, src_bo);
- radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dst_bo);
-
- si_cp_dma_buffer_copy(cmd_buffer, src_va, dst_va, size);
- }
+ bool use_compute = !(size & 3) && !(src_offset & 3) && !(dst_offset & 3) &&
+ radv_prefer_compute_dma(cmd_buffer->device, size, src_bo, dst_bo);
+
+ if (use_compute)
+ copy_buffer_shader(cmd_buffer, src_bo, dst_bo, src_offset, dst_offset, size);
+ else if (size) {
+ uint64_t src_va = radv_buffer_get_va(src_bo);
+ uint64_t dst_va = radv_buffer_get_va(dst_bo);
+ src_va += src_offset;
+ dst_va += dst_offset;
+
+ radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, src_bo);
+ radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dst_bo);
+
+ si_cp_dma_buffer_copy(cmd_buffer, src_va, dst_va, size);
+ }
}
-void radv_CmdFillBuffer(
- VkCommandBuffer commandBuffer,
- VkBuffer dstBuffer,
- VkDeviceSize dstOffset,
- VkDeviceSize fillSize,
- uint32_t data)
+void
+radv_CmdFillBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset,
+ VkDeviceSize fillSize, uint32_t data)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
- if (fillSize == VK_WHOLE_SIZE)
- fillSize = (dst_buffer->size - dstOffset) & ~3ull;
+ if (fillSize == VK_WHOLE_SIZE)
+ fillSize = (dst_buffer->size - dstOffset) & ~3ull;
- radv_fill_buffer(cmd_buffer, NULL, dst_buffer->bo, dst_buffer->offset + dstOffset,
- fillSize, data);
+ radv_fill_buffer(cmd_buffer, NULL, dst_buffer->bo, dst_buffer->offset + dstOffset, fillSize,
+ data);
}
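With VK_WHOLE_SIZE, the remaining range is rounded down to a multiple of 4, matching the 4-byte alignment that radv_fill_buffer asserts. A worked example of that clamp (illustrative only, not part of the patch):

#include <stdint.h>

static uint64_t
whole_size_fill_bytes_example(uint64_t buffer_size, uint64_t dst_offset)
{
   /* e.g. a 1003-byte buffer at dstOffset 0 yields (1003 - 0) & ~3 = 1000 */
   return (buffer_size - dst_offset) & ~3ull;
}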
static void
-copy_buffer(struct radv_cmd_buffer *cmd_buffer,
- struct radv_buffer *src_buffer,
- struct radv_buffer *dst_buffer,
- const VkBufferCopy2KHR *region)
+copy_buffer(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *src_buffer,
+ struct radv_buffer *dst_buffer, const VkBufferCopy2KHR *region)
{
- bool old_predicating;
-
- /* VK_EXT_conditional_rendering says that copy commands should not be
- * affected by conditional rendering.
- */
- old_predicating = cmd_buffer->state.predicating;
- cmd_buffer->state.predicating = false;
-
- radv_copy_buffer(cmd_buffer,
- src_buffer->bo,
- dst_buffer->bo,
- src_buffer->offset + region->srcOffset,
- dst_buffer->offset + region->dstOffset,
- region->size);
-
- /* Restore conditional rendering. */
- cmd_buffer->state.predicating = old_predicating;
+ bool old_predicating;
+
+ /* VK_EXT_conditional_rendering says that copy commands should not be
+ * affected by conditional rendering.
+ */
+ old_predicating = cmd_buffer->state.predicating;
+ cmd_buffer->state.predicating = false;
+
+ radv_copy_buffer(cmd_buffer, src_buffer->bo, dst_buffer->bo,
+ src_buffer->offset + region->srcOffset, dst_buffer->offset + region->dstOffset,
+ region->size);
+
+ /* Restore conditional rendering. */
+ cmd_buffer->state.predicating = old_predicating;
}
-void radv_CmdCopyBuffer2KHR(
- VkCommandBuffer commandBuffer,
- const VkCopyBufferInfo2KHR* pCopyBufferInfo)
+void
+radv_CmdCopyBuffer2KHR(VkCommandBuffer commandBuffer, const VkCopyBufferInfo2KHR *pCopyBufferInfo)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_buffer, src_buffer, pCopyBufferInfo->srcBuffer);
- RADV_FROM_HANDLE(radv_buffer, dst_buffer, pCopyBufferInfo->dstBuffer);
-
- for (unsigned r = 0; r < pCopyBufferInfo->regionCount; r++) {
- copy_buffer(cmd_buffer, src_buffer, dst_buffer,
- &pCopyBufferInfo->pRegions[r]);
- }
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_buffer, src_buffer, pCopyBufferInfo->srcBuffer);
+ RADV_FROM_HANDLE(radv_buffer, dst_buffer, pCopyBufferInfo->dstBuffer);
+
+ for (unsigned r = 0; r < pCopyBufferInfo->regionCount; r++) {
+ copy_buffer(cmd_buffer, src_buffer, dst_buffer, &pCopyBufferInfo->pRegions[r]);
+ }
}
-void radv_CmdUpdateBuffer(
- VkCommandBuffer commandBuffer,
- VkBuffer dstBuffer,
- VkDeviceSize dstOffset,
- VkDeviceSize dataSize,
- const void* pData)
+void
+radv_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset,
+ VkDeviceSize dataSize, const void *pData)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
- bool mec = radv_cmd_buffer_uses_mec(cmd_buffer);
- uint64_t words = dataSize / 4;
- uint64_t va = radv_buffer_get_va(dst_buffer->bo);
- va += dstOffset + dst_buffer->offset;
-
- assert(!(dataSize & 3));
- assert(!(va & 3));
-
- if (!dataSize)
- return;
-
- if (dataSize < RADV_BUFFER_UPDATE_THRESHOLD) {
- si_emit_cache_flush(cmd_buffer);
-
- radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dst_buffer->bo);
-
- radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, words + 4);
-
- radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + words, 0));
- radeon_emit(cmd_buffer->cs, S_370_DST_SEL(mec ?
- V_370_MEM : V_370_MEM_GRBM) |
- S_370_WR_CONFIRM(1) |
- S_370_ENGINE_SEL(V_370_ME));
- radeon_emit(cmd_buffer->cs, va);
- radeon_emit(cmd_buffer->cs, va >> 32);
- radeon_emit_array(cmd_buffer->cs, pData, words);
-
- if (unlikely(cmd_buffer->device->trace_bo))
- radv_cmd_buffer_trace_emit(cmd_buffer);
- } else {
- uint32_t buf_offset;
- radv_cmd_buffer_upload_data(cmd_buffer, dataSize, pData, &buf_offset);
- radv_copy_buffer(cmd_buffer, cmd_buffer->upload.upload_bo, dst_buffer->bo,
- buf_offset, dstOffset + dst_buffer->offset, dataSize);
- }
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
+ bool mec = radv_cmd_buffer_uses_mec(cmd_buffer);
+ uint64_t words = dataSize / 4;
+ uint64_t va = radv_buffer_get_va(dst_buffer->bo);
+ va += dstOffset + dst_buffer->offset;
+
+ assert(!(dataSize & 3));
+ assert(!(va & 3));
+
+ if (!dataSize)
+ return;
+
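+   /* Small updates are inlined into the command stream with a WRITE_DATA
+    * packet; anything at or above RADV_BUFFER_UPDATE_THRESHOLD is staged in
+    * the upload BO and copied with radv_copy_buffer() instead.
+    */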
+ if (dataSize < RADV_BUFFER_UPDATE_THRESHOLD) {
+ si_emit_cache_flush(cmd_buffer);
+
+ radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dst_buffer->bo);
+
+ radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, words + 4);
+
+ radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + words, 0));
+ radeon_emit(cmd_buffer->cs, S_370_DST_SEL(mec ? V_370_MEM : V_370_MEM_GRBM) |
+ S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME));
+ radeon_emit(cmd_buffer->cs, va);
+ radeon_emit(cmd_buffer->cs, va >> 32);
+ radeon_emit_array(cmd_buffer->cs, pData, words);
+
+ if (unlikely(cmd_buffer->device->trace_bo))
+ radv_cmd_buffer_trace_emit(cmd_buffer);
+ } else {
+ uint32_t buf_offset;
+ radv_cmd_buffer_upload_data(cmd_buffer, dataSize, pData, &buf_offset);
+ radv_copy_buffer(cmd_buffer, cmd_buffer->upload.upload_bo, dst_buffer->bo, buf_offset,
+ dstOffset + dst_buffer->offset, dataSize);
+ }
}
diff --git a/src/amd/vulkan/radv_meta_bufimage.c b/src/amd/vulkan/radv_meta_bufimage.c
index c39be196ab7..da4884444b4 100644
--- a/src/amd/vulkan/radv_meta_bufimage.c
+++ b/src/amd/vulkan/radv_meta_bufimage.c
@@ -21,8 +21,8 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
-#include "radv_meta.h"
#include "nir/nir_builder.h"
+#include "radv_meta.h"
/*
* GFX queue: Compute shader implementation of image->buffer copy
@@ -35,2078 +35,1857 @@
static nir_shader *
build_nir_itob_compute_shader(struct radv_device *dev, bool is_3d)
{
- enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
- const struct glsl_type *sampler_type = glsl_sampler_type(dim,
- false,
- false,
- GLSL_TYPE_FLOAT);
- const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF,
- false,
- GLSL_TYPE_FLOAT);
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, is_3d ? "meta_itob_cs_3d" : "meta_itob_cs");
- b.shader->info.cs.local_size[0] = 8;
- b.shader->info.cs.local_size[1] = 8;
- b.shader->info.cs.local_size[2] = 1;
- nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
- sampler_type, "s_tex");
- input_img->data.descriptor_set = 0;
- input_img->data.binding = 0;
-
- nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
- img_type, "out_img");
- output_img->data.descriptor_set = 0;
- output_img->data.binding = 1;
-
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
- nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
- nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info.cs.local_size[0],
- b.shader->info.cs.local_size[1],
- b.shader->info.cs.local_size[2], 0);
-
- nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
-
- nir_ssa_def *offset = nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range=16);
- nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range=16);
-
- nir_ssa_def *img_coord = nir_iadd(&b, global_id, offset);
- nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
-
- nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
- tex->sampler_dim = dim;
- tex->op = nir_texop_txf;
- tex->src[0].src_type = nir_tex_src_coord;
- tex->src[0].src = nir_src_for_ssa(nir_channels(&b, img_coord, is_3d ? 0x7 : 0x3));
- tex->src[1].src_type = nir_tex_src_lod;
- tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
- tex->src[2].src_type = nir_tex_src_texture_deref;
- tex->src[2].src = nir_src_for_ssa(input_img_deref);
- tex->dest_type = nir_type_float32;
- tex->is_array = false;
- tex->coord_components = is_3d ? 3 : 2;
-
- nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
- nir_builder_instr_insert(&b, &tex->instr);
-
- nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
- nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);
-
- nir_ssa_def *tmp = nir_imul(&b, pos_y, stride);
- tmp = nir_iadd(&b, tmp, pos_x);
-
- nir_ssa_def *coord = nir_vec4(&b, tmp, tmp, tmp, tmp);
-
- nir_ssa_def *outval = &tex->dest.ssa;
- nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa,
- coord, nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0));
-
- return b.shader;
+ enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
+ const struct glsl_type *sampler_type = glsl_sampler_type(dim, false, false, GLSL_TYPE_FLOAT);
+ const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT);
+ nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL,
+ is_3d ? "meta_itob_cs_3d" : "meta_itob_cs");
+ b.shader->info.cs.local_size[0] = 8;
+ b.shader->info.cs.local_size[1] = 8;
+ b.shader->info.cs.local_size[2] = 1;
+ nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
+ input_img->data.descriptor_set = 0;
+ input_img->data.binding = 0;
+
+ nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
+ output_img->data.descriptor_set = 0;
+ output_img->data.binding = 1;
+
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
+ nir_ssa_def *block_size =
+ nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1],
+ b.shader->info.cs.local_size[2], 0);
+
+ nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+
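+   /* Push constants: the source image offset (two or three components) is at
+    * byte 0 and the destination buffer's row stride, in texels, at byte 12.
+    */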
+ nir_ssa_def *offset =
+ nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = 16);
+ nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);
+
+ nir_ssa_def *img_coord = nir_iadd(&b, global_id, offset);
+ nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
+
+ nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
+ tex->sampler_dim = dim;
+ tex->op = nir_texop_txf;
+ tex->src[0].src_type = nir_tex_src_coord;
+ tex->src[0].src = nir_src_for_ssa(nir_channels(&b, img_coord, is_3d ? 0x7 : 0x3));
+ tex->src[1].src_type = nir_tex_src_lod;
+ tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
+ tex->src[2].src_type = nir_tex_src_texture_deref;
+ tex->src[2].src = nir_src_for_ssa(input_img_deref);
+ tex->dest_type = nir_type_float32;
+ tex->is_array = false;
+ tex->coord_components = is_3d ? 3 : 2;
+
+ nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+ nir_builder_instr_insert(&b, &tex->instr);
+
+ nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
+ nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);
+
+ nir_ssa_def *tmp = nir_imul(&b, pos_y, stride);
+ tmp = nir_iadd(&b, tmp, pos_x);
+
+ nir_ssa_def *coord = nir_vec4(&b, tmp, tmp, tmp, tmp);
+
+ nir_ssa_def *outval = &tex->dest.ssa;
+ nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord,
+ nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0));
+
+ return b.shader;
}
/* Image to buffer - don't write use image accessors */
static VkResult
radv_device_init_meta_itob_state(struct radv_device *device)
{
- VkResult result;
- nir_shader *cs = build_nir_itob_compute_shader(device, false);
- nir_shader *cs_3d = NULL;
-
- if (device->physical_device->rad_info.chip_class >= GFX9)
- cs_3d = build_nir_itob_compute_shader(device, true);
-
- /*
- * two descriptors one for the image being sampled
- * one for the buffer being written.
- */
- VkDescriptorSetLayoutCreateInfo ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 2,
- .pBindings = (VkDescriptorSetLayoutBinding[]) {
- {
- .binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- {
- .binding = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- }
- };
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
- &ds_create_info,
- &device->meta_state.alloc,
- &device->meta_state.itob.img_ds_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
-
- VkPipelineLayoutCreateInfo pl_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 1,
- .pSetLayouts = &device->meta_state.itob.img_ds_layout,
- .pushConstantRangeCount = 1,
- .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
- };
-
- result = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &pl_create_info,
- &device->meta_state.alloc,
- &device->meta_state.itob.img_p_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
- /* compute shader */
-
- VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = vk_shader_module_handle_from_nir(cs),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = pipeline_shader_stage,
- .flags = 0,
- .layout = device->meta_state.itob.img_p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- 1, &vk_pipeline_info, NULL,
- &device->meta_state.itob.pipeline);
- if (result != VK_SUCCESS)
- goto fail;
-
- if (device->physical_device->rad_info.chip_class >= GFX9) {
- VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = vk_shader_module_handle_from_nir(cs_3d),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo vk_pipeline_info_3d = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = pipeline_shader_stage_3d,
- .flags = 0,
- .layout = device->meta_state.itob.img_p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- 1, &vk_pipeline_info_3d, NULL,
- &device->meta_state.itob.pipeline_3d);
- if (result != VK_SUCCESS)
- goto fail;
- ralloc_free(cs_3d);
- }
- ralloc_free(cs);
-
- return VK_SUCCESS;
+ VkResult result;
+ nir_shader *cs = build_nir_itob_compute_shader(device, false);
+ nir_shader *cs_3d = NULL;
+
+ if (device->physical_device->rad_info.chip_class >= GFX9)
+ cs_3d = build_nir_itob_compute_shader(device, true);
+
+   /*
+    * Two descriptors: one for the image being sampled and one for the
+    * buffer being written.
+    */
+ VkDescriptorSetLayoutCreateInfo ds_create_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 2,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ {.binding = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.itob.img_ds_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineLayoutCreateInfo pl_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &device->meta_state.itob.img_ds_layout,
+ .pushConstantRangeCount = 1,
+ .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
+ };
+
+ result =
+ radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
+ &device->meta_state.alloc, &device->meta_state.itob.img_p_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ /* compute shader */
+
+ VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(cs),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = pipeline_shader_stage,
+ .flags = 0,
+ .layout = device->meta_state.itob.img_p_layout,
+ };
+
+ result = radv_CreateComputePipelines(radv_device_to_handle(device),
+ radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
+ &vk_pipeline_info, NULL, &device->meta_state.itob.pipeline);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ if (device->physical_device->rad_info.chip_class >= GFX9) {
+ VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(cs_3d),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo vk_pipeline_info_3d = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = pipeline_shader_stage_3d,
+ .flags = 0,
+ .layout = device->meta_state.itob.img_p_layout,
+ };
+
+ result = radv_CreateComputePipelines(
+ radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
+ &vk_pipeline_info_3d, NULL, &device->meta_state.itob.pipeline_3d);
+ if (result != VK_SUCCESS)
+ goto fail;
+ ralloc_free(cs_3d);
+ }
+ ralloc_free(cs);
+
+ return VK_SUCCESS;
fail:
- ralloc_free(cs);
- ralloc_free(cs_3d);
- return result;
+ ralloc_free(cs);
+ ralloc_free(cs_3d);
+ return result;
}
static void
radv_device_finish_meta_itob_state(struct radv_device *device)
{
- struct radv_meta_state *state = &device->meta_state;
-
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->itob.img_p_layout, &state->alloc);
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- state->itob.img_ds_layout,
- &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->itob.pipeline, &state->alloc);
- if (device->physical_device->rad_info.chip_class >= GFX9)
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->itob.pipeline_3d, &state->alloc);
+ struct radv_meta_state *state = &device->meta_state;
+
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->itob.img_p_layout,
+ &state->alloc);
+ radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->itob.img_ds_layout,
+ &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->itob.pipeline, &state->alloc);
+ if (device->physical_device->rad_info.chip_class >= GFX9)
+ radv_DestroyPipeline(radv_device_to_handle(device), state->itob.pipeline_3d, &state->alloc);
}
static nir_shader *
build_nir_btoi_compute_shader(struct radv_device *dev, bool is_3d)
{
- enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
- const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
- false,
- false,
- GLSL_TYPE_FLOAT);
- const struct glsl_type *img_type = glsl_image_type(dim,
- false,
- GLSL_TYPE_FLOAT);
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, is_3d ? "meta_btoi_cs_3d" : "meta_btoi_cs");
- b.shader->info.cs.local_size[0] = 8;
- b.shader->info.cs.local_size[1] = 8;
- b.shader->info.cs.local_size[2] = 1;
- nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
- buf_type, "s_tex");
- input_img->data.descriptor_set = 0;
- input_img->data.binding = 0;
-
- nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
- img_type, "out_img");
- output_img->data.descriptor_set = 0;
- output_img->data.binding = 1;
-
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
- nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
- nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info.cs.local_size[0],
- b.shader->info.cs.local_size[1],
- b.shader->info.cs.local_size[2], 0);
-
- nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
-
- nir_ssa_def *offset = nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range=16);
- nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range=16);
-
- nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
- nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);
-
- nir_ssa_def *tmp = nir_imul(&b, pos_y, stride);
- tmp = nir_iadd(&b, tmp, pos_x);
-
- nir_ssa_def *buf_coord = nir_vec4(&b, tmp, tmp, tmp, tmp);
-
- nir_ssa_def *img_coord = nir_iadd(&b, global_id, offset);
- nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
-
- nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
- tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
- tex->op = nir_texop_txf;
- tex->src[0].src_type = nir_tex_src_coord;
- tex->src[0].src = nir_src_for_ssa(nir_channels(&b, buf_coord, 1));
- tex->src[1].src_type = nir_tex_src_lod;
- tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
- tex->src[2].src_type = nir_tex_src_texture_deref;
- tex->src[2].src = nir_src_for_ssa(input_img_deref);
- tex->dest_type = nir_type_float32;
- tex->is_array = false;
- tex->coord_components = 1;
-
- nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
- nir_builder_instr_insert(&b, &tex->instr);
-
- nir_ssa_def *outval = &tex->dest.ssa;
- nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa,
- img_coord, nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0));
-
- return b.shader;
+ enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
+ const struct glsl_type *buf_type =
+ glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT);
+ const struct glsl_type *img_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
+ nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL,
+ is_3d ? "meta_btoi_cs_3d" : "meta_btoi_cs");
+ b.shader->info.cs.local_size[0] = 8;
+ b.shader->info.cs.local_size[1] = 8;
+ b.shader->info.cs.local_size[2] = 1;
+ nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, buf_type, "s_tex");
+ input_img->data.descriptor_set = 0;
+ input_img->data.binding = 0;
+
+ nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
+ output_img->data.descriptor_set = 0;
+ output_img->data.binding = 1;
+
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
+ nir_ssa_def *block_size =
+ nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1],
+ b.shader->info.cs.local_size[2], 0);
+
+ nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+
+ nir_ssa_def *offset =
+ nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = 16);
+ nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);
+
+ nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
+ nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);
+
+ nir_ssa_def *tmp = nir_imul(&b, pos_y, stride);
+ tmp = nir_iadd(&b, tmp, pos_x);
+
+ nir_ssa_def *buf_coord = nir_vec4(&b, tmp, tmp, tmp, tmp);
+
+ nir_ssa_def *img_coord = nir_iadd(&b, global_id, offset);
+ nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
+
+ nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
+ tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
+ tex->op = nir_texop_txf;
+ tex->src[0].src_type = nir_tex_src_coord;
+ tex->src[0].src = nir_src_for_ssa(nir_channels(&b, buf_coord, 1));
+ tex->src[1].src_type = nir_tex_src_lod;
+ tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
+ tex->src[2].src_type = nir_tex_src_texture_deref;
+ tex->src[2].src = nir_src_for_ssa(input_img_deref);
+ tex->dest_type = nir_type_float32;
+ tex->is_array = false;
+ tex->coord_components = 1;
+
+ nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+ nir_builder_instr_insert(&b, &tex->instr);
+
+ nir_ssa_def *outval = &tex->dest.ssa;
+ nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord,
+ nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0));
+
+ return b.shader;
}
/* Buffer to image - don't write use image accessors */
static VkResult
radv_device_init_meta_btoi_state(struct radv_device *device)
{
- VkResult result;
- nir_shader *cs = build_nir_btoi_compute_shader(device, false);
- nir_shader *cs_3d = NULL;
- if (device->physical_device->rad_info.chip_class >= GFX9)
- cs_3d = build_nir_btoi_compute_shader(device, true);
- /*
- * two descriptors one for the image being sampled
- * one for the buffer being written.
- */
- VkDescriptorSetLayoutCreateInfo ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 2,
- .pBindings = (VkDescriptorSetLayoutBinding[]) {
- {
- .binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- {
- .binding = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- }
- };
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
- &ds_create_info,
- &device->meta_state.alloc,
- &device->meta_state.btoi.img_ds_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
-
- VkPipelineLayoutCreateInfo pl_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 1,
- .pSetLayouts = &device->meta_state.btoi.img_ds_layout,
- .pushConstantRangeCount = 1,
- .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
- };
-
- result = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &pl_create_info,
- &device->meta_state.alloc,
- &device->meta_state.btoi.img_p_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
- /* compute shader */
-
- VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = vk_shader_module_handle_from_nir(cs),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = pipeline_shader_stage,
- .flags = 0,
- .layout = device->meta_state.btoi.img_p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- 1, &vk_pipeline_info, NULL,
- &device->meta_state.btoi.pipeline);
- if (result != VK_SUCCESS)
- goto fail;
-
- if (device->physical_device->rad_info.chip_class >= GFX9) {
- VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = vk_shader_module_handle_from_nir(cs_3d),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo vk_pipeline_info_3d = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = pipeline_shader_stage_3d,
- .flags = 0,
- .layout = device->meta_state.btoi.img_p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- 1, &vk_pipeline_info_3d, NULL,
- &device->meta_state.btoi.pipeline_3d);
- ralloc_free(cs_3d);
- }
- ralloc_free(cs);
-
- return VK_SUCCESS;
+ VkResult result;
+ nir_shader *cs = build_nir_btoi_compute_shader(device, false);
+ nir_shader *cs_3d = NULL;
+ if (device->physical_device->rad_info.chip_class >= GFX9)
+ cs_3d = build_nir_btoi_compute_shader(device, true);
+   /*
+    * Two descriptors: one for the source buffer and one for the
+    * image being written.
+    */
+ VkDescriptorSetLayoutCreateInfo ds_create_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 2,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ {.binding = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.btoi.img_ds_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineLayoutCreateInfo pl_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &device->meta_state.btoi.img_ds_layout,
+ .pushConstantRangeCount = 1,
+ .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
+ };
+
+ result =
+ radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
+ &device->meta_state.alloc, &device->meta_state.btoi.img_p_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ /* compute shader */
+
+ VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(cs),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = pipeline_shader_stage,
+ .flags = 0,
+ .layout = device->meta_state.btoi.img_p_layout,
+ };
+
+ result = radv_CreateComputePipelines(radv_device_to_handle(device),
+ radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
+ &vk_pipeline_info, NULL, &device->meta_state.btoi.pipeline);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ if (device->physical_device->rad_info.chip_class >= GFX9) {
+ VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(cs_3d),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo vk_pipeline_info_3d = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = pipeline_shader_stage_3d,
+ .flags = 0,
+ .layout = device->meta_state.btoi.img_p_layout,
+ };
+
+      result = radv_CreateComputePipelines(
+         radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
+         &vk_pipeline_info_3d, NULL, &device->meta_state.btoi.pipeline_3d);
+      if (result != VK_SUCCESS)
+         goto fail;
+      ralloc_free(cs_3d);
+ }
+ ralloc_free(cs);
+
+ return VK_SUCCESS;
fail:
- ralloc_free(cs_3d);
- ralloc_free(cs);
- return result;
+ ralloc_free(cs_3d);
+ ralloc_free(cs);
+ return result;
}
static void
radv_device_finish_meta_btoi_state(struct radv_device *device)
{
- struct radv_meta_state *state = &device->meta_state;
-
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->btoi.img_p_layout, &state->alloc);
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- state->btoi.img_ds_layout,
- &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->btoi.pipeline, &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->btoi.pipeline_3d, &state->alloc);
+ struct radv_meta_state *state = &device->meta_state;
+
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->btoi.img_p_layout,
+ &state->alloc);
+ radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->btoi.img_ds_layout,
+ &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->btoi.pipeline, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->btoi.pipeline_3d, &state->alloc);
}
/* Buffer to image - special path for R32G32B32 */
static nir_shader *
build_nir_btoi_r32g32b32_compute_shader(struct radv_device *dev)
{
- const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
- false,
- false,
- GLSL_TYPE_FLOAT);
- const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF,
- false,
- GLSL_TYPE_FLOAT);
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_btoi_r32g32b32_cs");
- b.shader->info.cs.local_size[0] = 8;
- b.shader->info.cs.local_size[1] = 8;
- b.shader->info.cs.local_size[2] = 1;
- nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
- buf_type, "s_tex");
- input_img->data.descriptor_set = 0;
- input_img->data.binding = 0;
-
- nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
- img_type, "out_img");
- output_img->data.descriptor_set = 0;
- output_img->data.binding = 1;
-
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
- nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
- nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info.cs.local_size[0],
- b.shader->info.cs.local_size[1],
- b.shader->info.cs.local_size[2], 0);
-
- nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
-
- nir_ssa_def *offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range=16);
- nir_ssa_def *pitch = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 8), .range=16);
- nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range=16);
-
- nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
- nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);
-
- nir_ssa_def *tmp = nir_imul(&b, pos_y, stride);
- tmp = nir_iadd(&b, tmp, pos_x);
-
- nir_ssa_def *buf_coord = nir_vec4(&b, tmp, tmp, tmp, tmp);
-
- nir_ssa_def *img_coord = nir_iadd(&b, global_id, offset);
-
- nir_ssa_def *global_pos =
- nir_iadd(&b,
- nir_imul(&b, nir_channel(&b, img_coord, 1), pitch),
- nir_imul(&b, nir_channel(&b, img_coord, 0), nir_imm_int(&b, 3)));
-
- nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
-
- nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
- tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
- tex->op = nir_texop_txf;
- tex->src[0].src_type = nir_tex_src_coord;
- tex->src[0].src = nir_src_for_ssa(nir_channels(&b, buf_coord, 1));
- tex->src[1].src_type = nir_tex_src_lod;
- tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
- tex->src[2].src_type = nir_tex_src_texture_deref;
- tex->src[2].src = nir_src_for_ssa(input_img_deref);
- tex->dest_type = nir_type_float32;
- tex->is_array = false;
- tex->coord_components = 1;
- nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
- nir_builder_instr_insert(&b, &tex->instr);
-
- nir_ssa_def *outval = &tex->dest.ssa;
-
- for (int chan = 0; chan < 3; chan++) {
- nir_ssa_def *local_pos =
- nir_iadd(&b, global_pos, nir_imm_int(&b, chan));
-
- nir_ssa_def *coord =
- nir_vec4(&b, local_pos, local_pos, local_pos, local_pos);
-
- nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa,
- coord, nir_ssa_undef(&b, 1, 32),
- nir_channel(&b, outval, chan), nir_imm_int(&b, 0));
- }
-
- return b.shader;
+ const struct glsl_type *buf_type =
+ glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT);
+ const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT);
+ nir_builder b =
+ nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_btoi_r32g32b32_cs");
+ b.shader->info.cs.local_size[0] = 8;
+ b.shader->info.cs.local_size[1] = 8;
+ b.shader->info.cs.local_size[2] = 1;
+ nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, buf_type, "s_tex");
+ input_img->data.descriptor_set = 0;
+ input_img->data.binding = 0;
+
+ nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
+ output_img->data.descriptor_set = 0;
+ output_img->data.binding = 1;
+
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
+ nir_ssa_def *block_size =
+ nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1],
+ b.shader->info.cs.local_size[2], 0);
+
+ nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+
+ nir_ssa_def *offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 16);
+ nir_ssa_def *pitch = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 8), .range = 16);
+ nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);
+
+ nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
+ nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);
+
+ nir_ssa_def *tmp = nir_imul(&b, pos_y, stride);
+ tmp = nir_iadd(&b, tmp, pos_x);
+
+ nir_ssa_def *buf_coord = nir_vec4(&b, tmp, tmp, tmp, tmp);
+
+ nir_ssa_def *img_coord = nir_iadd(&b, global_id, offset);
+
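+   /* Each R32G32B32 pixel is written as three consecutive R32 texels, so the
+    * x coordinate is scaled by 3 and the pitch is expressed in R32 texels.
+    */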
+ nir_ssa_def *global_pos =
+ nir_iadd(&b, nir_imul(&b, nir_channel(&b, img_coord, 1), pitch),
+ nir_imul(&b, nir_channel(&b, img_coord, 0), nir_imm_int(&b, 3)));
+
+ nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
+
+ nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
+ tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
+ tex->op = nir_texop_txf;
+ tex->src[0].src_type = nir_tex_src_coord;
+ tex->src[0].src = nir_src_for_ssa(nir_channels(&b, buf_coord, 1));
+ tex->src[1].src_type = nir_tex_src_lod;
+ tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
+ tex->src[2].src_type = nir_tex_src_texture_deref;
+ tex->src[2].src = nir_src_for_ssa(input_img_deref);
+ tex->dest_type = nir_type_float32;
+ tex->is_array = false;
+ tex->coord_components = 1;
+ nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+ nir_builder_instr_insert(&b, &tex->instr);
+
+ nir_ssa_def *outval = &tex->dest.ssa;
+
+ for (int chan = 0; chan < 3; chan++) {
+ nir_ssa_def *local_pos = nir_iadd(&b, global_pos, nir_imm_int(&b, chan));
+
+ nir_ssa_def *coord = nir_vec4(&b, local_pos, local_pos, local_pos, local_pos);
+
+ nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord,
+ nir_ssa_undef(&b, 1, 32), nir_channel(&b, outval, chan),
+ nir_imm_int(&b, 0));
+ }
+
+ return b.shader;
}
static VkResult
radv_device_init_meta_btoi_r32g32b32_state(struct radv_device *device)
{
- VkResult result;
- nir_shader *cs = build_nir_btoi_r32g32b32_compute_shader(device);
-
- VkDescriptorSetLayoutCreateInfo ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 2,
- .pBindings = (VkDescriptorSetLayoutBinding[]) {
- {
- .binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- {
- .binding = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- }
- };
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
- &ds_create_info,
- &device->meta_state.alloc,
- &device->meta_state.btoi_r32g32b32.img_ds_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
-
- VkPipelineLayoutCreateInfo pl_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 1,
- .pSetLayouts = &device->meta_state.btoi_r32g32b32.img_ds_layout,
- .pushConstantRangeCount = 1,
- .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
- };
-
- result = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &pl_create_info,
- &device->meta_state.alloc,
- &device->meta_state.btoi_r32g32b32.img_p_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
- /* compute shader */
-
- VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = vk_shader_module_handle_from_nir(cs),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = pipeline_shader_stage,
- .flags = 0,
- .layout = device->meta_state.btoi_r32g32b32.img_p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- 1, &vk_pipeline_info, NULL,
- &device->meta_state.btoi_r32g32b32.pipeline);
+ VkResult result;
+ nir_shader *cs = build_nir_btoi_r32g32b32_compute_shader(device);
+
+ VkDescriptorSetLayoutCreateInfo ds_create_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 2,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ {.binding = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.btoi_r32g32b32.img_ds_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineLayoutCreateInfo pl_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &device->meta_state.btoi_r32g32b32.img_ds_layout,
+ .pushConstantRangeCount = 1,
+ .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
+ };
+
+ result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.btoi_r32g32b32.img_p_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ /* compute shader */
+
+ VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(cs),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = pipeline_shader_stage,
+ .flags = 0,
+ .layout = device->meta_state.btoi_r32g32b32.img_p_layout,
+ };
+
+ result = radv_CreateComputePipelines(
+ radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
+ &vk_pipeline_info, NULL, &device->meta_state.btoi_r32g32b32.pipeline);
fail:
- ralloc_free(cs);
- return result;
+ ralloc_free(cs);
+ return result;
}
static void
radv_device_finish_meta_btoi_r32g32b32_state(struct radv_device *device)
{
- struct radv_meta_state *state = &device->meta_state;
-
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->btoi_r32g32b32.img_p_layout, &state->alloc);
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- state->btoi_r32g32b32.img_ds_layout,
- &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->btoi_r32g32b32.pipeline, &state->alloc);
+ struct radv_meta_state *state = &device->meta_state;
+
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->btoi_r32g32b32.img_p_layout,
+ &state->alloc);
+ radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
+ state->btoi_r32g32b32.img_ds_layout, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->btoi_r32g32b32.pipeline,
+ &state->alloc);
}
static nir_shader *
build_nir_itoi_compute_shader(struct radv_device *dev, bool is_3d, int samples)
{
- bool is_multisampled = samples > 1;
- enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : is_multisampled ? GLSL_SAMPLER_DIM_MS : GLSL_SAMPLER_DIM_2D;
- const struct glsl_type *buf_type = glsl_sampler_type(dim,
- false,
- false,
- GLSL_TYPE_FLOAT);
- const struct glsl_type *img_type = glsl_image_type(dim,
- false,
- GLSL_TYPE_FLOAT);
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, is_3d ? "meta_itoi_cs_3d-%d" : "meta_itoi_cs-%d", samples);
- b.shader->info.cs.local_size[0] = 8;
- b.shader->info.cs.local_size[1] = 8;
- b.shader->info.cs.local_size[2] = 1;
- nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
- buf_type, "s_tex");
- input_img->data.descriptor_set = 0;
- input_img->data.binding = 0;
-
- nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
- img_type, "out_img");
- output_img->data.descriptor_set = 0;
- output_img->data.binding = 1;
-
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
- nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
- nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info.cs.local_size[0],
- b.shader->info.cs.local_size[1],
- b.shader->info.cs.local_size[2], 0);
-
- nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
-
- nir_ssa_def *src_offset = nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range=24);
- nir_ssa_def *dst_offset = nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 12), .range=24);
-
- nir_ssa_def *src_coord = nir_iadd(&b, global_id, src_offset);
- nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
-
- nir_ssa_def *dst_coord = nir_iadd(&b, global_id, dst_offset);
-
- nir_tex_instr *tex_instr[8];
- for (uint32_t i = 0; i < samples; i++) {
- tex_instr[i] = nir_tex_instr_create(b.shader, is_multisampled ? 4 : 3);
-
- nir_tex_instr *tex = tex_instr[i];
- tex->sampler_dim = dim;
- tex->op = is_multisampled ? nir_texop_txf_ms : nir_texop_txf;
- tex->src[0].src_type = nir_tex_src_coord;
- tex->src[0].src = nir_src_for_ssa(nir_channels(&b, src_coord, is_3d ? 0x7 : 0x3));
- tex->src[1].src_type = nir_tex_src_lod;
- tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
- tex->src[2].src_type = nir_tex_src_texture_deref;
- tex->src[2].src = nir_src_for_ssa(input_img_deref);
- if (is_multisampled) {
- tex->src[3].src_type = nir_tex_src_ms_index;
- tex->src[3].src = nir_src_for_ssa(nir_imm_int(&b, i));
- }
- tex->dest_type = nir_type_float32;
- tex->is_array = false;
- tex->coord_components = is_3d ? 3 : 2;
-
- nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
- nir_builder_instr_insert(&b, &tex->instr);
- }
-
- for (uint32_t i = 0; i < samples; i++) {
- nir_ssa_def *outval = &tex_instr[i]->dest.ssa;
- nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa,
- dst_coord, nir_imm_int(&b, i), outval, nir_imm_int(&b, 0));
- }
-
- return b.shader;
+ bool is_multisampled = samples > 1;
+ enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D
+ : is_multisampled ? GLSL_SAMPLER_DIM_MS
+ : GLSL_SAMPLER_DIM_2D;
+ const struct glsl_type *buf_type = glsl_sampler_type(dim, false, false, GLSL_TYPE_FLOAT);
+ const struct glsl_type *img_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
+ nir_builder b = nir_builder_init_simple_shader(
+ MESA_SHADER_COMPUTE, NULL, is_3d ? "meta_itoi_cs_3d-%d" : "meta_itoi_cs-%d", samples);
+ b.shader->info.cs.local_size[0] = 8;
+ b.shader->info.cs.local_size[1] = 8;
+ b.shader->info.cs.local_size[2] = 1;
+ nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, buf_type, "s_tex");
+ input_img->data.descriptor_set = 0;
+ input_img->data.binding = 0;
+
+ nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
+ output_img->data.descriptor_set = 0;
+ output_img->data.binding = 1;
+
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
+ nir_ssa_def *block_size =
+ nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1],
+ b.shader->info.cs.local_size[2], 0);
+
+ nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+
+ nir_ssa_def *src_offset =
+ nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = 24);
+ nir_ssa_def *dst_offset =
+ nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 12), .range = 24);
+
+ nir_ssa_def *src_coord = nir_iadd(&b, global_id, src_offset);
+ nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
+
+ nir_ssa_def *dst_coord = nir_iadd(&b, global_id, dst_offset);
+
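+   /* Fetch every sample of the source texel first, then store them into the
+    * destination; single-sampled images run one iteration of each loop.
+    */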
+ nir_tex_instr *tex_instr[8];
+ for (uint32_t i = 0; i < samples; i++) {
+ tex_instr[i] = nir_tex_instr_create(b.shader, is_multisampled ? 4 : 3);
+
+ nir_tex_instr *tex = tex_instr[i];
+ tex->sampler_dim = dim;
+ tex->op = is_multisampled ? nir_texop_txf_ms : nir_texop_txf;
+ tex->src[0].src_type = nir_tex_src_coord;
+ tex->src[0].src = nir_src_for_ssa(nir_channels(&b, src_coord, is_3d ? 0x7 : 0x3));
+ tex->src[1].src_type = nir_tex_src_lod;
+ tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
+ tex->src[2].src_type = nir_tex_src_texture_deref;
+ tex->src[2].src = nir_src_for_ssa(input_img_deref);
+ if (is_multisampled) {
+ tex->src[3].src_type = nir_tex_src_ms_index;
+ tex->src[3].src = nir_src_for_ssa(nir_imm_int(&b, i));
+ }
+ tex->dest_type = nir_type_float32;
+ tex->is_array = false;
+ tex->coord_components = is_3d ? 3 : 2;
+
+ nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+ nir_builder_instr_insert(&b, &tex->instr);
+ }
+
+ for (uint32_t i = 0; i < samples; i++) {
+ nir_ssa_def *outval = &tex_instr[i]->dest.ssa;
+ nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, dst_coord,
+ nir_imm_int(&b, i), outval, nir_imm_int(&b, 0));
+ }
+
+ return b.shader;
}
static VkResult
-create_itoi_pipeline(struct radv_device *device,
- int samples,
- VkPipeline *pipeline)
+create_itoi_pipeline(struct radv_device *device, int samples, VkPipeline *pipeline)
{
- struct radv_meta_state *state = &device->meta_state;
- nir_shader *cs = build_nir_itoi_compute_shader(device, false, samples);
- VkResult result;
-
- VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = vk_shader_module_handle_from_nir(cs),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = pipeline_shader_stage,
- .flags = 0,
- .layout = state->itoi.img_p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&state->cache),
- 1, &vk_pipeline_info, NULL,
- pipeline);
- ralloc_free(cs);
- return result;
+ struct radv_meta_state *state = &device->meta_state;
+ nir_shader *cs = build_nir_itoi_compute_shader(device, false, samples);
+ VkResult result;
+
+ VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(cs),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = pipeline_shader_stage,
+ .flags = 0,
+ .layout = state->itoi.img_p_layout,
+ };
+
+ result = radv_CreateComputePipelines(radv_device_to_handle(device),
+ radv_pipeline_cache_to_handle(&state->cache), 1,
+ &vk_pipeline_info, NULL, pipeline);
+ ralloc_free(cs);
+ return result;
}
/* image to image - don't write use image accessors */
static VkResult
radv_device_init_meta_itoi_state(struct radv_device *device)
{
- VkResult result;
-
- /*
- * two descriptors one for the image being sampled
- * one for the buffer being written.
- */
- VkDescriptorSetLayoutCreateInfo ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 2,
- .pBindings = (VkDescriptorSetLayoutBinding[]) {
- {
- .binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- {
- .binding = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- }
- };
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
- &ds_create_info,
- &device->meta_state.alloc,
- &device->meta_state.itoi.img_ds_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
-
- VkPipelineLayoutCreateInfo pl_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 1,
- .pSetLayouts = &device->meta_state.itoi.img_ds_layout,
- .pushConstantRangeCount = 1,
- .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 24},
- };
-
- result = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &pl_create_info,
- &device->meta_state.alloc,
- &device->meta_state.itoi.img_p_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
- for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; i++) {
- uint32_t samples = 1 << i;
- result = create_itoi_pipeline(device, samples,
- &device->meta_state.itoi.pipeline[i]);
- if (result != VK_SUCCESS)
- goto fail;
- }
-
- if (device->physical_device->rad_info.chip_class >= GFX9) {
- nir_shader *cs_3d = build_nir_itoi_compute_shader(device, true, 1);
-
- VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
-.stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = vk_shader_module_handle_from_nir(cs_3d),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo vk_pipeline_info_3d = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = pipeline_shader_stage_3d,
- .flags = 0,
- .layout = device->meta_state.itoi.img_p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- 1, &vk_pipeline_info_3d, NULL,
- &device->meta_state.itoi.pipeline_3d);
- ralloc_free(cs_3d);
- }
-
- return VK_SUCCESS;
+ VkResult result;
+
+   /*
+    * Two descriptors: one for the image being sampled and one for the
+    * image being written.
+    */
+ VkDescriptorSetLayoutCreateInfo ds_create_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 2,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ {.binding = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.itoi.img_ds_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineLayoutCreateInfo pl_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &device->meta_state.itoi.img_ds_layout,
+ .pushConstantRangeCount = 1,
+ .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 24},
+ };
+
+ result =
+ radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
+ &device->meta_state.alloc, &device->meta_state.itoi.img_p_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; i++) {
+ uint32_t samples = 1 << i;
+ result = create_itoi_pipeline(device, samples, &device->meta_state.itoi.pipeline[i]);
+ if (result != VK_SUCCESS)
+ goto fail;
+ }
+
+ if (device->physical_device->rad_info.chip_class >= GFX9) {
+ nir_shader *cs_3d = build_nir_itoi_compute_shader(device, true, 1);
+
+ VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(cs_3d),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo vk_pipeline_info_3d = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = pipeline_shader_stage_3d,
+ .flags = 0,
+ .layout = device->meta_state.itoi.img_p_layout,
+ };
+
+      result = radv_CreateComputePipelines(
+         radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
+         &vk_pipeline_info_3d, NULL, &device->meta_state.itoi.pipeline_3d);
+      ralloc_free(cs_3d);
+      if (result != VK_SUCCESS)
+         goto fail;
+ }
+
+ return VK_SUCCESS;
fail:
- return result;
+ return result;
}
static void
radv_device_finish_meta_itoi_state(struct radv_device *device)
{
- struct radv_meta_state *state = &device->meta_state;
-
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->itoi.img_p_layout, &state->alloc);
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- state->itoi.img_ds_layout,
- &state->alloc);
-
- for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->itoi.pipeline[i], &state->alloc);
- }
-
- if (device->physical_device->rad_info.chip_class >= GFX9)
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->itoi.pipeline_3d, &state->alloc);
+ struct radv_meta_state *state = &device->meta_state;
+
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->itoi.img_p_layout,
+ &state->alloc);
+ radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->itoi.img_ds_layout,
+ &state->alloc);
+
+ for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
+ radv_DestroyPipeline(radv_device_to_handle(device), state->itoi.pipeline[i], &state->alloc);
+ }
+
+ if (device->physical_device->rad_info.chip_class >= GFX9)
+ radv_DestroyPipeline(radv_device_to_handle(device), state->itoi.pipeline_3d, &state->alloc);
}
static nir_shader *
build_nir_itoi_r32g32b32_compute_shader(struct radv_device *dev)
{
- const struct glsl_type *type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
- false,
- false,
- GLSL_TYPE_FLOAT);
- const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF,
- false,
- GLSL_TYPE_FLOAT);
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_itoi_r32g32b32_cs");
- b.shader->info.cs.local_size[0] = 8;
- b.shader->info.cs.local_size[1] = 8;
- b.shader->info.cs.local_size[2] = 1;
- nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
- type, "input_img");
- input_img->data.descriptor_set = 0;
- input_img->data.binding = 0;
-
- nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
- img_type, "output_img");
- output_img->data.descriptor_set = 0;
- output_img->data.binding = 1;
-
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
- nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
- nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info.cs.local_size[0],
- b.shader->info.cs.local_size[1],
- b.shader->info.cs.local_size[2], 0);
-
- nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
-
- nir_ssa_def *src_offset = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 0), .range=24);
- nir_ssa_def *dst_offset = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 12), .range=24);
-
- nir_ssa_def *src_stride = nir_channel(&b, src_offset, 2);
- nir_ssa_def *dst_stride = nir_channel(&b, dst_offset, 2);
-
- nir_ssa_def *src_img_coord = nir_iadd(&b, global_id, src_offset);
- nir_ssa_def *dst_img_coord = nir_iadd(&b, global_id, dst_offset);
-
- nir_ssa_def *src_global_pos =
- nir_iadd(&b,
- nir_imul(&b, nir_channel(&b, src_img_coord, 1), src_stride),
- nir_imul(&b, nir_channel(&b, src_img_coord, 0), nir_imm_int(&b, 3)));
-
- nir_ssa_def *dst_global_pos =
- nir_iadd(&b,
- nir_imul(&b, nir_channel(&b, dst_img_coord, 1), dst_stride),
- nir_imul(&b, nir_channel(&b, dst_img_coord, 0), nir_imm_int(&b, 3)));
-
- for (int chan = 0; chan < 3; chan++) {
- /* src */
- nir_ssa_def *src_local_pos =
- nir_iadd(&b, src_global_pos, nir_imm_int(&b, chan));
-
- nir_ssa_def *src_coord =
- nir_vec4(&b, src_local_pos, src_local_pos,
- src_local_pos, src_local_pos);
-
- nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
-
- nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
- tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
- tex->op = nir_texop_txf;
- tex->src[0].src_type = nir_tex_src_coord;
- tex->src[0].src = nir_src_for_ssa(nir_channels(&b, src_coord, 1));
- tex->src[1].src_type = nir_tex_src_lod;
- tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
- tex->src[2].src_type = nir_tex_src_texture_deref;
- tex->src[2].src = nir_src_for_ssa(input_img_deref);
- tex->dest_type = nir_type_float32;
- tex->is_array = false;
- tex->coord_components = 1;
- nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
- nir_builder_instr_insert(&b, &tex->instr);
-
- nir_ssa_def *outval = &tex->dest.ssa;
-
- /* dst */
- nir_ssa_def *dst_local_pos =
- nir_iadd(&b, dst_global_pos, nir_imm_int(&b, chan));
-
- nir_ssa_def *dst_coord =
- nir_vec4(&b, dst_local_pos, dst_local_pos,
- dst_local_pos, dst_local_pos);
-
- nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa,
- dst_coord, nir_ssa_undef(&b, 1, 32),
- nir_channel(&b, outval, 0), nir_imm_int(&b, 0));
- }
-
- return b.shader;
+ const struct glsl_type *type =
+ glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT);
+ const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT);
+ nir_builder b =
+ nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_itoi_r32g32b32_cs");
+ b.shader->info.cs.local_size[0] = 8;
+ b.shader->info.cs.local_size[1] = 8;
+ b.shader->info.cs.local_size[2] = 1;
+ nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, type, "input_img");
+ input_img->data.descriptor_set = 0;
+ input_img->data.binding = 0;
+
+ nir_variable *output_img =
+ nir_variable_create(b.shader, nir_var_uniform, img_type, "output_img");
+ output_img->data.descriptor_set = 0;
+ output_img->data.binding = 1;
+
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
+ nir_ssa_def *block_size =
+ nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1],
+ b.shader->info.cs.local_size[2], 0);
+
+ nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+
+ nir_ssa_def *src_offset = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 0), .range = 24);
+ nir_ssa_def *dst_offset = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 12), .range = 24);
+
+ nir_ssa_def *src_stride = nir_channel(&b, src_offset, 2);
+ nir_ssa_def *dst_stride = nir_channel(&b, dst_offset, 2);
+
+ nir_ssa_def *src_img_coord = nir_iadd(&b, global_id, src_offset);
+ nir_ssa_def *dst_img_coord = nir_iadd(&b, global_id, dst_offset);
+
+ nir_ssa_def *src_global_pos =
+ nir_iadd(&b, nir_imul(&b, nir_channel(&b, src_img_coord, 1), src_stride),
+ nir_imul(&b, nir_channel(&b, src_img_coord, 0), nir_imm_int(&b, 3)));
+
+ nir_ssa_def *dst_global_pos =
+ nir_iadd(&b, nir_imul(&b, nir_channel(&b, dst_img_coord, 1), dst_stride),
+ nir_imul(&b, nir_channel(&b, dst_img_coord, 0), nir_imm_int(&b, 3)));
+
+ for (int chan = 0; chan < 3; chan++) {
+ /* src */
+ nir_ssa_def *src_local_pos = nir_iadd(&b, src_global_pos, nir_imm_int(&b, chan));
+
+ nir_ssa_def *src_coord =
+ nir_vec4(&b, src_local_pos, src_local_pos, src_local_pos, src_local_pos);
+
+ nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
+
+ nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
+ tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
+ tex->op = nir_texop_txf;
+ tex->src[0].src_type = nir_tex_src_coord;
+ tex->src[0].src = nir_src_for_ssa(nir_channels(&b, src_coord, 1));
+ tex->src[1].src_type = nir_tex_src_lod;
+ tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
+ tex->src[2].src_type = nir_tex_src_texture_deref;
+ tex->src[2].src = nir_src_for_ssa(input_img_deref);
+ tex->dest_type = nir_type_float32;
+ tex->is_array = false;
+ tex->coord_components = 1;
+ nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+ nir_builder_instr_insert(&b, &tex->instr);
+
+ nir_ssa_def *outval = &tex->dest.ssa;
+
+ /* dst */
+ nir_ssa_def *dst_local_pos = nir_iadd(&b, dst_global_pos, nir_imm_int(&b, chan));
+
+ nir_ssa_def *dst_coord =
+ nir_vec4(&b, dst_local_pos, dst_local_pos, dst_local_pos, dst_local_pos);
+
+ nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, dst_coord,
+ nir_ssa_undef(&b, 1, 32), nir_channel(&b, outval, 0),
+ nir_imm_int(&b, 0));
+ }
+
+ return b.shader;
}
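/* Illustrative sketch (hypothetical names, for reference only): the 24-byte
 * push-constant block the shader above consumes.  It corresponds to the
 * VkPushConstantRange of size 24 declared in
 * radv_device_init_meta_itoi_r32g32b32_state() below and to the unsigned
 * push_constants[6] filled in radv_meta_image_to_image_cs_r32g32b32():
 * the shader loads 3x32 bits at offset 0 (src) and at offset 12 (dst) and
 * uses the third component of each as the row stride in 32-bit elements.
 */
#include <stdint.h>

struct itoi_r32g32b32_push_constants {
   uint32_t src_x, src_y, src_stride; /* loaded at offset 0 */
   uint32_t dst_x, dst_y, dst_stride; /* loaded at offset 12 */
};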
/* Image to image - special path for R32G32B32 */
static VkResult
radv_device_init_meta_itoi_r32g32b32_state(struct radv_device *device)
{
- VkResult result;
- nir_shader *cs = build_nir_itoi_r32g32b32_compute_shader(device);
-
- VkDescriptorSetLayoutCreateInfo ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 2,
- .pBindings = (VkDescriptorSetLayoutBinding[]) {
- {
- .binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- {
- .binding = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- }
- };
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
- &ds_create_info,
- &device->meta_state.alloc,
- &device->meta_state.itoi_r32g32b32.img_ds_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
-
- VkPipelineLayoutCreateInfo pl_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 1,
- .pSetLayouts = &device->meta_state.itoi_r32g32b32.img_ds_layout,
- .pushConstantRangeCount = 1,
- .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 24},
- };
-
- result = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &pl_create_info,
- &device->meta_state.alloc,
- &device->meta_state.itoi_r32g32b32.img_p_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
- /* compute shader */
-
- VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = vk_shader_module_handle_from_nir(cs),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = pipeline_shader_stage,
- .flags = 0,
- .layout = device->meta_state.itoi_r32g32b32.img_p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- 1, &vk_pipeline_info, NULL,
- &device->meta_state.itoi_r32g32b32.pipeline);
+ VkResult result;
+ nir_shader *cs = build_nir_itoi_r32g32b32_compute_shader(device);
+
+ VkDescriptorSetLayoutCreateInfo ds_create_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 2,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ {.binding = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.itoi_r32g32b32.img_ds_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineLayoutCreateInfo pl_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &device->meta_state.itoi_r32g32b32.img_ds_layout,
+ .pushConstantRangeCount = 1,
+ .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 24},
+ };
+
+ result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.itoi_r32g32b32.img_p_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ /* compute shader */
+
+ VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(cs),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = pipeline_shader_stage,
+ .flags = 0,
+ .layout = device->meta_state.itoi_r32g32b32.img_p_layout,
+ };
+
+ result = radv_CreateComputePipelines(
+ radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
+ &vk_pipeline_info, NULL, &device->meta_state.itoi_r32g32b32.pipeline);
fail:
- ralloc_free(cs);
- return result;
+ ralloc_free(cs);
+ return result;
}
static void
radv_device_finish_meta_itoi_r32g32b32_state(struct radv_device *device)
{
- struct radv_meta_state *state = &device->meta_state;
-
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->itoi_r32g32b32.img_p_layout, &state->alloc);
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- state->itoi_r32g32b32.img_ds_layout,
- &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->itoi_r32g32b32.pipeline, &state->alloc);
+ struct radv_meta_state *state = &device->meta_state;
+
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->itoi_r32g32b32.img_p_layout,
+ &state->alloc);
+ radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
+ state->itoi_r32g32b32.img_ds_layout, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->itoi_r32g32b32.pipeline,
+ &state->alloc);
}
static nir_shader *
build_nir_cleari_compute_shader(struct radv_device *dev, bool is_3d, int samples)
{
- bool is_multisampled = samples > 1;
- enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : is_multisampled ? GLSL_SAMPLER_DIM_MS : GLSL_SAMPLER_DIM_2D;
- const struct glsl_type *img_type = glsl_image_type(dim,
- false,
- GLSL_TYPE_FLOAT);
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, is_3d ? "meta_cleari_cs_3d-%d" : "meta_cleari_cs-%d", samples);
- b.shader->info.cs.local_size[0] = 8;
- b.shader->info.cs.local_size[1] = 8;
- b.shader->info.cs.local_size[2] = 1;
-
- nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
- img_type, "out_img");
- output_img->data.descriptor_set = 0;
- output_img->data.binding = 0;
-
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
- nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
- nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info.cs.local_size[0],
- b.shader->info.cs.local_size[1],
- b.shader->info.cs.local_size[2], 0);
-
- nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
-
- nir_ssa_def *clear_val = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range=20);
- nir_ssa_def *layer = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 16), .range=20);
-
- nir_ssa_def *global_z = nir_iadd(&b, nir_channel(&b, global_id, 2), layer);
-
- nir_ssa_def *comps[4];
- comps[0] = nir_channel(&b, global_id, 0);
- comps[1] = nir_channel(&b, global_id, 1);
- comps[2] = global_z;
- comps[3] = nir_imm_int(&b, 0);
- global_id = nir_vec(&b, comps, 4);
-
- for (uint32_t i = 0; i < samples; i++) {
- nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa,
- global_id, nir_imm_int(&b, i), clear_val,
- nir_imm_int(&b, 0));
- }
-
- return b.shader;
+ bool is_multisampled = samples > 1;
+ enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D
+ : is_multisampled ? GLSL_SAMPLER_DIM_MS
+ : GLSL_SAMPLER_DIM_2D;
+ const struct glsl_type *img_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
+ nir_builder b = nir_builder_init_simple_shader(
+ MESA_SHADER_COMPUTE, NULL, is_3d ? "meta_cleari_cs_3d-%d" : "meta_cleari_cs-%d", samples);
+ b.shader->info.cs.local_size[0] = 8;
+ b.shader->info.cs.local_size[1] = 8;
+ b.shader->info.cs.local_size[2] = 1;
+
+ nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
+ output_img->data.descriptor_set = 0;
+ output_img->data.binding = 0;
+
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
+ nir_ssa_def *block_size =
+ nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1],
+ b.shader->info.cs.local_size[2], 0);
+
+ nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+
+ nir_ssa_def *clear_val = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 20);
+ nir_ssa_def *layer = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 16), .range = 20);
+
+ nir_ssa_def *global_z = nir_iadd(&b, nir_channel(&b, global_id, 2), layer);
+
+ nir_ssa_def *comps[4];
+ comps[0] = nir_channel(&b, global_id, 0);
+ comps[1] = nir_channel(&b, global_id, 1);
+ comps[2] = global_z;
+ comps[3] = nir_imm_int(&b, 0);
+ global_id = nir_vec(&b, comps, 4);
+
+ for (uint32_t i = 0; i < samples; i++) {
+ nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, global_id,
+ nir_imm_int(&b, i), clear_val, nir_imm_int(&b, 0));
+ }
+
+ return b.shader;
}
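/* Illustrative sketch (hypothetical names, for reference only): the 20-byte
 * push-constant block consumed by the clear shader above, matching the
 * VkPushConstantRange of size 20 declared in
 * radv_device_init_meta_cleari_state() below.  The shader loads the clear
 * value as a 4x32-bit vector at offset 0 and the base layer at offset 16.
 */
#include <stdint.h>

struct cleari_push_constants {
   uint32_t clear_val[4]; /* raw 32-bit words, offset 0 */
   uint32_t layer;        /* offset 16 */
};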
static VkResult
-create_cleari_pipeline(struct radv_device *device,
- int samples,
- VkPipeline *pipeline)
+create_cleari_pipeline(struct radv_device *device, int samples, VkPipeline *pipeline)
{
- nir_shader *cs = build_nir_cleari_compute_shader(device, false, samples);
- VkResult result;
-
- VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = vk_shader_module_handle_from_nir(cs),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = pipeline_shader_stage,
- .flags = 0,
- .layout = device->meta_state.cleari.img_p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- 1, &vk_pipeline_info, NULL,
- pipeline);
- ralloc_free(cs);
- return result;
+ nir_shader *cs = build_nir_cleari_compute_shader(device, false, samples);
+ VkResult result;
+
+ VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(cs),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = pipeline_shader_stage,
+ .flags = 0,
+ .layout = device->meta_state.cleari.img_p_layout,
+ };
+
+ result = radv_CreateComputePipelines(radv_device_to_handle(device),
+ radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
+ &vk_pipeline_info, NULL, pipeline);
+ ralloc_free(cs);
+ return result;
}
static VkResult
radv_device_init_meta_cleari_state(struct radv_device *device)
{
- VkResult result;
-
-   /* One descriptor: the storage image being cleared. */
- VkDescriptorSetLayoutCreateInfo ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 1,
- .pBindings = (VkDescriptorSetLayoutBinding[]) {
- {
- .binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- }
- };
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
- &ds_create_info,
- &device->meta_state.alloc,
- &device->meta_state.cleari.img_ds_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
-
- VkPipelineLayoutCreateInfo pl_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 1,
- .pSetLayouts = &device->meta_state.cleari.img_ds_layout,
- .pushConstantRangeCount = 1,
- .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 20},
- };
-
- result = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &pl_create_info,
- &device->meta_state.alloc,
- &device->meta_state.cleari.img_p_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
- for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; i++) {
- uint32_t samples = 1 << i;
- result = create_cleari_pipeline(device, samples,
- &device->meta_state.cleari.pipeline[i]);
- if (result != VK_SUCCESS)
- goto fail;
- }
-
- if (device->physical_device->rad_info.chip_class >= GFX9) {
- nir_shader *cs_3d = build_nir_cleari_compute_shader(device, true, 1);
-
- /* compute shader */
- VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = vk_shader_module_handle_from_nir(cs_3d),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo vk_pipeline_info_3d = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = pipeline_shader_stage_3d,
- .flags = 0,
- .layout = device->meta_state.cleari.img_p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- 1, &vk_pipeline_info_3d, NULL,
- &device->meta_state.cleari.pipeline_3d);
- ralloc_free(cs_3d);
- }
-
- return VK_SUCCESS;
+ VkResult result;
+
+   /* One descriptor: the storage image being cleared. */
+ VkDescriptorSetLayoutCreateInfo ds_create_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 1,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.cleari.img_ds_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineLayoutCreateInfo pl_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &device->meta_state.cleari.img_ds_layout,
+ .pushConstantRangeCount = 1,
+ .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 20},
+ };
+
+ result =
+ radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
+ &device->meta_state.alloc, &device->meta_state.cleari.img_p_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; i++) {
+ uint32_t samples = 1 << i;
+ result = create_cleari_pipeline(device, samples, &device->meta_state.cleari.pipeline[i]);
+ if (result != VK_SUCCESS)
+ goto fail;
+ }
+
+ if (device->physical_device->rad_info.chip_class >= GFX9) {
+ nir_shader *cs_3d = build_nir_cleari_compute_shader(device, true, 1);
+
+ /* compute shader */
+ VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(cs_3d),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo vk_pipeline_info_3d = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = pipeline_shader_stage_3d,
+ .flags = 0,
+ .layout = device->meta_state.cleari.img_p_layout,
+ };
+
+ result = radv_CreateComputePipelines(
+ radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
+ &vk_pipeline_info_3d, NULL, &device->meta_state.cleari.pipeline_3d);
+ ralloc_free(cs_3d);
+ }
+
+ return VK_SUCCESS;
fail:
- return result;
+ return result;
}
static void
radv_device_finish_meta_cleari_state(struct radv_device *device)
{
- struct radv_meta_state *state = &device->meta_state;
-
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->cleari.img_p_layout, &state->alloc);
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- state->cleari.img_ds_layout,
- &state->alloc);
+ struct radv_meta_state *state = &device->meta_state;
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->cleari.img_p_layout,
+ &state->alloc);
+ radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->cleari.img_ds_layout,
+ &state->alloc);
- for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->cleari.pipeline[i], &state->alloc);
- }
+ for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
+ radv_DestroyPipeline(radv_device_to_handle(device), state->cleari.pipeline[i], &state->alloc);
+ }
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->cleari.pipeline_3d, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->cleari.pipeline_3d, &state->alloc);
}
/* Special path for clearing R32G32B32 images using a compute shader. */
static nir_shader *
build_nir_cleari_r32g32b32_compute_shader(struct radv_device *dev)
{
- const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF,
- false,
- GLSL_TYPE_FLOAT);
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_cleari_r32g32b32_cs");
- b.shader->info.cs.local_size[0] = 8;
- b.shader->info.cs.local_size[1] = 8;
- b.shader->info.cs.local_size[2] = 1;
-
- nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
- img_type, "out_img");
- output_img->data.descriptor_set = 0;
- output_img->data.binding = 0;
-
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
- nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
- nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info.cs.local_size[0],
- b.shader->info.cs.local_size[1],
- b.shader->info.cs.local_size[2], 0);
-
- nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
-
- nir_ssa_def *clear_val = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 0), .range=16);
- nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range=16);
-
- nir_ssa_def *global_x = nir_channel(&b, global_id, 0);
- nir_ssa_def *global_y = nir_channel(&b, global_id, 1);
-
- nir_ssa_def *global_pos =
- nir_iadd(&b,
- nir_imul(&b, global_y, stride),
- nir_imul(&b, global_x, nir_imm_int(&b, 3)));
-
- for (unsigned chan = 0; chan < 3; chan++) {
- nir_ssa_def *local_pos =
- nir_iadd(&b, global_pos, nir_imm_int(&b, chan));
-
- nir_ssa_def *coord =
- nir_vec4(&b, local_pos, local_pos, local_pos, local_pos);
-
- nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa,
- coord, nir_ssa_undef(&b, 1, 32),
- nir_channel(&b, clear_val, chan), nir_imm_int(&b, 0));
- }
-
- return b.shader;
+ const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT);
+ nir_builder b =
+ nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_cleari_r32g32b32_cs");
+ b.shader->info.cs.local_size[0] = 8;
+ b.shader->info.cs.local_size[1] = 8;
+ b.shader->info.cs.local_size[2] = 1;
+
+ nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
+ output_img->data.descriptor_set = 0;
+ output_img->data.binding = 0;
+
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
+ nir_ssa_def *block_size =
+ nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1],
+ b.shader->info.cs.local_size[2], 0);
+
+ nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+
+ nir_ssa_def *clear_val = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 0), .range = 16);
+ nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);
+
+ nir_ssa_def *global_x = nir_channel(&b, global_id, 0);
+ nir_ssa_def *global_y = nir_channel(&b, global_id, 1);
+
+ nir_ssa_def *global_pos =
+ nir_iadd(&b, nir_imul(&b, global_y, stride), nir_imul(&b, global_x, nir_imm_int(&b, 3)));
+
+ for (unsigned chan = 0; chan < 3; chan++) {
+ nir_ssa_def *local_pos = nir_iadd(&b, global_pos, nir_imm_int(&b, chan));
+
+ nir_ssa_def *coord = nir_vec4(&b, local_pos, local_pos, local_pos, local_pos);
+
+ nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord,
+ nir_ssa_undef(&b, 1, 32), nir_channel(&b, clear_val, chan),
+ nir_imm_int(&b, 0));
+ }
+
+ return b.shader;
}
static VkResult
radv_device_init_meta_cleari_r32g32b32_state(struct radv_device *device)
{
- VkResult result;
- nir_shader *cs = build_nir_cleari_r32g32b32_compute_shader(device);
-
- VkDescriptorSetLayoutCreateInfo ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 1,
- .pBindings = (VkDescriptorSetLayoutBinding[]) {
- {
- .binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- }
- };
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
- &ds_create_info,
- &device->meta_state.alloc,
- &device->meta_state.cleari_r32g32b32.img_ds_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
- VkPipelineLayoutCreateInfo pl_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 1,
- .pSetLayouts = &device->meta_state.cleari_r32g32b32.img_ds_layout,
- .pushConstantRangeCount = 1,
- .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
- };
-
- result = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &pl_create_info,
- &device->meta_state.alloc,
- &device->meta_state.cleari_r32g32b32.img_p_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
- /* compute shader */
- VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = vk_shader_module_handle_from_nir(cs),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = pipeline_shader_stage,
- .flags = 0,
- .layout = device->meta_state.cleari_r32g32b32.img_p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- 1, &vk_pipeline_info, NULL,
- &device->meta_state.cleari_r32g32b32.pipeline);
+ VkResult result;
+ nir_shader *cs = build_nir_cleari_r32g32b32_compute_shader(device);
+
+ VkDescriptorSetLayoutCreateInfo ds_create_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 1,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.cleari_r32g32b32.img_ds_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineLayoutCreateInfo pl_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &device->meta_state.cleari_r32g32b32.img_ds_layout,
+ .pushConstantRangeCount = 1,
+ .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
+ };
+
+ result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.cleari_r32g32b32.img_p_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ /* compute shader */
+ VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(cs),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = pipeline_shader_stage,
+ .flags = 0,
+ .layout = device->meta_state.cleari_r32g32b32.img_p_layout,
+ };
+
+ result = radv_CreateComputePipelines(
+ radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
+ &vk_pipeline_info, NULL, &device->meta_state.cleari_r32g32b32.pipeline);
fail:
- ralloc_free(cs);
- return result;
+ ralloc_free(cs);
+ return result;
}
static void
radv_device_finish_meta_cleari_r32g32b32_state(struct radv_device *device)
{
- struct radv_meta_state *state = &device->meta_state;
-
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->cleari_r32g32b32.img_p_layout,
- &state->alloc);
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- state->cleari_r32g32b32.img_ds_layout,
- &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->cleari_r32g32b32.pipeline, &state->alloc);
+ struct radv_meta_state *state = &device->meta_state;
+
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->cleari_r32g32b32.img_p_layout,
+ &state->alloc);
+ radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
+ state->cleari_r32g32b32.img_ds_layout, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->cleari_r32g32b32.pipeline,
+ &state->alloc);
}
void
radv_device_finish_meta_bufimage_state(struct radv_device *device)
{
- radv_device_finish_meta_itob_state(device);
- radv_device_finish_meta_btoi_state(device);
- radv_device_finish_meta_btoi_r32g32b32_state(device);
- radv_device_finish_meta_itoi_state(device);
- radv_device_finish_meta_itoi_r32g32b32_state(device);
- radv_device_finish_meta_cleari_state(device);
- radv_device_finish_meta_cleari_r32g32b32_state(device);
+ radv_device_finish_meta_itob_state(device);
+ radv_device_finish_meta_btoi_state(device);
+ radv_device_finish_meta_btoi_r32g32b32_state(device);
+ radv_device_finish_meta_itoi_state(device);
+ radv_device_finish_meta_itoi_r32g32b32_state(device);
+ radv_device_finish_meta_cleari_state(device);
+ radv_device_finish_meta_cleari_r32g32b32_state(device);
}
VkResult
radv_device_init_meta_bufimage_state(struct radv_device *device)
{
- VkResult result;
+ VkResult result;
- result = radv_device_init_meta_itob_state(device);
- if (result != VK_SUCCESS)
- goto fail_itob;
+ result = radv_device_init_meta_itob_state(device);
+ if (result != VK_SUCCESS)
+ goto fail_itob;
- result = radv_device_init_meta_btoi_state(device);
- if (result != VK_SUCCESS)
- goto fail_btoi;
+ result = radv_device_init_meta_btoi_state(device);
+ if (result != VK_SUCCESS)
+ goto fail_btoi;
- result = radv_device_init_meta_btoi_r32g32b32_state(device);
- if (result != VK_SUCCESS)
- goto fail_btoi_r32g32b32;
+ result = radv_device_init_meta_btoi_r32g32b32_state(device);
+ if (result != VK_SUCCESS)
+ goto fail_btoi_r32g32b32;
- result = radv_device_init_meta_itoi_state(device);
- if (result != VK_SUCCESS)
- goto fail_itoi;
+ result = radv_device_init_meta_itoi_state(device);
+ if (result != VK_SUCCESS)
+ goto fail_itoi;
- result = radv_device_init_meta_itoi_r32g32b32_state(device);
- if (result != VK_SUCCESS)
- goto fail_itoi_r32g32b32;
+ result = radv_device_init_meta_itoi_r32g32b32_state(device);
+ if (result != VK_SUCCESS)
+ goto fail_itoi_r32g32b32;
- result = radv_device_init_meta_cleari_state(device);
- if (result != VK_SUCCESS)
- goto fail_cleari;
+ result = radv_device_init_meta_cleari_state(device);
+ if (result != VK_SUCCESS)
+ goto fail_cleari;
- result = radv_device_init_meta_cleari_r32g32b32_state(device);
- if (result != VK_SUCCESS)
- goto fail_cleari_r32g32b32;
+ result = radv_device_init_meta_cleari_r32g32b32_state(device);
+ if (result != VK_SUCCESS)
+ goto fail_cleari_r32g32b32;
- return VK_SUCCESS;
+ return VK_SUCCESS;
fail_cleari_r32g32b32:
- radv_device_finish_meta_cleari_r32g32b32_state(device);
+ radv_device_finish_meta_cleari_r32g32b32_state(device);
fail_cleari:
- radv_device_finish_meta_cleari_state(device);
+ radv_device_finish_meta_cleari_state(device);
fail_itoi_r32g32b32:
- radv_device_finish_meta_itoi_r32g32b32_state(device);
+ radv_device_finish_meta_itoi_r32g32b32_state(device);
fail_itoi:
- radv_device_finish_meta_itoi_state(device);
+ radv_device_finish_meta_itoi_state(device);
fail_btoi_r32g32b32:
- radv_device_finish_meta_btoi_r32g32b32_state(device);
+ radv_device_finish_meta_btoi_r32g32b32_state(device);
fail_btoi:
- radv_device_finish_meta_btoi_state(device);
+ radv_device_finish_meta_btoi_state(device);
fail_itob:
- radv_device_finish_meta_itob_state(device);
- return result;
+ radv_device_finish_meta_itob_state(device);
+ return result;
}
static void
-create_iview(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_surf *surf,
+create_iview(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *surf,
struct radv_image_view *iview)
{
- VkImageViewType view_type = cmd_buffer->device->physical_device->rad_info.chip_class < GFX9 ? VK_IMAGE_VIEW_TYPE_2D :
- radv_meta_get_view_type(surf->image);
- radv_image_view_init(iview, cmd_buffer->device,
- &(VkImageViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(surf->image),
- .viewType = view_type,
- .format = surf->format,
- .subresourceRange = {
- .aspectMask = surf->aspect_mask,
- .baseMipLevel = surf->level,
- .levelCount = 1,
- .baseArrayLayer = surf->layer,
- .layerCount = 1
- },
- }, &(struct radv_image_view_extra_create_info) {
- .disable_compression = surf->disable_compression,
- });
+ VkImageViewType view_type = cmd_buffer->device->physical_device->rad_info.chip_class < GFX9
+ ? VK_IMAGE_VIEW_TYPE_2D
+ : radv_meta_get_view_type(surf->image);
+ radv_image_view_init(iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(surf->image),
+ .viewType = view_type,
+ .format = surf->format,
+ .subresourceRange = {.aspectMask = surf->aspect_mask,
+ .baseMipLevel = surf->level,
+ .levelCount = 1,
+ .baseArrayLayer = surf->layer,
+ .layerCount = 1},
+ },
+ &(struct radv_image_view_extra_create_info){
+ .disable_compression = surf->disable_compression,
+ });
}
static void
-create_bview(struct radv_cmd_buffer *cmd_buffer,
- struct radv_buffer *buffer,
- unsigned offset,
- VkFormat format,
- struct radv_buffer_view *bview)
+create_bview(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer, unsigned offset,
+ VkFormat format, struct radv_buffer_view *bview)
{
- radv_buffer_view_init(bview, cmd_buffer->device,
- &(VkBufferViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
- .flags = 0,
- .buffer = radv_buffer_to_handle(buffer),
- .format = format,
- .offset = offset,
- .range = VK_WHOLE_SIZE,
- });
-
+ radv_buffer_view_init(bview, cmd_buffer->device,
+ &(VkBufferViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
+ .flags = 0,
+ .buffer = radv_buffer_to_handle(buffer),
+ .format = format,
+ .offset = offset,
+ .range = VK_WHOLE_SIZE,
+ });
}
static void
-create_buffer_from_image(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_surf *surf,
- VkBufferUsageFlagBits usage,
- VkBuffer *buffer)
+create_buffer_from_image(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *surf,
+ VkBufferUsageFlagBits usage, VkBuffer *buffer)
{
- struct radv_device *device = cmd_buffer->device;
- struct radv_device_memory mem = { .bo = surf->image->bo };
-
- radv_CreateBuffer(radv_device_to_handle(device),
- &(VkBufferCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
- .flags = 0,
- .size = surf->image->size,
- .usage = usage,
- .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
- }, NULL, buffer);
-
- radv_BindBufferMemory2(radv_device_to_handle(device), 1,
- (VkBindBufferMemoryInfo[]) {
- {
- .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
- .buffer = *buffer,
- .memory = radv_device_memory_to_handle(&mem),
- .memoryOffset = surf->image->offset,
- }
- });
+ struct radv_device *device = cmd_buffer->device;
+ struct radv_device_memory mem = {.bo = surf->image->bo};
+
+ radv_CreateBuffer(radv_device_to_handle(device),
+ &(VkBufferCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+ .flags = 0,
+ .size = surf->image->size,
+ .usage = usage,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ },
+ NULL, buffer);
+
+ radv_BindBufferMemory2(radv_device_to_handle(device), 1,
+ (VkBindBufferMemoryInfo[]){{
+ .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
+ .buffer = *buffer,
+ .memory = radv_device_memory_to_handle(&mem),
+ .memoryOffset = surf->image->offset,
+ }});
}
static void
-create_bview_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
- struct radv_buffer *buffer,
- unsigned offset,
- VkFormat src_format,
- struct radv_buffer_view *bview)
+create_bview_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer,
+ unsigned offset, VkFormat src_format, struct radv_buffer_view *bview)
{
- VkFormat format;
-
- switch (src_format) {
- case VK_FORMAT_R32G32B32_UINT:
- format = VK_FORMAT_R32_UINT;
- break;
- case VK_FORMAT_R32G32B32_SINT:
- format = VK_FORMAT_R32_SINT;
- break;
- case VK_FORMAT_R32G32B32_SFLOAT:
- format = VK_FORMAT_R32_SFLOAT;
- break;
- default:
- unreachable("invalid R32G32B32 format");
- }
-
- radv_buffer_view_init(bview, cmd_buffer->device,
- &(VkBufferViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
- .flags = 0,
- .buffer = radv_buffer_to_handle(buffer),
- .format = format,
- .offset = offset,
- .range = VK_WHOLE_SIZE,
- });
+ VkFormat format;
+
+ switch (src_format) {
+ case VK_FORMAT_R32G32B32_UINT:
+ format = VK_FORMAT_R32_UINT;
+ break;
+ case VK_FORMAT_R32G32B32_SINT:
+ format = VK_FORMAT_R32_SINT;
+ break;
+ case VK_FORMAT_R32G32B32_SFLOAT:
+ format = VK_FORMAT_R32_SFLOAT;
+ break;
+ default:
+ unreachable("invalid R32G32B32 format");
+ }
+
+ radv_buffer_view_init(bview, cmd_buffer->device,
+ &(VkBufferViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
+ .flags = 0,
+ .buffer = radv_buffer_to_handle(buffer),
+ .format = format,
+ .offset = offset,
+ .range = VK_WHOLE_SIZE,
+ });
}
static unsigned
get_image_stride_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_surf *surf)
+ struct radv_meta_blit2d_surf *surf)
{
- unsigned stride;
+ unsigned stride;
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
- stride = surf->image->planes[0].surface.u.gfx9.surf_pitch;
- } else {
- stride = surf->image->planes[0].surface.u.legacy.level[0].nblk_x * 3;
- }
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
+ stride = surf->image->planes[0].surface.u.gfx9.surf_pitch;
+ } else {
+ stride = surf->image->planes[0].surface.u.legacy.level[0].nblk_x * 3;
+ }
- return stride;
+ return stride;
}
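/* Illustrative sketch (hypothetical name, for reference only): how the
 * R32G32B32 clear/copy shaders above address individual channels.  The image
 * is viewed as a flat array of 32-bit elements, three per pixel, and "stride"
 * is the per-row distance in 32-bit elements as returned by
 * get_image_stride_for_r32g32b32() above.
 */
#include <stdint.h>

static inline uint32_t
r32g32b32_elem_index(uint32_t x, uint32_t y, uint32_t stride, uint32_t chan)
{
   /* chan selects R (0), G (1) or B (2). */
   return y * stride + x * 3 + chan;
}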
static void
-itob_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image_view *src,
- struct radv_buffer_view *dst)
+itob_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src,
+ struct radv_buffer_view *dst)
{
- struct radv_device *device = cmd_buffer->device;
-
- radv_meta_push_descriptor_set(cmd_buffer,
- VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.itob.img_p_layout,
- 0, /* set */
- 2, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]) {
- {
- .sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(src),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- }
- },
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 1,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(dst) },
- }
- });
+ struct radv_device *device = cmd_buffer->device;
+
+ radv_meta_push_descriptor_set(
+ cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.itob.img_p_layout, 0, /* set */
+ 2, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){
+ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .pImageInfo =
+ (VkDescriptorImageInfo[]){
+ {
+ .sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(src),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }},
+ {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 1,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+ .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(dst)},
+ }});
}
void
-radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_surf *src,
- struct radv_meta_blit2d_buffer *dst,
- unsigned num_rects,
- struct radv_meta_blit2d_rect *rects)
+radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src,
+ struct radv_meta_blit2d_buffer *dst, unsigned num_rects,
+ struct radv_meta_blit2d_rect *rects)
{
- VkPipeline pipeline = cmd_buffer->device->meta_state.itob.pipeline;
- struct radv_device *device = cmd_buffer->device;
- struct radv_image_view src_view;
- struct radv_buffer_view dst_view;
-
- create_iview(cmd_buffer, src, &src_view);
- create_bview(cmd_buffer, dst->buffer, dst->offset, dst->format, &dst_view);
- itob_bind_descriptors(cmd_buffer, &src_view, &dst_view);
-
- if (device->physical_device->rad_info.chip_class >= GFX9 &&
- src->image->type == VK_IMAGE_TYPE_3D)
- pipeline = cmd_buffer->device->meta_state.itob.pipeline_3d;
-
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
-
- for (unsigned r = 0; r < num_rects; ++r) {
- unsigned push_constants[4] = {
- rects[r].src_x,
- rects[r].src_y,
- src->layer,
- dst->pitch
- };
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.itob.img_p_layout,
- VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
- push_constants);
-
- radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
- }
+ VkPipeline pipeline = cmd_buffer->device->meta_state.itob.pipeline;
+ struct radv_device *device = cmd_buffer->device;
+ struct radv_image_view src_view;
+ struct radv_buffer_view dst_view;
+
+ create_iview(cmd_buffer, src, &src_view);
+ create_bview(cmd_buffer, dst->buffer, dst->offset, dst->format, &dst_view);
+ itob_bind_descriptors(cmd_buffer, &src_view, &dst_view);
+
+ if (device->physical_device->rad_info.chip_class >= GFX9 && src->image->type == VK_IMAGE_TYPE_3D)
+ pipeline = cmd_buffer->device->meta_state.itob.pipeline_3d;
+
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
+ pipeline);
+
+ for (unsigned r = 0; r < num_rects; ++r) {
+ unsigned push_constants[4] = {rects[r].src_x, rects[r].src_y, src->layer, dst->pitch};
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+ device->meta_state.itob.img_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
+ 16, push_constants);
+
+ radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
+ }
}
static void
-btoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
- struct radv_buffer_view *src,
- struct radv_buffer_view *dst)
+btoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer_view *src,
+ struct radv_buffer_view *dst)
{
- struct radv_device *device = cmd_buffer->device;
-
- radv_meta_push_descriptor_set(cmd_buffer,
- VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.btoi_r32g32b32.img_p_layout,
- 0, /* set */
- 2, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
- .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(src) },
- },
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 1,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(dst) },
- }
- });
+ struct radv_device *device = cmd_buffer->device;
+
+ radv_meta_push_descriptor_set(
+ cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.btoi_r32g32b32.img_p_layout,
+ 0, /* set */
+ 2, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){
+ {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
+ .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(src)},
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 1,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+ .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(dst)},
+ }});
}
static void
radv_meta_buffer_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_buffer *src,
- struct radv_meta_blit2d_surf *dst,
- unsigned num_rects,
- struct radv_meta_blit2d_rect *rects)
+ struct radv_meta_blit2d_buffer *src,
+ struct radv_meta_blit2d_surf *dst, unsigned num_rects,
+ struct radv_meta_blit2d_rect *rects)
{
- VkPipeline pipeline = cmd_buffer->device->meta_state.btoi_r32g32b32.pipeline;
- struct radv_device *device = cmd_buffer->device;
- struct radv_buffer_view src_view, dst_view;
- unsigned dst_offset = 0;
- unsigned stride;
- VkBuffer buffer;
-
- /* This special btoi path for R32G32B32 formats will write the linear
- * image as a buffer with the same underlying memory. The compute
-     * shader will copy all components separately using an R32 format.
- */
- create_buffer_from_image(cmd_buffer, dst,
- VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT,
- &buffer);
-
- create_bview(cmd_buffer, src->buffer, src->offset,
- src->format, &src_view);
- create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(buffer),
- dst_offset, dst->format, &dst_view);
- btoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view);
-
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
-
- stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);
-
- for (unsigned r = 0; r < num_rects; ++r) {
- unsigned push_constants[4] = {
- rects[r].dst_x,
- rects[r].dst_y,
- stride,
- src->pitch,
- };
-
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.btoi_r32g32b32.img_p_layout,
- VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
- push_constants);
-
- radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
- }
-
- radv_DestroyBuffer(radv_device_to_handle(device), buffer, NULL);
+ VkPipeline pipeline = cmd_buffer->device->meta_state.btoi_r32g32b32.pipeline;
+ struct radv_device *device = cmd_buffer->device;
+ struct radv_buffer_view src_view, dst_view;
+ unsigned dst_offset = 0;
+ unsigned stride;
+ VkBuffer buffer;
+
+ /* This special btoi path for R32G32B32 formats will write the linear
+ * image as a buffer with the same underlying memory. The compute
+    * shader will copy all components separately using an R32 format.
+ */
+ create_buffer_from_image(cmd_buffer, dst, VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, &buffer);
+
+ create_bview(cmd_buffer, src->buffer, src->offset, src->format, &src_view);
+ create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(buffer), dst_offset, dst->format,
+ &dst_view);
+ btoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view);
+
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
+ pipeline);
+
+ stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);
+
+ for (unsigned r = 0; r < num_rects; ++r) {
+ unsigned push_constants[4] = {
+ rects[r].dst_x,
+ rects[r].dst_y,
+ stride,
+ src->pitch,
+ };
+
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+ device->meta_state.btoi_r32g32b32.img_p_layout,
+ VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, push_constants);
+
+ radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
+ }
+
+ radv_DestroyBuffer(radv_device_to_handle(device), buffer, NULL);
}
static void
-btoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
- struct radv_buffer_view *src,
- struct radv_image_view *dst)
+btoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer_view *src,
+ struct radv_image_view *dst)
{
- struct radv_device *device = cmd_buffer->device;
-
- radv_meta_push_descriptor_set(cmd_buffer,
- VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.btoi.img_p_layout,
- 0, /* set */
- 2, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(src) },
- },
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 1,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]) {
- {
- .sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(dst),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- }
- }
- });
+ struct radv_device *device = cmd_buffer->device;
+
+ radv_meta_push_descriptor_set(
+ cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.btoi.img_p_layout, 0, /* set */
+ 2, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){
+ {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+ .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(src)},
+ },
+ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 1,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .pImageInfo = (VkDescriptorImageInfo[]){
+ {
+ .sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(dst),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }}});
}
void
radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_buffer *src,
- struct radv_meta_blit2d_surf *dst,
- unsigned num_rects,
- struct radv_meta_blit2d_rect *rects)
+ struct radv_meta_blit2d_buffer *src, struct radv_meta_blit2d_surf *dst,
+ unsigned num_rects, struct radv_meta_blit2d_rect *rects)
{
- VkPipeline pipeline = cmd_buffer->device->meta_state.btoi.pipeline;
- struct radv_device *device = cmd_buffer->device;
- struct radv_buffer_view src_view;
- struct radv_image_view dst_view;
-
- if (dst->image->vk_format == VK_FORMAT_R32G32B32_UINT ||
- dst->image->vk_format == VK_FORMAT_R32G32B32_SINT ||
- dst->image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
- radv_meta_buffer_to_image_cs_r32g32b32(cmd_buffer, src, dst,
- num_rects, rects);
- return;
- }
-
- create_bview(cmd_buffer, src->buffer, src->offset, src->format, &src_view);
- create_iview(cmd_buffer, dst, &dst_view);
- btoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);
-
- if (device->physical_device->rad_info.chip_class >= GFX9 &&
- dst->image->type == VK_IMAGE_TYPE_3D)
- pipeline = cmd_buffer->device->meta_state.btoi.pipeline_3d;
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
-
- for (unsigned r = 0; r < num_rects; ++r) {
- unsigned push_constants[4] = {
- rects[r].dst_x,
- rects[r].dst_y,
- dst->layer,
- src->pitch,
- };
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.btoi.img_p_layout,
- VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
- push_constants);
-
- radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
- }
+ VkPipeline pipeline = cmd_buffer->device->meta_state.btoi.pipeline;
+ struct radv_device *device = cmd_buffer->device;
+ struct radv_buffer_view src_view;
+ struct radv_image_view dst_view;
+
+ if (dst->image->vk_format == VK_FORMAT_R32G32B32_UINT ||
+ dst->image->vk_format == VK_FORMAT_R32G32B32_SINT ||
+ dst->image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
+ radv_meta_buffer_to_image_cs_r32g32b32(cmd_buffer, src, dst, num_rects, rects);
+ return;
+ }
+
+ create_bview(cmd_buffer, src->buffer, src->offset, src->format, &src_view);
+ create_iview(cmd_buffer, dst, &dst_view);
+ btoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);
+
+ if (device->physical_device->rad_info.chip_class >= GFX9 && dst->image->type == VK_IMAGE_TYPE_3D)
+ pipeline = cmd_buffer->device->meta_state.btoi.pipeline_3d;
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
+ pipeline);
+
+ for (unsigned r = 0; r < num_rects; ++r) {
+ unsigned push_constants[4] = {
+ rects[r].dst_x,
+ rects[r].dst_y,
+ dst->layer,
+ src->pitch,
+ };
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+ device->meta_state.btoi.img_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
+ 16, push_constants);
+
+ radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
+ }
}
static void
-itoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
- struct radv_buffer_view *src,
- struct radv_buffer_view *dst)
+itoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer_view *src,
+ struct radv_buffer_view *dst)
{
- struct radv_device *device = cmd_buffer->device;
-
- radv_meta_push_descriptor_set(cmd_buffer,
- VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.itoi_r32g32b32.img_p_layout,
- 0, /* set */
- 2, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
- .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(src) },
- },
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 1,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(dst) },
- }
- });
+ struct radv_device *device = cmd_buffer->device;
+
+ radv_meta_push_descriptor_set(
+ cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.itoi_r32g32b32.img_p_layout,
+ 0, /* set */
+ 2, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){
+ {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
+ .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(src)},
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 1,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+ .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(dst)},
+ }});
}
static void
radv_meta_image_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_surf *src,
- struct radv_meta_blit2d_surf *dst,
- unsigned num_rects,
- struct radv_meta_blit2d_rect *rects)
+ struct radv_meta_blit2d_surf *src,
+ struct radv_meta_blit2d_surf *dst, unsigned num_rects,
+ struct radv_meta_blit2d_rect *rects)
{
- VkPipeline pipeline = cmd_buffer->device->meta_state.itoi_r32g32b32.pipeline;
- struct radv_device *device = cmd_buffer->device;
- struct radv_buffer_view src_view, dst_view;
- unsigned src_offset = 0, dst_offset = 0;
- unsigned src_stride, dst_stride;
- VkBuffer src_buffer, dst_buffer;
-
-   /* 96-bit formats are only compatible with themselves. */
- assert(dst->format == VK_FORMAT_R32G32B32_UINT ||
- dst->format == VK_FORMAT_R32G32B32_SINT ||
- dst->format == VK_FORMAT_R32G32B32_SFLOAT);
-
- /* This special itoi path for R32G32B32 formats will write the linear
- * image as a buffer with the same underlying memory. The compute
-     * shader will copy all components separately using an R32 format.
- */
- create_buffer_from_image(cmd_buffer, src,
- VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT,
- &src_buffer);
- create_buffer_from_image(cmd_buffer, dst,
- VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT,
- &dst_buffer);
-
- create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(src_buffer),
- src_offset, src->format, &src_view);
- create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(dst_buffer),
- dst_offset, dst->format, &dst_view);
- itoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view);
-
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
-
- src_stride = get_image_stride_for_r32g32b32(cmd_buffer, src);
- dst_stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);
-
- for (unsigned r = 0; r < num_rects; ++r) {
- unsigned push_constants[6] = {
- rects[r].src_x,
- rects[r].src_y,
- src_stride,
- rects[r].dst_x,
- rects[r].dst_y,
- dst_stride,
- };
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.itoi_r32g32b32.img_p_layout,
- VK_SHADER_STAGE_COMPUTE_BIT, 0, 24,
- push_constants);
-
- radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
- }
-
- radv_DestroyBuffer(radv_device_to_handle(device), src_buffer, NULL);
- radv_DestroyBuffer(radv_device_to_handle(device), dst_buffer, NULL);
+ VkPipeline pipeline = cmd_buffer->device->meta_state.itoi_r32g32b32.pipeline;
+ struct radv_device *device = cmd_buffer->device;
+ struct radv_buffer_view src_view, dst_view;
+ unsigned src_offset = 0, dst_offset = 0;
+ unsigned src_stride, dst_stride;
+ VkBuffer src_buffer, dst_buffer;
+
+   /* 96-bit formats are only compatible with themselves. */
+ assert(dst->format == VK_FORMAT_R32G32B32_UINT || dst->format == VK_FORMAT_R32G32B32_SINT ||
+ dst->format == VK_FORMAT_R32G32B32_SFLOAT);
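+   /* The caller only takes this path when src is one of these formats, and
+    * copies require size-compatible formats, so checking dst is sufficient.
+    */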
+
+ /* This special itoi path for R32G32B32 formats will write the linear
+ * image as a buffer with the same underlying memory. The compute
+    * shader will copy all components separately using an R32 format.
+ */
+ create_buffer_from_image(cmd_buffer, src, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, &src_buffer);
+ create_buffer_from_image(cmd_buffer, dst, VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, &dst_buffer);
+
+ create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(src_buffer), src_offset,
+ src->format, &src_view);
+ create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(dst_buffer), dst_offset,
+ dst->format, &dst_view);
+ itoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view);
+
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
+ pipeline);
+
+ src_stride = get_image_stride_for_r32g32b32(cmd_buffer, src);
+ dst_stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);
+
+ for (unsigned r = 0; r < num_rects; ++r) {
+ unsigned push_constants[6] = {
+ rects[r].src_x, rects[r].src_y, src_stride, rects[r].dst_x, rects[r].dst_y, dst_stride,
+ };
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+ device->meta_state.itoi_r32g32b32.img_p_layout,
+ VK_SHADER_STAGE_COMPUTE_BIT, 0, 24, push_constants);
+
+ radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
+ }
+
+ radv_DestroyBuffer(radv_device_to_handle(device), src_buffer, NULL);
+ radv_DestroyBuffer(radv_device_to_handle(device), dst_buffer, NULL);
}
static void
-itoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image_view *src,
- struct radv_image_view *dst)
+itoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src,
+ struct radv_image_view *dst)
{
- struct radv_device *device = cmd_buffer->device;
-
- radv_meta_push_descriptor_set(cmd_buffer,
- VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.itoi.img_p_layout,
- 0, /* set */
- 2, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]) {
- {
- .sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(src),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- }
- },
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 1,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]) {
- {
- .sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(dst),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- }
- }
- });
+ struct radv_device *device = cmd_buffer->device;
+
+ radv_meta_push_descriptor_set(
+ cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.itoi.img_p_layout, 0, /* set */
+ 2, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .pImageInfo =
+ (VkDescriptorImageInfo[]){
+ {
+ .sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(src),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }},
+ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 1,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .pImageInfo = (VkDescriptorImageInfo[]){
+ {
+ .sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(dst),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }}});
}
void
-radv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_surf *src,
- struct radv_meta_blit2d_surf *dst,
- unsigned num_rects,
- struct radv_meta_blit2d_rect *rects)
+radv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src,
+ struct radv_meta_blit2d_surf *dst, unsigned num_rects,
+ struct radv_meta_blit2d_rect *rects)
{
- struct radv_device *device = cmd_buffer->device;
- struct radv_image_view src_view, dst_view;
- uint32_t samples = src->image->info.samples;
- uint32_t samples_log2 = ffs(samples) - 1;
-
- if (src->format == VK_FORMAT_R32G32B32_UINT ||
- src->format == VK_FORMAT_R32G32B32_SINT ||
- src->format == VK_FORMAT_R32G32B32_SFLOAT) {
- radv_meta_image_to_image_cs_r32g32b32(cmd_buffer, src, dst,
- num_rects, rects);
- return;
- }
-
- create_iview(cmd_buffer, src, &src_view);
- create_iview(cmd_buffer, dst, &dst_view);
-
- itoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);
-
- VkPipeline pipeline = cmd_buffer->device->meta_state.itoi.pipeline[samples_log2];
- if (device->physical_device->rad_info.chip_class >= GFX9 &&
- (src->image->type == VK_IMAGE_TYPE_3D || dst->image->type == VK_IMAGE_TYPE_3D))
- pipeline = cmd_buffer->device->meta_state.itoi.pipeline_3d;
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
-
- for (unsigned r = 0; r < num_rects; ++r) {
- unsigned push_constants[6] = {
- rects[r].src_x,
- rects[r].src_y,
- src->layer,
- rects[r].dst_x,
- rects[r].dst_y,
- dst->layer,
- };
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.itoi.img_p_layout,
- VK_SHADER_STAGE_COMPUTE_BIT, 0, 24,
- push_constants);
-
- radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
- }
+ struct radv_device *device = cmd_buffer->device;
+ struct radv_image_view src_view, dst_view;
+ uint32_t samples = src->image->info.samples;
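+   /* Sample counts are powers of two, so ffs(samples) - 1 == log2(samples);
+    * it selects the pipeline variant for this sample count.
+    */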
+ uint32_t samples_log2 = ffs(samples) - 1;
+
+ if (src->format == VK_FORMAT_R32G32B32_UINT || src->format == VK_FORMAT_R32G32B32_SINT ||
+ src->format == VK_FORMAT_R32G32B32_SFLOAT) {
+ radv_meta_image_to_image_cs_r32g32b32(cmd_buffer, src, dst, num_rects, rects);
+ return;
+ }
+
+ create_iview(cmd_buffer, src, &src_view);
+ create_iview(cmd_buffer, dst, &dst_view);
+
+ itoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);
+
+ VkPipeline pipeline = cmd_buffer->device->meta_state.itoi.pipeline[samples_log2];
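+   /* GFX9+ addresses 3D images with a z coordinate instead of an array layer,
+    * so copies involving a 3D image need the dedicated 3D shader variant.
+    */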
+ if (device->physical_device->rad_info.chip_class >= GFX9 &&
+ (src->image->type == VK_IMAGE_TYPE_3D || dst->image->type == VK_IMAGE_TYPE_3D))
+ pipeline = cmd_buffer->device->meta_state.itoi.pipeline_3d;
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
+ pipeline);
+
+ for (unsigned r = 0; r < num_rects; ++r) {
+ unsigned push_constants[6] = {
+ rects[r].src_x, rects[r].src_y, src->layer, rects[r].dst_x, rects[r].dst_y, dst->layer,
+ };
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+ device->meta_state.itoi.img_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
+ 24, push_constants);
+
+ radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
+ }
}
static void
-cleari_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
- struct radv_buffer_view *view)
+cleari_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer_view *view)
{
- struct radv_device *device = cmd_buffer->device;
-
- radv_meta_push_descriptor_set(cmd_buffer,
- VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.cleari_r32g32b32.img_p_layout,
- 0, /* set */
- 1, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(view) },
- }
- });
+ struct radv_device *device = cmd_buffer->device;
+
+ radv_meta_push_descriptor_set(
+ cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.cleari_r32g32b32.img_p_layout,
+ 0, /* set */
+ 1, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){{
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+ .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(view)},
+ }});
}
static void
radv_meta_clear_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_surf *dst,
- const VkClearColorValue *clear_color)
+ struct radv_meta_blit2d_surf *dst,
+ const VkClearColorValue *clear_color)
{
- VkPipeline pipeline = cmd_buffer->device->meta_state.cleari_r32g32b32.pipeline;
- struct radv_device *device = cmd_buffer->device;
- struct radv_buffer_view dst_view;
- unsigned stride;
- VkBuffer buffer;
-
- /* This special clear path for R32G32B32 formats will write the linear
- * image as a buffer with the same underlying memory. The compute
- * shader will clear all components separately using a R32 format.
- */
- create_buffer_from_image(cmd_buffer, dst,
- VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT,
- &buffer);
-
- create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(buffer),
- 0, dst->format, &dst_view);
- cleari_r32g32b32_bind_descriptors(cmd_buffer, &dst_view);
-
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
-
- stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);
-
- unsigned push_constants[4] = {
- clear_color->uint32[0],
- clear_color->uint32[1],
- clear_color->uint32[2],
- stride,
- };
-
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.cleari_r32g32b32.img_p_layout,
- VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
- push_constants);
-
- radv_unaligned_dispatch(cmd_buffer, dst->image->info.width,
- dst->image->info.height, 1);
-
- radv_DestroyBuffer(radv_device_to_handle(device), buffer, NULL);
+ VkPipeline pipeline = cmd_buffer->device->meta_state.cleari_r32g32b32.pipeline;
+ struct radv_device *device = cmd_buffer->device;
+ struct radv_buffer_view dst_view;
+ unsigned stride;
+ VkBuffer buffer;
+
+ /* This special clear path for R32G32B32 formats will write the linear
+ * image as a buffer with the same underlying memory. The compute
+    * shader will clear all components separately using an R32 format.
+ */
+ create_buffer_from_image(cmd_buffer, dst, VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, &buffer);
+
+ create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(buffer), 0, dst->format,
+ &dst_view);
+ cleari_r32g32b32_bind_descriptors(cmd_buffer, &dst_view);
+
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
+ pipeline);
+
+ stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);
+
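+   /* Push the three color channels plus the stride; R32G32B32 has no alpha. */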
+ unsigned push_constants[4] = {
+ clear_color->uint32[0],
+ clear_color->uint32[1],
+ clear_color->uint32[2],
+ stride,
+ };
+
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+ device->meta_state.cleari_r32g32b32.img_p_layout,
+ VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, push_constants);
+
+ radv_unaligned_dispatch(cmd_buffer, dst->image->info.width, dst->image->info.height, 1);
+
+ radv_DestroyBuffer(radv_device_to_handle(device), buffer, NULL);
}
static void
-cleari_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image_view *dst_iview)
+cleari_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *dst_iview)
{
- struct radv_device *device = cmd_buffer->device;
-
- radv_meta_push_descriptor_set(cmd_buffer,
- VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.cleari.img_p_layout,
- 0, /* set */
- 1, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]) {
- {
- .sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(dst_iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- }
- },
- });
+ struct radv_device *device = cmd_buffer->device;
+
+ radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
+ device->meta_state.cleari.img_p_layout, 0, /* set */
+ 1, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){
+ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .pImageInfo =
+ (VkDescriptorImageInfo[]){
+ {
+ .sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(dst_iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }},
+ });
}
void
-radv_meta_clear_image_cs(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_surf *dst,
- const VkClearColorValue *clear_color)
+radv_meta_clear_image_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *dst,
+ const VkClearColorValue *clear_color)
{
- struct radv_device *device = cmd_buffer->device;
- struct radv_image_view dst_iview;
- uint32_t samples = dst->image->info.samples;
- uint32_t samples_log2 = ffs(samples) - 1;
-
- if (dst->format == VK_FORMAT_R32G32B32_UINT ||
- dst->format == VK_FORMAT_R32G32B32_SINT ||
- dst->format == VK_FORMAT_R32G32B32_SFLOAT) {
- radv_meta_clear_image_cs_r32g32b32(cmd_buffer, dst, clear_color);
- return;
- }
-
- create_iview(cmd_buffer, dst, &dst_iview);
- cleari_bind_descriptors(cmd_buffer, &dst_iview);
-
- VkPipeline pipeline = cmd_buffer->device->meta_state.cleari.pipeline[samples_log2];
- if (device->physical_device->rad_info.chip_class >= GFX9 &&
- dst->image->type == VK_IMAGE_TYPE_3D)
- pipeline = cmd_buffer->device->meta_state.cleari.pipeline_3d;
-
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
-
- unsigned push_constants[5] = {
- clear_color->uint32[0],
- clear_color->uint32[1],
- clear_color->uint32[2],
- clear_color->uint32[3],
- dst->layer,
- };
-
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.cleari.img_p_layout,
- VK_SHADER_STAGE_COMPUTE_BIT, 0, 20,
- push_constants);
-
- radv_unaligned_dispatch(cmd_buffer, dst->image->info.width, dst->image->info.height, 1);
+ struct radv_device *device = cmd_buffer->device;
+ struct radv_image_view dst_iview;
+ uint32_t samples = dst->image->info.samples;
+ uint32_t samples_log2 = ffs(samples) - 1;
+
+ if (dst->format == VK_FORMAT_R32G32B32_UINT || dst->format == VK_FORMAT_R32G32B32_SINT ||
+ dst->format == VK_FORMAT_R32G32B32_SFLOAT) {
+ radv_meta_clear_image_cs_r32g32b32(cmd_buffer, dst, clear_color);
+ return;
+ }
+
+ create_iview(cmd_buffer, dst, &dst_iview);
+ cleari_bind_descriptors(cmd_buffer, &dst_iview);
+
+ VkPipeline pipeline = cmd_buffer->device->meta_state.cleari.pipeline[samples_log2];
+ if (device->physical_device->rad_info.chip_class >= GFX9 && dst->image->type == VK_IMAGE_TYPE_3D)
+ pipeline = cmd_buffer->device->meta_state.cleari.pipeline_3d;
+
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
+ pipeline);
+
+ unsigned push_constants[5] = {
+ clear_color->uint32[0],
+ clear_color->uint32[1],
+ clear_color->uint32[2],
+ clear_color->uint32[3],
+ dst->layer,
+ };
+
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+ device->meta_state.cleari.img_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 20,
+ push_constants);
+
+ radv_unaligned_dispatch(cmd_buffer, dst->image->info.width, dst->image->info.height, 1);
}
diff --git a/src/amd/vulkan/radv_meta_clear.c b/src/amd/vulkan/radv_meta_clear.c
index f49bf3b23e4..0acbfc12893 100644
--- a/src/amd/vulkan/radv_meta_clear.c
+++ b/src/amd/vulkan/radv_meta_clear.c
@@ -21,1870 +21,1696 @@
* IN THE SOFTWARE.
*/
+#include "nir/nir_builder.h"
#include "radv_debug.h"
#include "radv_meta.h"
#include "radv_private.h"
-#include "nir/nir_builder.h"
#include "util/format_rgb9e5.h"
#include "vk_format.h"
-enum {
- DEPTH_CLEAR_SLOW,
- DEPTH_CLEAR_FAST_EXPCLEAR,
- DEPTH_CLEAR_FAST_NO_EXPCLEAR
-};
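+/* Depth/stencil clear pipeline variants: SLOW leaves db_depth/stencil_clear
+ * unset, while the two FAST variants enable it and differ only in whether
+ * expclear is disabled (see create_depthstencil_pipeline).
+ */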
+enum { DEPTH_CLEAR_SLOW, DEPTH_CLEAR_FAST_EXPCLEAR, DEPTH_CLEAR_FAST_NO_EXPCLEAR };
static void
-build_color_shaders(struct nir_shader **out_vs,
- struct nir_shader **out_fs,
- uint32_t frag_output)
+build_color_shaders(struct nir_shader **out_vs, struct nir_shader **out_fs, uint32_t frag_output)
{
- nir_builder vs_b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, NULL, "meta_clear_color_vs");
- nir_builder fs_b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, "meta_clear_color_fs");
+ nir_builder vs_b =
+ nir_builder_init_simple_shader(MESA_SHADER_VERTEX, NULL, "meta_clear_color_vs");
+ nir_builder fs_b =
+ nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, "meta_clear_color_fs");
- const struct glsl_type *position_type = glsl_vec4_type();
- const struct glsl_type *color_type = glsl_vec4_type();
+ const struct glsl_type *position_type = glsl_vec4_type();
+ const struct glsl_type *color_type = glsl_vec4_type();
- nir_variable *vs_out_pos =
- nir_variable_create(vs_b.shader, nir_var_shader_out, position_type,
- "gl_Position");
- vs_out_pos->data.location = VARYING_SLOT_POS;
+ nir_variable *vs_out_pos =
+ nir_variable_create(vs_b.shader, nir_var_shader_out, position_type, "gl_Position");
+ vs_out_pos->data.location = VARYING_SLOT_POS;
- nir_ssa_def *in_color_load = nir_load_push_constant(&fs_b, 4, 32, nir_imm_int(&fs_b, 0), .range=16);
+ nir_ssa_def *in_color_load =
+ nir_load_push_constant(&fs_b, 4, 32, nir_imm_int(&fs_b, 0), .range = 16);
- nir_variable *fs_out_color =
- nir_variable_create(fs_b.shader, nir_var_shader_out, color_type,
- "f_color");
- fs_out_color->data.location = FRAG_RESULT_DATA0 + frag_output;
+ nir_variable *fs_out_color =
+ nir_variable_create(fs_b.shader, nir_var_shader_out, color_type, "f_color");
+ fs_out_color->data.location = FRAG_RESULT_DATA0 + frag_output;
- nir_store_var(&fs_b, fs_out_color, in_color_load, 0xf);
+ nir_store_var(&fs_b, fs_out_color, in_color_load, 0xf);
- nir_ssa_def *outvec = radv_meta_gen_rect_vertices(&vs_b);
- nir_store_var(&vs_b, vs_out_pos, outvec, 0xf);
+ nir_ssa_def *outvec = radv_meta_gen_rect_vertices(&vs_b);
+ nir_store_var(&vs_b, vs_out_pos, outvec, 0xf);
- const struct glsl_type *layer_type = glsl_int_type();
- nir_variable *vs_out_layer =
- nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type,
- "v_layer");
- vs_out_layer->data.location = VARYING_SLOT_LAYER;
- vs_out_layer->data.interpolation = INTERP_MODE_FLAT;
- nir_ssa_def *inst_id = nir_load_instance_id(&vs_b);
- nir_ssa_def *base_instance = nir_load_base_instance(&vs_b);
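+   /* The caller draws the clear instanced, one instance per layer (or per
+    * view), so the layer output is taken from the instance index.
+    */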
+ const struct glsl_type *layer_type = glsl_int_type();
+ nir_variable *vs_out_layer =
+ nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type, "v_layer");
+ vs_out_layer->data.location = VARYING_SLOT_LAYER;
+ vs_out_layer->data.interpolation = INTERP_MODE_FLAT;
+ nir_ssa_def *inst_id = nir_load_instance_id(&vs_b);
+ nir_ssa_def *base_instance = nir_load_base_instance(&vs_b);
- nir_ssa_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance);
- nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1);
+ nir_ssa_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance);
+ nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1);
- *out_vs = vs_b.shader;
- *out_fs = fs_b.shader;
+ *out_vs = vs_b.shader;
+ *out_fs = fs_b.shader;
}
static VkResult
-create_pipeline(struct radv_device *device,
- struct radv_render_pass *render_pass,
- uint32_t samples,
- struct nir_shader *vs_nir,
- struct nir_shader *fs_nir,
+create_pipeline(struct radv_device *device, struct radv_render_pass *render_pass, uint32_t samples,
+ struct nir_shader *vs_nir, struct nir_shader *fs_nir,
const VkPipelineVertexInputStateCreateInfo *vi_state,
const VkPipelineDepthStencilStateCreateInfo *ds_state,
- const VkPipelineColorBlendStateCreateInfo *cb_state,
- const VkPipelineLayout layout,
- const struct radv_graphics_pipeline_create_info *extra,
- const VkAllocationCallbacks *alloc,
- VkPipeline *pipeline)
+ const VkPipelineColorBlendStateCreateInfo *cb_state, const VkPipelineLayout layout,
+ const struct radv_graphics_pipeline_create_info *extra,
+ const VkAllocationCallbacks *alloc, VkPipeline *pipeline)
{
- VkDevice device_h = radv_device_to_handle(device);
- VkResult result;
-
- result = radv_graphics_pipeline_create(device_h,
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- &(VkGraphicsPipelineCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
- .stageCount = fs_nir ? 2 : 1,
- .pStages = (VkPipelineShaderStageCreateInfo[]) {
- {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_VERTEX_BIT,
- .module = vk_shader_module_handle_from_nir(vs_nir),
- .pName = "main",
- },
- {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
- .module = vk_shader_module_handle_from_nir(fs_nir),
- .pName = "main",
- },
- },
- .pVertexInputState = vi_state,
- .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
- .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
- .primitiveRestartEnable = false,
- },
- .pViewportState = &(VkPipelineViewportStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
- .viewportCount = 1,
- .scissorCount = 1,
- },
- .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
- .rasterizerDiscardEnable = false,
- .polygonMode = VK_POLYGON_MODE_FILL,
- .cullMode = VK_CULL_MODE_NONE,
- .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
- .depthBiasEnable = false,
- },
- .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
- .rasterizationSamples = samples,
- .sampleShadingEnable = false,
- .pSampleMask = NULL,
- .alphaToCoverageEnable = false,
- .alphaToOneEnable = false,
- },
- .pDepthStencilState = ds_state,
- .pColorBlendState = cb_state,
- .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
- /* The meta clear pipeline declares all state as dynamic.
- * As a consequence, vkCmdBindPipeline writes no dynamic state
- * to the cmd buffer. Therefore, at the end of the meta clear,
- * we need only restore dynamic state was vkCmdSet.
- */
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
- .dynamicStateCount = 8,
- .pDynamicStates = (VkDynamicState[]) {
- /* Everything except stencil write mask */
- VK_DYNAMIC_STATE_VIEWPORT,
- VK_DYNAMIC_STATE_SCISSOR,
- VK_DYNAMIC_STATE_LINE_WIDTH,
- VK_DYNAMIC_STATE_DEPTH_BIAS,
- VK_DYNAMIC_STATE_BLEND_CONSTANTS,
- VK_DYNAMIC_STATE_DEPTH_BOUNDS,
- VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
- VK_DYNAMIC_STATE_STENCIL_REFERENCE,
- },
- },
- .layout = layout,
- .flags = 0,
- .renderPass = radv_render_pass_to_handle(render_pass),
- .subpass = 0,
- },
- extra,
- alloc,
- pipeline);
-
- ralloc_free(vs_nir);
- ralloc_free(fs_nir);
-
- return result;
+ VkDevice device_h = radv_device_to_handle(device);
+ VkResult result;
+
+ result = radv_graphics_pipeline_create(
+ device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache),
+ &(VkGraphicsPipelineCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .stageCount = fs_nir ? 2 : 1,
+ .pStages =
+ (VkPipelineShaderStageCreateInfo[]){
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_VERTEX_BIT,
+ .module = vk_shader_module_handle_from_nir(vs_nir),
+ .pName = "main",
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .module = vk_shader_module_handle_from_nir(fs_nir),
+ .pName = "main",
+ },
+ },
+ .pVertexInputState = vi_state,
+ .pInputAssemblyState =
+ &(VkPipelineInputAssemblyStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
+ .primitiveRestartEnable = false,
+ },
+ .pViewportState =
+ &(VkPipelineViewportStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .viewportCount = 1,
+ .scissorCount = 1,
+ },
+ .pRasterizationState =
+ &(VkPipelineRasterizationStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .rasterizerDiscardEnable = false,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .cullMode = VK_CULL_MODE_NONE,
+ .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
+ .depthBiasEnable = false,
+ },
+ .pMultisampleState =
+ &(VkPipelineMultisampleStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .rasterizationSamples = samples,
+ .sampleShadingEnable = false,
+ .pSampleMask = NULL,
+ .alphaToCoverageEnable = false,
+ .alphaToOneEnable = false,
+ },
+ .pDepthStencilState = ds_state,
+ .pColorBlendState = cb_state,
+ .pDynamicState =
+ &(VkPipelineDynamicStateCreateInfo){
+ /* The meta clear pipeline declares all state as dynamic.
+ * As a consequence, vkCmdBindPipeline writes no dynamic state
+ * to the cmd buffer. Therefore, at the end of the meta clear,
+                * we need only restore the dynamic state that was set via vkCmdSet* calls.
+ */
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .dynamicStateCount = 8,
+ .pDynamicStates =
+ (VkDynamicState[]){
+ /* Everything except stencil write mask */
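+                      /* (the depth/stencil clears bake in a full write mask) */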
+ VK_DYNAMIC_STATE_VIEWPORT,
+ VK_DYNAMIC_STATE_SCISSOR,
+ VK_DYNAMIC_STATE_LINE_WIDTH,
+ VK_DYNAMIC_STATE_DEPTH_BIAS,
+ VK_DYNAMIC_STATE_BLEND_CONSTANTS,
+ VK_DYNAMIC_STATE_DEPTH_BOUNDS,
+ VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
+ VK_DYNAMIC_STATE_STENCIL_REFERENCE,
+ },
+ },
+ .layout = layout,
+ .flags = 0,
+ .renderPass = radv_render_pass_to_handle(render_pass),
+ .subpass = 0,
+ },
+ extra, alloc, pipeline);
+
+ ralloc_free(vs_nir);
+ ralloc_free(fs_nir);
+
+ return result;
}
static VkResult
-create_color_renderpass(struct radv_device *device,
- VkFormat vk_format,
- uint32_t samples,
- VkRenderPass *pass)
+create_color_renderpass(struct radv_device *device, VkFormat vk_format, uint32_t samples,
+ VkRenderPass *pass)
{
- mtx_lock(&device->meta_state.mtx);
- if (*pass) {
- mtx_unlock (&device->meta_state.mtx);
- return VK_SUCCESS;
- }
-
- VkResult result = radv_CreateRenderPass2(radv_device_to_handle(device),
- &(VkRenderPassCreateInfo2) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
- .attachmentCount = 1,
- .pAttachments = &(VkAttachmentDescription2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
- .format = vk_format,
- .samples = samples,
- .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
- .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
- .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
- .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- .subpassCount = 1,
- .pSubpasses = &(VkSubpassDescription2) {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
- .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
- .inputAttachmentCount = 0,
- .colorAttachmentCount = 1,
- .pColorAttachments = &(VkAttachmentReference2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
- .attachment = 0,
- .layout = VK_IMAGE_LAYOUT_GENERAL,
- },
- .pResolveAttachments = NULL,
- .pDepthStencilAttachment = &(VkAttachmentReference2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
- .attachment = VK_ATTACHMENT_UNUSED,
- .layout = VK_IMAGE_LAYOUT_GENERAL,
- },
- .preserveAttachmentCount = 0,
- .pPreserveAttachments = NULL,
- },
- .dependencyCount = 2,
- .pDependencies = (VkSubpassDependency2[]) {
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = VK_SUBPASS_EXTERNAL,
- .dstSubpass = 0,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- },
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = 0,
- .dstSubpass = VK_SUBPASS_EXTERNAL,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- }
- },
- }, &device->meta_state.alloc, pass);
- mtx_unlock(&device->meta_state.mtx);
- return result;
+ mtx_lock(&device->meta_state.mtx);
+ if (*pass) {
+ mtx_unlock(&device->meta_state.mtx);
+ return VK_SUCCESS;
+ }
+
+ VkResult result = radv_CreateRenderPass2(
+ radv_device_to_handle(device),
+ &(VkRenderPassCreateInfo2){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
+ .attachmentCount = 1,
+ .pAttachments =
+ &(VkAttachmentDescription2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
+ .format = vk_format,
+ .samples = samples,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ .subpassCount = 1,
+ .pSubpasses =
+ &(VkSubpassDescription2){
+ .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 1,
+ .pColorAttachments =
+ &(VkAttachmentReference2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
+ .attachment = 0,
+ .layout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment =
+ &(VkAttachmentReference2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
+ .attachment = VK_ATTACHMENT_UNUSED,
+ .layout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ .preserveAttachmentCount = 0,
+ .pPreserveAttachments = NULL,
+ },
+ .dependencyCount = 2,
+ .pDependencies =
+ (VkSubpassDependency2[]){{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = VK_SUBPASS_EXTERNAL,
+ .dstSubpass = 0,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0},
+ {.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = 0,
+ .dstSubpass = VK_SUBPASS_EXTERNAL,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0}},
+ },
+ &device->meta_state.alloc, pass);
+ mtx_unlock(&device->meta_state.mtx);
+ return result;
}
static VkResult
-create_color_pipeline(struct radv_device *device,
- uint32_t samples,
- uint32_t frag_output,
- VkPipeline *pipeline,
- VkRenderPass pass)
+create_color_pipeline(struct radv_device *device, uint32_t samples, uint32_t frag_output,
+ VkPipeline *pipeline, VkRenderPass pass)
{
- struct nir_shader *vs_nir;
- struct nir_shader *fs_nir;
- VkResult result;
-
- mtx_lock(&device->meta_state.mtx);
- if (*pipeline) {
- mtx_unlock(&device->meta_state.mtx);
- return VK_SUCCESS;
- }
-
- build_color_shaders(&vs_nir, &fs_nir, frag_output);
-
- const VkPipelineVertexInputStateCreateInfo vi_state = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
- .vertexBindingDescriptionCount = 0,
- .vertexAttributeDescriptionCount = 0,
- };
-
- const VkPipelineDepthStencilStateCreateInfo ds_state = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
- .depthTestEnable = false,
- .depthWriteEnable = false,
- .depthBoundsTestEnable = false,
- .stencilTestEnable = false,
- };
-
- VkPipelineColorBlendAttachmentState blend_attachment_state[MAX_RTS] = { 0 };
- blend_attachment_state[frag_output] = (VkPipelineColorBlendAttachmentState) {
- .blendEnable = false,
- .colorWriteMask = VK_COLOR_COMPONENT_A_BIT |
- VK_COLOR_COMPONENT_R_BIT |
- VK_COLOR_COMPONENT_G_BIT |
- VK_COLOR_COMPONENT_B_BIT,
- };
-
- const VkPipelineColorBlendStateCreateInfo cb_state = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
- .logicOpEnable = false,
- .attachmentCount = MAX_RTS,
- .pAttachments = blend_attachment_state
- };
-
-
- struct radv_graphics_pipeline_create_info extra = {
- .use_rectlist = true,
- };
- result = create_pipeline(device, radv_render_pass_from_handle(pass),
- samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state,
- device->meta_state.clear_color_p_layout,
- &extra, &device->meta_state.alloc, pipeline);
-
- mtx_unlock(&device->meta_state.mtx);
- return result;
+ struct nir_shader *vs_nir;
+ struct nir_shader *fs_nir;
+ VkResult result;
+
+ mtx_lock(&device->meta_state.mtx);
+ if (*pipeline) {
+ mtx_unlock(&device->meta_state.mtx);
+ return VK_SUCCESS;
+ }
+
+ build_color_shaders(&vs_nir, &fs_nir, frag_output);
+
+ const VkPipelineVertexInputStateCreateInfo vi_state = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .vertexBindingDescriptionCount = 0,
+ .vertexAttributeDescriptionCount = 0,
+ };
+
+ const VkPipelineDepthStencilStateCreateInfo ds_state = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
+ .depthTestEnable = false,
+ .depthWriteEnable = false,
+ .depthBoundsTestEnable = false,
+ .stencilTestEnable = false,
+ };
+
+ VkPipelineColorBlendAttachmentState blend_attachment_state[MAX_RTS] = {0};
+ blend_attachment_state[frag_output] = (VkPipelineColorBlendAttachmentState){
+ .blendEnable = false,
+ .colorWriteMask = VK_COLOR_COMPONENT_A_BIT | VK_COLOR_COMPONENT_R_BIT |
+ VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT,
+ };
+
+ const VkPipelineColorBlendStateCreateInfo cb_state = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .logicOpEnable = false,
+ .attachmentCount = MAX_RTS,
+ .pAttachments = blend_attachment_state};
+
+ struct radv_graphics_pipeline_create_info extra = {
+ .use_rectlist = true,
+ };
+ result =
+ create_pipeline(device, radv_render_pass_from_handle(pass), samples, vs_nir, fs_nir,
+ &vi_state, &ds_state, &cb_state, device->meta_state.clear_color_p_layout,
+ &extra, &device->meta_state.alloc, pipeline);
+
+ mtx_unlock(&device->meta_state.mtx);
+ return result;
}
static void
finish_meta_clear_htile_mask_state(struct radv_device *device)
{
- struct radv_meta_state *state = &device->meta_state;
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->clear_htile_mask_pipeline,
- &state->alloc);
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->clear_htile_mask_p_layout,
- &state->alloc);
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- state->clear_htile_mask_ds_layout,
- &state->alloc);
+ struct radv_meta_state *state = &device->meta_state;
+
+ radv_DestroyPipeline(radv_device_to_handle(device), state->clear_htile_mask_pipeline,
+ &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_htile_mask_p_layout,
+ &state->alloc);
+ radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->clear_htile_mask_ds_layout,
+ &state->alloc);
}
void
radv_device_finish_meta_clear_state(struct radv_device *device)
{
- struct radv_meta_state *state = &device->meta_state;
-
- for (uint32_t i = 0; i < ARRAY_SIZE(state->clear); ++i) {
- for (uint32_t j = 0; j < ARRAY_SIZE(state->clear[i].color_pipelines); ++j) {
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->clear[i].color_pipelines[j],
- &state->alloc);
- radv_DestroyRenderPass(radv_device_to_handle(device),
- state->clear[i].render_pass[j],
- &state->alloc);
- }
-
- for (uint32_t j = 0; j < NUM_DEPTH_CLEAR_PIPELINES; j++) {
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->clear[i].depth_only_pipeline[j],
- &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->clear[i].stencil_only_pipeline[j],
- &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->clear[i].depthstencil_pipeline[j],
- &state->alloc);
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->clear[i].depth_only_unrestricted_pipeline[j],
- &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->clear[i].stencil_only_unrestricted_pipeline[j],
- &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->clear[i].depthstencil_unrestricted_pipeline[j],
- &state->alloc);
- }
- radv_DestroyRenderPass(radv_device_to_handle(device),
- state->clear[i].depthstencil_rp,
- &state->alloc);
- }
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->clear_color_p_layout,
- &state->alloc);
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->clear_depth_p_layout,
- &state->alloc);
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->clear_depth_unrestricted_p_layout,
- &state->alloc);
-
- finish_meta_clear_htile_mask_state(device);
+ struct radv_meta_state *state = &device->meta_state;
+
+ for (uint32_t i = 0; i < ARRAY_SIZE(state->clear); ++i) {
+ for (uint32_t j = 0; j < ARRAY_SIZE(state->clear[i].color_pipelines); ++j) {
+ radv_DestroyPipeline(radv_device_to_handle(device), state->clear[i].color_pipelines[j],
+ &state->alloc);
+ radv_DestroyRenderPass(radv_device_to_handle(device), state->clear[i].render_pass[j],
+ &state->alloc);
+ }
+
+ for (uint32_t j = 0; j < NUM_DEPTH_CLEAR_PIPELINES; j++) {
+ radv_DestroyPipeline(radv_device_to_handle(device), state->clear[i].depth_only_pipeline[j],
+ &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->clear[i].stencil_only_pipeline[j], &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->clear[i].depthstencil_pipeline[j], &state->alloc);
+
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->clear[i].depth_only_unrestricted_pipeline[j], &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->clear[i].stencil_only_unrestricted_pipeline[j], &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->clear[i].depthstencil_unrestricted_pipeline[j], &state->alloc);
+ }
+ radv_DestroyRenderPass(radv_device_to_handle(device), state->clear[i].depthstencil_rp,
+ &state->alloc);
+ }
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_color_p_layout,
+ &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_depth_p_layout,
+ &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device),
+ state->clear_depth_unrestricted_p_layout, &state->alloc);
+
+ finish_meta_clear_htile_mask_state(device);
}
static void
-emit_color_clear(struct radv_cmd_buffer *cmd_buffer,
- const VkClearAttachment *clear_att,
- const VkClearRect *clear_rect,
- uint32_t view_mask)
+emit_color_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *clear_att,
+ const VkClearRect *clear_rect, uint32_t view_mask)
{
- struct radv_device *device = cmd_buffer->device;
- const struct radv_subpass *subpass = cmd_buffer->state.subpass;
- const uint32_t subpass_att = clear_att->colorAttachment;
- const uint32_t pass_att = subpass->color_attachments[subpass_att].attachment;
- const struct radv_image_view *iview = cmd_buffer->state.attachments ?
- cmd_buffer->state.attachments[pass_att].iview : NULL;
- uint32_t samples, samples_log2;
- VkFormat format;
- unsigned fs_key;
- VkClearColorValue clear_value = clear_att->clearValue.color;
- VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
- VkPipeline pipeline;
-
- /* When a framebuffer is bound to the current command buffer, get the
- * number of samples from it. Otherwise, get the number of samples from
- * the render pass because it's likely a secondary command buffer.
- */
- if (iview) {
- samples = iview->image->info.samples;
- format = iview->vk_format;
- } else {
- samples = cmd_buffer->state.pass->attachments[pass_att].samples;
- format = cmd_buffer->state.pass->attachments[pass_att].format;
- }
-
- samples_log2 = ffs(samples) - 1;
- fs_key = radv_format_meta_fs_key(device, format);
-
- if (fs_key == -1) {
- radv_finishme("color clears incomplete");
- return;
- }
-
- if (device->meta_state.clear[samples_log2].render_pass[fs_key] == VK_NULL_HANDLE) {
- VkResult ret = create_color_renderpass(device, radv_fs_key_format_exemplars[fs_key],
- samples,
- &device->meta_state.clear[samples_log2].render_pass[fs_key]);
- if (ret != VK_SUCCESS) {
- cmd_buffer->record_result = ret;
- return;
- }
- }
-
- if (device->meta_state.clear[samples_log2].color_pipelines[fs_key] == VK_NULL_HANDLE) {
- VkResult ret = create_color_pipeline(device, samples, 0,
- &device->meta_state.clear[samples_log2].color_pipelines[fs_key],
- device->meta_state.clear[samples_log2].render_pass[fs_key]);
- if (ret != VK_SUCCESS) {
- cmd_buffer->record_result = ret;
- return;
- }
- }
-
- pipeline = device->meta_state.clear[samples_log2].color_pipelines[fs_key];
- if (!pipeline) {
- radv_finishme("color clears incomplete");
- return;
- }
- assert(samples_log2 < ARRAY_SIZE(device->meta_state.clear));
- assert(pipeline);
- assert(clear_att->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
- assert(clear_att->colorAttachment < subpass->color_count);
-
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.clear_color_p_layout,
- VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16,
- &clear_value);
-
- struct radv_subpass clear_subpass = {
- .color_count = 1,
- .color_attachments = (struct radv_subpass_attachment[]) {
- subpass->color_attachments[clear_att->colorAttachment]
- },
- .depth_stencil_attachment = NULL,
- };
-
- radv_cmd_buffer_set_subpass(cmd_buffer, &clear_subpass);
-
- radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
- pipeline);
-
- radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
- .x = clear_rect->rect.offset.x,
- .y = clear_rect->rect.offset.y,
- .width = clear_rect->rect.extent.width,
- .height = clear_rect->rect.extent.height,
- .minDepth = 0.0f,
- .maxDepth = 1.0f
- });
-
- radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &clear_rect->rect);
-
- if (view_mask) {
- u_foreach_bit(i, view_mask)
- radv_CmdDraw(cmd_buffer_h, 3, 1, 0, i);
- } else {
- radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, clear_rect->baseArrayLayer);
- }
-
- radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
+ struct radv_device *device = cmd_buffer->device;
+ const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+ const uint32_t subpass_att = clear_att->colorAttachment;
+ const uint32_t pass_att = subpass->color_attachments[subpass_att].attachment;
+ const struct radv_image_view *iview =
+ cmd_buffer->state.attachments ? cmd_buffer->state.attachments[pass_att].iview : NULL;
+ uint32_t samples, samples_log2;
+ VkFormat format;
+ unsigned fs_key;
+ VkClearColorValue clear_value = clear_att->clearValue.color;
+ VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
+ VkPipeline pipeline;
+
+ /* When a framebuffer is bound to the current command buffer, get the
+ * number of samples from it. Otherwise, get the number of samples from
+ * the render pass because it's likely a secondary command buffer.
+ */
+ if (iview) {
+ samples = iview->image->info.samples;
+ format = iview->vk_format;
+ } else {
+ samples = cmd_buffer->state.pass->attachments[pass_att].samples;
+ format = cmd_buffer->state.pass->attachments[pass_att].format;
+ }
+
+ samples_log2 = ffs(samples) - 1;
+ fs_key = radv_format_meta_fs_key(device, format);
+
+ if (fs_key == -1) {
+ radv_finishme("color clears incomplete");
+ return;
+ }
+
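+   /* Render passes and pipelines for clears are created lazily, keyed on the
+    * sample count and on an exemplar format that shares the same fs_key.
+    */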
+ if (device->meta_state.clear[samples_log2].render_pass[fs_key] == VK_NULL_HANDLE) {
+ VkResult ret =
+ create_color_renderpass(device, radv_fs_key_format_exemplars[fs_key], samples,
+ &device->meta_state.clear[samples_log2].render_pass[fs_key]);
+ if (ret != VK_SUCCESS) {
+ cmd_buffer->record_result = ret;
+ return;
+ }
+ }
+
+ if (device->meta_state.clear[samples_log2].color_pipelines[fs_key] == VK_NULL_HANDLE) {
+ VkResult ret = create_color_pipeline(
+ device, samples, 0, &device->meta_state.clear[samples_log2].color_pipelines[fs_key],
+ device->meta_state.clear[samples_log2].render_pass[fs_key]);
+ if (ret != VK_SUCCESS) {
+ cmd_buffer->record_result = ret;
+ return;
+ }
+ }
+
+ pipeline = device->meta_state.clear[samples_log2].color_pipelines[fs_key];
+ if (!pipeline) {
+ radv_finishme("color clears incomplete");
+ return;
+ }
+ assert(samples_log2 < ARRAY_SIZE(device->meta_state.clear));
+ assert(pipeline);
+ assert(clear_att->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
+ assert(clear_att->colorAttachment < subpass->color_count);
+
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+ device->meta_state.clear_color_p_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0,
+ 16, &clear_value);
+
+ struct radv_subpass clear_subpass = {
+ .color_count = 1,
+ .color_attachments =
+ (struct radv_subpass_attachment[]){subpass->color_attachments[clear_att->colorAttachment]},
+ .depth_stencil_attachment = NULL,
+ };
+
+ radv_cmd_buffer_set_subpass(cmd_buffer, &clear_subpass);
+
+ radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
+
+ radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
+ &(VkViewport){.x = clear_rect->rect.offset.x,
+ .y = clear_rect->rect.offset.y,
+ .width = clear_rect->rect.extent.width,
+ .height = clear_rect->rect.extent.height,
+ .minDepth = 0.0f,
+ .maxDepth = 1.0f});
+
+ radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &clear_rect->rect);
+
+ if (view_mask) {
+ u_foreach_bit(i, view_mask) radv_CmdDraw(cmd_buffer_h, 3, 1, 0, i);
+ } else {
+ radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, clear_rect->baseArrayLayer);
+ }
+
+ radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
}
-
static void
-build_depthstencil_shader(struct nir_shader **out_vs,
- struct nir_shader **out_fs,
- bool unrestricted)
+build_depthstencil_shader(struct nir_shader **out_vs, struct nir_shader **out_fs, bool unrestricted)
{
- nir_builder vs_b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, NULL,
- unrestricted ? "meta_clear_depthstencil_unrestricted_vs"
- : "meta_clear_depthstencil_vs");
- nir_builder fs_b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL,
- unrestricted ? "meta_clear_depthstencil_unrestricted_fs"
- : "meta_clear_depthstencil_fs");
-
- const struct glsl_type *position_out_type = glsl_vec4_type();
-
- nir_variable *vs_out_pos =
- nir_variable_create(vs_b.shader, nir_var_shader_out, position_out_type,
- "gl_Position");
- vs_out_pos->data.location = VARYING_SLOT_POS;
-
- nir_ssa_def *z;
- if (unrestricted) {
- nir_ssa_def *in_color_load = nir_load_push_constant(&fs_b, 1, 32, nir_imm_int(&fs_b, 0), .range=4);
-
- nir_variable *fs_out_depth =
- nir_variable_create(fs_b.shader, nir_var_shader_out,
- glsl_int_type(), "f_depth");
- fs_out_depth->data.location = FRAG_RESULT_DEPTH;
- nir_store_var(&fs_b, fs_out_depth, in_color_load, 0x1);
-
- z = nir_imm_float(&vs_b, 0.0);
- } else {
- z = nir_load_push_constant(&vs_b, 1, 32, nir_imm_int(&vs_b, 0), .range=4);
- }
-
- nir_ssa_def *outvec = radv_meta_gen_rect_vertices_comp2(&vs_b, z);
- nir_store_var(&vs_b, vs_out_pos, outvec, 0xf);
-
- const struct glsl_type *layer_type = glsl_int_type();
- nir_variable *vs_out_layer =
- nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type,
- "v_layer");
- vs_out_layer->data.location = VARYING_SLOT_LAYER;
- vs_out_layer->data.interpolation = INTERP_MODE_FLAT;
- nir_ssa_def *inst_id = nir_load_instance_id(&vs_b);
- nir_ssa_def *base_instance = nir_load_base_instance(&vs_b);
-
- nir_ssa_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance);
- nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1);
-
- *out_vs = vs_b.shader;
- *out_fs = fs_b.shader;
+ nir_builder vs_b = nir_builder_init_simple_shader(
+ MESA_SHADER_VERTEX, NULL,
+ unrestricted ? "meta_clear_depthstencil_unrestricted_vs" : "meta_clear_depthstencil_vs");
+ nir_builder fs_b = nir_builder_init_simple_shader(
+ MESA_SHADER_FRAGMENT, NULL,
+ unrestricted ? "meta_clear_depthstencil_unrestricted_fs" : "meta_clear_depthstencil_fs");
+
+ const struct glsl_type *position_out_type = glsl_vec4_type();
+
+ nir_variable *vs_out_pos =
+ nir_variable_create(vs_b.shader, nir_var_shader_out, position_out_type, "gl_Position");
+ vs_out_pos->data.location = VARYING_SLOT_POS;
+
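+   /* With VK_EXT_depth_range_unrestricted the clear depth may lie outside
+    * [0, 1], so it is exported from the fragment shader instead of being
+    * baked into the vertex position.
+    */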
+ nir_ssa_def *z;
+ if (unrestricted) {
+ nir_ssa_def *in_color_load =
+ nir_load_push_constant(&fs_b, 1, 32, nir_imm_int(&fs_b, 0), .range = 4);
+
+ nir_variable *fs_out_depth =
+ nir_variable_create(fs_b.shader, nir_var_shader_out, glsl_int_type(), "f_depth");
+ fs_out_depth->data.location = FRAG_RESULT_DEPTH;
+ nir_store_var(&fs_b, fs_out_depth, in_color_load, 0x1);
+
+ z = nir_imm_float(&vs_b, 0.0);
+ } else {
+ z = nir_load_push_constant(&vs_b, 1, 32, nir_imm_int(&vs_b, 0), .range = 4);
+ }
+
+ nir_ssa_def *outvec = radv_meta_gen_rect_vertices_comp2(&vs_b, z);
+ nir_store_var(&vs_b, vs_out_pos, outvec, 0xf);
+
+ const struct glsl_type *layer_type = glsl_int_type();
+ nir_variable *vs_out_layer =
+ nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type, "v_layer");
+ vs_out_layer->data.location = VARYING_SLOT_LAYER;
+ vs_out_layer->data.interpolation = INTERP_MODE_FLAT;
+ nir_ssa_def *inst_id = nir_load_instance_id(&vs_b);
+ nir_ssa_def *base_instance = nir_load_base_instance(&vs_b);
+
+ nir_ssa_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance);
+ nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1);
+
+ *out_vs = vs_b.shader;
+ *out_fs = fs_b.shader;
}
static VkResult
-create_depthstencil_renderpass(struct radv_device *device,
- uint32_t samples,
- VkRenderPass *render_pass)
+create_depthstencil_renderpass(struct radv_device *device, uint32_t samples,
+ VkRenderPass *render_pass)
{
- mtx_lock(&device->meta_state.mtx);
- if (*render_pass) {
- mtx_unlock(&device->meta_state.mtx);
- return VK_SUCCESS;
- }
-
- VkResult result = radv_CreateRenderPass2(radv_device_to_handle(device),
- &(VkRenderPassCreateInfo2) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
- .attachmentCount = 1,
- .pAttachments = &(VkAttachmentDescription2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
- .format = VK_FORMAT_D32_SFLOAT_S8_UINT,
- .samples = samples,
- .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
- .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
- .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
- .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- .subpassCount = 1,
- .pSubpasses = &(VkSubpassDescription2) {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
- .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
- .inputAttachmentCount = 0,
- .colorAttachmentCount = 0,
- .pColorAttachments = NULL,
- .pResolveAttachments = NULL,
- .pDepthStencilAttachment = &(VkAttachmentReference2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
- .attachment = 0,
- .layout = VK_IMAGE_LAYOUT_GENERAL,
- },
- .preserveAttachmentCount = 0,
- .pPreserveAttachments = NULL,
- },
- .dependencyCount = 2,
- .pDependencies = (VkSubpassDependency2[]) {
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = VK_SUBPASS_EXTERNAL,
- .dstSubpass = 0,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- },
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = 0,
- .dstSubpass = VK_SUBPASS_EXTERNAL,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- }
- }
- }, &device->meta_state.alloc, render_pass);
- mtx_unlock(&device->meta_state.mtx);
- return result;
+ mtx_lock(&device->meta_state.mtx);
+ if (*render_pass) {
+ mtx_unlock(&device->meta_state.mtx);
+ return VK_SUCCESS;
+ }
+
+ VkResult result = radv_CreateRenderPass2(
+ radv_device_to_handle(device),
+ &(VkRenderPassCreateInfo2){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
+ .attachmentCount = 1,
+ .pAttachments =
+ &(VkAttachmentDescription2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
+ .format = VK_FORMAT_D32_SFLOAT_S8_UINT,
+ .samples = samples,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ .subpassCount = 1,
+ .pSubpasses =
+ &(VkSubpassDescription2){
+ .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 0,
+ .pColorAttachments = NULL,
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment =
+ &(VkAttachmentReference2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
+ .attachment = 0,
+ .layout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ .preserveAttachmentCount = 0,
+ .pPreserveAttachments = NULL,
+ },
+ .dependencyCount = 2,
+ .pDependencies =
+ (VkSubpassDependency2[]){{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = VK_SUBPASS_EXTERNAL,
+ .dstSubpass = 0,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0},
+ {.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = 0,
+ .dstSubpass = VK_SUBPASS_EXTERNAL,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0}}},
+ &device->meta_state.alloc, render_pass);
+ mtx_unlock(&device->meta_state.mtx);
+ return result;
}
static VkResult
-create_depthstencil_pipeline(struct radv_device *device,
- VkImageAspectFlags aspects,
- uint32_t samples,
- int index,
- bool unrestricted,
- VkPipeline *pipeline,
- VkRenderPass render_pass)
+create_depthstencil_pipeline(struct radv_device *device, VkImageAspectFlags aspects,
+ uint32_t samples, int index, bool unrestricted, VkPipeline *pipeline,
+ VkRenderPass render_pass)
{
- struct nir_shader *vs_nir, *fs_nir;
- VkResult result;
-
- mtx_lock(&device->meta_state.mtx);
- if (*pipeline) {
- mtx_unlock(&device->meta_state.mtx);
- return VK_SUCCESS;
- }
-
- build_depthstencil_shader(&vs_nir, &fs_nir, unrestricted);
-
- const VkPipelineVertexInputStateCreateInfo vi_state = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
- .vertexBindingDescriptionCount = 0,
- .vertexAttributeDescriptionCount = 0,
- };
-
- const VkPipelineDepthStencilStateCreateInfo ds_state = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
- .depthTestEnable = !!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT),
- .depthCompareOp = VK_COMPARE_OP_ALWAYS,
- .depthWriteEnable = !!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT),
- .depthBoundsTestEnable = false,
- .stencilTestEnable = !!(aspects & VK_IMAGE_ASPECT_STENCIL_BIT),
- .front = {
- .passOp = VK_STENCIL_OP_REPLACE,
- .compareOp = VK_COMPARE_OP_ALWAYS,
- .writeMask = UINT32_MAX,
- .reference = 0, /* dynamic */
- },
- .back = { 0 /* dont care */ },
- };
-
- const VkPipelineColorBlendStateCreateInfo cb_state = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
- .logicOpEnable = false,
- .attachmentCount = 0,
- .pAttachments = NULL,
- };
-
- struct radv_graphics_pipeline_create_info extra = {
- .use_rectlist = true,
- };
-
- if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
- extra.db_depth_clear = index == DEPTH_CLEAR_SLOW ? false : true;
- extra.db_depth_disable_expclear = index == DEPTH_CLEAR_FAST_NO_EXPCLEAR ? true : false;
- }
- if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
- extra.db_stencil_clear = index == DEPTH_CLEAR_SLOW ? false : true;
- extra.db_stencil_disable_expclear = index == DEPTH_CLEAR_FAST_NO_EXPCLEAR ? true : false;
- }
- result = create_pipeline(device, radv_render_pass_from_handle(render_pass),
- samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state,
- device->meta_state.clear_depth_p_layout,
- &extra, &device->meta_state.alloc, pipeline);
-
- mtx_unlock(&device->meta_state.mtx);
- return result;
+ struct nir_shader *vs_nir, *fs_nir;
+ VkResult result;
+
+ mtx_lock(&device->meta_state.mtx);
+ if (*pipeline) {
+ mtx_unlock(&device->meta_state.mtx);
+ return VK_SUCCESS;
+ }
+
+ build_depthstencil_shader(&vs_nir, &fs_nir, unrestricted);
+
+ const VkPipelineVertexInputStateCreateInfo vi_state = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .vertexBindingDescriptionCount = 0,
+ .vertexAttributeDescriptionCount = 0,
+ };
+
+ const VkPipelineDepthStencilStateCreateInfo ds_state = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
+ .depthTestEnable = !!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT),
+ .depthCompareOp = VK_COMPARE_OP_ALWAYS,
+ .depthWriteEnable = !!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT),
+ .depthBoundsTestEnable = false,
+ .stencilTestEnable = !!(aspects & VK_IMAGE_ASPECT_STENCIL_BIT),
+ .front =
+ {
+ .passOp = VK_STENCIL_OP_REPLACE,
+ .compareOp = VK_COMPARE_OP_ALWAYS,
+ .writeMask = UINT32_MAX,
+ .reference = 0, /* dynamic */
+ },
+ .back = {0 /* don't care */},
+ };
+
+ const VkPipelineColorBlendStateCreateInfo cb_state = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .logicOpEnable = false,
+ .attachmentCount = 0,
+ .pAttachments = NULL,
+ };
+
+ struct radv_graphics_pipeline_create_info extra = {
+ .use_rectlist = true,
+ };
+
+ if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
+ extra.db_depth_clear = index == DEPTH_CLEAR_SLOW ? false : true;
+ extra.db_depth_disable_expclear = index == DEPTH_CLEAR_FAST_NO_EXPCLEAR ? true : false;
+ }
+ if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
+ extra.db_stencil_clear = index == DEPTH_CLEAR_SLOW ? false : true;
+ extra.db_stencil_disable_expclear = index == DEPTH_CLEAR_FAST_NO_EXPCLEAR ? true : false;
+ }
+ result =
+ create_pipeline(device, radv_render_pass_from_handle(render_pass), samples, vs_nir, fs_nir,
+ &vi_state, &ds_state, &cb_state, device->meta_state.clear_depth_p_layout,
+ &extra, &device->meta_state.alloc, pipeline);
+
+ mtx_unlock(&device->meta_state.mtx);
+ return result;
}
-static bool depth_view_can_fast_clear(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_image_view *iview,
- VkImageAspectFlags aspects,
- VkImageLayout layout,
- bool in_render_loop,
- const VkClearRect *clear_rect,
- VkClearDepthStencilValue clear_value)
+static bool
+depth_view_can_fast_clear(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
+ VkImageAspectFlags aspects, VkImageLayout layout, bool in_render_loop,
+ const VkClearRect *clear_rect, VkClearDepthStencilValue clear_value)
{
- if (!iview)
- return false;
-
- uint32_t queue_mask = radv_image_queue_family_mask(iview->image,
- cmd_buffer->queue_family_index,
- cmd_buffer->queue_family_index);
- if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||
- clear_rect->rect.extent.width != iview->extent.width ||
- clear_rect->rect.extent.height != iview->extent.height)
- return false;
- if (radv_image_is_tc_compat_htile(iview->image) &&
- (((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && clear_value.depth != 0.0 &&
- clear_value.depth != 1.0) ||
- ((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && clear_value.stencil != 0)))
- return false;
- if (radv_htile_enabled(iview->image, iview->base_mip) &&
- iview->base_mip == 0 &&
- iview->base_layer == 0 &&
- iview->layer_count == iview->image->info.array_size &&
- radv_layout_is_htile_compressed(cmd_buffer->device, iview->image, layout, in_render_loop, queue_mask) &&
- radv_image_extent_compare(iview->image, &iview->extent))
- return true;
- return false;
+ if (!iview)
+ return false;
+
+ uint32_t queue_mask = radv_image_queue_family_mask(iview->image, cmd_buffer->queue_family_index,
+ cmd_buffer->queue_family_index);
+ if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||
+ clear_rect->rect.extent.width != iview->extent.width ||
+ clear_rect->rect.extent.height != iview->extent.height)
+ return false;
+ if (radv_image_is_tc_compat_htile(iview->image) &&
+ (((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && clear_value.depth != 0.0 &&
+ clear_value.depth != 1.0) ||
+ ((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && clear_value.stencil != 0)))
+ return false;
+ if (radv_htile_enabled(iview->image, iview->base_mip) && iview->base_mip == 0 &&
+ iview->base_layer == 0 && iview->layer_count == iview->image->info.array_size &&
+ radv_layout_is_htile_compressed(cmd_buffer->device, iview->image, layout, in_render_loop,
+ queue_mask) &&
+ radv_image_extent_compare(iview->image, &iview->extent))
+ return true;
+ return false;
}
static VkPipeline
-pick_depthstencil_pipeline(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_state *meta_state,
- const struct radv_image_view *iview,
- int samples_log2,
- VkImageAspectFlags aspects,
- VkImageLayout layout,
- bool in_render_loop,
- const VkClearRect *clear_rect,
- VkClearDepthStencilValue clear_value)
+pick_depthstencil_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_state *meta_state,
+ const struct radv_image_view *iview, int samples_log2,
+ VkImageAspectFlags aspects, VkImageLayout layout, bool in_render_loop,
+ const VkClearRect *clear_rect, VkClearDepthStencilValue clear_value)
{
- bool fast = depth_view_can_fast_clear(cmd_buffer, iview, aspects, layout,
- in_render_loop, clear_rect, clear_value);
- bool unrestricted = cmd_buffer->device->vk.enabled_extensions.EXT_depth_range_unrestricted;
- int index = DEPTH_CLEAR_SLOW;
- VkPipeline *pipeline;
-
- if (fast) {
- /* we don't know the previous clear values, so we always use
- * the NO_EXPCLEAR path */
- index = DEPTH_CLEAR_FAST_NO_EXPCLEAR;
- }
-
- switch (aspects) {
- case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
- pipeline = unrestricted ?
- &meta_state->clear[samples_log2].depthstencil_unrestricted_pipeline[index] :
- &meta_state->clear[samples_log2].depthstencil_pipeline[index];
- break;
- case VK_IMAGE_ASPECT_DEPTH_BIT:
- pipeline = unrestricted ?
- &meta_state->clear[samples_log2].depth_only_unrestricted_pipeline[index] :
- &meta_state->clear[samples_log2].depth_only_pipeline[index];
- break;
- case VK_IMAGE_ASPECT_STENCIL_BIT:
- pipeline = unrestricted ?
- &meta_state->clear[samples_log2].stencil_only_unrestricted_pipeline[index] :
- &meta_state->clear[samples_log2].stencil_only_pipeline[index];
- break;
- default:
- unreachable("expected depth or stencil aspect");
- }
-
- if (cmd_buffer->device->meta_state.clear[samples_log2].depthstencil_rp == VK_NULL_HANDLE) {
- VkResult ret = create_depthstencil_renderpass(cmd_buffer->device, 1u << samples_log2,
- &cmd_buffer->device->meta_state.clear[samples_log2].depthstencil_rp);
- if (ret != VK_SUCCESS) {
- cmd_buffer->record_result = ret;
- return VK_NULL_HANDLE;
- }
- }
-
- if (*pipeline == VK_NULL_HANDLE) {
- VkResult ret = create_depthstencil_pipeline(cmd_buffer->device, aspects, 1u << samples_log2, index, unrestricted,
- pipeline, cmd_buffer->device->meta_state.clear[samples_log2].depthstencil_rp);
- if (ret != VK_SUCCESS) {
- cmd_buffer->record_result = ret;
- return VK_NULL_HANDLE;
- }
- }
- return *pipeline;
+ bool fast = depth_view_can_fast_clear(cmd_buffer, iview, aspects, layout, in_render_loop,
+ clear_rect, clear_value);
+ bool unrestricted = cmd_buffer->device->vk.enabled_extensions.EXT_depth_range_unrestricted;
+ int index = DEPTH_CLEAR_SLOW;
+ VkPipeline *pipeline;
+
+ if (fast) {
+ /* we don't know the previous clear values, so we always use
+ * the NO_EXPCLEAR path */
+ index = DEPTH_CLEAR_FAST_NO_EXPCLEAR;
+ }
+
+ switch (aspects) {
+ case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
+ pipeline = unrestricted
+ ? &meta_state->clear[samples_log2].depthstencil_unrestricted_pipeline[index]
+ : &meta_state->clear[samples_log2].depthstencil_pipeline[index];
+ break;
+ case VK_IMAGE_ASPECT_DEPTH_BIT:
+ pipeline = unrestricted
+ ? &meta_state->clear[samples_log2].depth_only_unrestricted_pipeline[index]
+ : &meta_state->clear[samples_log2].depth_only_pipeline[index];
+ break;
+ case VK_IMAGE_ASPECT_STENCIL_BIT:
+ pipeline = unrestricted
+ ? &meta_state->clear[samples_log2].stencil_only_unrestricted_pipeline[index]
+ : &meta_state->clear[samples_log2].stencil_only_pipeline[index];
+ break;
+ default:
+ unreachable("expected depth or stencil aspect");
+ }
+
+ if (cmd_buffer->device->meta_state.clear[samples_log2].depthstencil_rp == VK_NULL_HANDLE) {
+ VkResult ret = create_depthstencil_renderpass(
+ cmd_buffer->device, 1u << samples_log2,
+ &cmd_buffer->device->meta_state.clear[samples_log2].depthstencil_rp);
+ if (ret != VK_SUCCESS) {
+ cmd_buffer->record_result = ret;
+ return VK_NULL_HANDLE;
+ }
+ }
+
+ if (*pipeline == VK_NULL_HANDLE) {
+ VkResult ret = create_depthstencil_pipeline(
+ cmd_buffer->device, aspects, 1u << samples_log2, index, unrestricted, pipeline,
+ cmd_buffer->device->meta_state.clear[samples_log2].depthstencil_rp);
+ if (ret != VK_SUCCESS) {
+ cmd_buffer->record_result = ret;
+ return VK_NULL_HANDLE;
+ }
+ }
+ return *pipeline;
}
static void
-emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer,
- const VkClearAttachment *clear_att,
- const VkClearRect *clear_rect,
- struct radv_subpass_attachment *ds_att,
+emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *clear_att,
+ const VkClearRect *clear_rect, struct radv_subpass_attachment *ds_att,
uint32_t view_mask)
{
- struct radv_device *device = cmd_buffer->device;
- struct radv_meta_state *meta_state = &device->meta_state;
- const struct radv_subpass *subpass = cmd_buffer->state.subpass;
- const uint32_t pass_att = ds_att->attachment;
- VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;
- VkImageAspectFlags aspects = clear_att->aspectMask;
- const struct radv_image_view *iview = cmd_buffer->state.attachments ?
- cmd_buffer->state.attachments[pass_att].iview : NULL;
- uint32_t samples, samples_log2;
- VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
-
- /* When a framebuffer is bound to the current command buffer, get the
- * number of samples from it. Otherwise, get the number of samples from
- * the render pass because it's likely a secondary command buffer.
- */
- if (iview) {
- samples = iview->image->info.samples;
- } else {
- samples = cmd_buffer->state.pass->attachments[pass_att].samples;
- }
-
- samples_log2 = ffs(samples) - 1;
-
- assert(pass_att != VK_ATTACHMENT_UNUSED);
-
- if (!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
- clear_value.depth = 1.0f;
-
- if (cmd_buffer->device->vk.enabled_extensions.EXT_depth_range_unrestricted) {
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.clear_depth_unrestricted_p_layout,
- VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4,
- &clear_value.depth);
- } else {
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.clear_depth_p_layout,
- VK_SHADER_STAGE_VERTEX_BIT, 0, 4,
- &clear_value.depth);
- }
-
- uint32_t prev_reference = cmd_buffer->state.dynamic.stencil_reference.front;
- if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
- radv_CmdSetStencilReference(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT,
- clear_value.stencil);
- }
-
- VkPipeline pipeline = pick_depthstencil_pipeline(cmd_buffer,
- meta_state,
- iview,
- samples_log2,
- aspects,
- ds_att->layout,
- ds_att->in_render_loop,
- clear_rect,
- clear_value);
- if (!pipeline)
- return;
-
- struct radv_subpass clear_subpass = {
- .color_count = 0,
- .color_attachments = NULL,
- .depth_stencil_attachment = ds_att,
- };
-
- radv_cmd_buffer_set_subpass(cmd_buffer, &clear_subpass);
-
- radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
- pipeline);
-
- if (depth_view_can_fast_clear(cmd_buffer, iview, aspects,
- ds_att->layout, ds_att->in_render_loop,
- clear_rect, clear_value))
- radv_update_ds_clear_metadata(cmd_buffer, iview,
- clear_value, aspects);
-
- radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
- .x = clear_rect->rect.offset.x,
- .y = clear_rect->rect.offset.y,
- .width = clear_rect->rect.extent.width,
- .height = clear_rect->rect.extent.height,
- .minDepth = 0.0f,
- .maxDepth = 1.0f
- });
-
- radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &clear_rect->rect);
-
- if (view_mask) {
- u_foreach_bit(i, view_mask)
- radv_CmdDraw(cmd_buffer_h, 3, 1, 0, i);
- } else {
- radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, clear_rect->baseArrayLayer);
- }
-
- if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
- radv_CmdSetStencilReference(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT,
- prev_reference);
- }
-
- radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
+ struct radv_device *device = cmd_buffer->device;
+ struct radv_meta_state *meta_state = &device->meta_state;
+ const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+ const uint32_t pass_att = ds_att->attachment;
+ VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;
+ VkImageAspectFlags aspects = clear_att->aspectMask;
+ const struct radv_image_view *iview =
+ cmd_buffer->state.attachments ? cmd_buffer->state.attachments[pass_att].iview : NULL;
+ uint32_t samples, samples_log2;
+ VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
+
+ /* When a framebuffer is bound to the current command buffer, get the
+ * number of samples from it. Otherwise, get the number of samples from
+ * the render pass because it's likely a secondary command buffer.
+ */
+ if (iview) {
+ samples = iview->image->info.samples;
+ } else {
+ samples = cmd_buffer->state.pass->attachments[pass_att].samples;
+ }
+
+ samples_log2 = ffs(samples) - 1;
+
+ assert(pass_att != VK_ATTACHMENT_UNUSED);
+
+ if (!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
+ clear_value.depth = 1.0f;
+
+ if (cmd_buffer->device->vk.enabled_extensions.EXT_depth_range_unrestricted) {
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+ device->meta_state.clear_depth_unrestricted_p_layout,
+ VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4, &clear_value.depth);
+ } else {
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+ device->meta_state.clear_depth_p_layout, VK_SHADER_STAGE_VERTEX_BIT, 0,
+ 4, &clear_value.depth);
+ }
+
+ uint32_t prev_reference = cmd_buffer->state.dynamic.stencil_reference.front;
+ if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
+ radv_CmdSetStencilReference(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT, clear_value.stencil);
+ }
+
+ VkPipeline pipeline =
+ pick_depthstencil_pipeline(cmd_buffer, meta_state, iview, samples_log2, aspects,
+ ds_att->layout, ds_att->in_render_loop, clear_rect, clear_value);
+ if (!pipeline)
+ return;
+
+ struct radv_subpass clear_subpass = {
+ .color_count = 0,
+ .color_attachments = NULL,
+ .depth_stencil_attachment = ds_att,
+ };
+
+ radv_cmd_buffer_set_subpass(cmd_buffer, &clear_subpass);
+
+ radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
+
+ if (depth_view_can_fast_clear(cmd_buffer, iview, aspects, ds_att->layout, ds_att->in_render_loop,
+ clear_rect, clear_value))
+ radv_update_ds_clear_metadata(cmd_buffer, iview, clear_value, aspects);
+
+ radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
+ &(VkViewport){.x = clear_rect->rect.offset.x,
+ .y = clear_rect->rect.offset.y,
+ .width = clear_rect->rect.extent.width,
+ .height = clear_rect->rect.extent.height,
+ .minDepth = 0.0f,
+ .maxDepth = 1.0f});
+
+ radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &clear_rect->rect);
+
+ if (view_mask) {
+ u_foreach_bit(i, view_mask) radv_CmdDraw(cmd_buffer_h, 3, 1, 0, i);
+ } else {
+ radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, clear_rect->baseArrayLayer);
+ }
+
+ if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
+ radv_CmdSetStencilReference(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT, prev_reference);
+ }
+
+ radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
}
static uint32_t
clear_htile_mask(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image,
- struct radeon_winsys_bo *bo, uint64_t offset, uint64_t size,
- uint32_t htile_value, uint32_t htile_mask)
+ struct radeon_winsys_bo *bo, uint64_t offset, uint64_t size, uint32_t htile_value,
+ uint32_t htile_mask)
{
- struct radv_device *device = cmd_buffer->device;
- struct radv_meta_state *state = &device->meta_state;
- uint64_t block_count = round_up_u64(size, 1024);
- struct radv_meta_saved_state saved_state;
-
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_COMPUTE_PIPELINE |
- RADV_META_SAVE_CONSTANTS |
- RADV_META_SAVE_DESCRIPTORS);
-
- struct radv_buffer dst_buffer = {
- .bo = bo,
- .offset = offset,
- .size = size
- };
-
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_COMPUTE,
- state->clear_htile_mask_pipeline);
-
- radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
- state->clear_htile_mask_p_layout,
- 0, /* set */
- 1, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .pBufferInfo = &(VkDescriptorBufferInfo) {
- .buffer = radv_buffer_to_handle(&dst_buffer),
- .offset = 0,
- .range = size
- }
- }
- });
-
- const unsigned constants[2] = {
- htile_value & htile_mask,
- ~htile_mask,
- };
-
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- state->clear_htile_mask_p_layout,
- VK_SHADER_STAGE_COMPUTE_BIT, 0, 8,
- constants);
-
- radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);
-
- radv_meta_restore(&saved_state, cmd_buffer);
-
- return RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_INV_VCACHE |
- radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);
+ struct radv_device *device = cmd_buffer->device;
+ struct radv_meta_state *state = &device->meta_state;
+ uint64_t block_count = round_up_u64(size, 1024);
+ struct radv_meta_saved_state saved_state;
+
+ radv_meta_save(
+ &saved_state, cmd_buffer,
+ RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
+
+ struct radv_buffer dst_buffer = {.bo = bo, .offset = offset, .size = size};
+
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
+ state->clear_htile_mask_pipeline);
+
+ radv_meta_push_descriptor_set(
+ cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, state->clear_htile_mask_p_layout, 0, /* set */
+ 1, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){
+ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .pBufferInfo = &(VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(&dst_buffer),
+ .offset = 0,
+ .range = size}}});
+
+ const unsigned constants[2] = {
+ htile_value & htile_mask,
+ ~htile_mask,
+ };
+
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), state->clear_htile_mask_p_layout,
+ VK_SHADER_STAGE_COMPUTE_BIT, 0, 8, constants);
+
+ radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);
+
+ radv_meta_restore(&saved_state, cmd_buffer);
+
+ return RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
+ radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);
}
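
A note on the dispatch sizing above: each invocation of the clear_htile_mask shader (built further down in this file) rewrites one 16-byte HTILE block, and a workgroup is 64 invocations wide, so one group covers 1024 bytes of HTILE, which is what the block_count computed above accounts for. A minimal standalone sketch of that arithmetic; the helper below is illustrative only and is not the driver's round_up_u64:

   #include <stdint.h>
   #include <stdio.h>

   /* Hypothetical helper mirroring the dispatch sizing: 64 threads per
    * group, 16 bytes per thread, so 1024 bytes of HTILE per workgroup. */
   static uint64_t htile_clear_group_count(uint64_t size)
   {
      const uint64_t bytes_per_group = 64 * 16;
      return (size + bytes_per_group - 1) / bytes_per_group;
   }

   int main(void)
   {
      printf("%llu\n", (unsigned long long)htile_clear_group_count(4096)); /* 4 */
      printf("%llu\n", (unsigned long long)htile_clear_group_count(5000)); /* 5 */
      return 0;
   }
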
static uint32_t
-radv_get_htile_fast_clear_value(const struct radv_device *device,
- const struct radv_image *image,
- VkClearDepthStencilValue value)
+radv_get_htile_fast_clear_value(const struct radv_device *device, const struct radv_image *image,
+ VkClearDepthStencilValue value)
{
- uint32_t clear_value;
+ uint32_t clear_value;
- if (radv_image_tile_stencil_disabled(device, image)) {
- clear_value = value.depth ? 0xfffffff0 : 0;
- } else {
- clear_value = value.depth ? 0xfffc00f0 : 0xf0;
- }
+ if (radv_image_tile_stencil_disabled(device, image)) {
+ clear_value = value.depth ? 0xfffffff0 : 0;
+ } else {
+ clear_value = value.depth ? 0xfffc00f0 : 0xf0;
+ }
- return clear_value;
+ return clear_value;
}
static uint32_t
-radv_get_htile_mask(const struct radv_device *device,
- const struct radv_image *image, VkImageAspectFlags aspects)
+radv_get_htile_mask(const struct radv_device *device, const struct radv_image *image,
+ VkImageAspectFlags aspects)
{
- uint32_t mask = 0;
-
- if (radv_image_tile_stencil_disabled(device, image)) {
- /* The whole HTILE buffer is used when there is no stencil. */
- mask = UINT32_MAX;
- } else {
- if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
- mask |= 0xfffffc0f;
- if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT)
- mask |= 0x000003f0;
- }
-
- return mask;
+ uint32_t mask = 0;
+
+ if (radv_image_tile_stencil_disabled(device, image)) {
+ /* The whole HTILE buffer is used when there is no stencil. */
+ mask = UINT32_MAX;
+ } else {
+ if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
+ mask |= 0xfffffc0f;
+ if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT)
+ mask |= 0x000003f0;
+ }
+
+ return mask;
}
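
As a quick check of the masks above: the depth and stencil aspects own disjoint HTILE bits, and together they cover the whole 32-bit word, which is why the no-stencil case can simply use UINT32_MAX. A minimal standalone sketch, assuming illustrative copies of the two constants rather than the driver's helper:

   #include <assert.h>
   #include <stdint.h>

   /* Illustrative copies of the per-aspect HTILE masks used above. */
   #define HTILE_DEPTH_MASK   0xfffffc0fu
   #define HTILE_STENCIL_MASK 0x000003f0u

   int main(void)
   {
      /* The two aspects select disjoint bits... */
      assert((HTILE_DEPTH_MASK & HTILE_STENCIL_MASK) == 0);
      /* ...and together they cover the whole 32-bit HTILE word. */
      assert((HTILE_DEPTH_MASK | HTILE_STENCIL_MASK) == UINT32_MAX);
      return 0;
   }
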
static bool
radv_is_fast_clear_depth_allowed(VkClearDepthStencilValue value)
{
- return value.depth == 1.0f || value.depth == 0.0f;
+ return value.depth == 1.0f || value.depth == 0.0f;
}
static bool
radv_is_fast_clear_stencil_allowed(VkClearDepthStencilValue value)
{
- return value.stencil == 0;
+ return value.stencil == 0;
}
static bool
-radv_can_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_image_view *iview,
- VkImageLayout image_layout,
- bool in_render_loop,
- VkImageAspectFlags aspects,
- const VkClearRect *clear_rect,
- const VkClearDepthStencilValue clear_value,
- uint32_t view_mask)
+radv_can_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
+ VkImageLayout image_layout, bool in_render_loop,
+ VkImageAspectFlags aspects, const VkClearRect *clear_rect,
+ const VkClearDepthStencilValue clear_value, uint32_t view_mask)
{
- if (!iview || !iview->support_fast_clear)
- return false;
-
- if (!radv_layout_is_htile_compressed(cmd_buffer->device, iview->image, image_layout, in_render_loop,
- radv_image_queue_family_mask(iview->image,
- cmd_buffer->queue_family_index,
- cmd_buffer->queue_family_index)))
- return false;
-
- if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||
- clear_rect->rect.extent.width != iview->image->info.width ||
- clear_rect->rect.extent.height != iview->image->info.height)
- return false;
-
- if (view_mask && (iview->image->info.array_size >= 32 ||
- (1u << iview->image->info.array_size) - 1u != view_mask))
- return false;
- if (!view_mask && clear_rect->baseArrayLayer != 0)
- return false;
- if (!view_mask && clear_rect->layerCount != iview->image->info.array_size)
- return false;
-
- if (((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
- !radv_is_fast_clear_depth_allowed(clear_value)) ||
- ((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
- !radv_is_fast_clear_stencil_allowed(clear_value)))
- return false;
-
- return true;
+ if (!iview || !iview->support_fast_clear)
+ return false;
+
+ if (!radv_layout_is_htile_compressed(
+ cmd_buffer->device, iview->image, image_layout, in_render_loop,
+ radv_image_queue_family_mask(iview->image, cmd_buffer->queue_family_index,
+ cmd_buffer->queue_family_index)))
+ return false;
+
+ if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||
+ clear_rect->rect.extent.width != iview->image->info.width ||
+ clear_rect->rect.extent.height != iview->image->info.height)
+ return false;
+
+ if (view_mask && (iview->image->info.array_size >= 32 ||
+ (1u << iview->image->info.array_size) - 1u != view_mask))
+ return false;
+ if (!view_mask && clear_rect->baseArrayLayer != 0)
+ return false;
+ if (!view_mask && clear_rect->layerCount != iview->image->info.array_size)
+ return false;
+
+ if (((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && !radv_is_fast_clear_depth_allowed(clear_value)) ||
+ ((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
+ !radv_is_fast_clear_stencil_allowed(clear_value)))
+ return false;
+
+ return true;
}
static void
-radv_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_image_view *iview,
- const VkClearAttachment *clear_att,
- enum radv_cmd_flush_bits *pre_flush,
- enum radv_cmd_flush_bits *post_flush)
+radv_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
+ const VkClearAttachment *clear_att, enum radv_cmd_flush_bits *pre_flush,
+ enum radv_cmd_flush_bits *post_flush)
{
- VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;
- VkImageAspectFlags aspects = clear_att->aspectMask;
- uint32_t clear_word, flush_bits;
-
- clear_word = radv_get_htile_fast_clear_value(cmd_buffer->device, iview->image, clear_value);
-
- if (pre_flush) {
- enum radv_cmd_flush_bits bits =
- radv_src_access_flush(cmd_buffer, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, iview->image) |
- radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, iview->image);
- cmd_buffer->state.flush_bits |= bits & ~ *pre_flush;
- *pre_flush |= cmd_buffer->state.flush_bits;
- }
-
- VkImageSubresourceRange range = {
- .aspectMask = aspects,
- .baseMipLevel = iview->base_mip,
- .levelCount = iview->level_count,
- .baseArrayLayer = iview->base_layer,
- .layerCount = iview->layer_count,
- };
-
- flush_bits = radv_clear_htile(cmd_buffer, iview->image, &range, clear_word);
-
- if (iview->image->planes[0].surface.has_stencil &&
- !(aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) {
- /* Synchronize after performing a depth-only or a stencil-only
- * fast clear because the driver uses an optimized path which
- * performs a read-modify-write operation, and the two separate
- * aspects might use the same HTILE memory.
- */
- cmd_buffer->state.flush_bits |= flush_bits;
- }
-
- radv_update_ds_clear_metadata(cmd_buffer, iview, clear_value, aspects);
- if (post_flush) {
- *post_flush |= flush_bits;
- }
+ VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;
+ VkImageAspectFlags aspects = clear_att->aspectMask;
+ uint32_t clear_word, flush_bits;
+
+ clear_word = radv_get_htile_fast_clear_value(cmd_buffer->device, iview->image, clear_value);
+
+ if (pre_flush) {
+ enum radv_cmd_flush_bits bits =
+ radv_src_access_flush(cmd_buffer, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
+ iview->image) |
+ radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, iview->image);
+ cmd_buffer->state.flush_bits |= bits & ~*pre_flush;
+ *pre_flush |= cmd_buffer->state.flush_bits;
+ }
+
+ VkImageSubresourceRange range = {
+ .aspectMask = aspects,
+ .baseMipLevel = iview->base_mip,
+ .levelCount = iview->level_count,
+ .baseArrayLayer = iview->base_layer,
+ .layerCount = iview->layer_count,
+ };
+
+ flush_bits = radv_clear_htile(cmd_buffer, iview->image, &range, clear_word);
+
+ if (iview->image->planes[0].surface.has_stencil &&
+ !(aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) {
+ /* Synchronize after performing a depth-only or a stencil-only
+ * fast clear because the driver uses an optimized path which
+ * performs a read-modify-write operation, and the two separate
+ * aspects might use the same HTILE memory.
+ */
+ cmd_buffer->state.flush_bits |= flush_bits;
+ }
+
+ radv_update_ds_clear_metadata(cmd_buffer, iview, clear_value, aspects);
+ if (post_flush) {
+ *post_flush |= flush_bits;
+ }
}
static nir_shader *
build_clear_htile_mask_shader()
{
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_clear_htile_mask");
- b.shader->info.cs.local_size[0] = 64;
- b.shader->info.cs.local_size[1] = 1;
- b.shader->info.cs.local_size[2] = 1;
+ nir_builder b =
+ nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_clear_htile_mask");
+ b.shader->info.cs.local_size[0] = 64;
+ b.shader->info.cs.local_size[1] = 1;
+ b.shader->info.cs.local_size[2] = 1;
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
- nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
- nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info.cs.local_size[0],
- b.shader->info.cs.local_size[1],
- b.shader->info.cs.local_size[2], 0);
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
+ nir_ssa_def *block_size =
+ nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1],
+ b.shader->info.cs.local_size[2], 0);
- nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+ nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
- nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
- offset = nir_channel(&b, offset, 0);
+ nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
+ offset = nir_channel(&b, offset, 0);
- nir_ssa_def *buf = radv_meta_load_descriptor(&b, 0, 0);
+ nir_ssa_def *buf = radv_meta_load_descriptor(&b, 0, 0);
- nir_ssa_def *constants = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range=8);
+ nir_ssa_def *constants = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
- nir_ssa_def *load = nir_load_ssbo(&b, 4, 32, buf, offset, .align_mul=16);
+ nir_ssa_def *load = nir_load_ssbo(&b, 4, 32, buf, offset, .align_mul = 16);
- /* data = (data & ~htile_mask) | (htile_value & htile_mask) */
- nir_ssa_def *data = nir_iand(&b, load, nir_channel(&b, constants, 1));
- data = nir_ior(&b, data, nir_channel(&b, constants, 0));
+ /* data = (data & ~htile_mask) | (htile_value & htile_mask) */
+ nir_ssa_def *data = nir_iand(&b, load, nir_channel(&b, constants, 1));
+ data = nir_ior(&b, data, nir_channel(&b, constants, 0));
- nir_store_ssbo(&b, data, buf, offset, .write_mask=0xf,
- .access=ACCESS_NON_READABLE, .align_mul=16);
+ nir_store_ssbo(&b, data, buf, offset, .write_mask = 0xf, .access = ACCESS_NON_READABLE,
+ .align_mul = 16);
- return b.shader;
+ return b.shader;
}
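
The push constants set up in clear_htile_mask() pair with this shader: constant 0 is htile_value & htile_mask and constant 1 is ~htile_mask, so each 32-bit word becomes data = (data & ~htile_mask) | (htile_value & htile_mask). A minimal sketch of that update on an arbitrary example word; the values are illustrative only, not real HTILE contents:

   #include <assert.h>
   #include <stdint.h>

   /* Illustrative restatement of the per-word read-modify-write done by the
    * shader: keep the bits outside htile_mask, overwrite the bits inside it. */
   static uint32_t update_htile_word(uint32_t data, uint32_t htile_value, uint32_t htile_mask)
   {
      return (data & ~htile_mask) | (htile_value & htile_mask);
   }

   int main(void)
   {
      /* Stencil-only clear: rewrite the 0x000003f0 bits to the 0xf0 pattern
       * and leave the depth bits of the existing word untouched. */
      assert(update_htile_word(0x12345678u, 0xf0u, 0x000003f0u) == 0x123454f8u);
      return 0;
   }
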
static VkResult
init_meta_clear_htile_mask_state(struct radv_device *device)
{
- struct radv_meta_state *state = &device->meta_state;
- VkResult result;
- nir_shader *cs = build_clear_htile_mask_shader();
-
- VkDescriptorSetLayoutCreateInfo ds_layout_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 1,
- .pBindings = (VkDescriptorSetLayoutBinding[]) {
- {
- .binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- }
- };
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
- &ds_layout_info, &state->alloc,
- &state->clear_htile_mask_ds_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
- VkPipelineLayoutCreateInfo p_layout_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 1,
- .pSetLayouts = &state->clear_htile_mask_ds_layout,
- .pushConstantRangeCount = 1,
- .pPushConstantRanges = &(VkPushConstantRange){
- VK_SHADER_STAGE_COMPUTE_BIT, 0, 8,
- },
- };
-
- result = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &p_layout_info, &state->alloc,
- &state->clear_htile_mask_p_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
- VkPipelineShaderStageCreateInfo shader_stage = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = vk_shader_module_handle_from_nir(cs),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = shader_stage,
- .flags = 0,
- .layout = state->clear_htile_mask_p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&state->cache),
- 1, &pipeline_info, NULL,
- &state->clear_htile_mask_pipeline);
-
- ralloc_free(cs);
- return result;
+ struct radv_meta_state *state = &device->meta_state;
+ VkResult result;
+ nir_shader *cs = build_clear_htile_mask_shader();
+
+ VkDescriptorSetLayoutCreateInfo ds_layout_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 1,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_layout_info,
+ &state->alloc, &state->clear_htile_mask_ds_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineLayoutCreateInfo p_layout_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &state->clear_htile_mask_ds_layout,
+ .pushConstantRangeCount = 1,
+ .pPushConstantRanges =
+ &(VkPushConstantRange){
+ VK_SHADER_STAGE_COMPUTE_BIT,
+ 0,
+ 8,
+ },
+ };
+
+ result = radv_CreatePipelineLayout(radv_device_to_handle(device), &p_layout_info, &state->alloc,
+ &state->clear_htile_mask_p_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineShaderStageCreateInfo shader_stage = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(cs),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = shader_stage,
+ .flags = 0,
+ .layout = state->clear_htile_mask_p_layout,
+ };
+
+ result = radv_CreateComputePipelines(radv_device_to_handle(device),
+ radv_pipeline_cache_to_handle(&state->cache), 1,
+ &pipeline_info, NULL, &state->clear_htile_mask_pipeline);
+
+ ralloc_free(cs);
+ return result;
fail:
- ralloc_free(cs);
- return result;
+ ralloc_free(cs);
+ return result;
}
VkResult
radv_device_init_meta_clear_state(struct radv_device *device, bool on_demand)
{
- VkResult res;
- struct radv_meta_state *state = &device->meta_state;
-
- VkPipelineLayoutCreateInfo pl_color_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 0,
- .pushConstantRangeCount = 1,
- .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16},
- };
-
- res = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &pl_color_create_info,
- &device->meta_state.alloc,
- &device->meta_state.clear_color_p_layout);
- if (res != VK_SUCCESS)
- goto fail;
-
- VkPipelineLayoutCreateInfo pl_depth_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 0,
- .pushConstantRangeCount = 1,
- .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_VERTEX_BIT, 0, 4},
- };
-
- res = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &pl_depth_create_info,
- &device->meta_state.alloc,
- &device->meta_state.clear_depth_p_layout);
- if (res != VK_SUCCESS)
- goto fail;
-
- VkPipelineLayoutCreateInfo pl_depth_unrestricted_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 0,
- .pushConstantRangeCount = 1,
- .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4},
- };
-
- res = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &pl_depth_unrestricted_create_info,
- &device->meta_state.alloc,
- &device->meta_state.clear_depth_unrestricted_p_layout);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = init_meta_clear_htile_mask_state(device);
- if (res != VK_SUCCESS)
- goto fail;
-
- if (on_demand)
- return VK_SUCCESS;
-
- for (uint32_t i = 0; i < ARRAY_SIZE(state->clear); ++i) {
- uint32_t samples = 1 << i;
- for (uint32_t j = 0; j < NUM_META_FS_KEYS; ++j) {
- VkFormat format = radv_fs_key_format_exemplars[j];
- unsigned fs_key = radv_format_meta_fs_key(device, format);
- assert(!state->clear[i].color_pipelines[fs_key]);
-
- res = create_color_renderpass(device, format, samples,
- &state->clear[i].render_pass[fs_key]);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = create_color_pipeline(device, samples, 0, &state->clear[i].color_pipelines[fs_key],
- state->clear[i].render_pass[fs_key]);
- if (res != VK_SUCCESS)
- goto fail;
-
- }
-
- res = create_depthstencil_renderpass(device,
- samples,
- &state->clear[i].depthstencil_rp);
- if (res != VK_SUCCESS)
- goto fail;
-
- for (uint32_t j = 0; j < NUM_DEPTH_CLEAR_PIPELINES; j++) {
- res = create_depthstencil_pipeline(device,
- VK_IMAGE_ASPECT_DEPTH_BIT,
- samples,
- j,
- false,
- &state->clear[i].depth_only_pipeline[j],
- state->clear[i].depthstencil_rp);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = create_depthstencil_pipeline(device,
- VK_IMAGE_ASPECT_STENCIL_BIT,
- samples,
- j,
- false,
- &state->clear[i].stencil_only_pipeline[j],
- state->clear[i].depthstencil_rp);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = create_depthstencil_pipeline(device,
- VK_IMAGE_ASPECT_DEPTH_BIT |
- VK_IMAGE_ASPECT_STENCIL_BIT,
- samples,
- j,
- false,
- &state->clear[i].depthstencil_pipeline[j],
- state->clear[i].depthstencil_rp);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = create_depthstencil_pipeline(device,
- VK_IMAGE_ASPECT_DEPTH_BIT,
- samples,
- j,
- true,
- &state->clear[i].depth_only_unrestricted_pipeline[j],
- state->clear[i].depthstencil_rp);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = create_depthstencil_pipeline(device,
- VK_IMAGE_ASPECT_STENCIL_BIT,
- samples,
- j,
- true,
- &state->clear[i].stencil_only_unrestricted_pipeline[j],
- state->clear[i].depthstencil_rp);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = create_depthstencil_pipeline(device,
- VK_IMAGE_ASPECT_DEPTH_BIT |
- VK_IMAGE_ASPECT_STENCIL_BIT,
- samples,
- j,
- true,
- &state->clear[i].depthstencil_unrestricted_pipeline[j],
- state->clear[i].depthstencil_rp);
- if (res != VK_SUCCESS)
- goto fail;
- }
- }
- return VK_SUCCESS;
+ VkResult res;
+ struct radv_meta_state *state = &device->meta_state;
+
+ VkPipelineLayoutCreateInfo pl_color_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 0,
+ .pushConstantRangeCount = 1,
+ .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16},
+ };
+
+ res = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_color_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.clear_color_p_layout);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineLayoutCreateInfo pl_depth_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 0,
+ .pushConstantRangeCount = 1,
+ .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_VERTEX_BIT, 0, 4},
+ };
+
+ res = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_depth_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.clear_depth_p_layout);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineLayoutCreateInfo pl_depth_unrestricted_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 0,
+ .pushConstantRangeCount = 1,
+ .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4},
+ };
+
+ res = radv_CreatePipelineLayout(radv_device_to_handle(device),
+ &pl_depth_unrestricted_create_info, &device->meta_state.alloc,
+ &device->meta_state.clear_depth_unrestricted_p_layout);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = init_meta_clear_htile_mask_state(device);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ if (on_demand)
+ return VK_SUCCESS;
+
+ for (uint32_t i = 0; i < ARRAY_SIZE(state->clear); ++i) {
+ uint32_t samples = 1 << i;
+ for (uint32_t j = 0; j < NUM_META_FS_KEYS; ++j) {
+ VkFormat format = radv_fs_key_format_exemplars[j];
+ unsigned fs_key = radv_format_meta_fs_key(device, format);
+ assert(!state->clear[i].color_pipelines[fs_key]);
+
+ res =
+ create_color_renderpass(device, format, samples, &state->clear[i].render_pass[fs_key]);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_color_pipeline(device, samples, 0, &state->clear[i].color_pipelines[fs_key],
+ state->clear[i].render_pass[fs_key]);
+ if (res != VK_SUCCESS)
+ goto fail;
+ }
+
+ res = create_depthstencil_renderpass(device, samples, &state->clear[i].depthstencil_rp);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ for (uint32_t j = 0; j < NUM_DEPTH_CLEAR_PIPELINES; j++) {
+ res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, samples, j, false,
+ &state->clear[i].depth_only_pipeline[j],
+ state->clear[i].depthstencil_rp);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, samples, j, false,
+ &state->clear[i].stencil_only_pipeline[j],
+ state->clear[i].depthstencil_rp);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_depthstencil_pipeline(
+ device, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, samples, j, false,
+ &state->clear[i].depthstencil_pipeline[j], state->clear[i].depthstencil_rp);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, samples, j, true,
+ &state->clear[i].depth_only_unrestricted_pipeline[j],
+ state->clear[i].depthstencil_rp);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, samples, j, true,
+ &state->clear[i].stencil_only_unrestricted_pipeline[j],
+ state->clear[i].depthstencil_rp);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_depthstencil_pipeline(
+ device, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, samples, j, true,
+ &state->clear[i].depthstencil_unrestricted_pipeline[j],
+ state->clear[i].depthstencil_rp);
+ if (res != VK_SUCCESS)
+ goto fail;
+ }
+ }
+ return VK_SUCCESS;
fail:
- radv_device_finish_meta_clear_state(device);
- return res;
+ radv_device_finish_meta_clear_state(device);
+ return res;
}
static uint32_t
radv_get_cmask_fast_clear_value(const struct radv_image *image)
{
- uint32_t value = 0; /* Default value when no DCC. */
+ uint32_t value = 0; /* Default value when no DCC. */
- /* The fast-clear value is different for images that have both DCC and
- * CMASK metadata.
- */
- if (radv_image_has_dcc(image)) {
- /* DCC fast clear with MSAA should clear CMASK to 0xC. */
- return image->info.samples > 1 ? 0xcccccccc : 0xffffffff;
- }
+ /* The fast-clear value is different for images that have both DCC and
+ * CMASK metadata.
+ */
+ if (radv_image_has_dcc(image)) {
+ /* DCC fast clear with MSAA should clear CMASK to 0xC. */
+ return image->info.samples > 1 ? 0xcccccccc : 0xffffffff;
+ }
- return value;
+ return value;
}
uint32_t
-radv_clear_cmask(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *range, uint32_t value)
+radv_clear_cmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *range, uint32_t value)
{
- uint64_t offset = image->offset + image->planes[0].surface.cmask_offset;
- unsigned slice_size = image->planes[0].surface.cmask_slice_size;
- uint64_t size;
+ uint64_t offset = image->offset + image->planes[0].surface.cmask_offset;
+ unsigned slice_size = image->planes[0].surface.cmask_slice_size;
+ uint64_t size;
- offset += slice_size * range->baseArrayLayer;
- size = slice_size * radv_get_layerCount(image, range);
+ offset += slice_size * range->baseArrayLayer;
+ size = slice_size * radv_get_layerCount(image, range);
- return radv_fill_buffer(cmd_buffer, image, image->bo, offset, size, value);
+ return radv_fill_buffer(cmd_buffer, image, image->bo, offset, size, value);
}
-
uint32_t
-radv_clear_fmask(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *range, uint32_t value)
+radv_clear_fmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *range, uint32_t value)
{
- uint64_t offset = image->offset + image->planes[0].surface.fmask_offset;
- unsigned slice_size = image->planes[0].surface.fmask_slice_size;
- uint64_t size;
+ uint64_t offset = image->offset + image->planes[0].surface.fmask_offset;
+ unsigned slice_size = image->planes[0].surface.fmask_slice_size;
+ uint64_t size;
- /* MSAA images do not support mipmap levels. */
- assert(range->baseMipLevel == 0 &&
- radv_get_levelCount(image, range) == 1);
+ /* MSAA images do not support mipmap levels. */
+ assert(range->baseMipLevel == 0 && radv_get_levelCount(image, range) == 1);
- offset += slice_size * range->baseArrayLayer;
- size = slice_size * radv_get_layerCount(image, range);
+ offset += slice_size * range->baseArrayLayer;
+ size = slice_size * radv_get_layerCount(image, range);
- return radv_fill_buffer(cmd_buffer, image, image->bo, offset, size, value);
+ return radv_fill_buffer(cmd_buffer, image, image->bo, offset, size, value);
}
uint32_t
-radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *range, uint32_t value)
+radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *range, uint32_t value)
{
- uint32_t level_count = radv_get_levelCount(image, range);
- uint32_t layer_count = radv_get_layerCount(image, range);
- uint32_t flush_bits = 0;
-
- /* Mark the image as being compressed. */
- radv_update_dcc_metadata(cmd_buffer, image, range, true);
-
- for (uint32_t l = 0; l < level_count; l++) {
- uint64_t offset = image->offset + image->planes[0].surface.dcc_offset;
- uint32_t level = range->baseMipLevel + l;
- uint64_t size;
-
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
- /* DCC for mipmaps+layers is currently disabled. */
- offset += image->planes[0].surface.dcc_slice_size * range->baseArrayLayer +
- image->planes[0].surface.u.gfx9.dcc_levels[level].offset;
- size = image->planes[0].surface.u.gfx9.dcc_levels[level].size * layer_count;
- } else if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) {
- /* Mipmap levels and layers aren't implemented. */
- assert(level == 0);
- size = image->planes[0].surface.dcc_size;
- } else {
- const struct legacy_surf_level *surf_level =
- &image->planes[0].surface.u.legacy.level[level];
-
- /* If dcc_fast_clear_size is 0 (which might happen for
- * mipmaps) the fill buffer operation below is a no-op.
- * This can only happen during initialization as the
- * fast clear path falls back to slow clears if one
- * level can't be fast cleared.
- */
- offset += surf_level->dcc_offset +
- surf_level->dcc_slice_fast_clear_size * range->baseArrayLayer;
- size = surf_level->dcc_slice_fast_clear_size * radv_get_layerCount(image, range);
- }
-
- /* Do not clear this level if it can't be compressed. */
- if (!size)
- continue;
-
- flush_bits |= radv_fill_buffer(cmd_buffer, image, image->bo, offset,
- size, value);
- }
-
- return flush_bits;
+ uint32_t level_count = radv_get_levelCount(image, range);
+ uint32_t layer_count = radv_get_layerCount(image, range);
+ uint32_t flush_bits = 0;
+
+ /* Mark the image as being compressed. */
+ radv_update_dcc_metadata(cmd_buffer, image, range, true);
+
+ for (uint32_t l = 0; l < level_count; l++) {
+ uint64_t offset = image->offset + image->planes[0].surface.dcc_offset;
+ uint32_t level = range->baseMipLevel + l;
+ uint64_t size;
+
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
+ /* DCC for mipmaps+layers is currently disabled. */
+ offset += image->planes[0].surface.dcc_slice_size * range->baseArrayLayer +
+ image->planes[0].surface.u.gfx9.dcc_levels[level].offset;
+ size = image->planes[0].surface.u.gfx9.dcc_levels[level].size * layer_count;
+ } else if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) {
+ /* Mipmap levels and layers aren't implemented. */
+ assert(level == 0);
+ size = image->planes[0].surface.dcc_size;
+ } else {
+ const struct legacy_surf_level *surf_level =
+ &image->planes[0].surface.u.legacy.level[level];
+
+ /* If dcc_fast_clear_size is 0 (which might happen for
+ * mipmaps) the fill buffer operation below is a no-op.
+ * This can only happen during initialization as the
+ * fast clear path falls back to slow clears if one
+ * level can't be fast cleared.
+ */
+ offset +=
+ surf_level->dcc_offset + surf_level->dcc_slice_fast_clear_size * range->baseArrayLayer;
+ size = surf_level->dcc_slice_fast_clear_size * radv_get_layerCount(image, range);
+ }
+
+ /* Do not clear this level if it can't be compressed. */
+ if (!size)
+ continue;
+
+ flush_bits |= radv_fill_buffer(cmd_buffer, image, image->bo, offset, size, value);
+ }
+
+ return flush_bits;
}
uint32_t
-radv_clear_htile(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_image *image,
- const VkImageSubresourceRange *range,
- uint32_t value)
+radv_clear_htile(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image,
+ const VkImageSubresourceRange *range, uint32_t value)
{
- uint32_t level_count = radv_get_levelCount(image, range);
- uint32_t flush_bits = 0;
- uint32_t htile_mask;
-
- htile_mask = radv_get_htile_mask(cmd_buffer->device, image, range->aspectMask);
-
- if (level_count != image->info.levels) {
- assert(cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10);
-
- /* Clear individual levels separately. */
- for (uint32_t l = 0; l < level_count; l++) {
- uint32_t level = range->baseMipLevel + l;
- uint64_t offset = image->offset + image->planes[0].surface.htile_offset +
- image->planes[0].surface.u.gfx9.htile_levels[level].offset;
- uint32_t size = image->planes[0].surface.u.gfx9.htile_levels[level].size;
-
- /* Do not clear this level if it can't be compressed. */
- if (!size)
- continue;
-
- if (htile_mask == UINT_MAX) {
- /* Clear the whole HTILE buffer. */
- flush_bits = radv_fill_buffer(cmd_buffer, image, image->bo, offset,
- size, value);
- } else {
- /* Only clear depth or stencil bytes in the HTILE buffer. */
- flush_bits = clear_htile_mask(cmd_buffer, image, image->bo, offset,
- size, value, htile_mask);
- }
- }
- } else {
- unsigned layer_count = radv_get_layerCount(image, range);
- uint64_t size = image->planes[0].surface.htile_slice_size * layer_count;
- uint64_t offset = image->offset + image->planes[0].surface.htile_offset +
- image->planes[0].surface.htile_slice_size * range->baseArrayLayer;
-
- if (htile_mask == UINT_MAX) {
- /* Clear the whole HTILE buffer. */
- flush_bits = radv_fill_buffer(cmd_buffer, image, image->bo, offset,
- size, value);
- } else {
- /* Only clear depth or stencil bytes in the HTILE buffer. */
- flush_bits = clear_htile_mask(cmd_buffer, image, image->bo, offset,
- size, value, htile_mask);
- }
- }
-
- return flush_bits;
+ uint32_t level_count = radv_get_levelCount(image, range);
+ uint32_t flush_bits = 0;
+ uint32_t htile_mask;
+
+ htile_mask = radv_get_htile_mask(cmd_buffer->device, image, range->aspectMask);
+
+ if (level_count != image->info.levels) {
+ assert(cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10);
+
+ /* Clear individual levels separately. */
+ for (uint32_t l = 0; l < level_count; l++) {
+ uint32_t level = range->baseMipLevel + l;
+ uint64_t offset = image->offset + image->planes[0].surface.htile_offset +
+ image->planes[0].surface.u.gfx9.htile_levels[level].offset;
+ uint32_t size = image->planes[0].surface.u.gfx9.htile_levels[level].size;
+
+ /* Do not clear this level if it can't be compressed. */
+ if (!size)
+ continue;
+
+ if (htile_mask == UINT_MAX) {
+ /* Clear the whole HTILE buffer. */
+ flush_bits = radv_fill_buffer(cmd_buffer, image, image->bo, offset, size, value);
+ } else {
+ /* Only clear depth or stencil bytes in the HTILE buffer. */
+ flush_bits =
+ clear_htile_mask(cmd_buffer, image, image->bo, offset, size, value, htile_mask);
+ }
+ }
+ } else {
+ unsigned layer_count = radv_get_layerCount(image, range);
+ uint64_t size = image->planes[0].surface.htile_slice_size * layer_count;
+ uint64_t offset = image->offset + image->planes[0].surface.htile_offset +
+ image->planes[0].surface.htile_slice_size * range->baseArrayLayer;
+
+ if (htile_mask == UINT_MAX) {
+ /* Clear the whole HTILE buffer. */
+ flush_bits = radv_fill_buffer(cmd_buffer, image, image->bo, offset, size, value);
+ } else {
+ /* Only clear depth or stencil bytes in the HTILE buffer. */
+ flush_bits =
+ clear_htile_mask(cmd_buffer, image, image->bo, offset, size, value, htile_mask);
+ }
+ }
+
+ return flush_bits;
}
enum {
- RADV_DCC_CLEAR_REG = 0x20202020U,
- RADV_DCC_CLEAR_MAIN_1 = 0x80808080U,
- RADV_DCC_CLEAR_SECONDARY_1 = 0x40404040U
+ RADV_DCC_CLEAR_REG = 0x20202020U,
+ RADV_DCC_CLEAR_MAIN_1 = 0x80808080U,
+ RADV_DCC_CLEAR_SECONDARY_1 = 0x40404040U
};
-static void vi_get_fast_clear_parameters(struct radv_device *device,
- VkFormat image_format,
- VkFormat view_format,
- const VkClearColorValue *clear_value,
- uint32_t* reset_value,
- bool *can_avoid_fast_clear_elim)
+static void
+vi_get_fast_clear_parameters(struct radv_device *device, VkFormat image_format,
+ VkFormat view_format, const VkClearColorValue *clear_value,
+ uint32_t *reset_value, bool *can_avoid_fast_clear_elim)
{
- bool values[4] = {0};
- int extra_channel;
- bool main_value = false;
- bool extra_value = false;
- bool has_color = false;
- bool has_alpha = false;
- *can_avoid_fast_clear_elim = false;
-
- *reset_value = RADV_DCC_CLEAR_REG;
-
- const struct util_format_description *desc = vk_format_description(view_format);
- if (view_format == VK_FORMAT_B10G11R11_UFLOAT_PACK32 ||
- view_format == VK_FORMAT_R5G6B5_UNORM_PACK16 ||
- view_format == VK_FORMAT_B5G6R5_UNORM_PACK16)
- extra_channel = -1;
- else if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
- if (vi_alpha_is_on_msb(device, view_format))
- extra_channel = desc->nr_channels - 1;
- else
- extra_channel = 0;
- } else
- return;
-
- for (int i = 0; i < 4; i++) {
- int index = desc->swizzle[i] - PIPE_SWIZZLE_X;
- if (desc->swizzle[i] < PIPE_SWIZZLE_X ||
- desc->swizzle[i] > PIPE_SWIZZLE_W)
- continue;
-
- if (desc->channel[i].pure_integer &&
- desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
- /* Use the maximum value for clamping the clear color. */
- int max = u_bit_consecutive(0, desc->channel[i].size - 1);
-
- values[i] = clear_value->int32[i] != 0;
- if (clear_value->int32[i] != 0 && MIN2(clear_value->int32[i], max) != max)
- return;
- } else if (desc->channel[i].pure_integer &&
- desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
- /* Use the maximum value for clamping the clear color. */
- unsigned max = u_bit_consecutive(0, desc->channel[i].size);
-
- values[i] = clear_value->uint32[i] != 0U;
- if (clear_value->uint32[i] != 0U && MIN2(clear_value->uint32[i], max) != max)
- return;
- } else {
- values[i] = clear_value->float32[i] != 0.0F;
- if (clear_value->float32[i] != 0.0F && clear_value->float32[i] != 1.0F)
- return;
- }
-
- if (index == extra_channel) {
- extra_value = values[i];
- has_alpha = true;
- } else {
- main_value = values[i];
- has_color = true;
- }
- }
-
- /* If alpha isn't present, make it the same as color, and vice versa. */
- if (!has_alpha)
- extra_value = main_value;
- else if (!has_color)
- main_value = extra_value;
-
- for (int i = 0; i < 4; ++i)
- if (values[i] != main_value &&
- desc->swizzle[i] - PIPE_SWIZZLE_X != extra_channel &&
- desc->swizzle[i] >= PIPE_SWIZZLE_X &&
- desc->swizzle[i] <= PIPE_SWIZZLE_W)
- return;
-
- *can_avoid_fast_clear_elim = true;
- *reset_value = 0;
- if (main_value)
- *reset_value |= RADV_DCC_CLEAR_MAIN_1;
-
- if (extra_value)
- *reset_value |= RADV_DCC_CLEAR_SECONDARY_1;
- return;
+ bool values[4] = {0};
+ int extra_channel;
+ bool main_value = false;
+ bool extra_value = false;
+ bool has_color = false;
+ bool has_alpha = false;
+ *can_avoid_fast_clear_elim = false;
+
+ *reset_value = RADV_DCC_CLEAR_REG;
+
+ const struct util_format_description *desc = vk_format_description(view_format);
+ if (view_format == VK_FORMAT_B10G11R11_UFLOAT_PACK32 ||
+ view_format == VK_FORMAT_R5G6B5_UNORM_PACK16 || view_format == VK_FORMAT_B5G6R5_UNORM_PACK16)
+ extra_channel = -1;
+ else if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
+ if (vi_alpha_is_on_msb(device, view_format))
+ extra_channel = desc->nr_channels - 1;
+ else
+ extra_channel = 0;
+ } else
+ return;
+
+ for (int i = 0; i < 4; i++) {
+ int index = desc->swizzle[i] - PIPE_SWIZZLE_X;
+ if (desc->swizzle[i] < PIPE_SWIZZLE_X || desc->swizzle[i] > PIPE_SWIZZLE_W)
+ continue;
+
+ if (desc->channel[i].pure_integer && desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
+ /* Use the maximum value for clamping the clear color. */
+ int max = u_bit_consecutive(0, desc->channel[i].size - 1);
+
+ values[i] = clear_value->int32[i] != 0;
+ if (clear_value->int32[i] != 0 && MIN2(clear_value->int32[i], max) != max)
+ return;
+ } else if (desc->channel[i].pure_integer &&
+ desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
+ /* Use the maximum value for clamping the clear color. */
+ unsigned max = u_bit_consecutive(0, desc->channel[i].size);
+
+ values[i] = clear_value->uint32[i] != 0U;
+ if (clear_value->uint32[i] != 0U && MIN2(clear_value->uint32[i], max) != max)
+ return;
+ } else {
+ values[i] = clear_value->float32[i] != 0.0F;
+ if (clear_value->float32[i] != 0.0F && clear_value->float32[i] != 1.0F)
+ return;
+ }
+
+ if (index == extra_channel) {
+ extra_value = values[i];
+ has_alpha = true;
+ } else {
+ main_value = values[i];
+ has_color = true;
+ }
+ }
+
+ /* If alpha isn't present, make it the same as color, and vice versa. */
+ if (!has_alpha)
+ extra_value = main_value;
+ else if (!has_color)
+ main_value = extra_value;
+
+ for (int i = 0; i < 4; ++i)
+ if (values[i] != main_value && desc->swizzle[i] - PIPE_SWIZZLE_X != extra_channel &&
+ desc->swizzle[i] >= PIPE_SWIZZLE_X && desc->swizzle[i] <= PIPE_SWIZZLE_W)
+ return;
+
+ *can_avoid_fast_clear_elim = true;
+ *reset_value = 0;
+ if (main_value)
+ *reset_value |= RADV_DCC_CLEAR_MAIN_1;
+
+ if (extra_value)
+ *reset_value |= RADV_DCC_CLEAR_SECONDARY_1;
+ return;
}
static bool
-radv_can_fast_clear_color(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_image_view *iview,
- VkImageLayout image_layout,
- bool in_render_loop,
- const VkClearRect *clear_rect,
- VkClearColorValue clear_value,
- uint32_t view_mask)
+radv_can_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
+ VkImageLayout image_layout, bool in_render_loop,
+ const VkClearRect *clear_rect, VkClearColorValue clear_value,
+ uint32_t view_mask)
{
- uint32_t clear_color[2];
-
- if (!iview || !iview->support_fast_clear)
- return false;
-
- if (!radv_layout_can_fast_clear(cmd_buffer->device, iview->image, image_layout, in_render_loop,
- radv_image_queue_family_mask(iview->image,
- cmd_buffer->queue_family_index,
- cmd_buffer->queue_family_index)))
- return false;
-
- if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||
- clear_rect->rect.extent.width != iview->image->info.width ||
- clear_rect->rect.extent.height != iview->image->info.height)
- return false;
-
- if (view_mask && (iview->image->info.array_size >= 32 ||
- (1u << iview->image->info.array_size) - 1u != view_mask))
- return false;
- if (!view_mask && clear_rect->baseArrayLayer != 0)
- return false;
- if (!view_mask && clear_rect->layerCount != iview->image->info.array_size)
- return false;
-
- /* DCC */
- if (!radv_format_pack_clear_color(iview->vk_format,
- clear_color, &clear_value))
- return false;
-
- if (!radv_image_has_clear_value(iview->image) &&
- (clear_color[0] != 0 || clear_color[1] != 0))
- return false;
-
- if (radv_dcc_enabled(iview->image, iview->base_mip)) {
- bool can_avoid_fast_clear_elim;
- uint32_t reset_value;
-
- vi_get_fast_clear_parameters(cmd_buffer->device,
- iview->image->vk_format,
- iview->vk_format,
- &clear_value, &reset_value,
- &can_avoid_fast_clear_elim);
-
- if (iview->image->info.samples > 1) {
- /* DCC fast clear with MSAA should clear CMASK. */
- /* FIXME: This doesn't work for now. There is a
- * hardware bug with fast clears and DCC for MSAA
- * textures. AMDVLK has a workaround but it doesn't
- * seem to work here. Note that we might emit useless
- * CB flushes but that shouldn't matter.
- */
- if (!can_avoid_fast_clear_elim)
- return false;
- }
-
- if (iview->image->info.levels > 1 &&
- cmd_buffer->device->physical_device->rad_info.chip_class == GFX8) {
- for (uint32_t l = 0; l < iview->level_count; l++) {
- uint32_t level = iview->base_mip + l;
- struct legacy_surf_level *surf_level =
- &iview->image->planes[0].surface.u.legacy.level[level];
-
- /* Do not fast clears if one level can't be
- * fast cleared.
- */
- if (!surf_level->dcc_fast_clear_size)
- return false;
- }
- }
- }
-
- return true;
+ uint32_t clear_color[2];
+
+ if (!iview || !iview->support_fast_clear)
+ return false;
+
+ if (!radv_layout_can_fast_clear(
+ cmd_buffer->device, iview->image, image_layout, in_render_loop,
+ radv_image_queue_family_mask(iview->image, cmd_buffer->queue_family_index,
+ cmd_buffer->queue_family_index)))
+ return false;
+
+ if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||
+ clear_rect->rect.extent.width != iview->image->info.width ||
+ clear_rect->rect.extent.height != iview->image->info.height)
+ return false;
+
+ if (view_mask && (iview->image->info.array_size >= 32 ||
+ (1u << iview->image->info.array_size) - 1u != view_mask))
+ return false;
+ if (!view_mask && clear_rect->baseArrayLayer != 0)
+ return false;
+ if (!view_mask && clear_rect->layerCount != iview->image->info.array_size)
+ return false;
+
+ /* DCC */
+ if (!radv_format_pack_clear_color(iview->vk_format, clear_color, &clear_value))
+ return false;
+
+ if (!radv_image_has_clear_value(iview->image) && (clear_color[0] != 0 || clear_color[1] != 0))
+ return false;
+
+ if (radv_dcc_enabled(iview->image, iview->base_mip)) {
+ bool can_avoid_fast_clear_elim;
+ uint32_t reset_value;
+
+ vi_get_fast_clear_parameters(cmd_buffer->device, iview->image->vk_format, iview->vk_format,
+ &clear_value, &reset_value, &can_avoid_fast_clear_elim);
+
+ if (iview->image->info.samples > 1) {
+ /* DCC fast clear with MSAA should clear CMASK. */
+ /* FIXME: This doesn't work for now. There is a
+ * hardware bug with fast clears and DCC for MSAA
+ * textures. AMDVLK has a workaround but it doesn't
+ * seem to work here. Note that we might emit useless
+ * CB flushes but that shouldn't matter.
+ */
+ if (!can_avoid_fast_clear_elim)
+ return false;
+ }
+
+ if (iview->image->info.levels > 1 &&
+ cmd_buffer->device->physical_device->rad_info.chip_class == GFX8) {
+ for (uint32_t l = 0; l < iview->level_count; l++) {
+ uint32_t level = iview->base_mip + l;
+ struct legacy_surf_level *surf_level =
+ &iview->image->planes[0].surface.u.legacy.level[level];
+
+            /* Do not fast clear if one level can't be
+ * fast cleared.
+ */
+ if (!surf_level->dcc_fast_clear_size)
+ return false;
+ }
+ }
+ }
+
+ return true;
}
-
static void
-radv_fast_clear_color(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_image_view *iview,
- const VkClearAttachment *clear_att,
- uint32_t subpass_att,
- enum radv_cmd_flush_bits *pre_flush,
- enum radv_cmd_flush_bits *post_flush)
+radv_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
+ const VkClearAttachment *clear_att, uint32_t subpass_att,
+ enum radv_cmd_flush_bits *pre_flush, enum radv_cmd_flush_bits *post_flush)
{
- VkClearColorValue clear_value = clear_att->clearValue.color;
- uint32_t clear_color[2], flush_bits = 0;
- uint32_t cmask_clear_value;
- VkImageSubresourceRange range = {
- .aspectMask = iview->aspect_mask,
- .baseMipLevel = iview->base_mip,
- .levelCount = iview->level_count,
- .baseArrayLayer = iview->base_layer,
- .layerCount = iview->layer_count,
- };
-
- if (pre_flush) {
- enum radv_cmd_flush_bits bits =
- radv_src_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, iview->image) |
- radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, iview->image);
- cmd_buffer->state.flush_bits |= bits & ~ *pre_flush;
- *pre_flush |= cmd_buffer->state.flush_bits;
- }
-
- /* DCC */
- radv_format_pack_clear_color(iview->vk_format, clear_color, &clear_value);
-
- cmask_clear_value = radv_get_cmask_fast_clear_value(iview->image);
-
- /* clear cmask buffer */
- bool need_decompress_pass = false;
- if (radv_dcc_enabled(iview->image, iview->base_mip)) {
- uint32_t reset_value;
- bool can_avoid_fast_clear_elim;
-
- vi_get_fast_clear_parameters(cmd_buffer->device,
- iview->image->vk_format,
- iview->vk_format,
- &clear_value, &reset_value,
- &can_avoid_fast_clear_elim);
-
- if (radv_image_has_cmask(iview->image)) {
- flush_bits = radv_clear_cmask(cmd_buffer, iview->image,
- &range, cmask_clear_value);
- }
-
- if (!can_avoid_fast_clear_elim)
- need_decompress_pass = true;
-
- flush_bits |= radv_clear_dcc(cmd_buffer, iview->image, &range,
- reset_value);
- } else {
- flush_bits = radv_clear_cmask(cmd_buffer, iview->image,
- &range, cmask_clear_value);
-
- /* Fast clearing with CMASK should always be eliminated. */
- need_decompress_pass = true;
- }
-
- if (post_flush) {
- *post_flush |= flush_bits;
- }
-
- /* Update the FCE predicate to perform a fast-clear eliminate. */
- radv_update_fce_metadata(cmd_buffer, iview->image, &range,
- need_decompress_pass);
-
- radv_update_color_clear_metadata(cmd_buffer, iview, subpass_att,
- clear_color);
+ VkClearColorValue clear_value = clear_att->clearValue.color;
+ uint32_t clear_color[2], flush_bits = 0;
+ uint32_t cmask_clear_value;
+ VkImageSubresourceRange range = {
+ .aspectMask = iview->aspect_mask,
+ .baseMipLevel = iview->base_mip,
+ .levelCount = iview->level_count,
+ .baseArrayLayer = iview->base_layer,
+ .layerCount = iview->layer_count,
+ };
+
+ if (pre_flush) {
+ enum radv_cmd_flush_bits bits =
+ radv_src_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, iview->image) |
+ radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, iview->image);
+ cmd_buffer->state.flush_bits |= bits & ~*pre_flush;
+ *pre_flush |= cmd_buffer->state.flush_bits;
+ }
+
+ /* DCC */
+ radv_format_pack_clear_color(iview->vk_format, clear_color, &clear_value);
+
+ cmask_clear_value = radv_get_cmask_fast_clear_value(iview->image);
+
+ /* clear cmask buffer */
+ bool need_decompress_pass = false;
+ if (radv_dcc_enabled(iview->image, iview->base_mip)) {
+ uint32_t reset_value;
+ bool can_avoid_fast_clear_elim;
+
+ vi_get_fast_clear_parameters(cmd_buffer->device, iview->image->vk_format, iview->vk_format,
+ &clear_value, &reset_value, &can_avoid_fast_clear_elim);
+
+ if (radv_image_has_cmask(iview->image)) {
+ flush_bits = radv_clear_cmask(cmd_buffer, iview->image, &range, cmask_clear_value);
+ }
+
+ if (!can_avoid_fast_clear_elim)
+ need_decompress_pass = true;
+
+ flush_bits |= radv_clear_dcc(cmd_buffer, iview->image, &range, reset_value);
+ } else {
+ flush_bits = radv_clear_cmask(cmd_buffer, iview->image, &range, cmask_clear_value);
+
+ /* Fast clearing with CMASK should always be eliminated. */
+ need_decompress_pass = true;
+ }
+
+ if (post_flush) {
+ *post_flush |= flush_bits;
+ }
+
+ /* Update the FCE predicate to perform a fast-clear eliminate. */
+ radv_update_fce_metadata(cmd_buffer, iview->image, &range, need_decompress_pass);
+
+ radv_update_color_clear_metadata(cmd_buffer, iview, subpass_att, clear_color);
}
/**
 * The parameters mean the same as those in vkCmdClearAttachments.
*/
static void
-emit_clear(struct radv_cmd_buffer *cmd_buffer,
- const VkClearAttachment *clear_att,
- const VkClearRect *clear_rect,
- enum radv_cmd_flush_bits *pre_flush,
- enum radv_cmd_flush_bits *post_flush,
- uint32_t view_mask,
- bool ds_resolve_clear)
+emit_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *clear_att,
+ const VkClearRect *clear_rect, enum radv_cmd_flush_bits *pre_flush,
+ enum radv_cmd_flush_bits *post_flush, uint32_t view_mask, bool ds_resolve_clear)
{
- const struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
- const struct radv_subpass *subpass = cmd_buffer->state.subpass;
- VkImageAspectFlags aspects = clear_att->aspectMask;
-
- if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
- const uint32_t subpass_att = clear_att->colorAttachment;
- assert(subpass_att < subpass->color_count);
- const uint32_t pass_att = subpass->color_attachments[subpass_att].attachment;
- if (pass_att == VK_ATTACHMENT_UNUSED)
- return;
-
- VkImageLayout image_layout = subpass->color_attachments[subpass_att].layout;
- bool in_render_loop = subpass->color_attachments[subpass_att].in_render_loop;
- const struct radv_image_view *iview = fb ? cmd_buffer->state.attachments[pass_att].iview : NULL;
- VkClearColorValue clear_value = clear_att->clearValue.color;
-
- if (radv_can_fast_clear_color(cmd_buffer, iview, image_layout, in_render_loop,
- clear_rect, clear_value, view_mask)) {
- radv_fast_clear_color(cmd_buffer, iview, clear_att,
- subpass_att, pre_flush,
- post_flush);
- } else {
- emit_color_clear(cmd_buffer, clear_att, clear_rect, view_mask);
- }
- } else {
- struct radv_subpass_attachment *ds_att = subpass->depth_stencil_attachment;
-
- if (ds_resolve_clear)
- ds_att = subpass->ds_resolve_attachment;
-
- if (!ds_att || ds_att->attachment == VK_ATTACHMENT_UNUSED)
- return;
-
- VkImageLayout image_layout = ds_att->layout;
- bool in_render_loop = ds_att->in_render_loop;
- const struct radv_image_view *iview = fb ? cmd_buffer->state.attachments[ds_att->attachment].iview : NULL;
- VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;
-
- assert(aspects & (VK_IMAGE_ASPECT_DEPTH_BIT |
- VK_IMAGE_ASPECT_STENCIL_BIT));
-
- if (radv_can_fast_clear_depth(cmd_buffer, iview, image_layout,
- in_render_loop, aspects, clear_rect,
- clear_value, view_mask)) {
- radv_fast_clear_depth(cmd_buffer, iview, clear_att,
- pre_flush, post_flush);
- } else {
- emit_depthstencil_clear(cmd_buffer, clear_att, clear_rect,
- ds_att, view_mask);
- }
- }
+ const struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
+ const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+ VkImageAspectFlags aspects = clear_att->aspectMask;
+
+ if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
+ const uint32_t subpass_att = clear_att->colorAttachment;
+ assert(subpass_att < subpass->color_count);
+ const uint32_t pass_att = subpass->color_attachments[subpass_att].attachment;
+ if (pass_att == VK_ATTACHMENT_UNUSED)
+ return;
+
+ VkImageLayout image_layout = subpass->color_attachments[subpass_att].layout;
+ bool in_render_loop = subpass->color_attachments[subpass_att].in_render_loop;
+ const struct radv_image_view *iview =
+ fb ? cmd_buffer->state.attachments[pass_att].iview : NULL;
+ VkClearColorValue clear_value = clear_att->clearValue.color;
+
+ if (radv_can_fast_clear_color(cmd_buffer, iview, image_layout, in_render_loop, clear_rect,
+ clear_value, view_mask)) {
+ radv_fast_clear_color(cmd_buffer, iview, clear_att, subpass_att, pre_flush, post_flush);
+ } else {
+ emit_color_clear(cmd_buffer, clear_att, clear_rect, view_mask);
+ }
+ } else {
+ struct radv_subpass_attachment *ds_att = subpass->depth_stencil_attachment;
+
+ if (ds_resolve_clear)
+ ds_att = subpass->ds_resolve_attachment;
+
+ if (!ds_att || ds_att->attachment == VK_ATTACHMENT_UNUSED)
+ return;
+
+ VkImageLayout image_layout = ds_att->layout;
+ bool in_render_loop = ds_att->in_render_loop;
+ const struct radv_image_view *iview =
+ fb ? cmd_buffer->state.attachments[ds_att->attachment].iview : NULL;
+ VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;
+
+ assert(aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT));
+
+ if (radv_can_fast_clear_depth(cmd_buffer, iview, image_layout, in_render_loop, aspects,
+ clear_rect, clear_value, view_mask)) {
+ radv_fast_clear_depth(cmd_buffer, iview, clear_att, pre_flush, post_flush);
+ } else {
+ emit_depthstencil_clear(cmd_buffer, clear_att, clear_rect, ds_att, view_mask);
+ }
+ }
}
static inline bool
radv_attachment_needs_clear(struct radv_cmd_state *cmd_state, uint32_t a)
{
- uint32_t view_mask = cmd_state->subpass->view_mask;
- return (a != VK_ATTACHMENT_UNUSED &&
- cmd_state->attachments[a].pending_clear_aspects &&
- (!view_mask || (view_mask & ~cmd_state->attachments[a].cleared_views)));
+ uint32_t view_mask = cmd_state->subpass->view_mask;
+ return (a != VK_ATTACHMENT_UNUSED && cmd_state->attachments[a].pending_clear_aspects &&
+ (!view_mask || (view_mask & ~cmd_state->attachments[a].cleared_views)));
}
static bool
radv_subpass_needs_clear(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_cmd_state *cmd_state = &cmd_buffer->state;
- uint32_t a;
+ struct radv_cmd_state *cmd_state = &cmd_buffer->state;
+ uint32_t a;
- if (!cmd_state->subpass)
- return false;
+ if (!cmd_state->subpass)
+ return false;
- for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) {
- a = cmd_state->subpass->color_attachments[i].attachment;
- if (radv_attachment_needs_clear(cmd_state, a))
- return true;
- }
+ for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) {
+ a = cmd_state->subpass->color_attachments[i].attachment;
+ if (radv_attachment_needs_clear(cmd_state, a))
+ return true;
+ }
- if (cmd_state->subpass->depth_stencil_attachment) {
- a = cmd_state->subpass->depth_stencil_attachment->attachment;
- if (radv_attachment_needs_clear(cmd_state, a))
- return true;
- }
+ if (cmd_state->subpass->depth_stencil_attachment) {
+ a = cmd_state->subpass->depth_stencil_attachment->attachment;
+ if (radv_attachment_needs_clear(cmd_state, a))
+ return true;
+ }
- if (!cmd_state->subpass->ds_resolve_attachment)
- return false;
+ if (!cmd_state->subpass->ds_resolve_attachment)
+ return false;
- a = cmd_state->subpass->ds_resolve_attachment->attachment;
- return radv_attachment_needs_clear(cmd_state, a);
+ a = cmd_state->subpass->ds_resolve_attachment->attachment;
+ return radv_attachment_needs_clear(cmd_state, a);
}
static void
radv_subpass_clear_attachment(struct radv_cmd_buffer *cmd_buffer,
- struct radv_attachment_state *attachment,
- const VkClearAttachment *clear_att,
- enum radv_cmd_flush_bits *pre_flush,
- enum radv_cmd_flush_bits *post_flush,
- bool ds_resolve_clear)
+ struct radv_attachment_state *attachment,
+ const VkClearAttachment *clear_att,
+ enum radv_cmd_flush_bits *pre_flush,
+ enum radv_cmd_flush_bits *post_flush, bool ds_resolve_clear)
{
- struct radv_cmd_state *cmd_state = &cmd_buffer->state;
- uint32_t view_mask = cmd_state->subpass->view_mask;
+ struct radv_cmd_state *cmd_state = &cmd_buffer->state;
+ uint32_t view_mask = cmd_state->subpass->view_mask;
- VkClearRect clear_rect = {
- .rect = cmd_state->render_area,
- .baseArrayLayer = 0,
- .layerCount = cmd_state->framebuffer->layers,
- };
+ VkClearRect clear_rect = {
+ .rect = cmd_state->render_area,
+ .baseArrayLayer = 0,
+ .layerCount = cmd_state->framebuffer->layers,
+ };
- radv_describe_begin_render_pass_clear(cmd_buffer, clear_att->aspectMask);
+ radv_describe_begin_render_pass_clear(cmd_buffer, clear_att->aspectMask);
- emit_clear(cmd_buffer, clear_att, &clear_rect, pre_flush, post_flush,
- view_mask & ~attachment->cleared_views, ds_resolve_clear);
- if (view_mask)
- attachment->cleared_views |= view_mask;
- else
- attachment->pending_clear_aspects = 0;
+ emit_clear(cmd_buffer, clear_att, &clear_rect, pre_flush, post_flush,
+ view_mask & ~attachment->cleared_views, ds_resolve_clear);
+ if (view_mask)
+ attachment->cleared_views |= view_mask;
+ else
+ attachment->pending_clear_aspects = 0;
- radv_describe_end_render_pass_clear(cmd_buffer);
+ radv_describe_end_render_pass_clear(cmd_buffer);
}
/**
@@ -1895,456 +1721,418 @@ radv_subpass_clear_attachment(struct radv_cmd_buffer *cmd_buffer,
void
radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_cmd_state *cmd_state = &cmd_buffer->state;
- struct radv_meta_saved_state saved_state;
- enum radv_cmd_flush_bits pre_flush = 0;
- enum radv_cmd_flush_bits post_flush = 0;
-
- if (!radv_subpass_needs_clear(cmd_buffer))
- return;
-
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_GRAPHICS_PIPELINE |
- RADV_META_SAVE_CONSTANTS);
-
- for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) {
- uint32_t a = cmd_state->subpass->color_attachments[i].attachment;
-
- if (!radv_attachment_needs_clear(cmd_state, a))
- continue;
-
- assert(cmd_state->attachments[a].pending_clear_aspects ==
- VK_IMAGE_ASPECT_COLOR_BIT);
-
- VkClearAttachment clear_att = {
- .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .colorAttachment = i, /* Use attachment index relative to subpass */
- .clearValue = cmd_state->attachments[a].clear_value,
- };
-
- radv_subpass_clear_attachment(cmd_buffer,
- &cmd_state->attachments[a],
- &clear_att, &pre_flush,
- &post_flush, false);
- }
-
- if (cmd_state->subpass->depth_stencil_attachment) {
- uint32_t ds = cmd_state->subpass->depth_stencil_attachment->attachment;
- if (radv_attachment_needs_clear(cmd_state, ds)) {
- VkClearAttachment clear_att = {
- .aspectMask = cmd_state->attachments[ds].pending_clear_aspects,
- .clearValue = cmd_state->attachments[ds].clear_value,
- };
-
- radv_subpass_clear_attachment(cmd_buffer,
- &cmd_state->attachments[ds],
- &clear_att, &pre_flush,
- &post_flush, false);
- }
- }
-
- if (cmd_state->subpass->ds_resolve_attachment) {
- uint32_t ds_resolve = cmd_state->subpass->ds_resolve_attachment->attachment;
- if (radv_attachment_needs_clear(cmd_state, ds_resolve)) {
- VkClearAttachment clear_att = {
- .aspectMask = cmd_state->attachments[ds_resolve].pending_clear_aspects,
- .clearValue = cmd_state->attachments[ds_resolve].clear_value,
- };
-
- radv_subpass_clear_attachment(cmd_buffer,
- &cmd_state->attachments[ds_resolve],
- &clear_att, &pre_flush,
- &post_flush, true);
- }
- }
-
- radv_meta_restore(&saved_state, cmd_buffer);
- cmd_buffer->state.flush_bits |= post_flush;
+ struct radv_cmd_state *cmd_state = &cmd_buffer->state;
+ struct radv_meta_saved_state saved_state;
+ enum radv_cmd_flush_bits pre_flush = 0;
+ enum radv_cmd_flush_bits post_flush = 0;
+
+ if (!radv_subpass_needs_clear(cmd_buffer))
+ return;
+
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS);
+
+ for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) {
+ uint32_t a = cmd_state->subpass->color_attachments[i].attachment;
+
+ if (!radv_attachment_needs_clear(cmd_state, a))
+ continue;
+
+ assert(cmd_state->attachments[a].pending_clear_aspects == VK_IMAGE_ASPECT_COLOR_BIT);
+
+ VkClearAttachment clear_att = {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .colorAttachment = i, /* Use attachment index relative to subpass */
+ .clearValue = cmd_state->attachments[a].clear_value,
+ };
+
+ radv_subpass_clear_attachment(cmd_buffer, &cmd_state->attachments[a], &clear_att, &pre_flush,
+ &post_flush, false);
+ }
+
+ if (cmd_state->subpass->depth_stencil_attachment) {
+ uint32_t ds = cmd_state->subpass->depth_stencil_attachment->attachment;
+ if (radv_attachment_needs_clear(cmd_state, ds)) {
+ VkClearAttachment clear_att = {
+ .aspectMask = cmd_state->attachments[ds].pending_clear_aspects,
+ .clearValue = cmd_state->attachments[ds].clear_value,
+ };
+
+ radv_subpass_clear_attachment(cmd_buffer, &cmd_state->attachments[ds], &clear_att,
+ &pre_flush, &post_flush, false);
+ }
+ }
+
+ if (cmd_state->subpass->ds_resolve_attachment) {
+ uint32_t ds_resolve = cmd_state->subpass->ds_resolve_attachment->attachment;
+ if (radv_attachment_needs_clear(cmd_state, ds_resolve)) {
+ VkClearAttachment clear_att = {
+ .aspectMask = cmd_state->attachments[ds_resolve].pending_clear_aspects,
+ .clearValue = cmd_state->attachments[ds_resolve].clear_value,
+ };
+
+ radv_subpass_clear_attachment(cmd_buffer, &cmd_state->attachments[ds_resolve], &clear_att,
+ &pre_flush, &post_flush, true);
+ }
+ }
+
+ radv_meta_restore(&saved_state, cmd_buffer);
+ cmd_buffer->state.flush_bits |= post_flush;
}
static void
-radv_clear_image_layer(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- VkImageLayout image_layout,
- const VkImageSubresourceRange *range,
- VkFormat format, int level, int layer,
- const VkClearValue *clear_val)
+radv_clear_image_layer(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ VkImageLayout image_layout, const VkImageSubresourceRange *range,
+ VkFormat format, int level, int layer, const VkClearValue *clear_val)
{
- VkDevice device_h = radv_device_to_handle(cmd_buffer->device);
- struct radv_image_view iview;
- uint32_t width = radv_minify(image->info.width, range->baseMipLevel + level);
- uint32_t height = radv_minify(image->info.height, range->baseMipLevel + level);
-
- radv_image_view_init(&iview, cmd_buffer->device,
- &(VkImageViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(image),
- .viewType = radv_meta_get_view_type(image),
- .format = format,
- .subresourceRange = {
- .aspectMask = range->aspectMask,
- .baseMipLevel = range->baseMipLevel + level,
- .levelCount = 1,
- .baseArrayLayer = range->baseArrayLayer + layer,
- .layerCount = 1
- },
- }, NULL);
-
- VkFramebuffer fb;
- radv_CreateFramebuffer(device_h,
- &(VkFramebufferCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
- .attachmentCount = 1,
- .pAttachments = (VkImageView[]) {
- radv_image_view_to_handle(&iview),
- },
- .width = width,
- .height = height,
- .layers = 1
- },
- &cmd_buffer->pool->alloc,
- &fb);
-
- VkAttachmentDescription2 att_desc = {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
- .format = iview.vk_format,
- .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
- .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
- .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
- .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
- .initialLayout = image_layout,
- .finalLayout = image_layout,
- };
-
- VkSubpassDescription2 subpass_desc = {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
- .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
- .inputAttachmentCount = 0,
- .colorAttachmentCount = 0,
- .pColorAttachments = NULL,
- .pResolveAttachments = NULL,
- .pDepthStencilAttachment = NULL,
- .preserveAttachmentCount = 0,
- .pPreserveAttachments = NULL,
- };
-
- const VkAttachmentReference2 att_ref = {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
- .attachment = 0,
- .layout = image_layout,
- };
-
- if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
- subpass_desc.colorAttachmentCount = 1;
- subpass_desc.pColorAttachments = &att_ref;
- } else {
- subpass_desc.pDepthStencilAttachment = &att_ref;
- }
-
- VkRenderPass pass;
- radv_CreateRenderPass2(device_h,
- &(VkRenderPassCreateInfo2) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
- .attachmentCount = 1,
- .pAttachments = &att_desc,
- .subpassCount = 1,
- .pSubpasses = &subpass_desc,
- .dependencyCount = 2,
- .pDependencies = (VkSubpassDependency2[]) {
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = VK_SUBPASS_EXTERNAL,
- .dstSubpass = 0,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- },
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = 0,
- .dstSubpass = VK_SUBPASS_EXTERNAL,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- }
- }
- },
- &cmd_buffer->pool->alloc,
- &pass);
-
- radv_cmd_buffer_begin_render_pass(cmd_buffer,
- &(VkRenderPassBeginInfo) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
- .renderArea = {
- .offset = { 0, 0, },
- .extent = {
- .width = width,
- .height = height,
- },
- },
- .renderPass = pass,
- .framebuffer = fb,
- .clearValueCount = 0,
- .pClearValues = NULL,
- }, NULL);
-
- radv_cmd_buffer_set_subpass(cmd_buffer,
- &cmd_buffer->state.pass->subpasses[0]);
-
- VkClearAttachment clear_att = {
- .aspectMask = range->aspectMask,
- .colorAttachment = 0,
- .clearValue = *clear_val,
- };
-
- VkClearRect clear_rect = {
- .rect = {
- .offset = { 0, 0 },
- .extent = { width, height },
- },
- .baseArrayLayer = range->baseArrayLayer,
- .layerCount = 1, /* FINISHME: clear multi-layer framebuffer */
- };
-
- emit_clear(cmd_buffer, &clear_att, &clear_rect, NULL, NULL, 0, false);
-
- radv_cmd_buffer_end_render_pass(cmd_buffer);
- radv_DestroyRenderPass(device_h, pass,
- &cmd_buffer->pool->alloc);
- radv_DestroyFramebuffer(device_h, fb,
- &cmd_buffer->pool->alloc);
+ VkDevice device_h = radv_device_to_handle(cmd_buffer->device);
+ struct radv_image_view iview;
+ uint32_t width = radv_minify(image->info.width, range->baseMipLevel + level);
+ uint32_t height = radv_minify(image->info.height, range->baseMipLevel + level);
+
+ radv_image_view_init(&iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(image),
+ .viewType = radv_meta_get_view_type(image),
+ .format = format,
+ .subresourceRange = {.aspectMask = range->aspectMask,
+ .baseMipLevel = range->baseMipLevel + level,
+ .levelCount = 1,
+ .baseArrayLayer = range->baseArrayLayer + layer,
+ .layerCount = 1},
+ },
+ NULL);
+
+ VkFramebuffer fb;
+ radv_CreateFramebuffer(
+ device_h,
+ &(VkFramebufferCreateInfo){.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments =
+ (VkImageView[]){
+ radv_image_view_to_handle(&iview),
+ },
+ .width = width,
+ .height = height,
+ .layers = 1},
+ &cmd_buffer->pool->alloc, &fb);
+
+ VkAttachmentDescription2 att_desc = {
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
+ .format = iview.vk_format,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = image_layout,
+ .finalLayout = image_layout,
+ };
+
+ VkSubpassDescription2 subpass_desc = {
+ .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 0,
+ .pColorAttachments = NULL,
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment = NULL,
+ .preserveAttachmentCount = 0,
+ .pPreserveAttachments = NULL,
+ };
+
+ const VkAttachmentReference2 att_ref = {
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
+ .attachment = 0,
+ .layout = image_layout,
+ };
+
+ if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
+ subpass_desc.colorAttachmentCount = 1;
+ subpass_desc.pColorAttachments = &att_ref;
+ } else {
+ subpass_desc.pDepthStencilAttachment = &att_ref;
+ }
+
+ VkRenderPass pass;
+ radv_CreateRenderPass2(
+ device_h,
+ &(VkRenderPassCreateInfo2){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
+ .attachmentCount = 1,
+ .pAttachments = &att_desc,
+ .subpassCount = 1,
+ .pSubpasses = &subpass_desc,
+ .dependencyCount = 2,
+ .pDependencies =
+ (VkSubpassDependency2[]){{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = VK_SUBPASS_EXTERNAL,
+ .dstSubpass = 0,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0},
+ {.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = 0,
+ .dstSubpass = VK_SUBPASS_EXTERNAL,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0}}},
+ &cmd_buffer->pool->alloc, &pass);
+
+ radv_cmd_buffer_begin_render_pass(cmd_buffer,
+ &(VkRenderPassBeginInfo){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .renderArea =
+ {
+ .offset =
+ {
+ 0,
+ 0,
+ },
+ .extent =
+ {
+ .width = width,
+ .height = height,
+ },
+ },
+ .renderPass = pass,
+ .framebuffer = fb,
+ .clearValueCount = 0,
+ .pClearValues = NULL,
+ },
+ NULL);
+
+ radv_cmd_buffer_set_subpass(cmd_buffer, &cmd_buffer->state.pass->subpasses[0]);
+
+ VkClearAttachment clear_att = {
+ .aspectMask = range->aspectMask,
+ .colorAttachment = 0,
+ .clearValue = *clear_val,
+ };
+
+ VkClearRect clear_rect = {
+ .rect =
+ {
+ .offset = {0, 0},
+ .extent = {width, height},
+ },
+ .baseArrayLayer = range->baseArrayLayer,
+ .layerCount = 1, /* FINISHME: clear multi-layer framebuffer */
+ };
+
+ emit_clear(cmd_buffer, &clear_att, &clear_rect, NULL, NULL, 0, false);
+
+ radv_cmd_buffer_end_render_pass(cmd_buffer);
+ radv_DestroyRenderPass(device_h, pass, &cmd_buffer->pool->alloc);
+ radv_DestroyFramebuffer(device_h, fb, &cmd_buffer->pool->alloc);
}
/**
* Return TRUE if a fast color or depth clear has been performed.
*/
static bool
-radv_fast_clear_range(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- VkFormat format,
- VkImageLayout image_layout,
- bool in_render_loop,
- const VkImageSubresourceRange *range,
- const VkClearValue *clear_val)
+radv_fast_clear_range(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkFormat format,
+ VkImageLayout image_layout, bool in_render_loop,
+ const VkImageSubresourceRange *range, const VkClearValue *clear_val)
{
- struct radv_image_view iview;
-
- radv_image_view_init(&iview, cmd_buffer->device,
- &(VkImageViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(image),
- .viewType = radv_meta_get_view_type(image),
- .format = image->vk_format,
- .subresourceRange = {
- .aspectMask = range->aspectMask,
- .baseMipLevel = range->baseMipLevel,
- .levelCount = range->levelCount,
- .baseArrayLayer = range->baseArrayLayer,
- .layerCount = range->layerCount,
- },
- }, NULL);
-
- VkClearRect clear_rect = {
- .rect = {
- .offset = { 0, 0 },
- .extent = {
- radv_minify(image->info.width, range->baseMipLevel),
- radv_minify(image->info.height, range->baseMipLevel),
- },
- },
- .baseArrayLayer = range->baseArrayLayer,
- .layerCount = range->layerCount,
- };
-
- VkClearAttachment clear_att = {
- .aspectMask = range->aspectMask,
- .colorAttachment = 0,
- .clearValue = *clear_val,
- };
-
- if (vk_format_is_color(format)) {
- if (radv_can_fast_clear_color(cmd_buffer, &iview, image_layout,
- in_render_loop, &clear_rect,
- clear_att.clearValue.color, 0)) {
- radv_fast_clear_color(cmd_buffer, &iview, &clear_att,
- clear_att.colorAttachment,
- NULL, NULL);
- return true;
- }
- } else {
- if (radv_can_fast_clear_depth(cmd_buffer, &iview, image_layout,
- in_render_loop,range->aspectMask,
- &clear_rect, clear_att.clearValue.depthStencil,
- 0)) {
- radv_fast_clear_depth(cmd_buffer, &iview, &clear_att,
- NULL, NULL);
- return true;
- }
- }
-
- return false;
+ struct radv_image_view iview;
+
+ radv_image_view_init(&iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(image),
+ .viewType = radv_meta_get_view_type(image),
+ .format = image->vk_format,
+ .subresourceRange =
+ {
+ .aspectMask = range->aspectMask,
+ .baseMipLevel = range->baseMipLevel,
+ .levelCount = range->levelCount,
+ .baseArrayLayer = range->baseArrayLayer,
+ .layerCount = range->layerCount,
+ },
+ },
+ NULL);
+
+ VkClearRect clear_rect = {
+ .rect =
+ {
+ .offset = {0, 0},
+ .extent =
+ {
+ radv_minify(image->info.width, range->baseMipLevel),
+ radv_minify(image->info.height, range->baseMipLevel),
+ },
+ },
+ .baseArrayLayer = range->baseArrayLayer,
+ .layerCount = range->layerCount,
+ };
+
+ VkClearAttachment clear_att = {
+ .aspectMask = range->aspectMask,
+ .colorAttachment = 0,
+ .clearValue = *clear_val,
+ };
+
+ if (vk_format_is_color(format)) {
+ if (radv_can_fast_clear_color(cmd_buffer, &iview, image_layout, in_render_loop, &clear_rect,
+ clear_att.clearValue.color, 0)) {
+ radv_fast_clear_color(cmd_buffer, &iview, &clear_att, clear_att.colorAttachment, NULL,
+ NULL);
+ return true;
+ }
+ } else {
+ if (radv_can_fast_clear_depth(cmd_buffer, &iview, image_layout, in_render_loop,
+ range->aspectMask, &clear_rect,
+ clear_att.clearValue.depthStencil, 0)) {
+ radv_fast_clear_depth(cmd_buffer, &iview, &clear_att, NULL, NULL);
+ return true;
+ }
+ }
+
+ return false;
}
static void
-radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- VkImageLayout image_layout,
- const VkClearValue *clear_value,
- uint32_t range_count,
- const VkImageSubresourceRange *ranges,
- bool cs)
+radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ VkImageLayout image_layout, const VkClearValue *clear_value,
+ uint32_t range_count, const VkImageSubresourceRange *ranges, bool cs)
{
- VkFormat format = image->vk_format;
- VkClearValue internal_clear_value;
-
- if (ranges->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT)
- internal_clear_value.color = clear_value->color;
- else
- internal_clear_value.depthStencil = clear_value->depthStencil;
-
- if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) {
- uint32_t value;
- format = VK_FORMAT_R32_UINT;
- value = float3_to_rgb9e5(clear_value->color.float32);
- internal_clear_value.color.uint32[0] = value;
- }
-
- if (format == VK_FORMAT_R4G4_UNORM_PACK8) {
- uint8_t r, g;
- format = VK_FORMAT_R8_UINT;
- r = float_to_ubyte(clear_value->color.float32[0]) >> 4;
- g = float_to_ubyte(clear_value->color.float32[1]) >> 4;
- internal_clear_value.color.uint32[0] = (r << 4) | (g & 0xf);
- }
-
- for (uint32_t r = 0; r < range_count; r++) {
- const VkImageSubresourceRange *range = &ranges[r];
-
- /* Try to perform a fast clear first, otherwise fallback to
- * the legacy path.
- */
- if (!cs &&
- radv_fast_clear_range(cmd_buffer, image, format,
- image_layout, false, range,
- &internal_clear_value)) {
- continue;
- }
-
- for (uint32_t l = 0; l < radv_get_levelCount(image, range); ++l) {
- const uint32_t layer_count = image->type == VK_IMAGE_TYPE_3D ?
- radv_minify(image->info.depth, range->baseMipLevel + l) :
- radv_get_layerCount(image, range);
- for (uint32_t s = 0; s < layer_count; ++s) {
-
- if (cs) {
- struct radv_meta_blit2d_surf surf;
- surf.format = format;
- surf.image = image;
- surf.level = range->baseMipLevel + l;
- surf.layer = range->baseArrayLayer + s;
- surf.aspect_mask = range->aspectMask;
- surf.disable_compression = true;
- radv_meta_clear_image_cs(cmd_buffer, &surf,
- &internal_clear_value.color);
- } else {
- radv_clear_image_layer(cmd_buffer, image, image_layout,
- range, format, l, s, &internal_clear_value);
- }
- }
- }
- }
+ VkFormat format = image->vk_format;
+ VkClearValue internal_clear_value;
+
+ if (ranges->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT)
+ internal_clear_value.color = clear_value->color;
+ else
+ internal_clear_value.depthStencil = clear_value->depthStencil;
+
+ if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) {
+ uint32_t value;
+ format = VK_FORMAT_R32_UINT;
+ value = float3_to_rgb9e5(clear_value->color.float32);
+ internal_clear_value.color.uint32[0] = value;
+ }
+
+ if (format == VK_FORMAT_R4G4_UNORM_PACK8) {
+ uint8_t r, g;
+ format = VK_FORMAT_R8_UINT;
+ r = float_to_ubyte(clear_value->color.float32[0]) >> 4;
+ g = float_to_ubyte(clear_value->color.float32[1]) >> 4;
+ internal_clear_value.color.uint32[0] = (r << 4) | (g & 0xf);
+ }
+
+ for (uint32_t r = 0; r < range_count; r++) {
+ const VkImageSubresourceRange *range = &ranges[r];
+
+      /* Try to perform a fast clear first, otherwise fall back to
+ * the legacy path.
+ */
+ if (!cs && radv_fast_clear_range(cmd_buffer, image, format, image_layout, false, range,
+ &internal_clear_value)) {
+ continue;
+ }
+
+ for (uint32_t l = 0; l < radv_get_levelCount(image, range); ++l) {
+ const uint32_t layer_count = image->type == VK_IMAGE_TYPE_3D
+ ? radv_minify(image->info.depth, range->baseMipLevel + l)
+ : radv_get_layerCount(image, range);
+ for (uint32_t s = 0; s < layer_count; ++s) {
+
+ if (cs) {
+ struct radv_meta_blit2d_surf surf;
+ surf.format = format;
+ surf.image = image;
+ surf.level = range->baseMipLevel + l;
+ surf.layer = range->baseArrayLayer + s;
+ surf.aspect_mask = range->aspectMask;
+ surf.disable_compression = true;
+ radv_meta_clear_image_cs(cmd_buffer, &surf, &internal_clear_value.color);
+ } else {
+ radv_clear_image_layer(cmd_buffer, image, image_layout, range, format, l, s,
+ &internal_clear_value);
+ }
+ }
+ }
+ }
}
-void radv_CmdClearColorImage(
- VkCommandBuffer commandBuffer,
- VkImage image_h,
- VkImageLayout imageLayout,
- const VkClearColorValue* pColor,
- uint32_t rangeCount,
- const VkImageSubresourceRange* pRanges)
+void
+radv_CmdClearColorImage(VkCommandBuffer commandBuffer, VkImage image_h, VkImageLayout imageLayout,
+ const VkClearColorValue *pColor, uint32_t rangeCount,
+ const VkImageSubresourceRange *pRanges)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_image, image, image_h);
- struct radv_meta_saved_state saved_state;
- bool cs;
-
- cs = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE ||
- !radv_image_is_renderable(cmd_buffer->device, image);
-
- if (cs) {
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_COMPUTE_PIPELINE |
- RADV_META_SAVE_CONSTANTS |
- RADV_META_SAVE_DESCRIPTORS);
- } else {
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_GRAPHICS_PIPELINE |
- RADV_META_SAVE_CONSTANTS);
- }
-
- radv_cmd_clear_image(cmd_buffer, image, imageLayout,
- (const VkClearValue *) pColor,
- rangeCount, pRanges, cs);
-
- radv_meta_restore(&saved_state, cmd_buffer);
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_image, image, image_h);
+ struct radv_meta_saved_state saved_state;
+ bool cs;
+
+ cs = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE ||
+ !radv_image_is_renderable(cmd_buffer->device, image);
+
+ if (cs) {
+ radv_meta_save(
+ &saved_state, cmd_buffer,
+ RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
+ } else {
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS);
+ }
+
+ radv_cmd_clear_image(cmd_buffer, image, imageLayout, (const VkClearValue *)pColor, rangeCount,
+ pRanges, cs);
+
+ radv_meta_restore(&saved_state, cmd_buffer);
}
-void radv_CmdClearDepthStencilImage(
- VkCommandBuffer commandBuffer,
- VkImage image_h,
- VkImageLayout imageLayout,
- const VkClearDepthStencilValue* pDepthStencil,
- uint32_t rangeCount,
- const VkImageSubresourceRange* pRanges)
+void
+radv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, VkImage image_h,
+ VkImageLayout imageLayout,
+ const VkClearDepthStencilValue *pDepthStencil, uint32_t rangeCount,
+ const VkImageSubresourceRange *pRanges)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_image, image, image_h);
- struct radv_meta_saved_state saved_state;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_image, image, image_h);
+ struct radv_meta_saved_state saved_state;
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_GRAPHICS_PIPELINE |
- RADV_META_SAVE_CONSTANTS);
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS);
- radv_cmd_clear_image(cmd_buffer, image, imageLayout,
- (const VkClearValue *) pDepthStencil,
- rangeCount, pRanges, false);
+ radv_cmd_clear_image(cmd_buffer, image, imageLayout, (const VkClearValue *)pDepthStencil,
+ rangeCount, pRanges, false);
- radv_meta_restore(&saved_state, cmd_buffer);
+ radv_meta_restore(&saved_state, cmd_buffer);
}
-void radv_CmdClearAttachments(
- VkCommandBuffer commandBuffer,
- uint32_t attachmentCount,
- const VkClearAttachment* pAttachments,
- uint32_t rectCount,
- const VkClearRect* pRects)
+void
+radv_CmdClearAttachments(VkCommandBuffer commandBuffer, uint32_t attachmentCount,
+ const VkClearAttachment *pAttachments, uint32_t rectCount,
+ const VkClearRect *pRects)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_meta_saved_state saved_state;
- enum radv_cmd_flush_bits pre_flush = 0;
- enum radv_cmd_flush_bits post_flush = 0;
-
- if (!cmd_buffer->state.subpass)
- return;
-
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_GRAPHICS_PIPELINE |
- RADV_META_SAVE_CONSTANTS);
-
- /* FINISHME: We can do better than this dumb loop. It thrashes too much
- * state.
- */
- for (uint32_t a = 0; a < attachmentCount; ++a) {
- for (uint32_t r = 0; r < rectCount; ++r) {
- emit_clear(cmd_buffer, &pAttachments[a], &pRects[r], &pre_flush, &post_flush,
- cmd_buffer->state.subpass->view_mask, false);
- }
- }
-
- radv_meta_restore(&saved_state, cmd_buffer);
- cmd_buffer->state.flush_bits |= post_flush;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_meta_saved_state saved_state;
+ enum radv_cmd_flush_bits pre_flush = 0;
+ enum radv_cmd_flush_bits post_flush = 0;
+
+ if (!cmd_buffer->state.subpass)
+ return;
+
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS);
+
+ /* FINISHME: We can do better than this dumb loop. It thrashes too much
+ * state.
+ */
+ for (uint32_t a = 0; a < attachmentCount; ++a) {
+ for (uint32_t r = 0; r < rectCount; ++r) {
+ emit_clear(cmd_buffer, &pAttachments[a], &pRects[r], &pre_flush, &post_flush,
+ cmd_buffer->state.subpass->view_mask, false);
+ }
+ }
+
+ radv_meta_restore(&saved_state, cmd_buffer);
+ cmd_buffer->state.flush_bits |= post_flush;
}
diff --git a/src/amd/vulkan/radv_meta_copy.c b/src/amd/vulkan/radv_meta_copy.c
index 44e61ca4b0a..224419de139 100644
--- a/src/amd/vulkan/radv_meta_copy.c
+++ b/src/amd/vulkan/radv_meta_copy.c
@@ -27,8 +27,8 @@
static VkExtent3D
meta_image_block_size(const struct radv_image *image)
{
- const struct util_format_description *desc = vk_format_description(image->vk_format);
- return (VkExtent3D) { desc->block.width, desc->block.height, 1 };
+ const struct util_format_description *desc = vk_format_description(image->vk_format);
+ return (VkExtent3D){desc->block.width, desc->block.height, 1};
}
/* Returns the user-provided VkBufferImageCopy::imageExtent in units of
@@ -36,16 +36,16 @@ meta_image_block_size(const struct radv_image *image)
* if Image is uncompressed or compressed, respectively.
*/
static struct VkExtent3D
-meta_region_extent_el(const struct radv_image *image,
- const VkImageType imageType,
+meta_region_extent_el(const struct radv_image *image, const VkImageType imageType,
const struct VkExtent3D *extent)
{
- const VkExtent3D block = meta_image_block_size(image);
- return radv_sanitize_image_extent(imageType, (VkExtent3D) {
- .width = DIV_ROUND_UP(extent->width , block.width),
- .height = DIV_ROUND_UP(extent->height, block.height),
- .depth = DIV_ROUND_UP(extent->depth , block.depth),
- });
+ const VkExtent3D block = meta_image_block_size(image);
+ return radv_sanitize_image_extent(imageType,
+ (VkExtent3D){
+ .width = DIV_ROUND_UP(extent->width, block.width),
+ .height = DIV_ROUND_UP(extent->height, block.height),
+ .depth = DIV_ROUND_UP(extent->depth, block.depth),
+ });
}
/* Returns the user-provided VkBufferImageCopy::imageOffset in units of
@@ -53,517 +53,481 @@ meta_region_extent_el(const struct radv_image *image,
* if Image is uncompressed or compressed, respectively.
*/
static struct VkOffset3D
-meta_region_offset_el(const struct radv_image *image,
- const struct VkOffset3D *offset)
+meta_region_offset_el(const struct radv_image *image, const struct VkOffset3D *offset)
{
- const VkExtent3D block = meta_image_block_size(image);
- return radv_sanitize_image_offset(image->type, (VkOffset3D) {
- .x = offset->x / block.width,
- .y = offset->y / block.height,
- .z = offset->z / block.depth,
- });
+ const VkExtent3D block = meta_image_block_size(image);
+ return radv_sanitize_image_offset(image->type, (VkOffset3D){
+ .x = offset->x / block.width,
+ .y = offset->y / block.height,
+ .z = offset->z / block.depth,
+ });
}
static VkFormat
vk_format_for_size(int bs)
{
- switch (bs) {
- case 1: return VK_FORMAT_R8_UINT;
- case 2: return VK_FORMAT_R8G8_UINT;
- case 4: return VK_FORMAT_R8G8B8A8_UINT;
- case 8: return VK_FORMAT_R16G16B16A16_UINT;
- case 12: return VK_FORMAT_R32G32B32_UINT;
- case 16: return VK_FORMAT_R32G32B32A32_UINT;
- default:
- unreachable("Invalid format block size");
- }
+ switch (bs) {
+ case 1:
+ return VK_FORMAT_R8_UINT;
+ case 2:
+ return VK_FORMAT_R8G8_UINT;
+ case 4:
+ return VK_FORMAT_R8G8B8A8_UINT;
+ case 8:
+ return VK_FORMAT_R16G16B16A16_UINT;
+ case 12:
+ return VK_FORMAT_R32G32B32_UINT;
+ case 16:
+ return VK_FORMAT_R32G32B32A32_UINT;
+ default:
+ unreachable("Invalid format block size");
+ }
}
static struct radv_meta_blit2d_surf
-blit_surf_for_image_level_layer(struct radv_image *image,
- VkImageLayout layout,
- const VkImageSubresourceLayers *subres,
- VkImageAspectFlags aspect_mask)
+blit_surf_for_image_level_layer(struct radv_image *image, VkImageLayout layout,
+ const VkImageSubresourceLayers *subres,
+ VkImageAspectFlags aspect_mask)
{
- VkFormat format = radv_get_aspect_format(image, aspect_mask);
-
- if (!radv_dcc_enabled(image, subres->mipLevel) &&
- !(radv_image_is_tc_compat_htile(image)))
- format = vk_format_for_size(vk_format_get_blocksize(format));
-
- format = vk_format_no_srgb(format);
-
- return (struct radv_meta_blit2d_surf) {
- .format = format,
- .bs = vk_format_get_blocksize(format),
- .level = subres->mipLevel,
- .layer = subres->baseArrayLayer,
- .image = image,
- .aspect_mask = aspect_mask,
- .current_layout = layout,
- };
+ VkFormat format = radv_get_aspect_format(image, aspect_mask);
+
+ if (!radv_dcc_enabled(image, subres->mipLevel) && !(radv_image_is_tc_compat_htile(image)))
+ format = vk_format_for_size(vk_format_get_blocksize(format));
+
+ format = vk_format_no_srgb(format);
+
+ return (struct radv_meta_blit2d_surf){
+ .format = format,
+ .bs = vk_format_get_blocksize(format),
+ .level = subres->mipLevel,
+ .layer = subres->baseArrayLayer,
+ .image = image,
+ .aspect_mask = aspect_mask,
+ .current_layout = layout,
+ };
}
bool
radv_image_is_renderable(struct radv_device *device, struct radv_image *image)
{
- if (image->vk_format == VK_FORMAT_R32G32B32_UINT ||
- image->vk_format == VK_FORMAT_R32G32B32_SINT ||
- image->vk_format == VK_FORMAT_R32G32B32_SFLOAT)
- return false;
-
- if (device->physical_device->rad_info.chip_class >= GFX9 &&
- image->type == VK_IMAGE_TYPE_3D &&
- vk_format_get_blocksizebits(image->vk_format) == 128 &&
- vk_format_is_compressed(image->vk_format))
- return false;
- return true;
+ if (image->vk_format == VK_FORMAT_R32G32B32_UINT ||
+ image->vk_format == VK_FORMAT_R32G32B32_SINT ||
+ image->vk_format == VK_FORMAT_R32G32B32_SFLOAT)
+ return false;
+
+ if (device->physical_device->rad_info.chip_class >= GFX9 && image->type == VK_IMAGE_TYPE_3D &&
+ vk_format_get_blocksizebits(image->vk_format) == 128 &&
+ vk_format_is_compressed(image->vk_format))
+ return false;
+ return true;
}
static void
-copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer,
- struct radv_buffer* buffer,
- struct radv_image* image,
- VkImageLayout layout,
- const VkBufferImageCopy2KHR* region)
+copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer,
+ struct radv_image *image, VkImageLayout layout,
+ const VkBufferImageCopy2KHR *region)
{
- struct radv_meta_saved_state saved_state;
- bool old_predicating;
- bool cs;
-
- /* The Vulkan 1.0 spec says "dstImage must have a sample count equal to
- * VK_SAMPLE_COUNT_1_BIT."
- */
- assert(image->info.samples == 1);
-
- cs = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE ||
- !radv_image_is_renderable(cmd_buffer->device, image);
-
- radv_meta_save(&saved_state, cmd_buffer,
- (cs ? RADV_META_SAVE_COMPUTE_PIPELINE :
- RADV_META_SAVE_GRAPHICS_PIPELINE) |
- RADV_META_SAVE_CONSTANTS |
- RADV_META_SAVE_DESCRIPTORS);
-
- /* VK_EXT_conditional_rendering says that copy commands should not be
- * affected by conditional rendering.
- */
- old_predicating = cmd_buffer->state.predicating;
- cmd_buffer->state.predicating = false;
-
- /**
- * From the Vulkan 1.0.6 spec: 18.3 Copying Data Between Images
- * extent is the size in texels of the source image to copy in width,
- * height and depth. 1D images use only x and width. 2D images use x, y,
- * width and height. 3D images use x, y, z, width, height and depth.
- *
- *
- * Also, convert the offsets and extent from units of texels to units of
- * blocks - which is the highest resolution accessible in this command.
- */
- const VkOffset3D img_offset_el =
- meta_region_offset_el(image, &region->imageOffset);
- const VkExtent3D bufferExtent = {
- .width = region->bufferRowLength ?
- region->bufferRowLength : region->imageExtent.width,
- .height = region->bufferImageHeight ?
- region->bufferImageHeight : region->imageExtent.height,
- };
- const VkExtent3D buf_extent_el =
- meta_region_extent_el(image, image->type, &bufferExtent);
-
- /* Start creating blit rect */
- const VkExtent3D img_extent_el =
- meta_region_extent_el(image, image->type, &region->imageExtent);
- struct radv_meta_blit2d_rect rect = {
- .width = img_extent_el.width,
- .height = img_extent_el.height,
- };
-
- /* Create blit surfaces */
- struct radv_meta_blit2d_surf img_bsurf =
- blit_surf_for_image_level_layer(image,
- layout,
- &region->imageSubresource,
- region->imageSubresource.aspectMask);
-
- if (!radv_is_buffer_format_supported(img_bsurf.format, NULL)) {
- uint32_t queue_mask = radv_image_queue_family_mask(image,
- cmd_buffer->queue_family_index,
- cmd_buffer->queue_family_index);
- bool compressed = radv_layout_dcc_compressed(cmd_buffer->device, image, layout, false, queue_mask);
- if (compressed) {
- radv_decompress_dcc(cmd_buffer, image, &(VkImageSubresourceRange) {
- .aspectMask = region->imageSubresource.aspectMask,
- .baseMipLevel = region->imageSubresource.mipLevel,
- .levelCount = 1,
- .baseArrayLayer = region->imageSubresource.baseArrayLayer,
- .layerCount = region->imageSubresource.layerCount,
- });
- img_bsurf.disable_compression = true;
- }
- img_bsurf.format = vk_format_for_size(vk_format_get_blocksize(img_bsurf.format));
- }
-
- struct radv_meta_blit2d_buffer buf_bsurf = {
- .bs = img_bsurf.bs,
- .format = img_bsurf.format,
- .buffer = buffer,
- .offset = region->bufferOffset,
- .pitch = buf_extent_el.width,
- };
-
- if (image->type == VK_IMAGE_TYPE_3D)
- img_bsurf.layer = img_offset_el.z;
- /* Loop through each 3D or array slice */
- unsigned num_slices_3d = img_extent_el.depth;
- unsigned num_slices_array = region->imageSubresource.layerCount;
- unsigned slice_3d = 0;
- unsigned slice_array = 0;
- while (slice_3d < num_slices_3d && slice_array < num_slices_array) {
-
- rect.dst_x = img_offset_el.x;
- rect.dst_y = img_offset_el.y;
-
-
- /* Perform Blit */
- if (cs) {
- radv_meta_buffer_to_image_cs(cmd_buffer, &buf_bsurf, &img_bsurf, 1, &rect);
- } else {
- radv_meta_blit2d(cmd_buffer, NULL, &buf_bsurf, &img_bsurf, 1, &rect);
- }
-
- /* Once we've done the blit, all of the actual information about
- * the image is embedded in the command buffer so we can just
- * increment the offset directly in the image effectively
- * re-binding it to different backing memory.
- */
- buf_bsurf.offset += buf_extent_el.width *
- buf_extent_el.height * buf_bsurf.bs;
- img_bsurf.layer++;
- if (image->type == VK_IMAGE_TYPE_3D)
- slice_3d++;
- else
- slice_array++;
- }
-
- /* Restore conditional rendering. */
- cmd_buffer->state.predicating = old_predicating;
-
- radv_meta_restore(&saved_state, cmd_buffer);
+ struct radv_meta_saved_state saved_state;
+ bool old_predicating;
+ bool cs;
+
+ /* The Vulkan 1.0 spec says "dstImage must have a sample count equal to
+ * VK_SAMPLE_COUNT_1_BIT."
+ */
+ assert(image->info.samples == 1);
+
+ cs = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE ||
+ !radv_image_is_renderable(cmd_buffer->device, image);
+
+ radv_meta_save(&saved_state, cmd_buffer,
+ (cs ? RADV_META_SAVE_COMPUTE_PIPELINE : RADV_META_SAVE_GRAPHICS_PIPELINE) |
+ RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
+
+ /* VK_EXT_conditional_rendering says that copy commands should not be
+ * affected by conditional rendering.
+ */
+ old_predicating = cmd_buffer->state.predicating;
+ cmd_buffer->state.predicating = false;
+
+ /**
+ * From the Vulkan 1.0.6 spec: 18.3 Copying Data Between Images
+ * extent is the size in texels of the source image to copy in width,
+ * height and depth. 1D images use only x and width. 2D images use x, y,
+ * width and height. 3D images use x, y, z, width, height and depth.
+ *
+ *
+ * Also, convert the offsets and extent from units of texels to units of
+ * blocks - which is the highest resolution accessible in this command.
+ */
+ const VkOffset3D img_offset_el = meta_region_offset_el(image, &region->imageOffset);
+ const VkExtent3D bufferExtent = {
+ .width = region->bufferRowLength ? region->bufferRowLength : region->imageExtent.width,
+ .height = region->bufferImageHeight ? region->bufferImageHeight : region->imageExtent.height,
+ };
+ const VkExtent3D buf_extent_el = meta_region_extent_el(image, image->type, &bufferExtent);
+
+ /* Start creating blit rect */
+ const VkExtent3D img_extent_el = meta_region_extent_el(image, image->type, &region->imageExtent);
+ struct radv_meta_blit2d_rect rect = {
+ .width = img_extent_el.width,
+ .height = img_extent_el.height,
+ };
+
+ /* Create blit surfaces */
+ struct radv_meta_blit2d_surf img_bsurf = blit_surf_for_image_level_layer(
+ image, layout, &region->imageSubresource, region->imageSubresource.aspectMask);
+
+ if (!radv_is_buffer_format_supported(img_bsurf.format, NULL)) {
+ uint32_t queue_mask = radv_image_queue_family_mask(image, cmd_buffer->queue_family_index,
+ cmd_buffer->queue_family_index);
+ bool compressed =
+ radv_layout_dcc_compressed(cmd_buffer->device, image, layout, false, queue_mask);
+ if (compressed) {
+ radv_decompress_dcc(cmd_buffer, image,
+ &(VkImageSubresourceRange){
+ .aspectMask = region->imageSubresource.aspectMask,
+ .baseMipLevel = region->imageSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = region->imageSubresource.baseArrayLayer,
+ .layerCount = region->imageSubresource.layerCount,
+ });
+ img_bsurf.disable_compression = true;
+ }
+ img_bsurf.format = vk_format_for_size(vk_format_get_blocksize(img_bsurf.format));
+ }
+
+ struct radv_meta_blit2d_buffer buf_bsurf = {
+ .bs = img_bsurf.bs,
+ .format = img_bsurf.format,
+ .buffer = buffer,
+ .offset = region->bufferOffset,
+ .pitch = buf_extent_el.width,
+ };
+
+ if (image->type == VK_IMAGE_TYPE_3D)
+ img_bsurf.layer = img_offset_el.z;
+ /* Loop through each 3D or array slice */
+ unsigned num_slices_3d = img_extent_el.depth;
+ unsigned num_slices_array = region->imageSubresource.layerCount;
+ unsigned slice_3d = 0;
+ unsigned slice_array = 0;
+ while (slice_3d < num_slices_3d && slice_array < num_slices_array) {
+
+ rect.dst_x = img_offset_el.x;
+ rect.dst_y = img_offset_el.y;
+
+ /* Perform Blit */
+ if (cs) {
+ radv_meta_buffer_to_image_cs(cmd_buffer, &buf_bsurf, &img_bsurf, 1, &rect);
+ } else {
+ radv_meta_blit2d(cmd_buffer, NULL, &buf_bsurf, &img_bsurf, 1, &rect);
+ }
+
+ /* Once we've done the blit, all of the actual information about
+ * the image is embedded in the command buffer so we can just
+ * increment the offset directly in the image effectively
+ * re-binding it to different backing memory.
+ */
+ buf_bsurf.offset += buf_extent_el.width * buf_extent_el.height * buf_bsurf.bs;
+ img_bsurf.layer++;
+ if (image->type == VK_IMAGE_TYPE_3D)
+ slice_3d++;
+ else
+ slice_array++;
+ }
+
+ /* Restore conditional rendering. */
+ cmd_buffer->state.predicating = old_predicating;
+
+ radv_meta_restore(&saved_state, cmd_buffer);
}
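/*
 * A standalone sketch of the addressing math used in copy_buffer_to_image()
 * above: texel extents are converted to block units for block-compressed
 * formats, and the buffer offset then advances by one tightly packed slice
 * per loop iteration. The struct, format and sizes below are hypothetical
 * illustrations, not RADV types or real surface parameters.
 */
#include <inttypes.h>
#include <stdio.h>

struct slice_layout {
   uint32_t row_length_el; /* bufferRowLength, in block units */
   uint32_t height_el;     /* bufferImageHeight, in block units */
   uint32_t block_size;    /* bytes per block ("bs" above) */
};

int
main(void)
{
   /* Assume a BC1-like format: 4x4 texel blocks, 8 bytes per block,
    * copied from a tightly packed 256x256 texel buffer. */
   struct slice_layout l = {
      .row_length_el = 256 / 4,
      .height_el = 256 / 4,
      .block_size = 8,
   };

   uint64_t offset = 0;
   for (unsigned slice = 0; slice < 3; ++slice) {
      printf("slice %u starts at byte offset %" PRIu64 "\n", slice, offset);
      offset += (uint64_t)l.row_length_el * l.height_el * l.block_size;
   }
   return 0;
}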
-void radv_CmdCopyBufferToImage2KHR(
- VkCommandBuffer commandBuffer,
- const VkCopyBufferToImageInfo2KHR* pCopyBufferToImageInfo)
+void
+radv_CmdCopyBufferToImage2KHR(VkCommandBuffer commandBuffer,
+ const VkCopyBufferToImageInfo2KHR *pCopyBufferToImageInfo)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_buffer, src_buffer, pCopyBufferToImageInfo->srcBuffer);
- RADV_FROM_HANDLE(radv_image, dst_image, pCopyBufferToImageInfo->dstImage);
-
- for (unsigned r = 0; r < pCopyBufferToImageInfo->regionCount; r++) {
- copy_buffer_to_image(cmd_buffer, src_buffer, dst_image,
- pCopyBufferToImageInfo->dstImageLayout,
- &pCopyBufferToImageInfo->pRegions[r]);
- }
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_buffer, src_buffer, pCopyBufferToImageInfo->srcBuffer);
+ RADV_FROM_HANDLE(radv_image, dst_image, pCopyBufferToImageInfo->dstImage);
+
+ for (unsigned r = 0; r < pCopyBufferToImageInfo->regionCount; r++) {
+ copy_buffer_to_image(cmd_buffer, src_buffer, dst_image,
+ pCopyBufferToImageInfo->dstImageLayout,
+ &pCopyBufferToImageInfo->pRegions[r]);
+ }
}
static void
-copy_image_to_buffer(struct radv_cmd_buffer *cmd_buffer,
- struct radv_buffer *buffer,
- struct radv_image *image,
- VkImageLayout layout,
- const VkBufferImageCopy2KHR *region)
+copy_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer,
+ struct radv_image *image, VkImageLayout layout,
+ const VkBufferImageCopy2KHR *region)
{
- struct radv_meta_saved_state saved_state;
- bool old_predicating;
-
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_COMPUTE_PIPELINE |
- RADV_META_SAVE_CONSTANTS |
- RADV_META_SAVE_DESCRIPTORS);
-
- /* VK_EXT_conditional_rendering says that copy commands should not be
- * affected by conditional rendering.
- */
- old_predicating = cmd_buffer->state.predicating;
- cmd_buffer->state.predicating = false;
-
- /**
- * From the Vulkan 1.0.6 spec: 18.3 Copying Data Between Images
- * extent is the size in texels of the source image to copy in width,
- * height and depth. 1D images use only x and width. 2D images use x, y,
- * width and height. 3D images use x, y, z, width, height and depth.
- *
- *
- * Also, convert the offsets and extent from units of texels to units of
- * blocks - which is the highest resolution accessible in this command.
- */
- const VkOffset3D img_offset_el =
- meta_region_offset_el(image, &region->imageOffset);
- const VkExtent3D bufferExtent = {
- .width = region->bufferRowLength ?
- region->bufferRowLength : region->imageExtent.width,
- .height = region->bufferImageHeight ?
- region->bufferImageHeight : region->imageExtent.height,
- };
- const VkExtent3D buf_extent_el =
- meta_region_extent_el(image, image->type, &bufferExtent);
-
- /* Start creating blit rect */
- const VkExtent3D img_extent_el =
- meta_region_extent_el(image, image->type, &region->imageExtent);
- struct radv_meta_blit2d_rect rect = {
- .width = img_extent_el.width,
- .height = img_extent_el.height,
- };
-
- /* Create blit surfaces */
- struct radv_meta_blit2d_surf img_info =
- blit_surf_for_image_level_layer(image,
- layout,
- &region->imageSubresource,
- region->imageSubresource.aspectMask);
-
- if (!radv_is_buffer_format_supported(img_info.format, NULL)) {
- uint32_t queue_mask = radv_image_queue_family_mask(image,
- cmd_buffer->queue_family_index,
- cmd_buffer->queue_family_index);
- bool compressed = radv_layout_dcc_compressed(cmd_buffer->device, image, layout, false, queue_mask);
- if (compressed) {
- radv_decompress_dcc(cmd_buffer, image, &(VkImageSubresourceRange) {
- .aspectMask = region->imageSubresource.aspectMask,
- .baseMipLevel = region->imageSubresource.mipLevel,
- .levelCount = 1,
- .baseArrayLayer = region->imageSubresource.baseArrayLayer,
- .layerCount = region->imageSubresource.layerCount,
- });
- img_info.disable_compression = true;
- }
- img_info.format = vk_format_for_size(vk_format_get_blocksize(img_info.format));
- }
-
- struct radv_meta_blit2d_buffer buf_info = {
- .bs = img_info.bs,
- .format = img_info.format,
- .buffer = buffer,
- .offset = region->bufferOffset,
- .pitch = buf_extent_el.width,
- };
-
- if (image->type == VK_IMAGE_TYPE_3D)
- img_info.layer = img_offset_el.z;
- /* Loop through each 3D or array slice */
- unsigned num_slices_3d = img_extent_el.depth;
- unsigned num_slices_array = region->imageSubresource.layerCount;
- unsigned slice_3d = 0;
- unsigned slice_array = 0;
- while (slice_3d < num_slices_3d && slice_array < num_slices_array) {
-
- rect.src_x = img_offset_el.x;
- rect.src_y = img_offset_el.y;
-
-
- /* Perform Blit */
- radv_meta_image_to_buffer(cmd_buffer, &img_info, &buf_info, 1, &rect);
-
- buf_info.offset += buf_extent_el.width *
- buf_extent_el.height * buf_info.bs;
- img_info.layer++;
- if (image->type == VK_IMAGE_TYPE_3D)
- slice_3d++;
- else
- slice_array++;
- }
-
- /* Restore conditional rendering. */
- cmd_buffer->state.predicating = old_predicating;
-
- radv_meta_restore(&saved_state, cmd_buffer);
+ struct radv_meta_saved_state saved_state;
+ bool old_predicating;
+
+ radv_meta_save(
+ &saved_state, cmd_buffer,
+ RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
+
+ /* VK_EXT_conditional_rendering says that copy commands should not be
+ * affected by conditional rendering.
+ */
+ old_predicating = cmd_buffer->state.predicating;
+ cmd_buffer->state.predicating = false;
+
+ /**
+ * From the Vulkan 1.0.6 spec: 18.3 Copying Data Between Images
+ * extent is the size in texels of the source image to copy in width,
+ * height and depth. 1D images use only x and width. 2D images use x, y,
+ * width and height. 3D images use x, y, z, width, height and depth.
+ *
+ *
+ * Also, convert the offsets and extent from units of texels to units of
+ * blocks - which is the highest resolution accessible in this command.
+ */
+ const VkOffset3D img_offset_el = meta_region_offset_el(image, &region->imageOffset);
+ const VkExtent3D bufferExtent = {
+ .width = region->bufferRowLength ? region->bufferRowLength : region->imageExtent.width,
+ .height = region->bufferImageHeight ? region->bufferImageHeight : region->imageExtent.height,
+ };
+ const VkExtent3D buf_extent_el = meta_region_extent_el(image, image->type, &bufferExtent);
+
+ /* Start creating blit rect */
+ const VkExtent3D img_extent_el = meta_region_extent_el(image, image->type, &region->imageExtent);
+ struct radv_meta_blit2d_rect rect = {
+ .width = img_extent_el.width,
+ .height = img_extent_el.height,
+ };
+
+ /* Create blit surfaces */
+ struct radv_meta_blit2d_surf img_info = blit_surf_for_image_level_layer(
+ image, layout, &region->imageSubresource, region->imageSubresource.aspectMask);
+
+ if (!radv_is_buffer_format_supported(img_info.format, NULL)) {
+ uint32_t queue_mask = radv_image_queue_family_mask(image, cmd_buffer->queue_family_index,
+ cmd_buffer->queue_family_index);
+ bool compressed =
+ radv_layout_dcc_compressed(cmd_buffer->device, image, layout, false, queue_mask);
+ if (compressed) {
+ radv_decompress_dcc(cmd_buffer, image,
+ &(VkImageSubresourceRange){
+ .aspectMask = region->imageSubresource.aspectMask,
+ .baseMipLevel = region->imageSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = region->imageSubresource.baseArrayLayer,
+ .layerCount = region->imageSubresource.layerCount,
+ });
+ img_info.disable_compression = true;
+ }
+ img_info.format = vk_format_for_size(vk_format_get_blocksize(img_info.format));
+ }
+
+ struct radv_meta_blit2d_buffer buf_info = {
+ .bs = img_info.bs,
+ .format = img_info.format,
+ .buffer = buffer,
+ .offset = region->bufferOffset,
+ .pitch = buf_extent_el.width,
+ };
+
+ if (image->type == VK_IMAGE_TYPE_3D)
+ img_info.layer = img_offset_el.z;
+ /* Loop through each 3D or array slice */
+ unsigned num_slices_3d = img_extent_el.depth;
+ unsigned num_slices_array = region->imageSubresource.layerCount;
+ unsigned slice_3d = 0;
+ unsigned slice_array = 0;
+ while (slice_3d < num_slices_3d && slice_array < num_slices_array) {
+
+ rect.src_x = img_offset_el.x;
+ rect.src_y = img_offset_el.y;
+
+ /* Perform Blit */
+ radv_meta_image_to_buffer(cmd_buffer, &img_info, &buf_info, 1, &rect);
+
+ buf_info.offset += buf_extent_el.width * buf_extent_el.height * buf_info.bs;
+ img_info.layer++;
+ if (image->type == VK_IMAGE_TYPE_3D)
+ slice_3d++;
+ else
+ slice_array++;
+ }
+
+ /* Restore conditional rendering. */
+ cmd_buffer->state.predicating = old_predicating;
+
+ radv_meta_restore(&saved_state, cmd_buffer);
}
-void radv_CmdCopyImageToBuffer2KHR(
- VkCommandBuffer commandBuffer,
- const VkCopyImageToBufferInfo2KHR* pCopyImageToBufferInfo)
+void
+radv_CmdCopyImageToBuffer2KHR(VkCommandBuffer commandBuffer,
+ const VkCopyImageToBufferInfo2KHR *pCopyImageToBufferInfo)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_image, src_image, pCopyImageToBufferInfo->srcImage);
- RADV_FROM_HANDLE(radv_buffer, dst_buffer, pCopyImageToBufferInfo->dstBuffer);
-
- for (unsigned r = 0; r < pCopyImageToBufferInfo->regionCount; r++) {
- copy_image_to_buffer(cmd_buffer, dst_buffer, src_image,
- pCopyImageToBufferInfo->srcImageLayout,
- &pCopyImageToBufferInfo->pRegions[r]);
- }
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_image, src_image, pCopyImageToBufferInfo->srcImage);
+ RADV_FROM_HANDLE(radv_buffer, dst_buffer, pCopyImageToBufferInfo->dstBuffer);
+
+ for (unsigned r = 0; r < pCopyImageToBufferInfo->regionCount; r++) {
+ copy_image_to_buffer(cmd_buffer, dst_buffer, src_image,
+ pCopyImageToBufferInfo->srcImageLayout,
+ &pCopyImageToBufferInfo->pRegions[r]);
+ }
}
static void
-copy_image(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *src_image,
- VkImageLayout src_image_layout,
- struct radv_image *dst_image,
- VkImageLayout dst_image_layout,
- const VkImageCopy2KHR *region)
+copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,
+ VkImageLayout src_image_layout, struct radv_image *dst_image,
+ VkImageLayout dst_image_layout, const VkImageCopy2KHR *region)
{
- struct radv_meta_saved_state saved_state;
- bool old_predicating;
- bool cs;
-
- /* From the Vulkan 1.0 spec:
- *
- * vkCmdCopyImage can be used to copy image data between multisample
- * images, but both images must have the same number of samples.
- */
- assert(src_image->info.samples == dst_image->info.samples);
-
- cs = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE ||
- !radv_image_is_renderable(cmd_buffer->device, dst_image);
-
- radv_meta_save(&saved_state, cmd_buffer,
- (cs ? RADV_META_SAVE_COMPUTE_PIPELINE :
- RADV_META_SAVE_GRAPHICS_PIPELINE) |
- RADV_META_SAVE_CONSTANTS |
- RADV_META_SAVE_DESCRIPTORS);
-
- /* VK_EXT_conditional_rendering says that copy commands should not be
- * affected by conditional rendering.
- */
- old_predicating = cmd_buffer->state.predicating;
- cmd_buffer->state.predicating = false;
-
- VkImageAspectFlags src_aspects[3] = {VK_IMAGE_ASPECT_PLANE_0_BIT, VK_IMAGE_ASPECT_PLANE_1_BIT, VK_IMAGE_ASPECT_PLANE_2_BIT};
- VkImageAspectFlags dst_aspects[3] = {VK_IMAGE_ASPECT_PLANE_0_BIT, VK_IMAGE_ASPECT_PLANE_1_BIT, VK_IMAGE_ASPECT_PLANE_2_BIT};
- unsigned aspect_count = region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT ? src_image->plane_count : 1;
- if (region->srcSubresource.aspectMask != VK_IMAGE_ASPECT_COLOR_BIT)
- src_aspects[0] = region->srcSubresource.aspectMask;
- if (region->dstSubresource.aspectMask != VK_IMAGE_ASPECT_COLOR_BIT)
- dst_aspects[0] = region->dstSubresource.aspectMask;
-
- for (unsigned a = 0; a < aspect_count; ++a) {
- /* Create blit surfaces */
- struct radv_meta_blit2d_surf b_src =
- blit_surf_for_image_level_layer(src_image,
- src_image_layout,
- &region->srcSubresource,
- src_aspects[a]);
-
- struct radv_meta_blit2d_surf b_dst =
- blit_surf_for_image_level_layer(dst_image,
- dst_image_layout,
- &region->dstSubresource,
- dst_aspects[a]);
-
- uint32_t dst_queue_mask = radv_image_queue_family_mask(dst_image,
- cmd_buffer->queue_family_index,
- cmd_buffer->queue_family_index);
- bool dst_compressed = radv_layout_dcc_compressed(cmd_buffer->device, dst_image, dst_image_layout, false, dst_queue_mask);
- uint32_t src_queue_mask = radv_image_queue_family_mask(src_image,
- cmd_buffer->queue_family_index,
- cmd_buffer->queue_family_index);
- bool src_compressed = radv_layout_dcc_compressed(cmd_buffer->device, src_image, src_image_layout, false, src_queue_mask);
-
- if (!src_compressed || radv_dcc_formats_compatible(b_src.format, b_dst.format)) {
- b_src.format = b_dst.format;
- } else if (!dst_compressed) {
- b_dst.format = b_src.format;
- } else {
- radv_decompress_dcc(cmd_buffer, dst_image, &(VkImageSubresourceRange) {
- .aspectMask = dst_aspects[a],
- .baseMipLevel = region->dstSubresource.mipLevel,
- .levelCount = 1,
- .baseArrayLayer = region->dstSubresource.baseArrayLayer,
- .layerCount = region->dstSubresource.layerCount,
- });
- b_dst.format = b_src.format;
- b_dst.disable_compression = true;
- }
-
-
- /**
- * From the Vulkan 1.0.6 spec: 18.4 Copying Data Between Buffers and Images
- * imageExtent is the size in texels of the image to copy in width, height
- * and depth. 1D images use only x and width. 2D images use x, y, width
- * and height. 3D images use x, y, z, width, height and depth.
- *
- * Also, convert the offsets and extent from units of texels to units of
- * blocks - which is the highest resolution accessible in this command.
- */
- const VkOffset3D dst_offset_el =
- meta_region_offset_el(dst_image, &region->dstOffset);
- const VkOffset3D src_offset_el =
- meta_region_offset_el(src_image, &region->srcOffset);
-
- /*
- * From Vulkan 1.0.68, "Copying Data Between Images":
- * "When copying between compressed and uncompressed formats
- * the extent members represent the texel dimensions of the
- * source image and not the destination."
- * However, we must use the destination image type to avoid
- * clamping depth when copying multiple layers of a 2D image to
- * a 3D image.
- */
- const VkExtent3D img_extent_el =
- meta_region_extent_el(src_image, dst_image->type, &region->extent);
-
- /* Start creating blit rect */
- struct radv_meta_blit2d_rect rect = {
- .width = img_extent_el.width,
- .height = img_extent_el.height,
- };
-
- if (src_image->type == VK_IMAGE_TYPE_3D)
- b_src.layer = src_offset_el.z;
-
- if (dst_image->type == VK_IMAGE_TYPE_3D)
- b_dst.layer = dst_offset_el.z;
-
- /* Loop through each 3D or array slice */
- unsigned num_slices_3d = img_extent_el.depth;
- unsigned num_slices_array = region->dstSubresource.layerCount;
- unsigned slice_3d = 0;
- unsigned slice_array = 0;
- while (slice_3d < num_slices_3d && slice_array < num_slices_array) {
-
- /* Finish creating blit rect */
- rect.dst_x = dst_offset_el.x;
- rect.dst_y = dst_offset_el.y;
- rect.src_x = src_offset_el.x;
- rect.src_y = src_offset_el.y;
-
- /* Perform Blit */
- if (cs) {
- radv_meta_image_to_image_cs(cmd_buffer, &b_src, &b_dst, 1, &rect);
- } else {
- radv_meta_blit2d(cmd_buffer, &b_src, NULL, &b_dst, 1, &rect);
- }
-
- b_src.layer++;
- b_dst.layer++;
- if (dst_image->type == VK_IMAGE_TYPE_3D)
- slice_3d++;
- else
- slice_array++;
- }
- }
-
- /* Restore conditional rendering. */
- cmd_buffer->state.predicating = old_predicating;
-
- radv_meta_restore(&saved_state, cmd_buffer);
+ struct radv_meta_saved_state saved_state;
+ bool old_predicating;
+ bool cs;
+
+ /* From the Vulkan 1.0 spec:
+ *
+ * vkCmdCopyImage can be used to copy image data between multisample
+ * images, but both images must have the same number of samples.
+ */
+ assert(src_image->info.samples == dst_image->info.samples);
+
+ cs = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE ||
+ !radv_image_is_renderable(cmd_buffer->device, dst_image);
+
+ radv_meta_save(&saved_state, cmd_buffer,
+ (cs ? RADV_META_SAVE_COMPUTE_PIPELINE : RADV_META_SAVE_GRAPHICS_PIPELINE) |
+ RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
+
+ /* VK_EXT_conditional_rendering says that copy commands should not be
+ * affected by conditional rendering.
+ */
+ old_predicating = cmd_buffer->state.predicating;
+ cmd_buffer->state.predicating = false;
+
+ VkImageAspectFlags src_aspects[3] = {VK_IMAGE_ASPECT_PLANE_0_BIT, VK_IMAGE_ASPECT_PLANE_1_BIT,
+ VK_IMAGE_ASPECT_PLANE_2_BIT};
+ VkImageAspectFlags dst_aspects[3] = {VK_IMAGE_ASPECT_PLANE_0_BIT, VK_IMAGE_ASPECT_PLANE_1_BIT,
+ VK_IMAGE_ASPECT_PLANE_2_BIT};
+ unsigned aspect_count =
+ region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT ? src_image->plane_count : 1;
+ if (region->srcSubresource.aspectMask != VK_IMAGE_ASPECT_COLOR_BIT)
+ src_aspects[0] = region->srcSubresource.aspectMask;
+ if (region->dstSubresource.aspectMask != VK_IMAGE_ASPECT_COLOR_BIT)
+ dst_aspects[0] = region->dstSubresource.aspectMask;
+
+ for (unsigned a = 0; a < aspect_count; ++a) {
+ /* Create blit surfaces */
+ struct radv_meta_blit2d_surf b_src = blit_surf_for_image_level_layer(
+ src_image, src_image_layout, &region->srcSubresource, src_aspects[a]);
+
+ struct radv_meta_blit2d_surf b_dst = blit_surf_for_image_level_layer(
+ dst_image, dst_image_layout, &region->dstSubresource, dst_aspects[a]);
+
+ uint32_t dst_queue_mask = radv_image_queue_family_mask(
+ dst_image, cmd_buffer->queue_family_index, cmd_buffer->queue_family_index);
+ bool dst_compressed = radv_layout_dcc_compressed(cmd_buffer->device, dst_image,
+ dst_image_layout, false, dst_queue_mask);
+ uint32_t src_queue_mask = radv_image_queue_family_mask(
+ src_image, cmd_buffer->queue_family_index, cmd_buffer->queue_family_index);
+ bool src_compressed = radv_layout_dcc_compressed(cmd_buffer->device, src_image,
+ src_image_layout, false, src_queue_mask);
+
+ if (!src_compressed || radv_dcc_formats_compatible(b_src.format, b_dst.format)) {
+ b_src.format = b_dst.format;
+ } else if (!dst_compressed) {
+ b_dst.format = b_src.format;
+ } else {
+ radv_decompress_dcc(cmd_buffer, dst_image,
+ &(VkImageSubresourceRange){
+ .aspectMask = dst_aspects[a],
+ .baseMipLevel = region->dstSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = region->dstSubresource.baseArrayLayer,
+ .layerCount = region->dstSubresource.layerCount,
+ });
+ b_dst.format = b_src.format;
+ b_dst.disable_compression = true;
+ }
+
+ /**
+ * From the Vulkan 1.0.6 spec: 18.4 Copying Data Between Buffers and Images
+ * imageExtent is the size in texels of the image to copy in width, height
+ * and depth. 1D images use only x and width. 2D images use x, y, width
+ * and height. 3D images use x, y, z, width, height and depth.
+ *
+ * Also, convert the offsets and extent from units of texels to units of
+ * blocks - which is the highest resolution accessible in this command.
+ */
+ const VkOffset3D dst_offset_el = meta_region_offset_el(dst_image, &region->dstOffset);
+ const VkOffset3D src_offset_el = meta_region_offset_el(src_image, &region->srcOffset);
+
+ /*
+ * From Vulkan 1.0.68, "Copying Data Between Images":
+ * "When copying between compressed and uncompressed formats
+ * the extent members represent the texel dimensions of the
+ * source image and not the destination."
+ * However, we must use the destination image type to avoid
+ * clamping depth when copying multiple layers of a 2D image to
+ * a 3D image.
+ */
+ const VkExtent3D img_extent_el =
+ meta_region_extent_el(src_image, dst_image->type, &region->extent);
+
+ /* Start creating blit rect */
+ struct radv_meta_blit2d_rect rect = {
+ .width = img_extent_el.width,
+ .height = img_extent_el.height,
+ };
+
+ if (src_image->type == VK_IMAGE_TYPE_3D)
+ b_src.layer = src_offset_el.z;
+
+ if (dst_image->type == VK_IMAGE_TYPE_3D)
+ b_dst.layer = dst_offset_el.z;
+
+ /* Loop through each 3D or array slice */
+ unsigned num_slices_3d = img_extent_el.depth;
+ unsigned num_slices_array = region->dstSubresource.layerCount;
+ unsigned slice_3d = 0;
+ unsigned slice_array = 0;
+ while (slice_3d < num_slices_3d && slice_array < num_slices_array) {
+
+ /* Finish creating blit rect */
+ rect.dst_x = dst_offset_el.x;
+ rect.dst_y = dst_offset_el.y;
+ rect.src_x = src_offset_el.x;
+ rect.src_y = src_offset_el.y;
+
+ /* Perform Blit */
+ if (cs) {
+ radv_meta_image_to_image_cs(cmd_buffer, &b_src, &b_dst, 1, &rect);
+ } else {
+ radv_meta_blit2d(cmd_buffer, &b_src, NULL, &b_dst, 1, &rect);
+ }
+
+ b_src.layer++;
+ b_dst.layer++;
+ if (dst_image->type == VK_IMAGE_TYPE_3D)
+ slice_3d++;
+ else
+ slice_array++;
+ }
+ }
+
+ /* Restore conditional rendering. */
+ cmd_buffer->state.predicating = old_predicating;
+
+ radv_meta_restore(&saved_state, cmd_buffer);
}
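/*
 * A condensed sketch of the format-selection policy in copy_image() above:
 * keep the destination format when the source is uncompressed or the DCC
 * formats are compatible, fall back to the source format when only the
 * destination is uncompressed, and otherwise decompress the destination
 * first. The enum and helper are hypothetical stand-ins for the RADV checks.
 */
#include <stdbool.h>
#include <stdio.h>

enum copy_path {
   USE_DST_FORMAT,      /* b_src.format = b_dst.format */
   USE_SRC_FORMAT,      /* b_dst.format = b_src.format */
   DECOMPRESS_DST_FIRST /* radv_decompress_dcc(), then copy uncompressed */
};

static enum copy_path
pick_copy_path(bool src_compressed, bool dst_compressed, bool dcc_compatible)
{
   if (!src_compressed || dcc_compatible)
      return USE_DST_FORMAT;
   if (!dst_compressed)
      return USE_SRC_FORMAT;
   return DECOMPRESS_DST_FIRST;
}

int
main(void)
{
   /* Both sides DCC-compressed with incompatible formats -> decompress. */
   printf("%d\n", pick_copy_path(true, true, false));
   return 0;
}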
-void radv_CmdCopyImage2KHR(
- VkCommandBuffer commandBuffer,
- const VkCopyImageInfo2KHR* pCopyImageInfo)
+void
+radv_CmdCopyImage2KHR(VkCommandBuffer commandBuffer, const VkCopyImageInfo2KHR *pCopyImageInfo)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_image, src_image, pCopyImageInfo->srcImage);
- RADV_FROM_HANDLE(radv_image, dst_image, pCopyImageInfo->dstImage);
-
- for (unsigned r = 0; r < pCopyImageInfo->regionCount; r++) {
- copy_image(cmd_buffer,
- src_image, pCopyImageInfo->srcImageLayout,
- dst_image, pCopyImageInfo->dstImageLayout,
- &pCopyImageInfo->pRegions[r]);
- }
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_image, src_image, pCopyImageInfo->srcImage);
+ RADV_FROM_HANDLE(radv_image, dst_image, pCopyImageInfo->dstImage);
+
+ for (unsigned r = 0; r < pCopyImageInfo->regionCount; r++) {
+ copy_image(cmd_buffer, src_image, pCopyImageInfo->srcImageLayout, dst_image,
+ pCopyImageInfo->dstImageLayout, &pCopyImageInfo->pRegions[r]);
+ }
}
diff --git a/src/amd/vulkan/radv_meta_dcc_retile.c b/src/amd/vulkan/radv_meta_dcc_retile.c
index 6153155e081..d2c2466c461 100644
--- a/src/amd/vulkan/radv_meta_dcc_retile.c
+++ b/src/amd/vulkan/radv_meta_dcc_retile.c
@@ -21,295 +21,271 @@
* IN THE SOFTWARE.
*/
-#include "radv_private.h"
#include "radv_meta.h"
+#include "radv_private.h"
static nir_shader *
build_dcc_retile_compute_shader(struct radv_device *dev)
{
- const struct glsl_type *buf_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF,
- false,
- GLSL_TYPE_UINT);
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "dcc_retile_compute");
-
- b.shader->info.cs.local_size[0] = 256;
- b.shader->info.cs.local_size[1] = 1;
- b.shader->info.cs.local_size[2] = 1;
-
- nir_variable *indices = nir_variable_create(b.shader, nir_var_uniform,
- buf_type, "indices_in");
- indices->data.descriptor_set = 0;
- indices->data.binding = 0;
- nir_variable *input_dcc = nir_variable_create(b.shader, nir_var_uniform,
- buf_type, "dcc_in");
- input_dcc->data.descriptor_set = 0;
- input_dcc->data.binding = 1;
- nir_variable *output_dcc = nir_variable_create(b.shader, nir_var_uniform,
- buf_type, "dcc_out");
- output_dcc->data.descriptor_set = 0;
- output_dcc->data.binding = 2;
-
- nir_ssa_def *indices_ref = &nir_build_deref_var(&b, indices)->dest.ssa;
- nir_ssa_def *input_dcc_ref = &nir_build_deref_var(&b, input_dcc)->dest.ssa;
- nir_ssa_def *output_dcc_ref = &nir_build_deref_var(&b, output_dcc)->dest.ssa;
-
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
- nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
- nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info.cs.local_size[0],
- 0, 0, 0);
-
- nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
-
- nir_intrinsic_instr *index_vals = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_load);
- index_vals->num_components = 2;
- index_vals->src[0] = nir_src_for_ssa(indices_ref);
- index_vals->src[1] = nir_src_for_ssa(global_id);
- index_vals->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
- index_vals->src[3] = nir_src_for_ssa(nir_imm_int(&b, 0));
- nir_ssa_dest_init(&index_vals->instr, &index_vals->dest, 2, 32, "indices");
- nir_builder_instr_insert(&b, &index_vals->instr);
-
- nir_ssa_def *src = nir_channels(&b, &index_vals->dest.ssa, 1);
- nir_ssa_def *dst = nir_channels(&b, &index_vals->dest.ssa, 2);
-
- nir_intrinsic_instr *dcc_val = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_load);
- dcc_val->num_components = 1;
- dcc_val->src[0] = nir_src_for_ssa(input_dcc_ref);
- dcc_val->src[1] = nir_src_for_ssa(nir_vec4(&b, src, src, src, src));
- dcc_val->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
- dcc_val->src[3] = nir_src_for_ssa(nir_imm_int(&b, 0));
- nir_ssa_dest_init(&dcc_val->instr, &dcc_val->dest, 1, 32, "dcc_val");
- nir_builder_instr_insert(&b, &dcc_val->instr);
-
- nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
- store->num_components = 1;
- store->src[0] = nir_src_for_ssa(output_dcc_ref);
- store->src[1] = nir_src_for_ssa(nir_vec4(&b, dst, dst, dst, dst));
- store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
- store->src[3] = nir_src_for_ssa(&dcc_val->dest.ssa);
- store->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0));
-
- nir_builder_instr_insert(&b, &store->instr);
- return b.shader;
+ const struct glsl_type *buf_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_UINT);
+ nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "dcc_retile_compute");
+
+ b.shader->info.cs.local_size[0] = 256;
+ b.shader->info.cs.local_size[1] = 1;
+ b.shader->info.cs.local_size[2] = 1;
+
+ nir_variable *indices = nir_variable_create(b.shader, nir_var_uniform, buf_type, "indices_in");
+ indices->data.descriptor_set = 0;
+ indices->data.binding = 0;
+ nir_variable *input_dcc = nir_variable_create(b.shader, nir_var_uniform, buf_type, "dcc_in");
+ input_dcc->data.descriptor_set = 0;
+ input_dcc->data.binding = 1;
+ nir_variable *output_dcc = nir_variable_create(b.shader, nir_var_uniform, buf_type, "dcc_out");
+ output_dcc->data.descriptor_set = 0;
+ output_dcc->data.binding = 2;
+
+ nir_ssa_def *indices_ref = &nir_build_deref_var(&b, indices)->dest.ssa;
+ nir_ssa_def *input_dcc_ref = &nir_build_deref_var(&b, input_dcc)->dest.ssa;
+ nir_ssa_def *output_dcc_ref = &nir_build_deref_var(&b, output_dcc)->dest.ssa;
+
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
+ nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], 0, 0, 0);
+
+ nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+
+ nir_intrinsic_instr *index_vals =
+ nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_load);
+ index_vals->num_components = 2;
+ index_vals->src[0] = nir_src_for_ssa(indices_ref);
+ index_vals->src[1] = nir_src_for_ssa(global_id);
+ index_vals->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
+ index_vals->src[3] = nir_src_for_ssa(nir_imm_int(&b, 0));
+ nir_ssa_dest_init(&index_vals->instr, &index_vals->dest, 2, 32, "indices");
+ nir_builder_instr_insert(&b, &index_vals->instr);
+
+ nir_ssa_def *src = nir_channels(&b, &index_vals->dest.ssa, 1);
+ nir_ssa_def *dst = nir_channels(&b, &index_vals->dest.ssa, 2);
+
+ nir_intrinsic_instr *dcc_val =
+ nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_load);
+ dcc_val->num_components = 1;
+ dcc_val->src[0] = nir_src_for_ssa(input_dcc_ref);
+ dcc_val->src[1] = nir_src_for_ssa(nir_vec4(&b, src, src, src, src));
+ dcc_val->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
+ dcc_val->src[3] = nir_src_for_ssa(nir_imm_int(&b, 0));
+ nir_ssa_dest_init(&dcc_val->instr, &dcc_val->dest, 1, 32, "dcc_val");
+ nir_builder_instr_insert(&b, &dcc_val->instr);
+
+ nir_intrinsic_instr *store =
+ nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
+ store->num_components = 1;
+ store->src[0] = nir_src_for_ssa(output_dcc_ref);
+ store->src[1] = nir_src_for_ssa(nir_vec4(&b, dst, dst, dst, dst));
+ store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
+ store->src[3] = nir_src_for_ssa(&dcc_val->dest.ssa);
+ store->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0));
+
+ nir_builder_instr_insert(&b, &store->instr);
+ return b.shader;
}
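/*
 * What the NIR above computes, written as a plain C reference loop: each
 * invocation reads one (src, dst) pair from the retile map and moves a
 * single DCC byte. Buffer and parameter names are hypothetical; the real map
 * is a texel buffer of 16- or 32-bit pairs (dcc_retile_use_uint16), and
 * 32-bit pairs are assumed here.
 */
#include <stddef.h>
#include <stdint.h>

void
retile_dcc_reference(uint8_t *display_dcc, const uint8_t *dcc,
                     const uint32_t *retile_map, size_t num_pairs)
{
   for (size_t i = 0; i < num_pairs; ++i) {
      uint32_t src = retile_map[2 * i + 0]; /* indices_in.x */
      uint32_t dst = retile_map[2 * i + 1]; /* indices_in.y */
      display_dcc[dst] = dcc[src];          /* dcc_out[dst] = dcc_in[src] */
   }
}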
void
radv_device_finish_meta_dcc_retile_state(struct radv_device *device)
{
- struct radv_meta_state *state = &device->meta_state;
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->dcc_retile.pipeline,
- &state->alloc);
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->dcc_retile.p_layout,
- &state->alloc);
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- state->dcc_retile.ds_layout,
- &state->alloc);
-
- /* Reset for next finish. */
- memset(&state->dcc_retile, 0, sizeof(state->dcc_retile));
+ struct radv_meta_state *state = &device->meta_state;
+
+ radv_DestroyPipeline(radv_device_to_handle(device), state->dcc_retile.pipeline, &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->dcc_retile.p_layout,
+ &state->alloc);
+ radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->dcc_retile.ds_layout,
+ &state->alloc);
+
+ /* Reset for next finish. */
+ memset(&state->dcc_retile, 0, sizeof(state->dcc_retile));
}
VkResult
radv_device_init_meta_dcc_retile_state(struct radv_device *device)
{
- VkResult result = VK_SUCCESS;
- nir_shader *cs = build_dcc_retile_compute_shader(device);
-
- VkDescriptorSetLayoutCreateInfo ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 3,
- .pBindings = (VkDescriptorSetLayoutBinding[]) {
- {
- .binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- {
- .binding = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- {
- .binding = 2,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- }
- };
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
- &ds_create_info,
- &device->meta_state.alloc,
- &device->meta_state.dcc_retile.ds_layout);
- if (result != VK_SUCCESS)
- goto cleanup;
-
-
- VkPipelineLayoutCreateInfo pl_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 1,
- .pSetLayouts = &device->meta_state.dcc_retile.ds_layout,
- .pushConstantRangeCount = 0,
- };
-
- result = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &pl_create_info,
- &device->meta_state.alloc,
- &device->meta_state.dcc_retile.p_layout);
- if (result != VK_SUCCESS)
- goto cleanup;
-
- /* compute shader */
-
- VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = vk_shader_module_handle_from_nir(cs),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = pipeline_shader_stage,
- .flags = 0,
- .layout = device->meta_state.dcc_retile.p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- 1, &vk_pipeline_info, NULL,
- &device->meta_state.dcc_retile.pipeline);
- if (result != VK_SUCCESS)
- goto cleanup;
+ VkResult result = VK_SUCCESS;
+ nir_shader *cs = build_dcc_retile_compute_shader(device);
+
+ VkDescriptorSetLayoutCreateInfo ds_create_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 3,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ {.binding = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ {.binding = 2,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.dcc_retile.ds_layout);
+ if (result != VK_SUCCESS)
+ goto cleanup;
+
+ VkPipelineLayoutCreateInfo pl_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &device->meta_state.dcc_retile.ds_layout,
+ .pushConstantRangeCount = 0,
+ };
+
+ result =
+ radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
+ &device->meta_state.alloc, &device->meta_state.dcc_retile.p_layout);
+ if (result != VK_SUCCESS)
+ goto cleanup;
+
+ /* compute shader */
+
+ VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(cs),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = pipeline_shader_stage,
+ .flags = 0,
+ .layout = device->meta_state.dcc_retile.p_layout,
+ };
+
+ result = radv_CreateComputePipelines(
+ radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
+ &vk_pipeline_info, NULL, &device->meta_state.dcc_retile.pipeline);
+ if (result != VK_SUCCESS)
+ goto cleanup;
cleanup:
- if (result != VK_SUCCESS)
- radv_device_finish_meta_dcc_retile_state(device);
- ralloc_free(cs);
- return result;
+ if (result != VK_SUCCESS)
+ radv_device_finish_meta_dcc_retile_state(device);
+ ralloc_free(cs);
+ return result;
}
void
radv_retile_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image)
{
- struct radv_meta_saved_state saved_state;
- struct radv_device *device = cmd_buffer->device;
- uint32_t retile_map_size = ac_surface_get_retile_map_size(&image->planes[0].surface);
-
- assert(image->type == VK_IMAGE_TYPE_2D);
- assert(image->info.array_size == 1 && image->info.levels == 1);
-
- struct radv_cmd_state *state = &cmd_buffer->state;
-
- state->flush_bits |= radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_READ_BIT, image) |
- radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);
-
- /* Compile pipelines if not already done so. */
- if (!cmd_buffer->device->meta_state.dcc_retile.pipeline) {
- VkResult ret = radv_device_init_meta_dcc_retile_state(cmd_buffer->device);
- if (ret != VK_SUCCESS) {
- cmd_buffer->record_result = ret;
- return;
- }
- }
-
- radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_DESCRIPTORS |
- RADV_META_SAVE_COMPUTE_PIPELINE);
-
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.dcc_retile.pipeline);
-
- struct radv_buffer buffer = {
- .size = image->size,
- .bo = image->bo,
- .offset = image->offset
- };
-
- struct radv_buffer retile_buffer = {
- .size = retile_map_size,
- .bo = image->retile_map,
- .offset = 0
- };
-
- struct radv_buffer_view views[3];
- VkBufferView view_handles[3];
- radv_buffer_view_init(views + 0, cmd_buffer->device, &(VkBufferViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
- .buffer = radv_buffer_to_handle(&retile_buffer),
- .offset = 0,
- .range = retile_map_size,
- .format = image->planes[0].surface.u.gfx9.dcc_retile_use_uint16 ?
- VK_FORMAT_R16G16_UINT : VK_FORMAT_R32G32_UINT,
- });
- radv_buffer_view_init(views + 1, cmd_buffer->device, &(VkBufferViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
- .buffer = radv_buffer_to_handle(&buffer),
- .offset = image->planes[0].surface.dcc_offset,
- .range = image->planes[0].surface.dcc_size,
- .format = VK_FORMAT_R8_UINT,
- });
- radv_buffer_view_init(views + 2, cmd_buffer->device, &(VkBufferViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
- .buffer = radv_buffer_to_handle(&buffer),
- .offset = image->planes[0].surface.display_dcc_offset,
- .range = image->planes[0].surface.u.gfx9.display_dcc_size,
- .format = VK_FORMAT_R8_UINT,
- });
- for (unsigned i = 0; i < 3; ++i)
- view_handles[i] = radv_buffer_view_to_handle(&views[i]);
-
- radv_meta_push_descriptor_set(cmd_buffer,
- VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.dcc_retile.p_layout,
- 0, /* set */
- 3, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .pTexelBufferView = &view_handles[0],
- },
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 1,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .pTexelBufferView = &view_handles[1],
- },
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 2,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .pTexelBufferView = &view_handles[2],
- },
- });
-
- /* src+dst pairs count double, so the number of DCC bytes we move is
- * actually half of dcc_retile_num_elements. */
- radv_unaligned_dispatch(cmd_buffer, image->planes[0].surface.u.gfx9.dcc_retile_num_elements / 2, 1, 1);
-
- radv_meta_restore(&saved_state, cmd_buffer);
-
- state->flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
- radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);
+ struct radv_meta_saved_state saved_state;
+ struct radv_device *device = cmd_buffer->device;
+ uint32_t retile_map_size = ac_surface_get_retile_map_size(&image->planes[0].surface);
+
+ assert(image->type == VK_IMAGE_TYPE_2D);
+ assert(image->info.array_size == 1 && image->info.levels == 1);
+
+ struct radv_cmd_state *state = &cmd_buffer->state;
+
+ state->flush_bits |= radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_READ_BIT, image) |
+ radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);
+
+ /* Compile pipelines if not already done so. */
+ if (!cmd_buffer->device->meta_state.dcc_retile.pipeline) {
+ VkResult ret = radv_device_init_meta_dcc_retile_state(cmd_buffer->device);
+ if (ret != VK_SUCCESS) {
+ cmd_buffer->record_result = ret;
+ return;
+ }
+ }
+
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE);
+
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
+ device->meta_state.dcc_retile.pipeline);
+
+ struct radv_buffer buffer = {.size = image->size, .bo = image->bo, .offset = image->offset};
+
+ struct radv_buffer retile_buffer = {.size = retile_map_size,
+ .bo = image->retile_map,
+ .offset = 0};
+
+ struct radv_buffer_view views[3];
+ VkBufferView view_handles[3];
+ radv_buffer_view_init(
+ views + 0, cmd_buffer->device,
+ &(VkBufferViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
+ .buffer = radv_buffer_to_handle(&retile_buffer),
+ .offset = 0,
+ .range = retile_map_size,
+ .format = image->planes[0].surface.u.gfx9.dcc_retile_use_uint16 ? VK_FORMAT_R16G16_UINT
+ : VK_FORMAT_R32G32_UINT,
+ });
+ radv_buffer_view_init(views + 1, cmd_buffer->device,
+ &(VkBufferViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
+ .buffer = radv_buffer_to_handle(&buffer),
+ .offset = image->planes[0].surface.dcc_offset,
+ .range = image->planes[0].surface.dcc_size,
+ .format = VK_FORMAT_R8_UINT,
+ });
+ radv_buffer_view_init(views + 2, cmd_buffer->device,
+ &(VkBufferViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
+ .buffer = radv_buffer_to_handle(&buffer),
+ .offset = image->planes[0].surface.display_dcc_offset,
+ .range = image->planes[0].surface.u.gfx9.display_dcc_size,
+ .format = VK_FORMAT_R8_UINT,
+ });
+ for (unsigned i = 0; i < 3; ++i)
+ view_handles[i] = radv_buffer_view_to_handle(&views[i]);
+
+ radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
+ device->meta_state.dcc_retile.p_layout, 0, /* set */
+ 3, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){
+ {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+ .pTexelBufferView = &view_handles[0],
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 1,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+ .pTexelBufferView = &view_handles[1],
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 2,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+ .pTexelBufferView = &view_handles[2],
+ },
+ });
+
+ /* src+dst pairs count double, so the number of DCC bytes we move is
+ * actually half of dcc_retile_num_elements. */
+ radv_unaligned_dispatch(cmd_buffer, image->planes[0].surface.u.gfx9.dcc_retile_num_elements / 2,
+ 1, 1);
+
+ radv_meta_restore(&saved_state, cmd_buffer);
+
+ state->flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
+ radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);
}
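/*
 * A small arithmetic sketch of the dispatch size above: the retile map stores
 * src and dst indices interleaved, so half of dcc_retile_num_elements is the
 * number of byte moves, and the 256-wide workgroup declared in
 * build_dcc_retile_compute_shader() gives the group count a rounded-up
 * dispatch would use. The element count below is hypothetical.
 */
#include <stdio.h>

int
main(void)
{
   unsigned dcc_retile_num_elements = 100000; /* hypothetical */
   unsigned moves = dcc_retile_num_elements / 2;
   unsigned wg_size = 256; /* local_size[0] in the compute shader above */
   unsigned groups = (moves + wg_size - 1) / wg_size;

   printf("%u byte moves in %u workgroups\n", moves, groups); /* 50000, 196 */
   return 0;
}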
-
diff --git a/src/amd/vulkan/radv_meta_decompress.c b/src/amd/vulkan/radv_meta_decompress.c
index 25a3ff8bba7..73922d1d751 100644
--- a/src/amd/vulkan/radv_meta_decompress.c
+++ b/src/amd/vulkan/radv_meta_decompress.c
@@ -29,542 +29,506 @@
#include "sid.h"
enum radv_depth_op {
- DEPTH_DECOMPRESS,
- DEPTH_RESUMMARIZE,
+ DEPTH_DECOMPRESS,
+ DEPTH_RESUMMARIZE,
};
enum radv_depth_decompress {
- DECOMPRESS_DEPTH_STENCIL,
- DECOMPRESS_DEPTH,
- DECOMPRESS_STENCIL,
+ DECOMPRESS_DEPTH_STENCIL,
+ DECOMPRESS_DEPTH,
+ DECOMPRESS_STENCIL,
};
static VkResult
-create_pass(struct radv_device *device,
- uint32_t samples,
- VkRenderPass *pass)
+create_pass(struct radv_device *device, uint32_t samples, VkRenderPass *pass)
{
- VkResult result;
- VkDevice device_h = radv_device_to_handle(device);
- const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
- VkAttachmentDescription2 attachment;
-
- attachment.sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2;
- attachment.flags = 0;
- attachment.format = VK_FORMAT_D32_SFLOAT_S8_UINT;
- attachment.samples = samples;
- attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
- attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
- attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
- attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
- attachment.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
- attachment.finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
-
- result = radv_CreateRenderPass2(device_h,
- &(VkRenderPassCreateInfo2) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
- .attachmentCount = 1,
- .pAttachments = &attachment,
- .subpassCount = 1,
- .pSubpasses = &(VkSubpassDescription2) {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
- .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
- .inputAttachmentCount = 0,
- .colorAttachmentCount = 0,
- .pColorAttachments = NULL,
- .pResolveAttachments = NULL,
- .pDepthStencilAttachment = &(VkAttachmentReference2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
- .attachment = 0,
- .layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
- },
- .preserveAttachmentCount = 0,
- .pPreserveAttachments = NULL,
- },
- .dependencyCount = 2,
- .pDependencies = (VkSubpassDependency2[]) {
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = VK_SUBPASS_EXTERNAL,
- .dstSubpass = 0,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- },
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = 0,
- .dstSubpass = VK_SUBPASS_EXTERNAL,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- }
- },
- },
- alloc,
- pass);
-
- return result;
+ VkResult result;
+ VkDevice device_h = radv_device_to_handle(device);
+ const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
+ VkAttachmentDescription2 attachment;
+
+ attachment.sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2;
+ attachment.flags = 0;
+ attachment.format = VK_FORMAT_D32_SFLOAT_S8_UINT;
+ attachment.samples = samples;
+ attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
+ attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
+ attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
+ attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
+ attachment.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
+ attachment.finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
+
+ result = radv_CreateRenderPass2(
+ device_h,
+ &(VkRenderPassCreateInfo2){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
+ .attachmentCount = 1,
+ .pAttachments = &attachment,
+ .subpassCount = 1,
+ .pSubpasses =
+ &(VkSubpassDescription2){
+ .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 0,
+ .pColorAttachments = NULL,
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment =
+ &(VkAttachmentReference2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
+ .attachment = 0,
+ .layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
+ },
+ .preserveAttachmentCount = 0,
+ .pPreserveAttachments = NULL,
+ },
+ .dependencyCount = 2,
+ .pDependencies =
+ (VkSubpassDependency2[]){{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = VK_SUBPASS_EXTERNAL,
+ .dstSubpass = 0,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0},
+ {.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = 0,
+ .dstSubpass = VK_SUBPASS_EXTERNAL,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0}},
+ },
+ alloc, pass);
+
+ return result;
}
static VkResult
create_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout)
{
- VkPipelineLayoutCreateInfo pl_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 0,
- .pSetLayouts = NULL,
- .pushConstantRangeCount = 0,
- .pPushConstantRanges = NULL,
- };
-
- return radv_CreatePipelineLayout(radv_device_to_handle(device),
- &pl_create_info,
- &device->meta_state.alloc,
- layout);
+ VkPipelineLayoutCreateInfo pl_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 0,
+ .pSetLayouts = NULL,
+ .pushConstantRangeCount = 0,
+ .pPushConstantRanges = NULL,
+ };
+
+ return radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
+ &device->meta_state.alloc, layout);
}
static VkResult
-create_pipeline(struct radv_device *device,
- uint32_t samples,
- VkRenderPass pass,
- VkPipelineLayout layout,
- enum radv_depth_op op,
- enum radv_depth_decompress decompress,
- VkPipeline *pipeline)
+create_pipeline(struct radv_device *device, uint32_t samples, VkRenderPass pass,
+ VkPipelineLayout layout, enum radv_depth_op op,
+ enum radv_depth_decompress decompress, VkPipeline *pipeline)
{
- VkResult result;
- VkDevice device_h = radv_device_to_handle(device);
-
- mtx_lock(&device->meta_state.mtx);
- if (*pipeline) {
- mtx_unlock(&device->meta_state.mtx);
- return VK_SUCCESS;
- }
-
- nir_shader *vs_module = radv_meta_build_nir_vs_generate_vertices();
- nir_shader *fs_module = radv_meta_build_nir_fs_noop();
-
- if (!vs_module || !fs_module) {
- /* XXX: Need more accurate error */
- result = VK_ERROR_OUT_OF_HOST_MEMORY;
- goto cleanup;
- }
-
- const VkPipelineSampleLocationsStateCreateInfoEXT sample_locs_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT,
- .sampleLocationsEnable = false,
- };
-
- const VkGraphicsPipelineCreateInfo pipeline_create_info = {
- .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
- .stageCount = 2,
- .pStages = (VkPipelineShaderStageCreateInfo[]) {
- {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_VERTEX_BIT,
- .module = vk_shader_module_handle_from_nir(vs_module),
- .pName = "main",
- },
- {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
- .module = vk_shader_module_handle_from_nir(fs_module),
- .pName = "main",
- },
- },
- .pVertexInputState = &(VkPipelineVertexInputStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
- .vertexBindingDescriptionCount = 0,
- .vertexAttributeDescriptionCount = 0,
- },
- .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
- .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
- .primitiveRestartEnable = false,
- },
- .pViewportState = &(VkPipelineViewportStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
- .viewportCount = 1,
- .scissorCount = 1,
- },
- .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
- .depthClampEnable = false,
- .rasterizerDiscardEnable = false,
- .polygonMode = VK_POLYGON_MODE_FILL,
- .cullMode = VK_CULL_MODE_NONE,
- .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
- },
- .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
- .pNext = &sample_locs_create_info,
- .rasterizationSamples = samples,
- .sampleShadingEnable = false,
- .pSampleMask = NULL,
- .alphaToCoverageEnable = false,
- .alphaToOneEnable = false,
- },
- .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
- .logicOpEnable = false,
- .attachmentCount = 0,
- .pAttachments = NULL,
- },
- .pDepthStencilState = &(VkPipelineDepthStencilStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
- .depthTestEnable = false,
- .depthWriteEnable = false,
- .depthBoundsTestEnable = false,
- .stencilTestEnable = false,
- },
- .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
- .dynamicStateCount = 3,
- .pDynamicStates = (VkDynamicState[]) {
- VK_DYNAMIC_STATE_VIEWPORT,
- VK_DYNAMIC_STATE_SCISSOR,
- VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT,
- },
- },
- .layout = layout,
- .renderPass = pass,
- .subpass = 0,
- };
-
- struct radv_graphics_pipeline_create_info extra = {
- .use_rectlist = true,
- .depth_compress_disable = decompress == DECOMPRESS_DEPTH_STENCIL ||
- decompress == DECOMPRESS_DEPTH,
- .stencil_compress_disable = decompress == DECOMPRESS_DEPTH_STENCIL ||
- decompress == DECOMPRESS_STENCIL,
- .resummarize_enable = op == DEPTH_RESUMMARIZE,
- };
-
- result = radv_graphics_pipeline_create(device_h,
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- &pipeline_create_info, &extra,
- &device->meta_state.alloc,
- pipeline);
+ VkResult result;
+ VkDevice device_h = radv_device_to_handle(device);
+
+ mtx_lock(&device->meta_state.mtx);
+ if (*pipeline) {
+ mtx_unlock(&device->meta_state.mtx);
+ return VK_SUCCESS;
+ }
+
+ nir_shader *vs_module = radv_meta_build_nir_vs_generate_vertices();
+ nir_shader *fs_module = radv_meta_build_nir_fs_noop();
+
+ if (!vs_module || !fs_module) {
+ /* XXX: Need more accurate error */
+ result = VK_ERROR_OUT_OF_HOST_MEMORY;
+ goto cleanup;
+ }
+
+ const VkPipelineSampleLocationsStateCreateInfoEXT sample_locs_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT,
+ .sampleLocationsEnable = false,
+ };
+
+ const VkGraphicsPipelineCreateInfo pipeline_create_info = {
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .stageCount = 2,
+ .pStages =
+ (VkPipelineShaderStageCreateInfo[]){
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_VERTEX_BIT,
+ .module = vk_shader_module_handle_from_nir(vs_module),
+ .pName = "main",
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .module = vk_shader_module_handle_from_nir(fs_module),
+ .pName = "main",
+ },
+ },
+ .pVertexInputState =
+ &(VkPipelineVertexInputStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .vertexBindingDescriptionCount = 0,
+ .vertexAttributeDescriptionCount = 0,
+ },
+ .pInputAssemblyState =
+ &(VkPipelineInputAssemblyStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
+ .primitiveRestartEnable = false,
+ },
+ .pViewportState =
+ &(VkPipelineViewportStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .viewportCount = 1,
+ .scissorCount = 1,
+ },
+ .pRasterizationState =
+ &(VkPipelineRasterizationStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .depthClampEnable = false,
+ .rasterizerDiscardEnable = false,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .cullMode = VK_CULL_MODE_NONE,
+ .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
+ },
+ .pMultisampleState =
+ &(VkPipelineMultisampleStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .pNext = &sample_locs_create_info,
+ .rasterizationSamples = samples,
+ .sampleShadingEnable = false,
+ .pSampleMask = NULL,
+ .alphaToCoverageEnable = false,
+ .alphaToOneEnable = false,
+ },
+ .pColorBlendState =
+ &(VkPipelineColorBlendStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .logicOpEnable = false,
+ .attachmentCount = 0,
+ .pAttachments = NULL,
+ },
+ .pDepthStencilState =
+ &(VkPipelineDepthStencilStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
+ .depthTestEnable = false,
+ .depthWriteEnable = false,
+ .depthBoundsTestEnable = false,
+ .stencilTestEnable = false,
+ },
+ .pDynamicState =
+ &(VkPipelineDynamicStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .dynamicStateCount = 3,
+ .pDynamicStates =
+ (VkDynamicState[]){
+ VK_DYNAMIC_STATE_VIEWPORT,
+ VK_DYNAMIC_STATE_SCISSOR,
+ VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT,
+ },
+ },
+ .layout = layout,
+ .renderPass = pass,
+ .subpass = 0,
+ };
+
+ struct radv_graphics_pipeline_create_info extra = {
+ .use_rectlist = true,
+ .depth_compress_disable =
+ decompress == DECOMPRESS_DEPTH_STENCIL || decompress == DECOMPRESS_DEPTH,
+ .stencil_compress_disable =
+ decompress == DECOMPRESS_DEPTH_STENCIL || decompress == DECOMPRESS_STENCIL,
+ .resummarize_enable = op == DEPTH_RESUMMARIZE,
+ };
+
+ result = radv_graphics_pipeline_create(
+ device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache), &pipeline_create_info,
+ &extra, &device->meta_state.alloc, pipeline);
cleanup:
- ralloc_free(fs_module);
- ralloc_free(vs_module);
- mtx_unlock(&device->meta_state.mtx);
- return result;
+ ralloc_free(fs_module);
+ ralloc_free(vs_module);
+ mtx_unlock(&device->meta_state.mtx);
+ return result;
}
void
radv_device_finish_meta_depth_decomp_state(struct radv_device *device)
{
- struct radv_meta_state *state = &device->meta_state;
-
- for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) {
- radv_DestroyRenderPass(radv_device_to_handle(device),
- state->depth_decomp[i].pass,
- &state->alloc);
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->depth_decomp[i].p_layout,
- &state->alloc);
-
- for (uint32_t j = 0; j < NUM_DEPTH_DECOMPRESS_PIPELINES; j++) {
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->depth_decomp[i].decompress_pipeline[j],
- &state->alloc);
- }
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->depth_decomp[i].resummarize_pipeline,
- &state->alloc);
- }
+ struct radv_meta_state *state = &device->meta_state;
+
+ for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) {
+ radv_DestroyRenderPass(radv_device_to_handle(device), state->depth_decomp[i].pass,
+ &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->depth_decomp[i].p_layout,
+ &state->alloc);
+
+ for (uint32_t j = 0; j < NUM_DEPTH_DECOMPRESS_PIPELINES; j++) {
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->depth_decomp[i].decompress_pipeline[j], &state->alloc);
+ }
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->depth_decomp[i].resummarize_pipeline, &state->alloc);
+ }
}
VkResult
radv_device_init_meta_depth_decomp_state(struct radv_device *device, bool on_demand)
{
- struct radv_meta_state *state = &device->meta_state;
- VkResult res = VK_SUCCESS;
-
- for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) {
- uint32_t samples = 1 << i;
-
- res = create_pass(device, samples, &state->depth_decomp[i].pass);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = create_pipeline_layout(device,
- &state->depth_decomp[i].p_layout);
- if (res != VK_SUCCESS)
- goto fail;
-
- if (on_demand)
- continue;
-
- for (uint32_t j = 0; j < NUM_DEPTH_DECOMPRESS_PIPELINES; j++) {
- res = create_pipeline(device, samples,
- state->depth_decomp[i].pass,
- state->depth_decomp[i].p_layout,
- DEPTH_DECOMPRESS,
- j,
- &state->depth_decomp[i].decompress_pipeline[j]);
- if (res != VK_SUCCESS)
- goto fail;
- }
-
- res = create_pipeline(device, samples,
- state->depth_decomp[i].pass,
- state->depth_decomp[i].p_layout,
- DEPTH_RESUMMARIZE,
- 0, /* unused */
- &state->depth_decomp[i].resummarize_pipeline);
- if (res != VK_SUCCESS)
- goto fail;
- }
-
- return VK_SUCCESS;
+ struct radv_meta_state *state = &device->meta_state;
+ VkResult res = VK_SUCCESS;
+
+ for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) {
+ uint32_t samples = 1 << i;
+
+ res = create_pass(device, samples, &state->depth_decomp[i].pass);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_pipeline_layout(device, &state->depth_decomp[i].p_layout);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ if (on_demand)
+ continue;
+
+ for (uint32_t j = 0; j < NUM_DEPTH_DECOMPRESS_PIPELINES; j++) {
+ res = create_pipeline(device, samples, state->depth_decomp[i].pass,
+ state->depth_decomp[i].p_layout, DEPTH_DECOMPRESS, j,
+ &state->depth_decomp[i].decompress_pipeline[j]);
+ if (res != VK_SUCCESS)
+ goto fail;
+ }
+
+ res = create_pipeline(device, samples, state->depth_decomp[i].pass,
+ state->depth_decomp[i].p_layout, DEPTH_RESUMMARIZE, 0, /* unused */
+ &state->depth_decomp[i].resummarize_pipeline);
+ if (res != VK_SUCCESS)
+ goto fail;
+ }
+
+ return VK_SUCCESS;
fail:
- radv_device_finish_meta_depth_decomp_state(device);
- return res;
+ radv_device_finish_meta_depth_decomp_state(device);
+ return res;
}
static VkPipeline *
-radv_get_depth_pipeline(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *subresourceRange,
- enum radv_depth_op op)
+radv_get_depth_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *subresourceRange, enum radv_depth_op op)
{
- struct radv_meta_state *state = &cmd_buffer->device->meta_state;
- uint32_t samples = image->info.samples;
- uint32_t samples_log2 = ffs(samples) - 1;
- enum radv_depth_decompress decompress;
- VkPipeline *pipeline;
-
- if (subresourceRange->aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT) {
- decompress = DECOMPRESS_DEPTH;
- } else if (subresourceRange->aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) {
- decompress = DECOMPRESS_STENCIL;
- } else {
- decompress = DECOMPRESS_DEPTH_STENCIL;
- }
-
- if (!state->depth_decomp[samples_log2].decompress_pipeline[decompress]) {
- VkResult ret;
-
- for (uint32_t i = 0; i < NUM_DEPTH_DECOMPRESS_PIPELINES; i++) {
- ret = create_pipeline(cmd_buffer->device, samples,
- state->depth_decomp[samples_log2].pass,
- state->depth_decomp[samples_log2].p_layout,
- DEPTH_DECOMPRESS,
- i,
- &state->depth_decomp[samples_log2].decompress_pipeline[i]);
- if (ret != VK_SUCCESS) {
- cmd_buffer->record_result = ret;
- return NULL;
- }
- }
-
- ret = create_pipeline(cmd_buffer->device, samples,
- state->depth_decomp[samples_log2].pass,
- state->depth_decomp[samples_log2].p_layout,
- DEPTH_RESUMMARIZE,
- 0, /* unused */
- &state->depth_decomp[samples_log2].resummarize_pipeline);
- if (ret != VK_SUCCESS) {
- cmd_buffer->record_result = ret;
- return NULL;
- }
- }
-
- switch (op) {
- case DEPTH_DECOMPRESS:
- pipeline = &state->depth_decomp[samples_log2].decompress_pipeline[decompress];
- break;
- case DEPTH_RESUMMARIZE:
- pipeline = &state->depth_decomp[samples_log2].resummarize_pipeline;
- break;
- default:
- unreachable("unknown operation");
- }
-
- return pipeline;
+ struct radv_meta_state *state = &cmd_buffer->device->meta_state;
+ uint32_t samples = image->info.samples;
+ uint32_t samples_log2 = ffs(samples) - 1;
+ enum radv_depth_decompress decompress;
+ VkPipeline *pipeline;
+
+ if (subresourceRange->aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT) {
+ decompress = DECOMPRESS_DEPTH;
+ } else if (subresourceRange->aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) {
+ decompress = DECOMPRESS_STENCIL;
+ } else {
+ decompress = DECOMPRESS_DEPTH_STENCIL;
+ }
+
+ if (!state->depth_decomp[samples_log2].decompress_pipeline[decompress]) {
+ VkResult ret;
+
+ for (uint32_t i = 0; i < NUM_DEPTH_DECOMPRESS_PIPELINES; i++) {
+ ret = create_pipeline(cmd_buffer->device, samples, state->depth_decomp[samples_log2].pass,
+ state->depth_decomp[samples_log2].p_layout, DEPTH_DECOMPRESS, i,
+ &state->depth_decomp[samples_log2].decompress_pipeline[i]);
+ if (ret != VK_SUCCESS) {
+ cmd_buffer->record_result = ret;
+ return NULL;
+ }
+ }
+
+ ret = create_pipeline(cmd_buffer->device, samples, state->depth_decomp[samples_log2].pass,
+ state->depth_decomp[samples_log2].p_layout, DEPTH_RESUMMARIZE,
+ 0, /* unused */
+ &state->depth_decomp[samples_log2].resummarize_pipeline);
+ if (ret != VK_SUCCESS) {
+ cmd_buffer->record_result = ret;
+ return NULL;
+ }
+ }
+
+ switch (op) {
+ case DEPTH_DECOMPRESS:
+ pipeline = &state->depth_decomp[samples_log2].decompress_pipeline[decompress];
+ break;
+ case DEPTH_RESUMMARIZE:
+ pipeline = &state->depth_decomp[samples_log2].resummarize_pipeline;
+ break;
+ default:
+ unreachable("unknown operation");
+ }
+
+ return pipeline;
}
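
The pipeline tables above are indexed by the log2 of the sample count, via ffs(samples) - 1. A small standalone sketch of that mapping (the loop bounds and printf are illustrative only, not taken from the driver):

#include <strings.h> /* ffs() */
#include <stdio.h>

int main(void)
{
   /* Power-of-two sample counts map to consecutive depth_decomp[] slots:
    * ffs() returns the 1-based position of the lowest set bit, so
    * 1 -> 0, 2 -> 1, 4 -> 2, 8 -> 3. */
   for (unsigned samples = 1; samples <= 8; samples *= 2) {
      unsigned samples_log2 = ffs(samples) - 1;
      printf("samples=%u -> depth_decomp[%u]\n", samples, samples_log2);
   }
   return 0;
}
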
static void
-radv_process_depth_image_layer(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *range,
- int level, int layer)
+radv_process_depth_image_layer(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *range, int level, int layer)
{
- struct radv_device *device = cmd_buffer->device;
- struct radv_meta_state *state = &device->meta_state;
- uint32_t samples_log2 = ffs(image->info.samples) - 1;
- struct radv_image_view iview;
- uint32_t width, height;
-
- width = radv_minify(image->info.width, range->baseMipLevel + level);
- height = radv_minify(image->info.height, range->baseMipLevel + level);
-
- radv_image_view_init(&iview, device,
- &(VkImageViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(image),
- .viewType = radv_meta_get_view_type(image),
- .format = image->vk_format,
- .subresourceRange = {
- .aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT,
- .baseMipLevel = range->baseMipLevel + level,
- .levelCount = 1,
- .baseArrayLayer = range->baseArrayLayer + layer,
- .layerCount = 1,
- },
- }, NULL);
-
-
- VkFramebuffer fb_h;
- radv_CreateFramebuffer(radv_device_to_handle(device),
- &(VkFramebufferCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
- .attachmentCount = 1,
- .pAttachments = (VkImageView[]) {
- radv_image_view_to_handle(&iview)
- },
- .width = width,
- .height = height,
- .layers = 1
- }, &cmd_buffer->pool->alloc, &fb_h);
-
- radv_cmd_buffer_begin_render_pass(cmd_buffer,
- &(VkRenderPassBeginInfo) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
- .renderPass = state->depth_decomp[samples_log2].pass,
- .framebuffer = fb_h,
- .renderArea = {
- .offset = {
- 0,
- 0,
- },
- .extent = {
- width,
- height,
- }
- },
- .clearValueCount = 0,
- .pClearValues = NULL,
- }, NULL);
- radv_cmd_buffer_set_subpass(cmd_buffer,
- &cmd_buffer->state.pass->subpasses[0]);
-
- radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
- radv_cmd_buffer_end_render_pass(cmd_buffer);
-
- radv_DestroyFramebuffer(radv_device_to_handle(device), fb_h,
- &cmd_buffer->pool->alloc);
+ struct radv_device *device = cmd_buffer->device;
+ struct radv_meta_state *state = &device->meta_state;
+ uint32_t samples_log2 = ffs(image->info.samples) - 1;
+ struct radv_image_view iview;
+ uint32_t width, height;
+
+ width = radv_minify(image->info.width, range->baseMipLevel + level);
+ height = radv_minify(image->info.height, range->baseMipLevel + level);
+
+ radv_image_view_init(&iview, device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(image),
+ .viewType = radv_meta_get_view_type(image),
+ .format = image->vk_format,
+ .subresourceRange =
+ {
+ .aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT,
+ .baseMipLevel = range->baseMipLevel + level,
+ .levelCount = 1,
+ .baseArrayLayer = range->baseArrayLayer + layer,
+ .layerCount = 1,
+ },
+ },
+ NULL);
+
+ VkFramebuffer fb_h;
+ radv_CreateFramebuffer(
+ radv_device_to_handle(device),
+ &(VkFramebufferCreateInfo){.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments = (VkImageView[]){radv_image_view_to_handle(&iview)},
+ .width = width,
+ .height = height,
+ .layers = 1},
+ &cmd_buffer->pool->alloc, &fb_h);
+
+ radv_cmd_buffer_begin_render_pass(cmd_buffer,
+ &(VkRenderPassBeginInfo){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .renderPass = state->depth_decomp[samples_log2].pass,
+ .framebuffer = fb_h,
+ .renderArea = {.offset =
+ {
+ 0,
+ 0,
+ },
+ .extent =
+ {
+ width,
+ height,
+ }},
+ .clearValueCount = 0,
+ .pClearValues = NULL,
+ },
+ NULL);
+ radv_cmd_buffer_set_subpass(cmd_buffer, &cmd_buffer->state.pass->subpasses[0]);
+
+ radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
+ radv_cmd_buffer_end_render_pass(cmd_buffer);
+
+ radv_DestroyFramebuffer(radv_device_to_handle(device), fb_h, &cmd_buffer->pool->alloc);
}
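
The per-level width and height above come from radv_minify(); a minimal sketch of the conventional mip-chain arithmetic it stands for, assuming the usual clamp-to-1 behaviour (the level-0 extent here is made up):

#include <stdio.h>

/* Conventional mip minification: halve per level, never below 1. */
static unsigned minify(unsigned extent, unsigned level)
{
   unsigned e = extent >> level;
   return e ? e : 1;
}

int main(void)
{
   unsigned width = 1920, height = 1080; /* hypothetical level-0 extent */
   for (unsigned level = 0; level < 4; level++)
      printf("level %u: %ux%u\n", level, minify(width, level), minify(height, level));
   return 0;
}
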
-static void radv_process_depth_stencil(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *subresourceRange,
- struct radv_sample_locations_state *sample_locs,
- enum radv_depth_op op)
+static void
+radv_process_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *subresourceRange,
+ struct radv_sample_locations_state *sample_locs, enum radv_depth_op op)
{
- struct radv_meta_saved_state saved_state;
- VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
- VkPipeline *pipeline;
-
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_GRAPHICS_PIPELINE |
- RADV_META_SAVE_SAMPLE_LOCATIONS |
- RADV_META_SAVE_PASS);
-
- pipeline = radv_get_depth_pipeline(cmd_buffer, image,
- subresourceRange, op);
-
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
-
- if (sample_locs) {
- assert(image->flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT);
-
- /* Set the sample locations specified during explicit or
- * automatic layout transitions, otherwise the depth decompress
- * pass uses the default HW locations.
- */
- radv_CmdSetSampleLocationsEXT(cmd_buffer_h, &(VkSampleLocationsInfoEXT) {
- .sampleLocationsPerPixel = sample_locs->per_pixel,
- .sampleLocationGridSize = sample_locs->grid_size,
- .sampleLocationsCount = sample_locs->count,
- .pSampleLocations = sample_locs->locations,
- });
- }
-
- for (uint32_t l = 0; l < radv_get_levelCount(image, subresourceRange); ++l) {
-
- /* Do not decompress levels without HTILE. */
- if (!radv_htile_enabled(image, subresourceRange->baseMipLevel + l))
- continue;
-
- uint32_t width =
- radv_minify(image->info.width,
- subresourceRange->baseMipLevel + l);
- uint32_t height =
- radv_minify(image->info.height,
- subresourceRange->baseMipLevel + l);
-
- radv_CmdSetViewport(cmd_buffer_h, 0, 1,
- &(VkViewport) {
- .x = 0,
- .y = 0,
- .width = width,
- .height = height,
- .minDepth = 0.0f,
- .maxDepth = 1.0f
- });
-
- radv_CmdSetScissor(cmd_buffer_h, 0, 1,
- &(VkRect2D) {
- .offset = { 0, 0 },
- .extent = { width, height },
- });
-
- for (uint32_t s = 0; s < radv_get_layerCount(image, subresourceRange); s++) {
- radv_process_depth_image_layer(cmd_buffer, image,
- subresourceRange, l, s);
- }
- }
-
- radv_meta_restore(&saved_state, cmd_buffer);
+ struct radv_meta_saved_state saved_state;
+ VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
+ VkPipeline *pipeline;
+
+ radv_meta_save(
+ &saved_state, cmd_buffer,
+ RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_SAMPLE_LOCATIONS | RADV_META_SAVE_PASS);
+
+ pipeline = radv_get_depth_pipeline(cmd_buffer, image, subresourceRange, op);
+
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS,
+ *pipeline);
+
+ if (sample_locs) {
+ assert(image->flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT);
+
+ /* Set the sample locations specified during explicit or
+ * automatic layout transitions, otherwise the depth decompress
+ * pass uses the default HW locations.
+ */
+ radv_CmdSetSampleLocationsEXT(cmd_buffer_h,
+ &(VkSampleLocationsInfoEXT){
+ .sampleLocationsPerPixel = sample_locs->per_pixel,
+ .sampleLocationGridSize = sample_locs->grid_size,
+ .sampleLocationsCount = sample_locs->count,
+ .pSampleLocations = sample_locs->locations,
+ });
+ }
+
+ for (uint32_t l = 0; l < radv_get_levelCount(image, subresourceRange); ++l) {
+
+ /* Do not decompress levels without HTILE. */
+ if (!radv_htile_enabled(image, subresourceRange->baseMipLevel + l))
+ continue;
+
+ uint32_t width = radv_minify(image->info.width, subresourceRange->baseMipLevel + l);
+ uint32_t height = radv_minify(image->info.height, subresourceRange->baseMipLevel + l);
+
+ radv_CmdSetViewport(cmd_buffer_h, 0, 1,
+ &(VkViewport){.x = 0,
+ .y = 0,
+ .width = width,
+ .height = height,
+ .minDepth = 0.0f,
+ .maxDepth = 1.0f});
+
+ radv_CmdSetScissor(cmd_buffer_h, 0, 1,
+ &(VkRect2D){
+ .offset = {0, 0},
+ .extent = {width, height},
+ });
+
+ for (uint32_t s = 0; s < radv_get_layerCount(image, subresourceRange); s++) {
+ radv_process_depth_image_layer(cmd_buffer, image, subresourceRange, l, s);
+ }
+ }
+
+ radv_meta_restore(&saved_state, cmd_buffer);
}
-void radv_decompress_depth_stencil(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *subresourceRange,
- struct radv_sample_locations_state *sample_locs)
+void
+radv_decompress_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *subresourceRange,
+ struct radv_sample_locations_state *sample_locs)
{
- struct radv_barrier_data barrier = {0};
+ struct radv_barrier_data barrier = {0};
- barrier.layout_transitions.depth_stencil_expand = 1;
- radv_describe_layout_transition(cmd_buffer, &barrier);
+ barrier.layout_transitions.depth_stencil_expand = 1;
+ radv_describe_layout_transition(cmd_buffer, &barrier);
- assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
- radv_process_depth_stencil(cmd_buffer, image, subresourceRange,
- sample_locs, DEPTH_DECOMPRESS);
+ assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
+ radv_process_depth_stencil(cmd_buffer, image, subresourceRange, sample_locs, DEPTH_DECOMPRESS);
}
-void radv_resummarize_depth_stencil(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *subresourceRange,
- struct radv_sample_locations_state *sample_locs)
+void
+radv_resummarize_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *subresourceRange,
+ struct radv_sample_locations_state *sample_locs)
{
- struct radv_barrier_data barrier = {0};
+ struct radv_barrier_data barrier = {0};
- barrier.layout_transitions.depth_stencil_resummarize = 1;
- radv_describe_layout_transition(cmd_buffer, &barrier);
+ barrier.layout_transitions.depth_stencil_resummarize = 1;
+ radv_describe_layout_transition(cmd_buffer, &barrier);
- assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
- radv_process_depth_stencil(cmd_buffer, image, subresourceRange,
- sample_locs, DEPTH_RESUMMARIZE);
+ assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
+ radv_process_depth_stencil(cmd_buffer, image, subresourceRange, sample_locs, DEPTH_RESUMMARIZE);
}
diff --git a/src/amd/vulkan/radv_meta_fast_clear.c b/src/amd/vulkan/radv_meta_fast_clear.c
index ea29bdec9f2..9a62664fc95 100644
--- a/src/amd/vulkan/radv_meta_fast_clear.c
+++ b/src/amd/vulkan/radv_meta_fast_clear.c
@@ -28,955 +28,901 @@
#include "radv_private.h"
#include "sid.h"
-
static nir_shader *
build_dcc_decompress_compute_shader(struct radv_device *dev)
{
- const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
- false,
- false,
- GLSL_TYPE_FLOAT);
- const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D,
- false,
- GLSL_TYPE_FLOAT);
-
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "dcc_decompress_compute");
-
- /* We need at least 16/16/1 to cover an entire DCC block in a single workgroup. */
- b.shader->info.cs.local_size[0] = 16;
- b.shader->info.cs.local_size[1] = 16;
- b.shader->info.cs.local_size[2] = 1;
- nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
- buf_type, "s_tex");
- input_img->data.descriptor_set = 0;
- input_img->data.binding = 0;
-
- nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
- img_type, "out_img");
- output_img->data.descriptor_set = 0;
- output_img->data.binding = 1;
-
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
- nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
- nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info.cs.local_size[0],
- b.shader->info.cs.local_size[1],
- b.shader->info.cs.local_size[2], 0);
-
- nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
- nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
-
- nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
- tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
- tex->op = nir_texop_txf;
- tex->src[0].src_type = nir_tex_src_coord;
- tex->src[0].src = nir_src_for_ssa(nir_channels(&b, global_id, 3));
- tex->src[1].src_type = nir_tex_src_lod;
- tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
- tex->src[2].src_type = nir_tex_src_texture_deref;
- tex->src[2].src = nir_src_for_ssa(input_img_deref);
- tex->dest_type = nir_type_float32;
- tex->is_array = false;
- tex->coord_components = 2;
-
- nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
- nir_builder_instr_insert(&b, &tex->instr);
-
- nir_scoped_barrier(&b, .execution_scope=NIR_SCOPE_WORKGROUP,
- .memory_scope=NIR_SCOPE_WORKGROUP,
- .memory_semantics=NIR_MEMORY_ACQ_REL,
- .memory_modes=nir_var_mem_ssbo);
-
- nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa,
- global_id, nir_ssa_undef(&b, 1, 32), &tex->dest.ssa,
- nir_imm_int(&b, 0));
- return b.shader;
+ const struct glsl_type *buf_type =
+ glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false, GLSL_TYPE_FLOAT);
+ const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, false, GLSL_TYPE_FLOAT);
+
+ nir_builder b =
+ nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "dcc_decompress_compute");
+
+ /* We need at least 16/16/1 to cover an entire DCC block in a single workgroup. */
+ b.shader->info.cs.local_size[0] = 16;
+ b.shader->info.cs.local_size[1] = 16;
+ b.shader->info.cs.local_size[2] = 1;
+ nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, buf_type, "s_tex");
+ input_img->data.descriptor_set = 0;
+ input_img->data.binding = 0;
+
+ nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
+ output_img->data.descriptor_set = 0;
+ output_img->data.binding = 1;
+
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
+ nir_ssa_def *block_size =
+ nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1],
+ b.shader->info.cs.local_size[2], 0);
+
+ nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+ nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
+
+ nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
+ tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
+ tex->op = nir_texop_txf;
+ tex->src[0].src_type = nir_tex_src_coord;
+ tex->src[0].src = nir_src_for_ssa(nir_channels(&b, global_id, 3));
+ tex->src[1].src_type = nir_tex_src_lod;
+ tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
+ tex->src[2].src_type = nir_tex_src_texture_deref;
+ tex->src[2].src = nir_src_for_ssa(input_img_deref);
+ tex->dest_type = nir_type_float32;
+ tex->is_array = false;
+ tex->coord_components = 2;
+
+ nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+ nir_builder_instr_insert(&b, &tex->instr);
+
+ nir_scoped_barrier(&b, .execution_scope = NIR_SCOPE_WORKGROUP,
+ .memory_scope = NIR_SCOPE_WORKGROUP, .memory_semantics = NIR_MEMORY_ACQ_REL,
+ .memory_modes = nir_var_mem_ssbo);
+
+ nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, global_id,
+ nir_ssa_undef(&b, 1, 32), &tex->dest.ssa, nir_imm_int(&b, 0));
+ return b.shader;
}
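
Given the 16x16x1 local size declared above, covering one mip level with this shader is a ceiling division of the level's extent by the workgroup size. A standalone sketch of that dispatch math (the image extent is hypothetical):

#include <stdio.h>

static unsigned div_round_up(unsigned n, unsigned d)
{
   return (n + d - 1) / d;
}

int main(void)
{
   unsigned width = 1000, height = 600; /* hypothetical mip extent */
   unsigned local_x = 16, local_y = 16; /* matches cs.local_size[0..1] above */
   /* Each workgroup covers a 16x16 tile, so round each dimension up. */
   printf("dispatch %u x %u x 1 workgroups\n",
          div_round_up(width, local_x), div_round_up(height, local_y));
   return 0;
}
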
static VkResult
create_dcc_compress_compute(struct radv_device *device)
{
- VkResult result = VK_SUCCESS;
- nir_shader *cs = build_dcc_decompress_compute_shader(device);
-
- VkDescriptorSetLayoutCreateInfo ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 2,
- .pBindings = (VkDescriptorSetLayoutBinding[]) {
- {
- .binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- {
- .binding = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- }
- };
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
- &ds_create_info,
- &device->meta_state.alloc,
- &device->meta_state.fast_clear_flush.dcc_decompress_compute_ds_layout);
- if (result != VK_SUCCESS)
- goto cleanup;
-
-
- VkPipelineLayoutCreateInfo pl_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 1,
- .pSetLayouts = &device->meta_state.fast_clear_flush.dcc_decompress_compute_ds_layout,
- .pushConstantRangeCount = 0,
- .pPushConstantRanges = NULL,
- };
-
- result = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &pl_create_info,
- &device->meta_state.alloc,
- &device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout);
- if (result != VK_SUCCESS)
- goto cleanup;
-
- /* compute shader */
-
- VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = vk_shader_module_handle_from_nir(cs),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = pipeline_shader_stage,
- .flags = 0,
- .layout = device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- 1, &vk_pipeline_info, NULL,
- &device->meta_state.fast_clear_flush.dcc_decompress_compute_pipeline);
- if (result != VK_SUCCESS)
- goto cleanup;
+ VkResult result = VK_SUCCESS;
+ nir_shader *cs = build_dcc_decompress_compute_shader(device);
+
+ VkDescriptorSetLayoutCreateInfo ds_create_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 2,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ {.binding = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(
+ radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc,
+ &device->meta_state.fast_clear_flush.dcc_decompress_compute_ds_layout);
+ if (result != VK_SUCCESS)
+ goto cleanup;
+
+ VkPipelineLayoutCreateInfo pl_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &device->meta_state.fast_clear_flush.dcc_decompress_compute_ds_layout,
+ .pushConstantRangeCount = 0,
+ .pPushConstantRanges = NULL,
+ };
+
+ result = radv_CreatePipelineLayout(
+ radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc,
+ &device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout);
+ if (result != VK_SUCCESS)
+ goto cleanup;
+
+ /* compute shader */
+
+ VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(cs),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = pipeline_shader_stage,
+ .flags = 0,
+ .layout = device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout,
+ };
+
+ result = radv_CreateComputePipelines(
+ radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
+ &vk_pipeline_info, NULL,
+ &device->meta_state.fast_clear_flush.dcc_decompress_compute_pipeline);
+ if (result != VK_SUCCESS)
+ goto cleanup;
cleanup:
- ralloc_free(cs);
- return result;
+ ralloc_free(cs);
+ return result;
}
static VkResult
create_pass(struct radv_device *device)
{
- VkResult result;
- VkDevice device_h = radv_device_to_handle(device);
- const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
- VkAttachmentDescription2 attachment;
-
- attachment.sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2;
- attachment.format = VK_FORMAT_UNDEFINED;
- attachment.samples = 1;
- attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
- attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
- attachment.initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
- attachment.finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
-
- result = radv_CreateRenderPass2(device_h,
- &(VkRenderPassCreateInfo2) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
- .attachmentCount = 1,
- .pAttachments = &attachment,
- .subpassCount = 1,
- .pSubpasses = &(VkSubpassDescription2) {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
- .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
- .inputAttachmentCount = 0,
- .colorAttachmentCount = 1,
- .pColorAttachments = (VkAttachmentReference2[]) {
- {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
- .attachment = 0,
- .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
- },
- },
- .pResolveAttachments = NULL,
- .pDepthStencilAttachment = &(VkAttachmentReference2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
- .attachment = VK_ATTACHMENT_UNUSED,
- },
- .preserveAttachmentCount = 0,
- .pPreserveAttachments = NULL,
- },
- .dependencyCount = 2,
- .pDependencies = (VkSubpassDependency2[]) {
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = VK_SUBPASS_EXTERNAL,
- .dstSubpass = 0,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- },
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = 0,
- .dstSubpass = VK_SUBPASS_EXTERNAL,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- }
- },
- },
- alloc,
- &device->meta_state.fast_clear_flush.pass);
-
- return result;
+ VkResult result;
+ VkDevice device_h = radv_device_to_handle(device);
+ const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
+ VkAttachmentDescription2 attachment;
+
+ attachment.sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2;
+ attachment.format = VK_FORMAT_UNDEFINED;
+ attachment.samples = 1;
+ attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
+ attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
+ attachment.initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+ attachment.finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+
+ result = radv_CreateRenderPass2(
+ device_h,
+ &(VkRenderPassCreateInfo2){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
+ .attachmentCount = 1,
+ .pAttachments = &attachment,
+ .subpassCount = 1,
+ .pSubpasses =
+ &(VkSubpassDescription2){
+ .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 1,
+ .pColorAttachments =
+ (VkAttachmentReference2[]){
+ {
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
+ .attachment = 0,
+ .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+ },
+ },
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment =
+ &(VkAttachmentReference2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
+ .attachment = VK_ATTACHMENT_UNUSED,
+ },
+ .preserveAttachmentCount = 0,
+ .pPreserveAttachments = NULL,
+ },
+ .dependencyCount = 2,
+ .pDependencies =
+ (VkSubpassDependency2[]){{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = VK_SUBPASS_EXTERNAL,
+ .dstSubpass = 0,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0},
+ {.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = 0,
+ .dstSubpass = VK_SUBPASS_EXTERNAL,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0}},
+ },
+ alloc, &device->meta_state.fast_clear_flush.pass);
+
+ return result;
}
static VkResult
create_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout)
{
- VkPipelineLayoutCreateInfo pl_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 0,
- .pSetLayouts = NULL,
- .pushConstantRangeCount = 0,
- .pPushConstantRanges = NULL,
- };
-
- return radv_CreatePipelineLayout(radv_device_to_handle(device),
- &pl_create_info,
- &device->meta_state.alloc,
- layout);
+ VkPipelineLayoutCreateInfo pl_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 0,
+ .pSetLayouts = NULL,
+ .pushConstantRangeCount = 0,
+ .pPushConstantRanges = NULL,
+ };
+
+ return radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
+ &device->meta_state.alloc, layout);
}
static VkResult
-create_pipeline(struct radv_device *device,
- VkShaderModule vs_module_h,
- VkPipelineLayout layout)
+create_pipeline(struct radv_device *device, VkShaderModule vs_module_h, VkPipelineLayout layout)
{
- VkResult result;
- VkDevice device_h = radv_device_to_handle(device);
-
- nir_shader *fs_module = radv_meta_build_nir_fs_noop();
-
- if (!fs_module) {
- /* XXX: Need more accurate error */
- result = VK_ERROR_OUT_OF_HOST_MEMORY;
- goto cleanup;
- }
-
- const VkPipelineShaderStageCreateInfo stages[2] = {
- {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_VERTEX_BIT,
- .module = vs_module_h,
- .pName = "main",
- },
- {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
- .module = vk_shader_module_handle_from_nir(fs_module),
- .pName = "main",
- },
- };
-
- const VkPipelineVertexInputStateCreateInfo vi_state = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
- .vertexBindingDescriptionCount = 0,
- .vertexAttributeDescriptionCount = 0,
- };
-
- const VkPipelineInputAssemblyStateCreateInfo ia_state = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
- .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
- .primitiveRestartEnable = false,
- };
-
- const VkPipelineColorBlendStateCreateInfo blend_state = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
- .logicOpEnable = false,
- .attachmentCount = 1,
- .pAttachments = (VkPipelineColorBlendAttachmentState []) {
- {
- .colorWriteMask = VK_COLOR_COMPONENT_R_BIT |
- VK_COLOR_COMPONENT_G_BIT |
- VK_COLOR_COMPONENT_B_BIT |
- VK_COLOR_COMPONENT_A_BIT,
- },
- }
- };
- const VkPipelineRasterizationStateCreateInfo rs_state = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
- .depthClampEnable = false,
- .rasterizerDiscardEnable = false,
- .polygonMode = VK_POLYGON_MODE_FILL,
- .cullMode = VK_CULL_MODE_NONE,
- .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
- };
-
- result = radv_graphics_pipeline_create(device_h,
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- &(VkGraphicsPipelineCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
- .stageCount = 2,
- .pStages = stages,
-
- .pVertexInputState = &vi_state,
- .pInputAssemblyState = &ia_state,
-
- .pViewportState = &(VkPipelineViewportStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
- .viewportCount = 1,
- .scissorCount = 1,
- },
- .pRasterizationState = &rs_state,
- .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
- .rasterizationSamples = 1,
- .sampleShadingEnable = false,
- .pSampleMask = NULL,
- .alphaToCoverageEnable = false,
- .alphaToOneEnable = false,
- },
- .pColorBlendState = &blend_state,
- .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
- .dynamicStateCount = 2,
- .pDynamicStates = (VkDynamicState[]) {
- VK_DYNAMIC_STATE_VIEWPORT,
- VK_DYNAMIC_STATE_SCISSOR,
- },
- },
- .layout = layout,
- .renderPass = device->meta_state.fast_clear_flush.pass,
- .subpass = 0,
- },
- &(struct radv_graphics_pipeline_create_info) {
- .use_rectlist = true,
- .custom_blend_mode = V_028808_CB_ELIMINATE_FAST_CLEAR,
- },
- &device->meta_state.alloc,
- &device->meta_state.fast_clear_flush.cmask_eliminate_pipeline);
- if (result != VK_SUCCESS)
- goto cleanup;
-
- result = radv_graphics_pipeline_create(device_h,
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- &(VkGraphicsPipelineCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
- .stageCount = 2,
- .pStages = stages,
-
- .pVertexInputState = &vi_state,
- .pInputAssemblyState = &ia_state,
-
- .pViewportState = &(VkPipelineViewportStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
- .viewportCount = 1,
- .scissorCount = 1,
- },
- .pRasterizationState = &rs_state,
- .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
- .rasterizationSamples = 1,
- .sampleShadingEnable = false,
- .pSampleMask = NULL,
- .alphaToCoverageEnable = false,
- .alphaToOneEnable = false,
- },
- .pColorBlendState = &blend_state,
- .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
- .dynamicStateCount = 2,
- .pDynamicStates = (VkDynamicState[]) {
- VK_DYNAMIC_STATE_VIEWPORT,
- VK_DYNAMIC_STATE_SCISSOR,
- },
- },
- .layout = layout,
- .renderPass = device->meta_state.fast_clear_flush.pass,
- .subpass = 0,
- },
- &(struct radv_graphics_pipeline_create_info) {
- .use_rectlist = true,
- .custom_blend_mode = V_028808_CB_FMASK_DECOMPRESS,
- },
- &device->meta_state.alloc,
- &device->meta_state.fast_clear_flush.fmask_decompress_pipeline);
- if (result != VK_SUCCESS)
- goto cleanup;
-
- result = radv_graphics_pipeline_create(device_h,
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- &(VkGraphicsPipelineCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
- .stageCount = 2,
- .pStages = stages,
-
- .pVertexInputState = &vi_state,
- .pInputAssemblyState = &ia_state,
-
- .pViewportState = &(VkPipelineViewportStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
- .viewportCount = 1,
- .scissorCount = 1,
- },
- .pRasterizationState = &rs_state,
- .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
- .rasterizationSamples = 1,
- .sampleShadingEnable = false,
- .pSampleMask = NULL,
- .alphaToCoverageEnable = false,
- .alphaToOneEnable = false,
- },
- .pColorBlendState = &blend_state,
- .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
- .dynamicStateCount = 2,
- .pDynamicStates = (VkDynamicState[]) {
- VK_DYNAMIC_STATE_VIEWPORT,
- VK_DYNAMIC_STATE_SCISSOR,
- },
- },
- .layout = layout,
- .renderPass = device->meta_state.fast_clear_flush.pass,
- .subpass = 0,
- },
- &(struct radv_graphics_pipeline_create_info) {
- .use_rectlist = true,
- .custom_blend_mode = V_028808_CB_DCC_DECOMPRESS,
- },
- &device->meta_state.alloc,
- &device->meta_state.fast_clear_flush.dcc_decompress_pipeline);
- if (result != VK_SUCCESS)
- goto cleanup;
-
- goto cleanup;
+ VkResult result;
+ VkDevice device_h = radv_device_to_handle(device);
+
+ nir_shader *fs_module = radv_meta_build_nir_fs_noop();
+
+ if (!fs_module) {
+ /* XXX: Need more accurate error */
+ result = VK_ERROR_OUT_OF_HOST_MEMORY;
+ goto cleanup;
+ }
+
+ const VkPipelineShaderStageCreateInfo stages[2] = {
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_VERTEX_BIT,
+ .module = vs_module_h,
+ .pName = "main",
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .module = vk_shader_module_handle_from_nir(fs_module),
+ .pName = "main",
+ },
+ };
+
+ const VkPipelineVertexInputStateCreateInfo vi_state = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .vertexBindingDescriptionCount = 0,
+ .vertexAttributeDescriptionCount = 0,
+ };
+
+ const VkPipelineInputAssemblyStateCreateInfo ia_state = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
+ .primitiveRestartEnable = false,
+ };
+
+ const VkPipelineColorBlendStateCreateInfo blend_state = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .logicOpEnable = false,
+ .attachmentCount = 1,
+ .pAttachments = (VkPipelineColorBlendAttachmentState[]){
+ {
+ .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
+ VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
+ },
+ }};
+ const VkPipelineRasterizationStateCreateInfo rs_state = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .depthClampEnable = false,
+ .rasterizerDiscardEnable = false,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .cullMode = VK_CULL_MODE_NONE,
+ .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
+ };
+
+ result = radv_graphics_pipeline_create(
+ device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache),
+ &(VkGraphicsPipelineCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .stageCount = 2,
+ .pStages = stages,
+
+ .pVertexInputState = &vi_state,
+ .pInputAssemblyState = &ia_state,
+
+ .pViewportState =
+ &(VkPipelineViewportStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .viewportCount = 1,
+ .scissorCount = 1,
+ },
+ .pRasterizationState = &rs_state,
+ .pMultisampleState =
+ &(VkPipelineMultisampleStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .rasterizationSamples = 1,
+ .sampleShadingEnable = false,
+ .pSampleMask = NULL,
+ .alphaToCoverageEnable = false,
+ .alphaToOneEnable = false,
+ },
+ .pColorBlendState = &blend_state,
+ .pDynamicState =
+ &(VkPipelineDynamicStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .dynamicStateCount = 2,
+ .pDynamicStates =
+ (VkDynamicState[]){
+ VK_DYNAMIC_STATE_VIEWPORT,
+ VK_DYNAMIC_STATE_SCISSOR,
+ },
+ },
+ .layout = layout,
+ .renderPass = device->meta_state.fast_clear_flush.pass,
+ .subpass = 0,
+ },
+ &(struct radv_graphics_pipeline_create_info){
+ .use_rectlist = true,
+ .custom_blend_mode = V_028808_CB_ELIMINATE_FAST_CLEAR,
+ },
+ &device->meta_state.alloc, &device->meta_state.fast_clear_flush.cmask_eliminate_pipeline);
+ if (result != VK_SUCCESS)
+ goto cleanup;
+
+ result = radv_graphics_pipeline_create(
+ device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache),
+ &(VkGraphicsPipelineCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .stageCount = 2,
+ .pStages = stages,
+
+ .pVertexInputState = &vi_state,
+ .pInputAssemblyState = &ia_state,
+
+ .pViewportState =
+ &(VkPipelineViewportStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .viewportCount = 1,
+ .scissorCount = 1,
+ },
+ .pRasterizationState = &rs_state,
+ .pMultisampleState =
+ &(VkPipelineMultisampleStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .rasterizationSamples = 1,
+ .sampleShadingEnable = false,
+ .pSampleMask = NULL,
+ .alphaToCoverageEnable = false,
+ .alphaToOneEnable = false,
+ },
+ .pColorBlendState = &blend_state,
+ .pDynamicState =
+ &(VkPipelineDynamicStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .dynamicStateCount = 2,
+ .pDynamicStates =
+ (VkDynamicState[]){
+ VK_DYNAMIC_STATE_VIEWPORT,
+ VK_DYNAMIC_STATE_SCISSOR,
+ },
+ },
+ .layout = layout,
+ .renderPass = device->meta_state.fast_clear_flush.pass,
+ .subpass = 0,
+ },
+ &(struct radv_graphics_pipeline_create_info){
+ .use_rectlist = true,
+ .custom_blend_mode = V_028808_CB_FMASK_DECOMPRESS,
+ },
+ &device->meta_state.alloc, &device->meta_state.fast_clear_flush.fmask_decompress_pipeline);
+ if (result != VK_SUCCESS)
+ goto cleanup;
+
+ result = radv_graphics_pipeline_create(
+ device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache),
+ &(VkGraphicsPipelineCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .stageCount = 2,
+ .pStages = stages,
+
+ .pVertexInputState = &vi_state,
+ .pInputAssemblyState = &ia_state,
+
+ .pViewportState =
+ &(VkPipelineViewportStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .viewportCount = 1,
+ .scissorCount = 1,
+ },
+ .pRasterizationState = &rs_state,
+ .pMultisampleState =
+ &(VkPipelineMultisampleStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .rasterizationSamples = 1,
+ .sampleShadingEnable = false,
+ .pSampleMask = NULL,
+ .alphaToCoverageEnable = false,
+ .alphaToOneEnable = false,
+ },
+ .pColorBlendState = &blend_state,
+ .pDynamicState =
+ &(VkPipelineDynamicStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .dynamicStateCount = 2,
+ .pDynamicStates =
+ (VkDynamicState[]){
+ VK_DYNAMIC_STATE_VIEWPORT,
+ VK_DYNAMIC_STATE_SCISSOR,
+ },
+ },
+ .layout = layout,
+ .renderPass = device->meta_state.fast_clear_flush.pass,
+ .subpass = 0,
+ },
+ &(struct radv_graphics_pipeline_create_info){
+ .use_rectlist = true,
+ .custom_blend_mode = V_028808_CB_DCC_DECOMPRESS,
+ },
+ &device->meta_state.alloc, &device->meta_state.fast_clear_flush.dcc_decompress_pipeline);
+ if (result != VK_SUCCESS)
+ goto cleanup;
+
+ goto cleanup;
cleanup:
- ralloc_free(fs_module);
- return result;
+ ralloc_free(fs_module);
+ return result;
}
void
radv_device_finish_meta_fast_clear_flush_state(struct radv_device *device)
{
- struct radv_meta_state *state = &device->meta_state;
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->fast_clear_flush.dcc_decompress_pipeline,
- &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->fast_clear_flush.fmask_decompress_pipeline,
- &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->fast_clear_flush.cmask_eliminate_pipeline,
- &state->alloc);
- radv_DestroyRenderPass(radv_device_to_handle(device),
- state->fast_clear_flush.pass, &state->alloc);
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->fast_clear_flush.p_layout,
- &state->alloc);
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->fast_clear_flush.dcc_decompress_compute_pipeline,
- &state->alloc);
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->fast_clear_flush.dcc_decompress_compute_p_layout,
- &state->alloc);
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- state->fast_clear_flush.dcc_decompress_compute_ds_layout,
- &state->alloc);
+ struct radv_meta_state *state = &device->meta_state;
+
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->fast_clear_flush.dcc_decompress_pipeline, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->fast_clear_flush.fmask_decompress_pipeline, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->fast_clear_flush.cmask_eliminate_pipeline, &state->alloc);
+ radv_DestroyRenderPass(radv_device_to_handle(device), state->fast_clear_flush.pass,
+ &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->fast_clear_flush.p_layout,
+ &state->alloc);
+
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->fast_clear_flush.dcc_decompress_compute_pipeline, &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device),
+ state->fast_clear_flush.dcc_decompress_compute_p_layout,
+ &state->alloc);
+ radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
+ state->fast_clear_flush.dcc_decompress_compute_ds_layout,
+ &state->alloc);
}
static VkResult
radv_device_init_meta_fast_clear_flush_state_internal(struct radv_device *device)
{
- VkResult res = VK_SUCCESS;
-
- mtx_lock(&device->meta_state.mtx);
- if (device->meta_state.fast_clear_flush.cmask_eliminate_pipeline) {
- mtx_unlock(&device->meta_state.mtx);
- return VK_SUCCESS;
- }
-
- nir_shader *vs_module = radv_meta_build_nir_vs_generate_vertices();
- if (!vs_module) {
- /* XXX: Need more accurate error */
- res = VK_ERROR_OUT_OF_HOST_MEMORY;
- goto fail;
- }
-
- res = create_pass(device);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = create_pipeline_layout(device,
- &device->meta_state.fast_clear_flush.p_layout);
- if (res != VK_SUCCESS)
- goto fail;
-
- VkShaderModule vs_module_h = vk_shader_module_handle_from_nir(vs_module);
- res = create_pipeline(device, vs_module_h,
- device->meta_state.fast_clear_flush.p_layout);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = create_dcc_compress_compute(device);
- if (res != VK_SUCCESS)
- goto fail;
-
- goto cleanup;
+ VkResult res = VK_SUCCESS;
+
+ mtx_lock(&device->meta_state.mtx);
+ if (device->meta_state.fast_clear_flush.cmask_eliminate_pipeline) {
+ mtx_unlock(&device->meta_state.mtx);
+ return VK_SUCCESS;
+ }
+
+ nir_shader *vs_module = radv_meta_build_nir_vs_generate_vertices();
+ if (!vs_module) {
+ /* XXX: Need more accurate error */
+ res = VK_ERROR_OUT_OF_HOST_MEMORY;
+ goto fail;
+ }
+
+ res = create_pass(device);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_pipeline_layout(device, &device->meta_state.fast_clear_flush.p_layout);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ VkShaderModule vs_module_h = vk_shader_module_handle_from_nir(vs_module);
+ res = create_pipeline(device, vs_module_h, device->meta_state.fast_clear_flush.p_layout);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_dcc_compress_compute(device);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ goto cleanup;
fail:
- radv_device_finish_meta_fast_clear_flush_state(device);
+ radv_device_finish_meta_fast_clear_flush_state(device);
cleanup:
- ralloc_free(vs_module);
- mtx_unlock(&device->meta_state.mtx);
+ ralloc_free(vs_module);
+ mtx_unlock(&device->meta_state.mtx);
- return res;
+ return res;
}
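
The function above follows the usual on-demand pattern: take the meta-state mutex, return early if another thread already built the pipelines, otherwise create them and unlock. A minimal standalone sketch of that pattern, written with pthreads and illustrative names rather than the driver's own:

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t meta_mtx = PTHREAD_MUTEX_INITIALIZER;
static bool pipelines_ready;

static int ensure_meta_pipelines(void)
{
   pthread_mutex_lock(&meta_mtx);
   if (pipelines_ready) {
      /* Another thread won the race; nothing left to do. */
      pthread_mutex_unlock(&meta_mtx);
      return 0;
   }

   /* ... create the render pass, pipeline layout and pipelines here ... */
   pipelines_ready = true;

   pthread_mutex_unlock(&meta_mtx);
   return 0;
}
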
-
VkResult
radv_device_init_meta_fast_clear_flush_state(struct radv_device *device, bool on_demand)
{
- if (on_demand)
- return VK_SUCCESS;
+ if (on_demand)
+ return VK_SUCCESS;
- return radv_device_init_meta_fast_clear_flush_state_internal(device);
+ return radv_device_init_meta_fast_clear_flush_state_internal(device);
}
static void
radv_emit_set_predication_state_from_image(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- uint64_t pred_offset, bool value)
+ struct radv_image *image, uint64_t pred_offset,
+ bool value)
{
- uint64_t va = 0;
+ uint64_t va = 0;
- if (value) {
- va = radv_buffer_get_va(image->bo) + image->offset;
- va += pred_offset;
- }
+ if (value) {
+ va = radv_buffer_get_va(image->bo) + image->offset;
+ va += pred_offset;
+ }
- si_emit_set_predication_state(cmd_buffer, true, PREDICATION_OP_BOOL64, va);
+ si_emit_set_predication_state(cmd_buffer, true, PREDICATION_OP_BOOL64, va);
}
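
The predication address computed above is plain offset arithmetic: the GPU VA of the image's buffer object, plus the image's offset inside that BO, plus the metadata offset holding the 64-bit predicate. A worked example with made-up values:

#include <inttypes.h>
#include <stdio.h>

int main(void)
{
   uint64_t bo_va = 0x800000000000ull; /* hypothetical GPU address of image->bo */
   uint64_t image_offset = 0x2000;     /* hypothetical offset of the image in the BO */
   uint64_t pred_offset = 0x40;        /* hypothetical predicate metadata offset */

   printf("predication va = 0x%" PRIx64 "\n", bo_va + image_offset + pred_offset);
   return 0;
}
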
static void
-radv_process_color_image_layer(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *range,
- int level, int layer, bool flush_cb)
+radv_process_color_image_layer(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *range, int level, int layer,
+ bool flush_cb)
{
- struct radv_device *device = cmd_buffer->device;
- struct radv_image_view iview;
- uint32_t width, height;
-
- width = radv_minify(image->info.width, range->baseMipLevel + level);
- height = radv_minify(image->info.height, range->baseMipLevel + level);
-
- radv_image_view_init(&iview, device,
- &(VkImageViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(image),
- .viewType = radv_meta_get_view_type(image),
- .format = image->vk_format,
- .subresourceRange = {
- .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .baseMipLevel = range->baseMipLevel + level,
- .levelCount = 1,
- .baseArrayLayer = range->baseArrayLayer + layer,
- .layerCount = 1,
- },
- }, NULL);
-
- VkFramebuffer fb_h;
- radv_CreateFramebuffer(radv_device_to_handle(device),
- &(VkFramebufferCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
- .attachmentCount = 1,
- .pAttachments = (VkImageView[]) {
- radv_image_view_to_handle(&iview)
- },
- .width = width,
- .height = height,
- .layers = 1
- }, &cmd_buffer->pool->alloc, &fb_h);
-
- radv_cmd_buffer_begin_render_pass(cmd_buffer,
- &(VkRenderPassBeginInfo) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
- .renderPass = device->meta_state.fast_clear_flush.pass,
- .framebuffer = fb_h,
- .renderArea = {
- .offset = { 0, 0, },
- .extent = { width, height, }
- },
- .clearValueCount = 0,
- .pClearValues = NULL,
- }, NULL);
-
- radv_cmd_buffer_set_subpass(cmd_buffer,
- &cmd_buffer->state.pass->subpasses[0]);
-
- if (flush_cb)
- cmd_buffer->state.flush_bits |=
- radv_dst_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, image);
-
- radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
-
- if (flush_cb)
- cmd_buffer->state.flush_bits |=
- radv_src_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, image);
-
- radv_cmd_buffer_end_render_pass(cmd_buffer);
-
- radv_DestroyFramebuffer(radv_device_to_handle(device), fb_h,
- &cmd_buffer->pool->alloc);
+ struct radv_device *device = cmd_buffer->device;
+ struct radv_image_view iview;
+ uint32_t width, height;
+
+ width = radv_minify(image->info.width, range->baseMipLevel + level);
+ height = radv_minify(image->info.height, range->baseMipLevel + level);
+
+ radv_image_view_init(&iview, device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(image),
+ .viewType = radv_meta_get_view_type(image),
+ .format = image->vk_format,
+ .subresourceRange =
+ {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = range->baseMipLevel + level,
+ .levelCount = 1,
+ .baseArrayLayer = range->baseArrayLayer + layer,
+ .layerCount = 1,
+ },
+ },
+ NULL);
+
+ VkFramebuffer fb_h;
+ radv_CreateFramebuffer(
+ radv_device_to_handle(device),
+ &(VkFramebufferCreateInfo){.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments = (VkImageView[]){radv_image_view_to_handle(&iview)},
+ .width = width,
+ .height = height,
+ .layers = 1},
+ &cmd_buffer->pool->alloc, &fb_h);
+
+ radv_cmd_buffer_begin_render_pass(cmd_buffer,
+ &(VkRenderPassBeginInfo){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .renderPass = device->meta_state.fast_clear_flush.pass,
+ .framebuffer = fb_h,
+ .renderArea = {.offset =
+ {
+ 0,
+ 0,
+ },
+ .extent =
+ {
+ width,
+ height,
+ }},
+ .clearValueCount = 0,
+ .pClearValues = NULL,
+ },
+ NULL);
+
+ radv_cmd_buffer_set_subpass(cmd_buffer, &cmd_buffer->state.pass->subpasses[0]);
+
+ if (flush_cb)
+ cmd_buffer->state.flush_bits |=
+ radv_dst_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, image);
+
+ radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
+
+ if (flush_cb)
+ cmd_buffer->state.flush_bits |=
+ radv_src_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, image);
+
+ radv_cmd_buffer_end_render_pass(cmd_buffer);
+
+ radv_DestroyFramebuffer(radv_device_to_handle(device), fb_h, &cmd_buffer->pool->alloc);
}
static void
-radv_process_color_image(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *subresourceRange,
- bool decompress_dcc)
+radv_process_color_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *subresourceRange, bool decompress_dcc)
{
- struct radv_device *device = cmd_buffer->device;
- struct radv_meta_saved_state saved_state;
- bool flush_cb = false;
- VkPipeline *pipeline;
-
- if (decompress_dcc && radv_dcc_enabled(image, subresourceRange->baseMipLevel)) {
- pipeline = &device->meta_state.fast_clear_flush.dcc_decompress_pipeline;
- } else if (radv_image_has_fmask(image) && !image->tc_compatible_cmask) {
- pipeline = &device->meta_state.fast_clear_flush.fmask_decompress_pipeline;
- } else {
- pipeline = &device->meta_state.fast_clear_flush.cmask_eliminate_pipeline;
- }
-
- if (!*pipeline) {
- VkResult ret;
-
- ret = radv_device_init_meta_fast_clear_flush_state_internal(device);
- if (ret != VK_SUCCESS) {
- cmd_buffer->record_result = ret;
- return;
- }
- }
-
- if (pipeline == &device->meta_state.fast_clear_flush.dcc_decompress_pipeline ||
- pipeline == &device->meta_state.fast_clear_flush.fmask_decompress_pipeline) {
- /* Flushing CB is required before and after DCC_DECOMPRESS or
- * FMASK_DECOMPRESS.
- */
- flush_cb = true;
- }
-
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_GRAPHICS_PIPELINE |
- RADV_META_SAVE_PASS);
-
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
-
- for (uint32_t l = 0; l < radv_get_levelCount(image, subresourceRange); ++l) {
- uint32_t width, height;
-
- /* Do not decompress levels without DCC. */
- if (decompress_dcc &&
- !radv_dcc_enabled(image, subresourceRange->baseMipLevel + l))
- continue;
-
- width = radv_minify(image->info.width,
- subresourceRange->baseMipLevel + l);
- height = radv_minify(image->info.height,
- subresourceRange->baseMipLevel + l);
-
- radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
- &(VkViewport) {
- .x = 0,
- .y = 0,
- .width = width,
- .height = height,
- .minDepth = 0.0f,
- .maxDepth = 1.0f
- });
-
- radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
- &(VkRect2D) {
- .offset = { 0, 0 },
- .extent = { width, height },
- });
-
- for (uint32_t s = 0; s < radv_get_layerCount(image, subresourceRange); s++) {
- radv_process_color_image_layer(cmd_buffer, image,
- subresourceRange, l, s,
- flush_cb);
- }
- }
-
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
- RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
-
- radv_meta_restore(&saved_state, cmd_buffer);
+ struct radv_device *device = cmd_buffer->device;
+ struct radv_meta_saved_state saved_state;
+ bool flush_cb = false;
+ VkPipeline *pipeline;
+
+ if (decompress_dcc && radv_dcc_enabled(image, subresourceRange->baseMipLevel)) {
+ pipeline = &device->meta_state.fast_clear_flush.dcc_decompress_pipeline;
+ } else if (radv_image_has_fmask(image) && !image->tc_compatible_cmask) {
+ pipeline = &device->meta_state.fast_clear_flush.fmask_decompress_pipeline;
+ } else {
+ pipeline = &device->meta_state.fast_clear_flush.cmask_eliminate_pipeline;
+ }
+
+ if (!*pipeline) {
+ VkResult ret;
+
+ ret = radv_device_init_meta_fast_clear_flush_state_internal(device);
+ if (ret != VK_SUCCESS) {
+ cmd_buffer->record_result = ret;
+ return;
+ }
+ }
+
+ if (pipeline == &device->meta_state.fast_clear_flush.dcc_decompress_pipeline ||
+ pipeline == &device->meta_state.fast_clear_flush.fmask_decompress_pipeline) {
+ /* Flushing CB is required before and after DCC_DECOMPRESS or
+ * FMASK_DECOMPRESS.
+ */
+ flush_cb = true;
+ }
+
+ radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_PASS);
+
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS,
+ *pipeline);
+
+ for (uint32_t l = 0; l < radv_get_levelCount(image, subresourceRange); ++l) {
+ uint32_t width, height;
+
+ /* Do not decompress levels without DCC. */
+ if (decompress_dcc && !radv_dcc_enabled(image, subresourceRange->baseMipLevel + l))
+ continue;
+
+ width = radv_minify(image->info.width, subresourceRange->baseMipLevel + l);
+ height = radv_minify(image->info.height, subresourceRange->baseMipLevel + l);
+
+ radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
+ &(VkViewport){.x = 0,
+ .y = 0,
+ .width = width,
+ .height = height,
+ .minDepth = 0.0f,
+ .maxDepth = 1.0f});
+
+ radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
+ &(VkRect2D){
+ .offset = {0, 0},
+ .extent = {width, height},
+ });
+
+ for (uint32_t s = 0; s < radv_get_layerCount(image, subresourceRange); s++) {
+ radv_process_color_image_layer(cmd_buffer, image, subresourceRange, l, s, flush_cb);
+ }
+ }
+
+ cmd_buffer->state.flush_bits |=
+ RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
+
+ radv_meta_restore(&saved_state, cmd_buffer);
}
static void
-radv_emit_color_decompress(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *subresourceRange,
- bool decompress_dcc)
+radv_emit_color_decompress(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *subresourceRange, bool decompress_dcc)
{
- bool use_predication = false;
- bool old_predicating = false;
-
- assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
-
- if ((decompress_dcc && radv_dcc_enabled(image, subresourceRange->baseMipLevel)) ||
- (!(radv_image_has_fmask(image) && !image->tc_compatible_cmask) && image->fce_pred_offset)) {
- use_predication = true;
- }
-
- /* If we are asked for DCC decompression without DCC predicates we cannot
- * use the FCE predicate. */
- if (decompress_dcc && image->dcc_pred_offset == 0)
- use_predication = false;
-
- if (radv_dcc_enabled(image, subresourceRange->baseMipLevel) &&
- (image->info.array_size != radv_get_layerCount(image, subresourceRange) ||
- subresourceRange->baseArrayLayer != 0)) {
- /* Only use predication if the image has DCC with mipmaps or
- * if the range of layers covers the whole image because the
- * predication is based on mip level.
- */
- use_predication = false;
- }
-
- if (use_predication) {
- uint64_t pred_offset = decompress_dcc ? image->dcc_pred_offset :
- image->fce_pred_offset;
- pred_offset += 8 * subresourceRange->baseMipLevel;
-
- old_predicating = cmd_buffer->state.predicating;
-
- radv_emit_set_predication_state_from_image(cmd_buffer, image, pred_offset, true);
- cmd_buffer->state.predicating = true;
- }
-
- radv_process_color_image(cmd_buffer, image, subresourceRange,
- decompress_dcc);
-
- if (use_predication) {
- uint64_t pred_offset = decompress_dcc ? image->dcc_pred_offset :
- image->fce_pred_offset;
- pred_offset += 8 * subresourceRange->baseMipLevel;
-
- cmd_buffer->state.predicating = old_predicating;
-
- radv_emit_set_predication_state_from_image(cmd_buffer, image, pred_offset, false);
-
- if (cmd_buffer->state.predication_type != -1) {
- /* Restore previous conditional rendering user state. */
- si_emit_set_predication_state(cmd_buffer,
- cmd_buffer->state.predication_type,
- cmd_buffer->state.predication_op,
- cmd_buffer->state.predication_va);
- }
- }
-
- if (image->fce_pred_offset != 0) {
- /* Clear the image's fast-clear eliminate predicate because
- * FMASK and DCC also imply a fast-clear eliminate.
- */
- radv_update_fce_metadata(cmd_buffer, image, subresourceRange, false);
- }
-
- if (radv_dcc_enabled(image, subresourceRange->baseMipLevel)) {
- /* Mark the image as being decompressed. */
- if (decompress_dcc)
- radv_update_dcc_metadata(cmd_buffer, image, subresourceRange, false);
- }
+ bool use_predication = false;
+ bool old_predicating = false;
+
+ assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
+
+ if ((decompress_dcc && radv_dcc_enabled(image, subresourceRange->baseMipLevel)) ||
+ (!(radv_image_has_fmask(image) && !image->tc_compatible_cmask) && image->fce_pred_offset)) {
+ use_predication = true;
+ }
+
+   /* If we are asked for DCC decompression without DCC predicates, we cannot
+    * use the FCE predicate. */
+ if (decompress_dcc && image->dcc_pred_offset == 0)
+ use_predication = false;
+
+ if (radv_dcc_enabled(image, subresourceRange->baseMipLevel) &&
+ (image->info.array_size != radv_get_layerCount(image, subresourceRange) ||
+ subresourceRange->baseArrayLayer != 0)) {
+ /* Only use predication if the image has DCC with mipmaps or
+ * if the range of layers covers the whole image because the
+ * predication is based on mip level.
+ */
+ use_predication = false;
+ }
+
+ if (use_predication) {
+ uint64_t pred_offset = decompress_dcc ? image->dcc_pred_offset : image->fce_pred_offset;
+ pred_offset += 8 * subresourceRange->baseMipLevel;
+
+ old_predicating = cmd_buffer->state.predicating;
+
+ radv_emit_set_predication_state_from_image(cmd_buffer, image, pred_offset, true);
+ cmd_buffer->state.predicating = true;
+ }
+
+ radv_process_color_image(cmd_buffer, image, subresourceRange, decompress_dcc);
+
+ if (use_predication) {
+ uint64_t pred_offset = decompress_dcc ? image->dcc_pred_offset : image->fce_pred_offset;
+ pred_offset += 8 * subresourceRange->baseMipLevel;
+
+ cmd_buffer->state.predicating = old_predicating;
+
+ radv_emit_set_predication_state_from_image(cmd_buffer, image, pred_offset, false);
+
+ if (cmd_buffer->state.predication_type != -1) {
+ /* Restore previous conditional rendering user state. */
+ si_emit_set_predication_state(cmd_buffer, cmd_buffer->state.predication_type,
+ cmd_buffer->state.predication_op,
+ cmd_buffer->state.predication_va);
+ }
+ }
+
+ if (image->fce_pred_offset != 0) {
+ /* Clear the image's fast-clear eliminate predicate because
+ * FMASK and DCC also imply a fast-clear eliminate.
+ */
+ radv_update_fce_metadata(cmd_buffer, image, subresourceRange, false);
+ }
+
+ if (radv_dcc_enabled(image, subresourceRange->baseMipLevel)) {
+ /* Mark the image as being decompressed. */
+ if (decompress_dcc)
+ radv_update_dcc_metadata(cmd_buffer, image, subresourceRange, false);
+ }
}
void
-radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
+radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
const VkImageSubresourceRange *subresourceRange)
{
- struct radv_barrier_data barrier = {0};
+ struct radv_barrier_data barrier = {0};
- if (radv_image_has_fmask(image) && !image->tc_compatible_cmask) {
- barrier.layout_transitions.fmask_decompress = 1;
- } else {
- barrier.layout_transitions.fast_clear_eliminate = 1;
- }
- radv_describe_layout_transition(cmd_buffer, &barrier);
+ if (radv_image_has_fmask(image) && !image->tc_compatible_cmask) {
+ barrier.layout_transitions.fmask_decompress = 1;
+ } else {
+ barrier.layout_transitions.fast_clear_eliminate = 1;
+ }
+ radv_describe_layout_transition(cmd_buffer, &barrier);
- assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
- radv_emit_color_decompress(cmd_buffer, image, subresourceRange, false);
+ assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
+ radv_emit_color_decompress(cmd_buffer, image, subresourceRange, false);
}
static void
-radv_decompress_dcc_gfx(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
+radv_decompress_dcc_gfx(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
const VkImageSubresourceRange *subresourceRange)
{
- radv_emit_color_decompress(cmd_buffer, image, subresourceRange, true);
+ radv_emit_color_decompress(cmd_buffer, image, subresourceRange, true);
}
static void
-radv_decompress_dcc_compute(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
+radv_decompress_dcc_compute(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
const VkImageSubresourceRange *subresourceRange)
{
- struct radv_meta_saved_state saved_state;
- struct radv_image_view load_iview = {0};
- struct radv_image_view store_iview = {0};
- struct radv_device *device = cmd_buffer->device;
-
- cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT,
- image);
-
- if (!cmd_buffer->device->meta_state.fast_clear_flush.cmask_eliminate_pipeline) {
- VkResult ret = radv_device_init_meta_fast_clear_flush_state_internal(cmd_buffer->device);
- if (ret != VK_SUCCESS) {
- cmd_buffer->record_result = ret;
- return;
- }
- }
-
- radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_DESCRIPTORS |
- RADV_META_SAVE_COMPUTE_PIPELINE);
-
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.fast_clear_flush.dcc_decompress_compute_pipeline);
-
- for (uint32_t l = 0; l < radv_get_levelCount(image, subresourceRange); l++) {
- uint32_t width, height;
-
- /* Do not decompress levels without DCC. */
- if (!radv_dcc_enabled(image, subresourceRange->baseMipLevel + l))
- continue;
-
- width = radv_minify(image->info.width,
- subresourceRange->baseMipLevel + l);
- height = radv_minify(image->info.height,
- subresourceRange->baseMipLevel + l);
-
- for (uint32_t s = 0; s < radv_get_layerCount(image, subresourceRange); s++) {
- radv_image_view_init(&load_iview, cmd_buffer->device,
- &(VkImageViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(image),
- .viewType = VK_IMAGE_VIEW_TYPE_2D,
- .format = image->vk_format,
- .subresourceRange = {
- .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .baseMipLevel = subresourceRange->baseMipLevel + l,
- .levelCount = 1,
- .baseArrayLayer = subresourceRange->baseArrayLayer + s,
- .layerCount = 1
- },
- }, NULL);
- radv_image_view_init(&store_iview, cmd_buffer->device,
- &(VkImageViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(image),
- .viewType = VK_IMAGE_VIEW_TYPE_2D,
- .format = image->vk_format,
- .subresourceRange = {
- .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .baseMipLevel = subresourceRange->baseMipLevel + l,
- .levelCount = 1,
- .baseArrayLayer = subresourceRange->baseArrayLayer + s,
- .layerCount = 1
- },
- }, &(struct radv_image_view_extra_create_info) {
- .disable_compression = true
- });
-
- radv_meta_push_descriptor_set(cmd_buffer,
- VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout,
- 0, /* set */
- 2, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]) {
- {
- .sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(&load_iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- }
- },
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 1,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]) {
- {
- .sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(&store_iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- }
- }
- });
-
- radv_unaligned_dispatch(cmd_buffer, width, height, 1);
- }
- }
-
- /* Mark this image as actually being decompressed. */
- radv_update_dcc_metadata(cmd_buffer, image, subresourceRange, false);
-
- radv_meta_restore(&saved_state, cmd_buffer);
-
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_INV_VCACHE |
- radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);
-
- /* Initialize the DCC metadata as "fully expanded". */
- cmd_buffer->state.flush_bits |=
- radv_init_dcc(cmd_buffer, image, subresourceRange, 0xffffffff);
+ struct radv_meta_saved_state saved_state;
+ struct radv_image_view load_iview = {0};
+ struct radv_image_view store_iview = {0};
+ struct radv_device *device = cmd_buffer->device;
+
+ cmd_buffer->state.flush_bits |=
+ radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);
+
+ if (!cmd_buffer->device->meta_state.fast_clear_flush.cmask_eliminate_pipeline) {
+ VkResult ret = radv_device_init_meta_fast_clear_flush_state_internal(cmd_buffer->device);
+ if (ret != VK_SUCCESS) {
+ cmd_buffer->record_result = ret;
+ return;
+ }
+ }
+
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE);
+
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
+ device->meta_state.fast_clear_flush.dcc_decompress_compute_pipeline);
+
+ for (uint32_t l = 0; l < radv_get_levelCount(image, subresourceRange); l++) {
+ uint32_t width, height;
+
+ /* Do not decompress levels without DCC. */
+ if (!radv_dcc_enabled(image, subresourceRange->baseMipLevel + l))
+ continue;
+
+ width = radv_minify(image->info.width, subresourceRange->baseMipLevel + l);
+ height = radv_minify(image->info.height, subresourceRange->baseMipLevel + l);
+
+ for (uint32_t s = 0; s < radv_get_layerCount(image, subresourceRange); s++) {
+ radv_image_view_init(
+ &load_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(image),
+ .viewType = VK_IMAGE_VIEW_TYPE_2D,
+ .format = image->vk_format,
+ .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = subresourceRange->baseMipLevel + l,
+ .levelCount = 1,
+ .baseArrayLayer = subresourceRange->baseArrayLayer + s,
+ .layerCount = 1},
+ },
+ NULL);
+ radv_image_view_init(
+ &store_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(image),
+ .viewType = VK_IMAGE_VIEW_TYPE_2D,
+ .format = image->vk_format,
+ .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = subresourceRange->baseMipLevel + l,
+ .levelCount = 1,
+ .baseArrayLayer = subresourceRange->baseArrayLayer + s,
+ .layerCount = 1},
+ },
+ &(struct radv_image_view_extra_create_info){.disable_compression = true});
+
+ radv_meta_push_descriptor_set(
+ cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
+ device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout, 0, /* set */
+ 2, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .pImageInfo =
+ (VkDescriptorImageInfo[]){
+ {
+ .sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(&load_iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }},
+ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 1,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .pImageInfo = (VkDescriptorImageInfo[]){
+ {
+ .sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(&store_iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }}});
+
+ radv_unaligned_dispatch(cmd_buffer, width, height, 1);
+ }
+ }
+
+ /* Mark this image as actually being decompressed. */
+ radv_update_dcc_metadata(cmd_buffer, image, subresourceRange, false);
+
+ radv_meta_restore(&saved_state, cmd_buffer);
+
+ cmd_buffer->state.flush_bits |=
+ RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
+ radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);
+
+ /* Initialize the DCC metadata as "fully expanded". */
+ cmd_buffer->state.flush_bits |= radv_init_dcc(cmd_buffer, image, subresourceRange, 0xffffffff);
}
void
-radv_decompress_dcc(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
+radv_decompress_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
const VkImageSubresourceRange *subresourceRange)
{
- struct radv_barrier_data barrier = {0};
+ struct radv_barrier_data barrier = {0};
- barrier.layout_transitions.dcc_decompress = 1;
- radv_describe_layout_transition(cmd_buffer, &barrier);
+ barrier.layout_transitions.dcc_decompress = 1;
+ radv_describe_layout_transition(cmd_buffer, &barrier);
- if (cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL)
- radv_decompress_dcc_gfx(cmd_buffer, image, subresourceRange);
- else
- radv_decompress_dcc_compute(cmd_buffer, image, subresourceRange);
+ if (cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL)
+ radv_decompress_dcc_gfx(cmd_buffer, image, subresourceRange);
+ else
+ radv_decompress_dcc_compute(cmd_buffer, image, subresourceRange);
}
diff --git a/src/amd/vulkan/radv_meta_fmask_expand.c b/src/amd/vulkan/radv_meta_fmask_expand.c
index 4b7b685d6b0..dc2327162e1 100644
--- a/src/amd/vulkan/radv_meta_fmask_expand.c
+++ b/src/amd/vulkan/radv_meta_fmask_expand.c
@@ -29,279 +29,249 @@
static nir_shader *
build_fmask_expand_compute_shader(struct radv_device *device, int samples)
{
- const struct glsl_type *type =
- glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, true,
- GLSL_TYPE_FLOAT);
- const struct glsl_type *img_type =
- glsl_image_type(GLSL_SAMPLER_DIM_MS, true,
- GLSL_TYPE_FLOAT);
-
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_fmask_expand_cs-%d", samples);
- b.shader->info.cs.local_size[0] = 8;
- b.shader->info.cs.local_size[1] = 8;
- b.shader->info.cs.local_size[2] = 1;
-
- nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
- type, "s_tex");
- input_img->data.descriptor_set = 0;
- input_img->data.binding = 0;
-
- nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
- img_type, "out_img");
- output_img->data.descriptor_set = 0;
- output_img->data.binding = 1;
- output_img->data.access = ACCESS_NON_READABLE;
-
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
- nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
- nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info.cs.local_size[0],
- b.shader->info.cs.local_size[1],
- b.shader->info.cs.local_size[2], 0);
-
- nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
- nir_ssa_def *layer_id = nir_channel(&b, wg_id, 2);
-
- nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
- nir_ssa_def *output_img_deref = &nir_build_deref_var(&b, output_img)->dest.ssa;
-
- nir_ssa_def *tex_coord = nir_vec3(&b, nir_channel(&b, global_id, 0),
- nir_channel(&b, global_id, 1),
- layer_id);
-
- nir_tex_instr *tex_instr[8];
- for (uint32_t i = 0; i < samples; i++) {
- tex_instr[i] = nir_tex_instr_create(b.shader, 3);
-
- nir_tex_instr *tex = tex_instr[i];
- tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
- tex->op = nir_texop_txf_ms;
- tex->src[0].src_type = nir_tex_src_coord;
- tex->src[0].src = nir_src_for_ssa(tex_coord);
- tex->src[1].src_type = nir_tex_src_ms_index;
- tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i));
- tex->src[2].src_type = nir_tex_src_texture_deref;
- tex->src[2].src = nir_src_for_ssa(input_img_deref);
- tex->dest_type = nir_type_float32;
- tex->is_array = true;
- tex->coord_components = 3;
-
- nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
- nir_builder_instr_insert(&b, &tex->instr);
- }
-
- nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, tex_coord, 0),
- nir_channel(&b, tex_coord, 1),
- nir_channel(&b, tex_coord, 2),
- nir_imm_int(&b, 0));
-
- for (uint32_t i = 0; i < samples; i++) {
- nir_ssa_def *outval = &tex_instr[i]->dest.ssa;
-
- nir_image_deref_store(&b, output_img_deref, img_coord, nir_imm_int(&b, i),
- outval, nir_imm_int(&b, 0));
- }
-
- return b.shader;
+ const struct glsl_type *type =
+ glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, true, GLSL_TYPE_FLOAT);
+ const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_MS, true, GLSL_TYPE_FLOAT);
+
+ nir_builder b =
+ nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_fmask_expand_cs-%d", samples);
+ b.shader->info.cs.local_size[0] = 8;
+ b.shader->info.cs.local_size[1] = 8;
+ b.shader->info.cs.local_size[2] = 1;
+
+ nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, type, "s_tex");
+ input_img->data.descriptor_set = 0;
+ input_img->data.binding = 0;
+
+ nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
+ output_img->data.descriptor_set = 0;
+ output_img->data.binding = 1;
+ output_img->data.access = ACCESS_NON_READABLE;
+
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
+ nir_ssa_def *block_size =
+ nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1],
+ b.shader->info.cs.local_size[2], 0);
+
+ nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+ nir_ssa_def *layer_id = nir_channel(&b, wg_id, 2);
+
+ nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
+ nir_ssa_def *output_img_deref = &nir_build_deref_var(&b, output_img)->dest.ssa;
+
+ nir_ssa_def *tex_coord =
+ nir_vec3(&b, nir_channel(&b, global_id, 0), nir_channel(&b, global_id, 1), layer_id);
+
+ nir_tex_instr *tex_instr[8];
+ for (uint32_t i = 0; i < samples; i++) {
+ tex_instr[i] = nir_tex_instr_create(b.shader, 3);
+
+ nir_tex_instr *tex = tex_instr[i];
+ tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
+ tex->op = nir_texop_txf_ms;
+ tex->src[0].src_type = nir_tex_src_coord;
+ tex->src[0].src = nir_src_for_ssa(tex_coord);
+ tex->src[1].src_type = nir_tex_src_ms_index;
+ tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i));
+ tex->src[2].src_type = nir_tex_src_texture_deref;
+ tex->src[2].src = nir_src_for_ssa(input_img_deref);
+ tex->dest_type = nir_type_float32;
+ tex->is_array = true;
+ tex->coord_components = 3;
+
+ nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+ nir_builder_instr_insert(&b, &tex->instr);
+ }
+
+ nir_ssa_def *img_coord =
+ nir_vec4(&b, nir_channel(&b, tex_coord, 0), nir_channel(&b, tex_coord, 1),
+ nir_channel(&b, tex_coord, 2), nir_imm_int(&b, 0));
+
+ for (uint32_t i = 0; i < samples; i++) {
+ nir_ssa_def *outval = &tex_instr[i]->dest.ssa;
+
+ nir_image_deref_store(&b, output_img_deref, img_coord, nir_imm_int(&b, i), outval,
+ nir_imm_int(&b, 0));
+ }
+
+ return b.shader;
}
void
-radv_expand_fmask_image_inplace(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *subresourceRange)
+radv_expand_fmask_image_inplace(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *subresourceRange)
{
- struct radv_device *device = cmd_buffer->device;
- struct radv_meta_saved_state saved_state;
- const uint32_t samples = image->info.samples;
- const uint32_t samples_log2 = ffs(samples) - 1;
- unsigned layer_count = radv_get_layerCount(image, subresourceRange);
- struct radv_image_view iview;
-
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_COMPUTE_PIPELINE |
- RADV_META_SAVE_DESCRIPTORS);
-
- VkPipeline pipeline = device->meta_state.fmask_expand.pipeline[samples_log2];
-
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
-
- cmd_buffer->state.flush_bits |=
- radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_READ_BIT |
- VK_ACCESS_SHADER_WRITE_BIT, image);
-
- radv_image_view_init(&iview, device,
- &(VkImageViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(image),
- .viewType = radv_meta_get_view_type(image),
- .format = vk_format_no_srgb(image->vk_format),
- .subresourceRange = {
- .aspectMask = subresourceRange->aspectMask,
- .baseMipLevel = 0,
- .levelCount = 1,
- .baseArrayLayer = subresourceRange->baseArrayLayer,
- .layerCount = layer_count,
- },
- }, NULL);
-
- radv_meta_push_descriptor_set(cmd_buffer,
- VK_PIPELINE_BIND_POINT_COMPUTE,
- cmd_buffer->device->meta_state.fmask_expand.p_layout,
- 0, /* set */
- 2, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]) {
- {
- .sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(&iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL
- },
- }
- },
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 1,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]) {
- {
- .sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(&iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL
- },
- }
- }
- });
-
- radv_unaligned_dispatch(cmd_buffer, image->info.width, image->info.height, layer_count);
-
- radv_meta_restore(&saved_state, cmd_buffer);
-
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
- radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);
-
- /* Re-initialize FMASK in fully expanded mode. */
- cmd_buffer->state.flush_bits |=
- radv_init_fmask(cmd_buffer, image, subresourceRange);
+ struct radv_device *device = cmd_buffer->device;
+ struct radv_meta_saved_state saved_state;
+ const uint32_t samples = image->info.samples;
+ const uint32_t samples_log2 = ffs(samples) - 1;
+ unsigned layer_count = radv_get_layerCount(image, subresourceRange);
+ struct radv_image_view iview;
+
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS);
+
+ VkPipeline pipeline = device->meta_state.fmask_expand.pipeline[samples_log2];
+
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
+ pipeline);
+
+ cmd_buffer->state.flush_bits |= radv_dst_access_flush(
+ cmd_buffer, VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, image);
+
+ radv_image_view_init(&iview, device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(image),
+ .viewType = radv_meta_get_view_type(image),
+ .format = vk_format_no_srgb(image->vk_format),
+ .subresourceRange =
+ {
+ .aspectMask = subresourceRange->aspectMask,
+ .baseMipLevel = 0,
+ .levelCount = 1,
+ .baseArrayLayer = subresourceRange->baseArrayLayer,
+ .layerCount = layer_count,
+ },
+ },
+ NULL);
+
+ radv_meta_push_descriptor_set(
+ cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
+ cmd_buffer->device->meta_state.fmask_expand.p_layout, 0, /* set */
+ 2, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .pImageInfo =
+ (VkDescriptorImageInfo[]){
+ {.sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(&iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
+ }},
+ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 1,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .pImageInfo = (VkDescriptorImageInfo[]){
+ {.sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(&iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
+ }}});
+
+ radv_unaligned_dispatch(cmd_buffer, image->info.width, image->info.height, layer_count);
+
+ radv_meta_restore(&saved_state, cmd_buffer);
+
+ cmd_buffer->state.flush_bits |=
+ RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
+ radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);
+
+ /* Re-initialize FMASK in fully expanded mode. */
+ cmd_buffer->state.flush_bits |= radv_init_fmask(cmd_buffer, image, subresourceRange);
}
-void radv_device_finish_meta_fmask_expand_state(struct radv_device *device)
+void
+radv_device_finish_meta_fmask_expand_state(struct radv_device *device)
{
- struct radv_meta_state *state = &device->meta_state;
-
- for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->fmask_expand.pipeline[i],
- &state->alloc);
- }
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->fmask_expand.p_layout,
- &state->alloc);
-
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- state->fmask_expand.ds_layout,
- &state->alloc);
+ struct radv_meta_state *state = &device->meta_state;
+
+ for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
+ radv_DestroyPipeline(radv_device_to_handle(device), state->fmask_expand.pipeline[i],
+ &state->alloc);
+ }
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->fmask_expand.p_layout,
+ &state->alloc);
+
+ radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->fmask_expand.ds_layout,
+ &state->alloc);
}
static VkResult
-create_fmask_expand_pipeline(struct radv_device *device,
- int samples,
- VkPipeline *pipeline)
+create_fmask_expand_pipeline(struct radv_device *device, int samples, VkPipeline *pipeline)
{
- struct radv_meta_state *state = &device->meta_state;
- VkResult result;
- nir_shader *cs = build_fmask_expand_compute_shader(device, samples);;
-
- VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = vk_shader_module_handle_from_nir(cs),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = pipeline_shader_stage,
- .flags = 0,
- .layout = state->fmask_expand.p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&state->cache),
- 1, &vk_pipeline_info, NULL,
- pipeline);
-
- ralloc_free(cs);
- return result;
+ struct radv_meta_state *state = &device->meta_state;
+ VkResult result;
+   nir_shader *cs = build_fmask_expand_compute_shader(device, samples);
+
+ VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(cs),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = pipeline_shader_stage,
+ .flags = 0,
+ .layout = state->fmask_expand.p_layout,
+ };
+
+ result = radv_CreateComputePipelines(radv_device_to_handle(device),
+ radv_pipeline_cache_to_handle(&state->cache), 1,
+ &vk_pipeline_info, NULL, pipeline);
+
+ ralloc_free(cs);
+ return result;
}
VkResult
radv_device_init_meta_fmask_expand_state(struct radv_device *device)
{
- struct radv_meta_state *state = &device->meta_state;
- VkResult result;
-
- VkDescriptorSetLayoutCreateInfo ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 2,
- .pBindings = (VkDescriptorSetLayoutBinding[]) {
- {
- .binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- {
- .binding = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- }
- };
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
- &ds_create_info, &state->alloc,
- &state->fmask_expand.ds_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
- VkPipelineLayoutCreateInfo color_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 1,
- .pSetLayouts = &state->fmask_expand.ds_layout,
- .pushConstantRangeCount = 0,
- .pPushConstantRanges = NULL,
- };
-
- result = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &color_create_info, &state->alloc,
- &state->fmask_expand.p_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
- for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; i++) {
- uint32_t samples = 1 << i;
- result = create_fmask_expand_pipeline(device, samples,
- &state->fmask_expand.pipeline[i]);
- if (result != VK_SUCCESS)
- goto fail;
- }
-
- return VK_SUCCESS;
+ struct radv_meta_state *state = &device->meta_state;
+ VkResult result;
+
+ VkDescriptorSetLayoutCreateInfo ds_create_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 2,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ {.binding = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
+ &state->alloc, &state->fmask_expand.ds_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineLayoutCreateInfo color_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &state->fmask_expand.ds_layout,
+ .pushConstantRangeCount = 0,
+ .pPushConstantRanges = NULL,
+ };
+
+ result = radv_CreatePipelineLayout(radv_device_to_handle(device), &color_create_info,
+ &state->alloc, &state->fmask_expand.p_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; i++) {
+ uint32_t samples = 1 << i;
+ result = create_fmask_expand_pipeline(device, samples, &state->fmask_expand.pipeline[i]);
+ if (result != VK_SUCCESS)
+ goto fail;
+ }
+
+ return VK_SUCCESS;
fail:
- radv_device_finish_meta_fmask_expand_state(device);
- return result;
+ radv_device_finish_meta_fmask_expand_state(device);
+ return result;
}
diff --git a/src/amd/vulkan/radv_meta_resolve.c b/src/amd/vulkan/radv_meta_resolve.c
index 301725d19da..3a23f392249 100644
--- a/src/amd/vulkan/radv_meta_resolve.c
+++ b/src/amd/vulkan/radv_meta_resolve.c
@@ -24,751 +24,708 @@
#include <assert.h>
#include <stdbool.h>
+#include "nir/nir_builder.h"
#include "radv_meta.h"
#include "radv_private.h"
-#include "vk_format.h"
-#include "nir/nir_builder.h"
#include "sid.h"
+#include "vk_format.h"
/* emit 0, 0, 0, 1 */
static nir_shader *
build_nir_fs(void)
{
- const struct glsl_type *vec4 = glsl_vec4_type();
- nir_variable *f_color; /* vec4, fragment output color */
+ const struct glsl_type *vec4 = glsl_vec4_type();
+ nir_variable *f_color; /* vec4, fragment output color */
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, "meta_resolve_fs");
+ nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, "meta_resolve_fs");
- f_color = nir_variable_create(b.shader, nir_var_shader_out, vec4,
- "f_color");
- f_color->data.location = FRAG_RESULT_DATA0;
- nir_store_var(&b, f_color, nir_imm_vec4(&b, 0.0, 0.0, 0.0, 1.0), 0xf);
+ f_color = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color");
+ f_color->data.location = FRAG_RESULT_DATA0;
+ nir_store_var(&b, f_color, nir_imm_vec4(&b, 0.0, 0.0, 0.0, 1.0), 0xf);
- return b.shader;
+ return b.shader;
}
static VkResult
create_pass(struct radv_device *device, VkFormat vk_format, VkRenderPass *pass)
{
- VkResult result;
- VkDevice device_h = radv_device_to_handle(device);
- const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
- VkAttachmentDescription2 attachments[2];
- int i;
-
- for (i = 0; i < 2; i++) {
- attachments[i].sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2;
- attachments[i].format = vk_format;
- attachments[i].samples = 1;
- attachments[i].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
- attachments[i].storeOp = VK_ATTACHMENT_STORE_OP_STORE;
- }
- attachments[0].initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
- attachments[0].finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
- attachments[1].initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
- attachments[1].finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
-
- result = radv_CreateRenderPass2(device_h,
- &(VkRenderPassCreateInfo2) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
- .attachmentCount = 2,
- .pAttachments = attachments,
- .subpassCount = 1,
- .pSubpasses = &(VkSubpassDescription2) {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
- .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
- .inputAttachmentCount = 0,
- .colorAttachmentCount = 2,
- .pColorAttachments = (VkAttachmentReference2[]) {
- {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
- .attachment = 0,
- .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
- },
- {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
- .attachment = 1,
- .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
- },
- },
- .pResolveAttachments = NULL,
- .pDepthStencilAttachment = &(VkAttachmentReference2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
- .attachment = VK_ATTACHMENT_UNUSED,
- },
- .preserveAttachmentCount = 0,
- .pPreserveAttachments = NULL,
- },
- .dependencyCount = 2,
- .pDependencies = (VkSubpassDependency2[]) {
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = VK_SUBPASS_EXTERNAL,
- .dstSubpass = 0,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- },
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = 0,
- .dstSubpass = VK_SUBPASS_EXTERNAL,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- }
- },
- },
- alloc,
- pass);
-
- return result;
+ VkResult result;
+ VkDevice device_h = radv_device_to_handle(device);
+ const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
+ VkAttachmentDescription2 attachments[2];
+ int i;
+
+ for (i = 0; i < 2; i++) {
+ attachments[i].sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2;
+ attachments[i].format = vk_format;
+ attachments[i].samples = 1;
+ attachments[i].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
+ attachments[i].storeOp = VK_ATTACHMENT_STORE_OP_STORE;
+ }
+ attachments[0].initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+ attachments[0].finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+ attachments[1].initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+ attachments[1].finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+
+ result = radv_CreateRenderPass2(
+ device_h,
+ &(VkRenderPassCreateInfo2){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
+ .attachmentCount = 2,
+ .pAttachments = attachments,
+ .subpassCount = 1,
+ .pSubpasses =
+ &(VkSubpassDescription2){
+ .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 2,
+ .pColorAttachments =
+ (VkAttachmentReference2[]){
+ {
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
+ .attachment = 0,
+ .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
+ .attachment = 1,
+ .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+ },
+ },
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment =
+ &(VkAttachmentReference2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
+ .attachment = VK_ATTACHMENT_UNUSED,
+ },
+ .preserveAttachmentCount = 0,
+ .pPreserveAttachments = NULL,
+ },
+ .dependencyCount = 2,
+ .pDependencies =
+ (VkSubpassDependency2[]){{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = VK_SUBPASS_EXTERNAL,
+ .dstSubpass = 0,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0},
+ {.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = 0,
+ .dstSubpass = VK_SUBPASS_EXTERNAL,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0}},
+ },
+ alloc, pass);
+
+ return result;
}
static VkResult
-create_pipeline(struct radv_device *device,
- VkShaderModule vs_module_h,
- VkPipeline *pipeline,
- VkRenderPass pass)
+create_pipeline(struct radv_device *device, VkShaderModule vs_module_h, VkPipeline *pipeline,
+ VkRenderPass pass)
{
- VkResult result;
- VkDevice device_h = radv_device_to_handle(device);
-
- nir_shader *fs_module = build_nir_fs();
- if (!fs_module) {
- /* XXX: Need more accurate error */
- result = VK_ERROR_OUT_OF_HOST_MEMORY;
- goto cleanup;
- }
-
- VkPipelineLayoutCreateInfo pl_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 0,
- .pSetLayouts = NULL,
- .pushConstantRangeCount = 0,
- .pPushConstantRanges = NULL,
- };
-
- if (!device->meta_state.resolve.p_layout) {
- result = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &pl_create_info,
- &device->meta_state.alloc,
- &device->meta_state.resolve.p_layout);
- if (result != VK_SUCCESS)
- goto cleanup;
- }
-
- result = radv_graphics_pipeline_create(device_h,
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- &(VkGraphicsPipelineCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
- .stageCount = 2,
- .pStages = (VkPipelineShaderStageCreateInfo[]) {
- {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_VERTEX_BIT,
- .module = vs_module_h,
- .pName = "main",
- },
- {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
- .module = vk_shader_module_handle_from_nir(fs_module),
- .pName = "main",
- },
- },
- .pVertexInputState = &(VkPipelineVertexInputStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
- .vertexBindingDescriptionCount = 0,
- .vertexAttributeDescriptionCount = 0,
- },
- .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
- .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
- .primitiveRestartEnable = false,
- },
- .pViewportState = &(VkPipelineViewportStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
- .viewportCount = 1,
- .scissorCount = 1,
- },
- .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
- .depthClampEnable = false,
- .rasterizerDiscardEnable = false,
- .polygonMode = VK_POLYGON_MODE_FILL,
- .cullMode = VK_CULL_MODE_NONE,
- .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
- },
- .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
- .rasterizationSamples = 1,
- .sampleShadingEnable = false,
- .pSampleMask = NULL,
- .alphaToCoverageEnable = false,
- .alphaToOneEnable = false,
- },
- .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
- .logicOpEnable = false,
- .attachmentCount = 2,
- .pAttachments = (VkPipelineColorBlendAttachmentState []) {
- {
- .colorWriteMask = VK_COLOR_COMPONENT_R_BIT |
- VK_COLOR_COMPONENT_G_BIT |
- VK_COLOR_COMPONENT_B_BIT |
- VK_COLOR_COMPONENT_A_BIT,
- },
- {
- .colorWriteMask = 0,
-
- }
- },
- },
- .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
- .dynamicStateCount = 2,
- .pDynamicStates = (VkDynamicState[]) {
- VK_DYNAMIC_STATE_VIEWPORT,
- VK_DYNAMIC_STATE_SCISSOR,
- },
- },
- .layout = device->meta_state.resolve.p_layout,
- .renderPass = pass,
- .subpass = 0,
- },
- &(struct radv_graphics_pipeline_create_info) {
- .use_rectlist = true,
- .custom_blend_mode = V_028808_CB_RESOLVE,
- },
- &device->meta_state.alloc, pipeline);
- if (result != VK_SUCCESS)
- goto cleanup;
-
- goto cleanup;
+ VkResult result;
+ VkDevice device_h = radv_device_to_handle(device);
+
+ nir_shader *fs_module = build_nir_fs();
+ if (!fs_module) {
+ /* XXX: Need more accurate error */
+ result = VK_ERROR_OUT_OF_HOST_MEMORY;
+ goto cleanup;
+ }
+
+ VkPipelineLayoutCreateInfo pl_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 0,
+ .pSetLayouts = NULL,
+ .pushConstantRangeCount = 0,
+ .pPushConstantRanges = NULL,
+ };
+
+ if (!device->meta_state.resolve.p_layout) {
+ result =
+ radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
+ &device->meta_state.alloc, &device->meta_state.resolve.p_layout);
+ if (result != VK_SUCCESS)
+ goto cleanup;
+ }
+
+ result = radv_graphics_pipeline_create(
+ device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache),
+ &(VkGraphicsPipelineCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .stageCount = 2,
+ .pStages =
+ (VkPipelineShaderStageCreateInfo[]){
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_VERTEX_BIT,
+ .module = vs_module_h,
+ .pName = "main",
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .module = vk_shader_module_handle_from_nir(fs_module),
+ .pName = "main",
+ },
+ },
+ .pVertexInputState =
+ &(VkPipelineVertexInputStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .vertexBindingDescriptionCount = 0,
+ .vertexAttributeDescriptionCount = 0,
+ },
+ .pInputAssemblyState =
+ &(VkPipelineInputAssemblyStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
+ .primitiveRestartEnable = false,
+ },
+ .pViewportState =
+ &(VkPipelineViewportStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .viewportCount = 1,
+ .scissorCount = 1,
+ },
+ .pRasterizationState =
+ &(VkPipelineRasterizationStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .depthClampEnable = false,
+ .rasterizerDiscardEnable = false,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .cullMode = VK_CULL_MODE_NONE,
+ .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
+ },
+ .pMultisampleState =
+ &(VkPipelineMultisampleStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .rasterizationSamples = 1,
+ .sampleShadingEnable = false,
+ .pSampleMask = NULL,
+ .alphaToCoverageEnable = false,
+ .alphaToOneEnable = false,
+ },
+ .pColorBlendState =
+ &(VkPipelineColorBlendStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .logicOpEnable = false,
+ .attachmentCount = 2,
+ .pAttachments =
+ (VkPipelineColorBlendAttachmentState[]){
+ {
+ .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
+ VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
+ },
+ {
+ .colorWriteMask = 0,
+
+ }},
+ },
+ .pDynamicState =
+ &(VkPipelineDynamicStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .dynamicStateCount = 2,
+ .pDynamicStates =
+ (VkDynamicState[]){
+ VK_DYNAMIC_STATE_VIEWPORT,
+ VK_DYNAMIC_STATE_SCISSOR,
+ },
+ },
+ .layout = device->meta_state.resolve.p_layout,
+ .renderPass = pass,
+ .subpass = 0,
+ },
+ &(struct radv_graphics_pipeline_create_info){
+ .use_rectlist = true,
+ .custom_blend_mode = V_028808_CB_RESOLVE,
+ },
+ &device->meta_state.alloc, pipeline);
+ if (result != VK_SUCCESS)
+ goto cleanup;
+
+ goto cleanup;
cleanup:
- ralloc_free(fs_module);
- return result;
+ ralloc_free(fs_module);
+ return result;
}
void
radv_device_finish_meta_resolve_state(struct radv_device *device)
{
- struct radv_meta_state *state = &device->meta_state;
-
- for (uint32_t j = 0; j < NUM_META_FS_KEYS; j++) {
- radv_DestroyRenderPass(radv_device_to_handle(device),
- state->resolve.pass[j], &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve.pipeline[j], &state->alloc);
- }
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->resolve.p_layout, &state->alloc);
-
+ struct radv_meta_state *state = &device->meta_state;
+
+ for (uint32_t j = 0; j < NUM_META_FS_KEYS; j++) {
+ radv_DestroyRenderPass(radv_device_to_handle(device), state->resolve.pass[j], &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->resolve.pipeline[j],
+ &state->alloc);
+ }
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->resolve.p_layout,
+ &state->alloc);
}
VkResult
radv_device_init_meta_resolve_state(struct radv_device *device, bool on_demand)
{
- if (on_demand)
- return VK_SUCCESS;
-
- VkResult res = VK_SUCCESS;
- struct radv_meta_state *state = &device->meta_state;
- nir_shader *vs_module = radv_meta_build_nir_vs_generate_vertices();
- if (!vs_module) {
- /* XXX: Need more accurate error */
- res = VK_ERROR_OUT_OF_HOST_MEMORY;
- goto fail;
- }
-
- for (uint32_t i = 0; i < NUM_META_FS_KEYS; ++i) {
- VkFormat format = radv_fs_key_format_exemplars[i];
- unsigned fs_key = radv_format_meta_fs_key(device, format);
- res = create_pass(device, format, &state->resolve.pass[fs_key]);
- if (res != VK_SUCCESS)
- goto fail;
-
- VkShaderModule vs_module_h = vk_shader_module_handle_from_nir(vs_module);
- res = create_pipeline(device, vs_module_h,
- &state->resolve.pipeline[fs_key], state->resolve.pass[fs_key]);
- if (res != VK_SUCCESS)
- goto fail;
- }
-
- goto cleanup;
+ if (on_demand)
+ return VK_SUCCESS;
+
+ VkResult res = VK_SUCCESS;
+ struct radv_meta_state *state = &device->meta_state;
+ nir_shader *vs_module = radv_meta_build_nir_vs_generate_vertices();
+ if (!vs_module) {
+ /* XXX: Need more accurate error */
+ res = VK_ERROR_OUT_OF_HOST_MEMORY;
+ goto fail;
+ }
+
+ for (uint32_t i = 0; i < NUM_META_FS_KEYS; ++i) {
+ VkFormat format = radv_fs_key_format_exemplars[i];
+ unsigned fs_key = radv_format_meta_fs_key(device, format);
+ res = create_pass(device, format, &state->resolve.pass[fs_key]);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ VkShaderModule vs_module_h = vk_shader_module_handle_from_nir(vs_module);
+ res = create_pipeline(device, vs_module_h, &state->resolve.pipeline[fs_key],
+ state->resolve.pass[fs_key]);
+ if (res != VK_SUCCESS)
+ goto fail;
+ }
+
+ goto cleanup;
fail:
- radv_device_finish_meta_resolve_state(device);
+ radv_device_finish_meta_resolve_state(device);
cleanup:
- ralloc_free(vs_module);
+ ralloc_free(vs_module);
- return res;
+ return res;
}
static void
-emit_resolve(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_image *src_image,
- const struct radv_image *dst_image,
- VkFormat vk_format,
- const VkOffset2D *dest_offset,
+emit_resolve(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *src_image,
+ const struct radv_image *dst_image, VkFormat vk_format, const VkOffset2D *dest_offset,
const VkExtent2D *resolve_extent)
{
- struct radv_device *device = cmd_buffer->device;
- VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
- unsigned fs_key = radv_format_meta_fs_key(device, vk_format);
-
- cmd_buffer->state.flush_bits |=
- radv_src_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, src_image) |
- radv_dst_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_READ_BIT, src_image) |
- radv_dst_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, dst_image);
-
- radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
- device->meta_state.resolve.pipeline[fs_key]);
-
- radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
- .x = dest_offset->x,
- .y = dest_offset->y,
- .width = resolve_extent->width,
- .height = resolve_extent->height,
- .minDepth = 0.0f,
- .maxDepth = 1.0f
- });
-
- radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkRect2D) {
- .offset = *dest_offset,
- .extent = *resolve_extent,
- });
-
- radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0);
- cmd_buffer->state.flush_bits |=
- radv_src_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, dst_image);
+ struct radv_device *device = cmd_buffer->device;
+ VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
+ unsigned fs_key = radv_format_meta_fs_key(device, vk_format);
+
+ cmd_buffer->state.flush_bits |=
+ radv_src_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, src_image) |
+ radv_dst_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_READ_BIT, src_image) |
+ radv_dst_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, dst_image);
+
+ radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
+ device->meta_state.resolve.pipeline[fs_key]);
+
+ radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
+ &(VkViewport){.x = dest_offset->x,
+ .y = dest_offset->y,
+ .width = resolve_extent->width,
+ .height = resolve_extent->height,
+ .minDepth = 0.0f,
+ .maxDepth = 1.0f});
+
+ radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
+ &(VkRect2D){
+ .offset = *dest_offset,
+ .extent = *resolve_extent,
+ });
+
+ radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0);
+ cmd_buffer->state.flush_bits |=
+ radv_src_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, dst_image);
}
enum radv_resolve_method {
- RESOLVE_HW,
- RESOLVE_COMPUTE,
- RESOLVE_FRAGMENT,
+ RESOLVE_HW,
+ RESOLVE_COMPUTE,
+ RESOLVE_FRAGMENT,
};
-static bool image_hw_resolve_compat(const struct radv_device *device,
- struct radv_image *src_image,
- struct radv_image *dst_image)
+static bool
+image_hw_resolve_compat(const struct radv_device *device, struct radv_image *src_image,
+ struct radv_image *dst_image)
{
- if (device->physical_device->rad_info.chip_class >= GFX9) {
- return dst_image->planes[0].surface.u.gfx9.surf.swizzle_mode ==
- src_image->planes[0].surface.u.gfx9.surf.swizzle_mode;
- } else {
- return dst_image->planes[0].surface.micro_tile_mode ==
- src_image->planes[0].surface.micro_tile_mode;
- }
+ if (device->physical_device->rad_info.chip_class >= GFX9) {
+ return dst_image->planes[0].surface.u.gfx9.surf.swizzle_mode ==
+ src_image->planes[0].surface.u.gfx9.surf.swizzle_mode;
+ } else {
+ return dst_image->planes[0].surface.micro_tile_mode ==
+ src_image->planes[0].surface.micro_tile_mode;
+ }
}
-static void radv_pick_resolve_method_images(struct radv_device *device,
- struct radv_image *src_image,
- VkFormat src_format,
- struct radv_image *dest_image,
- VkImageLayout dest_image_layout,
- bool dest_render_loop,
- struct radv_cmd_buffer *cmd_buffer,
- enum radv_resolve_method *method)
+static void
+radv_pick_resolve_method_images(struct radv_device *device, struct radv_image *src_image,
+ VkFormat src_format, struct radv_image *dest_image,
+ VkImageLayout dest_image_layout, bool dest_render_loop,
+ struct radv_cmd_buffer *cmd_buffer,
+ enum radv_resolve_method *method)
{
- uint32_t queue_mask = radv_image_queue_family_mask(dest_image,
- cmd_buffer->queue_family_index,
- cmd_buffer->queue_family_index);
-
- if (vk_format_is_color(src_format)) {
-		/* Using the fragment resolve path is currently a hint to
-		 * avoid decompressing DCC for partial resolves and
-		 * re-initializing it after resolving using compute.
-		 * TODO: Add support for layered and int to the fragment path.
-		 */
- if (radv_layout_dcc_compressed(device, dest_image, dest_image_layout,
- dest_render_loop, queue_mask)) {
- *method = RESOLVE_FRAGMENT;
- } else if (!image_hw_resolve_compat(device, src_image, dest_image)) {
- /* The micro tile mode only needs to match for the HW
- * resolve path which is the default path for non-DCC
- * resolves.
- */
- *method = RESOLVE_COMPUTE;
- }
-
- if (src_format == VK_FORMAT_R16G16_UNORM ||
- src_format == VK_FORMAT_R16G16_SNORM)
- *method = RESOLVE_COMPUTE;
- else if (vk_format_is_int(src_format))
- *method = RESOLVE_COMPUTE;
- else if (src_image->info.array_size > 1 ||
- dest_image->info.array_size > 1)
- *method = RESOLVE_COMPUTE;
- } else {
- if (src_image->info.array_size > 1 ||
- dest_image->info.array_size > 1)
- *method = RESOLVE_COMPUTE;
- else
- *method = RESOLVE_FRAGMENT;
- }
+ uint32_t queue_mask = radv_image_queue_family_mask(dest_image, cmd_buffer->queue_family_index,
+ cmd_buffer->queue_family_index);
+
+ if (vk_format_is_color(src_format)) {
+      /* Using the fragment resolve path is currently a hint to
+       * avoid decompressing DCC for partial resolves and
+       * re-initializing it after resolving using compute.
+       * TODO: Add support for layered and int to the fragment path.
+       */
+ if (radv_layout_dcc_compressed(device, dest_image, dest_image_layout, dest_render_loop,
+ queue_mask)) {
+ *method = RESOLVE_FRAGMENT;
+ } else if (!image_hw_resolve_compat(device, src_image, dest_image)) {
+ /* The micro tile mode only needs to match for the HW
+ * resolve path which is the default path for non-DCC
+ * resolves.
+ */
+ *method = RESOLVE_COMPUTE;
+ }
+
+ if (src_format == VK_FORMAT_R16G16_UNORM || src_format == VK_FORMAT_R16G16_SNORM)
+ *method = RESOLVE_COMPUTE;
+ else if (vk_format_is_int(src_format))
+ *method = RESOLVE_COMPUTE;
+ else if (src_image->info.array_size > 1 || dest_image->info.array_size > 1)
+ *method = RESOLVE_COMPUTE;
+ } else {
+ if (src_image->info.array_size > 1 || dest_image->info.array_size > 1)
+ *method = RESOLVE_COMPUTE;
+ else
+ *method = RESOLVE_FRAGMENT;
+ }
}
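In summary, the selection above resolves to the following (a restatement of the code, not additional driver policy):

/* Color resolves:
 *   - destination is DCC-compressed in the target layout -> RESOLVE_FRAGMENT
 *   - otherwise, src/dst tiling modes do not match        -> RESOLVE_COMPUTE
 *   - then, regardless of the above:
 *       R16G16_UNORM / R16G16_SNORM formats               -> RESOLVE_COMPUTE
 *       integer formats                                   -> RESOLVE_COMPUTE
 *       layered (array) source or destination             -> RESOLVE_COMPUTE
 * Depth/stencil resolves:
 *   - layered source or destination                       -> RESOLVE_COMPUTE
 *   - otherwise                                           -> RESOLVE_FRAGMENT
 * If none of the rules fire, *method keeps the caller's initial value
 * (e.g. RESOLVE_HW for a single full-image vkCmdResolveImage2KHR).
 */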
static VkResult
-build_resolve_pipeline(struct radv_device *device,
- unsigned fs_key)
+build_resolve_pipeline(struct radv_device *device, unsigned fs_key)
{
- VkResult result = VK_SUCCESS;
+ VkResult result = VK_SUCCESS;
- if (device->meta_state.resolve.pipeline[fs_key])
- return result;
+ if (device->meta_state.resolve.pipeline[fs_key])
+ return result;
- mtx_lock(&device->meta_state.mtx);
- if (device->meta_state.resolve.pipeline[fs_key]) {
- mtx_unlock(&device->meta_state.mtx);
- return result;
- }
+ mtx_lock(&device->meta_state.mtx);
+ if (device->meta_state.resolve.pipeline[fs_key]) {
+ mtx_unlock(&device->meta_state.mtx);
+ return result;
+ }
- nir_shader *vs_module = radv_meta_build_nir_vs_generate_vertices();
+ nir_shader *vs_module = radv_meta_build_nir_vs_generate_vertices();
- result = create_pass(device, radv_fs_key_format_exemplars[fs_key], &device->meta_state.resolve.pass[fs_key]);
- if (result != VK_SUCCESS)
- goto fail;
+ result = create_pass(device, radv_fs_key_format_exemplars[fs_key],
+ &device->meta_state.resolve.pass[fs_key]);
+ if (result != VK_SUCCESS)
+ goto fail;
- VkShaderModule vs_module_h = vk_shader_module_handle_from_nir(vs_module);
- result = create_pipeline(device, vs_module_h, &device->meta_state.resolve.pipeline[fs_key], device->meta_state.resolve.pass[fs_key]);
+ VkShaderModule vs_module_h = vk_shader_module_handle_from_nir(vs_module);
+ result = create_pipeline(device, vs_module_h, &device->meta_state.resolve.pipeline[fs_key],
+ device->meta_state.resolve.pass[fs_key]);
fail:
- ralloc_free(vs_module);
- mtx_unlock(&device->meta_state.mtx);
- return result;
+ ralloc_free(vs_module);
+ mtx_unlock(&device->meta_state.mtx);
+ return result;
}
static void
-radv_meta_resolve_hardware_image(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *src_image,
- VkImageLayout src_image_layout,
- struct radv_image *dst_image,
- VkImageLayout dst_image_layout,
- const VkImageResolve2KHR *region)
+radv_meta_resolve_hardware_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,
+ VkImageLayout src_image_layout, struct radv_image *dst_image,
+ VkImageLayout dst_image_layout, const VkImageResolve2KHR *region)
{
- struct radv_device *device = cmd_buffer->device;
- struct radv_meta_saved_state saved_state;
-
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_GRAPHICS_PIPELINE);
-
- assert(src_image->info.samples > 1);
- if (src_image->info.samples <= 1) {
- /* this causes GPU hangs if we get past here */
- fprintf(stderr, "radv: Illegal resolve operation (src not multisampled), will hang GPU.");
- return;
- }
- assert(dst_image->info.samples == 1);
-
- if (src_image->info.array_size > 1)
- radv_finishme("vkCmdResolveImage: multisample array images");
-
- unsigned fs_key = radv_format_meta_fs_key(device, dst_image->vk_format);
-
- /* From the Vulkan 1.0 spec:
- *
- * - The aspectMask member of srcSubresource and dstSubresource must
- * only contain VK_IMAGE_ASPECT_COLOR_BIT
- *
- * - The layerCount member of srcSubresource and dstSubresource must
- * match
- */
- assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
- assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
- assert(region->srcSubresource.layerCount ==
- region->dstSubresource.layerCount);
-
- const uint32_t src_base_layer =
- radv_meta_get_iview_layer(src_image, &region->srcSubresource,
- &region->srcOffset);
-
- const uint32_t dst_base_layer =
- radv_meta_get_iview_layer(dst_image, &region->dstSubresource,
- &region->dstOffset);
-
- /**
- * From Vulkan 1.0.6 spec: 18.6 Resolving Multisample Images
- *
- * extent is the size in texels of the source image to resolve in width,
- * height and depth. 1D images use only x and width. 2D images use x, y,
- * width and height. 3D images use x, y, z, width, height and depth.
- *
- * srcOffset and dstOffset select the initial x, y, and z offsets in
- * texels of the sub-regions of the source and destination image data.
- * extent is the size in texels of the source image to resolve in width,
- * height and depth. 1D images use only x and width. 2D images use x, y,
- * width and height. 3D images use x, y, z, width, height and depth.
- */
- const struct VkExtent3D extent =
- radv_sanitize_image_extent(src_image->type, region->extent);
- const struct VkOffset3D dstOffset =
- radv_sanitize_image_offset(dst_image->type, region->dstOffset);
-
- if (radv_dcc_enabled(dst_image, region->dstSubresource.mipLevel)) {
- VkImageSubresourceRange range = {
- .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .baseMipLevel = region->dstSubresource.mipLevel,
- .levelCount = 1,
- .baseArrayLayer = dst_base_layer,
- .layerCount = region->dstSubresource.layerCount,
- };
-
- cmd_buffer->state.flush_bits |=
- radv_init_dcc(cmd_buffer, dst_image, &range, 0xffffffff);
- }
-
- for (uint32_t layer = 0; layer < region->srcSubresource.layerCount;
- ++layer) {
-
- VkResult ret = build_resolve_pipeline(device, fs_key);
- if (ret != VK_SUCCESS) {
- cmd_buffer->record_result = ret;
- break;
- }
-
- struct radv_image_view src_iview;
- radv_image_view_init(&src_iview, cmd_buffer->device,
- &(VkImageViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(src_image),
- .viewType = radv_meta_get_view_type(src_image),
- .format = src_image->vk_format,
- .subresourceRange = {
- .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .baseMipLevel = region->srcSubresource.mipLevel,
- .levelCount = 1,
- .baseArrayLayer = src_base_layer + layer,
- .layerCount = 1,
- },
- }, NULL);
-
- struct radv_image_view dst_iview;
- radv_image_view_init(&dst_iview, cmd_buffer->device,
- &(VkImageViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(dst_image),
- .viewType = radv_meta_get_view_type(dst_image),
- .format = dst_image->vk_format,
- .subresourceRange = {
- .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .baseMipLevel = region->dstSubresource.mipLevel,
- .levelCount = 1,
- .baseArrayLayer = dst_base_layer + layer,
- .layerCount = 1,
- },
- }, NULL);
-
- VkFramebuffer fb_h;
- radv_CreateFramebuffer(radv_device_to_handle(device),
- &(VkFramebufferCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
- .attachmentCount = 2,
- .pAttachments = (VkImageView[]) {
- radv_image_view_to_handle(&src_iview),
- radv_image_view_to_handle(&dst_iview),
- },
- .width = radv_minify(dst_image->info.width,
- region->dstSubresource.mipLevel),
- .height = radv_minify(dst_image->info.height,
- region->dstSubresource.mipLevel),
- .layers = 1
- },
- &cmd_buffer->pool->alloc,
- &fb_h);
-
- radv_cmd_buffer_begin_render_pass(cmd_buffer,
- &(VkRenderPassBeginInfo) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
- .renderPass = device->meta_state.resolve.pass[fs_key],
- .framebuffer = fb_h,
- .renderArea = {
- .offset = {
- dstOffset.x,
- dstOffset.y,
- },
- .extent = {
- extent.width,
- extent.height,
- }
- },
- .clearValueCount = 0,
- .pClearValues = NULL,
- }, NULL);
-
- radv_cmd_buffer_set_subpass(cmd_buffer,
- &cmd_buffer->state.pass->subpasses[0]);
-
- emit_resolve(cmd_buffer, src_image, dst_image,
- dst_iview.vk_format,
- &(VkOffset2D) {
- .x = dstOffset.x,
- .y = dstOffset.y,
- },
- &(VkExtent2D) {
- .width = extent.width,
- .height = extent.height,
- });
-
- radv_cmd_buffer_end_render_pass(cmd_buffer);
-
- radv_DestroyFramebuffer(radv_device_to_handle(device),
- fb_h, &cmd_buffer->pool->alloc);
- }
-
- radv_meta_restore(&saved_state, cmd_buffer);
+ struct radv_device *device = cmd_buffer->device;
+ struct radv_meta_saved_state saved_state;
+
+ radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_GRAPHICS_PIPELINE);
+
+ assert(src_image->info.samples > 1);
+ if (src_image->info.samples <= 1) {
+ /* this causes GPU hangs if we get past here */
+ fprintf(stderr, "radv: Illegal resolve operation (src not multisampled), will hang GPU.");
+ return;
+ }
+ assert(dst_image->info.samples == 1);
+
+ if (src_image->info.array_size > 1)
+ radv_finishme("vkCmdResolveImage: multisample array images");
+
+ unsigned fs_key = radv_format_meta_fs_key(device, dst_image->vk_format);
+
+ /* From the Vulkan 1.0 spec:
+ *
+ * - The aspectMask member of srcSubresource and dstSubresource must
+ * only contain VK_IMAGE_ASPECT_COLOR_BIT
+ *
+ * - The layerCount member of srcSubresource and dstSubresource must
+ * match
+ */
+ assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
+ assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
+ assert(region->srcSubresource.layerCount == region->dstSubresource.layerCount);
+
+ const uint32_t src_base_layer =
+ radv_meta_get_iview_layer(src_image, &region->srcSubresource, &region->srcOffset);
+
+ const uint32_t dst_base_layer =
+ radv_meta_get_iview_layer(dst_image, &region->dstSubresource, &region->dstOffset);
+
+ /**
+ * From Vulkan 1.0.6 spec: 18.6 Resolving Multisample Images
+ *
+ * extent is the size in texels of the source image to resolve in width,
+ * height and depth. 1D images use only x and width. 2D images use x, y,
+ * width and height. 3D images use x, y, z, width, height and depth.
+ *
+ * srcOffset and dstOffset select the initial x, y, and z offsets in
+ * texels of the sub-regions of the source and destination image data.
+ * extent is the size in texels of the source image to resolve in width,
+ * height and depth. 1D images use only x and width. 2D images use x, y,
+ * width and height. 3D images use x, y, z, width, height and depth.
+ */
+ const struct VkExtent3D extent = radv_sanitize_image_extent(src_image->type, region->extent);
+ const struct VkOffset3D dstOffset =
+ radv_sanitize_image_offset(dst_image->type, region->dstOffset);
+
+ if (radv_dcc_enabled(dst_image, region->dstSubresource.mipLevel)) {
+ VkImageSubresourceRange range = {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = region->dstSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = dst_base_layer,
+ .layerCount = region->dstSubresource.layerCount,
+ };
+
+ cmd_buffer->state.flush_bits |= radv_init_dcc(cmd_buffer, dst_image, &range, 0xffffffff);
+ }
+
+ for (uint32_t layer = 0; layer < region->srcSubresource.layerCount; ++layer) {
+
+ VkResult ret = build_resolve_pipeline(device, fs_key);
+ if (ret != VK_SUCCESS) {
+ cmd_buffer->record_result = ret;
+ break;
+ }
+
+ struct radv_image_view src_iview;
+ radv_image_view_init(&src_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(src_image),
+ .viewType = radv_meta_get_view_type(src_image),
+ .format = src_image->vk_format,
+ .subresourceRange =
+ {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = region->srcSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = src_base_layer + layer,
+ .layerCount = 1,
+ },
+ },
+ NULL);
+
+ struct radv_image_view dst_iview;
+ radv_image_view_init(&dst_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(dst_image),
+ .viewType = radv_meta_get_view_type(dst_image),
+ .format = dst_image->vk_format,
+ .subresourceRange =
+ {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = region->dstSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = dst_base_layer + layer,
+ .layerCount = 1,
+ },
+ },
+ NULL);
+
+ VkFramebuffer fb_h;
+ radv_CreateFramebuffer(
+ radv_device_to_handle(device),
+ &(VkFramebufferCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+ .attachmentCount = 2,
+ .pAttachments =
+ (VkImageView[]){
+ radv_image_view_to_handle(&src_iview),
+ radv_image_view_to_handle(&dst_iview),
+ },
+ .width = radv_minify(dst_image->info.width, region->dstSubresource.mipLevel),
+ .height = radv_minify(dst_image->info.height, region->dstSubresource.mipLevel),
+ .layers = 1},
+ &cmd_buffer->pool->alloc, &fb_h);
+
+ radv_cmd_buffer_begin_render_pass(cmd_buffer,
+ &(VkRenderPassBeginInfo){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .renderPass = device->meta_state.resolve.pass[fs_key],
+ .framebuffer = fb_h,
+ .renderArea = {.offset =
+ {
+ dstOffset.x,
+ dstOffset.y,
+ },
+ .extent =
+ {
+ extent.width,
+ extent.height,
+ }},
+ .clearValueCount = 0,
+ .pClearValues = NULL,
+ },
+ NULL);
+
+ radv_cmd_buffer_set_subpass(cmd_buffer, &cmd_buffer->state.pass->subpasses[0]);
+
+ emit_resolve(cmd_buffer, src_image, dst_image, dst_iview.vk_format,
+ &(VkOffset2D){
+ .x = dstOffset.x,
+ .y = dstOffset.y,
+ },
+ &(VkExtent2D){
+ .width = extent.width,
+ .height = extent.height,
+ });
+
+ radv_cmd_buffer_end_render_pass(cmd_buffer);
+
+ radv_DestroyFramebuffer(radv_device_to_handle(device), fb_h, &cmd_buffer->pool->alloc);
+ }
+
+ radv_meta_restore(&saved_state, cmd_buffer);
}
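For readability, the hardware resolve path above performs the following steps per array layer (an outline of the code, nothing new):

/*   1. build_resolve_pipeline(): lazily create the render pass + pipeline for
 *      the destination format's fs_key.
 *   2. Create single-layer, single-level color views of the source and
 *      destination subresources.
 *   3. Create a two-attachment framebuffer (source, destination) sized to the
 *      destination mip level.
 *   4. Begin the meta render pass over the destination rectangle and let
 *      emit_resolve() draw the full-screen triangle.
 *   5. End the render pass and destroy the temporary framebuffer.
 * DCC on the destination is re-initialized once, before the loop.
 */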
static void
-resolve_image(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *src_image,
- VkImageLayout src_image_layout,
- struct radv_image *dst_image,
- VkImageLayout dst_image_layout,
- const VkImageResolve2KHR *region,
- enum radv_resolve_method resolve_method)
+resolve_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,
+ VkImageLayout src_image_layout, struct radv_image *dst_image,
+ VkImageLayout dst_image_layout, const VkImageResolve2KHR *region,
+ enum radv_resolve_method resolve_method)
{
- switch (resolve_method) {
- case RESOLVE_HW:
- radv_meta_resolve_hardware_image(cmd_buffer,
- src_image,
- src_image_layout,
- dst_image,
- dst_image_layout,
- region);
- break;
- case RESOLVE_FRAGMENT:
- radv_meta_resolve_fragment_image(cmd_buffer,
- src_image,
- src_image_layout,
- dst_image,
- dst_image_layout,
- region);
- break;
- case RESOLVE_COMPUTE:
- radv_meta_resolve_compute_image(cmd_buffer,
- src_image,
- src_image->vk_format,
- src_image_layout,
- dst_image,
- dst_image->vk_format,
- dst_image_layout,
- region);
- break;
- default:
- assert(!"Invalid resolve method selected");
- }
+ switch (resolve_method) {
+ case RESOLVE_HW:
+ radv_meta_resolve_hardware_image(cmd_buffer, src_image, src_image_layout, dst_image,
+ dst_image_layout, region);
+ break;
+ case RESOLVE_FRAGMENT:
+ radv_meta_resolve_fragment_image(cmd_buffer, src_image, src_image_layout, dst_image,
+ dst_image_layout, region);
+ break;
+ case RESOLVE_COMPUTE:
+ radv_meta_resolve_compute_image(cmd_buffer, src_image, src_image->vk_format, src_image_layout,
+ dst_image, dst_image->vk_format, dst_image_layout, region);
+ break;
+ default:
+ assert(!"Invalid resolve method selected");
+ }
}
-void radv_CmdResolveImage2KHR(
- VkCommandBuffer commandBuffer,
- const VkResolveImageInfo2KHR* pResolveImageInfo)
+void
+radv_CmdResolveImage2KHR(VkCommandBuffer commandBuffer,
+ const VkResolveImageInfo2KHR *pResolveImageInfo)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_image, src_image, pResolveImageInfo->srcImage);
- RADV_FROM_HANDLE(radv_image, dst_image, pResolveImageInfo->dstImage);
- VkImageLayout src_image_layout = pResolveImageInfo->srcImageLayout;
- VkImageLayout dst_image_layout = pResolveImageInfo->dstImageLayout;
- enum radv_resolve_method resolve_method = RESOLVE_HW;
- /* we can use the hw resolve only for single full resolves */
- if (pResolveImageInfo->regionCount == 1) {
- if (pResolveImageInfo->pRegions[0].srcOffset.x ||
- pResolveImageInfo->pRegions[0].srcOffset.y ||
- pResolveImageInfo->pRegions[0].srcOffset.z)
- resolve_method = RESOLVE_COMPUTE;
- if (pResolveImageInfo->pRegions[0].dstOffset.x ||
- pResolveImageInfo->pRegions[0].dstOffset.y ||
- pResolveImageInfo->pRegions[0].dstOffset.z)
- resolve_method = RESOLVE_COMPUTE;
-
- if (pResolveImageInfo->pRegions[0].extent.width != src_image->info.width ||
- pResolveImageInfo->pRegions[0].extent.height != src_image->info.height ||
- pResolveImageInfo->pRegions[0].extent.depth != src_image->info.depth)
- resolve_method = RESOLVE_COMPUTE;
- } else
- resolve_method = RESOLVE_COMPUTE;
-
- radv_pick_resolve_method_images(cmd_buffer->device, src_image,
- src_image->vk_format, dst_image,
- dst_image_layout, false, cmd_buffer,
- &resolve_method);
-
- for (uint32_t r = 0; r < pResolveImageInfo->regionCount; r++) {
- resolve_image(cmd_buffer, src_image, src_image_layout,
- dst_image, dst_image_layout,
- &pResolveImageInfo->pRegions[r], resolve_method);
- }
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_image, src_image, pResolveImageInfo->srcImage);
+ RADV_FROM_HANDLE(radv_image, dst_image, pResolveImageInfo->dstImage);
+ VkImageLayout src_image_layout = pResolveImageInfo->srcImageLayout;
+ VkImageLayout dst_image_layout = pResolveImageInfo->dstImageLayout;
+ enum radv_resolve_method resolve_method = RESOLVE_HW;
+ /* we can use the hw resolve only for single full resolves */
+ if (pResolveImageInfo->regionCount == 1) {
+ if (pResolveImageInfo->pRegions[0].srcOffset.x ||
+ pResolveImageInfo->pRegions[0].srcOffset.y || pResolveImageInfo->pRegions[0].srcOffset.z)
+ resolve_method = RESOLVE_COMPUTE;
+ if (pResolveImageInfo->pRegions[0].dstOffset.x ||
+ pResolveImageInfo->pRegions[0].dstOffset.y || pResolveImageInfo->pRegions[0].dstOffset.z)
+ resolve_method = RESOLVE_COMPUTE;
+
+ if (pResolveImageInfo->pRegions[0].extent.width != src_image->info.width ||
+ pResolveImageInfo->pRegions[0].extent.height != src_image->info.height ||
+ pResolveImageInfo->pRegions[0].extent.depth != src_image->info.depth)
+ resolve_method = RESOLVE_COMPUTE;
+ } else
+ resolve_method = RESOLVE_COMPUTE;
+
+ radv_pick_resolve_method_images(cmd_buffer->device, src_image, src_image->vk_format, dst_image,
+ dst_image_layout, false, cmd_buffer, &resolve_method);
+
+ for (uint32_t r = 0; r < pResolveImageInfo->regionCount; r++) {
+ resolve_image(cmd_buffer, src_image, src_image_layout, dst_image, dst_image_layout,
+ &pResolveImageInfo->pRegions[r], resolve_method);
+ }
}
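As a usage illustration, the checks above mean a resolve can only stay on the HW path when it is a single region with zero offsets covering the whole source image (and the format/tiling checks in radv_pick_resolve_method_images() also pass). A minimal sketch, assuming cmd, msaa_image, resolved_image, width and height are valid application-side values:

const VkImageResolve2KHR region = {
   .sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2_KHR,
   .srcSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1},
   .srcOffset = {0, 0, 0},        /* any non-zero offset forces the compute path */
   .dstSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1},
   .dstOffset = {0, 0, 0},
   .extent = {width, height, 1},  /* must equal the full source image extent */
};
const VkResolveImageInfo2KHR info = {
   .sType = VK_STRUCTURE_TYPE_RESOLVE_IMAGE_INFO_2_KHR,
   .srcImage = msaa_image,
   .srcImageLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
   .dstImage = resolved_image,
   .dstImageLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
   .regionCount = 1,
   .pRegions = &region,
};
vkCmdResolveImage2KHR(cmd, &info);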
static void
radv_cmd_buffer_resolve_subpass_hw(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
- const struct radv_subpass *subpass = cmd_buffer->state.subpass;
- struct radv_meta_saved_state saved_state;
-
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_GRAPHICS_PIPELINE);
-
- for (uint32_t i = 0; i < subpass->color_count; ++i) {
- struct radv_subpass_attachment src_att = subpass->color_attachments[i];
- struct radv_subpass_attachment dest_att = subpass->resolve_attachments[i];
-
- if (dest_att.attachment == VK_ATTACHMENT_UNUSED)
- continue;
-
- struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview;
- struct radv_image *src_img = src_iview->image;
-
- struct radv_image_view *dest_iview = cmd_buffer->state.attachments[dest_att.attachment].iview;
- struct radv_image *dst_img = dest_iview->image;
-
- if (radv_dcc_enabled(dst_img, dest_iview->base_mip)) {
- VkImageSubresourceRange range = {
- .aspectMask = dest_iview->aspect_mask,
- .baseMipLevel = dest_iview->base_mip,
- .levelCount = dest_iview->level_count,
- .baseArrayLayer = dest_iview->base_layer,
- .layerCount = dest_iview->layer_count,
- };
-
- cmd_buffer->state.flush_bits |=
- radv_init_dcc(cmd_buffer, dst_img, &range, 0xffffffff);
- cmd_buffer->state.attachments[dest_att.attachment].current_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
- }
-
- struct radv_subpass resolve_subpass = {
- .color_count = 2,
- .color_attachments = (struct radv_subpass_attachment[]) { src_att, dest_att },
- .depth_stencil_attachment = NULL,
- };
-
- radv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass);
-
- VkResult ret = build_resolve_pipeline(cmd_buffer->device, radv_format_meta_fs_key(cmd_buffer->device, dest_iview->vk_format));
- if (ret != VK_SUCCESS) {
- cmd_buffer->record_result = ret;
- continue;
- }
-
- emit_resolve(cmd_buffer, src_img, dst_img,
- dest_iview->vk_format,
- &(VkOffset2D) { 0, 0 },
- &(VkExtent2D) { fb->width, fb->height });
- }
-
- radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
-
- radv_meta_restore(&saved_state, cmd_buffer);
+ struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
+ const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+ struct radv_meta_saved_state saved_state;
+
+ radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_GRAPHICS_PIPELINE);
+
+ for (uint32_t i = 0; i < subpass->color_count; ++i) {
+ struct radv_subpass_attachment src_att = subpass->color_attachments[i];
+ struct radv_subpass_attachment dest_att = subpass->resolve_attachments[i];
+
+ if (dest_att.attachment == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview;
+ struct radv_image *src_img = src_iview->image;
+
+ struct radv_image_view *dest_iview = cmd_buffer->state.attachments[dest_att.attachment].iview;
+ struct radv_image *dst_img = dest_iview->image;
+
+ if (radv_dcc_enabled(dst_img, dest_iview->base_mip)) {
+ VkImageSubresourceRange range = {
+ .aspectMask = dest_iview->aspect_mask,
+ .baseMipLevel = dest_iview->base_mip,
+ .levelCount = dest_iview->level_count,
+ .baseArrayLayer = dest_iview->base_layer,
+ .layerCount = dest_iview->layer_count,
+ };
+
+ cmd_buffer->state.flush_bits |= radv_init_dcc(cmd_buffer, dst_img, &range, 0xffffffff);
+ cmd_buffer->state.attachments[dest_att.attachment].current_layout =
+ VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+ }
+
+ struct radv_subpass resolve_subpass = {
+ .color_count = 2,
+ .color_attachments = (struct radv_subpass_attachment[]){src_att, dest_att},
+ .depth_stencil_attachment = NULL,
+ };
+
+ radv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass);
+
+ VkResult ret = build_resolve_pipeline(
+ cmd_buffer->device, radv_format_meta_fs_key(cmd_buffer->device, dest_iview->vk_format));
+ if (ret != VK_SUCCESS) {
+ cmd_buffer->record_result = ret;
+ continue;
+ }
+
+ emit_resolve(cmd_buffer, src_img, dst_img, dest_iview->vk_format, &(VkOffset2D){0, 0},
+ &(VkExtent2D){fb->width, fb->height});
+ }
+
+ radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
+
+ radv_meta_restore(&saved_state, cmd_buffer);
}
/**
@@ -777,125 +734,114 @@ radv_cmd_buffer_resolve_subpass_hw(struct radv_cmd_buffer *cmd_buffer)
void
radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer)
{
- const struct radv_subpass *subpass = cmd_buffer->state.subpass;
- enum radv_resolve_method resolve_method = RESOLVE_HW;
-
- if (!subpass->has_color_resolve && !subpass->ds_resolve_attachment)
- return;
-
- radv_describe_begin_render_pass_resolve(cmd_buffer);
-
- if (subpass->ds_resolve_attachment) {
- struct radv_subpass_attachment src_att = *subpass->depth_stencil_attachment;
- struct radv_subpass_attachment dst_att = *subpass->ds_resolve_attachment;
- struct radv_image_view *src_iview =
- cmd_buffer->state.attachments[src_att.attachment].iview;
- struct radv_image_view *dst_iview =
- cmd_buffer->state.attachments[dst_att.attachment].iview;
-
- /* Make sure to not clear the depth/stencil attachment after resolves. */
- cmd_buffer->state.attachments[dst_att.attachment].pending_clear_aspects = 0;
-
- radv_pick_resolve_method_images(cmd_buffer->device,
- src_iview->image,
- src_iview->vk_format,
- dst_iview->image,
- dst_att.layout,
- dst_att.in_render_loop,
- cmd_buffer,
- &resolve_method);
-
- if ((src_iview->aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) &&
- subpass->depth_resolve_mode != VK_RESOLVE_MODE_NONE_KHR) {
- if (resolve_method == RESOLVE_FRAGMENT) {
- radv_depth_stencil_resolve_subpass_fs(cmd_buffer,
- VK_IMAGE_ASPECT_DEPTH_BIT,
- subpass->depth_resolve_mode);
- } else {
- assert(resolve_method == RESOLVE_COMPUTE);
- radv_depth_stencil_resolve_subpass_cs(cmd_buffer,
- VK_IMAGE_ASPECT_DEPTH_BIT,
- subpass->depth_resolve_mode);
- }
- }
-
- if ((src_iview->aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT) &&
- subpass->stencil_resolve_mode != VK_RESOLVE_MODE_NONE_KHR) {
- if (resolve_method == RESOLVE_FRAGMENT) {
- radv_depth_stencil_resolve_subpass_fs(cmd_buffer,
- VK_IMAGE_ASPECT_STENCIL_BIT,
- subpass->stencil_resolve_mode);
- } else {
- assert(resolve_method == RESOLVE_COMPUTE);
- radv_depth_stencil_resolve_subpass_cs(cmd_buffer,
- VK_IMAGE_ASPECT_STENCIL_BIT,
- subpass->stencil_resolve_mode);
- }
- }
-
- /* From the Vulkan spec 1.2.165:
- *
- * "VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT specifies
- * write access to a color, resolve, or depth/stencil
- * resolve attachment during a render pass or via
- * certain subpass load and store operations."
- *
- * Yes, it's counterintuitive but it makes sense because ds
- * resolve operations happen late at the end of the subpass.
- *
- * That said, RADV is wrong because it executes the subpass
- * end barrier *before* any subpass resolves instead of after.
- *
- * TODO: Fix this properly by executing subpass end barriers
- * after subpass resolves.
- */
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB;
- if (radv_image_has_htile(dst_iview->image))
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
- }
-
- if (subpass->has_color_resolve) {
- for (uint32_t i = 0; i < subpass->color_count; ++i) {
- struct radv_subpass_attachment src_att = subpass->color_attachments[i];
- struct radv_subpass_attachment dest_att = subpass->resolve_attachments[i];
-
- if (dest_att.attachment == VK_ATTACHMENT_UNUSED)
- continue;
-
- /* Make sure to not clear color attachments after resolves. */
- cmd_buffer->state.attachments[dest_att.attachment].pending_clear_aspects = 0;
-
- struct radv_image *dst_img = cmd_buffer->state.attachments[dest_att.attachment].iview->image;
- struct radv_image_view *src_iview= cmd_buffer->state.attachments[src_att.attachment].iview;
- struct radv_image *src_img = src_iview->image;
-
- radv_pick_resolve_method_images(cmd_buffer->device, src_img,
- src_iview->vk_format, dst_img,
- dest_att.layout,
- dest_att.in_render_loop,
- cmd_buffer, &resolve_method);
-
- if (resolve_method == RESOLVE_FRAGMENT) {
- break;
- }
- }
-
- switch (resolve_method) {
- case RESOLVE_HW:
- radv_cmd_buffer_resolve_subpass_hw(cmd_buffer);
- break;
- case RESOLVE_COMPUTE:
- radv_cmd_buffer_resolve_subpass_cs(cmd_buffer);
- break;
- case RESOLVE_FRAGMENT:
- radv_cmd_buffer_resolve_subpass_fs(cmd_buffer);
- break;
- default:
- unreachable("Invalid resolve method");
- }
- }
-
- radv_describe_end_render_pass_resolve(cmd_buffer);
+ const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+ enum radv_resolve_method resolve_method = RESOLVE_HW;
+
+ if (!subpass->has_color_resolve && !subpass->ds_resolve_attachment)
+ return;
+
+ radv_describe_begin_render_pass_resolve(cmd_buffer);
+
+ if (subpass->ds_resolve_attachment) {
+ struct radv_subpass_attachment src_att = *subpass->depth_stencil_attachment;
+ struct radv_subpass_attachment dst_att = *subpass->ds_resolve_attachment;
+ struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview;
+ struct radv_image_view *dst_iview = cmd_buffer->state.attachments[dst_att.attachment].iview;
+
+ /* Make sure to not clear the depth/stencil attachment after resolves. */
+ cmd_buffer->state.attachments[dst_att.attachment].pending_clear_aspects = 0;
+
+ radv_pick_resolve_method_images(cmd_buffer->device, src_iview->image, src_iview->vk_format,
+ dst_iview->image, dst_att.layout, dst_att.in_render_loop,
+ cmd_buffer, &resolve_method);
+
+ if ((src_iview->aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) &&
+ subpass->depth_resolve_mode != VK_RESOLVE_MODE_NONE_KHR) {
+ if (resolve_method == RESOLVE_FRAGMENT) {
+ radv_depth_stencil_resolve_subpass_fs(cmd_buffer, VK_IMAGE_ASPECT_DEPTH_BIT,
+ subpass->depth_resolve_mode);
+ } else {
+ assert(resolve_method == RESOLVE_COMPUTE);
+ radv_depth_stencil_resolve_subpass_cs(cmd_buffer, VK_IMAGE_ASPECT_DEPTH_BIT,
+ subpass->depth_resolve_mode);
+ }
+ }
+
+ if ((src_iview->aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT) &&
+ subpass->stencil_resolve_mode != VK_RESOLVE_MODE_NONE_KHR) {
+ if (resolve_method == RESOLVE_FRAGMENT) {
+ radv_depth_stencil_resolve_subpass_fs(cmd_buffer, VK_IMAGE_ASPECT_STENCIL_BIT,
+ subpass->stencil_resolve_mode);
+ } else {
+ assert(resolve_method == RESOLVE_COMPUTE);
+ radv_depth_stencil_resolve_subpass_cs(cmd_buffer, VK_IMAGE_ASPECT_STENCIL_BIT,
+ subpass->stencil_resolve_mode);
+ }
+ }
+
+ /* From the Vulkan spec 1.2.165:
+ *
+ * "VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT specifies
+ * write access to a color, resolve, or depth/stencil
+ * resolve attachment during a render pass or via
+ * certain subpass load and store operations."
+ *
+ * Yes, it's counterintuitive but it makes sense because ds
+ * resolve operations happen late at the end of the subpass.
+ *
+ * That said, RADV is wrong because it executes the subpass
+ * end barrier *before* any subpass resolves instead of after.
+ *
+ * TODO: Fix this properly by executing subpass end barriers
+ * after subpass resolves.
+ */
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB;
+ if (radv_image_has_htile(dst_iview->image))
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
+ }
+
+ if (subpass->has_color_resolve) {
+ for (uint32_t i = 0; i < subpass->color_count; ++i) {
+ struct radv_subpass_attachment src_att = subpass->color_attachments[i];
+ struct radv_subpass_attachment dest_att = subpass->resolve_attachments[i];
+
+ if (dest_att.attachment == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ /* Make sure to not clear color attachments after resolves. */
+ cmd_buffer->state.attachments[dest_att.attachment].pending_clear_aspects = 0;
+
+ struct radv_image *dst_img =
+ cmd_buffer->state.attachments[dest_att.attachment].iview->image;
+ struct radv_image_view *src_iview =
+ cmd_buffer->state.attachments[src_att.attachment].iview;
+ struct radv_image *src_img = src_iview->image;
+
+ radv_pick_resolve_method_images(cmd_buffer->device, src_img, src_iview->vk_format, dst_img,
+ dest_att.layout, dest_att.in_render_loop, cmd_buffer,
+ &resolve_method);
+
+ if (resolve_method == RESOLVE_FRAGMENT) {
+ break;
+ }
+ }
+
+ switch (resolve_method) {
+ case RESOLVE_HW:
+ radv_cmd_buffer_resolve_subpass_hw(cmd_buffer);
+ break;
+ case RESOLVE_COMPUTE:
+ radv_cmd_buffer_resolve_subpass_cs(cmd_buffer);
+ break;
+ case RESOLVE_FRAGMENT:
+ radv_cmd_buffer_resolve_subpass_fs(cmd_buffer);
+ break;
+ default:
+ unreachable("Invalid resolve method");
+ }
+ }
+
+ radv_describe_end_render_pass_resolve(cmd_buffer);
}
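In outline, the subpass resolve above proceeds as follows (a summary of the code, with the ordering caveat from the comment about the subpass end barrier):

/*   1. Depth/stencil: pick a method for the ds resolve attachment, run the
 *      depth aspect and the stencil aspect separately through the FS or CS
 *      path, then flush DB (+ DB metadata when HTILE is present) because the
 *      subpass end barrier has already been executed.
 *   2. Color: scan all resolve attachments and accumulate a single method;
 *      if any attachment needs the fragment path the whole subpass uses it,
 *      otherwise compute wins over the plain HW path.
 *   3. Dispatch to the HW, compute or fragment subpass resolve helper.
 */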
/**
@@ -905,94 +851,89 @@ radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer)
void
radv_decompress_resolve_subpass_src(struct radv_cmd_buffer *cmd_buffer)
{
- const struct radv_subpass *subpass = cmd_buffer->state.subpass;
- struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
- uint32_t layer_count = fb->layers;
+ const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+ struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
+ uint32_t layer_count = fb->layers;
- if (subpass->view_mask)
- layer_count = util_last_bit(subpass->view_mask);
+ if (subpass->view_mask)
+ layer_count = util_last_bit(subpass->view_mask);
- for (uint32_t i = 0; i < subpass->color_count; ++i) {
- struct radv_subpass_attachment src_att = subpass->color_attachments[i];
- struct radv_subpass_attachment dest_att = subpass->resolve_attachments[i];
+ for (uint32_t i = 0; i < subpass->color_count; ++i) {
+ struct radv_subpass_attachment src_att = subpass->color_attachments[i];
+ struct radv_subpass_attachment dest_att = subpass->resolve_attachments[i];
- if (dest_att.attachment == VK_ATTACHMENT_UNUSED)
- continue;
+ if (dest_att.attachment == VK_ATTACHMENT_UNUSED)
+ continue;
- struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview;
- struct radv_image *src_image = src_iview->image;
+ struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview;
+ struct radv_image *src_image = src_iview->image;
- VkImageResolve2KHR region = {0};
- region.sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2_KHR;
- region.srcSubresource.aspectMask = src_iview->aspect_mask;
- region.srcSubresource.mipLevel = 0;
- region.srcSubresource.baseArrayLayer = src_iview->base_layer;
- region.srcSubresource.layerCount = layer_count;
+ VkImageResolve2KHR region = {0};
+ region.sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2_KHR;
+ region.srcSubresource.aspectMask = src_iview->aspect_mask;
+ region.srcSubresource.mipLevel = 0;
+ region.srcSubresource.baseArrayLayer = src_iview->base_layer;
+ region.srcSubresource.layerCount = layer_count;
- radv_decompress_resolve_src(cmd_buffer, src_image,
- src_att.layout, &region);
- }
+ radv_decompress_resolve_src(cmd_buffer, src_image, src_att.layout, &region);
+ }
}
static struct radv_sample_locations_state *
radv_get_resolve_sample_locations(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_cmd_state *state = &cmd_buffer->state;
- uint32_t subpass_id = radv_get_subpass_id(cmd_buffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ uint32_t subpass_id = radv_get_subpass_id(cmd_buffer);
- for (uint32_t i = 0; i < state->num_subpass_sample_locs; i++) {
- if (state->subpass_sample_locs[i].subpass_idx == subpass_id)
- return &state->subpass_sample_locs[i].sample_location;
- }
+ for (uint32_t i = 0; i < state->num_subpass_sample_locs; i++) {
+ if (state->subpass_sample_locs[i].subpass_idx == subpass_id)
+ return &state->subpass_sample_locs[i].sample_location;
+ }
- return NULL;
+ return NULL;
}
/**
* Decompress CMask/FMask before resolving a multisampled source image.
*/
void
-radv_decompress_resolve_src(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *src_image,
- VkImageLayout src_image_layout,
- const VkImageResolve2KHR *region)
+radv_decompress_resolve_src(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,
+ VkImageLayout src_image_layout, const VkImageResolve2KHR *region)
{
- const uint32_t src_base_layer =
- radv_meta_get_iview_layer(src_image, &region->srcSubresource,
- &region->srcOffset);
-
- VkImageMemoryBarrier barrier = {0};
- barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
- barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
- barrier.oldLayout = src_image_layout;
- barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
- barrier.image = radv_image_to_handle(src_image);
- barrier.subresourceRange = (VkImageSubresourceRange) {
- .aspectMask = region->srcSubresource.aspectMask,
- .baseMipLevel = region->srcSubresource.mipLevel,
- .levelCount = 1,
- .baseArrayLayer = src_base_layer,
- .layerCount = region->srcSubresource.layerCount,
- };
-
- if (src_image->flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT) {
- /* If the depth/stencil image uses different sample
- * locations, we need them during HTILE decompressions.
- */
- struct radv_sample_locations_state *sample_locs =
- radv_get_resolve_sample_locations(cmd_buffer);
-
- barrier.pNext = &(VkSampleLocationsInfoEXT) {
- .sType = VK_STRUCTURE_TYPE_SAMPLE_LOCATIONS_INFO_EXT,
- .sampleLocationsPerPixel = sample_locs->per_pixel,
- .sampleLocationGridSize = sample_locs->grid_size,
- .sampleLocationsCount = sample_locs->count,
- .pSampleLocations = sample_locs->locations,
- };
- }
-
- radv_CmdPipelineBarrier(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- false, 0, NULL, 0, NULL, 1, &barrier);
+ const uint32_t src_base_layer =
+ radv_meta_get_iview_layer(src_image, &region->srcSubresource, &region->srcOffset);
+
+ VkImageMemoryBarrier barrier = {0};
+ barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+ barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
+ barrier.oldLayout = src_image_layout;
+ barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
+ barrier.image = radv_image_to_handle(src_image);
+ barrier.subresourceRange = (VkImageSubresourceRange){
+ .aspectMask = region->srcSubresource.aspectMask,
+ .baseMipLevel = region->srcSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = src_base_layer,
+ .layerCount = region->srcSubresource.layerCount,
+ };
+
+ if (src_image->flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT) {
+ /* If the depth/stencil image uses different sample
+ * locations, we need them during HTILE decompressions.
+ */
+ struct radv_sample_locations_state *sample_locs =
+ radv_get_resolve_sample_locations(cmd_buffer);
+
+ barrier.pNext = &(VkSampleLocationsInfoEXT){
+ .sType = VK_STRUCTURE_TYPE_SAMPLE_LOCATIONS_INFO_EXT,
+ .sampleLocationsPerPixel = sample_locs->per_pixel,
+ .sampleLocationGridSize = sample_locs->grid_size,
+ .sampleLocationsCount = sample_locs->count,
+ .pSampleLocations = sample_locs->locations,
+ };
+ }
+
+ radv_CmdPipelineBarrier(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, false, 0, NULL, 0, NULL, 1,
+ &barrier);
}
diff --git a/src/amd/vulkan/radv_meta_resolve_cs.c b/src/amd/vulkan/radv_meta_resolve_cs.c
index b5167b0cf49..176063967e0 100644
--- a/src/amd/vulkan/radv_meta_resolve_cs.c
+++ b/src/amd/vulkan/radv_meta_resolve_cs.c
@@ -21,863 +21,760 @@
* IN THE SOFTWARE.
*/
-
#include <assert.h>
#include <stdbool.h>
+#include "nir/nir_builder.h"
#include "radv_meta.h"
#include "radv_private.h"
-#include "nir/nir_builder.h"
#include "sid.h"
#include "vk_format.h"
-static nir_ssa_def *radv_meta_build_resolve_srgb_conversion(nir_builder *b,
- nir_ssa_def *input)
+static nir_ssa_def *
+radv_meta_build_resolve_srgb_conversion(nir_builder *b, nir_ssa_def *input)
{
- unsigned i;
-
- nir_ssa_def *cmp[3];
- for (i = 0; i < 3; i++)
- cmp[i] = nir_flt(b, nir_channel(b, input, i),
- nir_imm_int(b, 0x3b4d2e1c));
-
- nir_ssa_def *ltvals[3];
- for (i = 0; i < 3; i++)
- ltvals[i] = nir_fmul(b, nir_channel(b, input, i),
- nir_imm_float(b, 12.92));
-
- nir_ssa_def *gtvals[3];
-
- for (i = 0; i < 3; i++) {
- gtvals[i] = nir_fpow(b, nir_channel(b, input, i),
- nir_imm_float(b, 1.0/2.4));
- gtvals[i] = nir_fmul(b, gtvals[i],
- nir_imm_float(b, 1.055));
- gtvals[i] = nir_fsub(b, gtvals[i],
- nir_imm_float(b, 0.055));
- }
-
- nir_ssa_def *comp[4];
- for (i = 0; i < 3; i++)
- comp[i] = nir_bcsel(b, cmp[i], ltvals[i], gtvals[i]);
- comp[3] = nir_channels(b, input, 1 << 3);
- return nir_vec(b, comp, 4);
+ unsigned i;
+
+ nir_ssa_def *cmp[3];
+ for (i = 0; i < 3; i++)
+ cmp[i] = nir_flt(b, nir_channel(b, input, i), nir_imm_int(b, 0x3b4d2e1c));
+
+ nir_ssa_def *ltvals[3];
+ for (i = 0; i < 3; i++)
+ ltvals[i] = nir_fmul(b, nir_channel(b, input, i), nir_imm_float(b, 12.92));
+
+ nir_ssa_def *gtvals[3];
+
+ for (i = 0; i < 3; i++) {
+ gtvals[i] = nir_fpow(b, nir_channel(b, input, i), nir_imm_float(b, 1.0 / 2.4));
+ gtvals[i] = nir_fmul(b, gtvals[i], nir_imm_float(b, 1.055));
+ gtvals[i] = nir_fsub(b, gtvals[i], nir_imm_float(b, 0.055));
+ }
+
+ nir_ssa_def *comp[4];
+ for (i = 0; i < 3; i++)
+ comp[i] = nir_bcsel(b, cmp[i], ltvals[i], gtvals[i]);
+ comp[3] = nir_channels(b, input, 1 << 3);
+ return nir_vec(b, comp, 4);
}
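The NIR built above is the standard linear-to-sRGB encoding applied to the RGB channels, with alpha passed through; the constant 0x3b4d2e1c is the IEEE-754 bit pattern of roughly 0.0031308, the usual sRGB breakpoint. A scalar reference version for illustration only (not driver code; needs <math.h> for powf):

static inline float
linear_to_srgb_channel(float x)
{
   return (x < 0.0031308f) ? 12.92f * x
                           : 1.055f * powf(x, 1.0f / 2.4f) - 0.055f;
}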
static nir_shader *
build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_srgb, int samples)
{
- const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS,
- false,
- false,
- GLSL_TYPE_FLOAT);
- const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D,
- false,
- GLSL_TYPE_FLOAT);
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL,
- "meta_resolve_cs-%d-%s",
- samples,
- is_integer ? "int" : (is_srgb ? "srgb" : "float"));
- b.shader->info.cs.local_size[0] = 8;
- b.shader->info.cs.local_size[1] = 8;
- b.shader->info.cs.local_size[2] = 1;
-
- nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
- sampler_type, "s_tex");
- input_img->data.descriptor_set = 0;
- input_img->data.binding = 0;
-
- nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
- img_type, "out_img");
- output_img->data.descriptor_set = 0;
- output_img->data.binding = 1;
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
- nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
- nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info.cs.local_size[0],
- b.shader->info.cs.local_size[1],
- b.shader->info.cs.local_size[2], 0);
-
- nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
-
- nir_ssa_def *src_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range=16);
- nir_ssa_def *dst_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 8), .range=16);
-
- nir_ssa_def *img_coord = nir_channels(&b, nir_iadd(&b, global_id, src_offset), 0x3);
- nir_variable *color = nir_local_variable_create(b.impl, glsl_vec4_type(), "color");
-
- radv_meta_build_resolve_shader_core(&b, is_integer, samples, input_img,
- color, img_coord);
-
- nir_ssa_def *outval = nir_load_var(&b, color);
- if (is_srgb)
- outval = radv_meta_build_resolve_srgb_conversion(&b, outval);
-
- nir_ssa_def *coord = nir_iadd(&b, global_id, dst_offset);
- nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa,
- coord, nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0));
- return b.shader;
+ const struct glsl_type *sampler_type =
+ glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, GLSL_TYPE_FLOAT);
+ const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, false, GLSL_TYPE_FLOAT);
+ nir_builder b =
+ nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_resolve_cs-%d-%s", samples,
+ is_integer ? "int" : (is_srgb ? "srgb" : "float"));
+ b.shader->info.cs.local_size[0] = 8;
+ b.shader->info.cs.local_size[1] = 8;
+ b.shader->info.cs.local_size[2] = 1;
+
+ nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
+ input_img->data.descriptor_set = 0;
+ input_img->data.binding = 0;
+
+ nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
+ output_img->data.descriptor_set = 0;
+ output_img->data.binding = 1;
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
+ nir_ssa_def *block_size =
+ nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1],
+ b.shader->info.cs.local_size[2], 0);
+
+ nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+
+ nir_ssa_def *src_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 16);
+ nir_ssa_def *dst_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 8), .range = 16);
+
+ nir_ssa_def *img_coord = nir_channels(&b, nir_iadd(&b, global_id, src_offset), 0x3);
+ nir_variable *color = nir_local_variable_create(b.impl, glsl_vec4_type(), "color");
+
+ radv_meta_build_resolve_shader_core(&b, is_integer, samples, input_img, color, img_coord);
+
+ nir_ssa_def *outval = nir_load_var(&b, color);
+ if (is_srgb)
+ outval = radv_meta_build_resolve_srgb_conversion(&b, outval);
+
+ nir_ssa_def *coord = nir_iadd(&b, global_id, dst_offset);
+ nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord,
+ nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0));
+ return b.shader;
}
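The two nir_load_push_constant() calls above read a 16-byte block matching the VkPushConstantRange created in create_layout(); a host-side sketch of that layout (the struct name is hypothetical, only the offsets are implied by the shader):

struct resolve_cs_push_constants {
   int32_t src_offset[2]; /* bytes 0..7:  added to the global id before sampling */
   int32_t dst_offset[2]; /* bytes 8..15: added to the global id before the store */
};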
enum {
- DEPTH_RESOLVE,
- STENCIL_RESOLVE,
+ DEPTH_RESOLVE,
+ STENCIL_RESOLVE,
};
static const char *
get_resolve_mode_str(VkResolveModeFlagBits resolve_mode)
{
- switch (resolve_mode) {
- case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR:
- return "zero";
- case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
- return "average";
- case VK_RESOLVE_MODE_MIN_BIT_KHR:
- return "min";
- case VK_RESOLVE_MODE_MAX_BIT_KHR:
- return "max";
- default:
- unreachable("invalid resolve mode");
- }
+ switch (resolve_mode) {
+ case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR:
+ return "zero";
+ case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
+ return "average";
+ case VK_RESOLVE_MODE_MIN_BIT_KHR:
+ return "min";
+ case VK_RESOLVE_MODE_MAX_BIT_KHR:
+ return "max";
+ default:
+ unreachable("invalid resolve mode");
+ }
}
static nir_shader *
-build_depth_stencil_resolve_compute_shader(struct radv_device *dev, int samples,
- int index,
- VkResolveModeFlagBits resolve_mode)
+build_depth_stencil_resolve_compute_shader(struct radv_device *dev, int samples, int index,
+ VkResolveModeFlagBits resolve_mode)
{
- const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS,
- false,
- true,
- GLSL_TYPE_FLOAT);
- const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D,
- true,
- GLSL_TYPE_FLOAT);
-
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL,
- "meta_resolve_cs_%s-%s-%d",
- index == DEPTH_RESOLVE ? "depth" : "stencil",
- get_resolve_mode_str(resolve_mode), samples);
- b.shader->info.cs.local_size[0] = 8;
- b.shader->info.cs.local_size[1] = 8;
- b.shader->info.cs.local_size[2] = 1;
-
- nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
- sampler_type, "s_tex");
- input_img->data.descriptor_set = 0;
- input_img->data.binding = 0;
-
- nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
- img_type, "out_img");
- output_img->data.descriptor_set = 0;
- output_img->data.binding = 1;
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
- nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
- nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info.cs.local_size[0],
- b.shader->info.cs.local_size[1],
- b.shader->info.cs.local_size[2], 0);
-
- nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
- nir_ssa_def *layer_id = nir_channel(&b, wg_id, 2);
-
- nir_ssa_def *img_coord = nir_vec3(&b, nir_channel(&b, global_id, 0),
- nir_channel(&b, global_id, 1),
- layer_id);
-
- nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
-
- nir_alu_type type = index == DEPTH_RESOLVE ? nir_type_float32 : nir_type_uint32;
-
- nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
- tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
- tex->op = nir_texop_txf_ms;
- tex->src[0].src_type = nir_tex_src_coord;
- tex->src[0].src = nir_src_for_ssa(img_coord);
- tex->src[1].src_type = nir_tex_src_ms_index;
- tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
- tex->src[2].src_type = nir_tex_src_texture_deref;
- tex->src[2].src = nir_src_for_ssa(input_img_deref);
- tex->dest_type = type;
- tex->is_array = true;
- tex->coord_components = 3;
-
- nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
- nir_builder_instr_insert(&b, &tex->instr);
-
- nir_ssa_def *outval = &tex->dest.ssa;
-
- if (resolve_mode != VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR) {
- for (int i = 1; i < samples; i++) {
- nir_tex_instr *tex_add = nir_tex_instr_create(b.shader, 3);
- tex_add->sampler_dim = GLSL_SAMPLER_DIM_MS;
- tex_add->op = nir_texop_txf_ms;
- tex_add->src[0].src_type = nir_tex_src_coord;
- tex_add->src[0].src = nir_src_for_ssa(img_coord);
- tex_add->src[1].src_type = nir_tex_src_ms_index;
- tex_add->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i));
- tex_add->src[2].src_type = nir_tex_src_texture_deref;
- tex_add->src[2].src = nir_src_for_ssa(input_img_deref);
- tex_add->dest_type = type;
- tex_add->is_array = true;
- tex_add->coord_components = 3;
-
- nir_ssa_dest_init(&tex_add->instr, &tex_add->dest, 4, 32, "tex");
- nir_builder_instr_insert(&b, &tex_add->instr);
-
- switch (resolve_mode) {
- case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
- assert(index == DEPTH_RESOLVE);
- outval = nir_fadd(&b, outval, &tex_add->dest.ssa);
- break;
- case VK_RESOLVE_MODE_MIN_BIT_KHR:
- if (index == DEPTH_RESOLVE)
- outval = nir_fmin(&b, outval, &tex_add->dest.ssa);
- else
- outval = nir_umin(&b, outval, &tex_add->dest.ssa);
- break;
- case VK_RESOLVE_MODE_MAX_BIT_KHR:
- if (index == DEPTH_RESOLVE)
- outval = nir_fmax(&b, outval, &tex_add->dest.ssa);
- else
- outval = nir_umax(&b, outval, &tex_add->dest.ssa);
- break;
- default:
- unreachable("invalid resolve mode");
- }
- }
-
- if (resolve_mode == VK_RESOLVE_MODE_AVERAGE_BIT_KHR)
- outval = nir_fdiv(&b, outval, nir_imm_float(&b, samples));
- }
-
- nir_ssa_def *coord = nir_vec4(&b, nir_channel(&b, img_coord, 0),
- nir_channel(&b, img_coord, 1),
- nir_channel(&b, img_coord, 2),
- nir_imm_int(&b, 0));
- nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa,
- coord, nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0));
- return b.shader;
+ const struct glsl_type *sampler_type =
+ glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, true, GLSL_TYPE_FLOAT);
+ const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, true, GLSL_TYPE_FLOAT);
+
+ nir_builder b = nir_builder_init_simple_shader(
+ MESA_SHADER_COMPUTE, NULL, "meta_resolve_cs_%s-%s-%d",
+ index == DEPTH_RESOLVE ? "depth" : "stencil", get_resolve_mode_str(resolve_mode), samples);
+ b.shader->info.cs.local_size[0] = 8;
+ b.shader->info.cs.local_size[1] = 8;
+ b.shader->info.cs.local_size[2] = 1;
+
+ nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
+ input_img->data.descriptor_set = 0;
+ input_img->data.binding = 0;
+
+ nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
+ output_img->data.descriptor_set = 0;
+ output_img->data.binding = 1;
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
+ nir_ssa_def *block_size =
+ nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1],
+ b.shader->info.cs.local_size[2], 0);
+
+ nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+ nir_ssa_def *layer_id = nir_channel(&b, wg_id, 2);
+
+ nir_ssa_def *img_coord =
+ nir_vec3(&b, nir_channel(&b, global_id, 0), nir_channel(&b, global_id, 1), layer_id);
+
+ nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
+
+ nir_alu_type type = index == DEPTH_RESOLVE ? nir_type_float32 : nir_type_uint32;
+
+ nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
+ tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
+ tex->op = nir_texop_txf_ms;
+ tex->src[0].src_type = nir_tex_src_coord;
+ tex->src[0].src = nir_src_for_ssa(img_coord);
+ tex->src[1].src_type = nir_tex_src_ms_index;
+ tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
+ tex->src[2].src_type = nir_tex_src_texture_deref;
+ tex->src[2].src = nir_src_for_ssa(input_img_deref);
+ tex->dest_type = type;
+ tex->is_array = true;
+ tex->coord_components = 3;
+
+ nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+ nir_builder_instr_insert(&b, &tex->instr);
+
+ nir_ssa_def *outval = &tex->dest.ssa;
+
+ if (resolve_mode != VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR) {
+ for (int i = 1; i < samples; i++) {
+ nir_tex_instr *tex_add = nir_tex_instr_create(b.shader, 3);
+ tex_add->sampler_dim = GLSL_SAMPLER_DIM_MS;
+ tex_add->op = nir_texop_txf_ms;
+ tex_add->src[0].src_type = nir_tex_src_coord;
+ tex_add->src[0].src = nir_src_for_ssa(img_coord);
+ tex_add->src[1].src_type = nir_tex_src_ms_index;
+ tex_add->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i));
+ tex_add->src[2].src_type = nir_tex_src_texture_deref;
+ tex_add->src[2].src = nir_src_for_ssa(input_img_deref);
+ tex_add->dest_type = type;
+ tex_add->is_array = true;
+ tex_add->coord_components = 3;
+
+ nir_ssa_dest_init(&tex_add->instr, &tex_add->dest, 4, 32, "tex");
+ nir_builder_instr_insert(&b, &tex_add->instr);
+
+ switch (resolve_mode) {
+ case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
+ assert(index == DEPTH_RESOLVE);
+ outval = nir_fadd(&b, outval, &tex_add->dest.ssa);
+ break;
+ case VK_RESOLVE_MODE_MIN_BIT_KHR:
+ if (index == DEPTH_RESOLVE)
+ outval = nir_fmin(&b, outval, &tex_add->dest.ssa);
+ else
+ outval = nir_umin(&b, outval, &tex_add->dest.ssa);
+ break;
+ case VK_RESOLVE_MODE_MAX_BIT_KHR:
+ if (index == DEPTH_RESOLVE)
+ outval = nir_fmax(&b, outval, &tex_add->dest.ssa);
+ else
+ outval = nir_umax(&b, outval, &tex_add->dest.ssa);
+ break;
+ default:
+ unreachable("invalid resolve mode");
+ }
+ }
+
+ if (resolve_mode == VK_RESOLVE_MODE_AVERAGE_BIT_KHR)
+ outval = nir_fdiv(&b, outval, nir_imm_float(&b, samples));
+ }
+
+ nir_ssa_def *coord = nir_vec4(&b, nir_channel(&b, img_coord, 0), nir_channel(&b, img_coord, 1),
+ nir_channel(&b, img_coord, 2), nir_imm_int(&b, 0));
+ nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord,
+ nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0));
+ return b.shader;
}
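Per texel, the shader generated above reduces the samples as sketched below (a paraphrase of the NIR construction, with min/max/avg used in the obvious way):

/* value = fetch(coord, sample 0);
 * if (mode != VK_RESOLVE_MODE_SAMPLE_ZERO_BIT) {
 *    for (i = 1; i < samples; i++) {
 *       s = fetch(coord, sample i);
 *       value = (mode == AVERAGE) ? value + s        // depth only
 *             : (mode == MIN)     ? min(value, s)    // fmin depth / umin stencil
 *                                 : max(value, s);   // fmax depth / umax stencil
 *    }
 *    if (mode == AVERAGE)
 *       value /= samples;
 * }
 * store(coord, value);
 */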
static VkResult
create_layout(struct radv_device *device)
{
- VkResult result;
-	/*
-	 * Two descriptors: one for the image being sampled and
-	 * one for the storage image being written.
-	 */
- VkDescriptorSetLayoutCreateInfo ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 2,
- .pBindings = (VkDescriptorSetLayoutBinding[]) {
- {
- .binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- {
- .binding = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- }
- };
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
- &ds_create_info,
- &device->meta_state.alloc,
- &device->meta_state.resolve_compute.ds_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
-
- VkPipelineLayoutCreateInfo pl_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 1,
- .pSetLayouts = &device->meta_state.resolve_compute.ds_layout,
- .pushConstantRangeCount = 1,
- .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
- };
-
- result = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &pl_create_info,
- &device->meta_state.alloc,
- &device->meta_state.resolve_compute.p_layout);
- if (result != VK_SUCCESS)
- goto fail;
- return VK_SUCCESS;
+ VkResult result;
+   /*
+    * Two descriptors: one for the image being sampled and
+    * one for the storage image being written.
+    */
+ VkDescriptorSetLayoutCreateInfo ds_create_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 2,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ {.binding = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.resolve_compute.ds_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineLayoutCreateInfo pl_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &device->meta_state.resolve_compute.ds_layout,
+ .pushConstantRangeCount = 1,
+ .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
+ };
+
+ result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.resolve_compute.p_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+ return VK_SUCCESS;
fail:
- return result;
+ return result;
}
static VkResult
-create_resolve_pipeline(struct radv_device *device,
- int samples,
- bool is_integer,
- bool is_srgb,
- VkPipeline *pipeline)
+create_resolve_pipeline(struct radv_device *device, int samples, bool is_integer, bool is_srgb,
+ VkPipeline *pipeline)
{
- VkResult result;
-
-
- mtx_lock(&device->meta_state.mtx);
- if (*pipeline) {
- mtx_unlock(&device->meta_state.mtx);
- return VK_SUCCESS;
- }
-
- nir_shader *cs = build_resolve_compute_shader(device, is_integer, is_srgb, samples);
-
- /* compute shader */
-
- VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = vk_shader_module_handle_from_nir(cs),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = pipeline_shader_stage,
- .flags = 0,
- .layout = device->meta_state.resolve_compute.p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- 1, &vk_pipeline_info, NULL,
- pipeline);
- if (result != VK_SUCCESS)
- goto fail;
-
- ralloc_free(cs);
- mtx_unlock(&device->meta_state.mtx);
- return VK_SUCCESS;
+ VkResult result;
+
+ mtx_lock(&device->meta_state.mtx);
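+   /* Another thread may already have created this pipeline while we waited for the lock. */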
+ if (*pipeline) {
+ mtx_unlock(&device->meta_state.mtx);
+ return VK_SUCCESS;
+ }
+
+ nir_shader *cs = build_resolve_compute_shader(device, is_integer, is_srgb, samples);
+
+ /* compute shader */
+
+ VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(cs),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = pipeline_shader_stage,
+ .flags = 0,
+ .layout = device->meta_state.resolve_compute.p_layout,
+ };
+
+ result = radv_CreateComputePipelines(radv_device_to_handle(device),
+ radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
+ &vk_pipeline_info, NULL, pipeline);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ ralloc_free(cs);
+ mtx_unlock(&device->meta_state.mtx);
+ return VK_SUCCESS;
fail:
- ralloc_free(cs);
- mtx_unlock(&device->meta_state.mtx);
- return result;
+ ralloc_free(cs);
+ mtx_unlock(&device->meta_state.mtx);
+ return result;
}
static VkResult
-create_depth_stencil_resolve_pipeline(struct radv_device *device,
- int samples,
- int index,
- VkResolveModeFlagBits resolve_mode,
- VkPipeline *pipeline)
+create_depth_stencil_resolve_pipeline(struct radv_device *device, int samples, int index,
+ VkResolveModeFlagBits resolve_mode, VkPipeline *pipeline)
{
- VkResult result;
-
- mtx_lock(&device->meta_state.mtx);
- if (*pipeline) {
- mtx_unlock(&device->meta_state.mtx);
- return VK_SUCCESS;
- }
-
- nir_shader *cs = build_depth_stencil_resolve_compute_shader(device, samples,
- index, resolve_mode);
-
- /* compute shader */
- VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = vk_shader_module_handle_from_nir(cs),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = pipeline_shader_stage,
- .flags = 0,
- .layout = device->meta_state.resolve_compute.p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- 1, &vk_pipeline_info, NULL,
- pipeline);
- if (result != VK_SUCCESS)
- goto fail;
-
- ralloc_free(cs);
- mtx_unlock(&device->meta_state.mtx);
- return VK_SUCCESS;
+ VkResult result;
+
+ mtx_lock(&device->meta_state.mtx);
+ if (*pipeline) {
+ mtx_unlock(&device->meta_state.mtx);
+ return VK_SUCCESS;
+ }
+
+ nir_shader *cs =
+ build_depth_stencil_resolve_compute_shader(device, samples, index, resolve_mode);
+
+ /* compute shader */
+ VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(cs),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = pipeline_shader_stage,
+ .flags = 0,
+ .layout = device->meta_state.resolve_compute.p_layout,
+ };
+
+ result = radv_CreateComputePipelines(radv_device_to_handle(device),
+ radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
+ &vk_pipeline_info, NULL, pipeline);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ ralloc_free(cs);
+ mtx_unlock(&device->meta_state.mtx);
+ return VK_SUCCESS;
fail:
- ralloc_free(cs);
- mtx_unlock(&device->meta_state.mtx);
- return result;
+ ralloc_free(cs);
+ mtx_unlock(&device->meta_state.mtx);
+ return result;
}
VkResult
radv_device_init_meta_resolve_compute_state(struct radv_device *device, bool on_demand)
{
- struct radv_meta_state *state = &device->meta_state;
- VkResult res;
-
- res = create_layout(device);
- if (res != VK_SUCCESS)
- goto fail;
-
- if (on_demand)
- return VK_SUCCESS;
-
- for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
- uint32_t samples = 1 << i;
-
- res = create_resolve_pipeline(device, samples, false, false,
- &state->resolve_compute.rc[i].pipeline);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = create_resolve_pipeline(device, samples, true, false,
- &state->resolve_compute.rc[i].i_pipeline);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = create_resolve_pipeline(device, samples, false, true,
- &state->resolve_compute.rc[i].srgb_pipeline);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = create_depth_stencil_resolve_pipeline(device, samples,
- DEPTH_RESOLVE,
- VK_RESOLVE_MODE_AVERAGE_BIT_KHR,
- &state->resolve_compute.depth[i].average_pipeline);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = create_depth_stencil_resolve_pipeline(device, samples,
- DEPTH_RESOLVE,
- VK_RESOLVE_MODE_MAX_BIT_KHR,
- &state->resolve_compute.depth[i].max_pipeline);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = create_depth_stencil_resolve_pipeline(device, samples,
- DEPTH_RESOLVE,
- VK_RESOLVE_MODE_MIN_BIT_KHR,
- &state->resolve_compute.depth[i].min_pipeline);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = create_depth_stencil_resolve_pipeline(device, samples,
- STENCIL_RESOLVE,
- VK_RESOLVE_MODE_MAX_BIT_KHR,
- &state->resolve_compute.stencil[i].max_pipeline);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = create_depth_stencil_resolve_pipeline(device, samples,
- STENCIL_RESOLVE,
- VK_RESOLVE_MODE_MIN_BIT_KHR,
- &state->resolve_compute.stencil[i].min_pipeline);
- if (res != VK_SUCCESS)
- goto fail;
- }
-
- res = create_depth_stencil_resolve_pipeline(device, 0,
- DEPTH_RESOLVE,
- VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR,
- &state->resolve_compute.depth_zero_pipeline);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = create_depth_stencil_resolve_pipeline(device, 0,
- STENCIL_RESOLVE,
- VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR,
- &state->resolve_compute.stencil_zero_pipeline);
- if (res != VK_SUCCESS)
- goto fail;
-
- return VK_SUCCESS;
+ struct radv_meta_state *state = &device->meta_state;
+ VkResult res;
+
+ res = create_layout(device);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ if (on_demand)
+ return VK_SUCCESS;
+
+ for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
+ uint32_t samples = 1 << i;
+
+ res = create_resolve_pipeline(device, samples, false, false,
+ &state->resolve_compute.rc[i].pipeline);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_resolve_pipeline(device, samples, true, false,
+ &state->resolve_compute.rc[i].i_pipeline);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_resolve_pipeline(device, samples, false, true,
+ &state->resolve_compute.rc[i].srgb_pipeline);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_depth_stencil_resolve_pipeline(
+ device, samples, DEPTH_RESOLVE, VK_RESOLVE_MODE_AVERAGE_BIT_KHR,
+ &state->resolve_compute.depth[i].average_pipeline);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_depth_stencil_resolve_pipeline(device, samples, DEPTH_RESOLVE,
+ VK_RESOLVE_MODE_MAX_BIT_KHR,
+ &state->resolve_compute.depth[i].max_pipeline);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_depth_stencil_resolve_pipeline(device, samples, DEPTH_RESOLVE,
+ VK_RESOLVE_MODE_MIN_BIT_KHR,
+ &state->resolve_compute.depth[i].min_pipeline);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_depth_stencil_resolve_pipeline(device, samples, STENCIL_RESOLVE,
+ VK_RESOLVE_MODE_MAX_BIT_KHR,
+ &state->resolve_compute.stencil[i].max_pipeline);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_depth_stencil_resolve_pipeline(device, samples, STENCIL_RESOLVE,
+ VK_RESOLVE_MODE_MIN_BIT_KHR,
+ &state->resolve_compute.stencil[i].min_pipeline);
+ if (res != VK_SUCCESS)
+ goto fail;
+ }
+
+ res = create_depth_stencil_resolve_pipeline(device, 0, DEPTH_RESOLVE,
+ VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR,
+ &state->resolve_compute.depth_zero_pipeline);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_depth_stencil_resolve_pipeline(device, 0, STENCIL_RESOLVE,
+ VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR,
+ &state->resolve_compute.stencil_zero_pipeline);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ return VK_SUCCESS;
fail:
- radv_device_finish_meta_resolve_compute_state(device);
- return res;
+ radv_device_finish_meta_resolve_compute_state(device);
+ return res;
}
void
radv_device_finish_meta_resolve_compute_state(struct radv_device *device)
{
- struct radv_meta_state *state = &device->meta_state;
- for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_compute.rc[i].pipeline,
- &state->alloc);
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_compute.rc[i].i_pipeline,
- &state->alloc);
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_compute.rc[i].srgb_pipeline,
- &state->alloc);
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_compute.depth[i].average_pipeline,
- &state->alloc);
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_compute.depth[i].max_pipeline,
- &state->alloc);
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_compute.depth[i].min_pipeline,
- &state->alloc);
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_compute.stencil[i].max_pipeline,
- &state->alloc);
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_compute.stencil[i].min_pipeline,
- &state->alloc);
- }
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_compute.depth_zero_pipeline,
- &state->alloc);
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_compute.stencil_zero_pipeline,
- &state->alloc);
-
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- state->resolve_compute.ds_layout,
- &state->alloc);
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->resolve_compute.p_layout,
- &state->alloc);
+ struct radv_meta_state *state = &device->meta_state;
+ for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
+ radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.rc[i].pipeline,
+ &state->alloc);
+
+ radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.rc[i].i_pipeline,
+ &state->alloc);
+
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->resolve_compute.rc[i].srgb_pipeline, &state->alloc);
+
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->resolve_compute.depth[i].average_pipeline, &state->alloc);
+
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->resolve_compute.depth[i].max_pipeline, &state->alloc);
+
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->resolve_compute.depth[i].min_pipeline, &state->alloc);
+
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->resolve_compute.stencil[i].max_pipeline, &state->alloc);
+
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->resolve_compute.stencil[i].min_pipeline, &state->alloc);
+ }
+
+ radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.depth_zero_pipeline,
+ &state->alloc);
+
+ radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.stencil_zero_pipeline,
+ &state->alloc);
+
+ radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->resolve_compute.ds_layout,
+ &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->resolve_compute.p_layout,
+ &state->alloc);
}
static VkPipeline *
-radv_get_resolve_pipeline(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image_view *src_iview)
+radv_get_resolve_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview)
{
- struct radv_device *device = cmd_buffer->device;
- struct radv_meta_state *state = &device->meta_state;
- uint32_t samples = src_iview->image->info.samples;
- uint32_t samples_log2 = ffs(samples) - 1;
- VkPipeline *pipeline;
-
- if (vk_format_is_int(src_iview->vk_format))
- pipeline = &state->resolve_compute.rc[samples_log2].i_pipeline;
- else if (vk_format_is_srgb(src_iview->vk_format))
- pipeline = &state->resolve_compute.rc[samples_log2].srgb_pipeline;
- else
- pipeline = &state->resolve_compute.rc[samples_log2].pipeline;
-
- if (!*pipeline) {
- VkResult ret;
-
- ret = create_resolve_pipeline(device, samples,
- vk_format_is_int(src_iview->vk_format),
- vk_format_is_srgb(src_iview->vk_format),
- pipeline);
- if (ret != VK_SUCCESS) {
- cmd_buffer->record_result = ret;
- return NULL;
- }
- }
-
- return pipeline;
+ struct radv_device *device = cmd_buffer->device;
+ struct radv_meta_state *state = &device->meta_state;
+ uint32_t samples = src_iview->image->info.samples;
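+   /* Sample counts are powers of two, so ffs(samples) - 1 == log2(samples). */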
+ uint32_t samples_log2 = ffs(samples) - 1;
+ VkPipeline *pipeline;
+
+ if (vk_format_is_int(src_iview->vk_format))
+ pipeline = &state->resolve_compute.rc[samples_log2].i_pipeline;
+ else if (vk_format_is_srgb(src_iview->vk_format))
+ pipeline = &state->resolve_compute.rc[samples_log2].srgb_pipeline;
+ else
+ pipeline = &state->resolve_compute.rc[samples_log2].pipeline;
+
+ if (!*pipeline) {
+ VkResult ret;
+
+ ret = create_resolve_pipeline(device, samples, vk_format_is_int(src_iview->vk_format),
+ vk_format_is_srgb(src_iview->vk_format), pipeline);
+ if (ret != VK_SUCCESS) {
+ cmd_buffer->record_result = ret;
+ return NULL;
+ }
+ }
+
+ return pipeline;
}
static void
-emit_resolve(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image_view *src_iview,
- struct radv_image_view *dest_iview,
- const VkOffset2D *src_offset,
- const VkOffset2D *dest_offset,
- const VkExtent2D *resolve_extent)
+emit_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview,
+ struct radv_image_view *dest_iview, const VkOffset2D *src_offset,
+ const VkOffset2D *dest_offset, const VkExtent2D *resolve_extent)
{
- struct radv_device *device = cmd_buffer->device;
- VkPipeline *pipeline;
-
- radv_meta_push_descriptor_set(cmd_buffer,
- VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.resolve_compute.p_layout,
- 0, /* set */
- 2, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]) {
- {
- .sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(src_iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL },
- }
- },
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 1,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]) {
- {
+ struct radv_device *device = cmd_buffer->device;
+ VkPipeline *pipeline;
+
+ radv_meta_push_descriptor_set(
+ cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.resolve_compute.p_layout,
+ 0, /* set */
+ 2, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .pImageInfo =
+ (VkDescriptorImageInfo[]){
+ {.sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(src_iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
+ }},
+ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 1,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .pImageInfo = (VkDescriptorImageInfo[]){
+ {
.sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(dest_iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- }
- }
- });
-
- pipeline = radv_get_resolve_pipeline(cmd_buffer, src_iview);
-
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
-
- unsigned push_constants[4] = {
- src_offset->x,
- src_offset->y,
- dest_offset->x,
- dest_offset->y,
- };
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.resolve_compute.p_layout,
- VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
- push_constants);
- radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height, 1);
-
+ .imageView = radv_image_view_to_handle(dest_iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }}});
+
+ pipeline = radv_get_resolve_pipeline(cmd_buffer, src_iview);
+
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
+ *pipeline);
+
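+   /* These four values fill the 16-byte push constant range declared in create_layout(). */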
+ unsigned push_constants[4] = {
+ src_offset->x,
+ src_offset->y,
+ dest_offset->x,
+ dest_offset->y,
+ };
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+ device->meta_state.resolve_compute.p_layout, VK_SHADER_STAGE_COMPUTE_BIT,
+ 0, 16, push_constants);
+ radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height, 1);
}
static void
-emit_depth_stencil_resolve(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image_view *src_iview,
- struct radv_image_view *dest_iview,
- const VkExtent3D *resolve_extent,
- VkImageAspectFlags aspects,
- VkResolveModeFlagBits resolve_mode)
+emit_depth_stencil_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview,
+ struct radv_image_view *dest_iview, const VkExtent3D *resolve_extent,
+ VkImageAspectFlags aspects, VkResolveModeFlagBits resolve_mode)
{
- struct radv_device *device = cmd_buffer->device;
- const uint32_t samples = src_iview->image->info.samples;
- const uint32_t samples_log2 = ffs(samples) - 1;
- VkPipeline *pipeline;
-
- radv_meta_push_descriptor_set(cmd_buffer,
- VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.resolve_compute.p_layout,
- 0, /* set */
- 2, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]) {
- {
- .sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(src_iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL },
- }
- },
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 1,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]) {
- {
+ struct radv_device *device = cmd_buffer->device;
+ const uint32_t samples = src_iview->image->info.samples;
+ const uint32_t samples_log2 = ffs(samples) - 1;
+ VkPipeline *pipeline;
+
+ radv_meta_push_descriptor_set(
+ cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.resolve_compute.p_layout,
+ 0, /* set */
+ 2, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .pImageInfo =
+ (VkDescriptorImageInfo[]){
+ {.sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(src_iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
+ }},
+ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 1,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .pImageInfo = (VkDescriptorImageInfo[]){
+ {
.sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(dest_iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- }
- }
- });
-
- switch (resolve_mode) {
- case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR:
- if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
- pipeline = &device->meta_state.resolve_compute.depth_zero_pipeline;
- else
- pipeline = &device->meta_state.resolve_compute.stencil_zero_pipeline;
- break;
- case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
- assert(aspects == VK_IMAGE_ASPECT_DEPTH_BIT);
- pipeline = &device->meta_state.resolve_compute.depth[samples_log2].average_pipeline;
- break;
- case VK_RESOLVE_MODE_MIN_BIT_KHR:
- if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
- pipeline = &device->meta_state.resolve_compute.depth[samples_log2].min_pipeline;
- else
- pipeline = &device->meta_state.resolve_compute.stencil[samples_log2].min_pipeline;
- break;
- case VK_RESOLVE_MODE_MAX_BIT_KHR:
- if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
- pipeline = &device->meta_state.resolve_compute.depth[samples_log2].max_pipeline;
- else
- pipeline = &device->meta_state.resolve_compute.stencil[samples_log2].max_pipeline;
- break;
- default:
- unreachable("invalid resolve mode");
- }
-
- if (!*pipeline) {
- int index = aspects == VK_IMAGE_ASPECT_DEPTH_BIT ? DEPTH_RESOLVE : STENCIL_RESOLVE;
- VkResult ret;
-
- ret = create_depth_stencil_resolve_pipeline(device, samples,
- index, resolve_mode,
- pipeline);
- if (ret != VK_SUCCESS) {
- cmd_buffer->record_result = ret;
- return;
- }
- }
-
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
-
- radv_unaligned_dispatch(cmd_buffer, resolve_extent->width,
- resolve_extent->height, resolve_extent->depth);
-
+ .imageView = radv_image_view_to_handle(dest_iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }}});
+
+ switch (resolve_mode) {
+ case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR:
+ if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
+ pipeline = &device->meta_state.resolve_compute.depth_zero_pipeline;
+ else
+ pipeline = &device->meta_state.resolve_compute.stencil_zero_pipeline;
+ break;
+ case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
+ assert(aspects == VK_IMAGE_ASPECT_DEPTH_BIT);
+ pipeline = &device->meta_state.resolve_compute.depth[samples_log2].average_pipeline;
+ break;
+ case VK_RESOLVE_MODE_MIN_BIT_KHR:
+ if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
+ pipeline = &device->meta_state.resolve_compute.depth[samples_log2].min_pipeline;
+ else
+ pipeline = &device->meta_state.resolve_compute.stencil[samples_log2].min_pipeline;
+ break;
+ case VK_RESOLVE_MODE_MAX_BIT_KHR:
+ if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
+ pipeline = &device->meta_state.resolve_compute.depth[samples_log2].max_pipeline;
+ else
+ pipeline = &device->meta_state.resolve_compute.stencil[samples_log2].max_pipeline;
+ break;
+ default:
+ unreachable("invalid resolve mode");
+ }
+
+ if (!*pipeline) {
+ int index = aspects == VK_IMAGE_ASPECT_DEPTH_BIT ? DEPTH_RESOLVE : STENCIL_RESOLVE;
+ VkResult ret;
+
+ ret = create_depth_stencil_resolve_pipeline(device, samples, index, resolve_mode, pipeline);
+ if (ret != VK_SUCCESS) {
+ cmd_buffer->record_result = ret;
+ return;
+ }
+ }
+
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
+ *pipeline);
+
+ radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height,
+ resolve_extent->depth);
}
-void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *src_image,
- VkFormat src_format,
- VkImageLayout src_image_layout,
- struct radv_image *dest_image,
- VkFormat dest_format,
- VkImageLayout dest_image_layout,
- const VkImageResolve2KHR *region)
+void
+radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,
+ VkFormat src_format, VkImageLayout src_image_layout,
+ struct radv_image *dest_image, VkFormat dest_format,
+ VkImageLayout dest_image_layout, const VkImageResolve2KHR *region)
{
- struct radv_meta_saved_state saved_state;
-
- radv_decompress_resolve_src(cmd_buffer, src_image, src_image_layout,
- region);
-
- /* For partial resolves, DCC should be decompressed before resolving
- * because the metadata is re-initialized to the uncompressed after.
- */
- uint32_t queue_mask = radv_image_queue_family_mask(dest_image,
- cmd_buffer->queue_family_index,
- cmd_buffer->queue_family_index);
-
- if (radv_layout_dcc_compressed(cmd_buffer->device, dest_image,
- dest_image_layout, false, queue_mask) &&
- (region->dstOffset.x ||
- region->dstOffset.y ||
- region->dstOffset.z ||
- region->extent.width != dest_image->info.width ||
- region->extent.height != dest_image->info.height ||
- region->extent.depth != dest_image->info.depth)) {
- radv_decompress_dcc(cmd_buffer, dest_image, &(VkImageSubresourceRange) {
- .aspectMask = region->dstSubresource.aspectMask,
- .baseMipLevel = region->dstSubresource.mipLevel,
- .levelCount = 1,
- .baseArrayLayer = region->dstSubresource.baseArrayLayer,
- .layerCount = region->dstSubresource.layerCount,
- });
- }
-
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_COMPUTE_PIPELINE |
- RADV_META_SAVE_CONSTANTS |
- RADV_META_SAVE_DESCRIPTORS);
-
- assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
- assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
- assert(region->srcSubresource.layerCount == region->dstSubresource.layerCount);
-
- const uint32_t src_base_layer =
- radv_meta_get_iview_layer(src_image, &region->srcSubresource,
- &region->srcOffset);
-
- const uint32_t dest_base_layer =
- radv_meta_get_iview_layer(dest_image, &region->dstSubresource,
- &region->dstOffset);
-
- const struct VkExtent3D extent =
- radv_sanitize_image_extent(src_image->type, region->extent);
- const struct VkOffset3D srcOffset =
- radv_sanitize_image_offset(src_image->type, region->srcOffset);
- const struct VkOffset3D dstOffset =
- radv_sanitize_image_offset(dest_image->type, region->dstOffset);
-
- for (uint32_t layer = 0; layer < region->srcSubresource.layerCount;
- ++layer) {
-
- struct radv_image_view src_iview;
- radv_image_view_init(&src_iview, cmd_buffer->device,
- &(VkImageViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(src_image),
- .viewType = radv_meta_get_view_type(src_image),
- .format = src_format,
- .subresourceRange = {
- .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .baseMipLevel = region->srcSubresource.mipLevel,
- .levelCount = 1,
- .baseArrayLayer = src_base_layer + layer,
- .layerCount = 1,
- },
- }, NULL);
-
- struct radv_image_view dest_iview;
- radv_image_view_init(&dest_iview, cmd_buffer->device,
- &(VkImageViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(dest_image),
- .viewType = radv_meta_get_view_type(dest_image),
- .format = vk_to_non_srgb_format(dest_format),
- .subresourceRange = {
- .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .baseMipLevel = region->dstSubresource.mipLevel,
- .levelCount = 1,
- .baseArrayLayer = dest_base_layer + layer,
- .layerCount = 1,
- },
- }, NULL);
-
- emit_resolve(cmd_buffer,
- &src_iview,
- &dest_iview,
- &(VkOffset2D) {srcOffset.x, srcOffset.y },
- &(VkOffset2D) {dstOffset.x, dstOffset.y },
- &(VkExtent2D) {extent.width, extent.height });
- }
-
- radv_meta_restore(&saved_state, cmd_buffer);
-
- if (!radv_image_use_dcc_image_stores(cmd_buffer->device, dest_image) &&
- radv_layout_dcc_compressed(cmd_buffer->device, dest_image,
- dest_image_layout, false, queue_mask)) {
-
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_INV_VCACHE;
-
- VkImageSubresourceRange range = {
- .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .baseMipLevel = region->dstSubresource.mipLevel,
- .levelCount = 1,
- .baseArrayLayer = dest_base_layer,
- .layerCount = region->dstSubresource.layerCount,
- };
-
- cmd_buffer->state.flush_bits |=
- radv_init_dcc(cmd_buffer, dest_image, &range, 0xffffffff);
- }
+ struct radv_meta_saved_state saved_state;
+
+ radv_decompress_resolve_src(cmd_buffer, src_image, src_image_layout, region);
+
+   /* For partial resolves, DCC should be decompressed before resolving
+    * because the metadata is re-initialized to the uncompressed state afterwards.
+    */
+ uint32_t queue_mask = radv_image_queue_family_mask(dest_image, cmd_buffer->queue_family_index,
+ cmd_buffer->queue_family_index);
+
+ if (radv_layout_dcc_compressed(cmd_buffer->device, dest_image, dest_image_layout, false,
+ queue_mask) &&
+ (region->dstOffset.x || region->dstOffset.y || region->dstOffset.z ||
+ region->extent.width != dest_image->info.width ||
+ region->extent.height != dest_image->info.height ||
+ region->extent.depth != dest_image->info.depth)) {
+ radv_decompress_dcc(cmd_buffer, dest_image,
+ &(VkImageSubresourceRange){
+ .aspectMask = region->dstSubresource.aspectMask,
+ .baseMipLevel = region->dstSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = region->dstSubresource.baseArrayLayer,
+ .layerCount = region->dstSubresource.layerCount,
+ });
+ }
+
+ radv_meta_save(
+ &saved_state, cmd_buffer,
+ RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
+
+ assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
+ assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
+ assert(region->srcSubresource.layerCount == region->dstSubresource.layerCount);
+
+ const uint32_t src_base_layer =
+ radv_meta_get_iview_layer(src_image, &region->srcSubresource, &region->srcOffset);
+
+ const uint32_t dest_base_layer =
+ radv_meta_get_iview_layer(dest_image, &region->dstSubresource, &region->dstOffset);
+
+ const struct VkExtent3D extent = radv_sanitize_image_extent(src_image->type, region->extent);
+ const struct VkOffset3D srcOffset =
+ radv_sanitize_image_offset(src_image->type, region->srcOffset);
+ const struct VkOffset3D dstOffset =
+ radv_sanitize_image_offset(dest_image->type, region->dstOffset);
+
+ for (uint32_t layer = 0; layer < region->srcSubresource.layerCount; ++layer) {
+
+ struct radv_image_view src_iview;
+ radv_image_view_init(&src_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(src_image),
+ .viewType = radv_meta_get_view_type(src_image),
+ .format = src_format,
+ .subresourceRange =
+ {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = region->srcSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = src_base_layer + layer,
+ .layerCount = 1,
+ },
+ },
+ NULL);
+
+ struct radv_image_view dest_iview;
+ radv_image_view_init(&dest_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(dest_image),
+ .viewType = radv_meta_get_view_type(dest_image),
+ .format = vk_to_non_srgb_format(dest_format),
+ .subresourceRange =
+ {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = region->dstSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = dest_base_layer + layer,
+ .layerCount = 1,
+ },
+ },
+ NULL);
+
+ emit_resolve(cmd_buffer, &src_iview, &dest_iview, &(VkOffset2D){srcOffset.x, srcOffset.y},
+ &(VkOffset2D){dstOffset.x, dstOffset.y},
+ &(VkExtent2D){extent.width, extent.height});
+ }
+
+ radv_meta_restore(&saved_state, cmd_buffer);
+
+ if (!radv_image_use_dcc_image_stores(cmd_buffer->device, dest_image) &&
+ radv_layout_dcc_compressed(cmd_buffer->device, dest_image, dest_image_layout, false,
+ queue_mask)) {
+
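+      /* Wait for the compute resolve and invalidate the vector cache
+       * before re-initializing the DCC metadata.
+       */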
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE;
+
+ VkImageSubresourceRange range = {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = region->dstSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = dest_base_layer,
+ .layerCount = region->dstSubresource.layerCount,
+ };
+
+ cmd_buffer->state.flush_bits |= radv_init_dcc(cmd_buffer, dest_image, &range, 0xffffffff);
+ }
}
/**
@@ -886,174 +783,164 @@ void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
void
radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
- const struct radv_subpass *subpass = cmd_buffer->state.subpass;
- struct radv_subpass_barrier barrier;
- uint32_t layer_count = fb->layers;
-
- if (subpass->view_mask)
- layer_count = util_last_bit(subpass->view_mask);
-
- /* Resolves happen before the end-of-subpass barriers get executed, so
- * we have to make the attachment shader-readable.
- */
- barrier.src_stage_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
- barrier.src_access_mask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
- barrier.dst_access_mask = VK_ACCESS_SHADER_READ_BIT |
- VK_ACCESS_SHADER_WRITE_BIT;
- radv_subpass_barrier(cmd_buffer, &barrier);
-
- for (uint32_t i = 0; i < subpass->color_count; ++i) {
- struct radv_subpass_attachment src_att = subpass->color_attachments[i];
- struct radv_subpass_attachment dst_att = subpass->resolve_attachments[i];
-
- if (dst_att.attachment == VK_ATTACHMENT_UNUSED)
- continue;
-
- struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview;
- struct radv_image_view *dst_iview = cmd_buffer->state.attachments[dst_att.attachment].iview;
-
- VkImageResolve2KHR region = {
- .sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2_KHR,
- .extent = (VkExtent3D){ fb->width, fb->height, 1 },
- .srcSubresource = (VkImageSubresourceLayers) {
- .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .mipLevel = src_iview->base_mip,
- .baseArrayLayer = src_iview->base_layer,
- .layerCount = layer_count,
- },
- .dstSubresource = (VkImageSubresourceLayers) {
- .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .mipLevel = dst_iview->base_mip,
- .baseArrayLayer = dst_iview->base_layer,
- .layerCount = layer_count,
- },
- .srcOffset = (VkOffset3D){ 0, 0, 0 },
- .dstOffset = (VkOffset3D){ 0, 0, 0 },
- };
-
- radv_meta_resolve_compute_image(cmd_buffer,
- src_iview->image,
- src_iview->vk_format,
- src_att.layout,
- dst_iview->image,
- dst_iview->vk_format,
- dst_att.layout,
- &region);
- }
-
- cmd_buffer->state.flush_bits |=
- RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_INV_VCACHE |
- radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, NULL);
+ struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
+ const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+ struct radv_subpass_barrier barrier;
+ uint32_t layer_count = fb->layers;
+
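+   /* With multiview, util_last_bit(view_mask) is the highest view index + 1. */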
+ if (subpass->view_mask)
+ layer_count = util_last_bit(subpass->view_mask);
+
+ /* Resolves happen before the end-of-subpass barriers get executed, so
+ * we have to make the attachment shader-readable.
+ */
+ barrier.src_stage_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+ barrier.src_access_mask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+ barrier.dst_access_mask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
+ radv_subpass_barrier(cmd_buffer, &barrier);
+
+ for (uint32_t i = 0; i < subpass->color_count; ++i) {
+ struct radv_subpass_attachment src_att = subpass->color_attachments[i];
+ struct radv_subpass_attachment dst_att = subpass->resolve_attachments[i];
+
+ if (dst_att.attachment == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview;
+ struct radv_image_view *dst_iview = cmd_buffer->state.attachments[dst_att.attachment].iview;
+
+ VkImageResolve2KHR region = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2_KHR,
+ .extent = (VkExtent3D){fb->width, fb->height, 1},
+ .srcSubresource =
+ (VkImageSubresourceLayers){
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .mipLevel = src_iview->base_mip,
+ .baseArrayLayer = src_iview->base_layer,
+ .layerCount = layer_count,
+ },
+ .dstSubresource =
+ (VkImageSubresourceLayers){
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .mipLevel = dst_iview->base_mip,
+ .baseArrayLayer = dst_iview->base_layer,
+ .layerCount = layer_count,
+ },
+ .srcOffset = (VkOffset3D){0, 0, 0},
+ .dstOffset = (VkOffset3D){0, 0, 0},
+ };
+
+ radv_meta_resolve_compute_image(cmd_buffer, src_iview->image, src_iview->vk_format,
+ src_att.layout, dst_iview->image, dst_iview->vk_format,
+ dst_att.layout, &region);
+ }
+
+ cmd_buffer->state.flush_bits |=
+ RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
+ radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, NULL);
}
void
radv_depth_stencil_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer,
- VkImageAspectFlags aspects,
- VkResolveModeFlagBits resolve_mode)
+ VkImageAspectFlags aspects,
+ VkResolveModeFlagBits resolve_mode)
{
- struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
- const struct radv_subpass *subpass = cmd_buffer->state.subpass;
- struct radv_meta_saved_state saved_state;
- uint32_t layer_count = fb->layers;
-
- if (subpass->view_mask)
- layer_count = util_last_bit(subpass->view_mask);
-
- /* Resolves happen before the end-of-subpass barriers get executed, so
- * we have to make the attachment shader-readable.
- */
- cmd_buffer->state.flush_bits |=
- radv_src_access_flush(cmd_buffer, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, NULL) |
- radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_READ_BIT, NULL) |
- radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, NULL);
-
- struct radv_subpass_attachment src_att = *subpass->depth_stencil_attachment;
- struct radv_image_view *src_iview =
- cmd_buffer->state.attachments[src_att.attachment].iview;
- struct radv_image *src_image = src_iview->image;
-
- VkImageResolve2KHR region = {0};
- region.sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2_KHR;
- region.srcSubresource.aspectMask = aspects;
- region.srcSubresource.mipLevel = 0;
- region.srcSubresource.baseArrayLayer = src_iview->base_layer;
- region.srcSubresource.layerCount = layer_count;
-
- radv_decompress_resolve_src(cmd_buffer, src_image, src_att.layout, &region);
-
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_COMPUTE_PIPELINE |
- RADV_META_SAVE_DESCRIPTORS);
-
- struct radv_subpass_attachment dest_att = *subpass->ds_resolve_attachment;
- struct radv_image_view *dst_iview =
- cmd_buffer->state.attachments[dest_att.attachment].iview;
- struct radv_image *dst_image = dst_iview->image;
-
- struct radv_image_view tsrc_iview;
- radv_image_view_init(&tsrc_iview, cmd_buffer->device,
- &(VkImageViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(src_image),
- .viewType = radv_meta_get_view_type(src_image),
- .format = src_iview->vk_format,
- .subresourceRange = {
- .aspectMask = aspects,
- .baseMipLevel = src_iview->base_mip,
- .levelCount = 1,
- .baseArrayLayer = src_iview->base_layer,
- .layerCount = layer_count,
- },
- }, NULL);
-
- struct radv_image_view tdst_iview;
- radv_image_view_init(&tdst_iview, cmd_buffer->device,
- &(VkImageViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(dst_image),
- .viewType = radv_meta_get_view_type(dst_image),
- .format = dst_iview->vk_format,
- .subresourceRange = {
- .aspectMask = aspects,
- .baseMipLevel = dst_iview->base_mip,
- .levelCount = 1,
- .baseArrayLayer = dst_iview->base_layer,
- .layerCount = layer_count,
- },
- }, NULL);
-
- emit_depth_stencil_resolve(cmd_buffer, &tsrc_iview, &tdst_iview,
- &(VkExtent3D) { fb->width, fb->height, layer_count },
- aspects, resolve_mode);
-
- cmd_buffer->state.flush_bits |=
- RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_INV_VCACHE |
- radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, NULL);
-
- VkImageLayout layout =
- cmd_buffer->state.attachments[dest_att.attachment].current_layout;
- uint32_t queue_mask = radv_image_queue_family_mask(dst_image,
- cmd_buffer->queue_family_index,
- cmd_buffer->queue_family_index);
-
- if (radv_layout_is_htile_compressed(cmd_buffer->device, dst_image,
- layout, false, queue_mask)) {
- VkImageSubresourceRange range = {0};
- range.aspectMask = aspects;
- range.baseMipLevel = dst_iview->base_mip;
- range.levelCount = 1;
- range.baseArrayLayer = dst_iview->base_layer;
- range.layerCount = layer_count;
-
- uint32_t htile_value =
- radv_get_htile_initial_value(cmd_buffer->device, dst_image);
-
- cmd_buffer->state.flush_bits |=
- radv_clear_htile(cmd_buffer, dst_image, &range, htile_value);
- }
-
- radv_meta_restore(&saved_state, cmd_buffer);
+ struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
+ const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+ struct radv_meta_saved_state saved_state;
+ uint32_t layer_count = fb->layers;
+
+ if (subpass->view_mask)
+ layer_count = util_last_bit(subpass->view_mask);
+
+ /* Resolves happen before the end-of-subpass barriers get executed, so
+ * we have to make the attachment shader-readable.
+ */
+ cmd_buffer->state.flush_bits |=
+ radv_src_access_flush(cmd_buffer, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, NULL) |
+ radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_READ_BIT, NULL) |
+ radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, NULL);
+
+ struct radv_subpass_attachment src_att = *subpass->depth_stencil_attachment;
+ struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview;
+ struct radv_image *src_image = src_iview->image;
+
+ VkImageResolve2KHR region = {0};
+ region.sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2_KHR;
+ region.srcSubresource.aspectMask = aspects;
+ region.srcSubresource.mipLevel = 0;
+ region.srcSubresource.baseArrayLayer = src_iview->base_layer;
+ region.srcSubresource.layerCount = layer_count;
+
+ radv_decompress_resolve_src(cmd_buffer, src_image, src_att.layout, &region);
+
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS);
+
+ struct radv_subpass_attachment dest_att = *subpass->ds_resolve_attachment;
+ struct radv_image_view *dst_iview = cmd_buffer->state.attachments[dest_att.attachment].iview;
+ struct radv_image *dst_image = dst_iview->image;
+
+ struct radv_image_view tsrc_iview;
+ radv_image_view_init(&tsrc_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(src_image),
+ .viewType = radv_meta_get_view_type(src_image),
+ .format = src_iview->vk_format,
+ .subresourceRange =
+ {
+ .aspectMask = aspects,
+ .baseMipLevel = src_iview->base_mip,
+ .levelCount = 1,
+ .baseArrayLayer = src_iview->base_layer,
+ .layerCount = layer_count,
+ },
+ },
+ NULL);
+
+ struct radv_image_view tdst_iview;
+ radv_image_view_init(&tdst_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(dst_image),
+ .viewType = radv_meta_get_view_type(dst_image),
+ .format = dst_iview->vk_format,
+ .subresourceRange =
+ {
+ .aspectMask = aspects,
+ .baseMipLevel = dst_iview->base_mip,
+ .levelCount = 1,
+ .baseArrayLayer = dst_iview->base_layer,
+ .layerCount = layer_count,
+ },
+ },
+ NULL);
+
+ emit_depth_stencil_resolve(cmd_buffer, &tsrc_iview, &tdst_iview,
+ &(VkExtent3D){fb->width, fb->height, layer_count}, aspects,
+ resolve_mode);
+
+ cmd_buffer->state.flush_bits |=
+ RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
+ radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, NULL);
+
+ VkImageLayout layout = cmd_buffer->state.attachments[dest_att.attachment].current_layout;
+ uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->queue_family_index,
+ cmd_buffer->queue_family_index);
+
+ if (radv_layout_is_htile_compressed(cmd_buffer->device, dst_image, layout, false, queue_mask)) {
+ VkImageSubresourceRange range = {0};
+ range.aspectMask = aspects;
+ range.baseMipLevel = dst_iview->base_mip;
+ range.levelCount = 1;
+ range.baseArrayLayer = dst_iview->base_layer;
+ range.layerCount = layer_count;
+
+ uint32_t htile_value = radv_get_htile_initial_value(cmd_buffer->device, dst_image);
+
+ cmd_buffer->state.flush_bits |= radv_clear_htile(cmd_buffer, dst_image, &range, htile_value);
+ }
+
+ radv_meta_restore(&saved_state, cmd_buffer);
}
diff --git a/src/amd/vulkan/radv_meta_resolve_fs.c b/src/amd/vulkan/radv_meta_resolve_fs.c
index eae098e1a6b..d926bf62775 100644
--- a/src/amd/vulkan/radv_meta_resolve_fs.c
+++ b/src/amd/vulkan/radv_meta_resolve_fs.c
@@ -21,1165 +21,1094 @@
* IN THE SOFTWARE.
*/
-
#include <assert.h>
#include <stdbool.h>
+#include "nir/nir_builder.h"
#include "radv_meta.h"
#include "radv_private.h"
-#include "nir/nir_builder.h"
#include "sid.h"
#include "vk_format.h"
static nir_shader *
build_nir_vertex_shader(void)
{
- const struct glsl_type *vec4 = glsl_vec4_type();
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, NULL, "meta_resolve_vs");
+ const struct glsl_type *vec4 = glsl_vec4_type();
+ nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, NULL, "meta_resolve_vs");
- nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out,
- vec4, "gl_Position");
- pos_out->data.location = VARYING_SLOT_POS;
+ nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position");
+ pos_out->data.location = VARYING_SLOT_POS;
- nir_ssa_def *outvec = radv_meta_gen_rect_vertices(&b);
+ nir_ssa_def *outvec = radv_meta_gen_rect_vertices(&b);
- nir_store_var(&b, pos_out, outvec, 0xf);
- return b.shader;
+ nir_store_var(&b, pos_out, outvec, 0xf);
+ return b.shader;
}
static nir_shader *
build_resolve_fragment_shader(struct radv_device *dev, bool is_integer, int samples)
{
- const struct glsl_type *vec4 = glsl_vec4_type();
- const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS,
- false,
- false,
- GLSL_TYPE_FLOAT);
+ const struct glsl_type *vec4 = glsl_vec4_type();
+ const struct glsl_type *sampler_type =
+ glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, GLSL_TYPE_FLOAT);
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, "meta_resolve_fs-%d-%s", samples, is_integer ? "int" : "float");
+ nir_builder b = nir_builder_init_simple_shader(
+ MESA_SHADER_FRAGMENT, NULL, "meta_resolve_fs-%d-%s", samples, is_integer ? "int" : "float");
- nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
- sampler_type, "s_tex");
- input_img->data.descriptor_set = 0;
- input_img->data.binding = 0;
+ nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
+ input_img->data.descriptor_set = 0;
+ input_img->data.binding = 0;
- nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
- vec4, "f_color");
- color_out->data.location = FRAG_RESULT_DATA0;
+ nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color");
+ color_out->data.location = FRAG_RESULT_DATA0;
- nir_ssa_def *pos_in = nir_channels(&b, nir_load_frag_coord(&b), 0x3);
- nir_ssa_def *src_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), 0, 8);
+ nir_ssa_def *pos_in = nir_channels(&b, nir_load_frag_coord(&b), 0x3);
+ nir_ssa_def *src_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), 0, 8);
- nir_ssa_def *pos_int = nir_f2i32(&b, pos_in);
+ nir_ssa_def *pos_int = nir_f2i32(&b, pos_in);
- nir_ssa_def *img_coord = nir_channels(&b, nir_iadd(&b, pos_int, src_offset), 0x3);
- nir_variable *color = nir_local_variable_create(b.impl, glsl_vec4_type(), "color");
+ nir_ssa_def *img_coord = nir_channels(&b, nir_iadd(&b, pos_int, src_offset), 0x3);
+ nir_variable *color = nir_local_variable_create(b.impl, glsl_vec4_type(), "color");
- radv_meta_build_resolve_shader_core(&b, is_integer, samples, input_img,
- color, img_coord);
+ radv_meta_build_resolve_shader_core(&b, is_integer, samples, input_img, color, img_coord);
- nir_ssa_def *outval = nir_load_var(&b, color);
- nir_store_var(&b, color_out, outval, 0xf);
- return b.shader;
+ nir_ssa_def *outval = nir_load_var(&b, color);
+ nir_store_var(&b, color_out, outval, 0xf);
+ return b.shader;
}
-
static VkResult
create_layout(struct radv_device *device)
{
- VkResult result;
- /*
- * one descriptors for the image being sampled
- */
- VkDescriptorSetLayoutCreateInfo ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 1,
- .pBindings = (VkDescriptorSetLayoutBinding[]) {
- {
- .binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
- .pImmutableSamplers = NULL
- },
- }
- };
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
- &ds_create_info,
- &device->meta_state.alloc,
- &device->meta_state.resolve_fragment.ds_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
-
- VkPipelineLayoutCreateInfo pl_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 1,
- .pSetLayouts = &device->meta_state.resolve_fragment.ds_layout,
- .pushConstantRangeCount = 1,
- .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_FRAGMENT_BIT, 0, 8},
- };
-
- result = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &pl_create_info,
- &device->meta_state.alloc,
- &device->meta_state.resolve_fragment.p_layout);
- if (result != VK_SUCCESS)
- goto fail;
- return VK_SUCCESS;
+ VkResult result;
+   /*
+    * One descriptor for the image being sampled.
+    */
+ VkDescriptorSetLayoutCreateInfo ds_create_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 1,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.resolve_fragment.ds_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineLayoutCreateInfo pl_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &device->meta_state.resolve_fragment.ds_layout,
+ .pushConstantRangeCount = 1,
+ .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_FRAGMENT_BIT, 0, 8},
+ };
+
+ result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.resolve_fragment.p_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+ return VK_SUCCESS;
fail:
- return result;
+ return result;
}
static const VkPipelineVertexInputStateCreateInfo normal_vi_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
- .vertexBindingDescriptionCount = 0,
- .vertexAttributeDescriptionCount = 0,
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .vertexBindingDescriptionCount = 0,
+ .vertexAttributeDescriptionCount = 0,
};
static VkResult
-create_resolve_pipeline(struct radv_device *device,
- int samples_log2,
- VkFormat format)
+create_resolve_pipeline(struct radv_device *device, int samples_log2, VkFormat format)
{
- mtx_lock(&device->meta_state.mtx);
-
- unsigned fs_key = radv_format_meta_fs_key(device, format);
- VkPipeline *pipeline = &device->meta_state.resolve_fragment.rc[samples_log2].pipeline[fs_key];
- if (*pipeline) {
- mtx_unlock(&device->meta_state.mtx);
- return VK_SUCCESS;
- }
-
- VkResult result;
- bool is_integer = false;
- uint32_t samples = 1 << samples_log2;
- const VkPipelineVertexInputStateCreateInfo *vi_create_info;
- vi_create_info = &normal_vi_create_info;
- if (vk_format_is_int(format))
- is_integer = true;
-
- nir_shader *fs = build_resolve_fragment_shader(device, is_integer, samples);
- nir_shader *vs = build_nir_vertex_shader();
-
- VkRenderPass *rp = &device->meta_state.resolve_fragment.rc[samples_log2].render_pass[fs_key][0];
-
- assert(!*rp);
-
- VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
- {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_VERTEX_BIT,
- .module = vk_shader_module_handle_from_nir(vs),
- .pName = "main",
- .pSpecializationInfo = NULL
- }, {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
- .module = vk_shader_module_handle_from_nir(fs),
- .pName = "main",
- .pSpecializationInfo = NULL
- },
- };
-
-
- for (unsigned dst_layout = 0; dst_layout < RADV_META_DST_LAYOUT_COUNT; ++dst_layout) {
- VkImageLayout layout = radv_meta_dst_layout_to_layout(dst_layout);
- result = radv_CreateRenderPass2(radv_device_to_handle(device),
- &(VkRenderPassCreateInfo2) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
- .attachmentCount = 1,
- .pAttachments = &(VkAttachmentDescription2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
- .format = format,
- .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
- .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
- .initialLayout = layout,
- .finalLayout = layout,
- },
- .subpassCount = 1,
- .pSubpasses = &(VkSubpassDescription2) {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
- .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
- .inputAttachmentCount = 0,
- .colorAttachmentCount = 1,
- .pColorAttachments = &(VkAttachmentReference2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
- .attachment = 0,
- .layout = layout,
- },
- .pResolveAttachments = NULL,
- .pDepthStencilAttachment = &(VkAttachmentReference2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
- .attachment = VK_ATTACHMENT_UNUSED,
- .layout = VK_IMAGE_LAYOUT_GENERAL,
- },
- .preserveAttachmentCount = 0,
- .pPreserveAttachments = NULL,
- },
- .dependencyCount = 2,
- .pDependencies = (VkSubpassDependency2[]) {
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = VK_SUBPASS_EXTERNAL,
- .dstSubpass = 0,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- },
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = 0,
- .dstSubpass = VK_SUBPASS_EXTERNAL,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- }
- },
- }, &device->meta_state.alloc, rp + dst_layout);
- }
-
-
- const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
- .stageCount = ARRAY_SIZE(pipeline_shader_stages),
- .pStages = pipeline_shader_stages,
- .pVertexInputState = vi_create_info,
- .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
- .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
- .primitiveRestartEnable = false,
- },
- .pViewportState = &(VkPipelineViewportStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
- .viewportCount = 1,
- .scissorCount = 1,
- },
- .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
- .rasterizerDiscardEnable = false,
- .polygonMode = VK_POLYGON_MODE_FILL,
- .cullMode = VK_CULL_MODE_NONE,
- .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE
- },
- .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
- .rasterizationSamples = 1,
- .sampleShadingEnable = false,
- .pSampleMask = (VkSampleMask[]) { UINT32_MAX },
- },
- .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
- .attachmentCount = 1,
- .pAttachments = (VkPipelineColorBlendAttachmentState []) {
- { .colorWriteMask =
- VK_COLOR_COMPONENT_A_BIT |
- VK_COLOR_COMPONENT_R_BIT |
- VK_COLOR_COMPONENT_G_BIT |
- VK_COLOR_COMPONENT_B_BIT },
- }
- },
- .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
- .dynamicStateCount = 9,
- .pDynamicStates = (VkDynamicState[]) {
- VK_DYNAMIC_STATE_VIEWPORT,
- VK_DYNAMIC_STATE_SCISSOR,
- VK_DYNAMIC_STATE_LINE_WIDTH,
- VK_DYNAMIC_STATE_DEPTH_BIAS,
- VK_DYNAMIC_STATE_BLEND_CONSTANTS,
- VK_DYNAMIC_STATE_DEPTH_BOUNDS,
- VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
- VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
- VK_DYNAMIC_STATE_STENCIL_REFERENCE,
- },
- },
- .flags = 0,
- .layout = device->meta_state.resolve_fragment.p_layout,
- .renderPass = *rp,
- .subpass = 0,
- };
-
- const struct radv_graphics_pipeline_create_info radv_pipeline_info = {
- .use_rectlist = true
- };
-
- result = radv_graphics_pipeline_create(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- &vk_pipeline_info, &radv_pipeline_info,
- &device->meta_state.alloc,
- pipeline);
- ralloc_free(vs);
- ralloc_free(fs);
-
- mtx_unlock(&device->meta_state.mtx);
- return result;
+ mtx_lock(&device->meta_state.mtx);
+
+ unsigned fs_key = radv_format_meta_fs_key(device, format);
+ VkPipeline *pipeline = &device->meta_state.resolve_fragment.rc[samples_log2].pipeline[fs_key];
+ if (*pipeline) {
+ mtx_unlock(&device->meta_state.mtx);
+ return VK_SUCCESS;
+ }
+
+ VkResult result;
+ bool is_integer = false;
+ uint32_t samples = 1 << samples_log2;
+ const VkPipelineVertexInputStateCreateInfo *vi_create_info;
+ vi_create_info = &normal_vi_create_info;
+ if (vk_format_is_int(format))
+ is_integer = true;
+
+ nir_shader *fs = build_resolve_fragment_shader(device, is_integer, samples);
+ nir_shader *vs = build_nir_vertex_shader();
+
+ VkRenderPass *rp = &device->meta_state.resolve_fragment.rc[samples_log2].render_pass[fs_key][0];
+
+ assert(!*rp);
+
+ VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
+ {.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_VERTEX_BIT,
+ .module = vk_shader_module_handle_from_nir(vs),
+ .pName = "main",
+ .pSpecializationInfo = NULL},
+ {.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .module = vk_shader_module_handle_from_nir(fs),
+ .pName = "main",
+ .pSpecializationInfo = NULL},
+ };
+
+ for (unsigned dst_layout = 0; dst_layout < RADV_META_DST_LAYOUT_COUNT; ++dst_layout) {
+ VkImageLayout layout = radv_meta_dst_layout_to_layout(dst_layout);
+ result = radv_CreateRenderPass2(
+ radv_device_to_handle(device),
+ &(VkRenderPassCreateInfo2){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
+ .attachmentCount = 1,
+ .pAttachments =
+ &(VkAttachmentDescription2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
+ .format = format,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = layout,
+ .finalLayout = layout,
+ },
+ .subpassCount = 1,
+ .pSubpasses =
+ &(VkSubpassDescription2){
+ .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 1,
+ .pColorAttachments =
+ &(VkAttachmentReference2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
+ .attachment = 0,
+ .layout = layout,
+ },
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment =
+ &(VkAttachmentReference2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
+ .attachment = VK_ATTACHMENT_UNUSED,
+ .layout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ .preserveAttachmentCount = 0,
+ .pPreserveAttachments = NULL,
+ },
+ .dependencyCount = 2,
+ .pDependencies =
+ (VkSubpassDependency2[]){{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = VK_SUBPASS_EXTERNAL,
+ .dstSubpass = 0,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0},
+ {.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = 0,
+ .dstSubpass = VK_SUBPASS_EXTERNAL,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0}},
+ },
+ &device->meta_state.alloc, rp + dst_layout);
+ }
+
+ const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .stageCount = ARRAY_SIZE(pipeline_shader_stages),
+ .pStages = pipeline_shader_stages,
+ .pVertexInputState = vi_create_info,
+ .pInputAssemblyState =
+ &(VkPipelineInputAssemblyStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
+ .primitiveRestartEnable = false,
+ },
+ .pViewportState =
+ &(VkPipelineViewportStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .viewportCount = 1,
+ .scissorCount = 1,
+ },
+ .pRasterizationState =
+ &(VkPipelineRasterizationStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .rasterizerDiscardEnable = false,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .cullMode = VK_CULL_MODE_NONE,
+ .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE},
+ .pMultisampleState =
+ &(VkPipelineMultisampleStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .rasterizationSamples = 1,
+ .sampleShadingEnable = false,
+ .pSampleMask = (VkSampleMask[]){UINT32_MAX},
+ },
+ .pColorBlendState =
+ &(VkPipelineColorBlendStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments =
+ (VkPipelineColorBlendAttachmentState[]){
+ {.colorWriteMask = VK_COLOR_COMPONENT_A_BIT | VK_COLOR_COMPONENT_R_BIT |
+ VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT},
+ }},
+ .pDynamicState =
+ &(VkPipelineDynamicStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .dynamicStateCount = 9,
+ .pDynamicStates =
+ (VkDynamicState[]){
+ VK_DYNAMIC_STATE_VIEWPORT,
+ VK_DYNAMIC_STATE_SCISSOR,
+ VK_DYNAMIC_STATE_LINE_WIDTH,
+ VK_DYNAMIC_STATE_DEPTH_BIAS,
+ VK_DYNAMIC_STATE_BLEND_CONSTANTS,
+ VK_DYNAMIC_STATE_DEPTH_BOUNDS,
+ VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
+ VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
+ VK_DYNAMIC_STATE_STENCIL_REFERENCE,
+ },
+ },
+ .flags = 0,
+ .layout = device->meta_state.resolve_fragment.p_layout,
+ .renderPass = *rp,
+ .subpass = 0,
+ };
+
+ const struct radv_graphics_pipeline_create_info radv_pipeline_info = {.use_rectlist = true};
+
+ result = radv_graphics_pipeline_create(
+ radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache),
+ &vk_pipeline_info, &radv_pipeline_info, &device->meta_state.alloc, pipeline);
+ ralloc_free(vs);
+ ralloc_free(fs);
+
+ mtx_unlock(&device->meta_state.mtx);
+ return result;
}
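The function that ends here follows the lazy, mutex-guarded creation pattern used throughout the RADV meta code: take meta_state.mtx, return early if the pipeline slot keyed by (samples_log2, fs_key) is already populated, and otherwise build the shaders, render passes and pipeline before unlocking. A minimal sketch of that pattern in isolation, with a hypothetical lazy_cache struct and create_one() callback standing in for the real driver state:

#include <threads.h>

struct lazy_cache {
   mtx_t mtx;             /* assumed to be initialized elsewhere with mtx_init() */
   void *pipeline[5][16]; /* [samples_log2][fs_key]; dimensions are placeholders */
};

static int
get_or_create(struct lazy_cache *cache, unsigned samples_log2, unsigned fs_key,
              void *(*create_one)(unsigned samples, unsigned fs_key))
{
   mtx_lock(&cache->mtx);
   if (cache->pipeline[samples_log2][fs_key]) {
      /* Already built by an earlier call (possibly from another thread). */
      mtx_unlock(&cache->mtx);
      return 0;
   }
   cache->pipeline[samples_log2][fs_key] = create_one(1u << samples_log2, fs_key);
   int ret = cache->pipeline[samples_log2][fs_key] ? 0 : -1;
   mtx_unlock(&cache->mtx);
   return ret;
}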
-enum {
- DEPTH_RESOLVE,
- STENCIL_RESOLVE
-};
+enum { DEPTH_RESOLVE, STENCIL_RESOLVE };
static const char *
get_resolve_mode_str(VkResolveModeFlagBits resolve_mode)
{
- switch (resolve_mode) {
- case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR:
- return "zero";
- case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
- return "average";
- case VK_RESOLVE_MODE_MIN_BIT_KHR:
- return "min";
- case VK_RESOLVE_MODE_MAX_BIT_KHR:
- return "max";
- default:
- unreachable("invalid resolve mode");
- }
+ switch (resolve_mode) {
+ case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR:
+ return "zero";
+ case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
+ return "average";
+ case VK_RESOLVE_MODE_MIN_BIT_KHR:
+ return "min";
+ case VK_RESOLVE_MODE_MAX_BIT_KHR:
+ return "max";
+ default:
+ unreachable("invalid resolve mode");
+ }
}
static nir_shader *
-build_depth_stencil_resolve_fragment_shader(struct radv_device *dev, int samples,
- int index,
- VkResolveModeFlagBits resolve_mode)
+build_depth_stencil_resolve_fragment_shader(struct radv_device *dev, int samples, int index,
+ VkResolveModeFlagBits resolve_mode)
{
- const struct glsl_type *vec4 = glsl_vec4_type();
- const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
- false,
- false,
- GLSL_TYPE_FLOAT);
-
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL,
- "meta_resolve_fs_%s-%s-%d",
- index == DEPTH_RESOLVE ? "depth" : "stencil",
- get_resolve_mode_str(resolve_mode), samples);
-
- nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
- sampler_type, "s_tex");
- input_img->data.descriptor_set = 0;
- input_img->data.binding = 0;
-
- nir_variable *fs_out = nir_variable_create(b.shader,
- nir_var_shader_out, vec4,
- "f_out");
- fs_out->data.location =
- index == DEPTH_RESOLVE ? FRAG_RESULT_DEPTH : FRAG_RESULT_STENCIL;
-
- nir_ssa_def *pos_in = nir_channels(&b, nir_load_frag_coord(&b), 0x3);
-
- nir_ssa_def *pos_int = nir_f2i32(&b, pos_in);
-
- nir_ssa_def *img_coord = nir_channels(&b, pos_int, 0x3);
-
- nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
-
- nir_alu_type type = index == DEPTH_RESOLVE ? nir_type_float32 : nir_type_uint32;
-
- nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
- tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
- tex->op = nir_texop_txf_ms;
- tex->src[0].src_type = nir_tex_src_coord;
- tex->src[0].src = nir_src_for_ssa(img_coord);
- tex->src[1].src_type = nir_tex_src_ms_index;
- tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
- tex->src[2].src_type = nir_tex_src_texture_deref;
- tex->src[2].src = nir_src_for_ssa(input_img_deref);
- tex->dest_type = type;
- tex->is_array = false;
- tex->coord_components = 2;
-
- nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
- nir_builder_instr_insert(&b, &tex->instr);
-
- nir_ssa_def *outval = &tex->dest.ssa;
-
- if (resolve_mode != VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR) {
- for (int i = 1; i < samples; i++) {
- nir_tex_instr *tex_add = nir_tex_instr_create(b.shader, 3);
- tex_add->sampler_dim = GLSL_SAMPLER_DIM_MS;
- tex_add->op = nir_texop_txf_ms;
- tex_add->src[0].src_type = nir_tex_src_coord;
- tex_add->src[0].src = nir_src_for_ssa(img_coord);
- tex_add->src[1].src_type = nir_tex_src_ms_index;
- tex_add->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i));
- tex_add->src[2].src_type = nir_tex_src_texture_deref;
- tex_add->src[2].src = nir_src_for_ssa(input_img_deref);
- tex_add->dest_type = type;
- tex_add->is_array = false;
- tex_add->coord_components = 2;
-
- nir_ssa_dest_init(&tex_add->instr, &tex_add->dest, 4, 32, "tex");
- nir_builder_instr_insert(&b, &tex_add->instr);
-
- switch (resolve_mode) {
- case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
- assert(index == DEPTH_RESOLVE);
- outval = nir_fadd(&b, outval, &tex_add->dest.ssa);
- break;
- case VK_RESOLVE_MODE_MIN_BIT_KHR:
- if (index == DEPTH_RESOLVE)
- outval = nir_fmin(&b, outval, &tex_add->dest.ssa);
- else
- outval = nir_umin(&b, outval, &tex_add->dest.ssa);
- break;
- case VK_RESOLVE_MODE_MAX_BIT_KHR:
- if (index == DEPTH_RESOLVE)
- outval = nir_fmax(&b, outval, &tex_add->dest.ssa);
- else
- outval = nir_umax(&b, outval, &tex_add->dest.ssa);
- break;
- default:
- unreachable("invalid resolve mode");
- }
- }
-
- if (resolve_mode == VK_RESOLVE_MODE_AVERAGE_BIT_KHR)
- outval = nir_fdiv(&b, outval, nir_imm_float(&b, samples));
- }
-
- nir_store_var(&b, fs_out, outval, 0x1);
-
- return b.shader;
+ const struct glsl_type *vec4 = glsl_vec4_type();
+ const struct glsl_type *sampler_type =
+ glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false, GLSL_TYPE_FLOAT);
+
+ nir_builder b = nir_builder_init_simple_shader(
+ MESA_SHADER_FRAGMENT, NULL, "meta_resolve_fs_%s-%s-%d",
+ index == DEPTH_RESOLVE ? "depth" : "stencil", get_resolve_mode_str(resolve_mode), samples);
+
+ nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
+ input_img->data.descriptor_set = 0;
+ input_img->data.binding = 0;
+
+ nir_variable *fs_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_out");
+ fs_out->data.location = index == DEPTH_RESOLVE ? FRAG_RESULT_DEPTH : FRAG_RESULT_STENCIL;
+
+ nir_ssa_def *pos_in = nir_channels(&b, nir_load_frag_coord(&b), 0x3);
+
+ nir_ssa_def *pos_int = nir_f2i32(&b, pos_in);
+
+ nir_ssa_def *img_coord = nir_channels(&b, pos_int, 0x3);
+
+ nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
+
+ nir_alu_type type = index == DEPTH_RESOLVE ? nir_type_float32 : nir_type_uint32;
+
+ nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
+ tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
+ tex->op = nir_texop_txf_ms;
+ tex->src[0].src_type = nir_tex_src_coord;
+ tex->src[0].src = nir_src_for_ssa(img_coord);
+ tex->src[1].src_type = nir_tex_src_ms_index;
+ tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
+ tex->src[2].src_type = nir_tex_src_texture_deref;
+ tex->src[2].src = nir_src_for_ssa(input_img_deref);
+ tex->dest_type = type;
+ tex->is_array = false;
+ tex->coord_components = 2;
+
+ nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+ nir_builder_instr_insert(&b, &tex->instr);
+
+ nir_ssa_def *outval = &tex->dest.ssa;
+
+ if (resolve_mode != VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR) {
+ for (int i = 1; i < samples; i++) {
+ nir_tex_instr *tex_add = nir_tex_instr_create(b.shader, 3);
+ tex_add->sampler_dim = GLSL_SAMPLER_DIM_MS;
+ tex_add->op = nir_texop_txf_ms;
+ tex_add->src[0].src_type = nir_tex_src_coord;
+ tex_add->src[0].src = nir_src_for_ssa(img_coord);
+ tex_add->src[1].src_type = nir_tex_src_ms_index;
+ tex_add->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i));
+ tex_add->src[2].src_type = nir_tex_src_texture_deref;
+ tex_add->src[2].src = nir_src_for_ssa(input_img_deref);
+ tex_add->dest_type = type;
+ tex_add->is_array = false;
+ tex_add->coord_components = 2;
+
+ nir_ssa_dest_init(&tex_add->instr, &tex_add->dest, 4, 32, "tex");
+ nir_builder_instr_insert(&b, &tex_add->instr);
+
+ switch (resolve_mode) {
+ case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
+ assert(index == DEPTH_RESOLVE);
+ outval = nir_fadd(&b, outval, &tex_add->dest.ssa);
+ break;
+ case VK_RESOLVE_MODE_MIN_BIT_KHR:
+ if (index == DEPTH_RESOLVE)
+ outval = nir_fmin(&b, outval, &tex_add->dest.ssa);
+ else
+ outval = nir_umin(&b, outval, &tex_add->dest.ssa);
+ break;
+ case VK_RESOLVE_MODE_MAX_BIT_KHR:
+ if (index == DEPTH_RESOLVE)
+ outval = nir_fmax(&b, outval, &tex_add->dest.ssa);
+ else
+ outval = nir_umax(&b, outval, &tex_add->dest.ssa);
+ break;
+ default:
+ unreachable("invalid resolve mode");
+ }
+ }
+
+ if (resolve_mode == VK_RESOLVE_MODE_AVERAGE_BIT_KHR)
+ outval = nir_fdiv(&b, outval, nir_imm_float(&b, samples));
+ }
+
+ nir_store_var(&b, fs_out, outval, 0x1);
+
+ return b.shader;
}
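The shader built above fetches one txf_ms sample per iteration and folds it into the running value with nir_fadd/nir_fmin/nir_fmax (or umin/umax for stencil), dividing by the sample count only for the AVERAGE mode. A CPU-side illustration of the same per-pixel reduction for the float (depth) case; resolve_depth() is a hypothetical helper, not driver code:

#include <math.h>

enum resolve_op { RESOLVE_ZERO, RESOLVE_AVERAGE, RESOLVE_MIN, RESOLVE_MAX };

static float
resolve_depth(const float *samples, int count, enum resolve_op op)
{
   float out = samples[0];          /* SAMPLE_ZERO stops after sample 0 */
   if (op == RESOLVE_ZERO)
      return out;

   for (int i = 1; i < count; i++) {
      if (op == RESOLVE_AVERAGE)
         out += samples[i];
      else if (op == RESOLVE_MIN)
         out = fminf(out, samples[i]);
      else
         out = fmaxf(out, samples[i]);
   }
   if (op == RESOLVE_AVERAGE)
      out /= count;                 /* matches the final nir_fdiv */
   return out;
}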
static VkResult
-create_depth_stencil_resolve_pipeline(struct radv_device *device,
- int samples_log2,
- int index,
- VkResolveModeFlagBits resolve_mode)
+create_depth_stencil_resolve_pipeline(struct radv_device *device, int samples_log2, int index,
+ VkResolveModeFlagBits resolve_mode)
{
- VkRenderPass *render_pass;
- VkPipeline *pipeline;
- VkFormat src_format;
- VkResult result;
-
- mtx_lock(&device->meta_state.mtx);
-
- switch (resolve_mode) {
- case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR:
- if (index == DEPTH_RESOLVE)
- pipeline = &device->meta_state.resolve_fragment.depth_zero_pipeline;
- else
- pipeline = &device->meta_state.resolve_fragment.stencil_zero_pipeline;
- break;
- case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
- assert(index == DEPTH_RESOLVE);
- pipeline = &device->meta_state.resolve_fragment.depth[samples_log2].average_pipeline;
- break;
- case VK_RESOLVE_MODE_MIN_BIT_KHR:
- if (index == DEPTH_RESOLVE)
- pipeline = &device->meta_state.resolve_fragment.depth[samples_log2].min_pipeline;
- else
- pipeline = &device->meta_state.resolve_fragment.stencil[samples_log2].min_pipeline;
- break;
- case VK_RESOLVE_MODE_MAX_BIT_KHR:
- if (index == DEPTH_RESOLVE)
- pipeline = &device->meta_state.resolve_fragment.depth[samples_log2].max_pipeline;
- else
- pipeline = &device->meta_state.resolve_fragment.stencil[samples_log2].max_pipeline;
- break;
- default:
- unreachable("invalid resolve mode");
- }
-
- if (*pipeline) {
- mtx_unlock(&device->meta_state.mtx);
- return VK_SUCCESS;
- }
-
- uint32_t samples = 1 << samples_log2;
- nir_shader *fs = build_depth_stencil_resolve_fragment_shader(device, samples, index, resolve_mode);
- nir_shader *vs = build_nir_vertex_shader();
-
- VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
- {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_VERTEX_BIT,
- .module = vk_shader_module_handle_from_nir(vs),
- .pName = "main",
- .pSpecializationInfo = NULL
- }, {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
- .module = vk_shader_module_handle_from_nir(fs),
- .pName = "main",
- .pSpecializationInfo = NULL
- },
- };
-
- if (index == DEPTH_RESOLVE) {
- src_format = VK_FORMAT_D32_SFLOAT;
- render_pass = &device->meta_state.resolve_fragment.depth_render_pass;
- } else {
- render_pass = &device->meta_state.resolve_fragment.stencil_render_pass;
- src_format = VK_FORMAT_S8_UINT;
- }
-
- if (!*render_pass) {
- result = radv_CreateRenderPass2(radv_device_to_handle(device),
- &(VkRenderPassCreateInfo2) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
- .attachmentCount = 1,
- .pAttachments = &(VkAttachmentDescription2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
- .format = src_format,
- .loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
- .storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
- .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
- .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
- .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
- .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- .subpassCount = 1,
- .pSubpasses = &(VkSubpassDescription2) {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
- .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
- .inputAttachmentCount = 0,
- .colorAttachmentCount = 0,
- .pColorAttachments = NULL,
- .pResolveAttachments = NULL,
- .pDepthStencilAttachment = &(VkAttachmentReference2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
- .attachment = 0,
- .layout = VK_IMAGE_LAYOUT_GENERAL,
- },
- .preserveAttachmentCount = 0,
- .pPreserveAttachments = NULL,
- },
- .dependencyCount = 2,
- .pDependencies = (VkSubpassDependency2[]) {
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = VK_SUBPASS_EXTERNAL,
- .dstSubpass = 0,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- },
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = 0,
- .dstSubpass = VK_SUBPASS_EXTERNAL,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- }
- },
- }, &device->meta_state.alloc, render_pass);
- }
-
- VkStencilOp stencil_op =
- index == DEPTH_RESOLVE ? VK_STENCIL_OP_KEEP : VK_STENCIL_OP_REPLACE;
-
- VkPipelineDepthStencilStateCreateInfo depth_stencil_state = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
- .depthTestEnable = true,
- .depthWriteEnable = index == DEPTH_RESOLVE,
- .stencilTestEnable = index == STENCIL_RESOLVE,
- .depthCompareOp = VK_COMPARE_OP_ALWAYS,
- .front = {
- .failOp = stencil_op,
- .passOp = stencil_op,
- .depthFailOp = stencil_op,
- .compareOp = VK_COMPARE_OP_ALWAYS,
- },
- .back = {
- .failOp = stencil_op,
- .passOp = stencil_op,
- .depthFailOp = stencil_op,
- .compareOp = VK_COMPARE_OP_ALWAYS,
- }
- };
-
- const VkPipelineVertexInputStateCreateInfo *vi_create_info;
- vi_create_info = &normal_vi_create_info;
-
- const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
- .stageCount = ARRAY_SIZE(pipeline_shader_stages),
- .pStages = pipeline_shader_stages,
- .pVertexInputState = vi_create_info,
- .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
- .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
- .primitiveRestartEnable = false,
- },
- .pViewportState = &(VkPipelineViewportStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
- .viewportCount = 1,
- .scissorCount = 1,
- },
- .pDepthStencilState = &depth_stencil_state,
- .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
- .rasterizerDiscardEnable = false,
- .polygonMode = VK_POLYGON_MODE_FILL,
- .cullMode = VK_CULL_MODE_NONE,
- .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE
- },
- .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
- .rasterizationSamples = 1,
- .sampleShadingEnable = false,
- .pSampleMask = (VkSampleMask[]) { UINT32_MAX },
- },
- .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
- .attachmentCount = 0,
- .pAttachments = (VkPipelineColorBlendAttachmentState []) {
- { .colorWriteMask =
- VK_COLOR_COMPONENT_A_BIT |
- VK_COLOR_COMPONENT_R_BIT |
- VK_COLOR_COMPONENT_G_BIT |
- VK_COLOR_COMPONENT_B_BIT },
- }
- },
- .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
- .dynamicStateCount = 9,
- .pDynamicStates = (VkDynamicState[]) {
- VK_DYNAMIC_STATE_VIEWPORT,
- VK_DYNAMIC_STATE_SCISSOR,
- VK_DYNAMIC_STATE_LINE_WIDTH,
- VK_DYNAMIC_STATE_DEPTH_BIAS,
- VK_DYNAMIC_STATE_BLEND_CONSTANTS,
- VK_DYNAMIC_STATE_DEPTH_BOUNDS,
- VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
- VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
- VK_DYNAMIC_STATE_STENCIL_REFERENCE,
- },
- },
- .flags = 0,
- .layout = device->meta_state.resolve_fragment.p_layout,
- .renderPass = *render_pass,
- .subpass = 0,
- };
-
- const struct radv_graphics_pipeline_create_info radv_pipeline_info = {
- .use_rectlist = true
- };
-
- result = radv_graphics_pipeline_create(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- &vk_pipeline_info, &radv_pipeline_info,
- &device->meta_state.alloc,
- pipeline);
-
- ralloc_free(vs);
- ralloc_free(fs);
-
- mtx_unlock(&device->meta_state.mtx);
- return result;
+ VkRenderPass *render_pass;
+ VkPipeline *pipeline;
+ VkFormat src_format;
+ VkResult result;
+
+ mtx_lock(&device->meta_state.mtx);
+
+ switch (resolve_mode) {
+ case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR:
+ if (index == DEPTH_RESOLVE)
+ pipeline = &device->meta_state.resolve_fragment.depth_zero_pipeline;
+ else
+ pipeline = &device->meta_state.resolve_fragment.stencil_zero_pipeline;
+ break;
+ case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
+ assert(index == DEPTH_RESOLVE);
+ pipeline = &device->meta_state.resolve_fragment.depth[samples_log2].average_pipeline;
+ break;
+ case VK_RESOLVE_MODE_MIN_BIT_KHR:
+ if (index == DEPTH_RESOLVE)
+ pipeline = &device->meta_state.resolve_fragment.depth[samples_log2].min_pipeline;
+ else
+ pipeline = &device->meta_state.resolve_fragment.stencil[samples_log2].min_pipeline;
+ break;
+ case VK_RESOLVE_MODE_MAX_BIT_KHR:
+ if (index == DEPTH_RESOLVE)
+ pipeline = &device->meta_state.resolve_fragment.depth[samples_log2].max_pipeline;
+ else
+ pipeline = &device->meta_state.resolve_fragment.stencil[samples_log2].max_pipeline;
+ break;
+ default:
+ unreachable("invalid resolve mode");
+ }
+
+ if (*pipeline) {
+ mtx_unlock(&device->meta_state.mtx);
+ return VK_SUCCESS;
+ }
+
+ uint32_t samples = 1 << samples_log2;
+ nir_shader *fs =
+ build_depth_stencil_resolve_fragment_shader(device, samples, index, resolve_mode);
+ nir_shader *vs = build_nir_vertex_shader();
+
+ VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
+ {.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_VERTEX_BIT,
+ .module = vk_shader_module_handle_from_nir(vs),
+ .pName = "main",
+ .pSpecializationInfo = NULL},
+ {.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .module = vk_shader_module_handle_from_nir(fs),
+ .pName = "main",
+ .pSpecializationInfo = NULL},
+ };
+
+ if (index == DEPTH_RESOLVE) {
+ src_format = VK_FORMAT_D32_SFLOAT;
+ render_pass = &device->meta_state.resolve_fragment.depth_render_pass;
+ } else {
+ render_pass = &device->meta_state.resolve_fragment.stencil_render_pass;
+ src_format = VK_FORMAT_S8_UINT;
+ }
+
+ if (!*render_pass) {
+ result = radv_CreateRenderPass2(
+ radv_device_to_handle(device),
+ &(VkRenderPassCreateInfo2){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
+ .attachmentCount = 1,
+ .pAttachments =
+ &(VkAttachmentDescription2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
+ .format = src_format,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
+ .storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
+ .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ .subpassCount = 1,
+ .pSubpasses =
+ &(VkSubpassDescription2){
+ .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 0,
+ .pColorAttachments = NULL,
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment =
+ &(VkAttachmentReference2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
+ .attachment = 0,
+ .layout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ .preserveAttachmentCount = 0,
+ .pPreserveAttachments = NULL,
+ },
+ .dependencyCount = 2,
+ .pDependencies =
+ (VkSubpassDependency2[]){{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = VK_SUBPASS_EXTERNAL,
+ .dstSubpass = 0,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0},
+ {.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = 0,
+ .dstSubpass = VK_SUBPASS_EXTERNAL,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0}},
+ },
+ &device->meta_state.alloc, render_pass);
+ }
+
+ VkStencilOp stencil_op = index == DEPTH_RESOLVE ? VK_STENCIL_OP_KEEP : VK_STENCIL_OP_REPLACE;
+
+ VkPipelineDepthStencilStateCreateInfo depth_stencil_state = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
+ .depthTestEnable = true,
+ .depthWriteEnable = index == DEPTH_RESOLVE,
+ .stencilTestEnable = index == STENCIL_RESOLVE,
+ .depthCompareOp = VK_COMPARE_OP_ALWAYS,
+ .front =
+ {
+ .failOp = stencil_op,
+ .passOp = stencil_op,
+ .depthFailOp = stencil_op,
+ .compareOp = VK_COMPARE_OP_ALWAYS,
+ },
+ .back = {
+ .failOp = stencil_op,
+ .passOp = stencil_op,
+ .depthFailOp = stencil_op,
+ .compareOp = VK_COMPARE_OP_ALWAYS,
+ }};
+
+ const VkPipelineVertexInputStateCreateInfo *vi_create_info;
+ vi_create_info = &normal_vi_create_info;
+
+ const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .stageCount = ARRAY_SIZE(pipeline_shader_stages),
+ .pStages = pipeline_shader_stages,
+ .pVertexInputState = vi_create_info,
+ .pInputAssemblyState =
+ &(VkPipelineInputAssemblyStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
+ .primitiveRestartEnable = false,
+ },
+ .pViewportState =
+ &(VkPipelineViewportStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .viewportCount = 1,
+ .scissorCount = 1,
+ },
+ .pDepthStencilState = &depth_stencil_state,
+ .pRasterizationState =
+ &(VkPipelineRasterizationStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .rasterizerDiscardEnable = false,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .cullMode = VK_CULL_MODE_NONE,
+ .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE},
+ .pMultisampleState =
+ &(VkPipelineMultisampleStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .rasterizationSamples = 1,
+ .sampleShadingEnable = false,
+ .pSampleMask = (VkSampleMask[]){UINT32_MAX},
+ },
+ .pColorBlendState =
+ &(VkPipelineColorBlendStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .attachmentCount = 0,
+ .pAttachments =
+ (VkPipelineColorBlendAttachmentState[]){
+ {.colorWriteMask = VK_COLOR_COMPONENT_A_BIT | VK_COLOR_COMPONENT_R_BIT |
+ VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT},
+ }},
+ .pDynamicState =
+ &(VkPipelineDynamicStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .dynamicStateCount = 9,
+ .pDynamicStates =
+ (VkDynamicState[]){
+ VK_DYNAMIC_STATE_VIEWPORT,
+ VK_DYNAMIC_STATE_SCISSOR,
+ VK_DYNAMIC_STATE_LINE_WIDTH,
+ VK_DYNAMIC_STATE_DEPTH_BIAS,
+ VK_DYNAMIC_STATE_BLEND_CONSTANTS,
+ VK_DYNAMIC_STATE_DEPTH_BOUNDS,
+ VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
+ VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
+ VK_DYNAMIC_STATE_STENCIL_REFERENCE,
+ },
+ },
+ .flags = 0,
+ .layout = device->meta_state.resolve_fragment.p_layout,
+ .renderPass = *render_pass,
+ .subpass = 0,
+ };
+
+ const struct radv_graphics_pipeline_create_info radv_pipeline_info = {.use_rectlist = true};
+
+ result = radv_graphics_pipeline_create(
+ radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache),
+ &vk_pipeline_info, &radv_pipeline_info, &device->meta_state.alloc, pipeline);
+
+ ralloc_free(vs);
+ ralloc_free(fs);
+
+ mtx_unlock(&device->meta_state.mtx);
+ return result;
}
VkResult
radv_device_init_meta_resolve_fragment_state(struct radv_device *device, bool on_demand)
{
- VkResult res;
-
- res = create_layout(device);
- if (res != VK_SUCCESS)
- goto fail;
-
- if (on_demand)
- return VK_SUCCESS;
-
- for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
- for (unsigned j = 0; j < NUM_META_FS_KEYS; ++j) {
- res = create_resolve_pipeline(device, i, radv_fs_key_format_exemplars[j]);
- if (res != VK_SUCCESS)
- goto fail;
- }
-
- res = create_depth_stencil_resolve_pipeline(device, i, DEPTH_RESOLVE,
- VK_RESOLVE_MODE_AVERAGE_BIT_KHR);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = create_depth_stencil_resolve_pipeline(device, i, DEPTH_RESOLVE,
- VK_RESOLVE_MODE_MIN_BIT_KHR);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = create_depth_stencil_resolve_pipeline(device, i, DEPTH_RESOLVE,
- VK_RESOLVE_MODE_MAX_BIT_KHR);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = create_depth_stencil_resolve_pipeline(device, i, STENCIL_RESOLVE,
- VK_RESOLVE_MODE_MIN_BIT_KHR);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = create_depth_stencil_resolve_pipeline(device, i, STENCIL_RESOLVE,
- VK_RESOLVE_MODE_MAX_BIT_KHR);
- if (res != VK_SUCCESS)
- goto fail;
- }
-
- res = create_depth_stencil_resolve_pipeline(device, 0, DEPTH_RESOLVE,
- VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = create_depth_stencil_resolve_pipeline(device, 0, STENCIL_RESOLVE,
- VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR);
- if (res != VK_SUCCESS)
- goto fail;
-
- return VK_SUCCESS;
+ VkResult res;
+
+ res = create_layout(device);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ if (on_demand)
+ return VK_SUCCESS;
+
+ for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
+ for (unsigned j = 0; j < NUM_META_FS_KEYS; ++j) {
+ res = create_resolve_pipeline(device, i, radv_fs_key_format_exemplars[j]);
+ if (res != VK_SUCCESS)
+ goto fail;
+ }
+
+ res = create_depth_stencil_resolve_pipeline(device, i, DEPTH_RESOLVE,
+ VK_RESOLVE_MODE_AVERAGE_BIT_KHR);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_depth_stencil_resolve_pipeline(device, i, DEPTH_RESOLVE,
+ VK_RESOLVE_MODE_MIN_BIT_KHR);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_depth_stencil_resolve_pipeline(device, i, DEPTH_RESOLVE,
+ VK_RESOLVE_MODE_MAX_BIT_KHR);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_depth_stencil_resolve_pipeline(device, i, STENCIL_RESOLVE,
+ VK_RESOLVE_MODE_MIN_BIT_KHR);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_depth_stencil_resolve_pipeline(device, i, STENCIL_RESOLVE,
+ VK_RESOLVE_MODE_MAX_BIT_KHR);
+ if (res != VK_SUCCESS)
+ goto fail;
+ }
+
+ res = create_depth_stencil_resolve_pipeline(device, 0, DEPTH_RESOLVE,
+ VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_depth_stencil_resolve_pipeline(device, 0, STENCIL_RESOLVE,
+ VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ return VK_SUCCESS;
fail:
- radv_device_finish_meta_resolve_fragment_state(device);
- return res;
+ radv_device_finish_meta_resolve_fragment_state(device);
+ return res;
}
void
radv_device_finish_meta_resolve_fragment_state(struct radv_device *device)
{
- struct radv_meta_state *state = &device->meta_state;
- for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
- for (unsigned j = 0; j < NUM_META_FS_KEYS; ++j) {
- for(unsigned k =0; k < RADV_META_DST_LAYOUT_COUNT; ++k) {
- radv_DestroyRenderPass(radv_device_to_handle(device),
- state->resolve_fragment.rc[i].render_pass[j][k],
- &state->alloc);
- }
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_fragment.rc[i].pipeline[j],
- &state->alloc);
- }
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_fragment.depth[i].average_pipeline,
- &state->alloc);
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_fragment.depth[i].max_pipeline,
- &state->alloc);
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_fragment.depth[i].min_pipeline,
- &state->alloc);
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_fragment.stencil[i].max_pipeline,
- &state->alloc);
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_fragment.stencil[i].min_pipeline,
- &state->alloc);
- }
-
- radv_DestroyRenderPass(radv_device_to_handle(device),
- state->resolve_fragment.depth_render_pass,
- &state->alloc);
- radv_DestroyRenderPass(radv_device_to_handle(device),
- state->resolve_fragment.stencil_render_pass,
- &state->alloc);
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_fragment.depth_zero_pipeline,
- &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_fragment.stencil_zero_pipeline,
- &state->alloc);
-
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- state->resolve_fragment.ds_layout,
- &state->alloc);
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->resolve_fragment.p_layout,
- &state->alloc);
+ struct radv_meta_state *state = &device->meta_state;
+ for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
+ for (unsigned j = 0; j < NUM_META_FS_KEYS; ++j) {
+ for (unsigned k = 0; k < RADV_META_DST_LAYOUT_COUNT; ++k) {
+ radv_DestroyRenderPass(radv_device_to_handle(device),
+ state->resolve_fragment.rc[i].render_pass[j][k], &state->alloc);
+ }
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->resolve_fragment.rc[i].pipeline[j], &state->alloc);
+ }
+
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->resolve_fragment.depth[i].average_pipeline, &state->alloc);
+
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->resolve_fragment.depth[i].max_pipeline, &state->alloc);
+
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->resolve_fragment.depth[i].min_pipeline, &state->alloc);
+
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->resolve_fragment.stencil[i].max_pipeline, &state->alloc);
+
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->resolve_fragment.stencil[i].min_pipeline, &state->alloc);
+ }
+
+ radv_DestroyRenderPass(radv_device_to_handle(device), state->resolve_fragment.depth_render_pass,
+ &state->alloc);
+ radv_DestroyRenderPass(radv_device_to_handle(device),
+ state->resolve_fragment.stencil_render_pass, &state->alloc);
+
+ radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_fragment.depth_zero_pipeline,
+ &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->resolve_fragment.stencil_zero_pipeline, &state->alloc);
+
+ radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->resolve_fragment.ds_layout,
+ &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->resolve_fragment.p_layout,
+ &state->alloc);
}
static VkPipeline *
-radv_get_resolve_pipeline(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image_view *src_iview,
- struct radv_image_view *dst_iview)
+radv_get_resolve_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview,
+ struct radv_image_view *dst_iview)
{
- struct radv_device *device = cmd_buffer->device;
- unsigned fs_key = radv_format_meta_fs_key(cmd_buffer->device, dst_iview->vk_format);
- const uint32_t samples = src_iview->image->info.samples;
- const uint32_t samples_log2 = ffs(samples) - 1;
- VkPipeline *pipeline;
-
- pipeline = &device->meta_state.resolve_fragment.rc[samples_log2].pipeline[fs_key];
- if (!*pipeline ) {
- VkResult ret;
-
- ret = create_resolve_pipeline(device, samples_log2,
- radv_fs_key_format_exemplars[fs_key]);
- if (ret != VK_SUCCESS) {
- cmd_buffer->record_result = ret;
- return NULL;
- }
- }
-
- return pipeline;
+ struct radv_device *device = cmd_buffer->device;
+ unsigned fs_key = radv_format_meta_fs_key(cmd_buffer->device, dst_iview->vk_format);
+ const uint32_t samples = src_iview->image->info.samples;
+ const uint32_t samples_log2 = ffs(samples) - 1;
+ VkPipeline *pipeline;
+
+ pipeline = &device->meta_state.resolve_fragment.rc[samples_log2].pipeline[fs_key];
+ if (!*pipeline) {
+ VkResult ret;
+
+ ret = create_resolve_pipeline(device, samples_log2, radv_fs_key_format_exemplars[fs_key]);
+ if (ret != VK_SUCCESS) {
+ cmd_buffer->record_result = ret;
+ return NULL;
+ }
+ }
+
+ return pipeline;
}
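The samples_log2 index used above is derived as ffs(samples) - 1, which equals log2(samples) for the power-of-two sample counts Vulkan allows. A standalone check of that mapping:

#include <assert.h>
#include <strings.h> /* ffs() */

int
main(void)
{
   for (int log2 = 0; log2 <= 4; log2++) {
      int samples = 1 << log2; /* 1, 2, 4, 8, 16 */
      assert(ffs(samples) - 1 == log2);
   }
   return 0;
}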
static void
-emit_resolve(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image_view *src_iview,
- struct radv_image_view *dest_iview,
- const VkOffset2D *src_offset,
- const VkOffset2D *dest_offset,
- const VkExtent2D *resolve_extent)
+emit_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview,
+ struct radv_image_view *dest_iview, const VkOffset2D *src_offset,
+ const VkOffset2D *dest_offset, const VkExtent2D *resolve_extent)
{
- struct radv_device *device = cmd_buffer->device;
- VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
- VkPipeline *pipeline;
-
- radv_meta_push_descriptor_set(cmd_buffer,
- VK_PIPELINE_BIND_POINT_GRAPHICS,
- cmd_buffer->device->meta_state.resolve_fragment.p_layout,
- 0, /* set */
- 1, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]) {
- {
- .sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(src_iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- }
- },
- });
-
- cmd_buffer->state.flush_bits |=
- radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_READ_BIT, src_iview->image) |
- radv_dst_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, dest_iview->image);
-
- unsigned push_constants[2] = {
- src_offset->x - dest_offset->x,
- src_offset->y - dest_offset->y,
- };
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.resolve_fragment.p_layout,
- VK_SHADER_STAGE_FRAGMENT_BIT, 0, 8,
- push_constants);
-
- pipeline = radv_get_resolve_pipeline(cmd_buffer, src_iview, dest_iview);
-
- radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
- *pipeline);
-
- radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
- .x = dest_offset->x,
- .y = dest_offset->y,
- .width = resolve_extent->width,
- .height = resolve_extent->height,
- .minDepth = 0.0f,
- .maxDepth = 1.0f
- });
-
- radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkRect2D) {
- .offset = *dest_offset,
- .extent = *resolve_extent,
- });
-
- radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0);
- cmd_buffer->state.flush_bits |=
- radv_src_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, dest_iview->image);
+ struct radv_device *device = cmd_buffer->device;
+ VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
+ VkPipeline *pipeline;
+
+ radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
+ cmd_buffer->device->meta_state.resolve_fragment.p_layout,
+ 0, /* set */
+ 1, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){
+ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .pImageInfo =
+ (VkDescriptorImageInfo[]){
+ {
+ .sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(src_iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }},
+ });
+
+ cmd_buffer->state.flush_bits |=
+ radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_READ_BIT, src_iview->image) |
+ radv_dst_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, dest_iview->image);
+
+ unsigned push_constants[2] = {
+ src_offset->x - dest_offset->x,
+ src_offset->y - dest_offset->y,
+ };
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+ device->meta_state.resolve_fragment.p_layout, VK_SHADER_STAGE_FRAGMENT_BIT,
+ 0, 8, push_constants);
+
+ pipeline = radv_get_resolve_pipeline(cmd_buffer, src_iview, dest_iview);
+
+ radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
+
+ radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
+ &(VkViewport){.x = dest_offset->x,
+ .y = dest_offset->y,
+ .width = resolve_extent->width,
+ .height = resolve_extent->height,
+ .minDepth = 0.0f,
+ .maxDepth = 1.0f});
+
+ radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
+ &(VkRect2D){
+ .offset = *dest_offset,
+ .extent = *resolve_extent,
+ });
+
+ radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0);
+ cmd_buffer->state.flush_bits |=
+ radv_src_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, dest_iview->image);
}
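emit_resolve() pushes two constants, the x/y delta between the source and destination offsets. The color resolve fragment shader is not part of this hunk, so the exact use is an assumption, but the intended coordinate math would look like the following sketch; map_dst_to_src() and ivec2 are hypothetical:

typedef struct {
   int x, y;
} ivec2;

static ivec2
map_dst_to_src(ivec2 frag_coord, ivec2 src_offset, ivec2 dst_offset)
{
   /* Assumed shader-side behavior: add the pushed delta to the fragment
    * position to locate the source texel. */
   ivec2 delta = {src_offset.x - dst_offset.x,   /* push_constants[0] */
                  src_offset.y - dst_offset.y};  /* push_constants[1] */
   return (ivec2){frag_coord.x + delta.x, frag_coord.y + delta.y};
}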
static void
-emit_depth_stencil_resolve(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image_view *src_iview,
- struct radv_image_view *dst_iview,
- const VkExtent2D *resolve_extent,
- VkImageAspectFlags aspects,
- VkResolveModeFlagBits resolve_mode)
+emit_depth_stencil_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview,
+ struct radv_image_view *dst_iview, const VkExtent2D *resolve_extent,
+ VkImageAspectFlags aspects, VkResolveModeFlagBits resolve_mode)
{
- struct radv_device *device = cmd_buffer->device;
- const uint32_t samples = src_iview->image->info.samples;
- const uint32_t samples_log2 = ffs(samples) - 1;
- VkPipeline *pipeline;
-
- radv_meta_push_descriptor_set(cmd_buffer,
- VK_PIPELINE_BIND_POINT_GRAPHICS,
- cmd_buffer->device->meta_state.resolve_fragment.p_layout,
- 0, /* set */
- 1, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]) {
- {
- .sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(src_iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- }
- },
- });
-
- switch (resolve_mode) {
- case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR:
- if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
- pipeline = &device->meta_state.resolve_fragment.depth_zero_pipeline;
- else
- pipeline = &device->meta_state.resolve_fragment.stencil_zero_pipeline;
- break;
- case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
- assert(aspects == VK_IMAGE_ASPECT_DEPTH_BIT);
- pipeline = &device->meta_state.resolve_fragment.depth[samples_log2].average_pipeline;
- break;
- case VK_RESOLVE_MODE_MIN_BIT_KHR:
- if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
- pipeline = &device->meta_state.resolve_fragment.depth[samples_log2].min_pipeline;
- else
- pipeline = &device->meta_state.resolve_fragment.stencil[samples_log2].min_pipeline;
- break;
- case VK_RESOLVE_MODE_MAX_BIT_KHR:
- if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
- pipeline = &device->meta_state.resolve_fragment.depth[samples_log2].max_pipeline;
- else
- pipeline = &device->meta_state.resolve_fragment.stencil[samples_log2].max_pipeline;
- break;
- default:
- unreachable("invalid resolve mode");
- }
-
- if (!*pipeline) {
- int index = aspects == VK_IMAGE_ASPECT_DEPTH_BIT ? DEPTH_RESOLVE : STENCIL_RESOLVE;
- VkResult ret;
-
- ret = create_depth_stencil_resolve_pipeline(device, samples_log2,
- index, resolve_mode);
- if (ret != VK_SUCCESS) {
- cmd_buffer->record_result = ret;
- return;
- }
- }
-
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
-
- radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
- .x = 0,
- .y = 0,
- .width = resolve_extent->width,
- .height = resolve_extent->height,
- .minDepth = 0.0f,
- .maxDepth = 1.0f
- });
-
- radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkRect2D) {
- .offset = (VkOffset2D) { 0, 0 },
- .extent = *resolve_extent,
- });
-
- radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
+ struct radv_device *device = cmd_buffer->device;
+ const uint32_t samples = src_iview->image->info.samples;
+ const uint32_t samples_log2 = ffs(samples) - 1;
+ VkPipeline *pipeline;
+
+ radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
+ cmd_buffer->device->meta_state.resolve_fragment.p_layout,
+ 0, /* set */
+ 1, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){
+ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .pImageInfo =
+ (VkDescriptorImageInfo[]){
+ {
+ .sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(src_iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }},
+ });
+
+ switch (resolve_mode) {
+ case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR:
+ if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
+ pipeline = &device->meta_state.resolve_fragment.depth_zero_pipeline;
+ else
+ pipeline = &device->meta_state.resolve_fragment.stencil_zero_pipeline;
+ break;
+ case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
+ assert(aspects == VK_IMAGE_ASPECT_DEPTH_BIT);
+ pipeline = &device->meta_state.resolve_fragment.depth[samples_log2].average_pipeline;
+ break;
+ case VK_RESOLVE_MODE_MIN_BIT_KHR:
+ if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
+ pipeline = &device->meta_state.resolve_fragment.depth[samples_log2].min_pipeline;
+ else
+ pipeline = &device->meta_state.resolve_fragment.stencil[samples_log2].min_pipeline;
+ break;
+ case VK_RESOLVE_MODE_MAX_BIT_KHR:
+ if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
+ pipeline = &device->meta_state.resolve_fragment.depth[samples_log2].max_pipeline;
+ else
+ pipeline = &device->meta_state.resolve_fragment.stencil[samples_log2].max_pipeline;
+ break;
+ default:
+ unreachable("invalid resolve mode");
+ }
+
+ if (!*pipeline) {
+ int index = aspects == VK_IMAGE_ASPECT_DEPTH_BIT ? DEPTH_RESOLVE : STENCIL_RESOLVE;
+ VkResult ret;
+
+ ret = create_depth_stencil_resolve_pipeline(device, samples_log2, index, resolve_mode);
+ if (ret != VK_SUCCESS) {
+ cmd_buffer->record_result = ret;
+ return;
+ }
+ }
+
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS,
+ *pipeline);
+
+ radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
+ &(VkViewport){.x = 0,
+ .y = 0,
+ .width = resolve_extent->width,
+ .height = resolve_extent->height,
+ .minDepth = 0.0f,
+ .maxDepth = 1.0f});
+
+ radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
+ &(VkRect2D){
+ .offset = (VkOffset2D){0, 0},
+ .extent = *resolve_extent,
+ });
+
+ radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
}
-void radv_meta_resolve_fragment_image(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *src_image,
- VkImageLayout src_image_layout,
- struct radv_image *dest_image,
- VkImageLayout dest_image_layout,
- const VkImageResolve2KHR *region)
+void
+radv_meta_resolve_fragment_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,
+ VkImageLayout src_image_layout, struct radv_image *dest_image,
+ VkImageLayout dest_image_layout, const VkImageResolve2KHR *region)
{
- struct radv_device *device = cmd_buffer->device;
- struct radv_meta_saved_state saved_state;
- const uint32_t samples = src_image->info.samples;
- const uint32_t samples_log2 = ffs(samples) - 1;
- unsigned fs_key = radv_format_meta_fs_key(cmd_buffer->device, dest_image->vk_format);
- unsigned dst_layout = radv_meta_dst_layout_from_layout(dest_image_layout);
- VkRenderPass rp;
-
- radv_decompress_resolve_src(cmd_buffer, src_image, src_image_layout,
- region);
-
- if (!device->meta_state.resolve_fragment.rc[samples_log2].render_pass[fs_key][dst_layout]) {
- VkResult ret = create_resolve_pipeline(device, samples_log2, radv_fs_key_format_exemplars[fs_key]);
- if (ret != VK_SUCCESS) {
- cmd_buffer->record_result = ret;
- return;
- }
- }
-
- rp = device->meta_state.resolve_fragment.rc[samples_log2].render_pass[fs_key][dst_layout];
-
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_GRAPHICS_PIPELINE |
- RADV_META_SAVE_CONSTANTS |
- RADV_META_SAVE_DESCRIPTORS);
-
- assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
- assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
- assert(region->srcSubresource.layerCount == region->dstSubresource.layerCount);
-
- const uint32_t src_base_layer =
- radv_meta_get_iview_layer(src_image, &region->srcSubresource,
- &region->srcOffset);
-
- const uint32_t dest_base_layer =
- radv_meta_get_iview_layer(dest_image, &region->dstSubresource,
- &region->dstOffset);
-
- const struct VkExtent3D extent =
- radv_sanitize_image_extent(src_image->type, region->extent);
- const struct VkOffset3D srcOffset =
- radv_sanitize_image_offset(src_image->type, region->srcOffset);
- const struct VkOffset3D dstOffset =
- radv_sanitize_image_offset(dest_image->type, region->dstOffset);
-
- for (uint32_t layer = 0; layer < region->srcSubresource.layerCount;
- ++layer) {
-
- struct radv_image_view src_iview;
- radv_image_view_init(&src_iview, cmd_buffer->device,
- &(VkImageViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(src_image),
- .viewType = radv_meta_get_view_type(src_image),
- .format = src_image->vk_format,
- .subresourceRange = {
- .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .baseMipLevel = region->srcSubresource.mipLevel,
- .levelCount = 1,
- .baseArrayLayer = src_base_layer + layer,
- .layerCount = 1,
- },
- }, NULL);
-
- struct radv_image_view dest_iview;
- radv_image_view_init(&dest_iview, cmd_buffer->device,
- &(VkImageViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(dest_image),
- .viewType = radv_meta_get_view_type(dest_image),
- .format = dest_image->vk_format,
- .subresourceRange = {
- .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .baseMipLevel = region->dstSubresource.mipLevel,
- .levelCount = 1,
- .baseArrayLayer = dest_base_layer + layer,
- .layerCount = 1,
- },
- }, NULL);
-
-
- VkFramebuffer fb;
- radv_CreateFramebuffer(radv_device_to_handle(cmd_buffer->device),
- &(VkFramebufferCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
- .attachmentCount = 1,
- .pAttachments = (VkImageView[]) {
- radv_image_view_to_handle(&dest_iview),
- },
- .width = extent.width + dstOffset.x,
- .height = extent.height + dstOffset.y,
- .layers = 1
- }, &cmd_buffer->pool->alloc, &fb);
-
- radv_cmd_buffer_begin_render_pass(cmd_buffer,
- &(VkRenderPassBeginInfo) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
- .renderPass = rp,
- .framebuffer = fb,
- .renderArea = {
- .offset = { dstOffset.x, dstOffset.y, },
- .extent = { extent.width, extent.height },
- },
- .clearValueCount = 0,
- .pClearValues = NULL,
- }, NULL);
-
- radv_cmd_buffer_set_subpass(cmd_buffer,
- &cmd_buffer->state.pass->subpasses[0]);
-
- emit_resolve(cmd_buffer,
- &src_iview,
- &dest_iview,
- &(VkOffset2D) { srcOffset.x, srcOffset.y },
- &(VkOffset2D) { dstOffset.x, dstOffset.y },
- &(VkExtent2D) { extent.width, extent.height });
-
- radv_cmd_buffer_end_render_pass(cmd_buffer);
-
- radv_DestroyFramebuffer(radv_device_to_handle(cmd_buffer->device), fb, &cmd_buffer->pool->alloc);
- }
-
- radv_meta_restore(&saved_state, cmd_buffer);
+ struct radv_device *device = cmd_buffer->device;
+ struct radv_meta_saved_state saved_state;
+ const uint32_t samples = src_image->info.samples;
+ const uint32_t samples_log2 = ffs(samples) - 1;
+ unsigned fs_key = radv_format_meta_fs_key(cmd_buffer->device, dest_image->vk_format);
+ unsigned dst_layout = radv_meta_dst_layout_from_layout(dest_image_layout);
+ VkRenderPass rp;
+
+ radv_decompress_resolve_src(cmd_buffer, src_image, src_image_layout, region);
+
+ if (!device->meta_state.resolve_fragment.rc[samples_log2].render_pass[fs_key][dst_layout]) {
+ VkResult ret =
+ create_resolve_pipeline(device, samples_log2, radv_fs_key_format_exemplars[fs_key]);
+ if (ret != VK_SUCCESS) {
+ cmd_buffer->record_result = ret;
+ return;
+ }
+ }
+
+ rp = device->meta_state.resolve_fragment.rc[samples_log2].render_pass[fs_key][dst_layout];
+
+ radv_meta_save(
+ &saved_state, cmd_buffer,
+ RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
+
+ assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
+ assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
+ assert(region->srcSubresource.layerCount == region->dstSubresource.layerCount);
+
+ const uint32_t src_base_layer =
+ radv_meta_get_iview_layer(src_image, &region->srcSubresource, &region->srcOffset);
+
+ const uint32_t dest_base_layer =
+ radv_meta_get_iview_layer(dest_image, &region->dstSubresource, &region->dstOffset);
+
+ const struct VkExtent3D extent = radv_sanitize_image_extent(src_image->type, region->extent);
+ const struct VkOffset3D srcOffset =
+ radv_sanitize_image_offset(src_image->type, region->srcOffset);
+ const struct VkOffset3D dstOffset =
+ radv_sanitize_image_offset(dest_image->type, region->dstOffset);
+
+ for (uint32_t layer = 0; layer < region->srcSubresource.layerCount; ++layer) {
+
+ struct radv_image_view src_iview;
+ radv_image_view_init(&src_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(src_image),
+ .viewType = radv_meta_get_view_type(src_image),
+ .format = src_image->vk_format,
+ .subresourceRange =
+ {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = region->srcSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = src_base_layer + layer,
+ .layerCount = 1,
+ },
+ },
+ NULL);
+
+ struct radv_image_view dest_iview;
+ radv_image_view_init(&dest_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(dest_image),
+ .viewType = radv_meta_get_view_type(dest_image),
+ .format = dest_image->vk_format,
+ .subresourceRange =
+ {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = region->dstSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = dest_base_layer + layer,
+ .layerCount = 1,
+ },
+ },
+ NULL);
+
+ VkFramebuffer fb;
+ radv_CreateFramebuffer(
+ radv_device_to_handle(cmd_buffer->device),
+ &(VkFramebufferCreateInfo){.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments =
+ (VkImageView[]){
+ radv_image_view_to_handle(&dest_iview),
+ },
+ .width = extent.width + dstOffset.x,
+ .height = extent.height + dstOffset.y,
+ .layers = 1},
+ &cmd_buffer->pool->alloc, &fb);
+
+ radv_cmd_buffer_begin_render_pass(cmd_buffer,
+ &(VkRenderPassBeginInfo){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .renderPass = rp,
+ .framebuffer = fb,
+ .renderArea =
+ {
+ .offset =
+ {
+ dstOffset.x,
+ dstOffset.y,
+ },
+ .extent = {extent.width, extent.height},
+ },
+ .clearValueCount = 0,
+ .pClearValues = NULL,
+ },
+ NULL);
+
+ radv_cmd_buffer_set_subpass(cmd_buffer, &cmd_buffer->state.pass->subpasses[0]);
+
+ emit_resolve(cmd_buffer, &src_iview, &dest_iview, &(VkOffset2D){srcOffset.x, srcOffset.y},
+ &(VkOffset2D){dstOffset.x, dstOffset.y},
+ &(VkExtent2D){extent.width, extent.height});
+
+ radv_cmd_buffer_end_render_pass(cmd_buffer);
+
+ radv_DestroyFramebuffer(radv_device_to_handle(cmd_buffer->device), fb,
+ &cmd_buffer->pool->alloc);
+ }
+
+ radv_meta_restore(&saved_state, cmd_buffer);
}
-
/**
* Emit any needed resolves for the current subpass.
*/
void
radv_cmd_buffer_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
- const struct radv_subpass *subpass = cmd_buffer->state.subpass;
- struct radv_meta_saved_state saved_state;
- struct radv_subpass_barrier barrier;
-
- /* Resolves happen before the end-of-subpass barriers get executed,
- * so we have to make the attachment shader-readable */
- barrier.src_stage_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
- barrier.src_access_mask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
- barrier.dst_access_mask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
- radv_subpass_barrier(cmd_buffer, &barrier);
-
- radv_decompress_resolve_subpass_src(cmd_buffer);
-
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_GRAPHICS_PIPELINE |
- RADV_META_SAVE_CONSTANTS |
- RADV_META_SAVE_DESCRIPTORS);
-
- for (uint32_t i = 0; i < subpass->color_count; ++i) {
- struct radv_subpass_attachment src_att = subpass->color_attachments[i];
- struct radv_subpass_attachment dest_att = subpass->resolve_attachments[i];
-
- if (dest_att.attachment == VK_ATTACHMENT_UNUSED)
- continue;
-
- struct radv_image_view *dest_iview = cmd_buffer->state.attachments[dest_att.attachment].iview;
- struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview;
-
- struct radv_subpass resolve_subpass = {
- .color_count = 1,
- .color_attachments = (struct radv_subpass_attachment[]) { dest_att },
- .depth_stencil_attachment = NULL,
- };
-
- radv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass);
-
- emit_resolve(cmd_buffer,
- src_iview,
- dest_iview,
- &(VkOffset2D) { 0, 0 },
- &(VkOffset2D) { 0, 0 },
- &(VkExtent2D) { fb->width, fb->height });
- }
-
- radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
-
- radv_meta_restore(&saved_state, cmd_buffer);
+ struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
+ const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+ struct radv_meta_saved_state saved_state;
+ struct radv_subpass_barrier barrier;
+
+ /* Resolves happen before the end-of-subpass barriers get executed,
+ * so we have to make the attachment shader-readable */
+ barrier.src_stage_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+ barrier.src_access_mask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+ barrier.dst_access_mask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
+ radv_subpass_barrier(cmd_buffer, &barrier);
+
+ radv_decompress_resolve_subpass_src(cmd_buffer);
+
+ radv_meta_save(
+ &saved_state, cmd_buffer,
+ RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
+
+ for (uint32_t i = 0; i < subpass->color_count; ++i) {
+ struct radv_subpass_attachment src_att = subpass->color_attachments[i];
+ struct radv_subpass_attachment dest_att = subpass->resolve_attachments[i];
+
+ if (dest_att.attachment == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ struct radv_image_view *dest_iview = cmd_buffer->state.attachments[dest_att.attachment].iview;
+ struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview;
+
+ struct radv_subpass resolve_subpass = {
+ .color_count = 1,
+ .color_attachments = (struct radv_subpass_attachment[]){dest_att},
+ .depth_stencil_attachment = NULL,
+ };
+
+ radv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass);
+
+ emit_resolve(cmd_buffer, src_iview, dest_iview, &(VkOffset2D){0, 0}, &(VkOffset2D){0, 0},
+ &(VkExtent2D){fb->width, fb->height});
+ }
+
+ radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
+
+ radv_meta_restore(&saved_state, cmd_buffer);
}
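
The subpass barrier at the top of this function is the easy-to-miss part of the resolve path: color attachment writes must be made visible as input-attachment reads before the resolve fragment shader samples them. A rough application-level analogue, written against the plain Vulkan API rather than RADV internals (the destination stage is an assumption here, since only the access masks appear above):

#include <vulkan/vulkan.h>

/* Illustrative sketch only: make prior color-attachment writes visible to
 * fragment-shader input-attachment reads, mirroring the access masks set up
 * in radv_cmd_buffer_resolve_subpass_fs() above. */
static void
make_color_writes_input_readable(VkCommandBuffer cmd)
{
   const VkMemoryBarrier barrier = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
      .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
      .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT,
   };
   /* Destination stage assumed to be the fragment shader, where input
    * attachments are read. */
   vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
                        VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0,
                        1, &barrier, 0, NULL, 0, NULL);
}
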
/**
@@ -1187,73 +1116,70 @@ radv_cmd_buffer_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer)
*/
void
radv_depth_stencil_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer,
- VkImageAspectFlags aspects,
- VkResolveModeFlagBits resolve_mode)
+ VkImageAspectFlags aspects,
+ VkResolveModeFlagBits resolve_mode)
{
- struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
- const struct radv_subpass *subpass = cmd_buffer->state.subpass;
- struct radv_meta_saved_state saved_state;
- struct radv_subpass_barrier barrier;
-
- /* Resolves happen before the end-of-subpass barriers get executed,
- * so we have to make the attachment shader-readable */
- barrier.src_stage_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
- barrier.src_access_mask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
- barrier.dst_access_mask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
- radv_subpass_barrier(cmd_buffer, &barrier);
-
- struct radv_subpass_attachment src_att = *subpass->depth_stencil_attachment;
- struct radv_image_view *src_iview =
- cmd_buffer->state.attachments[src_att.attachment].iview;
- struct radv_image *src_image = src_iview->image;
-
- VkImageResolve2KHR region = {0};
- region.sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2_KHR;
- region.srcSubresource.aspectMask = aspects;
- region.srcSubresource.mipLevel = 0;
- region.srcSubresource.baseArrayLayer = 0;
- region.srcSubresource.layerCount = 1;
-
- radv_decompress_resolve_src(cmd_buffer, src_image, src_att.layout, &region);
-
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_GRAPHICS_PIPELINE |
- RADV_META_SAVE_DESCRIPTORS);
-
- struct radv_subpass_attachment dst_att = *subpass->ds_resolve_attachment;
- struct radv_image_view *dst_iview =
- cmd_buffer->state.attachments[dst_att.attachment].iview;
-
- struct radv_subpass resolve_subpass = {
- .color_count = 0,
- .color_attachments = NULL,
- .depth_stencil_attachment = (struct radv_subpass_attachment *) { &dst_att },
- };
-
- radv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass);
-
- struct radv_image_view tsrc_iview;
- radv_image_view_init(&tsrc_iview, cmd_buffer->device,
- &(VkImageViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(src_image),
- .viewType = radv_meta_get_view_type(src_image),
- .format = src_iview->vk_format,
- .subresourceRange = {
- .aspectMask = aspects,
- .baseMipLevel = 0,
- .levelCount = 1,
- .baseArrayLayer = 0,
- .layerCount = 1,
- },
- }, NULL);
-
- emit_depth_stencil_resolve(cmd_buffer, &tsrc_iview, dst_iview,
- &(VkExtent2D) { fb->width, fb->height },
- aspects,
- resolve_mode);
-
- radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
-
- radv_meta_restore(&saved_state, cmd_buffer);
+ struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
+ const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+ struct radv_meta_saved_state saved_state;
+ struct radv_subpass_barrier barrier;
+
+ /* Resolves happen before the end-of-subpass barriers get executed,
+ * so we have to make the attachment shader-readable */
+ barrier.src_stage_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+ barrier.src_access_mask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+ barrier.dst_access_mask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
+ radv_subpass_barrier(cmd_buffer, &barrier);
+
+ struct radv_subpass_attachment src_att = *subpass->depth_stencil_attachment;
+ struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview;
+ struct radv_image *src_image = src_iview->image;
+
+ VkImageResolve2KHR region = {0};
+ region.sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2_KHR;
+ region.srcSubresource.aspectMask = aspects;
+ region.srcSubresource.mipLevel = 0;
+ region.srcSubresource.baseArrayLayer = 0;
+ region.srcSubresource.layerCount = 1;
+
+ radv_decompress_resolve_src(cmd_buffer, src_image, src_att.layout, &region);
+
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_DESCRIPTORS);
+
+ struct radv_subpass_attachment dst_att = *subpass->ds_resolve_attachment;
+ struct radv_image_view *dst_iview = cmd_buffer->state.attachments[dst_att.attachment].iview;
+
+ struct radv_subpass resolve_subpass = {
+ .color_count = 0,
+ .color_attachments = NULL,
+ .depth_stencil_attachment = (struct radv_subpass_attachment *){&dst_att},
+ };
+
+ radv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass);
+
+ struct radv_image_view tsrc_iview;
+ radv_image_view_init(&tsrc_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(src_image),
+ .viewType = radv_meta_get_view_type(src_image),
+ .format = src_iview->vk_format,
+ .subresourceRange =
+ {
+ .aspectMask = aspects,
+ .baseMipLevel = 0,
+ .levelCount = 1,
+ .baseArrayLayer = 0,
+ .layerCount = 1,
+ },
+ },
+ NULL);
+
+ emit_depth_stencil_resolve(cmd_buffer, &tsrc_iview, dst_iview,
+ &(VkExtent2D){fb->width, fb->height}, aspects, resolve_mode);
+
+ radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
+
+ radv_meta_restore(&saved_state, cmd_buffer);
}
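
For readers unfamiliar with VkResolveModeFlagBits, the resolve_mode forwarded to emit_depth_stencil_resolve() selects how the per-sample depth values collapse into one. A scalar, CPU-side sketch of that selection (purely illustrative; RADV does this in a meta fragment shader, and the helper name is invented):

#include <math.h>
#include <vulkan/vulkan.h>

/* Hypothetical helper: combine per-sample depth values according to the
 * requested resolve mode. Stencil follows the same pattern, minus AVERAGE. */
static float
resolve_depth_samples(const float *samples, unsigned count, VkResolveModeFlagBits mode)
{
   float value = samples[0]; /* VK_RESOLVE_MODE_SAMPLE_ZERO_BIT and loop seed */

   for (unsigned s = 1; s < count; s++) {
      switch (mode) {
      case VK_RESOLVE_MODE_MIN_BIT:
         value = fminf(value, samples[s]);
         break;
      case VK_RESOLVE_MODE_MAX_BIT:
         value = fmaxf(value, samples[s]);
         break;
      case VK_RESOLVE_MODE_AVERAGE_BIT:
         value += samples[s];
         break;
      default: /* VK_RESOLVE_MODE_SAMPLE_ZERO_BIT: keep sample 0 */
         break;
      }
   }

   if (mode == VK_RESOLVE_MODE_AVERAGE_BIT)
      value /= (float)count;
   return value;
}
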
diff --git a/src/amd/vulkan/radv_nir_lower_ycbcr_textures.c b/src/amd/vulkan/radv_nir_lower_ycbcr_textures.c
index ced1a83c082..a42852faac8 100644
--- a/src/amd/vulkan/radv_nir_lower_ycbcr_textures.c
+++ b/src/amd/vulkan/radv_nir_lower_ycbcr_textures.c
@@ -21,321 +21,289 @@
* IN THE SOFTWARE.
*/
-#include "radv_private.h"
-#include "radv_shader.h"
-#include "vk_format.h"
#include "nir/nir.h"
#include "nir/nir_builder.h"
#include "nir/nir_vulkan.h"
+#include "radv_private.h"
+#include "radv_shader.h"
+#include "vk_format.h"
struct ycbcr_state {
- nir_builder *builder;
- nir_ssa_def *image_size;
- nir_tex_instr *origin_tex;
- nir_deref_instr *tex_deref;
- const struct radv_sampler_ycbcr_conversion *conversion;
+ nir_builder *builder;
+ nir_ssa_def *image_size;
+ nir_tex_instr *origin_tex;
+ nir_deref_instr *tex_deref;
+ const struct radv_sampler_ycbcr_conversion *conversion;
};
static nir_ssa_def *
get_texture_size(struct ycbcr_state *state, nir_deref_instr *texture)
{
- nir_builder *b = state->builder;
- const struct glsl_type *type = texture->type;
- nir_tex_instr *tex = nir_tex_instr_create(b->shader, 1);
+ nir_builder *b = state->builder;
+ const struct glsl_type *type = texture->type;
+ nir_tex_instr *tex = nir_tex_instr_create(b->shader, 1);
- tex->op = nir_texop_txs;
- tex->sampler_dim = glsl_get_sampler_dim(type);
- tex->is_array = glsl_sampler_type_is_array(type);
- tex->is_shadow = glsl_sampler_type_is_shadow(type);
- tex->dest_type = nir_type_int32;
+ tex->op = nir_texop_txs;
+ tex->sampler_dim = glsl_get_sampler_dim(type);
+ tex->is_array = glsl_sampler_type_is_array(type);
+ tex->is_shadow = glsl_sampler_type_is_shadow(type);
+ tex->dest_type = nir_type_int32;
- tex->src[0].src_type = nir_tex_src_texture_deref;
- tex->src[0].src = nir_src_for_ssa(&texture->dest.ssa);
+ tex->src[0].src_type = nir_tex_src_texture_deref;
+ tex->src[0].src = nir_src_for_ssa(&texture->dest.ssa);
- nir_ssa_dest_init(&tex->instr, &tex->dest,
- nir_tex_instr_dest_size(tex), 32, NULL);
- nir_builder_instr_insert(b, &tex->instr);
+ nir_ssa_dest_init(&tex->instr, &tex->dest, nir_tex_instr_dest_size(tex), 32, NULL);
+ nir_builder_instr_insert(b, &tex->instr);
- return nir_i2f32(b, &tex->dest.ssa);
+ return nir_i2f32(b, &tex->dest.ssa);
}
static nir_ssa_def *
-implicit_downsampled_coord(nir_builder *b,
- nir_ssa_def *value,
- nir_ssa_def *max_value,
+implicit_downsampled_coord(nir_builder *b, nir_ssa_def *value, nir_ssa_def *max_value,
int div_scale)
{
- return nir_fadd(b,
- value,
- nir_fdiv(b,
- nir_imm_float(b, 1.0f),
- nir_fmul(b,
- nir_imm_float(b, div_scale),
- max_value)));
+ return nir_fadd(
+ b, value,
+ nir_fdiv(b, nir_imm_float(b, 1.0f), nir_fmul(b, nir_imm_float(b, div_scale), max_value)));
}
static nir_ssa_def *
-implicit_downsampled_coords(struct ycbcr_state *state,
- nir_ssa_def *old_coords)
+implicit_downsampled_coords(struct ycbcr_state *state, nir_ssa_def *old_coords)
{
- nir_builder *b = state->builder;
- const struct radv_sampler_ycbcr_conversion *conversion = state->conversion;
- nir_ssa_def *image_size = NULL;
- nir_ssa_def *comp[4] = { NULL, };
- enum pipe_video_chroma_format chroma_format = pipe_format_to_chroma_format(vk_format_to_pipe_format(state->conversion->format));
- const unsigned divisors[2] = {
- chroma_format <= PIPE_VIDEO_CHROMA_FORMAT_422 ? 2 : 1,
- chroma_format <= PIPE_VIDEO_CHROMA_FORMAT_420 ? 2 : 1
- };
-
- for (int c = 0; c < old_coords->num_components; c++) {
- if (c < ARRAY_SIZE(divisors) && divisors[c] > 1 &&
- conversion->chroma_offsets[c] == VK_CHROMA_LOCATION_COSITED_EVEN) {
- if (!image_size)
- image_size = get_texture_size(state, state->tex_deref);
-
- comp[c] = implicit_downsampled_coord(b,
- nir_channel(b, old_coords, c),
- nir_channel(b, image_size, c),
- divisors[c]);
- } else {
- comp[c] = nir_channel(b, old_coords, c);
- }
- }
-
- return nir_vec(b, comp, old_coords->num_components);
+ nir_builder *b = state->builder;
+ const struct radv_sampler_ycbcr_conversion *conversion = state->conversion;
+ nir_ssa_def *image_size = NULL;
+ nir_ssa_def *comp[4] = {
+ NULL,
+ };
+ enum pipe_video_chroma_format chroma_format =
+ pipe_format_to_chroma_format(vk_format_to_pipe_format(state->conversion->format));
+ const unsigned divisors[2] = {chroma_format <= PIPE_VIDEO_CHROMA_FORMAT_422 ? 2 : 1,
+ chroma_format <= PIPE_VIDEO_CHROMA_FORMAT_420 ? 2 : 1};
+
+ for (int c = 0; c < old_coords->num_components; c++) {
+ if (c < ARRAY_SIZE(divisors) && divisors[c] > 1 &&
+ conversion->chroma_offsets[c] == VK_CHROMA_LOCATION_COSITED_EVEN) {
+ if (!image_size)
+ image_size = get_texture_size(state, state->tex_deref);
+
+ comp[c] = implicit_downsampled_coord(b, nir_channel(b, old_coords, c),
+ nir_channel(b, image_size, c), divisors[c]);
+ } else {
+ comp[c] = nir_channel(b, old_coords, c);
+ }
+ }
+
+ return nir_vec(b, comp, old_coords->num_components);
}
static nir_ssa_def *
-create_plane_tex_instr_implicit(struct ycbcr_state *state,
- uint32_t plane)
+create_plane_tex_instr_implicit(struct ycbcr_state *state, uint32_t plane)
{
- nir_builder *b = state->builder;
- nir_tex_instr *old_tex = state->origin_tex;
- nir_tex_instr *tex = nir_tex_instr_create(b->shader, old_tex->num_srcs+ 1);
- for (uint32_t i = 0; i < old_tex->num_srcs; i++) {
- tex->src[i].src_type = old_tex->src[i].src_type;
-
- switch (old_tex->src[i].src_type) {
- case nir_tex_src_coord:
- if (plane && true/*state->conversion->chroma_reconstruction*/) {
- assert(old_tex->src[i].src.is_ssa);
- tex->src[i].src =
- nir_src_for_ssa(implicit_downsampled_coords(state,
- old_tex->src[i].src.ssa));
- break;
- }
- /* fall through */
- default:
- nir_src_copy(&tex->src[i].src, &old_tex->src[i].src, tex);
- break;
- }
- }
-
- tex->src[tex->num_srcs - 1].src = nir_src_for_ssa(nir_imm_int(b, plane));
- tex->src[tex->num_srcs - 1].src_type = nir_tex_src_plane;
-
- tex->sampler_dim = old_tex->sampler_dim;
- tex->dest_type = old_tex->dest_type;
- tex->is_array = old_tex->is_array;
-
- tex->op = old_tex->op;
- tex->coord_components = old_tex->coord_components;
- tex->is_new_style_shadow = old_tex->is_new_style_shadow;
- tex->component = old_tex->component;
-
- tex->texture_index = old_tex->texture_index;
- tex->sampler_index = old_tex->sampler_index;
-
- nir_ssa_dest_init(&tex->instr, &tex->dest,
- old_tex->dest.ssa.num_components,
- nir_dest_bit_size(old_tex->dest), NULL);
- nir_builder_instr_insert(b, &tex->instr);
-
- return &tex->dest.ssa;
+ nir_builder *b = state->builder;
+ nir_tex_instr *old_tex = state->origin_tex;
+ nir_tex_instr *tex = nir_tex_instr_create(b->shader, old_tex->num_srcs + 1);
+ for (uint32_t i = 0; i < old_tex->num_srcs; i++) {
+ tex->src[i].src_type = old_tex->src[i].src_type;
+
+ switch (old_tex->src[i].src_type) {
+ case nir_tex_src_coord:
+ if (plane && true /*state->conversion->chroma_reconstruction*/) {
+ assert(old_tex->src[i].src.is_ssa);
+ tex->src[i].src =
+ nir_src_for_ssa(implicit_downsampled_coords(state, old_tex->src[i].src.ssa));
+ break;
+ }
+ /* fall through */
+ default:
+ nir_src_copy(&tex->src[i].src, &old_tex->src[i].src, tex);
+ break;
+ }
+ }
+
+ tex->src[tex->num_srcs - 1].src = nir_src_for_ssa(nir_imm_int(b, plane));
+ tex->src[tex->num_srcs - 1].src_type = nir_tex_src_plane;
+
+ tex->sampler_dim = old_tex->sampler_dim;
+ tex->dest_type = old_tex->dest_type;
+ tex->is_array = old_tex->is_array;
+
+ tex->op = old_tex->op;
+ tex->coord_components = old_tex->coord_components;
+ tex->is_new_style_shadow = old_tex->is_new_style_shadow;
+ tex->component = old_tex->component;
+
+ tex->texture_index = old_tex->texture_index;
+ tex->sampler_index = old_tex->sampler_index;
+
+ nir_ssa_dest_init(&tex->instr, &tex->dest, old_tex->dest.ssa.num_components,
+ nir_dest_bit_size(old_tex->dest), NULL);
+ nir_builder_instr_insert(b, &tex->instr);
+
+ return &tex->dest.ssa;
}
struct swizzle_info {
- unsigned plane[4];
- unsigned swizzle[4];
+ unsigned plane[4];
+ unsigned swizzle[4];
};
static struct swizzle_info
get_plane_swizzles(VkFormat format)
{
- int planes = vk_format_get_plane_count(format);
- switch (planes) {
- case 3:
- return (struct swizzle_info) {
- {2, 0, 1, 0},
- {0, 0, 0, 3}
- };
- case 2:
- return (struct swizzle_info) {
- {1, 0, 1, 0},
- {1, 0, 0, 3}
- };
- case 1:
- return (struct swizzle_info) {
- {0, 0, 0, 0},
- {0, 1, 2, 3}
- };
- default:
- unreachable("unhandled plane count for ycbcr swizzling");
- }
+ int planes = vk_format_get_plane_count(format);
+ switch (planes) {
+ case 3:
+ return (struct swizzle_info){{2, 0, 1, 0}, {0, 0, 0, 3}};
+ case 2:
+ return (struct swizzle_info){{1, 0, 1, 0}, {1, 0, 0, 3}};
+ case 1:
+ return (struct swizzle_info){{0, 0, 0, 0}, {0, 1, 2, 3}};
+ default:
+ unreachable("unhandled plane count for ycbcr swizzling");
+ }
}
-
static nir_ssa_def *
-build_swizzled_components(nir_builder *builder,
- VkFormat format,
- VkComponentMapping mapping,
+build_swizzled_components(nir_builder *builder, VkFormat format, VkComponentMapping mapping,
nir_ssa_def **plane_values)
{
- struct swizzle_info plane_swizzle = get_plane_swizzles(format);
- enum pipe_swizzle swizzles[4];
- nir_ssa_def *values[4];
-
- vk_format_compose_swizzles(&mapping, (const unsigned char[4]){0,1,2,3}, swizzles);
-
- nir_ssa_def *zero = nir_imm_float(builder, 0.0f);
- nir_ssa_def *one = nir_imm_float(builder, 1.0f);
-
- for (unsigned i = 0; i < 4; ++i) {
- switch(swizzles[i]) {
- case PIPE_SWIZZLE_X:
- case PIPE_SWIZZLE_Y:
- case PIPE_SWIZZLE_Z:
- case PIPE_SWIZZLE_W: {
- unsigned channel = swizzles[i] - PIPE_SWIZZLE_X;
- values[i] = nir_channel(builder,
- plane_values[plane_swizzle.plane[channel]],
- plane_swizzle.swizzle[channel]);
- break;
- }
- case PIPE_SWIZZLE_0:
- values[i] = zero;
- break;
- case PIPE_SWIZZLE_1:
- values[i] = one;
- break;
- default:
- unreachable("unhandled swizzle");
- }
- }
- return nir_vec(builder, values, 4);
+ struct swizzle_info plane_swizzle = get_plane_swizzles(format);
+ enum pipe_swizzle swizzles[4];
+ nir_ssa_def *values[4];
+
+ vk_format_compose_swizzles(&mapping, (const unsigned char[4]){0, 1, 2, 3}, swizzles);
+
+ nir_ssa_def *zero = nir_imm_float(builder, 0.0f);
+ nir_ssa_def *one = nir_imm_float(builder, 1.0f);
+
+ for (unsigned i = 0; i < 4; ++i) {
+ switch (swizzles[i]) {
+ case PIPE_SWIZZLE_X:
+ case PIPE_SWIZZLE_Y:
+ case PIPE_SWIZZLE_Z:
+ case PIPE_SWIZZLE_W: {
+ unsigned channel = swizzles[i] - PIPE_SWIZZLE_X;
+ values[i] = nir_channel(builder, plane_values[plane_swizzle.plane[channel]],
+ plane_swizzle.swizzle[channel]);
+ break;
+ }
+ case PIPE_SWIZZLE_0:
+ values[i] = zero;
+ break;
+ case PIPE_SWIZZLE_1:
+ values[i] = one;
+ break;
+ default:
+ unreachable("unhandled swizzle");
+ }
+ }
+ return nir_vec(builder, values, 4);
}
static bool
-try_lower_tex_ycbcr(const struct radv_pipeline_layout *layout,
- nir_builder *builder,
+try_lower_tex_ycbcr(const struct radv_pipeline_layout *layout, nir_builder *builder,
nir_tex_instr *tex)
{
- int deref_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
- assert(deref_src_idx >= 0);
- nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
-
- nir_variable *var = nir_deref_instr_get_variable(deref);
- const struct radv_descriptor_set_layout *set_layout =
- layout->set[var->data.descriptor_set].layout;
- const struct radv_descriptor_set_binding_layout *binding =
- &set_layout->binding[var->data.binding];
- const struct radv_sampler_ycbcr_conversion *ycbcr_samplers =
- radv_immutable_ycbcr_samplers(set_layout, var->data.binding);
-
- if (!ycbcr_samplers)
- return false;
-
- /* For the following instructions, we don't apply any change and let the
- * instruction apply to the first plane.
- */
- if (tex->op == nir_texop_txs ||
- tex->op == nir_texop_query_levels ||
- tex->op == nir_texop_lod)
- return false;
-
- assert(tex->texture_index == 0);
- unsigned array_index = 0;
- if (deref->deref_type != nir_deref_type_var) {
- assert(deref->deref_type == nir_deref_type_array);
- if (!nir_src_is_const(deref->arr.index))
- return false;
- array_index = nir_src_as_uint(deref->arr.index);
- array_index = MIN2(array_index, binding->array_size - 1);
- }
- const struct radv_sampler_ycbcr_conversion *ycbcr_sampler = ycbcr_samplers + array_index;
-
- if (ycbcr_sampler->format == VK_FORMAT_UNDEFINED)
- return false;
-
- struct ycbcr_state state = {
- .builder = builder,
- .origin_tex = tex,
- .tex_deref = deref,
- .conversion = ycbcr_sampler,
- };
-
- builder->cursor = nir_before_instr(&tex->instr);
-
- VkFormat format = state.conversion->format;
- const int plane_count = vk_format_get_plane_count(format);
- nir_ssa_def *plane_values[3];
-
- for (int p = 0; p < plane_count; ++p) {
- plane_values[p] = create_plane_tex_instr_implicit(&state, p);
- }
-
- nir_ssa_def *result = build_swizzled_components(builder, format, ycbcr_sampler->components, plane_values);
- if (state.conversion->ycbcr_model != VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY) {
- VkFormat first_format = vk_format_get_plane_format(format, 0);
- uint32_t bits = vk_format_get_component_bits(first_format, UTIL_FORMAT_COLORSPACE_RGB, PIPE_SWIZZLE_X);
- /* TODO: swizzle and bpcs */
- uint32_t bpcs[3] = {bits, bits, bits};
- result = nir_convert_ycbcr_to_rgb(builder,
- state.conversion->ycbcr_model,
- state.conversion->ycbcr_range,
- result,
- bpcs);
- }
-
- nir_ssa_def_rewrite_uses(&tex->dest.ssa, result);
- nir_instr_remove(&tex->instr);
-
- return true;
+ int deref_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
+ assert(deref_src_idx >= 0);
+ nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
+
+ nir_variable *var = nir_deref_instr_get_variable(deref);
+ const struct radv_descriptor_set_layout *set_layout =
+ layout->set[var->data.descriptor_set].layout;
+ const struct radv_descriptor_set_binding_layout *binding =
+ &set_layout->binding[var->data.binding];
+ const struct radv_sampler_ycbcr_conversion *ycbcr_samplers =
+ radv_immutable_ycbcr_samplers(set_layout, var->data.binding);
+
+ if (!ycbcr_samplers)
+ return false;
+
+ /* For the following instructions, we don't apply any change and let the
+ * instruction apply to the first plane.
+ */
+ if (tex->op == nir_texop_txs || tex->op == nir_texop_query_levels || tex->op == nir_texop_lod)
+ return false;
+
+ assert(tex->texture_index == 0);
+ unsigned array_index = 0;
+ if (deref->deref_type != nir_deref_type_var) {
+ assert(deref->deref_type == nir_deref_type_array);
+ if (!nir_src_is_const(deref->arr.index))
+ return false;
+ array_index = nir_src_as_uint(deref->arr.index);
+ array_index = MIN2(array_index, binding->array_size - 1);
+ }
+ const struct radv_sampler_ycbcr_conversion *ycbcr_sampler = ycbcr_samplers + array_index;
+
+ if (ycbcr_sampler->format == VK_FORMAT_UNDEFINED)
+ return false;
+
+ struct ycbcr_state state = {
+ .builder = builder,
+ .origin_tex = tex,
+ .tex_deref = deref,
+ .conversion = ycbcr_sampler,
+ };
+
+ builder->cursor = nir_before_instr(&tex->instr);
+
+ VkFormat format = state.conversion->format;
+ const int plane_count = vk_format_get_plane_count(format);
+ nir_ssa_def *plane_values[3];
+
+ for (int p = 0; p < plane_count; ++p) {
+ plane_values[p] = create_plane_tex_instr_implicit(&state, p);
+ }
+
+ nir_ssa_def *result =
+ build_swizzled_components(builder, format, ycbcr_sampler->components, plane_values);
+ if (state.conversion->ycbcr_model != VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY) {
+ VkFormat first_format = vk_format_get_plane_format(format, 0);
+ uint32_t bits =
+ vk_format_get_component_bits(first_format, UTIL_FORMAT_COLORSPACE_RGB, PIPE_SWIZZLE_X);
+ /* TODO: swizzle and bpcs */
+ uint32_t bpcs[3] = {bits, bits, bits};
+ result = nir_convert_ycbcr_to_rgb(builder, state.conversion->ycbcr_model,
+ state.conversion->ycbcr_range, result, bpcs);
+ }
+
+ nir_ssa_def_rewrite_uses(&tex->dest.ssa, result);
+ nir_instr_remove(&tex->instr);
+
+ return true;
}
bool
-radv_nir_lower_ycbcr_textures(nir_shader *shader,
- const struct radv_pipeline_layout *layout)
+radv_nir_lower_ycbcr_textures(nir_shader *shader, const struct radv_pipeline_layout *layout)
{
- bool progress = false;
+ bool progress = false;
- nir_foreach_function(function, shader) {
- if (!function->impl)
- continue;
+ nir_foreach_function (function, shader) {
+ if (!function->impl)
+ continue;
- bool function_progress = false;
- nir_builder builder;
- nir_builder_init(&builder, function->impl);
+ bool function_progress = false;
+ nir_builder builder;
+ nir_builder_init(&builder, function->impl);
- nir_foreach_block(block, function->impl) {
- nir_foreach_instr_safe(instr, block) {
- if (instr->type != nir_instr_type_tex)
- continue;
+ nir_foreach_block (block, function->impl) {
+ nir_foreach_instr_safe (instr, block) {
+ if (instr->type != nir_instr_type_tex)
+ continue;
- nir_tex_instr *tex = nir_instr_as_tex(instr);
- function_progress |= try_lower_tex_ycbcr(layout, &builder, tex);
- }
- }
+ nir_tex_instr *tex = nir_instr_as_tex(instr);
+ function_progress |= try_lower_tex_ycbcr(layout, &builder, tex);
+ }
+ }
- if (function_progress) {
- nir_metadata_preserve(function->impl,
- nir_metadata_block_index |
- nir_metadata_dominance);
- }
+ if (function_progress) {
+ nir_metadata_preserve(function->impl, nir_metadata_block_index | nir_metadata_dominance);
+ }
- progress |= function_progress;
- }
+ progress |= function_progress;
+ }
- return progress;
+ return progress;
}
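
The least obvious piece of this pass is implicit_downsampled_coord(): for cosited-even chroma, the sampling coordinate of a subsampled plane is offset by 1 / (div_scale * queried texture size), with div_scale taken from the divisors[] table (2 on the horizontal axis for 4:2:2 and 4:2:0, 2 on the vertical axis for 4:2:0). A standalone scalar sketch of the same arithmetic, with plain floats instead of nir_ssa_defs and an invented name:

/* Scalar model of the coordinate adjustment emitted by
 * implicit_downsampled_coord() above; purely illustrative, no NIR involved. */
static float
downsampled_coord(float coord, float tex_size, int div_scale)
{
   /* value + 1 / (div_scale * size), applied only when the chroma offset is
    * VK_CHROMA_LOCATION_COSITED_EVEN and the axis is actually subsampled. */
   return coord + 1.0f / ((float)div_scale * tex_size);
}
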
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index a91f8af6b4a..59e9fee3118 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -25,1597 +25,1468 @@
* IN THE SOFTWARE.
*/
+#include "nir/nir.h"
+#include "radv_debug.h"
#include "radv_private.h"
#include "radv_shader.h"
-#include "radv_shader_helper.h"
#include "radv_shader_args.h"
-#include "radv_debug.h"
-#include "nir/nir.h"
+#include "radv_shader_helper.h"
-#include "sid.h"
#include "ac_binary.h"
-#include "ac_llvm_util.h"
+#include "ac_exp_param.h"
#include "ac_llvm_build.h"
+#include "ac_llvm_util.h"
#include "ac_shader_abi.h"
#include "ac_shader_util.h"
-#include "ac_exp_param.h"
+#include "sid.h"
#define RADEON_LLVM_MAX_INPUTS (VARYING_SLOT_VAR31 + 1)
struct radv_shader_context {
- struct ac_llvm_context ac;
- const struct nir_shader *shader;
- struct ac_shader_abi abi;
- const struct radv_shader_args *args;
+ struct ac_llvm_context ac;
+ const struct nir_shader *shader;
+ struct ac_shader_abi abi;
+ const struct radv_shader_args *args;
- gl_shader_stage stage;
+ gl_shader_stage stage;
- unsigned max_workgroup_size;
- LLVMContextRef context;
- LLVMValueRef main_function;
+ unsigned max_workgroup_size;
+ LLVMContextRef context;
+ LLVMValueRef main_function;
- LLVMValueRef descriptor_sets[MAX_SETS];
+ LLVMValueRef descriptor_sets[MAX_SETS];
- LLVMValueRef ring_offsets;
+ LLVMValueRef ring_offsets;
- LLVMValueRef vs_rel_patch_id;
+ LLVMValueRef vs_rel_patch_id;
- LLVMValueRef gs_wave_id;
- LLVMValueRef gs_vtx_offset[6];
+ LLVMValueRef gs_wave_id;
+ LLVMValueRef gs_vtx_offset[6];
- LLVMValueRef esgs_ring;
- LLVMValueRef gsvs_ring[4];
- LLVMValueRef hs_ring_tess_offchip;
- LLVMValueRef hs_ring_tess_factor;
+ LLVMValueRef esgs_ring;
+ LLVMValueRef gsvs_ring[4];
+ LLVMValueRef hs_ring_tess_offchip;
+ LLVMValueRef hs_ring_tess_factor;
- LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4];
+ LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4];
- uint64_t output_mask;
+ uint64_t output_mask;
- LLVMValueRef gs_next_vertex[4];
- LLVMValueRef gs_curprim_verts[4];
- LLVMValueRef gs_generated_prims[4];
- LLVMValueRef gs_ngg_emit;
- LLVMValueRef gs_ngg_scratch;
+ LLVMValueRef gs_next_vertex[4];
+ LLVMValueRef gs_curprim_verts[4];
+ LLVMValueRef gs_generated_prims[4];
+ LLVMValueRef gs_ngg_emit;
+ LLVMValueRef gs_ngg_scratch;
- LLVMValueRef vertexptr; /* GFX10 only */
+ LLVMValueRef vertexptr; /* GFX10 only */
};
struct radv_shader_output_values {
- LLVMValueRef values[4];
- unsigned slot_name;
- unsigned slot_index;
- unsigned usage_mask;
+ LLVMValueRef values[4];
+ unsigned slot_name;
+ unsigned slot_index;
+ unsigned usage_mask;
};
static inline struct radv_shader_context *
radv_shader_context_from_abi(struct ac_shader_abi *abi)
{
- return container_of(abi, struct radv_shader_context, abi);
+ return container_of(abi, struct radv_shader_context, abi);
}
static LLVMValueRef
-create_llvm_function(struct ac_llvm_context *ctx, LLVMModuleRef module,
- LLVMBuilderRef builder,
- const struct ac_shader_args *args,
- enum ac_llvm_calling_convention convention,
- unsigned max_workgroup_size,
- const struct radv_nir_compiler_options *options)
+create_llvm_function(struct ac_llvm_context *ctx, LLVMModuleRef module, LLVMBuilderRef builder,
+ const struct ac_shader_args *args, enum ac_llvm_calling_convention convention,
+ unsigned max_workgroup_size, const struct radv_nir_compiler_options *options)
{
- LLVMValueRef main_function =
- ac_build_main(args, ctx, convention, "main", ctx->voidt, module);
+ LLVMValueRef main_function = ac_build_main(args, ctx, convention, "main", ctx->voidt, module);
- if (options->address32_hi) {
- ac_llvm_add_target_dep_function_attr(main_function,
- "amdgpu-32bit-address-high-bits",
- options->address32_hi);
- }
+ if (options->address32_hi) {
+ ac_llvm_add_target_dep_function_attr(main_function, "amdgpu-32bit-address-high-bits",
+ options->address32_hi);
+ }
- ac_llvm_set_workgroup_size(main_function, max_workgroup_size);
+ ac_llvm_set_workgroup_size(main_function, max_workgroup_size);
- return main_function;
+ return main_function;
}
static void
load_descriptor_sets(struct radv_shader_context *ctx)
{
- uint32_t mask = ctx->args->shader_info->desc_set_used_mask;
- if (ctx->args->shader_info->need_indirect_descriptor_sets) {
- LLVMValueRef desc_sets =
- ac_get_arg(&ctx->ac, ctx->args->descriptor_sets[0]);
- while (mask) {
- int i = u_bit_scan(&mask);
-
- ctx->descriptor_sets[i] =
- ac_build_load_to_sgpr(&ctx->ac, desc_sets,
- LLVMConstInt(ctx->ac.i32, i, false));
+ uint32_t mask = ctx->args->shader_info->desc_set_used_mask;
+ if (ctx->args->shader_info->need_indirect_descriptor_sets) {
+ LLVMValueRef desc_sets = ac_get_arg(&ctx->ac, ctx->args->descriptor_sets[0]);
+ while (mask) {
+ int i = u_bit_scan(&mask);
- }
- } else {
- while (mask) {
- int i = u_bit_scan(&mask);
+ ctx->descriptor_sets[i] =
+ ac_build_load_to_sgpr(&ctx->ac, desc_sets, LLVMConstInt(ctx->ac.i32, i, false));
+ }
+ } else {
+ while (mask) {
+ int i = u_bit_scan(&mask);
- ctx->descriptor_sets[i] =
- ac_get_arg(&ctx->ac, ctx->args->descriptor_sets[i]);
- }
- }
+ ctx->descriptor_sets[i] = ac_get_arg(&ctx->ac, ctx->args->descriptor_sets[i]);
+ }
+ }
}
static enum ac_llvm_calling_convention
get_llvm_calling_convention(LLVMValueRef func, gl_shader_stage stage)
{
- switch (stage) {
- case MESA_SHADER_VERTEX:
- case MESA_SHADER_TESS_EVAL:
- return AC_LLVM_AMDGPU_VS;
- break;
- case MESA_SHADER_GEOMETRY:
- return AC_LLVM_AMDGPU_GS;
- break;
- case MESA_SHADER_TESS_CTRL:
- return AC_LLVM_AMDGPU_HS;
- break;
- case MESA_SHADER_FRAGMENT:
- return AC_LLVM_AMDGPU_PS;
- break;
- case MESA_SHADER_COMPUTE:
- return AC_LLVM_AMDGPU_CS;
- break;
- default:
- unreachable("Unhandle shader type");
- }
+ switch (stage) {
+ case MESA_SHADER_VERTEX:
+ case MESA_SHADER_TESS_EVAL:
+ return AC_LLVM_AMDGPU_VS;
+ break;
+ case MESA_SHADER_GEOMETRY:
+ return AC_LLVM_AMDGPU_GS;
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ return AC_LLVM_AMDGPU_HS;
+ break;
+ case MESA_SHADER_FRAGMENT:
+ return AC_LLVM_AMDGPU_PS;
+ break;
+ case MESA_SHADER_COMPUTE:
+ return AC_LLVM_AMDGPU_CS;
+ break;
+ default:
+ unreachable("Unhandle shader type");
+ }
}
/* Returns whether the stage is a stage that can be directly before the GS */
-static bool is_pre_gs_stage(gl_shader_stage stage)
+static bool
+is_pre_gs_stage(gl_shader_stage stage)
{
- return stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL;
+ return stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL;
}
-static void create_function(struct radv_shader_context *ctx,
- gl_shader_stage stage,
- bool has_previous_stage)
+static void
+create_function(struct radv_shader_context *ctx, gl_shader_stage stage, bool has_previous_stage)
{
- if (ctx->ac.chip_class >= GFX10) {
- if (is_pre_gs_stage(stage) && ctx->args->options->key.vs_common_out.as_ngg) {
- /* On GFX10, VS is merged into GS for NGG. */
- stage = MESA_SHADER_GEOMETRY;
- has_previous_stage = true;
- }
- }
+ if (ctx->ac.chip_class >= GFX10) {
+ if (is_pre_gs_stage(stage) && ctx->args->options->key.vs_common_out.as_ngg) {
+ /* On GFX10, VS is merged into GS for NGG. */
+ stage = MESA_SHADER_GEOMETRY;
+ has_previous_stage = true;
+ }
+ }
- ctx->main_function = create_llvm_function(
- &ctx->ac, ctx->ac.module, ctx->ac.builder, &ctx->args->ac,
- get_llvm_calling_convention(ctx->main_function, stage),
- ctx->max_workgroup_size,
- ctx->args->options);
+ ctx->main_function =
+ create_llvm_function(&ctx->ac, ctx->ac.module, ctx->ac.builder, &ctx->args->ac,
+ get_llvm_calling_convention(ctx->main_function, stage),
+ ctx->max_workgroup_size, ctx->args->options);
- ctx->ring_offsets = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.implicit.buffer.ptr",
- LLVMPointerType(ctx->ac.i8, AC_ADDR_SPACE_CONST),
- NULL, 0, AC_FUNC_ATTR_READNONE);
- ctx->ring_offsets = LLVMBuildBitCast(ctx->ac.builder, ctx->ring_offsets,
- ac_array_in_const_addr_space(ctx->ac.v4i32), "");
+ ctx->ring_offsets = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.implicit.buffer.ptr",
+ LLVMPointerType(ctx->ac.i8, AC_ADDR_SPACE_CONST), NULL, 0,
+ AC_FUNC_ATTR_READNONE);
+ ctx->ring_offsets = LLVMBuildBitCast(ctx->ac.builder, ctx->ring_offsets,
+ ac_array_in_const_addr_space(ctx->ac.v4i32), "");
- load_descriptor_sets(ctx);
-
- if (stage == MESA_SHADER_TESS_CTRL ||
- (stage == MESA_SHADER_VERTEX && ctx->args->options->key.vs_common_out.as_ls) ||
- /* GFX9 has the ESGS ring buffer in LDS. */
- (stage == MESA_SHADER_GEOMETRY && has_previous_stage)) {
- ac_declare_lds_as_pointer(&ctx->ac);
- }
+ load_descriptor_sets(ctx);
+ if (stage == MESA_SHADER_TESS_CTRL ||
+ (stage == MESA_SHADER_VERTEX && ctx->args->options->key.vs_common_out.as_ls) ||
+ /* GFX9 has the ESGS ring buffer in LDS. */
+ (stage == MESA_SHADER_GEOMETRY && has_previous_stage)) {
+ ac_declare_lds_as_pointer(&ctx->ac);
+ }
}
-
static LLVMValueRef
-radv_load_resource(struct ac_shader_abi *abi, LLVMValueRef index,
- unsigned desc_set, unsigned binding)
+radv_load_resource(struct ac_shader_abi *abi, LLVMValueRef index, unsigned desc_set,
+ unsigned binding)
{
- struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
- LLVMValueRef desc_ptr = ctx->descriptor_sets[desc_set];
- struct radv_pipeline_layout *pipeline_layout = ctx->args->options->layout;
- struct radv_descriptor_set_layout *layout = pipeline_layout->set[desc_set].layout;
- unsigned base_offset = layout->binding[binding].offset;
- LLVMValueRef offset, stride;
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+ LLVMValueRef desc_ptr = ctx->descriptor_sets[desc_set];
+ struct radv_pipeline_layout *pipeline_layout = ctx->args->options->layout;
+ struct radv_descriptor_set_layout *layout = pipeline_layout->set[desc_set].layout;
+ unsigned base_offset = layout->binding[binding].offset;
+ LLVMValueRef offset, stride;
- if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
- layout->binding[binding].type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
- unsigned idx = pipeline_layout->set[desc_set].dynamic_offset_start +
- layout->binding[binding].dynamic_offset_offset;
- desc_ptr = ac_get_arg(&ctx->ac, ctx->args->ac.push_constants);
- base_offset = pipeline_layout->push_constant_size + 16 * idx;
- stride = LLVMConstInt(ctx->ac.i32, 16, false);
- } else
- stride = LLVMConstInt(ctx->ac.i32, layout->binding[binding].size, false);
+ if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
+ layout->binding[binding].type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
+ unsigned idx = pipeline_layout->set[desc_set].dynamic_offset_start +
+ layout->binding[binding].dynamic_offset_offset;
+ desc_ptr = ac_get_arg(&ctx->ac, ctx->args->ac.push_constants);
+ base_offset = pipeline_layout->push_constant_size + 16 * idx;
+ stride = LLVMConstInt(ctx->ac.i32, 16, false);
+ } else
+ stride = LLVMConstInt(ctx->ac.i32, layout->binding[binding].size, false);
- offset = LLVMConstInt(ctx->ac.i32, base_offset, false);
+ offset = LLVMConstInt(ctx->ac.i32, base_offset, false);
- if (layout->binding[binding].type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
- offset = ac_build_imad(&ctx->ac, index, stride, offset);
- }
+ if (layout->binding[binding].type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
+ offset = ac_build_imad(&ctx->ac, index, stride, offset);
+ }
- desc_ptr = LLVMBuildGEP(ctx->ac.builder, desc_ptr, &offset, 1, "");
- desc_ptr = ac_cast_ptr(&ctx->ac, desc_ptr, ctx->ac.v4i32);
+ desc_ptr = LLVMBuildGEP(ctx->ac.builder, desc_ptr, &offset, 1, "");
+ desc_ptr = ac_cast_ptr(&ctx->ac, desc_ptr, ctx->ac.v4i32);
- return desc_ptr;
+ return desc_ptr;
}
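
What radv_load_resource() builds with LLVM is just pointer arithmetic over the descriptor set: the binding's base offset plus index times the binding stride, or, for dynamic buffers, a 16-byte slot placed after the push constants. A scalar equivalent, with invented names and no RADV types:

#include <stdint.h>

/* Scalar sketch of the descriptor address math built in radv_load_resource();
 * all names here are illustrative. */
static uint64_t
descriptor_address(uint64_t set_base, uint32_t binding_offset,
                   uint32_t binding_stride, uint32_t index)
{
   return set_base + binding_offset + (uint64_t)index * binding_stride;
}

/* Dynamic UBO/SSBO descriptors instead start at
 * push_constant_size + 16 * dynamic_offset_index within the push constants. */
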
static uint32_t
radv_get_sample_pos_offset(uint32_t num_samples)
{
- uint32_t sample_pos_offset = 0;
+ uint32_t sample_pos_offset = 0;
- switch (num_samples) {
- case 2:
- sample_pos_offset = 1;
- break;
- case 4:
- sample_pos_offset = 3;
- break;
- case 8:
- sample_pos_offset = 7;
- break;
- default:
- break;
- }
- return sample_pos_offset;
+ switch (num_samples) {
+ case 2:
+ sample_pos_offset = 1;
+ break;
+ case 4:
+ sample_pos_offset = 3;
+ break;
+ case 8:
+ sample_pos_offset = 7;
+ break;
+ default:
+ break;
+ }
+ return sample_pos_offset;
}
-static LLVMValueRef load_sample_position(struct ac_shader_abi *abi,
- LLVMValueRef sample_id)
+static LLVMValueRef
+load_sample_position(struct ac_shader_abi *abi, LLVMValueRef sample_id)
{
- struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
- LLVMValueRef result;
- LLVMValueRef index = LLVMConstInt(ctx->ac.i32, RING_PS_SAMPLE_POSITIONS, false);
- LLVMValueRef ptr = LLVMBuildGEP(ctx->ac.builder, ctx->ring_offsets, &index, 1, "");
+ LLVMValueRef result;
+ LLVMValueRef index = LLVMConstInt(ctx->ac.i32, RING_PS_SAMPLE_POSITIONS, false);
+ LLVMValueRef ptr = LLVMBuildGEP(ctx->ac.builder, ctx->ring_offsets, &index, 1, "");
- ptr = LLVMBuildBitCast(ctx->ac.builder, ptr,
- ac_array_in_const_addr_space(ctx->ac.v2f32), "");
+ ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ac_array_in_const_addr_space(ctx->ac.v2f32), "");
- uint32_t sample_pos_offset =
- radv_get_sample_pos_offset(ctx->args->options->key.fs.num_samples);
+ uint32_t sample_pos_offset = radv_get_sample_pos_offset(ctx->args->options->key.fs.num_samples);
- sample_id =
- LLVMBuildAdd(ctx->ac.builder, sample_id,
- LLVMConstInt(ctx->ac.i32, sample_pos_offset, false), "");
- result = ac_build_load_invariant(&ctx->ac, ptr, sample_id);
+ sample_id = LLVMBuildAdd(ctx->ac.builder, sample_id,
+ LLVMConstInt(ctx->ac.i32, sample_pos_offset, false), "");
+ result = ac_build_load_invariant(&ctx->ac, ptr, sample_id);
- return result;
+ return result;
}
-
-static LLVMValueRef load_sample_mask_in(struct ac_shader_abi *abi)
+static LLVMValueRef
+load_sample_mask_in(struct ac_shader_abi *abi)
{
- struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
- uint8_t log2_ps_iter_samples;
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+ uint8_t log2_ps_iter_samples;
- if (ctx->args->shader_info->ps.uses_sample_shading) {
- log2_ps_iter_samples =
- util_logbase2(ctx->args->options->key.fs.num_samples);
- } else {
- log2_ps_iter_samples = ctx->args->options->key.fs.log2_ps_iter_samples;
- }
+ if (ctx->args->shader_info->ps.uses_sample_shading) {
+ log2_ps_iter_samples = util_logbase2(ctx->args->options->key.fs.num_samples);
+ } else {
+ log2_ps_iter_samples = ctx->args->options->key.fs.log2_ps_iter_samples;
+ }
- LLVMValueRef result, sample_id;
- if (log2_ps_iter_samples) {
- /* gl_SampleMaskIn[0] = (SampleCoverage & (1 << gl_SampleID)). */
- sample_id = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.ancillary), 8, 4);
- sample_id = LLVMBuildShl(ctx->ac.builder, LLVMConstInt(ctx->ac.i32, 1, false), sample_id, "");
- result = LLVMBuildAnd(ctx->ac.builder, sample_id,
- ac_get_arg(&ctx->ac, ctx->args->ac.sample_coverage), "");
- } else {
- result = ac_get_arg(&ctx->ac, ctx->args->ac.sample_coverage);
- }
+ LLVMValueRef result, sample_id;
+ if (log2_ps_iter_samples) {
+ /* gl_SampleMaskIn[0] = (SampleCoverage & (1 << gl_SampleID)). */
+ sample_id = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.ancillary), 8, 4);
+ sample_id = LLVMBuildShl(ctx->ac.builder, LLVMConstInt(ctx->ac.i32, 1, false), sample_id, "");
+ result = LLVMBuildAnd(ctx->ac.builder, sample_id,
+ ac_get_arg(&ctx->ac, ctx->args->ac.sample_coverage), "");
+ } else {
+ result = ac_get_arg(&ctx->ac, ctx->args->ac.sample_coverage);
+ }
- return result;
+ return result;
}
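
Stripped of the argument plumbing, the sample-mask logic above reduces to one line of integer arithmetic; a scalar model of it (hedged, names invented):

#include <stdbool.h>
#include <stdint.h>

/* Scalar model of load_sample_mask_in(): with per-sample shading the coverage
 * mask is narrowed to the bit of the current sample, otherwise the full
 * coverage word is returned unchanged. */
static uint32_t
sample_mask_in(uint32_t sample_coverage, uint32_t sample_id, bool per_sample)
{
   return per_sample ? (sample_coverage & (1u << sample_id)) : sample_coverage;
}
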
-
-static void gfx10_ngg_gs_emit_vertex(struct radv_shader_context *ctx,
- unsigned stream,
- LLVMValueRef vertexidx,
- LLVMValueRef *addrs);
+static void gfx10_ngg_gs_emit_vertex(struct radv_shader_context *ctx, unsigned stream,
+ LLVMValueRef vertexidx, LLVMValueRef *addrs);
static void
-visit_emit_vertex_with_counter(struct ac_shader_abi *abi, unsigned stream,
- LLVMValueRef vertexidx, LLVMValueRef *addrs)
-{
- unsigned offset = 0;
- struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
-
- if (ctx->args->options->key.vs_common_out.as_ngg) {
- gfx10_ngg_gs_emit_vertex(ctx, stream, vertexidx, addrs);
- return;
- }
-
- for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
- unsigned output_usage_mask =
- ctx->args->shader_info->gs.output_usage_mask[i];
- uint8_t output_stream =
- ctx->args->shader_info->gs.output_streams[i];
- LLVMValueRef *out_ptr = &addrs[i * 4];
- int length = util_last_bit(output_usage_mask);
-
- if (!(ctx->output_mask & (1ull << i)) ||
- output_stream != stream)
- continue;
-
- for (unsigned j = 0; j < length; j++) {
- if (!(output_usage_mask & (1 << j)))
- continue;
-
- LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder,
- out_ptr[j], "");
- LLVMValueRef voffset =
- LLVMConstInt(ctx->ac.i32, offset *
- ctx->shader->info.gs.vertices_out, false);
-
- offset++;
-
- voffset = LLVMBuildAdd(ctx->ac.builder, voffset, vertexidx, "");
- voffset = LLVMBuildMul(ctx->ac.builder, voffset, LLVMConstInt(ctx->ac.i32, 4, false), "");
-
- out_val = ac_to_integer(&ctx->ac, out_val);
- out_val = LLVMBuildZExtOrBitCast(ctx->ac.builder, out_val, ctx->ac.i32, "");
-
- ac_build_buffer_store_dword(&ctx->ac,
- ctx->gsvs_ring[stream],
- out_val, 1,
- voffset,
- ac_get_arg(&ctx->ac,
- ctx->args->ac.gs2vs_offset),
- 0, ac_glc | ac_slc | ac_swizzled);
- }
- }
-
- ac_build_sendmsg(&ctx->ac,
- AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8),
- ctx->gs_wave_id);
+visit_emit_vertex_with_counter(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef vertexidx,
+ LLVMValueRef *addrs)
+{
+ unsigned offset = 0;
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+
+ if (ctx->args->options->key.vs_common_out.as_ngg) {
+ gfx10_ngg_gs_emit_vertex(ctx, stream, vertexidx, addrs);
+ return;
+ }
+
+ for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
+ unsigned output_usage_mask = ctx->args->shader_info->gs.output_usage_mask[i];
+ uint8_t output_stream = ctx->args->shader_info->gs.output_streams[i];
+ LLVMValueRef *out_ptr = &addrs[i * 4];
+ int length = util_last_bit(output_usage_mask);
+
+ if (!(ctx->output_mask & (1ull << i)) || output_stream != stream)
+ continue;
+
+ for (unsigned j = 0; j < length; j++) {
+ if (!(output_usage_mask & (1 << j)))
+ continue;
+
+ LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, out_ptr[j], "");
+ LLVMValueRef voffset =
+ LLVMConstInt(ctx->ac.i32, offset * ctx->shader->info.gs.vertices_out, false);
+
+ offset++;
+
+ voffset = LLVMBuildAdd(ctx->ac.builder, voffset, vertexidx, "");
+ voffset = LLVMBuildMul(ctx->ac.builder, voffset, LLVMConstInt(ctx->ac.i32, 4, false), "");
+
+ out_val = ac_to_integer(&ctx->ac, out_val);
+ out_val = LLVMBuildZExtOrBitCast(ctx->ac.builder, out_val, ctx->ac.i32, "");
+
+ ac_build_buffer_store_dword(&ctx->ac, ctx->gsvs_ring[stream], out_val, 1, voffset,
+ ac_get_arg(&ctx->ac, ctx->args->ac.gs2vs_offset), 0,
+ ac_glc | ac_slc | ac_swizzled);
+ }
+ }
+
+ ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8),
+ ctx->gs_wave_id);
}
static void
visit_end_primitive(struct ac_shader_abi *abi, unsigned stream)
{
- struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
- if (ctx->args->options->key.vs_common_out.as_ngg) {
- LLVMBuildStore(ctx->ac.builder, ctx->ac.i32_0, ctx->gs_curprim_verts[stream]);
- return;
- }
+ if (ctx->args->options->key.vs_common_out.as_ngg) {
+ LLVMBuildStore(ctx->ac.builder, ctx->ac.i32_0, ctx->gs_curprim_verts[stream]);
+ return;
+ }
- ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_CUT | AC_SENDMSG_GS | (stream << 8), ctx->gs_wave_id);
+ ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_CUT | AC_SENDMSG_GS | (stream << 8),
+ ctx->gs_wave_id);
}
static LLVMValueRef
load_tess_coord(struct ac_shader_abi *abi)
{
- struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
- LLVMValueRef coord[4] = {
- ac_get_arg(&ctx->ac, ctx->args->ac.tes_u),
- ac_get_arg(&ctx->ac, ctx->args->ac.tes_v),
- ctx->ac.f32_0,
- ctx->ac.f32_0,
- };
+ LLVMValueRef coord[4] = {
+ ac_get_arg(&ctx->ac, ctx->args->ac.tes_u),
+ ac_get_arg(&ctx->ac, ctx->args->ac.tes_v),
+ ctx->ac.f32_0,
+ ctx->ac.f32_0,
+ };
- if (ctx->shader->info.tess.primitive_mode == GL_TRIANGLES)
- coord[2] = LLVMBuildFSub(ctx->ac.builder, ctx->ac.f32_1,
- LLVMBuildFAdd(ctx->ac.builder, coord[0], coord[1], ""), "");
+ if (ctx->shader->info.tess.primitive_mode == GL_TRIANGLES)
+ coord[2] = LLVMBuildFSub(ctx->ac.builder, ctx->ac.f32_1,
+ LLVMBuildFAdd(ctx->ac.builder, coord[0], coord[1], ""), "");
- return ac_build_gather_values(&ctx->ac, coord, 3);
+ return ac_build_gather_values(&ctx->ac, coord, 3);
}
static LLVMValueRef
load_ring_tess_factors(struct ac_shader_abi *abi)
{
- struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
- assert(ctx->stage == MESA_SHADER_TESS_CTRL);
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+ assert(ctx->stage == MESA_SHADER_TESS_CTRL);
- return ctx->hs_ring_tess_factor;
+ return ctx->hs_ring_tess_factor;
}
static LLVMValueRef
load_ring_tess_offchip(struct ac_shader_abi *abi)
{
- struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
- assert(ctx->stage == MESA_SHADER_TESS_CTRL ||
- ctx->stage == MESA_SHADER_TESS_EVAL);
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+ assert(ctx->stage == MESA_SHADER_TESS_CTRL || ctx->stage == MESA_SHADER_TESS_EVAL);
- return ctx->hs_ring_tess_offchip;
+ return ctx->hs_ring_tess_offchip;
}
static LLVMValueRef
load_ring_esgs(struct ac_shader_abi *abi)
{
- struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
- assert(ctx->stage == MESA_SHADER_VERTEX ||
- ctx->stage == MESA_SHADER_TESS_EVAL ||
- ctx->stage == MESA_SHADER_GEOMETRY);
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+ assert(ctx->stage == MESA_SHADER_VERTEX || ctx->stage == MESA_SHADER_TESS_EVAL ||
+ ctx->stage == MESA_SHADER_GEOMETRY);
- return ctx->esgs_ring;
+ return ctx->esgs_ring;
}
-static LLVMValueRef radv_load_base_vertex(struct ac_shader_abi *abi, bool non_indexed_is_zero)
+static LLVMValueRef
+radv_load_base_vertex(struct ac_shader_abi *abi, bool non_indexed_is_zero)
{
- struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
- return ac_get_arg(&ctx->ac, ctx->args->ac.base_vertex);
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+ return ac_get_arg(&ctx->ac, ctx->args->ac.base_vertex);
}
-static LLVMValueRef radv_load_ssbo(struct ac_shader_abi *abi,
- LLVMValueRef buffer_ptr, bool write, bool non_uniform)
-{
- struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
- LLVMValueRef result;
-
- if (!non_uniform)
- LLVMSetMetadata(buffer_ptr, ctx->ac.uniform_md_kind, ctx->ac.empty_md);
-
- if (non_uniform && LLVMGetPointerAddressSpace(LLVMTypeOf(buffer_ptr)) == AC_ADDR_SPACE_CONST_32BIT) {
- /* 32-bit seems to always use SMEM. addrspacecast from 32-bit -> 64-bit is broken. */
- buffer_ptr = LLVMBuildPtrToInt(ctx->ac.builder, buffer_ptr, ctx->ac.i32, ""),
- buffer_ptr = LLVMBuildZExt(ctx->ac.builder, buffer_ptr, ctx->ac.i64, "");
- uint64_t hi = (uint64_t)ctx->args->options->address32_hi << 32;
- buffer_ptr = LLVMBuildOr(ctx->ac.builder, buffer_ptr, LLVMConstInt(ctx->ac.i64, hi, false), "");
- buffer_ptr = LLVMBuildIntToPtr(ctx->ac.builder, buffer_ptr, LLVMPointerType(ctx->ac.v4i32, AC_ADDR_SPACE_CONST), "");
- }
-
- result = LLVMBuildLoad(ctx->ac.builder, buffer_ptr, "");
- LLVMSetMetadata(result, ctx->ac.invariant_load_md_kind, ctx->ac.empty_md);
-
- return result;
-}
-
-static LLVMValueRef radv_load_ubo(struct ac_shader_abi *abi,
- unsigned desc_set, unsigned binding,
- bool valid_binding, LLVMValueRef buffer_ptr)
-{
- struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
- LLVMValueRef result;
-
- if (valid_binding) {
- struct radv_pipeline_layout *pipeline_layout = ctx->args->options->layout;
- struct radv_descriptor_set_layout *layout = pipeline_layout->set[desc_set].layout;
-
- if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
- uint32_t desc_type = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
- S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
- S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
- S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
-
- if (ctx->ac.chip_class >= GFX10) {
- desc_type |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
- S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
- S_008F0C_RESOURCE_LEVEL(1);
- } else {
- desc_type |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
- }
-
- LLVMValueRef desc_components[4] = {
- LLVMBuildPtrToInt(ctx->ac.builder, buffer_ptr, ctx->ac.intptr, ""),
- LLVMConstInt(ctx->ac.i32, S_008F04_BASE_ADDRESS_HI(ctx->args->options->address32_hi), false),
- LLVMConstInt(ctx->ac.i32, 0xffffffff, false),
- LLVMConstInt(ctx->ac.i32, desc_type, false),
- };
-
- return ac_build_gather_values(&ctx->ac, desc_components, 4);
- }
- }
-
- LLVMSetMetadata(buffer_ptr, ctx->ac.uniform_md_kind, ctx->ac.empty_md);
-
- result = LLVMBuildLoad(ctx->ac.builder, buffer_ptr, "");
- LLVMSetMetadata(result, ctx->ac.invariant_load_md_kind, ctx->ac.empty_md);
-
- return result;
-}
-
-static LLVMValueRef radv_get_sampler_desc(struct ac_shader_abi *abi,
- unsigned descriptor_set,
- unsigned base_index,
- unsigned constant_index,
- LLVMValueRef index,
- enum ac_descriptor_type desc_type,
- bool image, bool write,
- bool bindless)
-{
- struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
- LLVMValueRef list = ctx->descriptor_sets[descriptor_set];
- struct radv_descriptor_set_layout *layout = ctx->args->options->layout->set[descriptor_set].layout;
- struct radv_descriptor_set_binding_layout *binding = layout->binding + base_index;
- unsigned offset = binding->offset;
- unsigned stride = binding->size;
- unsigned type_size;
- LLVMBuilderRef builder = ctx->ac.builder;
- LLVMTypeRef type;
-
- assert(base_index < layout->binding_count);
-
- switch (desc_type) {
- case AC_DESC_IMAGE:
- type = ctx->ac.v8i32;
- type_size = 32;
- break;
- case AC_DESC_FMASK:
- type = ctx->ac.v8i32;
- offset += 32;
- type_size = 32;
- break;
- case AC_DESC_SAMPLER:
- type = ctx->ac.v4i32;
- if (binding->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
- offset += radv_combined_image_descriptor_sampler_offset(binding);
- }
-
- type_size = 16;
- break;
- case AC_DESC_BUFFER:
- type = ctx->ac.v4i32;
- type_size = 16;
- break;
- case AC_DESC_PLANE_0:
- case AC_DESC_PLANE_1:
- case AC_DESC_PLANE_2:
- type = ctx->ac.v8i32;
- type_size = 32;
- offset += 32 * (desc_type - AC_DESC_PLANE_0);
- break;
- default:
- unreachable("invalid desc_type\n");
- }
-
- offset += constant_index * stride;
-
- if (desc_type == AC_DESC_SAMPLER && binding->immutable_samplers_offset &&
- (!index || binding->immutable_samplers_equal)) {
- if (binding->immutable_samplers_equal)
- constant_index = 0;
-
- const uint32_t *samplers = radv_immutable_samplers(layout, binding);
-
- LLVMValueRef constants[] = {
- LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 0], 0),
- LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 1], 0),
- LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 2], 0),
- LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 3], 0),
- };
- return ac_build_gather_values(&ctx->ac, constants, 4);
- }
-
- assert(stride % type_size == 0);
-
- LLVMValueRef adjusted_index = index;
- if (!adjusted_index)
- adjusted_index = ctx->ac.i32_0;
-
- adjusted_index = LLVMBuildMul(builder, adjusted_index, LLVMConstInt(ctx->ac.i32, stride / type_size, 0), "");
-
- LLVMValueRef val_offset = LLVMConstInt(ctx->ac.i32, offset, 0);
- list = LLVMBuildGEP(builder, list, &val_offset, 1, "");
- list = LLVMBuildPointerCast(builder, list,
- ac_array_in_const32_addr_space(type), "");
-
- LLVMValueRef descriptor = ac_build_load_to_sgpr(&ctx->ac, list, adjusted_index);
-
- /* 3 plane formats always have same size and format for plane 1 & 2, so
- * use the tail from plane 1 so that we can store only the first 16 bytes
- * of the last plane. */
- if (desc_type == AC_DESC_PLANE_2) {
- LLVMValueRef descriptor2 = radv_get_sampler_desc(abi, descriptor_set, base_index, constant_index, index, AC_DESC_PLANE_1,image, write, bindless);
-
- LLVMValueRef components[8];
- for (unsigned i = 0; i < 4; ++i)
- components[i] = ac_llvm_extract_elem(&ctx->ac, descriptor, i);
-
- for (unsigned i = 4; i < 8; ++i)
- components[i] = ac_llvm_extract_elem(&ctx->ac, descriptor2, i);
- descriptor = ac_build_gather_values(&ctx->ac, components, 8);
- }
-
- return descriptor;
+static LLVMValueRef
+radv_load_ssbo(struct ac_shader_abi *abi, LLVMValueRef buffer_ptr, bool write, bool non_uniform)
+{
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+ LLVMValueRef result;
+
+ if (!non_uniform)
+ LLVMSetMetadata(buffer_ptr, ctx->ac.uniform_md_kind, ctx->ac.empty_md);
+
+ if (non_uniform &&
+ LLVMGetPointerAddressSpace(LLVMTypeOf(buffer_ptr)) == AC_ADDR_SPACE_CONST_32BIT) {
+ /* 32-bit seems to always use SMEM. addrspacecast from 32-bit -> 64-bit is broken. */
+ buffer_ptr = LLVMBuildPtrToInt(ctx->ac.builder, buffer_ptr, ctx->ac.i32, ""),
+ buffer_ptr = LLVMBuildZExt(ctx->ac.builder, buffer_ptr, ctx->ac.i64, "");
+ uint64_t hi = (uint64_t)ctx->args->options->address32_hi << 32;
+ buffer_ptr =
+ LLVMBuildOr(ctx->ac.builder, buffer_ptr, LLVMConstInt(ctx->ac.i64, hi, false), "");
+ buffer_ptr = LLVMBuildIntToPtr(ctx->ac.builder, buffer_ptr,
+ LLVMPointerType(ctx->ac.v4i32, AC_ADDR_SPACE_CONST), "");
+ }
+
+ result = LLVMBuildLoad(ctx->ac.builder, buffer_ptr, "");
+ LLVMSetMetadata(result, ctx->ac.invariant_load_md_kind, ctx->ac.empty_md);
+
+ return result;
+}
+
+static LLVMValueRef
+radv_load_ubo(struct ac_shader_abi *abi, unsigned desc_set, unsigned binding, bool valid_binding,
+ LLVMValueRef buffer_ptr)
+{
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+ LLVMValueRef result;
+
+ if (valid_binding) {
+ struct radv_pipeline_layout *pipeline_layout = ctx->args->options->layout;
+ struct radv_descriptor_set_layout *layout = pipeline_layout->set[desc_set].layout;
+
+ if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
+ uint32_t desc_type =
+ S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+
+ if (ctx->ac.chip_class >= GFX10) {
+ desc_type |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+ S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
+ } else {
+ desc_type |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+ }
+
+ LLVMValueRef desc_components[4] = {
+ LLVMBuildPtrToInt(ctx->ac.builder, buffer_ptr, ctx->ac.intptr, ""),
+ LLVMConstInt(ctx->ac.i32, S_008F04_BASE_ADDRESS_HI(ctx->args->options->address32_hi),
+ false),
+ LLVMConstInt(ctx->ac.i32, 0xffffffff, false),
+ LLVMConstInt(ctx->ac.i32, desc_type, false),
+ };
+
+ return ac_build_gather_values(&ctx->ac, desc_components, 4);
+ }
+ }
+
+ LLVMSetMetadata(buffer_ptr, ctx->ac.uniform_md_kind, ctx->ac.empty_md);
+
+ result = LLVMBuildLoad(ctx->ac.builder, buffer_ptr, "");
+ LLVMSetMetadata(result, ctx->ac.invariant_load_md_kind, ctx->ac.empty_md);
+
+ return result;
+}
+
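The inline uniform block branch above hand-builds a four-dword buffer descriptor around a plain pointer. A rough sketch of that layout in plain C; the struct and helper are illustrative only, and the exact bit packing lives in the generated S_008F04_*/S_008F0C_* register macros.

#include <stdint.h>

/* Illustrative four-dword buffer descriptor as assembled in
 * radv_load_ubo() for inline uniform blocks (not register-accurate). */
struct inline_ubo_desc {
   uint32_t dw[4];
};

static struct inline_ubo_desc
make_inline_ubo_desc(uint32_t addr_lo, uint32_t base_address_hi_bits,
                     uint32_t dst_sel_and_format_bits)
{
   struct inline_ubo_desc d;
   d.dw[0] = addr_lo;                 /* buffer_ptr cast to an integer       */
   d.dw[1] = base_address_hi_bits;    /* BASE_ADDRESS_HI(address32_hi)       */
   d.dw[2] = 0xffffffffu;             /* range: effectively unbounded        */
   d.dw[3] = dst_sel_and_format_bits; /* dst_sel XYZW + per-chip format bits */
   return d;
}

int
main(void)
{
   struct inline_ubo_desc d = make_inline_ubo_desc(0x1000u, 0u, 0u);
   return d.dw[2] == 0xffffffffu ? 0 : 1;
}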
+static LLVMValueRef
+radv_get_sampler_desc(struct ac_shader_abi *abi, unsigned descriptor_set, unsigned base_index,
+ unsigned constant_index, LLVMValueRef index,
+ enum ac_descriptor_type desc_type, bool image, bool write, bool bindless)
+{
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+ LLVMValueRef list = ctx->descriptor_sets[descriptor_set];
+ struct radv_descriptor_set_layout *layout =
+ ctx->args->options->layout->set[descriptor_set].layout;
+ struct radv_descriptor_set_binding_layout *binding = layout->binding + base_index;
+ unsigned offset = binding->offset;
+ unsigned stride = binding->size;
+ unsigned type_size;
+ LLVMBuilderRef builder = ctx->ac.builder;
+ LLVMTypeRef type;
+
+ assert(base_index < layout->binding_count);
+
+ switch (desc_type) {
+ case AC_DESC_IMAGE:
+ type = ctx->ac.v8i32;
+ type_size = 32;
+ break;
+ case AC_DESC_FMASK:
+ type = ctx->ac.v8i32;
+ offset += 32;
+ type_size = 32;
+ break;
+ case AC_DESC_SAMPLER:
+ type = ctx->ac.v4i32;
+ if (binding->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
+ offset += radv_combined_image_descriptor_sampler_offset(binding);
+ }
+
+ type_size = 16;
+ break;
+ case AC_DESC_BUFFER:
+ type = ctx->ac.v4i32;
+ type_size = 16;
+ break;
+ case AC_DESC_PLANE_0:
+ case AC_DESC_PLANE_1:
+ case AC_DESC_PLANE_2:
+ type = ctx->ac.v8i32;
+ type_size = 32;
+ offset += 32 * (desc_type - AC_DESC_PLANE_0);
+ break;
+ default:
+ unreachable("invalid desc_type\n");
+ }
+
+ offset += constant_index * stride;
+
+ if (desc_type == AC_DESC_SAMPLER && binding->immutable_samplers_offset &&
+ (!index || binding->immutable_samplers_equal)) {
+ if (binding->immutable_samplers_equal)
+ constant_index = 0;
+
+ const uint32_t *samplers = radv_immutable_samplers(layout, binding);
+
+ LLVMValueRef constants[] = {
+ LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 0], 0),
+ LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 1], 0),
+ LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 2], 0),
+ LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 3], 0),
+ };
+ return ac_build_gather_values(&ctx->ac, constants, 4);
+ }
+
+ assert(stride % type_size == 0);
+
+ LLVMValueRef adjusted_index = index;
+ if (!adjusted_index)
+ adjusted_index = ctx->ac.i32_0;
+
+ adjusted_index =
+ LLVMBuildMul(builder, adjusted_index, LLVMConstInt(ctx->ac.i32, stride / type_size, 0), "");
+
+ LLVMValueRef val_offset = LLVMConstInt(ctx->ac.i32, offset, 0);
+ list = LLVMBuildGEP(builder, list, &val_offset, 1, "");
+ list = LLVMBuildPointerCast(builder, list, ac_array_in_const32_addr_space(type), "");
+
+ LLVMValueRef descriptor = ac_build_load_to_sgpr(&ctx->ac, list, adjusted_index);
+
+   /* 3-plane formats always have the same size and format for planes 1 & 2, so
+    * use the tail from plane 1 so that we only need to store the first 16 bytes
+    * of the last plane. */
+ if (desc_type == AC_DESC_PLANE_2) {
+ LLVMValueRef descriptor2 =
+ radv_get_sampler_desc(abi, descriptor_set, base_index, constant_index, index,
+ AC_DESC_PLANE_1, image, write, bindless);
+
+ LLVMValueRef components[8];
+ for (unsigned i = 0; i < 4; ++i)
+ components[i] = ac_llvm_extract_elem(&ctx->ac, descriptor, i);
+
+ for (unsigned i = 4; i < 8; ++i)
+ components[i] = ac_llvm_extract_elem(&ctx->ac, descriptor2, i);
+ descriptor = ac_build_gather_values(&ctx->ac, components, 8);
+ }
+
+ return descriptor;
}
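Boiled down, the function above addresses a descriptor as a byte offset into the set. A small sketch of that arithmetic; the struct is a stand-in for radv_descriptor_set_binding_layout, not the real type, and the LLVM code folds the constant part into a GEP while feeding the dynamic index, scaled to 16/32-byte elements, to the SGPR load.

#include <stdint.h>

/* Byte offset of the descriptor selected by radv_get_sampler_desc():
 * binding start + per-kind bias + (constant + dynamic index) * stride. */
struct binding_info {
   uint32_t offset; /* byte offset of the binding within the set */
   uint32_t size;   /* byte stride between array elements        */
};

static uint32_t
desc_byte_offset(const struct binding_info *b, uint32_t kind_bias,
                 uint32_t constant_index, uint32_t dynamic_index)
{
   /* kind_bias: +32 for FMASK, +32 * plane for planes 1/2, the combined
    * image+sampler offset for AC_DESC_SAMPLER, 0 otherwise. */
   return b->offset + kind_bias + (constant_index + dynamic_index) * b->size;
}

int
main(void)
{
   /* Example: binding at byte 64, 32-byte elements, plane 1, index 3. */
   struct binding_info b = {64, 32};
   return desc_byte_offset(&b, 32, 0, 3) == 192 ? 0 : 1;
}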
/* For 2_10_10_10 formats the alpha is handled as unsigned by pre-Vega HW,
 * so we may need to fix it up. */
static LLVMValueRef
-adjust_vertex_fetch_alpha(struct radv_shader_context *ctx,
- unsigned adjustment,
- LLVMValueRef alpha)
-{
- if (adjustment == AC_FETCH_FORMAT_NONE)
- return alpha;
-
- LLVMValueRef c30 = LLVMConstInt(ctx->ac.i32, 30, 0);
-
- alpha = LLVMBuildBitCast(ctx->ac.builder, alpha, ctx->ac.f32, "");
-
- if (adjustment == AC_FETCH_FORMAT_SSCALED)
- alpha = LLVMBuildFPToUI(ctx->ac.builder, alpha, ctx->ac.i32, "");
- else
- alpha = ac_to_integer(&ctx->ac, alpha);
-
- /* For the integer-like cases, do a natural sign extension.
- *
- * For the SNORM case, the values are 0.0, 0.333, 0.666, 1.0
- * and happen to contain 0, 1, 2, 3 as the two LSBs of the
- * exponent.
- */
- alpha = LLVMBuildShl(ctx->ac.builder, alpha,
- adjustment == AC_FETCH_FORMAT_SNORM ?
- LLVMConstInt(ctx->ac.i32, 7, 0) : c30, "");
- alpha = LLVMBuildAShr(ctx->ac.builder, alpha, c30, "");
-
- /* Convert back to the right type. */
- if (adjustment == AC_FETCH_FORMAT_SNORM) {
- LLVMValueRef clamp;
- LLVMValueRef neg_one = LLVMConstReal(ctx->ac.f32, -1.0);
- alpha = LLVMBuildSIToFP(ctx->ac.builder, alpha, ctx->ac.f32, "");
- clamp = LLVMBuildFCmp(ctx->ac.builder, LLVMRealULT, alpha, neg_one, "");
- alpha = LLVMBuildSelect(ctx->ac.builder, clamp, neg_one, alpha, "");
- } else if (adjustment == AC_FETCH_FORMAT_SSCALED) {
- alpha = LLVMBuildSIToFP(ctx->ac.builder, alpha, ctx->ac.f32, "");
- }
-
- return LLVMBuildBitCast(ctx->ac.builder, alpha, ctx->ac.i32, "");
+adjust_vertex_fetch_alpha(struct radv_shader_context *ctx, unsigned adjustment, LLVMValueRef alpha)
+{
+ if (adjustment == AC_FETCH_FORMAT_NONE)
+ return alpha;
+
+ LLVMValueRef c30 = LLVMConstInt(ctx->ac.i32, 30, 0);
+
+ alpha = LLVMBuildBitCast(ctx->ac.builder, alpha, ctx->ac.f32, "");
+
+ if (adjustment == AC_FETCH_FORMAT_SSCALED)
+ alpha = LLVMBuildFPToUI(ctx->ac.builder, alpha, ctx->ac.i32, "");
+ else
+ alpha = ac_to_integer(&ctx->ac, alpha);
+
+ /* For the integer-like cases, do a natural sign extension.
+ *
+ * For the SNORM case, the values are 0.0, 0.333, 0.666, 1.0
+ * and happen to contain 0, 1, 2, 3 as the two LSBs of the
+ * exponent.
+ */
+ alpha =
+ LLVMBuildShl(ctx->ac.builder, alpha,
+ adjustment == AC_FETCH_FORMAT_SNORM ? LLVMConstInt(ctx->ac.i32, 7, 0) : c30, "");
+ alpha = LLVMBuildAShr(ctx->ac.builder, alpha, c30, "");
+
+ /* Convert back to the right type. */
+ if (adjustment == AC_FETCH_FORMAT_SNORM) {
+ LLVMValueRef clamp;
+ LLVMValueRef neg_one = LLVMConstReal(ctx->ac.f32, -1.0);
+ alpha = LLVMBuildSIToFP(ctx->ac.builder, alpha, ctx->ac.f32, "");
+ clamp = LLVMBuildFCmp(ctx->ac.builder, LLVMRealULT, alpha, neg_one, "");
+ alpha = LLVMBuildSelect(ctx->ac.builder, clamp, neg_one, alpha, "");
+ } else if (adjustment == AC_FETCH_FORMAT_SSCALED) {
+ alpha = LLVMBuildSIToFP(ctx->ac.builder, alpha, ctx->ac.f32, "");
+ }
+
+ return LLVMBuildBitCast(ctx->ac.builder, alpha, ctx->ac.i32, "");
}
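The SNORM trick in the comment above is easy to check on the CPU: the four legal alpha values carry the source 2-bit field in the low two exponent bits, so a shift left by 7 followed by an arithmetic shift right by 30 recovers the sign-extended value. A standalone check in plain C, assuming arithmetic right shift of signed ints as on GCC/Clang; the hardware-returned floats may differ in mantissa but share these exponent bits.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* CPU-side replay of the SNORM branch of adjust_vertex_fetch_alpha():
 * recover the signed 2-bit alpha from its float encoding. */
static int
snorm_alpha_value(float alpha)
{
   uint32_t bits;
   memcpy(&bits, &alpha, sizeof(bits));
   return ((int32_t)(bits << 7)) >> 30; /* shl 7, ashr 30 */
}

int
main(void)
{
   const float in[4] = {0.0f, 1.0f / 3.0f, 2.0f / 3.0f, 1.0f};
   for (int i = 0; i < 4; i++)
      printf("%f -> %d\n", in[i], snorm_alpha_value(in[i]));
   /* prints 0, 1, -2, -1: the natural sign extension of 0, 1, 2, 3 */
   return 0;
}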
static LLVMValueRef
-radv_fixup_vertex_input_fetches(struct radv_shader_context *ctx,
- LLVMValueRef value,
- unsigned num_channels,
- bool is_float)
+radv_fixup_vertex_input_fetches(struct radv_shader_context *ctx, LLVMValueRef value,
+ unsigned num_channels, bool is_float)
{
- LLVMValueRef zero = is_float ? ctx->ac.f32_0 : ctx->ac.i32_0;
- LLVMValueRef one = is_float ? ctx->ac.f32_1 : ctx->ac.i32_1;
- LLVMValueRef chan[4];
+ LLVMValueRef zero = is_float ? ctx->ac.f32_0 : ctx->ac.i32_0;
+ LLVMValueRef one = is_float ? ctx->ac.f32_1 : ctx->ac.i32_1;
+ LLVMValueRef chan[4];
- if (LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMVectorTypeKind) {
- unsigned vec_size = LLVMGetVectorSize(LLVMTypeOf(value));
+ if (LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMVectorTypeKind) {
+ unsigned vec_size = LLVMGetVectorSize(LLVMTypeOf(value));
- if (num_channels == 4 && num_channels == vec_size)
- return value;
+ if (num_channels == 4 && num_channels == vec_size)
+ return value;
- num_channels = MIN2(num_channels, vec_size);
+ num_channels = MIN2(num_channels, vec_size);
- for (unsigned i = 0; i < num_channels; i++)
- chan[i] = ac_llvm_extract_elem(&ctx->ac, value, i);
- } else {
- assert(num_channels == 1);
- chan[0] = value;
- }
+ for (unsigned i = 0; i < num_channels; i++)
+ chan[i] = ac_llvm_extract_elem(&ctx->ac, value, i);
+ } else {
+ assert(num_channels == 1);
+ chan[0] = value;
+ }
- for (unsigned i = num_channels; i < 4; i++) {
- chan[i] = i == 3 ? one : zero;
- chan[i] = ac_to_integer(&ctx->ac, chan[i]);
- }
+ for (unsigned i = num_channels; i < 4; i++) {
+ chan[i] = i == 3 ? one : zero;
+ chan[i] = ac_to_integer(&ctx->ac, chan[i]);
+ }
- return ac_build_gather_values(&ctx->ac, chan, 4);
+ return ac_build_gather_values(&ctx->ac, chan, 4);
}
static void
-handle_vs_input_decl(struct radv_shader_context *ctx,
- struct nir_variable *variable)
-{
- LLVMValueRef t_list_ptr = ac_get_arg(&ctx->ac, ctx->args->ac.vertex_buffers);
- LLVMValueRef t_offset;
- LLVMValueRef t_list;
- LLVMValueRef input;
- LLVMValueRef buffer_index;
- unsigned attrib_count = glsl_count_attribute_slots(variable->type, true);
-
-
- enum glsl_base_type type = glsl_get_base_type(variable->type);
- for (unsigned i = 0; i < attrib_count; ++i) {
- LLVMValueRef output[4];
- unsigned attrib_index = variable->data.location + i - VERT_ATTRIB_GENERIC0;
- unsigned attrib_format = ctx->args->options->key.vs.vertex_attribute_formats[attrib_index];
- unsigned data_format = attrib_format & 0x0f;
- unsigned num_format = (attrib_format >> 4) & 0x07;
- bool is_float = num_format != V_008F0C_BUF_NUM_FORMAT_UINT &&
- num_format != V_008F0C_BUF_NUM_FORMAT_SINT;
- uint8_t input_usage_mask =
- ctx->args->shader_info->vs.input_usage_mask[variable->data.location + i];
- unsigned num_input_channels = util_last_bit(input_usage_mask);
-
- if (num_input_channels == 0)
- continue;
-
- if (ctx->args->options->key.vs.instance_rate_inputs & (1u << attrib_index)) {
- uint32_t divisor = ctx->args->options->key.vs.instance_rate_divisors[attrib_index];
-
- if (divisor) {
- buffer_index = ctx->abi.instance_id;
-
- if (divisor != 1) {
- buffer_index = LLVMBuildUDiv(ctx->ac.builder, buffer_index,
- LLVMConstInt(ctx->ac.i32, divisor, 0), "");
- }
- } else {
- buffer_index = ctx->ac.i32_0;
- }
-
- buffer_index = LLVMBuildAdd(ctx->ac.builder,
- ac_get_arg(&ctx->ac,
- ctx->args->ac.start_instance),\
- buffer_index, "");
- } else {
- buffer_index = LLVMBuildAdd(ctx->ac.builder,
- ctx->abi.vertex_id,
- ac_get_arg(&ctx->ac,
- ctx->args->ac.base_vertex), "");
- }
-
- const struct ac_data_format_info *vtx_info = ac_get_data_format_info(data_format);
-
- /* Adjust the number of channels to load based on the vertex
- * attribute format.
- */
- unsigned num_channels = MIN2(num_input_channels, vtx_info->num_channels);
- unsigned attrib_binding = ctx->args->options->key.vs.vertex_attribute_bindings[attrib_index];
- unsigned attrib_offset = ctx->args->options->key.vs.vertex_attribute_offsets[attrib_index];
- unsigned attrib_stride = ctx->args->options->key.vs.vertex_attribute_strides[attrib_index];
- unsigned alpha_adjust = ctx->args->options->key.vs.alpha_adjust[attrib_index];
-
- if (ctx->args->options->key.vs.post_shuffle & (1 << attrib_index)) {
- /* Always load, at least, 3 channels for formats that
- * need to be shuffled because X<->Z.
- */
- num_channels = MAX2(num_channels, 3);
- }
-
- t_offset = LLVMConstInt(ctx->ac.i32, attrib_binding, false);
- t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
-
- /* Always split typed vertex buffer loads on GFX6 and GFX10+
- * to avoid any alignment issues that triggers memory
- * violations and eventually a GPU hang. This can happen if
- * the stride (static or dynamic) is unaligned and also if the
- * VBO offset is aligned to a scalar (eg. stride is 8 and VBO
- * offset is 2 for R16G16B16A16_SNORM).
- */
- if (ctx->ac.chip_class == GFX6 ||
- ctx->ac.chip_class >= GFX10) {
- unsigned chan_format = vtx_info->chan_format;
- LLVMValueRef values[4];
-
- assert(ctx->ac.chip_class == GFX6 ||
- ctx->ac.chip_class >= GFX10);
-
- for (unsigned chan = 0; chan < num_channels; chan++) {
- unsigned chan_offset = attrib_offset + chan * vtx_info->chan_byte_size;
- LLVMValueRef chan_index = buffer_index;
-
- if (attrib_stride != 0 && chan_offset > attrib_stride) {
- LLVMValueRef buffer_offset =
- LLVMConstInt(ctx->ac.i32,
- chan_offset / attrib_stride, false);
-
- chan_index = LLVMBuildAdd(ctx->ac.builder,
- buffer_index,
- buffer_offset, "");
-
- chan_offset = chan_offset % attrib_stride;
- }
-
- values[chan] = ac_build_struct_tbuffer_load(&ctx->ac, t_list,
- chan_index,
- LLVMConstInt(ctx->ac.i32, chan_offset, false),
- ctx->ac.i32_0, ctx->ac.i32_0, 1,
- chan_format, num_format, 0, true);
- }
-
- input = ac_build_gather_values(&ctx->ac, values, num_channels);
- } else {
- if (attrib_stride != 0 && attrib_offset > attrib_stride) {
- LLVMValueRef buffer_offset =
- LLVMConstInt(ctx->ac.i32,
- attrib_offset / attrib_stride, false);
-
- buffer_index = LLVMBuildAdd(ctx->ac.builder,
- buffer_index,
- buffer_offset, "");
-
- attrib_offset = attrib_offset % attrib_stride;
- }
-
- input = ac_build_struct_tbuffer_load(&ctx->ac, t_list,
- buffer_index,
- LLVMConstInt(ctx->ac.i32, attrib_offset, false),
- ctx->ac.i32_0, ctx->ac.i32_0,
- num_channels,
- data_format, num_format, 0, true);
- }
-
- if (ctx->args->options->key.vs.post_shuffle & (1 << attrib_index)) {
- LLVMValueRef c[4];
- c[0] = ac_llvm_extract_elem(&ctx->ac, input, 2);
- c[1] = ac_llvm_extract_elem(&ctx->ac, input, 1);
- c[2] = ac_llvm_extract_elem(&ctx->ac, input, 0);
- c[3] = ac_llvm_extract_elem(&ctx->ac, input, 3);
-
- input = ac_build_gather_values(&ctx->ac, c, 4);
- }
-
- input = radv_fixup_vertex_input_fetches(ctx, input, num_channels,
- is_float);
-
- for (unsigned chan = 0; chan < 4; chan++) {
- LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false);
- output[chan] = LLVMBuildExtractElement(ctx->ac.builder, input, llvm_chan, "");
- if (type == GLSL_TYPE_FLOAT16) {
- output[chan] = LLVMBuildBitCast(ctx->ac.builder, output[chan], ctx->ac.f32, "");
- output[chan] = LLVMBuildFPTrunc(ctx->ac.builder, output[chan], ctx->ac.f16, "");
- }
- }
-
- output[3] = adjust_vertex_fetch_alpha(ctx, alpha_adjust, output[3]);
-
- for (unsigned chan = 0; chan < 4; chan++) {
- output[chan] = ac_to_integer(&ctx->ac, output[chan]);
- if (type == GLSL_TYPE_UINT16 || type == GLSL_TYPE_INT16)
- output[chan] = LLVMBuildTrunc(ctx->ac.builder, output[chan], ctx->ac.i16, "");
-
- ctx->inputs[ac_llvm_reg_index_soa(variable->data.location + i, chan)] = output[chan];
- }
- }
+handle_vs_input_decl(struct radv_shader_context *ctx, struct nir_variable *variable)
+{
+ LLVMValueRef t_list_ptr = ac_get_arg(&ctx->ac, ctx->args->ac.vertex_buffers);
+ LLVMValueRef t_offset;
+ LLVMValueRef t_list;
+ LLVMValueRef input;
+ LLVMValueRef buffer_index;
+ unsigned attrib_count = glsl_count_attribute_slots(variable->type, true);
+
+ enum glsl_base_type type = glsl_get_base_type(variable->type);
+ for (unsigned i = 0; i < attrib_count; ++i) {
+ LLVMValueRef output[4];
+ unsigned attrib_index = variable->data.location + i - VERT_ATTRIB_GENERIC0;
+ unsigned attrib_format = ctx->args->options->key.vs.vertex_attribute_formats[attrib_index];
+ unsigned data_format = attrib_format & 0x0f;
+ unsigned num_format = (attrib_format >> 4) & 0x07;
+ bool is_float =
+ num_format != V_008F0C_BUF_NUM_FORMAT_UINT && num_format != V_008F0C_BUF_NUM_FORMAT_SINT;
+ uint8_t input_usage_mask =
+ ctx->args->shader_info->vs.input_usage_mask[variable->data.location + i];
+ unsigned num_input_channels = util_last_bit(input_usage_mask);
+
+ if (num_input_channels == 0)
+ continue;
+
+ if (ctx->args->options->key.vs.instance_rate_inputs & (1u << attrib_index)) {
+ uint32_t divisor = ctx->args->options->key.vs.instance_rate_divisors[attrib_index];
+
+ if (divisor) {
+ buffer_index = ctx->abi.instance_id;
+
+ if (divisor != 1) {
+ buffer_index = LLVMBuildUDiv(ctx->ac.builder, buffer_index,
+ LLVMConstInt(ctx->ac.i32, divisor, 0), "");
+ }
+ } else {
+ buffer_index = ctx->ac.i32_0;
+ }
+
+ buffer_index = LLVMBuildAdd(
+ ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->ac.start_instance), buffer_index, "");
+ } else {
+ buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.vertex_id,
+ ac_get_arg(&ctx->ac, ctx->args->ac.base_vertex), "");
+ }
+
+ const struct ac_data_format_info *vtx_info = ac_get_data_format_info(data_format);
+
+ /* Adjust the number of channels to load based on the vertex
+ * attribute format.
+ */
+ unsigned num_channels = MIN2(num_input_channels, vtx_info->num_channels);
+ unsigned attrib_binding = ctx->args->options->key.vs.vertex_attribute_bindings[attrib_index];
+ unsigned attrib_offset = ctx->args->options->key.vs.vertex_attribute_offsets[attrib_index];
+ unsigned attrib_stride = ctx->args->options->key.vs.vertex_attribute_strides[attrib_index];
+ unsigned alpha_adjust = ctx->args->options->key.vs.alpha_adjust[attrib_index];
+
+ if (ctx->args->options->key.vs.post_shuffle & (1 << attrib_index)) {
+         /* Always load at least 3 channels for formats that
+          * need to be shuffled because X<->Z.
+          */
+ num_channels = MAX2(num_channels, 3);
+ }
+
+ t_offset = LLVMConstInt(ctx->ac.i32, attrib_binding, false);
+ t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
+
+      /* Always split typed vertex buffer loads on GFX6 and GFX10+
+       * to avoid any alignment issues that trigger memory
+       * violations and eventually a GPU hang. This can happen if
+       * the stride (static or dynamic) is unaligned and also if the
+       * VBO offset is aligned to a scalar (e.g. stride is 8 and VBO
+       * offset is 2 for R16G16B16A16_SNORM).
+       */
+ if (ctx->ac.chip_class == GFX6 || ctx->ac.chip_class >= GFX10) {
+ unsigned chan_format = vtx_info->chan_format;
+ LLVMValueRef values[4];
+
+ assert(ctx->ac.chip_class == GFX6 || ctx->ac.chip_class >= GFX10);
+
+ for (unsigned chan = 0; chan < num_channels; chan++) {
+ unsigned chan_offset = attrib_offset + chan * vtx_info->chan_byte_size;
+ LLVMValueRef chan_index = buffer_index;
+
+ if (attrib_stride != 0 && chan_offset > attrib_stride) {
+ LLVMValueRef buffer_offset =
+ LLVMConstInt(ctx->ac.i32, chan_offset / attrib_stride, false);
+
+ chan_index = LLVMBuildAdd(ctx->ac.builder, buffer_index, buffer_offset, "");
+
+ chan_offset = chan_offset % attrib_stride;
+ }
+
+ values[chan] = ac_build_struct_tbuffer_load(
+ &ctx->ac, t_list, chan_index, LLVMConstInt(ctx->ac.i32, chan_offset, false),
+ ctx->ac.i32_0, ctx->ac.i32_0, 1, chan_format, num_format, 0, true);
+ }
+
+ input = ac_build_gather_values(&ctx->ac, values, num_channels);
+ } else {
+ if (attrib_stride != 0 && attrib_offset > attrib_stride) {
+ LLVMValueRef buffer_offset =
+ LLVMConstInt(ctx->ac.i32, attrib_offset / attrib_stride, false);
+
+ buffer_index = LLVMBuildAdd(ctx->ac.builder, buffer_index, buffer_offset, "");
+
+ attrib_offset = attrib_offset % attrib_stride;
+ }
+
+ input = ac_build_struct_tbuffer_load(
+ &ctx->ac, t_list, buffer_index, LLVMConstInt(ctx->ac.i32, attrib_offset, false),
+ ctx->ac.i32_0, ctx->ac.i32_0, num_channels, data_format, num_format, 0, true);
+ }
+
+ if (ctx->args->options->key.vs.post_shuffle & (1 << attrib_index)) {
+ LLVMValueRef c[4];
+ c[0] = ac_llvm_extract_elem(&ctx->ac, input, 2);
+ c[1] = ac_llvm_extract_elem(&ctx->ac, input, 1);
+ c[2] = ac_llvm_extract_elem(&ctx->ac, input, 0);
+ c[3] = ac_llvm_extract_elem(&ctx->ac, input, 3);
+
+ input = ac_build_gather_values(&ctx->ac, c, 4);
+ }
+
+ input = radv_fixup_vertex_input_fetches(ctx, input, num_channels, is_float);
+
+ for (unsigned chan = 0; chan < 4; chan++) {
+ LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false);
+ output[chan] = LLVMBuildExtractElement(ctx->ac.builder, input, llvm_chan, "");
+ if (type == GLSL_TYPE_FLOAT16) {
+ output[chan] = LLVMBuildBitCast(ctx->ac.builder, output[chan], ctx->ac.f32, "");
+ output[chan] = LLVMBuildFPTrunc(ctx->ac.builder, output[chan], ctx->ac.f16, "");
+ }
+ }
+
+ output[3] = adjust_vertex_fetch_alpha(ctx, alpha_adjust, output[3]);
+
+ for (unsigned chan = 0; chan < 4; chan++) {
+ output[chan] = ac_to_integer(&ctx->ac, output[chan]);
+ if (type == GLSL_TYPE_UINT16 || type == GLSL_TYPE_INT16)
+ output[chan] = LLVMBuildTrunc(ctx->ac.builder, output[chan], ctx->ac.i16, "");
+
+ ctx->inputs[ac_llvm_reg_index_soa(variable->data.location + i, chan)] = output[chan];
+ }
+ }
}
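On the GFX6/GFX10+ path above, any constant channel offset that exceeds the binding stride is folded into the vertex index so the constant offset stays below the stride. A small sketch of that normalisation in plain C, with a hypothetical helper name and example numbers.

#include <stdio.h>

/* Mirror the per-channel offset fix-up in handle_vs_input_decl(): move
 * whole strides from the constant offset into the buffer index. */
static void
split_attrib_offset(unsigned attrib_offset, unsigned stride, unsigned chan,
                    unsigned chan_byte_size, unsigned *index_bias,
                    unsigned *const_offset)
{
   unsigned chan_offset = attrib_offset + chan * chan_byte_size;

   *index_bias = 0;
   if (stride != 0 && chan_offset > stride) {
      *index_bias = chan_offset / stride; /* added to the per-vertex index */
      chan_offset = chan_offset % stride; /* stays as the constant offset  */
   }
   *const_offset = chan_offset;
}

int
main(void)
{
   unsigned bias, off;
   /* e.g. attribute offset 20, stride 8, channel 1 of a 4-byte channel */
   split_attrib_offset(20, 8, 1, 4, &bias, &off);
   printf("index += %u, offset = %u\n", bias, off); /* index += 3, offset = 0 */
   return 0;
}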
static void
-handle_vs_inputs(struct radv_shader_context *ctx,
- struct nir_shader *nir) {
- nir_foreach_shader_in_variable(variable, nir)
- handle_vs_input_decl(ctx, variable);
+handle_vs_inputs(struct radv_shader_context *ctx, struct nir_shader *nir)
+{
+ nir_foreach_shader_in_variable (variable, nir)
+ handle_vs_input_decl(ctx, variable);
}
static void
-prepare_interp_optimize(struct radv_shader_context *ctx,
- struct nir_shader *nir)
-{
- bool uses_center = false;
- bool uses_centroid = false;
- nir_foreach_shader_in_variable(variable, nir) {
- if (glsl_get_base_type(glsl_without_array(variable->type)) != GLSL_TYPE_FLOAT ||
- variable->data.sample)
- continue;
-
- if (variable->data.centroid)
- uses_centroid = true;
- else
- uses_center = true;
- }
-
- ctx->abi.persp_centroid = ac_get_arg(&ctx->ac, ctx->args->ac.persp_centroid);
- ctx->abi.linear_centroid = ac_get_arg(&ctx->ac, ctx->args->ac.linear_centroid);
-
- if (uses_center && uses_centroid) {
- LLVMValueRef sel = LLVMBuildICmp(ctx->ac.builder, LLVMIntSLT,
- ac_get_arg(&ctx->ac, ctx->args->ac.prim_mask),
- ctx->ac.i32_0, "");
- ctx->abi.persp_centroid =
- LLVMBuildSelect(ctx->ac.builder, sel,
- ac_get_arg(&ctx->ac, ctx->args->ac.persp_center),
- ctx->abi.persp_centroid, "");
- ctx->abi.linear_centroid =
- LLVMBuildSelect(ctx->ac.builder, sel,
- ac_get_arg(&ctx->ac, ctx->args->ac.linear_center),
- ctx->abi.linear_centroid, "");
- }
+prepare_interp_optimize(struct radv_shader_context *ctx, struct nir_shader *nir)
+{
+ bool uses_center = false;
+ bool uses_centroid = false;
+ nir_foreach_shader_in_variable (variable, nir) {
+ if (glsl_get_base_type(glsl_without_array(variable->type)) != GLSL_TYPE_FLOAT ||
+ variable->data.sample)
+ continue;
+
+ if (variable->data.centroid)
+ uses_centroid = true;
+ else
+ uses_center = true;
+ }
+
+ ctx->abi.persp_centroid = ac_get_arg(&ctx->ac, ctx->args->ac.persp_centroid);
+ ctx->abi.linear_centroid = ac_get_arg(&ctx->ac, ctx->args->ac.linear_centroid);
+
+ if (uses_center && uses_centroid) {
+ LLVMValueRef sel =
+ LLVMBuildICmp(ctx->ac.builder, LLVMIntSLT, ac_get_arg(&ctx->ac, ctx->args->ac.prim_mask),
+ ctx->ac.i32_0, "");
+ ctx->abi.persp_centroid =
+ LLVMBuildSelect(ctx->ac.builder, sel, ac_get_arg(&ctx->ac, ctx->args->ac.persp_center),
+ ctx->abi.persp_centroid, "");
+ ctx->abi.linear_centroid =
+ LLVMBuildSelect(ctx->ac.builder, sel, ac_get_arg(&ctx->ac, ctx->args->ac.linear_center),
+ ctx->abi.linear_centroid, "");
+ }
}
static void
-scan_shader_output_decl(struct radv_shader_context *ctx,
- struct nir_variable *variable,
- struct nir_shader *shader,
- gl_shader_stage stage)
+scan_shader_output_decl(struct radv_shader_context *ctx, struct nir_variable *variable,
+ struct nir_shader *shader, gl_shader_stage stage)
{
- int idx = variable->data.driver_location;
- unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
- uint64_t mask_attribs;
+ int idx = variable->data.driver_location;
+ unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
+ uint64_t mask_attribs;
- if (variable->data.compact) {
- unsigned component_count = variable->data.location_frac +
- glsl_get_length(variable->type);
- attrib_count = (component_count + 3) / 4;
- }
+ if (variable->data.compact) {
+ unsigned component_count = variable->data.location_frac + glsl_get_length(variable->type);
+ attrib_count = (component_count + 3) / 4;
+ }
- mask_attribs = ((1ull << attrib_count) - 1) << idx;
+ mask_attribs = ((1ull << attrib_count) - 1) << idx;
- ctx->output_mask |= mask_attribs;
+ ctx->output_mask |= mask_attribs;
}
-
/* Initialize arguments for the shader export intrinsic */
static void
-si_llvm_init_export_args(struct radv_shader_context *ctx,
- LLVMValueRef *values,
- unsigned enabled_channels,
- unsigned target,
- struct ac_export_args *args)
-{
- /* Specify the channels that are enabled. */
- args->enabled_channels = enabled_channels;
-
- /* Specify whether the EXEC mask represents the valid mask */
- args->valid_mask = 0;
-
- /* Specify whether this is the last export */
- args->done = 0;
-
- /* Specify the target we are exporting */
- args->target = target;
-
- args->compr = false;
- args->out[0] = LLVMGetUndef(ctx->ac.f32);
- args->out[1] = LLVMGetUndef(ctx->ac.f32);
- args->out[2] = LLVMGetUndef(ctx->ac.f32);
- args->out[3] = LLVMGetUndef(ctx->ac.f32);
-
- if (!values)
- return;
-
- bool is_16bit = ac_get_type_size(LLVMTypeOf(values[0])) == 2;
- if (ctx->stage == MESA_SHADER_FRAGMENT) {
- unsigned index = target - V_008DFC_SQ_EXP_MRT;
- unsigned col_format = (ctx->args->options->key.fs.col_format >> (4 * index)) & 0xf;
- bool is_int8 = (ctx->args->options->key.fs.is_int8 >> index) & 1;
- bool is_int10 = (ctx->args->options->key.fs.is_int10 >> index) & 1;
-
- LLVMValueRef (*packf)(struct ac_llvm_context *ctx, LLVMValueRef args[2]) = NULL;
- LLVMValueRef (*packi)(struct ac_llvm_context *ctx, LLVMValueRef args[2],
- unsigned bits, bool hi) = NULL;
-
- switch(col_format) {
- case V_028714_SPI_SHADER_ZERO:
- args->enabled_channels = 0; /* writemask */
- args->target = V_008DFC_SQ_EXP_NULL;
- break;
-
- case V_028714_SPI_SHADER_32_R:
- args->enabled_channels = 1;
- args->out[0] = values[0];
- break;
-
- case V_028714_SPI_SHADER_32_GR:
- args->enabled_channels = 0x3;
- args->out[0] = values[0];
- args->out[1] = values[1];
- break;
-
- case V_028714_SPI_SHADER_32_AR:
- if (ctx->ac.chip_class >= GFX10) {
- args->enabled_channels = 0x3;
- args->out[0] = values[0];
- args->out[1] = values[3];
- } else {
- args->enabled_channels = 0x9;
- args->out[0] = values[0];
- args->out[3] = values[3];
- }
- break;
-
- case V_028714_SPI_SHADER_FP16_ABGR:
- args->enabled_channels = 0xf;
- packf = ac_build_cvt_pkrtz_f16;
- if (is_16bit) {
- for (unsigned chan = 0; chan < 4; chan++)
- values[chan] = LLVMBuildFPExt(ctx->ac.builder,
- values[chan],
- ctx->ac.f32, "");
- }
- break;
-
- case V_028714_SPI_SHADER_UNORM16_ABGR:
- args->enabled_channels = 0xf;
- packf = ac_build_cvt_pknorm_u16;
- break;
-
- case V_028714_SPI_SHADER_SNORM16_ABGR:
- args->enabled_channels = 0xf;
- packf = ac_build_cvt_pknorm_i16;
- break;
-
- case V_028714_SPI_SHADER_UINT16_ABGR:
- args->enabled_channels = 0xf;
- packi = ac_build_cvt_pk_u16;
- if (is_16bit) {
- for (unsigned chan = 0; chan < 4; chan++)
- values[chan] = LLVMBuildZExt(ctx->ac.builder,
- ac_to_integer(&ctx->ac, values[chan]),
- ctx->ac.i32, "");
- }
- break;
-
- case V_028714_SPI_SHADER_SINT16_ABGR:
- args->enabled_channels = 0xf;
- packi = ac_build_cvt_pk_i16;
- if (is_16bit) {
- for (unsigned chan = 0; chan < 4; chan++)
- values[chan] = LLVMBuildSExt(ctx->ac.builder,
- ac_to_integer(&ctx->ac, values[chan]),
- ctx->ac.i32, "");
- }
- break;
-
- default:
- case V_028714_SPI_SHADER_32_ABGR:
- memcpy(&args->out[0], values, sizeof(values[0]) * 4);
- break;
- }
-
- /* Replace NaN by zero (only 32-bit) to fix game bugs if
- * requested.
- */
- if (ctx->args->options->enable_mrt_output_nan_fixup &&
- !is_16bit &&
- (col_format == V_028714_SPI_SHADER_32_R ||
- col_format == V_028714_SPI_SHADER_32_GR ||
- col_format == V_028714_SPI_SHADER_32_AR ||
- col_format == V_028714_SPI_SHADER_32_ABGR ||
- col_format == V_028714_SPI_SHADER_FP16_ABGR)) {
- for (unsigned i = 0; i < 4; i++) {
- LLVMValueRef class_args[2] = {
- values[i],
- LLVMConstInt(ctx->ac.i32, S_NAN | Q_NAN, false)
- };
- LLVMValueRef isnan =
- ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.class.f32", ctx->ac.i1,
- class_args, 2, AC_FUNC_ATTR_READNONE);
- values[i] = LLVMBuildSelect(ctx->ac.builder, isnan,
- ctx->ac.f32_0,
- values[i], "");
- }
- }
-
- /* Pack f16 or norm_i16/u16. */
- if (packf) {
- for (unsigned chan = 0; chan < 2; chan++) {
- LLVMValueRef pack_args[2] = {
- values[2 * chan],
- values[2 * chan + 1]
- };
- LLVMValueRef packed;
-
- packed = packf(&ctx->ac, pack_args);
- args->out[chan] = ac_to_float(&ctx->ac, packed);
- }
- args->compr = 1; /* COMPR flag */
- }
-
- /* Pack i16/u16. */
- if (packi) {
- for (unsigned chan = 0; chan < 2; chan++) {
- LLVMValueRef pack_args[2] = {
- ac_to_integer(&ctx->ac, values[2 * chan]),
- ac_to_integer(&ctx->ac, values[2 * chan + 1])
- };
- LLVMValueRef packed;
-
- packed = packi(&ctx->ac, pack_args,
- is_int8 ? 8 : is_int10 ? 10 : 16,
- chan == 1);
- args->out[chan] = ac_to_float(&ctx->ac, packed);
- }
- args->compr = 1; /* COMPR flag */
- }
- return;
- }
-
- if (is_16bit) {
- for (unsigned chan = 0; chan < 4; chan++) {
- values[chan] = LLVMBuildBitCast(ctx->ac.builder, values[chan], ctx->ac.i16, "");
- args->out[chan] = LLVMBuildZExt(ctx->ac.builder, values[chan], ctx->ac.i32, "");
- }
- } else
- memcpy(&args->out[0], values, sizeof(values[0]) * 4);
-
- for (unsigned i = 0; i < 4; ++i)
- args->out[i] = ac_to_float(&ctx->ac, args->out[i]);
+si_llvm_init_export_args(struct radv_shader_context *ctx, LLVMValueRef *values,
+ unsigned enabled_channels, unsigned target, struct ac_export_args *args)
+{
+ /* Specify the channels that are enabled. */
+ args->enabled_channels = enabled_channels;
+
+ /* Specify whether the EXEC mask represents the valid mask */
+ args->valid_mask = 0;
+
+ /* Specify whether this is the last export */
+ args->done = 0;
+
+ /* Specify the target we are exporting */
+ args->target = target;
+
+ args->compr = false;
+ args->out[0] = LLVMGetUndef(ctx->ac.f32);
+ args->out[1] = LLVMGetUndef(ctx->ac.f32);
+ args->out[2] = LLVMGetUndef(ctx->ac.f32);
+ args->out[3] = LLVMGetUndef(ctx->ac.f32);
+
+ if (!values)
+ return;
+
+ bool is_16bit = ac_get_type_size(LLVMTypeOf(values[0])) == 2;
+ if (ctx->stage == MESA_SHADER_FRAGMENT) {
+ unsigned index = target - V_008DFC_SQ_EXP_MRT;
+ unsigned col_format = (ctx->args->options->key.fs.col_format >> (4 * index)) & 0xf;
+ bool is_int8 = (ctx->args->options->key.fs.is_int8 >> index) & 1;
+ bool is_int10 = (ctx->args->options->key.fs.is_int10 >> index) & 1;
+
+ LLVMValueRef (*packf)(struct ac_llvm_context * ctx, LLVMValueRef args[2]) = NULL;
+ LLVMValueRef (*packi)(struct ac_llvm_context * ctx, LLVMValueRef args[2], unsigned bits,
+ bool hi) = NULL;
+
+ switch (col_format) {
+ case V_028714_SPI_SHADER_ZERO:
+ args->enabled_channels = 0; /* writemask */
+ args->target = V_008DFC_SQ_EXP_NULL;
+ break;
+
+ case V_028714_SPI_SHADER_32_R:
+ args->enabled_channels = 1;
+ args->out[0] = values[0];
+ break;
+
+ case V_028714_SPI_SHADER_32_GR:
+ args->enabled_channels = 0x3;
+ args->out[0] = values[0];
+ args->out[1] = values[1];
+ break;
+
+ case V_028714_SPI_SHADER_32_AR:
+ if (ctx->ac.chip_class >= GFX10) {
+ args->enabled_channels = 0x3;
+ args->out[0] = values[0];
+ args->out[1] = values[3];
+ } else {
+ args->enabled_channels = 0x9;
+ args->out[0] = values[0];
+ args->out[3] = values[3];
+ }
+ break;
+
+ case V_028714_SPI_SHADER_FP16_ABGR:
+ args->enabled_channels = 0xf;
+ packf = ac_build_cvt_pkrtz_f16;
+ if (is_16bit) {
+ for (unsigned chan = 0; chan < 4; chan++)
+ values[chan] = LLVMBuildFPExt(ctx->ac.builder, values[chan], ctx->ac.f32, "");
+ }
+ break;
+
+ case V_028714_SPI_SHADER_UNORM16_ABGR:
+ args->enabled_channels = 0xf;
+ packf = ac_build_cvt_pknorm_u16;
+ break;
+
+ case V_028714_SPI_SHADER_SNORM16_ABGR:
+ args->enabled_channels = 0xf;
+ packf = ac_build_cvt_pknorm_i16;
+ break;
+
+ case V_028714_SPI_SHADER_UINT16_ABGR:
+ args->enabled_channels = 0xf;
+ packi = ac_build_cvt_pk_u16;
+ if (is_16bit) {
+ for (unsigned chan = 0; chan < 4; chan++)
+ values[chan] = LLVMBuildZExt(ctx->ac.builder, ac_to_integer(&ctx->ac, values[chan]),
+ ctx->ac.i32, "");
+ }
+ break;
+
+ case V_028714_SPI_SHADER_SINT16_ABGR:
+ args->enabled_channels = 0xf;
+ packi = ac_build_cvt_pk_i16;
+ if (is_16bit) {
+ for (unsigned chan = 0; chan < 4; chan++)
+ values[chan] = LLVMBuildSExt(ctx->ac.builder, ac_to_integer(&ctx->ac, values[chan]),
+ ctx->ac.i32, "");
+ }
+ break;
+
+ default:
+ case V_028714_SPI_SHADER_32_ABGR:
+ memcpy(&args->out[0], values, sizeof(values[0]) * 4);
+ break;
+ }
+
+ /* Replace NaN by zero (only 32-bit) to fix game bugs if
+ * requested.
+ */
+ if (ctx->args->options->enable_mrt_output_nan_fixup && !is_16bit &&
+ (col_format == V_028714_SPI_SHADER_32_R || col_format == V_028714_SPI_SHADER_32_GR ||
+ col_format == V_028714_SPI_SHADER_32_AR || col_format == V_028714_SPI_SHADER_32_ABGR ||
+ col_format == V_028714_SPI_SHADER_FP16_ABGR)) {
+ for (unsigned i = 0; i < 4; i++) {
+ LLVMValueRef class_args[2] = {values[i],
+ LLVMConstInt(ctx->ac.i32, S_NAN | Q_NAN, false)};
+ LLVMValueRef isnan = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.class.f32", ctx->ac.i1,
+ class_args, 2, AC_FUNC_ATTR_READNONE);
+ values[i] = LLVMBuildSelect(ctx->ac.builder, isnan, ctx->ac.f32_0, values[i], "");
+ }
+ }
+
+ /* Pack f16 or norm_i16/u16. */
+ if (packf) {
+ for (unsigned chan = 0; chan < 2; chan++) {
+ LLVMValueRef pack_args[2] = {values[2 * chan], values[2 * chan + 1]};
+ LLVMValueRef packed;
+
+ packed = packf(&ctx->ac, pack_args);
+ args->out[chan] = ac_to_float(&ctx->ac, packed);
+ }
+ args->compr = 1; /* COMPR flag */
+ }
+
+ /* Pack i16/u16. */
+ if (packi) {
+ for (unsigned chan = 0; chan < 2; chan++) {
+ LLVMValueRef pack_args[2] = {ac_to_integer(&ctx->ac, values[2 * chan]),
+ ac_to_integer(&ctx->ac, values[2 * chan + 1])};
+ LLVMValueRef packed;
+
+ packed = packi(&ctx->ac, pack_args, is_int8 ? 8 : is_int10 ? 10 : 16, chan == 1);
+ args->out[chan] = ac_to_float(&ctx->ac, packed);
+ }
+ args->compr = 1; /* COMPR flag */
+ }
+ return;
+ }
+
+ if (is_16bit) {
+ for (unsigned chan = 0; chan < 4; chan++) {
+ values[chan] = LLVMBuildBitCast(ctx->ac.builder, values[chan], ctx->ac.i16, "");
+ args->out[chan] = LLVMBuildZExt(ctx->ac.builder, values[chan], ctx->ac.i32, "");
+ }
+ } else
+ memcpy(&args->out[0], values, sizeof(values[0]) * 4);
+
+ for (unsigned i = 0; i < 4; ++i)
+ args->out[i] = ac_to_float(&ctx->ac, args->out[i]);
}
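The per-MRT state the fragment path above consumes is packed into the pipeline key: one 4-bit SPI_SHADER_* value per color target in col_format, plus one bit per target in is_int8/is_int10. A tiny sketch of that unpacking; the helpers and example key are illustrative, not driver API.

#include <stdbool.h>
#include <stdint.h>

/* How si_llvm_init_export_args() selects the export format for MRT i. */
static unsigned
mrt_col_format(uint32_t col_format_key, unsigned mrt_index)
{
   return (col_format_key >> (4 * mrt_index)) & 0xf;
}

static bool
mrt_is_int8(uint32_t is_int8_key, unsigned mrt_index)
{
   return (is_int8_key >> mrt_index) & 1;
}

int
main(void)
{
   /* Example key: MRT0 = 0x4, MRT1 = 0xc (placeholder values). */
   uint32_t key = 0x4u | (0xcu << 4);
   return mrt_col_format(key, 1) == 0xc && !mrt_is_int8(0, 1) ? 0 : 1;
}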
static void
-radv_export_param(struct radv_shader_context *ctx, unsigned index,
- LLVMValueRef *values, unsigned enabled_channels)
+radv_export_param(struct radv_shader_context *ctx, unsigned index, LLVMValueRef *values,
+ unsigned enabled_channels)
{
- struct ac_export_args args;
+ struct ac_export_args args;
- si_llvm_init_export_args(ctx, values, enabled_channels,
- V_008DFC_SQ_EXP_PARAM + index, &args);
- ac_build_export(&ctx->ac, &args);
+ si_llvm_init_export_args(ctx, values, enabled_channels, V_008DFC_SQ_EXP_PARAM + index, &args);
+ ac_build_export(&ctx->ac, &args);
}
static LLVMValueRef
radv_load_output(struct radv_shader_context *ctx, unsigned index, unsigned chan)
{
- LLVMValueRef output = ctx->abi.outputs[ac_llvm_reg_index_soa(index, chan)];
- return LLVMBuildLoad(ctx->ac.builder, output, "");
+ LLVMValueRef output = ctx->abi.outputs[ac_llvm_reg_index_soa(index, chan)];
+ return LLVMBuildLoad(ctx->ac.builder, output, "");
}
static void
-radv_emit_stream_output(struct radv_shader_context *ctx,
- LLVMValueRef const *so_buffers,
- LLVMValueRef const *so_write_offsets,
- const struct radv_stream_output *output,
- struct radv_shader_output_values *shader_out)
-{
- unsigned num_comps = util_bitcount(output->component_mask);
- unsigned buf = output->buffer;
- unsigned offset = output->offset;
- unsigned start;
- LLVMValueRef out[4];
-
- assert(num_comps && num_comps <= 4);
- if (!num_comps || num_comps > 4)
- return;
-
- /* Get the first component. */
- start = ffs(output->component_mask) - 1;
-
- /* Load the output as int. */
- for (int i = 0; i < num_comps; i++) {
- out[i] = ac_to_integer(&ctx->ac, shader_out->values[start + i]);
- }
-
- /* Pack the output. */
- LLVMValueRef vdata = NULL;
-
- switch (num_comps) {
- case 1: /* as i32 */
- vdata = out[0];
- break;
- case 2: /* as v2i32 */
- case 3: /* as v4i32 (aligned to 4) */
- out[3] = LLVMGetUndef(ctx->ac.i32);
- /* fall through */
- case 4: /* as v4i32 */
- vdata = ac_build_gather_values(&ctx->ac, out,
- !ac_has_vec3_support(ctx->ac.chip_class, false) ?
- util_next_power_of_two(num_comps) :
- num_comps);
- break;
- }
-
- ac_build_buffer_store_dword(&ctx->ac, so_buffers[buf],
- vdata, num_comps, so_write_offsets[buf],
- ctx->ac.i32_0, offset,
- ac_glc | ac_slc);
+radv_emit_stream_output(struct radv_shader_context *ctx, LLVMValueRef const *so_buffers,
+ LLVMValueRef const *so_write_offsets,
+ const struct radv_stream_output *output,
+ struct radv_shader_output_values *shader_out)
+{
+ unsigned num_comps = util_bitcount(output->component_mask);
+ unsigned buf = output->buffer;
+ unsigned offset = output->offset;
+ unsigned start;
+ LLVMValueRef out[4];
+
+ assert(num_comps && num_comps <= 4);
+ if (!num_comps || num_comps > 4)
+ return;
+
+ /* Get the first component. */
+ start = ffs(output->component_mask) - 1;
+
+ /* Load the output as int. */
+ for (int i = 0; i < num_comps; i++) {
+ out[i] = ac_to_integer(&ctx->ac, shader_out->values[start + i]);
+ }
+
+ /* Pack the output. */
+ LLVMValueRef vdata = NULL;
+
+ switch (num_comps) {
+ case 1: /* as i32 */
+ vdata = out[0];
+ break;
+ case 2: /* as v2i32 */
+ case 3: /* as v4i32 (aligned to 4) */
+ out[3] = LLVMGetUndef(ctx->ac.i32);
+ /* fall through */
+ case 4: /* as v4i32 */
+ vdata = ac_build_gather_values(&ctx->ac, out,
+ !ac_has_vec3_support(ctx->ac.chip_class, false)
+ ? util_next_power_of_two(num_comps)
+ : num_comps);
+ break;
+ }
+
+ ac_build_buffer_store_dword(&ctx->ac, so_buffers[buf], vdata, num_comps, so_write_offsets[buf],
+ ctx->ac.i32_0, offset, ac_glc | ac_slc);
}
static void
radv_emit_streamout(struct radv_shader_context *ctx, unsigned stream)
{
- int i;
-
- /* Get bits [22:16], i.e. (so_param >> 16) & 127; */
- assert(ctx->args->ac.streamout_config.used);
- LLVMValueRef so_vtx_count =
- ac_build_bfe(&ctx->ac,
- ac_get_arg(&ctx->ac, ctx->args->ac.streamout_config),
- LLVMConstInt(ctx->ac.i32, 16, false),
- LLVMConstInt(ctx->ac.i32, 7, false), false);
-
- LLVMValueRef tid = ac_get_thread_id(&ctx->ac);
-
- /* can_emit = tid < so_vtx_count; */
- LLVMValueRef can_emit = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT,
- tid, so_vtx_count, "");
-
- /* Emit the streamout code conditionally. This actually avoids
- * out-of-bounds buffer access. The hw tells us via the SGPR
- * (so_vtx_count) which threads are allowed to emit streamout data.
- */
- ac_build_ifcc(&ctx->ac, can_emit, 6501);
- {
- /* The buffer offset is computed as follows:
- * ByteOffset = streamout_offset[buffer_id]*4 +
- * (streamout_write_index + thread_id)*stride[buffer_id] +
- * attrib_offset
- */
- LLVMValueRef so_write_index =
- ac_get_arg(&ctx->ac, ctx->args->ac.streamout_write_index);
-
- /* Compute (streamout_write_index + thread_id). */
- so_write_index =
- LLVMBuildAdd(ctx->ac.builder, so_write_index, tid, "");
-
- /* Load the descriptor and compute the write offset for each
- * enabled buffer.
- */
- LLVMValueRef so_write_offset[4] = {0};
- LLVMValueRef so_buffers[4] = {0};
- LLVMValueRef buf_ptr = ac_get_arg(&ctx->ac, ctx->args->streamout_buffers);
-
- for (i = 0; i < 4; i++) {
- uint16_t stride = ctx->args->shader_info->so.strides[i];
-
- if (!stride)
- continue;
-
- LLVMValueRef offset =
- LLVMConstInt(ctx->ac.i32, i, false);
-
- so_buffers[i] = ac_build_load_to_sgpr(&ctx->ac,
- buf_ptr, offset);
-
- LLVMValueRef so_offset =
- ac_get_arg(&ctx->ac, ctx->args->ac.streamout_offset[i]);
-
- so_offset = LLVMBuildMul(ctx->ac.builder, so_offset,
- LLVMConstInt(ctx->ac.i32, 4, false), "");
-
- so_write_offset[i] =
- ac_build_imad(&ctx->ac, so_write_index,
- LLVMConstInt(ctx->ac.i32,
- stride * 4, false),
- so_offset);
- }
-
- /* Write streamout data. */
- for (i = 0; i < ctx->args->shader_info->so.num_outputs; i++) {
- struct radv_shader_output_values shader_out = {0};
- struct radv_stream_output *output =
- &ctx->args->shader_info->so.outputs[i];
-
- if (stream != output->stream)
- continue;
-
- for (int j = 0; j < 4; j++) {
- shader_out.values[j] =
- radv_load_output(ctx, output->location, j);
- }
-
- radv_emit_stream_output(ctx, so_buffers,so_write_offset,
- output, &shader_out);
- }
- }
- ac_build_endif(&ctx->ac, 6501);
-}
+ int i;
-static void
-radv_build_param_exports(struct radv_shader_context *ctx,
- struct radv_shader_output_values *outputs,
- unsigned noutput,
- struct radv_vs_output_info *outinfo,
- bool export_clip_dists)
-{
- unsigned param_count = 0;
+ /* Get bits [22:16], i.e. (so_param >> 16) & 127; */
+ assert(ctx->args->ac.streamout_config.used);
+ LLVMValueRef so_vtx_count = ac_build_bfe(
+ &ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.streamout_config),
+ LLVMConstInt(ctx->ac.i32, 16, false), LLVMConstInt(ctx->ac.i32, 7, false), false);
- for (unsigned i = 0; i < noutput; i++) {
- unsigned slot_name = outputs[i].slot_name;
- unsigned usage_mask = outputs[i].usage_mask;
+ LLVMValueRef tid = ac_get_thread_id(&ctx->ac);
- if (slot_name != VARYING_SLOT_LAYER &&
- slot_name != VARYING_SLOT_PRIMITIVE_ID &&
- slot_name != VARYING_SLOT_VIEWPORT &&
- slot_name != VARYING_SLOT_CLIP_DIST0 &&
- slot_name != VARYING_SLOT_CLIP_DIST1 &&
- slot_name < VARYING_SLOT_VAR0)
- continue;
+ /* can_emit = tid < so_vtx_count; */
+ LLVMValueRef can_emit = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, tid, so_vtx_count, "");
- if ((slot_name == VARYING_SLOT_CLIP_DIST0 ||
- slot_name == VARYING_SLOT_CLIP_DIST1) && !export_clip_dists)
- continue;
+ /* Emit the streamout code conditionally. This actually avoids
+ * out-of-bounds buffer access. The hw tells us via the SGPR
+ * (so_vtx_count) which threads are allowed to emit streamout data.
+ */
+ ac_build_ifcc(&ctx->ac, can_emit, 6501);
+ {
+ /* The buffer offset is computed as follows:
+ * ByteOffset = streamout_offset[buffer_id]*4 +
+ * (streamout_write_index + thread_id)*stride[buffer_id] +
+ * attrib_offset
+ */
+ LLVMValueRef so_write_index = ac_get_arg(&ctx->ac, ctx->args->ac.streamout_write_index);
- radv_export_param(ctx, param_count, outputs[i].values, usage_mask);
+ /* Compute (streamout_write_index + thread_id). */
+ so_write_index = LLVMBuildAdd(ctx->ac.builder, so_write_index, tid, "");
- assert(i < ARRAY_SIZE(outinfo->vs_output_param_offset));
- outinfo->vs_output_param_offset[slot_name] = param_count++;
- }
+ /* Load the descriptor and compute the write offset for each
+ * enabled buffer.
+ */
+ LLVMValueRef so_write_offset[4] = {0};
+ LLVMValueRef so_buffers[4] = {0};
+ LLVMValueRef buf_ptr = ac_get_arg(&ctx->ac, ctx->args->streamout_buffers);
- outinfo->param_exports = param_count;
-}
+ for (i = 0; i < 4; i++) {
+ uint16_t stride = ctx->args->shader_info->so.strides[i];
-/* Generate export instructions for hardware VS shader stage or NGG GS stage
- * (position and parameter data only).
- */
-static void
-radv_llvm_export_vs(struct radv_shader_context *ctx,
- struct radv_shader_output_values *outputs,
- unsigned noutput,
- struct radv_vs_output_info *outinfo,
- bool export_clip_dists)
-{
- LLVMValueRef psize_value = NULL, layer_value = NULL, viewport_value = NULL;
- LLVMValueRef primitive_shading_rate = NULL;
- struct ac_export_args pos_args[4] = {0};
- unsigned pos_idx, index;
- int i;
-
- /* Build position exports */
- for (i = 0; i < noutput; i++) {
- switch (outputs[i].slot_name) {
- case VARYING_SLOT_POS:
- si_llvm_init_export_args(ctx, outputs[i].values, 0xf,
- V_008DFC_SQ_EXP_POS, &pos_args[0]);
- break;
- case VARYING_SLOT_PSIZ:
- psize_value = outputs[i].values[0];
- break;
- case VARYING_SLOT_LAYER:
- layer_value = outputs[i].values[0];
- break;
- case VARYING_SLOT_VIEWPORT:
- viewport_value = outputs[i].values[0];
- break;
- case VARYING_SLOT_PRIMITIVE_SHADING_RATE:
- primitive_shading_rate = outputs[i].values[0];
- break;
- case VARYING_SLOT_CLIP_DIST0:
- case VARYING_SLOT_CLIP_DIST1:
- index = 2 + outputs[i].slot_index;
- si_llvm_init_export_args(ctx, outputs[i].values, 0xf,
- V_008DFC_SQ_EXP_POS + index,
- &pos_args[index]);
- break;
- default:
- break;
- }
- }
-
- /* We need to add the position output manually if it's missing. */
- if (!pos_args[0].out[0]) {
- pos_args[0].enabled_channels = 0xf; /* writemask */
- pos_args[0].valid_mask = 0; /* EXEC mask */
- pos_args[0].done = 0; /* last export? */
- pos_args[0].target = V_008DFC_SQ_EXP_POS;
- pos_args[0].compr = 0; /* COMPR flag */
- pos_args[0].out[0] = ctx->ac.f32_0; /* X */
- pos_args[0].out[1] = ctx->ac.f32_0; /* Y */
- pos_args[0].out[2] = ctx->ac.f32_0; /* Z */
- pos_args[0].out[3] = ctx->ac.f32_1; /* W */
- }
-
- if (outinfo->writes_pointsize ||
- outinfo->writes_layer ||
- outinfo->writes_layer ||
- outinfo->writes_viewport_index ||
- outinfo->writes_primitive_shading_rate) {
- pos_args[1].enabled_channels = ((outinfo->writes_pointsize == true ? 1 : 0) |
- (outinfo->writes_primitive_shading_rate == true ? 2 : 0) |
- (outinfo->writes_layer == true ? 4 : 0));
- pos_args[1].valid_mask = 0;
- pos_args[1].done = 0;
- pos_args[1].target = V_008DFC_SQ_EXP_POS + 1;
- pos_args[1].compr = 0;
- pos_args[1].out[0] = ctx->ac.f32_0; /* X */
- pos_args[1].out[1] = ctx->ac.f32_0; /* Y */
- pos_args[1].out[2] = ctx->ac.f32_0; /* Z */
- pos_args[1].out[3] = ctx->ac.f32_0; /* W */
-
- if (outinfo->writes_pointsize == true)
- pos_args[1].out[0] = psize_value;
- if (outinfo->writes_layer == true)
- pos_args[1].out[2] = layer_value;
- if (outinfo->writes_viewport_index == true) {
- if (ctx->args->options->chip_class >= GFX9) {
- /* GFX9 has the layer in out.z[10:0] and the viewport
- * index in out.z[19:16].
- */
- LLVMValueRef v = viewport_value;
- v = ac_to_integer(&ctx->ac, v);
- v = LLVMBuildShl(ctx->ac.builder, v,
- LLVMConstInt(ctx->ac.i32, 16, false),
- "");
- v = LLVMBuildOr(ctx->ac.builder, v,
- ac_to_integer(&ctx->ac, pos_args[1].out[2]), "");
-
- pos_args[1].out[2] = ac_to_float(&ctx->ac, v);
- pos_args[1].enabled_channels |= 1 << 2;
- } else {
- pos_args[1].out[3] = viewport_value;
- pos_args[1].enabled_channels |= 1 << 3;
- }
- }
-
- if (outinfo->writes_primitive_shading_rate) {
- LLVMValueRef v = ac_to_integer(&ctx->ac, primitive_shading_rate);
- LLVMValueRef cond;
-
- /* xRate = (shadingRate & (Horizontal2Pixels | Horizontal4Pixels)) ? 0x1 : 0x0; */
- LLVMValueRef x_rate =
- LLVMBuildAnd(ctx->ac.builder, v,
- LLVMConstInt(ctx->ac.i32, 4 | 8, false), "");
- cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, x_rate, ctx->ac.i32_0, "");
- x_rate = LLVMBuildSelect(ctx->ac.builder, cond,
- ctx->ac.i32_1, ctx->ac.i32_0, "");
-
- /* yRate = (shadingRate & (Vertical2Pixels | Vertical4Pixels)) ? 0x1 : 0x0; */
- LLVMValueRef y_rate =
- LLVMBuildAnd(ctx->ac.builder, v,
- LLVMConstInt(ctx->ac.i32, 1 | 2, false), "");
- cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, y_rate, ctx->ac.i32_0, "");
- y_rate = LLVMBuildSelect(ctx->ac.builder, cond,
- ctx->ac.i32_1, ctx->ac.i32_0, "");
-
- /* Bits [2:3] = VRS rate X
- * Bits [4:5] = VRS rate Y
- * HW shading rate = (xRate << 2) | (yRate << 4)
- */
- v = LLVMBuildOr(ctx->ac.builder,
- LLVMBuildShl(ctx->ac.builder, x_rate,
- LLVMConstInt(ctx->ac.i32, 2, false), ""),
- LLVMBuildShl(ctx->ac.builder, y_rate,
- LLVMConstInt(ctx->ac.i32, 4, false), ""), "");
- pos_args[1].out[1] = ac_to_float(&ctx->ac, v);
- }
- }
-
- for (i = 0; i < 4; i++) {
- if (pos_args[i].out[0])
- outinfo->pos_exports++;
- }
-
- /* GFX10 skip POS0 exports if EXEC=0 and DONE=0, causing a hang.
- * Setting valid_mask=1 prevents it and has no other effect.
- */
- if (ctx->ac.chip_class == GFX10)
- pos_args[0].valid_mask = 1;
-
- pos_idx = 0;
- for (i = 0; i < 4; i++) {
- if (!pos_args[i].out[0])
- continue;
-
- /* Specify the target we are exporting */
- pos_args[i].target = V_008DFC_SQ_EXP_POS + pos_idx++;
-
- if (pos_idx == outinfo->pos_exports)
- /* Specify that this is the last export */
- pos_args[i].done = 1;
-
- ac_build_export(&ctx->ac, &pos_args[i]);
- }
-
- /* Build parameter exports */
- radv_build_param_exports(ctx, outputs, noutput, outinfo, export_clip_dists);
-}
+ if (!stride)
+ continue;
-static void
-handle_vs_outputs_post(struct radv_shader_context *ctx,
- bool export_prim_id,
- bool export_clip_dists,
- struct radv_vs_output_info *outinfo)
-{
- struct radv_shader_output_values *outputs;
- unsigned noutput = 0;
-
- if (ctx->args->options->key.has_multiview_view_index) {
- LLVMValueRef* tmp_out = &ctx->abi.outputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)];
- if(!*tmp_out) {
- for(unsigned i = 0; i < 4; ++i)
- ctx->abi.outputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, i)] =
- ac_build_alloca_undef(&ctx->ac, ctx->ac.f32, "");
- }
+ LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, i, false);
+
+ so_buffers[i] = ac_build_load_to_sgpr(&ctx->ac, buf_ptr, offset);
+
+ LLVMValueRef so_offset = ac_get_arg(&ctx->ac, ctx->args->ac.streamout_offset[i]);
+
+ so_offset =
+ LLVMBuildMul(ctx->ac.builder, so_offset, LLVMConstInt(ctx->ac.i32, 4, false), "");
- LLVMValueRef view_index = ac_get_arg(&ctx->ac, ctx->args->ac.view_index);
- LLVMBuildStore(ctx->ac.builder, ac_to_float(&ctx->ac, view_index), *tmp_out);
- ctx->output_mask |= 1ull << VARYING_SLOT_LAYER;
- }
+ so_write_offset[i] = ac_build_imad(
+ &ctx->ac, so_write_index, LLVMConstInt(ctx->ac.i32, stride * 4, false), so_offset);
+ }
- memset(outinfo->vs_output_param_offset, AC_EXP_PARAM_UNDEFINED,
- sizeof(outinfo->vs_output_param_offset));
- outinfo->pos_exports = 0;
+ /* Write streamout data. */
+ for (i = 0; i < ctx->args->shader_info->so.num_outputs; i++) {
+ struct radv_shader_output_values shader_out = {0};
+ struct radv_stream_output *output = &ctx->args->shader_info->so.outputs[i];
- if (!ctx->args->options->use_ngg_streamout &&
- ctx->args->shader_info->so.num_outputs &&
- !ctx->args->is_gs_copy_shader) {
- /* The GS copy shader emission already emits streamout. */
- radv_emit_streamout(ctx, 0);
- }
+ if (stream != output->stream)
+ continue;
- /* Allocate a temporary array for the output values. */
- unsigned num_outputs = util_bitcount64(ctx->output_mask) + export_prim_id;
- outputs = malloc(num_outputs * sizeof(outputs[0]));
+ for (int j = 0; j < 4; j++) {
+ shader_out.values[j] = radv_load_output(ctx, output->location, j);
+ }
- for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
- if (!(ctx->output_mask & (1ull << i)))
- continue;
+ radv_emit_stream_output(ctx, so_buffers, so_write_offset, output, &shader_out);
+ }
+ }
+ ac_build_endif(&ctx->ac, 6501);
+}
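Putting numbers on the ByteOffset formula from the comment above: the per-buffer offset and stride are kept in dwords (hence the * 4), the write index already includes the thread id, and the attribute offset goes in as the store's constant offset. A worked example in plain C with hypothetical values, mirroring the arithmetic rather than the LLVM calls.

#include <stdint.h>
#include <stdio.h>

/* ByteOffset = streamout_offset[buf] * 4
 *            + (streamout_write_index + thread_id) * stride[buf] * 4
 *            + attrib_offset
 * as computed in radv_emit_streamout() (strides stored in dwords). */
static uint32_t
so_byte_offset(uint32_t so_offset_dw, uint32_t write_index, uint32_t tid,
               uint16_t stride_dw, uint32_t attrib_offset)
{
   return so_offset_dw * 4 + (write_index + tid) * stride_dw * 4 + attrib_offset;
}

int
main(void)
{
   /* buffer offset 16 dwords, write index 100, thread 5,
    * 4-dword stride, attribute at byte 8 within the vertex */
   printf("%u\n", so_byte_offset(16, 100, 5, 4, 8)); /* 64 + 1680 + 8 = 1752 */
   return 0;
}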
- outputs[noutput].slot_name = i;
- outputs[noutput].slot_index = i == VARYING_SLOT_CLIP_DIST1;
+static void
+radv_build_param_exports(struct radv_shader_context *ctx, struct radv_shader_output_values *outputs,
+ unsigned noutput, struct radv_vs_output_info *outinfo,
+ bool export_clip_dists)
+{
+ unsigned param_count = 0;
- if (ctx->stage == MESA_SHADER_VERTEX &&
- !ctx->args->is_gs_copy_shader) {
- outputs[noutput].usage_mask =
- ctx->args->shader_info->vs.output_usage_mask[i];
- } else if (ctx->stage == MESA_SHADER_TESS_EVAL) {
- outputs[noutput].usage_mask =
- ctx->args->shader_info->tes.output_usage_mask[i];
- } else {
- assert(ctx->args->is_gs_copy_shader);
- outputs[noutput].usage_mask =
- ctx->args->shader_info->gs.output_usage_mask[i];
- }
+ for (unsigned i = 0; i < noutput; i++) {
+ unsigned slot_name = outputs[i].slot_name;
+ unsigned usage_mask = outputs[i].usage_mask;
- for (unsigned j = 0; j < 4; j++) {
- outputs[noutput].values[j] =
- ac_to_float(&ctx->ac, radv_load_output(ctx, i, j));
- }
+ if (slot_name != VARYING_SLOT_LAYER && slot_name != VARYING_SLOT_PRIMITIVE_ID &&
+ slot_name != VARYING_SLOT_VIEWPORT && slot_name != VARYING_SLOT_CLIP_DIST0 &&
+ slot_name != VARYING_SLOT_CLIP_DIST1 && slot_name < VARYING_SLOT_VAR0)
+ continue;
- noutput++;
- }
+ if ((slot_name == VARYING_SLOT_CLIP_DIST0 || slot_name == VARYING_SLOT_CLIP_DIST1) &&
+ !export_clip_dists)
+ continue;
- /* Export PrimitiveID. */
- if (export_prim_id) {
- outputs[noutput].slot_name = VARYING_SLOT_PRIMITIVE_ID;
- outputs[noutput].slot_index = 0;
- outputs[noutput].usage_mask = 0x1;
- if (ctx->stage == MESA_SHADER_TESS_EVAL)
- outputs[noutput].values[0] =
- ac_get_arg(&ctx->ac, ctx->args->ac.tes_patch_id);
- else
- outputs[noutput].values[0] =
- ac_get_arg(&ctx->ac, ctx->args->ac.vs_prim_id);
- for (unsigned j = 1; j < 4; j++)
- outputs[noutput].values[j] = ctx->ac.f32_0;
- noutput++;
- }
+ radv_export_param(ctx, param_count, outputs[i].values, usage_mask);
- radv_llvm_export_vs(ctx, outputs, noutput, outinfo, export_clip_dists);
+ assert(i < ARRAY_SIZE(outinfo->vs_output_param_offset));
+ outinfo->vs_output_param_offset[slot_name] = param_count++;
+ }
- free(outputs);
+ outinfo->param_exports = param_count;
}
-static LLVMValueRef get_wave_id_in_tg(struct radv_shader_context *ctx)
+/* Generate export instructions for hardware VS shader stage or NGG GS stage
+ * (position and parameter data only).
+ */
+static void
+radv_llvm_export_vs(struct radv_shader_context *ctx, struct radv_shader_output_values *outputs,
+ unsigned noutput, struct radv_vs_output_info *outinfo, bool export_clip_dists)
+{
+ LLVMValueRef psize_value = NULL, layer_value = NULL, viewport_value = NULL;
+ LLVMValueRef primitive_shading_rate = NULL;
+ struct ac_export_args pos_args[4] = {0};
+ unsigned pos_idx, index;
+ int i;
+
+ /* Build position exports */
+ for (i = 0; i < noutput; i++) {
+ switch (outputs[i].slot_name) {
+ case VARYING_SLOT_POS:
+ si_llvm_init_export_args(ctx, outputs[i].values, 0xf, V_008DFC_SQ_EXP_POS, &pos_args[0]);
+ break;
+ case VARYING_SLOT_PSIZ:
+ psize_value = outputs[i].values[0];
+ break;
+ case VARYING_SLOT_LAYER:
+ layer_value = outputs[i].values[0];
+ break;
+ case VARYING_SLOT_VIEWPORT:
+ viewport_value = outputs[i].values[0];
+ break;
+ case VARYING_SLOT_PRIMITIVE_SHADING_RATE:
+ primitive_shading_rate = outputs[i].values[0];
+ break;
+ case VARYING_SLOT_CLIP_DIST0:
+ case VARYING_SLOT_CLIP_DIST1:
+ index = 2 + outputs[i].slot_index;
+ si_llvm_init_export_args(ctx, outputs[i].values, 0xf, V_008DFC_SQ_EXP_POS + index,
+ &pos_args[index]);
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* We need to add the position output manually if it's missing. */
+ if (!pos_args[0].out[0]) {
+ pos_args[0].enabled_channels = 0xf; /* writemask */
+ pos_args[0].valid_mask = 0; /* EXEC mask */
+ pos_args[0].done = 0; /* last export? */
+ pos_args[0].target = V_008DFC_SQ_EXP_POS;
+ pos_args[0].compr = 0; /* COMPR flag */
+ pos_args[0].out[0] = ctx->ac.f32_0; /* X */
+ pos_args[0].out[1] = ctx->ac.f32_0; /* Y */
+ pos_args[0].out[2] = ctx->ac.f32_0; /* Z */
+ pos_args[0].out[3] = ctx->ac.f32_1; /* W */
+ }
+
+ if (outinfo->writes_pointsize || outinfo->writes_layer || outinfo->writes_layer ||
+ outinfo->writes_viewport_index || outinfo->writes_primitive_shading_rate) {
+ pos_args[1].enabled_channels = ((outinfo->writes_pointsize == true ? 1 : 0) |
+ (outinfo->writes_primitive_shading_rate == true ? 2 : 0) |
+ (outinfo->writes_layer == true ? 4 : 0));
+ pos_args[1].valid_mask = 0;
+ pos_args[1].done = 0;
+ pos_args[1].target = V_008DFC_SQ_EXP_POS + 1;
+ pos_args[1].compr = 0;
+ pos_args[1].out[0] = ctx->ac.f32_0; /* X */
+ pos_args[1].out[1] = ctx->ac.f32_0; /* Y */
+ pos_args[1].out[2] = ctx->ac.f32_0; /* Z */
+ pos_args[1].out[3] = ctx->ac.f32_0; /* W */
+
+ if (outinfo->writes_pointsize == true)
+ pos_args[1].out[0] = psize_value;
+ if (outinfo->writes_layer == true)
+ pos_args[1].out[2] = layer_value;
+ if (outinfo->writes_viewport_index == true) {
+ if (ctx->args->options->chip_class >= GFX9) {
+ /* GFX9 has the layer in out.z[10:0] and the viewport
+ * index in out.z[19:16].
+ */
+ LLVMValueRef v = viewport_value;
+ v = ac_to_integer(&ctx->ac, v);
+ v = LLVMBuildShl(ctx->ac.builder, v, LLVMConstInt(ctx->ac.i32, 16, false), "");
+ v = LLVMBuildOr(ctx->ac.builder, v, ac_to_integer(&ctx->ac, pos_args[1].out[2]), "");
+
+ pos_args[1].out[2] = ac_to_float(&ctx->ac, v);
+ pos_args[1].enabled_channels |= 1 << 2;
+ } else {
+ pos_args[1].out[3] = viewport_value;
+ pos_args[1].enabled_channels |= 1 << 3;
+ }
+ }
+
+ if (outinfo->writes_primitive_shading_rate) {
+ LLVMValueRef v = ac_to_integer(&ctx->ac, primitive_shading_rate);
+ LLVMValueRef cond;
+
+ /* xRate = (shadingRate & (Horizontal2Pixels | Horizontal4Pixels)) ? 0x1 : 0x0; */
+ LLVMValueRef x_rate =
+ LLVMBuildAnd(ctx->ac.builder, v, LLVMConstInt(ctx->ac.i32, 4 | 8, false), "");
+ cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, x_rate, ctx->ac.i32_0, "");
+ x_rate = LLVMBuildSelect(ctx->ac.builder, cond, ctx->ac.i32_1, ctx->ac.i32_0, "");
+
+ /* yRate = (shadingRate & (Vertical2Pixels | Vertical4Pixels)) ? 0x1 : 0x0; */
+ LLVMValueRef y_rate =
+ LLVMBuildAnd(ctx->ac.builder, v, LLVMConstInt(ctx->ac.i32, 1 | 2, false), "");
+ cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, y_rate, ctx->ac.i32_0, "");
+ y_rate = LLVMBuildSelect(ctx->ac.builder, cond, ctx->ac.i32_1, ctx->ac.i32_0, "");
+
+ /* Bits [2:3] = VRS rate X
+ * Bits [4:5] = VRS rate Y
+ * HW shading rate = (xRate << 2) | (yRate << 4)
+ */
+ v = LLVMBuildOr(
+ ctx->ac.builder,
+ LLVMBuildShl(ctx->ac.builder, x_rate, LLVMConstInt(ctx->ac.i32, 2, false), ""),
+ LLVMBuildShl(ctx->ac.builder, y_rate, LLVMConstInt(ctx->ac.i32, 4, false), ""), "");
+ pos_args[1].out[1] = ac_to_float(&ctx->ac, v);
+ }
+ }
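For reference, a minimal scalar sketch of the "misc" position export (pos_args[1]) assembled above, written in plain C with hypothetical helper names (this is not driver code): point size goes in X, the packed VRS rate in Y, and layer/viewport share Z on GFX9+ (W on earlier chips).

#include <stdint.h>

/* Illustrative helpers mirroring the per-lane LLVM IR above. */
static uint32_t pack_hw_vrs_rate(uint32_t shading_rate)
{
   /* xRate/yRate collapse to 0 or 1 and land in bits [2:3] and [4:5]. */
   uint32_t x_rate = (shading_rate & (4u | 8u)) ? 1u : 0u;
   uint32_t y_rate = (shading_rate & (1u | 2u)) ? 1u : 0u;
   return (x_rate << 2) | (y_rate << 4);
}

static uint32_t pack_gfx9_layer_viewport(uint32_t layer, uint32_t viewport)
{
   /* GFX9+: layer in z[10:0], viewport index in z[19:16]. */
   return (viewport << 16) | layer;
}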
+
+ for (i = 0; i < 4; i++) {
+ if (pos_args[i].out[0])
+ outinfo->pos_exports++;
+ }
+
+ /* GFX10 skips POS0 exports if EXEC=0 and DONE=0, causing a hang.
+ * Setting valid_mask=1 prevents it and has no other effect.
+ */
+ if (ctx->ac.chip_class == GFX10)
+ pos_args[0].valid_mask = 1;
+
+ pos_idx = 0;
+ for (i = 0; i < 4; i++) {
+ if (!pos_args[i].out[0])
+ continue;
+
+ /* Specify the target we are exporting */
+ pos_args[i].target = V_008DFC_SQ_EXP_POS + pos_idx++;
+
+ if (pos_idx == outinfo->pos_exports)
+ /* Specify that this is the last export */
+ pos_args[i].done = 1;
+
+ ac_build_export(&ctx->ac, &pos_args[i]);
+ }
+
+ /* Build parameter exports */
+ radv_build_param_exports(ctx, outputs, noutput, outinfo, export_clip_dists);
+}
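As a side note, the target/done bookkeeping in the export loop above amounts to the following small sketch (illustrative names, not driver code): used position exports are packed into consecutive POS targets, and only the last one carries the "done" flag.

struct pos_export { int used; unsigned target; int done; };

static void assign_pos_targets(struct pos_export args[4], unsigned pos_exports,
                               unsigned exp_pos_base /* e.g. V_008DFC_SQ_EXP_POS */)
{
   unsigned pos_idx = 0;
   for (int i = 0; i < 4; i++) {
      if (!args[i].used)
         continue;
      args[i].target = exp_pos_base + pos_idx++;  /* consecutive POS targets */
      args[i].done = (pos_idx == pos_exports);    /* last export closes the group */
   }
}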
+
+static void
+handle_vs_outputs_post(struct radv_shader_context *ctx, bool export_prim_id, bool export_clip_dists,
+ struct radv_vs_output_info *outinfo)
+{
+ struct radv_shader_output_values *outputs;
+ unsigned noutput = 0;
+
+ if (ctx->args->options->key.has_multiview_view_index) {
+ LLVMValueRef *tmp_out = &ctx->abi.outputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)];
+ if (!*tmp_out) {
+ for (unsigned i = 0; i < 4; ++i)
+ ctx->abi.outputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, i)] =
+ ac_build_alloca_undef(&ctx->ac, ctx->ac.f32, "");
+ }
+
+ LLVMValueRef view_index = ac_get_arg(&ctx->ac, ctx->args->ac.view_index);
+ LLVMBuildStore(ctx->ac.builder, ac_to_float(&ctx->ac, view_index), *tmp_out);
+ ctx->output_mask |= 1ull << VARYING_SLOT_LAYER;
+ }
+
+ memset(outinfo->vs_output_param_offset, AC_EXP_PARAM_UNDEFINED,
+ sizeof(outinfo->vs_output_param_offset));
+ outinfo->pos_exports = 0;
+
+ if (!ctx->args->options->use_ngg_streamout && ctx->args->shader_info->so.num_outputs &&
+ !ctx->args->is_gs_copy_shader) {
+ /* The GS copy shader emission already emits streamout. */
+ radv_emit_streamout(ctx, 0);
+ }
+
+ /* Allocate a temporary array for the output values. */
+ unsigned num_outputs = util_bitcount64(ctx->output_mask) + export_prim_id;
+ outputs = malloc(num_outputs * sizeof(outputs[0]));
+
+ for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
+ if (!(ctx->output_mask & (1ull << i)))
+ continue;
+
+ outputs[noutput].slot_name = i;
+ outputs[noutput].slot_index = i == VARYING_SLOT_CLIP_DIST1;
+
+ if (ctx->stage == MESA_SHADER_VERTEX && !ctx->args->is_gs_copy_shader) {
+ outputs[noutput].usage_mask = ctx->args->shader_info->vs.output_usage_mask[i];
+ } else if (ctx->stage == MESA_SHADER_TESS_EVAL) {
+ outputs[noutput].usage_mask = ctx->args->shader_info->tes.output_usage_mask[i];
+ } else {
+ assert(ctx->args->is_gs_copy_shader);
+ outputs[noutput].usage_mask = ctx->args->shader_info->gs.output_usage_mask[i];
+ }
+
+ for (unsigned j = 0; j < 4; j++) {
+ outputs[noutput].values[j] = ac_to_float(&ctx->ac, radv_load_output(ctx, i, j));
+ }
+
+ noutput++;
+ }
+
+ /* Export PrimitiveID. */
+ if (export_prim_id) {
+ outputs[noutput].slot_name = VARYING_SLOT_PRIMITIVE_ID;
+ outputs[noutput].slot_index = 0;
+ outputs[noutput].usage_mask = 0x1;
+ if (ctx->stage == MESA_SHADER_TESS_EVAL)
+ outputs[noutput].values[0] = ac_get_arg(&ctx->ac, ctx->args->ac.tes_patch_id);
+ else
+ outputs[noutput].values[0] = ac_get_arg(&ctx->ac, ctx->args->ac.vs_prim_id);
+ for (unsigned j = 1; j < 4; j++)
+ outputs[noutput].values[j] = ctx->ac.f32_0;
+ noutput++;
+ }
+
+ radv_llvm_export_vs(ctx, outputs, noutput, outinfo, export_clip_dists);
+
+ free(outputs);
+}
+
+static LLVMValueRef
+get_wave_id_in_tg(struct radv_shader_context *ctx)
{
- return ac_unpack_param(&ctx->ac,
- ac_get_arg(&ctx->ac, ctx->args->ac.merged_wave_info), 24, 4);
+ return ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.merged_wave_info), 24, 4);
}
-static LLVMValueRef get_tgsize(struct radv_shader_context *ctx)
+static LLVMValueRef
+get_tgsize(struct radv_shader_context *ctx)
{
- return ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.merged_wave_info), 28, 4);
+ return ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.merged_wave_info), 28, 4);
}
-static LLVMValueRef get_thread_id_in_tg(struct radv_shader_context *ctx)
+static LLVMValueRef
+get_thread_id_in_tg(struct radv_shader_context *ctx)
{
- LLVMBuilderRef builder = ctx->ac.builder;
- LLVMValueRef tmp;
- tmp = LLVMBuildMul(builder, get_wave_id_in_tg(ctx),
- LLVMConstInt(ctx->ac.i32, ctx->ac.wave_size, false), "");
- return LLVMBuildAdd(builder, tmp, ac_get_thread_id(&ctx->ac), "");
+ LLVMBuilderRef builder = ctx->ac.builder;
+ LLVMValueRef tmp;
+ tmp = LLVMBuildMul(builder, get_wave_id_in_tg(ctx),
+ LLVMConstInt(ctx->ac.i32, ctx->ac.wave_size, false), "");
+ return LLVMBuildAdd(builder, tmp, ac_get_thread_id(&ctx->ac), "");
}
-static LLVMValueRef ngg_get_vtx_cnt(struct radv_shader_context *ctx)
+static LLVMValueRef
+ngg_get_vtx_cnt(struct radv_shader_context *ctx)
{
- return ac_build_bfe(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.gs_tg_info),
- LLVMConstInt(ctx->ac.i32, 12, false),
- LLVMConstInt(ctx->ac.i32, 9, false),
- false);
+ return ac_build_bfe(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.gs_tg_info),
+ LLVMConstInt(ctx->ac.i32, 12, false), LLVMConstInt(ctx->ac.i32, 9, false),
+ false);
}
-static LLVMValueRef ngg_get_prim_cnt(struct radv_shader_context *ctx)
+static LLVMValueRef
+ngg_get_prim_cnt(struct radv_shader_context *ctx)
{
- return ac_build_bfe(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.gs_tg_info),
- LLVMConstInt(ctx->ac.i32, 22, false),
- LLVMConstInt(ctx->ac.i32, 9, false),
- false);
+ return ac_build_bfe(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.gs_tg_info),
+ LLVMConstInt(ctx->ac.i32, 22, false), LLVMConstInt(ctx->ac.i32, 9, false),
+ false);
}
-static LLVMValueRef ngg_get_ordered_id(struct radv_shader_context *ctx)
+static LLVMValueRef
+ngg_get_ordered_id(struct radv_shader_context *ctx)
{
- return ac_build_bfe(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.gs_tg_info),
- ctx->ac.i32_0,
- LLVMConstInt(ctx->ac.i32, 12, false),
- false);
+ return ac_build_bfe(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.gs_tg_info), ctx->ac.i32_0,
+ LLVMConstInt(ctx->ac.i32, 12, false), false);
}
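A plain-C view of the bit fields these accessors unpack; offsets and widths are taken directly from the calls above, the helper names are illustrative.

#include <stdint.h>

static inline uint32_t bfe(uint32_t v, unsigned offset, unsigned width)
{
   return (v >> offset) & ((1u << width) - 1u);
}

/* merged_wave_info: wave id in bits [27:24], waves per threadgroup in [31:28]. */
static inline uint32_t sketch_wave_id_in_tg(uint32_t merged_wave_info)
{
   return bfe(merged_wave_info, 24, 4);
}
static inline uint32_t sketch_tgsize(uint32_t merged_wave_info)
{
   return bfe(merged_wave_info, 28, 4);
}

/* gs_tg_info: ordered id in bits [11:0], vertex count in [20:12], primitive count in [30:22]. */
static inline uint32_t sketch_vtx_cnt(uint32_t gs_tg_info)    { return bfe(gs_tg_info, 12, 9); }
static inline uint32_t sketch_prim_cnt(uint32_t gs_tg_info)   { return bfe(gs_tg_info, 22, 9); }
static inline uint32_t sketch_ordered_id(uint32_t gs_tg_info) { return bfe(gs_tg_info, 0, 12); }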
static LLVMValueRef
ngg_gs_get_vertex_storage(struct radv_shader_context *ctx)
{
- unsigned num_outputs = util_bitcount64(ctx->output_mask);
+ unsigned num_outputs = util_bitcount64(ctx->output_mask);
- if (ctx->args->options->key.has_multiview_view_index)
- num_outputs++;
+ if (ctx->args->options->key.has_multiview_view_index)
+ num_outputs++;
- LLVMTypeRef elements[2] = {
- LLVMArrayType(ctx->ac.i32, 4 * num_outputs),
- LLVMArrayType(ctx->ac.i8, 4),
- };
- LLVMTypeRef type = LLVMStructTypeInContext(ctx->ac.context, elements, 2, false);
- type = LLVMPointerType(LLVMArrayType(type, 0), AC_ADDR_SPACE_LDS);
- return LLVMBuildBitCast(ctx->ac.builder, ctx->gs_ngg_emit, type, "");
+ LLVMTypeRef elements[2] = {
+ LLVMArrayType(ctx->ac.i32, 4 * num_outputs),
+ LLVMArrayType(ctx->ac.i8, 4),
+ };
+ LLVMTypeRef type = LLVMStructTypeInContext(ctx->ac.context, elements, 2, false);
+ type = LLVMPointerType(LLVMArrayType(type, 0), AC_ADDR_SPACE_LDS);
+ return LLVMBuildBitCast(ctx->ac.builder, ctx->gs_ngg_emit, type, "");
}
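Conceptually, the LDS record built here has the following per-vertex layout (illustrative C only; the output count is shader-dependent, so a fixed example size is used):

#include <stdint.h>

#define EXAMPLE_NUM_OUTPUTS 8   /* per shader: util_bitcount64(output_mask), +1 with multiview */

struct ngg_gs_lds_vertex {
   uint32_t outputs[4 * EXAMPLE_NUM_OUTPUTS]; /* 4 dwords per output slot */
   uint8_t  prim_flag[4];                     /* one emit/primitive flag byte per GS stream */
};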
/**
@@ -1651,177 +1522,167 @@ ngg_gs_get_vertex_storage(struct radv_shader_context *ctx)
static LLVMValueRef
ngg_gs_vertex_ptr(struct radv_shader_context *ctx, LLVMValueRef vertexidx)
{
- LLVMBuilderRef builder = ctx->ac.builder;
- LLVMValueRef storage = ngg_gs_get_vertex_storage(ctx);
+ LLVMBuilderRef builder = ctx->ac.builder;
+ LLVMValueRef storage = ngg_gs_get_vertex_storage(ctx);
- /* gs_max_out_vertices = 2^(write_stride_2exp) * some odd number */
- unsigned write_stride_2exp = ffs(MAX2(ctx->shader->info.gs.vertices_out, 1)) - 1;
- if (write_stride_2exp) {
- LLVMValueRef row =
- LLVMBuildLShr(builder, vertexidx,
- LLVMConstInt(ctx->ac.i32, 5, false), "");
- LLVMValueRef swizzle =
- LLVMBuildAnd(builder, row,
- LLVMConstInt(ctx->ac.i32, (1u << write_stride_2exp) - 1,
- false), "");
- vertexidx = LLVMBuildXor(builder, vertexidx, swizzle, "");
- }
+ /* gs_max_out_vertices = 2^(write_stride_2exp) * some odd number */
+ unsigned write_stride_2exp = ffs(MAX2(ctx->shader->info.gs.vertices_out, 1)) - 1;
+ if (write_stride_2exp) {
+ LLVMValueRef row = LLVMBuildLShr(builder, vertexidx, LLVMConstInt(ctx->ac.i32, 5, false), "");
+ LLVMValueRef swizzle = LLVMBuildAnd(
+ builder, row, LLVMConstInt(ctx->ac.i32, (1u << write_stride_2exp) - 1, false), "");
+ vertexidx = LLVMBuildXor(builder, vertexidx, swizzle, "");
+ }
- return ac_build_gep0(&ctx->ac, storage, vertexidx);
+ return ac_build_gep0(&ctx->ac, storage, vertexidx);
}
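A scalar sketch of the index swizzle above, assuming plain C (and a GCC/Clang __builtin_ffs): when gs.vertices_out contains a power-of-two factor 2^k, low bits of the row (vertexidx >> 5) are XOR-ed into the index so consecutive vertices do not land in the same LDS banks.

static unsigned swizzle_vertexidx(unsigned vertexidx, unsigned vertices_out)
{
   /* gs_max_out_vertices = 2^(write_stride_2exp) * some odd number */
   unsigned write_stride_2exp = __builtin_ffs(vertices_out ? vertices_out : 1) - 1;
   if (write_stride_2exp) {
      unsigned row = vertexidx >> 5;
      unsigned swizzle = row & ((1u << write_stride_2exp) - 1u);
      vertexidx ^= swizzle;
   }
   return vertexidx;
}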
static LLVMValueRef
-ngg_gs_emit_vertex_ptr(struct radv_shader_context *ctx, LLVMValueRef gsthread,
- LLVMValueRef emitidx)
+ngg_gs_emit_vertex_ptr(struct radv_shader_context *ctx, LLVMValueRef gsthread, LLVMValueRef emitidx)
{
- LLVMBuilderRef builder = ctx->ac.builder;
- LLVMValueRef tmp;
+ LLVMBuilderRef builder = ctx->ac.builder;
+ LLVMValueRef tmp;
- tmp = LLVMConstInt(ctx->ac.i32, ctx->shader->info.gs.vertices_out, false);
- tmp = LLVMBuildMul(builder, tmp, gsthread, "");
- const LLVMValueRef vertexidx = LLVMBuildAdd(builder, tmp, emitidx, "");
- return ngg_gs_vertex_ptr(ctx, vertexidx);
+ tmp = LLVMConstInt(ctx->ac.i32, ctx->shader->info.gs.vertices_out, false);
+ tmp = LLVMBuildMul(builder, tmp, gsthread, "");
+ const LLVMValueRef vertexidx = LLVMBuildAdd(builder, tmp, emitidx, "");
+ return ngg_gs_vertex_ptr(ctx, vertexidx);
}
static LLVMValueRef
ngg_gs_get_emit_output_ptr(struct radv_shader_context *ctx, LLVMValueRef vertexptr,
- unsigned out_idx)
+ unsigned out_idx)
{
- LLVMValueRef gep_idx[3] = {
- ctx->ac.i32_0, /* implied C-style array */
- ctx->ac.i32_0, /* first struct entry */
- LLVMConstInt(ctx->ac.i32, out_idx, false),
- };
- return LLVMBuildGEP(ctx->ac.builder, vertexptr, gep_idx, 3, "");
+ LLVMValueRef gep_idx[3] = {
+ ctx->ac.i32_0, /* implied C-style array */
+ ctx->ac.i32_0, /* first struct entry */
+ LLVMConstInt(ctx->ac.i32, out_idx, false),
+ };
+ return LLVMBuildGEP(ctx->ac.builder, vertexptr, gep_idx, 3, "");
}
static LLVMValueRef
ngg_gs_get_emit_primflag_ptr(struct radv_shader_context *ctx, LLVMValueRef vertexptr,
- unsigned stream)
+ unsigned stream)
{
- LLVMValueRef gep_idx[3] = {
- ctx->ac.i32_0, /* implied C-style array */
- ctx->ac.i32_1, /* second struct entry */
- LLVMConstInt(ctx->ac.i32, stream, false),
- };
- return LLVMBuildGEP(ctx->ac.builder, vertexptr, gep_idx, 3, "");
+ LLVMValueRef gep_idx[3] = {
+ ctx->ac.i32_0, /* implied C-style array */
+ ctx->ac.i32_1, /* second struct entry */
+ LLVMConstInt(ctx->ac.i32, stream, false),
+ };
+ return LLVMBuildGEP(ctx->ac.builder, vertexptr, gep_idx, 3, "");
}
static struct radv_stream_output *
radv_get_stream_output_by_loc(struct radv_streamout_info *so, unsigned location)
{
- for (unsigned i = 0; i < so->num_outputs; ++i) {
- if (so->outputs[i].location == location)
- return &so->outputs[i];
- }
+ for (unsigned i = 0; i < so->num_outputs; ++i) {
+ if (so->outputs[i].location == location)
+ return &so->outputs[i];
+ }
- return NULL;
+ return NULL;
}
-static void build_streamout_vertex(struct radv_shader_context *ctx,
- LLVMValueRef *so_buffer, LLVMValueRef *wg_offset_dw,
- unsigned stream, LLVMValueRef offset_vtx,
- LLVMValueRef vertexptr)
+static void
+build_streamout_vertex(struct radv_shader_context *ctx, LLVMValueRef *so_buffer,
+ LLVMValueRef *wg_offset_dw, unsigned stream, LLVMValueRef offset_vtx,
+ LLVMValueRef vertexptr)
{
- struct radv_streamout_info *so = &ctx->args->shader_info->so;
- LLVMBuilderRef builder = ctx->ac.builder;
- LLVMValueRef offset[4] = {0};
- LLVMValueRef tmp;
+ struct radv_streamout_info *so = &ctx->args->shader_info->so;
+ LLVMBuilderRef builder = ctx->ac.builder;
+ LLVMValueRef offset[4] = {0};
+ LLVMValueRef tmp;
- for (unsigned buffer = 0; buffer < 4; ++buffer) {
- if (!wg_offset_dw[buffer])
- continue;
+ for (unsigned buffer = 0; buffer < 4; ++buffer) {
+ if (!wg_offset_dw[buffer])
+ continue;
- tmp = LLVMBuildMul(builder, offset_vtx,
- LLVMConstInt(ctx->ac.i32, so->strides[buffer], false), "");
- tmp = LLVMBuildAdd(builder, wg_offset_dw[buffer], tmp, "");
- offset[buffer] = LLVMBuildShl(builder, tmp, LLVMConstInt(ctx->ac.i32, 2, false), "");
- }
+ tmp = LLVMBuildMul(builder, offset_vtx, LLVMConstInt(ctx->ac.i32, so->strides[buffer], false),
+ "");
+ tmp = LLVMBuildAdd(builder, wg_offset_dw[buffer], tmp, "");
+ offset[buffer] = LLVMBuildShl(builder, tmp, LLVMConstInt(ctx->ac.i32, 2, false), "");
+ }
- if (ctx->stage == MESA_SHADER_GEOMETRY) {
- struct radv_shader_output_values outputs[AC_LLVM_MAX_OUTPUTS];
- unsigned noutput = 0;
- unsigned out_idx = 0;
+ if (ctx->stage == MESA_SHADER_GEOMETRY) {
+ struct radv_shader_output_values outputs[AC_LLVM_MAX_OUTPUTS];
+ unsigned noutput = 0;
+ unsigned out_idx = 0;
- for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
- unsigned output_usage_mask =
- ctx->args->shader_info->gs.output_usage_mask[i];
- uint8_t output_stream = ctx->args->shader_info->gs.output_streams[i];
+ for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
+ unsigned output_usage_mask = ctx->args->shader_info->gs.output_usage_mask[i];
+ uint8_t output_stream = ctx->args->shader_info->gs.output_streams[i];
- if (!(ctx->output_mask & (1ull << i)) ||
- output_stream != stream)
- continue;
+ if (!(ctx->output_mask & (1ull << i)) || output_stream != stream)
+ continue;
- outputs[noutput].slot_name = i;
- outputs[noutput].slot_index = i == VARYING_SLOT_CLIP_DIST1;
- outputs[noutput].usage_mask = output_usage_mask;
+ outputs[noutput].slot_name = i;
+ outputs[noutput].slot_index = i == VARYING_SLOT_CLIP_DIST1;
+ outputs[noutput].usage_mask = output_usage_mask;
- int length = util_last_bit(output_usage_mask);
+ int length = util_last_bit(output_usage_mask);
- for (unsigned j = 0; j < length; j++, out_idx++) {
- if (!(output_usage_mask & (1 << j)))
- continue;
+ for (unsigned j = 0; j < length; j++, out_idx++) {
+ if (!(output_usage_mask & (1 << j)))
+ continue;
- tmp = ac_build_gep0(&ctx->ac, vertexptr,
- LLVMConstInt(ctx->ac.i32, out_idx, false));
- outputs[noutput].values[j] = LLVMBuildLoad(builder, tmp, "");
- }
+ tmp = ac_build_gep0(&ctx->ac, vertexptr, LLVMConstInt(ctx->ac.i32, out_idx, false));
+ outputs[noutput].values[j] = LLVMBuildLoad(builder, tmp, "");
+ }
- for (unsigned j = length; j < 4; j++)
- outputs[noutput].values[j] = LLVMGetUndef(ctx->ac.f32);
+ for (unsigned j = length; j < 4; j++)
+ outputs[noutput].values[j] = LLVMGetUndef(ctx->ac.f32);
- noutput++;
- }
+ noutput++;
+ }
- for (unsigned i = 0; i < noutput; i++) {
- struct radv_stream_output *output =
- radv_get_stream_output_by_loc(so, outputs[i].slot_name);
+ for (unsigned i = 0; i < noutput; i++) {
+ struct radv_stream_output *output =
+ radv_get_stream_output_by_loc(so, outputs[i].slot_name);
- if (!output ||
- output->stream != stream)
- continue;
+ if (!output || output->stream != stream)
+ continue;
- struct radv_shader_output_values out = {0};
+ struct radv_shader_output_values out = {0};
- for (unsigned j = 0; j < 4; j++) {
- out.values[j] = outputs[i].values[j];
- }
+ for (unsigned j = 0; j < 4; j++) {
+ out.values[j] = outputs[i].values[j];
+ }
- radv_emit_stream_output(ctx, so_buffer, offset, output, &out);
- }
- } else {
- for (unsigned i = 0; i < so->num_outputs; ++i) {
- struct radv_stream_output *output =
- &ctx->args->shader_info->so.outputs[i];
+ radv_emit_stream_output(ctx, so_buffer, offset, output, &out);
+ }
+ } else {
+ for (unsigned i = 0; i < so->num_outputs; ++i) {
+ struct radv_stream_output *output = &ctx->args->shader_info->so.outputs[i];
- if (stream != output->stream)
- continue;
+ if (stream != output->stream)
+ continue;
- struct radv_shader_output_values out = {0};
+ struct radv_shader_output_values out = {0};
- for (unsigned comp = 0; comp < 4; comp++) {
- if (!(output->component_mask & (1 << comp)))
- continue;
+ for (unsigned comp = 0; comp < 4; comp++) {
+ if (!(output->component_mask & (1 << comp)))
+ continue;
- tmp = ac_build_gep0(&ctx->ac, vertexptr,
- LLVMConstInt(ctx->ac.i32, 4 * i + comp, false));
- out.values[comp] = LLVMBuildLoad(builder, tmp, "");
- }
+ tmp =
+ ac_build_gep0(&ctx->ac, vertexptr, LLVMConstInt(ctx->ac.i32, 4 * i + comp, false));
+ out.values[comp] = LLVMBuildLoad(builder, tmp, "");
+ }
- radv_emit_stream_output(ctx, so_buffer, offset, output, &out);
- }
- }
+ radv_emit_stream_output(ctx, so_buffer, offset, output, &out);
+ }
+ }
}
struct ngg_streamout {
- LLVMValueRef num_vertices;
+ LLVMValueRef num_vertices;
- /* per-thread data */
- LLVMValueRef prim_enable[4]; /* i1 per stream */
- LLVMValueRef vertices[3]; /* [N x i32] addrspace(LDS)* */
+ /* per-thread data */
+ LLVMValueRef prim_enable[4]; /* i1 per stream */
+ LLVMValueRef vertices[3]; /* [N x i32] addrspace(LDS)* */
- /* Output */
- LLVMValueRef emit[4]; /* per-stream emitted primitives (only valid for used streams) */
+ /* Output */
+ LLVMValueRef emit[4]; /* per-stream emitted primitives (only valid for used streams) */
};
/**
@@ -1833,1323 +1694,1228 @@ struct ngg_streamout {
*
* Clobbers gs_ngg_scratch[8:].
*/
-static void build_streamout(struct radv_shader_context *ctx,
- struct ngg_streamout *nggso)
-{
- struct radv_streamout_info *so = &ctx->args->shader_info->so;
- LLVMBuilderRef builder = ctx->ac.builder;
- LLVMValueRef buf_ptr = ac_get_arg(&ctx->ac, ctx->args->streamout_buffers);
- LLVMValueRef tid = get_thread_id_in_tg(ctx);
- LLVMValueRef cond, tmp, tmp2;
- LLVMValueRef i32_2 = LLVMConstInt(ctx->ac.i32, 2, false);
- LLVMValueRef i32_4 = LLVMConstInt(ctx->ac.i32, 4, false);
- LLVMValueRef i32_8 = LLVMConstInt(ctx->ac.i32, 8, false);
- LLVMValueRef so_buffer[4] = {0};
- unsigned max_num_vertices = 1 + (nggso->vertices[1] ? 1 : 0) +
- (nggso->vertices[2] ? 1 : 0);
- LLVMValueRef prim_stride_dw[4] = {0};
- LLVMValueRef prim_stride_dw_vgpr = LLVMGetUndef(ctx->ac.i32);
- int stream_for_buffer[4] = { -1, -1, -1, -1 };
- unsigned bufmask_for_stream[4] = {0};
- bool isgs = ctx->stage == MESA_SHADER_GEOMETRY;
- unsigned scratch_emit_base = isgs ? 4 : 0;
- LLVMValueRef scratch_emit_basev = isgs ? i32_4 : ctx->ac.i32_0;
- unsigned scratch_offset_base = isgs ? 8 : 4;
- LLVMValueRef scratch_offset_basev = isgs ? i32_8 : i32_4;
-
- ac_llvm_add_target_dep_function_attr(ctx->main_function,
- "amdgpu-gds-size", 256);
-
- /* Determine the mapping of streamout buffers to vertex streams. */
- for (unsigned i = 0; i < so->num_outputs; ++i) {
- unsigned buf = so->outputs[i].buffer;
- unsigned stream = so->outputs[i].stream;
- assert(stream_for_buffer[buf] < 0 || stream_for_buffer[buf] == stream);
- stream_for_buffer[buf] = stream;
- bufmask_for_stream[stream] |= 1 << buf;
- }
-
- for (unsigned buffer = 0; buffer < 4; ++buffer) {
- if (stream_for_buffer[buffer] == -1)
- continue;
-
- assert(so->strides[buffer]);
-
- LLVMValueRef stride_for_buffer =
- LLVMConstInt(ctx->ac.i32, so->strides[buffer], false);
- prim_stride_dw[buffer] =
- LLVMBuildMul(builder, stride_for_buffer,
- nggso->num_vertices, "");
- prim_stride_dw_vgpr = ac_build_writelane(
- &ctx->ac, prim_stride_dw_vgpr, prim_stride_dw[buffer],
- LLVMConstInt(ctx->ac.i32, buffer, false));
-
- LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, buffer, false);
- so_buffer[buffer] = ac_build_load_to_sgpr(&ctx->ac, buf_ptr,
- offset);
- }
-
- cond = LLVMBuildICmp(builder, LLVMIntEQ, get_wave_id_in_tg(ctx), ctx->ac.i32_0, "");
- ac_build_ifcc(&ctx->ac, cond, 5200);
- {
- LLVMTypeRef gdsptr = LLVMPointerType(ctx->ac.i32, AC_ADDR_SPACE_GDS);
- LLVMValueRef gdsbase = LLVMBuildIntToPtr(builder, ctx->ac.i32_0, gdsptr, "");
-
- /* Advance the streamout offsets in GDS. */
- LLVMValueRef offsets_vgpr = ac_build_alloca_undef(&ctx->ac, ctx->ac.i32, "");
- LLVMValueRef generated_by_stream_vgpr = ac_build_alloca_undef(&ctx->ac, ctx->ac.i32, "");
-
- cond = LLVMBuildICmp(builder, LLVMIntULT, ac_get_thread_id(&ctx->ac), i32_4, "");
- ac_build_ifcc(&ctx->ac, cond, 5210);
- {
- /* Fetch the number of generated primitives and store
- * it in GDS for later use.
- */
- if (isgs) {
- tmp = ac_build_gep0(&ctx->ac, ctx->gs_ngg_scratch, tid);
- tmp = LLVMBuildLoad(builder, tmp, "");
- } else {
- tmp = ac_build_writelane(&ctx->ac, ctx->ac.i32_0,
- ngg_get_prim_cnt(ctx), ctx->ac.i32_0);
- }
- LLVMBuildStore(builder, tmp, generated_by_stream_vgpr);
-
- unsigned swizzle[4];
- int unused_stream = -1;
- for (unsigned stream = 0; stream < 4; ++stream) {
- if (!ctx->args->shader_info->gs.num_stream_output_components[stream]) {
- unused_stream = stream;
- break;
- }
- }
- for (unsigned buffer = 0; buffer < 4; ++buffer) {
- if (stream_for_buffer[buffer] >= 0) {
- swizzle[buffer] = stream_for_buffer[buffer];
- } else {
- assert(unused_stream >= 0);
- swizzle[buffer] = unused_stream;
- }
- }
-
- tmp = ac_build_quad_swizzle(&ctx->ac, tmp,
- swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
- tmp = LLVMBuildMul(builder, tmp, prim_stride_dw_vgpr, "");
-
- LLVMValueRef args[] = {
- LLVMBuildIntToPtr(builder, ngg_get_ordered_id(ctx), gdsptr, ""),
- tmp,
- ctx->ac.i32_0, // ordering
- ctx->ac.i32_0, // scope
- ctx->ac.i1false, // isVolatile
- LLVMConstInt(ctx->ac.i32, 4 << 24, false), // OA index
- ctx->ac.i1true, // wave release
- ctx->ac.i1true, // wave done
- };
-
- tmp = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ds.ordered.add",
- ctx->ac.i32, args, ARRAY_SIZE(args), 0);
-
- /* Keep offsets in a VGPR for quick retrieval via readlane by
- * the first wave for bounds checking, and also store in LDS
- * for retrieval by all waves later. */
- LLVMBuildStore(builder, tmp, offsets_vgpr);
-
- tmp2 = LLVMBuildAdd(builder, ac_get_thread_id(&ctx->ac),
- scratch_offset_basev, "");
- tmp2 = ac_build_gep0(&ctx->ac, ctx->gs_ngg_scratch, tmp2);
- LLVMBuildStore(builder, tmp, tmp2);
- }
- ac_build_endif(&ctx->ac, 5210);
-
- /* Determine the max emit per buffer. This is done via the SALU, in part
- * because LLVM can't generate divide-by-multiply if we try to do this
- * via VALU with one lane per buffer.
- */
- LLVMValueRef max_emit[4] = {0};
- for (unsigned buffer = 0; buffer < 4; ++buffer) {
- if (stream_for_buffer[buffer] == -1)
- continue;
-
- /* Compute the streamout buffer size in DWORD. */
- LLVMValueRef bufsize_dw =
- LLVMBuildLShr(builder,
- LLVMBuildExtractElement(builder, so_buffer[buffer], i32_2, ""),
- i32_2, "");
-
- /* Load the streamout buffer offset from GDS. */
- tmp = LLVMBuildLoad(builder, offsets_vgpr, "");
- LLVMValueRef offset_dw =
- ac_build_readlane(&ctx->ac, tmp,
- LLVMConstInt(ctx->ac.i32, buffer, false));
-
- /* Compute the remaining size to emit. */
- LLVMValueRef remaining_dw =
- LLVMBuildSub(builder, bufsize_dw, offset_dw, "");
- tmp = LLVMBuildUDiv(builder, remaining_dw,
- prim_stride_dw[buffer], "");
-
- cond = LLVMBuildICmp(builder, LLVMIntULT,
- bufsize_dw, offset_dw, "");
- max_emit[buffer] = LLVMBuildSelect(builder, cond,
- ctx->ac.i32_0, tmp, "");
- }
-
- /* Determine the number of emitted primitives per stream and fixup the
- * GDS counter if necessary.
- *
- * This is complicated by the fact that a single stream can emit to
- * multiple buffers (but luckily not vice versa).
- */
- LLVMValueRef emit_vgpr = ctx->ac.i32_0;
-
- for (unsigned stream = 0; stream < 4; ++stream) {
- if (!ctx->args->shader_info->gs.num_stream_output_components[stream])
- continue;
-
- /* Load the number of generated primitives from GDS and
- * determine that number for the given stream.
- */
- tmp = LLVMBuildLoad(builder, generated_by_stream_vgpr, "");
- LLVMValueRef generated =
- ac_build_readlane(&ctx->ac, tmp,
- LLVMConstInt(ctx->ac.i32, stream, false));
-
-
- /* Compute the number of emitted primitives. */
- LLVMValueRef emit = generated;
- for (unsigned buffer = 0; buffer < 4; ++buffer) {
- if (stream_for_buffer[buffer] == stream)
- emit = ac_build_umin(&ctx->ac, emit, max_emit[buffer]);
- }
-
- /* Store the number of emitted primitives for that
- * stream.
- */
- emit_vgpr = ac_build_writelane(&ctx->ac, emit_vgpr, emit,
- LLVMConstInt(ctx->ac.i32, stream, false));
-
- /* Fixup the offset using a plain GDS atomic if we overflowed. */
- cond = LLVMBuildICmp(builder, LLVMIntULT, emit, generated, "");
- ac_build_ifcc(&ctx->ac, cond, 5221); /* scalar branch */
- tmp = LLVMBuildLShr(builder,
- LLVMConstInt(ctx->ac.i32, bufmask_for_stream[stream], false),
- ac_get_thread_id(&ctx->ac), "");
- tmp = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, "");
- ac_build_ifcc(&ctx->ac, tmp, 5222);
- {
- tmp = LLVMBuildSub(builder, generated, emit, "");
- tmp = LLVMBuildMul(builder, tmp, prim_stride_dw_vgpr, "");
- tmp2 = LLVMBuildGEP(builder, gdsbase, &tid, 1, "");
- LLVMBuildAtomicRMW(builder, LLVMAtomicRMWBinOpSub, tmp2, tmp,
- LLVMAtomicOrderingMonotonic, false);
- }
- ac_build_endif(&ctx->ac, 5222);
- ac_build_endif(&ctx->ac, 5221);
- }
-
- /* Store the number of emitted primitives to LDS for later use. */
- cond = LLVMBuildICmp(builder, LLVMIntULT, ac_get_thread_id(&ctx->ac), i32_4, "");
- ac_build_ifcc(&ctx->ac, cond, 5225);
- {
- tmp = LLVMBuildAdd(builder, ac_get_thread_id(&ctx->ac),
- scratch_emit_basev, "");
- tmp = ac_build_gep0(&ctx->ac, ctx->gs_ngg_scratch, tmp);
- LLVMBuildStore(builder, emit_vgpr, tmp);
- }
- ac_build_endif(&ctx->ac, 5225);
- }
- ac_build_endif(&ctx->ac, 5200);
-
- /* Determine the workgroup-relative per-thread / primitive offset into
- * the streamout buffers */
- struct ac_wg_scan primemit_scan[4] = {0};
-
- if (isgs) {
- for (unsigned stream = 0; stream < 4; ++stream) {
- if (!ctx->args->shader_info->gs.num_stream_output_components[stream])
- continue;
-
- primemit_scan[stream].enable_exclusive = true;
- primemit_scan[stream].op = nir_op_iadd;
- primemit_scan[stream].src = nggso->prim_enable[stream];
- primemit_scan[stream].scratch =
- ac_build_gep0(&ctx->ac, ctx->gs_ngg_scratch,
- LLVMConstInt(ctx->ac.i32, 12 + 8 * stream, false));
- primemit_scan[stream].waveidx = get_wave_id_in_tg(ctx);
- primemit_scan[stream].numwaves = get_tgsize(ctx);
- primemit_scan[stream].maxwaves = 8;
- ac_build_wg_scan_top(&ctx->ac, &primemit_scan[stream]);
- }
- }
-
- ac_build_s_barrier(&ctx->ac);
-
- /* Fetch the per-buffer offsets and per-stream emit counts in all waves. */
- LLVMValueRef wgoffset_dw[4] = {0};
-
- {
- LLVMValueRef scratch_vgpr;
-
- tmp = ac_build_gep0(&ctx->ac, ctx->gs_ngg_scratch, ac_get_thread_id(&ctx->ac));
- scratch_vgpr = LLVMBuildLoad(builder, tmp, "");
-
- for (unsigned buffer = 0; buffer < 4; ++buffer) {
- if (stream_for_buffer[buffer] >= 0) {
- wgoffset_dw[buffer] = ac_build_readlane(
- &ctx->ac, scratch_vgpr,
- LLVMConstInt(ctx->ac.i32, scratch_offset_base + buffer, false));
- }
- }
-
- for (unsigned stream = 0; stream < 4; ++stream) {
- if (ctx->args->shader_info->gs.num_stream_output_components[stream]) {
- nggso->emit[stream] = ac_build_readlane(
- &ctx->ac, scratch_vgpr,
- LLVMConstInt(ctx->ac.i32, scratch_emit_base + stream, false));
- }
- }
- }
-
- /* Write out primitive data */
- for (unsigned stream = 0; stream < 4; ++stream) {
- if (!ctx->args->shader_info->gs.num_stream_output_components[stream])
- continue;
-
- if (isgs) {
- ac_build_wg_scan_bottom(&ctx->ac, &primemit_scan[stream]);
- } else {
- primemit_scan[stream].result_exclusive = tid;
- }
-
- cond = LLVMBuildICmp(builder, LLVMIntULT,
- primemit_scan[stream].result_exclusive,
- nggso->emit[stream], "");
- cond = LLVMBuildAnd(builder, cond, nggso->prim_enable[stream], "");
- ac_build_ifcc(&ctx->ac, cond, 5240);
- {
- LLVMValueRef offset_vtx =
- LLVMBuildMul(builder, primemit_scan[stream].result_exclusive,
- nggso->num_vertices, "");
-
- for (unsigned i = 0; i < max_num_vertices; ++i) {
- cond = LLVMBuildICmp(builder, LLVMIntULT,
- LLVMConstInt(ctx->ac.i32, i, false),
- nggso->num_vertices, "");
- ac_build_ifcc(&ctx->ac, cond, 5241);
- build_streamout_vertex(ctx, so_buffer, wgoffset_dw,
- stream, offset_vtx, nggso->vertices[i]);
- ac_build_endif(&ctx->ac, 5241);
- offset_vtx = LLVMBuildAdd(builder, offset_vtx, ctx->ac.i32_1, "");
- }
- }
- ac_build_endif(&ctx->ac, 5240);
- }
-}
-
-static unsigned ngg_nogs_vertex_size(struct radv_shader_context *ctx)
-{
- unsigned lds_vertex_size = 0;
-
- if (ctx->args->shader_info->so.num_outputs)
- lds_vertex_size = 4 * ctx->args->shader_info->so.num_outputs + 1;
-
- return lds_vertex_size;
+static void
+build_streamout(struct radv_shader_context *ctx, struct ngg_streamout *nggso)
+{
+ struct radv_streamout_info *so = &ctx->args->shader_info->so;
+ LLVMBuilderRef builder = ctx->ac.builder;
+ LLVMValueRef buf_ptr = ac_get_arg(&ctx->ac, ctx->args->streamout_buffers);
+ LLVMValueRef tid = get_thread_id_in_tg(ctx);
+ LLVMValueRef cond, tmp, tmp2;
+ LLVMValueRef i32_2 = LLVMConstInt(ctx->ac.i32, 2, false);
+ LLVMValueRef i32_4 = LLVMConstInt(ctx->ac.i32, 4, false);
+ LLVMValueRef i32_8 = LLVMConstInt(ctx->ac.i32, 8, false);
+ LLVMValueRef so_buffer[4] = {0};
+ unsigned max_num_vertices = 1 + (nggso->vertices[1] ? 1 : 0) + (nggso->vertices[2] ? 1 : 0);
+ LLVMValueRef prim_stride_dw[4] = {0};
+ LLVMValueRef prim_stride_dw_vgpr = LLVMGetUndef(ctx->ac.i32);
+ int stream_for_buffer[4] = {-1, -1, -1, -1};
+ unsigned bufmask_for_stream[4] = {0};
+ bool isgs = ctx->stage == MESA_SHADER_GEOMETRY;
+ unsigned scratch_emit_base = isgs ? 4 : 0;
+ LLVMValueRef scratch_emit_basev = isgs ? i32_4 : ctx->ac.i32_0;
+ unsigned scratch_offset_base = isgs ? 8 : 4;
+ LLVMValueRef scratch_offset_basev = isgs ? i32_8 : i32_4;
+
+ ac_llvm_add_target_dep_function_attr(ctx->main_function, "amdgpu-gds-size", 256);
+
+ /* Determine the mapping of streamout buffers to vertex streams. */
+ for (unsigned i = 0; i < so->num_outputs; ++i) {
+ unsigned buf = so->outputs[i].buffer;
+ unsigned stream = so->outputs[i].stream;
+ assert(stream_for_buffer[buf] < 0 || stream_for_buffer[buf] == stream);
+ stream_for_buffer[buf] = stream;
+ bufmask_for_stream[stream] |= 1 << buf;
+ }
+
+ for (unsigned buffer = 0; buffer < 4; ++buffer) {
+ if (stream_for_buffer[buffer] == -1)
+ continue;
+
+ assert(so->strides[buffer]);
+
+ LLVMValueRef stride_for_buffer = LLVMConstInt(ctx->ac.i32, so->strides[buffer], false);
+ prim_stride_dw[buffer] = LLVMBuildMul(builder, stride_for_buffer, nggso->num_vertices, "");
+ prim_stride_dw_vgpr =
+ ac_build_writelane(&ctx->ac, prim_stride_dw_vgpr, prim_stride_dw[buffer],
+ LLVMConstInt(ctx->ac.i32, buffer, false));
+
+ LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, buffer, false);
+ so_buffer[buffer] = ac_build_load_to_sgpr(&ctx->ac, buf_ptr, offset);
+ }
+
+ cond = LLVMBuildICmp(builder, LLVMIntEQ, get_wave_id_in_tg(ctx), ctx->ac.i32_0, "");
+ ac_build_ifcc(&ctx->ac, cond, 5200);
+ {
+ LLVMTypeRef gdsptr = LLVMPointerType(ctx->ac.i32, AC_ADDR_SPACE_GDS);
+ LLVMValueRef gdsbase = LLVMBuildIntToPtr(builder, ctx->ac.i32_0, gdsptr, "");
+
+ /* Advance the streamout offsets in GDS. */
+ LLVMValueRef offsets_vgpr = ac_build_alloca_undef(&ctx->ac, ctx->ac.i32, "");
+ LLVMValueRef generated_by_stream_vgpr = ac_build_alloca_undef(&ctx->ac, ctx->ac.i32, "");
+
+ cond = LLVMBuildICmp(builder, LLVMIntULT, ac_get_thread_id(&ctx->ac), i32_4, "");
+ ac_build_ifcc(&ctx->ac, cond, 5210);
+ {
+ /* Fetch the number of generated primitives and store
+ * it in GDS for later use.
+ */
+ if (isgs) {
+ tmp = ac_build_gep0(&ctx->ac, ctx->gs_ngg_scratch, tid);
+ tmp = LLVMBuildLoad(builder, tmp, "");
+ } else {
+ tmp = ac_build_writelane(&ctx->ac, ctx->ac.i32_0, ngg_get_prim_cnt(ctx), ctx->ac.i32_0);
+ }
+ LLVMBuildStore(builder, tmp, generated_by_stream_vgpr);
+
+ unsigned swizzle[4];
+ int unused_stream = -1;
+ for (unsigned stream = 0; stream < 4; ++stream) {
+ if (!ctx->args->shader_info->gs.num_stream_output_components[stream]) {
+ unused_stream = stream;
+ break;
+ }
+ }
+ for (unsigned buffer = 0; buffer < 4; ++buffer) {
+ if (stream_for_buffer[buffer] >= 0) {
+ swizzle[buffer] = stream_for_buffer[buffer];
+ } else {
+ assert(unused_stream >= 0);
+ swizzle[buffer] = unused_stream;
+ }
+ }
+
+ tmp = ac_build_quad_swizzle(&ctx->ac, tmp, swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
+ tmp = LLVMBuildMul(builder, tmp, prim_stride_dw_vgpr, "");
+
+ LLVMValueRef args[] = {
+ LLVMBuildIntToPtr(builder, ngg_get_ordered_id(ctx), gdsptr, ""),
+ tmp,
+ ctx->ac.i32_0, // ordering
+ ctx->ac.i32_0, // scope
+ ctx->ac.i1false, // isVolatile
+ LLVMConstInt(ctx->ac.i32, 4 << 24, false), // OA index
+ ctx->ac.i1true, // wave release
+ ctx->ac.i1true, // wave done
+ };
+
+ tmp = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ds.ordered.add", ctx->ac.i32, args,
+ ARRAY_SIZE(args), 0);
+
+ /* Keep offsets in a VGPR for quick retrieval via readlane by
+ * the first wave for bounds checking, and also store in LDS
+ * for retrieval by all waves later. */
+ LLVMBuildStore(builder, tmp, offsets_vgpr);
+
+ tmp2 = LLVMBuildAdd(builder, ac_get_thread_id(&ctx->ac), scratch_offset_basev, "");
+ tmp2 = ac_build_gep0(&ctx->ac, ctx->gs_ngg_scratch, tmp2);
+ LLVMBuildStore(builder, tmp, tmp2);
+ }
+ ac_build_endif(&ctx->ac, 5210);
+
+ /* Determine the max emit per buffer. This is done via the SALU, in part
+ * because LLVM can't generate divide-by-multiply if we try to do this
+ * via VALU with one lane per buffer.
+ */
+ LLVMValueRef max_emit[4] = {0};
+ for (unsigned buffer = 0; buffer < 4; ++buffer) {
+ if (stream_for_buffer[buffer] == -1)
+ continue;
+
+ /* Compute the streamout buffer size in DWORD. */
+ LLVMValueRef bufsize_dw = LLVMBuildLShr(
+ builder, LLVMBuildExtractElement(builder, so_buffer[buffer], i32_2, ""), i32_2, "");
+
+ /* Load the streamout buffer offset from GDS. */
+ tmp = LLVMBuildLoad(builder, offsets_vgpr, "");
+ LLVMValueRef offset_dw =
+ ac_build_readlane(&ctx->ac, tmp, LLVMConstInt(ctx->ac.i32, buffer, false));
+
+ /* Compute the remaining size to emit. */
+ LLVMValueRef remaining_dw = LLVMBuildSub(builder, bufsize_dw, offset_dw, "");
+ tmp = LLVMBuildUDiv(builder, remaining_dw, prim_stride_dw[buffer], "");
+
+ cond = LLVMBuildICmp(builder, LLVMIntULT, bufsize_dw, offset_dw, "");
+ max_emit[buffer] = LLVMBuildSelect(builder, cond, ctx->ac.i32_0, tmp, "");
+ }
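Scalar sketch of the per-buffer bound computed in this loop (illustrative): the buffer size comes from the descriptor in bytes and is converted to dwords, and the result is how many whole primitives still fit after the current GDS offset.

#include <stdint.h>

static uint32_t max_emit_for_buffer(uint32_t bufsize_bytes, uint32_t offset_dw,
                                    uint32_t prim_stride_dw)
{
   uint32_t bufsize_dw = bufsize_bytes >> 2;  /* bytes -> dwords */
   if (bufsize_dw < offset_dw)                /* offset already past the end */
      return 0;
   return (bufsize_dw - offset_dw) / prim_stride_dw;
}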
+
+ /* Determine the number of emitted primitives per stream and fixup the
+ * GDS counter if necessary.
+ *
+ * This is complicated by the fact that a single stream can emit to
+ * multiple buffers (but luckily not vice versa).
+ */
+ LLVMValueRef emit_vgpr = ctx->ac.i32_0;
+
+ for (unsigned stream = 0; stream < 4; ++stream) {
+ if (!ctx->args->shader_info->gs.num_stream_output_components[stream])
+ continue;
+
+ /* Load the number of generated primitives from GDS and
+ * determine that number for the given stream.
+ */
+ tmp = LLVMBuildLoad(builder, generated_by_stream_vgpr, "");
+ LLVMValueRef generated =
+ ac_build_readlane(&ctx->ac, tmp, LLVMConstInt(ctx->ac.i32, stream, false));
+
+ /* Compute the number of emitted primitives. */
+ LLVMValueRef emit = generated;
+ for (unsigned buffer = 0; buffer < 4; ++buffer) {
+ if (stream_for_buffer[buffer] == stream)
+ emit = ac_build_umin(&ctx->ac, emit, max_emit[buffer]);
+ }
+
+ /* Store the number of emitted primitives for that
+ * stream.
+ */
+ emit_vgpr =
+ ac_build_writelane(&ctx->ac, emit_vgpr, emit, LLVMConstInt(ctx->ac.i32, stream, false));
+
+ /* Fixup the offset using a plain GDS atomic if we overflowed. */
+ cond = LLVMBuildICmp(builder, LLVMIntULT, emit, generated, "");
+ ac_build_ifcc(&ctx->ac, cond, 5221); /* scalar branch */
+ tmp = LLVMBuildLShr(builder, LLVMConstInt(ctx->ac.i32, bufmask_for_stream[stream], false),
+ ac_get_thread_id(&ctx->ac), "");
+ tmp = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, "");
+ ac_build_ifcc(&ctx->ac, tmp, 5222);
+ {
+ tmp = LLVMBuildSub(builder, generated, emit, "");
+ tmp = LLVMBuildMul(builder, tmp, prim_stride_dw_vgpr, "");
+ tmp2 = LLVMBuildGEP(builder, gdsbase, &tid, 1, "");
+ LLVMBuildAtomicRMW(builder, LLVMAtomicRMWBinOpSub, tmp2, tmp,
+ LLVMAtomicOrderingMonotonic, false);
+ }
+ ac_build_endif(&ctx->ac, 5222);
+ ac_build_endif(&ctx->ac, 5221);
+ }
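And a simplified scalar view of the clamp plus GDS rollback performed per stream above, shown for the single-buffer case (the real code does this with readlane/writelane, one lane per buffer; names here are illustrative):

static uint32_t clamp_emit_and_fixup(uint32_t generated, uint32_t max_emit,
                                     uint32_t prim_stride_dw, uint32_t *gds_offset_dw)
{
   uint32_t emit = generated < max_emit ? generated : max_emit;
   /* The ordered add already advanced the GDS offset by `generated` primitives,
    * so roll back the part that will not actually be written. */
   if (emit < generated)
      *gds_offset_dw -= (generated - emit) * prim_stride_dw;
   return emit;
}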
+
+ /* Store the number of emitted primitives to LDS for later use. */
+ cond = LLVMBuildICmp(builder, LLVMIntULT, ac_get_thread_id(&ctx->ac), i32_4, "");
+ ac_build_ifcc(&ctx->ac, cond, 5225);
+ {
+ tmp = LLVMBuildAdd(builder, ac_get_thread_id(&ctx->ac), scratch_emit_basev, "");
+ tmp = ac_build_gep0(&ctx->ac, ctx->gs_ngg_scratch, tmp);
+ LLVMBuildStore(builder, emit_vgpr, tmp);
+ }
+ ac_build_endif(&ctx->ac, 5225);
+ }
+ ac_build_endif(&ctx->ac, 5200);
+
+ /* Determine the workgroup-relative per-thread / primitive offset into
+ * the streamout buffers */
+ struct ac_wg_scan primemit_scan[4] = {0};
+
+ if (isgs) {
+ for (unsigned stream = 0; stream < 4; ++stream) {
+ if (!ctx->args->shader_info->gs.num_stream_output_components[stream])
+ continue;
+
+ primemit_scan[stream].enable_exclusive = true;
+ primemit_scan[stream].op = nir_op_iadd;
+ primemit_scan[stream].src = nggso->prim_enable[stream];
+ primemit_scan[stream].scratch = ac_build_gep0(
+ &ctx->ac, ctx->gs_ngg_scratch, LLVMConstInt(ctx->ac.i32, 12 + 8 * stream, false));
+ primemit_scan[stream].waveidx = get_wave_id_in_tg(ctx);
+ primemit_scan[stream].numwaves = get_tgsize(ctx);
+ primemit_scan[stream].maxwaves = 8;
+ ac_build_wg_scan_top(&ctx->ac, &primemit_scan[stream]);
+ }
+ }
+
+ ac_build_s_barrier(&ctx->ac);
+
+ /* Fetch the per-buffer offsets and per-stream emit counts in all waves. */
+ LLVMValueRef wgoffset_dw[4] = {0};
+
+ {
+ LLVMValueRef scratch_vgpr;
+
+ tmp = ac_build_gep0(&ctx->ac, ctx->gs_ngg_scratch, ac_get_thread_id(&ctx->ac));
+ scratch_vgpr = LLVMBuildLoad(builder, tmp, "");
+
+ for (unsigned buffer = 0; buffer < 4; ++buffer) {
+ if (stream_for_buffer[buffer] >= 0) {
+ wgoffset_dw[buffer] =
+ ac_build_readlane(&ctx->ac, scratch_vgpr,
+ LLVMConstInt(ctx->ac.i32, scratch_offset_base + buffer, false));
+ }
+ }
+
+ for (unsigned stream = 0; stream < 4; ++stream) {
+ if (ctx->args->shader_info->gs.num_stream_output_components[stream]) {
+ nggso->emit[stream] =
+ ac_build_readlane(&ctx->ac, scratch_vgpr,
+ LLVMConstInt(ctx->ac.i32, scratch_emit_base + stream, false));
+ }
+ }
+ }
+
+ /* Write out primitive data */
+ for (unsigned stream = 0; stream < 4; ++stream) {
+ if (!ctx->args->shader_info->gs.num_stream_output_components[stream])
+ continue;
+
+ if (isgs) {
+ ac_build_wg_scan_bottom(&ctx->ac, &primemit_scan[stream]);
+ } else {
+ primemit_scan[stream].result_exclusive = tid;
+ }
+
+ cond = LLVMBuildICmp(builder, LLVMIntULT, primemit_scan[stream].result_exclusive,
+ nggso->emit[stream], "");
+ cond = LLVMBuildAnd(builder, cond, nggso->prim_enable[stream], "");
+ ac_build_ifcc(&ctx->ac, cond, 5240);
+ {
+ LLVMValueRef offset_vtx =
+ LLVMBuildMul(builder, primemit_scan[stream].result_exclusive, nggso->num_vertices, "");
+
+ for (unsigned i = 0; i < max_num_vertices; ++i) {
+ cond = LLVMBuildICmp(builder, LLVMIntULT, LLVMConstInt(ctx->ac.i32, i, false),
+ nggso->num_vertices, "");
+ ac_build_ifcc(&ctx->ac, cond, 5241);
+ build_streamout_vertex(ctx, so_buffer, wgoffset_dw, stream, offset_vtx,
+ nggso->vertices[i]);
+ ac_build_endif(&ctx->ac, 5241);
+ offset_vtx = LLVMBuildAdd(builder, offset_vtx, ctx->ac.i32_1, "");
+ }
+ }
+ ac_build_endif(&ctx->ac, 5240);
+ }
+}
+
+static unsigned
+ngg_nogs_vertex_size(struct radv_shader_context *ctx)
+{
+ unsigned lds_vertex_size = 0;
+
+ if (ctx->args->shader_info->so.num_outputs)
+ lds_vertex_size = 4 * ctx->args->shader_info->so.num_outputs + 1;
+
+ return lds_vertex_size;
}
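Equivalently, as a plain-C sketch: a vertex captured for streamout occupies 4 dwords per captured output plus one padding dword, which is the bank-conflict avoidance mentioned in ngg_nogs_vertex_ptr below.

static unsigned nogs_lds_vertex_dwords(unsigned num_so_outputs)
{
   /* 4 dwords per captured output, plus one padding dword so consecutive
    * vertices are staggered across LDS banks. */
   return num_so_outputs ? 4 * num_so_outputs + 1 : 0;
}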
/**
* Returns an `[N x i32] addrspace(LDS)*` pointing at contiguous LDS storage
* for the vertex outputs.
*/
-static LLVMValueRef ngg_nogs_vertex_ptr(struct radv_shader_context *ctx,
- LLVMValueRef vtxid)
+static LLVMValueRef
+ngg_nogs_vertex_ptr(struct radv_shader_context *ctx, LLVMValueRef vtxid)
{
- /* The extra dword is used to avoid LDS bank conflicts. */
- unsigned vertex_size = ngg_nogs_vertex_size(ctx);
- LLVMTypeRef ai32 = LLVMArrayType(ctx->ac.i32, vertex_size);
- LLVMTypeRef pai32 = LLVMPointerType(ai32, AC_ADDR_SPACE_LDS);
- LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, ctx->esgs_ring, pai32, "");
- return LLVMBuildGEP(ctx->ac.builder, tmp, &vtxid, 1, "");
+ /* The extra dword is used to avoid LDS bank conflicts. */
+ unsigned vertex_size = ngg_nogs_vertex_size(ctx);
+ LLVMTypeRef ai32 = LLVMArrayType(ctx->ac.i32, vertex_size);
+ LLVMTypeRef pai32 = LLVMPointerType(ai32, AC_ADDR_SPACE_LDS);
+ LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, ctx->esgs_ring, pai32, "");
+ return LLVMBuildGEP(ctx->ac.builder, tmp, &vtxid, 1, "");
}
static void
handle_ngg_outputs_post_1(struct radv_shader_context *ctx)
{
- struct radv_streamout_info *so = &ctx->args->shader_info->so;
- LLVMBuilderRef builder = ctx->ac.builder;
- LLVMValueRef vertex_ptr = NULL;
- LLVMValueRef tmp, tmp2;
+ struct radv_streamout_info *so = &ctx->args->shader_info->so;
+ LLVMBuilderRef builder = ctx->ac.builder;
+ LLVMValueRef vertex_ptr = NULL;
+ LLVMValueRef tmp, tmp2;
- assert((ctx->stage == MESA_SHADER_VERTEX ||
- ctx->stage == MESA_SHADER_TESS_EVAL) && !ctx->args->is_gs_copy_shader);
+ assert((ctx->stage == MESA_SHADER_VERTEX || ctx->stage == MESA_SHADER_TESS_EVAL) &&
+ !ctx->args->is_gs_copy_shader);
- if (!ctx->args->shader_info->so.num_outputs)
- return;
+ if (!ctx->args->shader_info->so.num_outputs)
+ return;
- vertex_ptr = ngg_nogs_vertex_ptr(ctx, get_thread_id_in_tg(ctx));
+ vertex_ptr = ngg_nogs_vertex_ptr(ctx, get_thread_id_in_tg(ctx));
- for (unsigned i = 0; i < so->num_outputs; ++i) {
- struct radv_stream_output *output =
- &ctx->args->shader_info->so.outputs[i];
+ for (unsigned i = 0; i < so->num_outputs; ++i) {
+ struct radv_stream_output *output = &ctx->args->shader_info->so.outputs[i];
- unsigned loc = output->location;
+ unsigned loc = output->location;
- for (unsigned comp = 0; comp < 4; comp++) {
- if (!(output->component_mask & (1 << comp)))
- continue;
+ for (unsigned comp = 0; comp < 4; comp++) {
+ if (!(output->component_mask & (1 << comp)))
+ continue;
- tmp = ac_build_gep0(&ctx->ac, vertex_ptr,
- LLVMConstInt(ctx->ac.i32, 4 * i + comp, false));
- tmp2 = LLVMBuildLoad(builder,
- ctx->abi.outputs[4 * loc + comp], "");
- tmp2 = ac_to_integer(&ctx->ac, tmp2);
- LLVMBuildStore(builder, tmp2, tmp);
- }
- }
+ tmp = ac_build_gep0(&ctx->ac, vertex_ptr, LLVMConstInt(ctx->ac.i32, 4 * i + comp, false));
+ tmp2 = LLVMBuildLoad(builder, ctx->abi.outputs[4 * loc + comp], "");
+ tmp2 = ac_to_integer(&ctx->ac, tmp2);
+ LLVMBuildStore(builder, tmp2, tmp);
+ }
+ }
}
static void
handle_ngg_outputs_post_2(struct radv_shader_context *ctx)
{
- LLVMBuilderRef builder = ctx->ac.builder;
- LLVMValueRef tmp;
-
- assert((ctx->stage == MESA_SHADER_VERTEX ||
- ctx->stage == MESA_SHADER_TESS_EVAL) && !ctx->args->is_gs_copy_shader);
-
- LLVMValueRef prims_in_wave = ac_unpack_param(&ctx->ac,
- ac_get_arg(&ctx->ac, ctx->args->ac.merged_wave_info), 8, 8);
- LLVMValueRef vtx_in_wave = ac_unpack_param(&ctx->ac,
- ac_get_arg(&ctx->ac, ctx->args->ac.merged_wave_info), 0, 8);
- LLVMValueRef is_gs_thread = LLVMBuildICmp(builder, LLVMIntULT,
- ac_get_thread_id(&ctx->ac), prims_in_wave, "");
- LLVMValueRef is_es_thread = LLVMBuildICmp(builder, LLVMIntULT,
- ac_get_thread_id(&ctx->ac), vtx_in_wave, "");
- LLVMValueRef vtxindex[] = {
- ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.gs_vtx_offset[0]), 0, 16),
- ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.gs_vtx_offset[0]), 16, 16),
- ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.gs_vtx_offset[2]), 0, 16),
- };
-
- /* Determine the number of vertices per primitive. */
- unsigned num_vertices;
- LLVMValueRef num_vertices_val;
-
- if (ctx->stage == MESA_SHADER_VERTEX) {
- LLVMValueRef outprim_val =
- LLVMConstInt(ctx->ac.i32,
- ctx->args->options->key.vs.outprim, false);
- num_vertices_val = LLVMBuildAdd(builder, outprim_val,
- ctx->ac.i32_1, "");
- num_vertices = 3; /* TODO: optimize for points & lines */
- } else {
- assert(ctx->stage == MESA_SHADER_TESS_EVAL);
-
- if (ctx->shader->info.tess.point_mode)
- num_vertices = 1;
- else if (ctx->shader->info.tess.primitive_mode == GL_ISOLINES)
- num_vertices = 2;
- else
- num_vertices = 3;
-
- num_vertices_val = LLVMConstInt(ctx->ac.i32, num_vertices, false);
- }
-
- /* Streamout */
- if (ctx->args->shader_info->so.num_outputs) {
- struct ngg_streamout nggso = {0};
-
- nggso.num_vertices = num_vertices_val;
- nggso.prim_enable[0] = is_gs_thread;
-
- for (unsigned i = 0; i < num_vertices; ++i)
- nggso.vertices[i] = ngg_nogs_vertex_ptr(ctx, vtxindex[i]);
-
- build_streamout(ctx, &nggso);
- }
-
- /* Copy Primitive IDs from GS threads to the LDS address corresponding
- * to the ES thread of the provoking vertex.
- */
- if (ctx->stage == MESA_SHADER_VERTEX &&
- ctx->args->options->key.vs_common_out.export_prim_id) {
- if (ctx->args->shader_info->so.num_outputs)
- ac_build_s_barrier(&ctx->ac);
-
- ac_build_ifcc(&ctx->ac, is_gs_thread, 5400);
- /* Extract the PROVOKING_VTX_INDEX field. */
- LLVMValueRef provoking_vtx_in_prim =
- LLVMConstInt(ctx->ac.i32, 0, false);
-
- /* provoking_vtx_index = vtxindex[provoking_vtx_in_prim]; */
- LLVMValueRef indices = ac_build_gather_values(&ctx->ac, vtxindex, 3);
- LLVMValueRef provoking_vtx_index =
- LLVMBuildExtractElement(builder, indices, provoking_vtx_in_prim, "");
-
- LLVMBuildStore(builder, ac_get_arg(&ctx->ac, ctx->args->ac.gs_prim_id),
- ac_build_gep0(&ctx->ac, ctx->esgs_ring, provoking_vtx_index));
- ac_build_endif(&ctx->ac, 5400);
- }
-
- /* TODO: primitive culling */
-
- ac_build_sendmsg_gs_alloc_req(&ctx->ac, get_wave_id_in_tg(ctx),
- ngg_get_vtx_cnt(ctx), ngg_get_prim_cnt(ctx));
-
- /* TODO: streamout queries */
- /* Export primitive data to the index buffer.
- *
- * For the first version, we will always build up all three indices
- * independent of the primitive type. The additional garbage data
- * shouldn't hurt.
- *
- * TODO: culling depends on the primitive type, so can have some
- * interaction here.
- */
- ac_build_ifcc(&ctx->ac, is_gs_thread, 6001);
- {
- struct ac_ngg_prim prim = {0};
-
- if (ctx->args->options->key.vs_common_out.as_ngg_passthrough) {
- prim.passthrough = ac_get_arg(&ctx->ac, ctx->args->ac.gs_vtx_offset[0]);
- } else {
- prim.num_vertices = num_vertices;
- prim.isnull = ctx->ac.i1false;
- memcpy(prim.index, vtxindex, sizeof(vtxindex[0]) * 3);
-
- for (unsigned i = 0; i < num_vertices; ++i) {
- tmp = LLVMBuildLShr(builder,
- ac_get_arg(&ctx->ac, ctx->args->ac.gs_invocation_id),
- LLVMConstInt(ctx->ac.i32, 8 + i, false), "");
- prim.edgeflag[i] = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, "");
- }
- }
-
- ac_build_export_prim(&ctx->ac, &prim);
- }
- ac_build_endif(&ctx->ac, 6001);
-
- /* Export per-vertex data (positions and parameters). */
- ac_build_ifcc(&ctx->ac, is_es_thread, 6002);
- {
- struct radv_vs_output_info *outinfo =
- ctx->stage == MESA_SHADER_TESS_EVAL ?
- &ctx->args->shader_info->tes.outinfo : &ctx->args->shader_info->vs.outinfo;
-
- /* Exporting the primitive ID is handled below. */
- /* TODO: use the new VS export path */
- handle_vs_outputs_post(ctx, false,
- ctx->args->options->key.vs_common_out.export_clip_dists,
- outinfo);
-
- if (ctx->args->options->key.vs_common_out.export_prim_id) {
- unsigned param_count = outinfo->param_exports;
- LLVMValueRef values[4];
-
- if (ctx->stage == MESA_SHADER_VERTEX) {
- /* Wait for GS stores to finish. */
- ac_build_s_barrier(&ctx->ac);
-
- tmp = ac_build_gep0(&ctx->ac, ctx->esgs_ring,
- get_thread_id_in_tg(ctx));
- values[0] = LLVMBuildLoad(builder, tmp, "");
- } else {
- assert(ctx->stage == MESA_SHADER_TESS_EVAL);
- values[0] = ac_get_arg(&ctx->ac, ctx->args->ac.tes_patch_id);
- }
-
- values[0] = ac_to_float(&ctx->ac, values[0]);
- for (unsigned j = 1; j < 4; j++)
- values[j] = ctx->ac.f32_0;
-
- radv_export_param(ctx, param_count, values, 0x1);
-
- outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = param_count++;
- outinfo->param_exports = param_count;
- }
- }
- ac_build_endif(&ctx->ac, 6002);
-}
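For reference, the vertices-per-primitive selection near the top of this function boils down to the sketch below (illustrative; it assumes the VS `outprim` key encodes points/lines/triangles as 0/1/2, which is why adding one gives the vertex count).

static unsigned ngg_vertices_per_prim(int is_tess_eval, int point_mode, int isolines,
                                      unsigned vs_outprim_key)
{
   if (!is_tess_eval)
      return vs_outprim_key + 1; /* 0 -> points, 1 -> lines, 2 -> triangles */
   if (point_mode)
      return 1;
   if (isolines)
      return 2;
   return 3;
}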
-
-static void gfx10_ngg_gs_emit_prologue(struct radv_shader_context *ctx)
-{
- /* Zero out the part of LDS scratch that is used to accumulate the
- * per-stream generated primitive count.
- */
- LLVMBuilderRef builder = ctx->ac.builder;
- LLVMValueRef scratchptr = ctx->gs_ngg_scratch;
- LLVMValueRef tid = get_thread_id_in_tg(ctx);
- LLVMBasicBlockRef merge_block;
- LLVMValueRef cond;
-
- LLVMValueRef fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->ac.builder));
- LLVMBasicBlockRef then_block = LLVMAppendBasicBlockInContext(ctx->ac.context, fn, "");
- merge_block = LLVMAppendBasicBlockInContext(ctx->ac.context, fn, "");
-
- cond = LLVMBuildICmp(builder, LLVMIntULT, tid, LLVMConstInt(ctx->ac.i32, 4, false), "");
- LLVMBuildCondBr(ctx->ac.builder, cond, then_block, merge_block);
- LLVMPositionBuilderAtEnd(ctx->ac.builder, then_block);
-
- LLVMValueRef ptr = ac_build_gep0(&ctx->ac, scratchptr, tid);
- LLVMBuildStore(builder, ctx->ac.i32_0, ptr);
-
- LLVMBuildBr(ctx->ac.builder, merge_block);
- LLVMPositionBuilderAtEnd(ctx->ac.builder, merge_block);
-
- ac_build_s_barrier(&ctx->ac);
-}
-
-static void gfx10_ngg_gs_emit_epilogue_1(struct radv_shader_context *ctx)
-{
- LLVMBuilderRef builder = ctx->ac.builder;
- LLVMValueRef i8_0 = LLVMConstInt(ctx->ac.i8, 0, false);
- LLVMValueRef tmp;
-
- /* Zero out remaining (non-emitted) primitive flags.
- *
- * Note: Alternatively, we could pass the relevant gs_next_vertex to
- * the emit threads via LDS. This is likely worse in the expected
- * typical case where each GS thread emits the full set of
- * vertices.
- */
- for (unsigned stream = 0; stream < 4; ++stream) {
- unsigned num_components;
-
- num_components =
- ctx->args->shader_info->gs.num_stream_output_components[stream];
- if (!num_components)
- continue;
-
- const LLVMValueRef gsthread = get_thread_id_in_tg(ctx);
-
- ac_build_bgnloop(&ctx->ac, 5100);
-
- const LLVMValueRef vertexidx =
- LLVMBuildLoad(builder, ctx->gs_next_vertex[stream], "");
- tmp = LLVMBuildICmp(builder, LLVMIntUGE, vertexidx,
- LLVMConstInt(ctx->ac.i32, ctx->shader->info.gs.vertices_out, false), "");
- ac_build_ifcc(&ctx->ac, tmp, 5101);
- ac_build_break(&ctx->ac);
- ac_build_endif(&ctx->ac, 5101);
-
- tmp = LLVMBuildAdd(builder, vertexidx, ctx->ac.i32_1, "");
- LLVMBuildStore(builder, tmp, ctx->gs_next_vertex[stream]);
-
- tmp = ngg_gs_emit_vertex_ptr(ctx, gsthread, vertexidx);
- LLVMBuildStore(builder, i8_0,
- ngg_gs_get_emit_primflag_ptr(ctx, tmp, stream));
-
- ac_build_endloop(&ctx->ac, 5100);
- }
-
- /* Accumulate generated primitives counts across the entire threadgroup. */
- for (unsigned stream = 0; stream < 4; ++stream) {
- unsigned num_components;
-
- num_components =
- ctx->args->shader_info->gs.num_stream_output_components[stream];
- if (!num_components)
- continue;
-
- LLVMValueRef numprims =
- LLVMBuildLoad(builder, ctx->gs_generated_prims[stream], "");
- numprims = ac_build_reduce(&ctx->ac, numprims, nir_op_iadd, ctx->ac.wave_size);
-
- tmp = LLVMBuildICmp(builder, LLVMIntEQ, ac_get_thread_id(&ctx->ac), ctx->ac.i32_0, "");
- ac_build_ifcc(&ctx->ac, tmp, 5105);
- {
- LLVMBuildAtomicRMW(builder, LLVMAtomicRMWBinOpAdd,
- ac_build_gep0(&ctx->ac, ctx->gs_ngg_scratch,
- LLVMConstInt(ctx->ac.i32, stream, false)),
- numprims, LLVMAtomicOrderingMonotonic, false);
- }
- ac_build_endif(&ctx->ac, 5105);
- }
-}
-
-static void gfx10_ngg_gs_emit_epilogue_2(struct radv_shader_context *ctx)
-{
- const unsigned verts_per_prim = si_conv_gl_prim_to_vertices(ctx->shader->info.gs.output_primitive);
- LLVMBuilderRef builder = ctx->ac.builder;
- LLVMValueRef tmp, tmp2;
-
- ac_build_s_barrier(&ctx->ac);
-
- const LLVMValueRef tid = get_thread_id_in_tg(ctx);
- LLVMValueRef num_emit_threads = ngg_get_prim_cnt(ctx);
-
- /* Streamout */
- if (ctx->args->shader_info->so.num_outputs) {
- struct ngg_streamout nggso = {0};
-
- nggso.num_vertices = LLVMConstInt(ctx->ac.i32, verts_per_prim, false);
-
- LLVMValueRef vertexptr = ngg_gs_vertex_ptr(ctx, tid);
- for (unsigned stream = 0; stream < 4; ++stream) {
- if (!ctx->args->shader_info->gs.num_stream_output_components[stream])
- continue;
-
- tmp = LLVMBuildLoad(builder,
- ngg_gs_get_emit_primflag_ptr(ctx, vertexptr, stream), "");
- tmp = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, "");
- tmp2 = LLVMBuildICmp(builder, LLVMIntULT, tid, num_emit_threads, "");
- nggso.prim_enable[stream] = LLVMBuildAnd(builder, tmp, tmp2, "");
- }
-
- for (unsigned i = 0; i < verts_per_prim; ++i) {
- tmp = LLVMBuildSub(builder, tid,
- LLVMConstInt(ctx->ac.i32, verts_per_prim - i - 1, false), "");
- tmp = ngg_gs_vertex_ptr(ctx, tmp);
- nggso.vertices[i] = ac_build_gep0(&ctx->ac, tmp, ctx->ac.i32_0);
- }
-
- build_streamout(ctx, &nggso);
- }
-
- /* Write shader query data. */
- tmp = ac_get_arg(&ctx->ac, ctx->args->ngg_gs_state);
- tmp = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, "");
- ac_build_ifcc(&ctx->ac, tmp, 5109);
- tmp = LLVMBuildICmp(builder, LLVMIntULT, tid,
- LLVMConstInt(ctx->ac.i32, 4, false), "");
- ac_build_ifcc(&ctx->ac, tmp, 5110);
- {
- tmp = LLVMBuildLoad(builder, ac_build_gep0(&ctx->ac, ctx->gs_ngg_scratch, tid), "");
-
- ac_llvm_add_target_dep_function_attr(ctx->main_function,
- "amdgpu-gds-size", 256);
-
- LLVMTypeRef gdsptr = LLVMPointerType(ctx->ac.i32, AC_ADDR_SPACE_GDS);
- LLVMValueRef gdsbase = LLVMBuildIntToPtr(builder, ctx->ac.i32_0, gdsptr, "");
-
- const char *sync_scope = LLVM_VERSION_MAJOR >= 9 ? "workgroup-one-as" : "workgroup";
-
- /* Use a plain GDS atomic to accumulate the number of generated
- * primitives.
- */
- ac_build_atomic_rmw(&ctx->ac, LLVMAtomicRMWBinOpAdd, gdsbase,
- tmp, sync_scope);
- }
- ac_build_endif(&ctx->ac, 5110);
- ac_build_endif(&ctx->ac, 5109);
-
- /* TODO: culling */
-
- /* Determine vertex liveness. */
- LLVMValueRef vertliveptr = ac_build_alloca(&ctx->ac, ctx->ac.i1, "vertexlive");
-
- tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, num_emit_threads, "");
- ac_build_ifcc(&ctx->ac, tmp, 5120);
- {
- for (unsigned i = 0; i < verts_per_prim; ++i) {
- const LLVMValueRef primidx =
- LLVMBuildAdd(builder, tid,
- LLVMConstInt(ctx->ac.i32, i, false), "");
-
- if (i > 0) {
- tmp = LLVMBuildICmp(builder, LLVMIntULT, primidx, num_emit_threads, "");
- ac_build_ifcc(&ctx->ac, tmp, 5121 + i);
- }
-
- /* Load primitive liveness */
- tmp = ngg_gs_vertex_ptr(ctx, primidx);
- tmp = LLVMBuildLoad(builder,
- ngg_gs_get_emit_primflag_ptr(ctx, tmp, 0), "");
- const LLVMValueRef primlive =
- LLVMBuildTrunc(builder, tmp, ctx->ac.i1, "");
-
- tmp = LLVMBuildLoad(builder, vertliveptr, "");
- tmp = LLVMBuildOr(builder, tmp, primlive, ""),
- LLVMBuildStore(builder, tmp, vertliveptr);
-
- if (i > 0)
- ac_build_endif(&ctx->ac, 5121 + i);
- }
- }
- ac_build_endif(&ctx->ac, 5120);
-
- /* Inclusive scan addition across the current wave. */
- LLVMValueRef vertlive = LLVMBuildLoad(builder, vertliveptr, "");
- struct ac_wg_scan vertlive_scan = {0};
- vertlive_scan.op = nir_op_iadd;
- vertlive_scan.enable_reduce = true;
- vertlive_scan.enable_exclusive = true;
- vertlive_scan.src = vertlive;
- vertlive_scan.scratch = ac_build_gep0(&ctx->ac, ctx->gs_ngg_scratch, ctx->ac.i32_0);
- vertlive_scan.waveidx = get_wave_id_in_tg(ctx);
- vertlive_scan.numwaves = get_tgsize(ctx);
- vertlive_scan.maxwaves = 8;
-
- ac_build_wg_scan(&ctx->ac, &vertlive_scan);
-
- /* Skip all exports (including index exports) when possible. At least on
- * early gfx10 revisions this is also to avoid hangs.
- */
- LLVMValueRef have_exports =
- LLVMBuildICmp(builder, LLVMIntNE, vertlive_scan.result_reduce, ctx->ac.i32_0, "");
- num_emit_threads =
- LLVMBuildSelect(builder, have_exports, num_emit_threads, ctx->ac.i32_0, "");
-
- /* Allocate export space. Send this message as early as possible, to
- * hide the latency of the SQ <-> SPI roundtrip.
- *
- * Note: We could consider compacting primitives for export as well.
- * PA processes 1 non-null prim / clock, but it fetches 4 DW of
- * prim data per clock and skips null primitives at no additional
- * cost. So compacting primitives can only be beneficial when
- * there are 4 or more contiguous null primitives in the export
- * (in the common case of single-dword prim exports).
- */
- ac_build_sendmsg_gs_alloc_req(&ctx->ac, get_wave_id_in_tg(ctx),
- vertlive_scan.result_reduce, num_emit_threads);
-
- /* Setup the reverse vertex compaction permutation. We re-use stream 1
- * of the primitive liveness flags, relying on the fact that each
- * threadgroup can have at most 256 threads. */
- ac_build_ifcc(&ctx->ac, vertlive, 5130);
- {
- tmp = ngg_gs_vertex_ptr(ctx, vertlive_scan.result_exclusive);
- tmp2 = LLVMBuildTrunc(builder, tid, ctx->ac.i8, "");
- LLVMBuildStore(builder, tmp2,
- ngg_gs_get_emit_primflag_ptr(ctx, tmp, 1));
- }
- ac_build_endif(&ctx->ac, 5130);
-
- ac_build_s_barrier(&ctx->ac);
-
- /* Export primitive data */
- tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, num_emit_threads, "");
- ac_build_ifcc(&ctx->ac, tmp, 5140);
- {
- LLVMValueRef flags;
- struct ac_ngg_prim prim = {0};
- prim.num_vertices = verts_per_prim;
-
- tmp = ngg_gs_vertex_ptr(ctx, tid);
- flags = LLVMBuildLoad(builder,
- ngg_gs_get_emit_primflag_ptr(ctx, tmp, 0), "");
- prim.isnull = LLVMBuildNot(builder, LLVMBuildTrunc(builder, flags, ctx->ac.i1, ""), "");
-
- for (unsigned i = 0; i < verts_per_prim; ++i) {
- prim.index[i] = LLVMBuildSub(builder, vertlive_scan.result_exclusive,
- LLVMConstInt(ctx->ac.i32, verts_per_prim - i - 1, false), "");
- prim.edgeflag[i] = ctx->ac.i1false;
- }
-
- /* Geometry shaders output triangle strips, but NGG expects
- * triangles. We need to change the vertex order for odd
- * triangles to get correct front/back facing by swapping 2
- * vertex indices, but we also have to keep the provoking
- * vertex in the same place.
- */
- if (verts_per_prim == 3) {
- LLVMValueRef is_odd = LLVMBuildLShr(builder, flags, ctx->ac.i8_1, "");
- is_odd = LLVMBuildTrunc(builder, is_odd, ctx->ac.i1, "");
-
- struct ac_ngg_prim in = prim;
- prim.index[0] = in.index[0];
- prim.index[1] = LLVMBuildSelect(builder, is_odd,
- in.index[2], in.index[1], "");
- prim.index[2] = LLVMBuildSelect(builder, is_odd,
- in.index[1], in.index[2], "");
- }
-
- ac_build_export_prim(&ctx->ac, &prim);
- }
- ac_build_endif(&ctx->ac, 5140);
-
- /* Export position and parameter data */
- tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, vertlive_scan.result_reduce, "");
- ac_build_ifcc(&ctx->ac, tmp, 5145);
- {
- struct radv_vs_output_info *outinfo = &ctx->args->shader_info->vs.outinfo;
- bool export_view_index = ctx->args->options->key.has_multiview_view_index;
- struct radv_shader_output_values *outputs;
- unsigned noutput = 0;
-
- /* Allocate a temporary array for the output values. */
- unsigned num_outputs = util_bitcount64(ctx->output_mask) + export_view_index;
- outputs = calloc(num_outputs, sizeof(outputs[0]));
-
- memset(outinfo->vs_output_param_offset, AC_EXP_PARAM_UNDEFINED,
- sizeof(outinfo->vs_output_param_offset));
- outinfo->pos_exports = 0;
-
- tmp = ngg_gs_vertex_ptr(ctx, tid);
- tmp = LLVMBuildLoad(builder,
- ngg_gs_get_emit_primflag_ptr(ctx, tmp, 1), "");
- tmp = LLVMBuildZExt(builder, tmp, ctx->ac.i32, "");
- const LLVMValueRef vertexptr = ngg_gs_vertex_ptr(ctx, tmp);
-
- unsigned out_idx = 0;
- for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
- unsigned output_usage_mask =
- ctx->args->shader_info->gs.output_usage_mask[i];
- int length = util_last_bit(output_usage_mask);
-
- if (!(ctx->output_mask & (1ull << i)))
- continue;
-
- outputs[noutput].slot_name = i;
- outputs[noutput].slot_index = i == VARYING_SLOT_CLIP_DIST1;
- outputs[noutput].usage_mask = output_usage_mask;
-
- for (unsigned j = 0; j < length; j++, out_idx++) {
- if (!(output_usage_mask & (1 << j)))
- continue;
-
- tmp = ngg_gs_get_emit_output_ptr(ctx, vertexptr, out_idx);
- tmp = LLVMBuildLoad(builder, tmp, "");
-
- LLVMTypeRef type = LLVMGetAllocatedType(ctx->abi.outputs[ac_llvm_reg_index_soa(i, j)]);
- if (ac_get_type_size(type) == 2) {
- tmp = ac_to_integer(&ctx->ac, tmp);
- tmp = LLVMBuildTrunc(ctx->ac.builder, tmp, ctx->ac.i16, "");
- }
-
- outputs[noutput].values[j] = ac_to_float(&ctx->ac, tmp);
- }
-
- for (unsigned j = length; j < 4; j++)
- outputs[noutput].values[j] = LLVMGetUndef(ctx->ac.f32);
-
- noutput++;
- }
-
- /* Export ViewIndex. */
- if (export_view_index) {
- outputs[noutput].slot_name = VARYING_SLOT_LAYER;
- outputs[noutput].slot_index = 0;
- outputs[noutput].usage_mask = 0x1;
- outputs[noutput].values[0] =
- ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.view_index));
- for (unsigned j = 1; j < 4; j++)
- outputs[noutput].values[j] = ctx->ac.f32_0;
- noutput++;
- }
-
- radv_llvm_export_vs(ctx, outputs, noutput, outinfo,
- ctx->args->options->key.vs_common_out.export_clip_dists);
- FREE(outputs);
- }
- ac_build_endif(&ctx->ac, 5145);
-}
-
-static void gfx10_ngg_gs_emit_vertex(struct radv_shader_context *ctx,
- unsigned stream,
- LLVMValueRef vertexidx,
- LLVMValueRef *addrs)
-{
- LLVMBuilderRef builder = ctx->ac.builder;
- LLVMValueRef tmp;
-
- const LLVMValueRef vertexptr =
- ngg_gs_emit_vertex_ptr(ctx, get_thread_id_in_tg(ctx), vertexidx);
- unsigned out_idx = 0;
- for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
- unsigned output_usage_mask =
- ctx->args->shader_info->gs.output_usage_mask[i];
- uint8_t output_stream =
- ctx->args->shader_info->gs.output_streams[i];
- LLVMValueRef *out_ptr = &addrs[i * 4];
- int length = util_last_bit(output_usage_mask);
-
- if (!(ctx->output_mask & (1ull << i)) ||
- output_stream != stream)
- continue;
-
- for (unsigned j = 0; j < length; j++, out_idx++) {
- if (!(output_usage_mask & (1 << j)))
- continue;
-
- LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder,
- out_ptr[j], "");
- out_val = ac_to_integer(&ctx->ac, out_val);
- out_val = LLVMBuildZExtOrBitCast(ctx->ac.builder, out_val, ctx->ac.i32, "");
-
- LLVMBuildStore(builder, out_val,
- ngg_gs_get_emit_output_ptr(ctx, vertexptr, out_idx));
- }
- }
- assert(out_idx * 4 <= ctx->args->shader_info->gs.gsvs_vertex_size);
-
- /* Store the current number of emitted vertices to zero out remaining
- * primitive flags in case the geometry shader doesn't emit the maximum
- * number of vertices.
- */
- tmp = LLVMBuildAdd(builder, vertexidx, ctx->ac.i32_1, "");
- LLVMBuildStore(builder, tmp, ctx->gs_next_vertex[stream]);
-
- /* Determine and store whether this vertex completed a primitive. */
- const LLVMValueRef curverts = LLVMBuildLoad(builder, ctx->gs_curprim_verts[stream], "");
-
- tmp = LLVMConstInt(ctx->ac.i32, si_conv_gl_prim_to_vertices(ctx->shader->info.gs.output_primitive) - 1, false);
- const LLVMValueRef iscompleteprim =
- LLVMBuildICmp(builder, LLVMIntUGE, curverts, tmp, "");
-
- /* Since the geometry shader emits triangle strips, we need to
- * track which primitive is odd and swap vertex indices to get
- * the correct vertex order.
- */
- LLVMValueRef is_odd = ctx->ac.i1false;
- if (stream == 0 &&
- si_conv_gl_prim_to_vertices(ctx->shader->info.gs.output_primitive) == 3) {
- tmp = LLVMBuildAnd(builder, curverts, ctx->ac.i32_1, "");
- is_odd = LLVMBuildICmp(builder, LLVMIntEQ, tmp, ctx->ac.i32_1, "");
- }
-
- tmp = LLVMBuildAdd(builder, curverts, ctx->ac.i32_1, "");
- LLVMBuildStore(builder, tmp, ctx->gs_curprim_verts[stream]);
-
- /* The per-vertex primitive flag encoding:
- * bit 0: whether this vertex finishes a primitive
- * bit 1: whether the primitive is odd (if we are emitting triangle strips)
- */
- tmp = LLVMBuildZExt(builder, iscompleteprim, ctx->ac.i8, "");
- tmp = LLVMBuildOr(builder, tmp,
- LLVMBuildShl(builder,
- LLVMBuildZExt(builder, is_odd, ctx->ac.i8, ""),
- ctx->ac.i8_1, ""), "");
- LLVMBuildStore(builder, tmp,
- ngg_gs_get_emit_primflag_ptr(ctx, vertexptr, stream));
-
- tmp = LLVMBuildLoad(builder, ctx->gs_generated_prims[stream], "");
- tmp = LLVMBuildAdd(builder, tmp, LLVMBuildZExt(builder, iscompleteprim, ctx->ac.i32, ""), "");
- LLVMBuildStore(builder, tmp, ctx->gs_generated_prims[stream]);
+ LLVMBuilderRef builder = ctx->ac.builder;
+ LLVMValueRef tmp;
+
+ assert((ctx->stage == MESA_SHADER_VERTEX || ctx->stage == MESA_SHADER_TESS_EVAL) &&
+ !ctx->args->is_gs_copy_shader);
+
+ LLVMValueRef prims_in_wave =
+ ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.merged_wave_info), 8, 8);
+ LLVMValueRef vtx_in_wave =
+ ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.merged_wave_info), 0, 8);
+ LLVMValueRef is_gs_thread =
+ LLVMBuildICmp(builder, LLVMIntULT, ac_get_thread_id(&ctx->ac), prims_in_wave, "");
+ LLVMValueRef is_es_thread =
+ LLVMBuildICmp(builder, LLVMIntULT, ac_get_thread_id(&ctx->ac), vtx_in_wave, "");
+ LLVMValueRef vtxindex[] = {
+ ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.gs_vtx_offset[0]), 0, 16),
+ ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.gs_vtx_offset[0]), 16, 16),
+ ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.gs_vtx_offset[2]), 0, 16),
+ };
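/* Editorial sketch (not part of this patch): a scalar equivalent of the
 * ac_unpack_param() calls above, which read bitfields out of packed SGPR
 * arguments -- here the ES vertex count sits in bits [7:0] and the GS
 * primitive count in bits [15:8] of merged_wave_info, and each
 * gs_vtx_offset dword packs two 16-bit vertex indices. The helper name is
 * ours, not the driver's. */
#include <stdint.h>

static inline uint32_t
unpack_bitfield(uint32_t value, unsigned bit_offset, unsigned bit_width)
{
   /* Assumes bit_width < 32, which holds for the 8- and 16-bit fields above. */
   return (value >> bit_offset) & ((1u << bit_width) - 1);
}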
+
+ /* Determine the number of vertices per primitive. */
+ unsigned num_vertices;
+ LLVMValueRef num_vertices_val;
+
+ if (ctx->stage == MESA_SHADER_VERTEX) {
+ LLVMValueRef outprim_val =
+ LLVMConstInt(ctx->ac.i32, ctx->args->options->key.vs.outprim, false);
+ num_vertices_val = LLVMBuildAdd(builder, outprim_val, ctx->ac.i32_1, "");
+ num_vertices = 3; /* TODO: optimize for points & lines */
+ } else {
+ assert(ctx->stage == MESA_SHADER_TESS_EVAL);
+
+ if (ctx->shader->info.tess.point_mode)
+ num_vertices = 1;
+ else if (ctx->shader->info.tess.primitive_mode == GL_ISOLINES)
+ num_vertices = 2;
+ else
+ num_vertices = 3;
+
+ num_vertices_val = LLVMConstInt(ctx->ac.i32, num_vertices, false);
+ }
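/* Editorial sketch (not part of this patch): the vertex-count selection
 * above in plain C. The VS path assumes the common outprim encoding
 * (0 = points, 1 = lines, 2 = triangles), which is why it just adds 1; the
 * TES path derives the count from the tessellation mode instead. */
#include <stdbool.h>

static unsigned
ngg_nogs_vertices_per_prim(bool is_tes, bool point_mode, bool isolines, unsigned vs_outprim)
{
   if (!is_tes)
      return vs_outprim + 1; /* 0 -> 1, 1 -> 2, 2 -> 3 */
   if (point_mode)
      return 1;
   if (isolines)
      return 2;
   return 3;
}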
+
+ /* Streamout */
+ if (ctx->args->shader_info->so.num_outputs) {
+ struct ngg_streamout nggso = {0};
+
+ nggso.num_vertices = num_vertices_val;
+ nggso.prim_enable[0] = is_gs_thread;
+
+ for (unsigned i = 0; i < num_vertices; ++i)
+ nggso.vertices[i] = ngg_nogs_vertex_ptr(ctx, vtxindex[i]);
+
+ build_streamout(ctx, &nggso);
+ }
+
+ /* Copy Primitive IDs from GS threads to the LDS address corresponding
+ * to the ES thread of the provoking vertex.
+ */
+ if (ctx->stage == MESA_SHADER_VERTEX && ctx->args->options->key.vs_common_out.export_prim_id) {
+ if (ctx->args->shader_info->so.num_outputs)
+ ac_build_s_barrier(&ctx->ac);
+
+ ac_build_ifcc(&ctx->ac, is_gs_thread, 5400);
+ /* Extract the PROVOKING_VTX_INDEX field. */
+ LLVMValueRef provoking_vtx_in_prim = LLVMConstInt(ctx->ac.i32, 0, false);
+
+ /* provoking_vtx_index = vtxindex[provoking_vtx_in_prim]; */
+ LLVMValueRef indices = ac_build_gather_values(&ctx->ac, vtxindex, 3);
+ LLVMValueRef provoking_vtx_index =
+ LLVMBuildExtractElement(builder, indices, provoking_vtx_in_prim, "");
+
+ LLVMBuildStore(builder, ac_get_arg(&ctx->ac, ctx->args->ac.gs_prim_id),
+ ac_build_gep0(&ctx->ac, ctx->esgs_ring, provoking_vtx_index));
+ ac_build_endif(&ctx->ac, 5400);
+ }
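/* Editorial sketch (not part of this patch): the primitive-ID handoff above,
 * modelled with a plain array standing in for the ES/GS LDS ring. Each GS
 * thread stores its primitive ID at the slot of the provoking vertex
 * (vertex 0 in the code above), so the matching ES thread can read it back
 * when it exports. Function and parameter names here are hypothetical. */
#include <stdint.h>

static void
store_prim_id_at_provoking_vertex(uint32_t *lds_prim_id, const uint32_t vtxindex[3],
                                  unsigned provoking_vtx_in_prim, uint32_t gs_prim_id)
{
   lds_prim_id[vtxindex[provoking_vtx_in_prim]] = gs_prim_id;
}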
+
+ /* TODO: primitive culling */
+
+ ac_build_sendmsg_gs_alloc_req(&ctx->ac, get_wave_id_in_tg(ctx), ngg_get_vtx_cnt(ctx),
+ ngg_get_prim_cnt(ctx));
+
+ /* TODO: streamout queries */
+ /* Export primitive data to the index buffer.
+ *
+ * For the first version, we will always build up all three indices
+ * independent of the primitive type. The additional garbage data
+ * shouldn't hurt.
+ *
+ * TODO: culling depends on the primitive type, so can have some
+ * interaction here.
+ */
+ ac_build_ifcc(&ctx->ac, is_gs_thread, 6001);
+ {
+ struct ac_ngg_prim prim = {0};
+
+ if (ctx->args->options->key.vs_common_out.as_ngg_passthrough) {
+ prim.passthrough = ac_get_arg(&ctx->ac, ctx->args->ac.gs_vtx_offset[0]);
+ } else {
+ prim.num_vertices = num_vertices;
+ prim.isnull = ctx->ac.i1false;
+ memcpy(prim.index, vtxindex, sizeof(vtxindex[0]) * 3);
+
+ for (unsigned i = 0; i < num_vertices; ++i) {
+ tmp = LLVMBuildLShr(builder, ac_get_arg(&ctx->ac, ctx->args->ac.gs_invocation_id),
+ LLVMConstInt(ctx->ac.i32, 8 + i, false), "");
+ prim.edgeflag[i] = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, "");
+ }
+ }
+
+ ac_build_export_prim(&ctx->ac, &prim);
+ }
+ ac_build_endif(&ctx->ac, 6001);
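/* Editorial sketch (not part of this patch): the non-passthrough edge-flag
 * extraction above, restated on scalars. The shifts in the code read one
 * edge flag per primitive vertex out of bit 8 + i of the gs_invocation_id
 * argument. */
#include <stdbool.h>
#include <stdint.h>

static bool
ngg_edgeflag_for_vertex(uint32_t gs_invocation_id, unsigned vertex /* 0..2 */)
{
   return (gs_invocation_id >> (8 + vertex)) & 1u;
}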
+
+ /* Export per-vertex data (positions and parameters). */
+ ac_build_ifcc(&ctx->ac, is_es_thread, 6002);
+ {
+ struct radv_vs_output_info *outinfo = ctx->stage == MESA_SHADER_TESS_EVAL
+ ? &ctx->args->shader_info->tes.outinfo
+ : &ctx->args->shader_info->vs.outinfo;
+
+ /* Exporting the primitive ID is handled below. */
+ /* TODO: use the new VS export path */
+ handle_vs_outputs_post(ctx, false, ctx->args->options->key.vs_common_out.export_clip_dists,
+ outinfo);
+
+ if (ctx->args->options->key.vs_common_out.export_prim_id) {
+ unsigned param_count = outinfo->param_exports;
+ LLVMValueRef values[4];
+
+ if (ctx->stage == MESA_SHADER_VERTEX) {
+ /* Wait for GS stores to finish. */
+ ac_build_s_barrier(&ctx->ac);
+
+ tmp = ac_build_gep0(&ctx->ac, ctx->esgs_ring, get_thread_id_in_tg(ctx));
+ values[0] = LLVMBuildLoad(builder, tmp, "");
+ } else {
+ assert(ctx->stage == MESA_SHADER_TESS_EVAL);
+ values[0] = ac_get_arg(&ctx->ac, ctx->args->ac.tes_patch_id);
+ }
+
+ values[0] = ac_to_float(&ctx->ac, values[0]);
+ for (unsigned j = 1; j < 4; j++)
+ values[j] = ctx->ac.f32_0;
+
+ radv_export_param(ctx, param_count, values, 0x1);
+
+ outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = param_count++;
+ outinfo->param_exports = param_count;
+ }
+ }
+ ac_build_endif(&ctx->ac, 6002);
+}
+
+static void
+gfx10_ngg_gs_emit_prologue(struct radv_shader_context *ctx)
+{
+ /* Zero out the part of LDS scratch that is used to accumulate the
+ * per-stream generated primitive count.
+ */
+ LLVMBuilderRef builder = ctx->ac.builder;
+ LLVMValueRef scratchptr = ctx->gs_ngg_scratch;
+ LLVMValueRef tid = get_thread_id_in_tg(ctx);
+ LLVMBasicBlockRef merge_block;
+ LLVMValueRef cond;
+
+ LLVMValueRef fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->ac.builder));
+ LLVMBasicBlockRef then_block = LLVMAppendBasicBlockInContext(ctx->ac.context, fn, "");
+ merge_block = LLVMAppendBasicBlockInContext(ctx->ac.context, fn, "");
+
+ cond = LLVMBuildICmp(builder, LLVMIntULT, tid, LLVMConstInt(ctx->ac.i32, 4, false), "");
+ LLVMBuildCondBr(ctx->ac.builder, cond, then_block, merge_block);
+ LLVMPositionBuilderAtEnd(ctx->ac.builder, then_block);
+
+ LLVMValueRef ptr = ac_build_gep0(&ctx->ac, scratchptr, tid);
+ LLVMBuildStore(builder, ctx->ac.i32_0, ptr);
+
+ LLVMBuildBr(ctx->ac.builder, merge_block);
+ LLVMPositionBuilderAtEnd(ctx->ac.builder, merge_block);
+
+ ac_build_s_barrier(&ctx->ac);
+}
+
+static void
+gfx10_ngg_gs_emit_epilogue_1(struct radv_shader_context *ctx)
+{
+ LLVMBuilderRef builder = ctx->ac.builder;
+ LLVMValueRef i8_0 = LLVMConstInt(ctx->ac.i8, 0, false);
+ LLVMValueRef tmp;
+
+ /* Zero out remaining (non-emitted) primitive flags.
+ *
+ * Note: Alternatively, we could pass the relevant gs_next_vertex to
+ * the emit threads via LDS. This is likely worse in the expected
+ * typical case where each GS thread emits the full set of
+ * vertices.
+ */
+ for (unsigned stream = 0; stream < 4; ++stream) {
+ unsigned num_components;
+
+ num_components = ctx->args->shader_info->gs.num_stream_output_components[stream];
+ if (!num_components)
+ continue;
+
+ const LLVMValueRef gsthread = get_thread_id_in_tg(ctx);
+
+ ac_build_bgnloop(&ctx->ac, 5100);
+
+ const LLVMValueRef vertexidx = LLVMBuildLoad(builder, ctx->gs_next_vertex[stream], "");
+ tmp = LLVMBuildICmp(builder, LLVMIntUGE, vertexidx,
+ LLVMConstInt(ctx->ac.i32, ctx->shader->info.gs.vertices_out, false), "");
+ ac_build_ifcc(&ctx->ac, tmp, 5101);
+ ac_build_break(&ctx->ac);
+ ac_build_endif(&ctx->ac, 5101);
+
+ tmp = LLVMBuildAdd(builder, vertexidx, ctx->ac.i32_1, "");
+ LLVMBuildStore(builder, tmp, ctx->gs_next_vertex[stream]);
+
+ tmp = ngg_gs_emit_vertex_ptr(ctx, gsthread, vertexidx);
+ LLVMBuildStore(builder, i8_0, ngg_gs_get_emit_primflag_ptr(ctx, tmp, stream));
+
+ ac_build_endloop(&ctx->ac, 5100);
+ }
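/* Editorial sketch (not part of this patch): what the loop above does for
 * one GS thread and one stream, with a plain byte array standing in for the
 * per-vertex primitive-flag slots in LDS. */
#include <stdint.h>

static void
zero_remaining_primflags(uint8_t *primflags, unsigned gs_next_vertex, unsigned vertices_out)
{
   for (unsigned v = gs_next_vertex; v < vertices_out; ++v)
      primflags[v] = 0;
}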
+
+ /* Accumulate generated primitives counts across the entire threadgroup. */
+ for (unsigned stream = 0; stream < 4; ++stream) {
+ unsigned num_components;
+
+ num_components = ctx->args->shader_info->gs.num_stream_output_components[stream];
+ if (!num_components)
+ continue;
+
+ LLVMValueRef numprims = LLVMBuildLoad(builder, ctx->gs_generated_prims[stream], "");
+ numprims = ac_build_reduce(&ctx->ac, numprims, nir_op_iadd, ctx->ac.wave_size);
+
+ tmp = LLVMBuildICmp(builder, LLVMIntEQ, ac_get_thread_id(&ctx->ac), ctx->ac.i32_0, "");
+ ac_build_ifcc(&ctx->ac, tmp, 5105);
+ {
+ LLVMBuildAtomicRMW(
+ builder, LLVMAtomicRMWBinOpAdd,
+ ac_build_gep0(&ctx->ac, ctx->gs_ngg_scratch, LLVMConstInt(ctx->ac.i32, stream, false)),
+ numprims, LLVMAtomicOrderingMonotonic, false);
+ }
+ ac_build_endif(&ctx->ac, 5105);
+ }
+}
+
+static void
+gfx10_ngg_gs_emit_epilogue_2(struct radv_shader_context *ctx)
+{
+ const unsigned verts_per_prim =
+ si_conv_gl_prim_to_vertices(ctx->shader->info.gs.output_primitive);
+ LLVMBuilderRef builder = ctx->ac.builder;
+ LLVMValueRef tmp, tmp2;
+
+ ac_build_s_barrier(&ctx->ac);
+
+ const LLVMValueRef tid = get_thread_id_in_tg(ctx);
+ LLVMValueRef num_emit_threads = ngg_get_prim_cnt(ctx);
+
+ /* Streamout */
+ if (ctx->args->shader_info->so.num_outputs) {
+ struct ngg_streamout nggso = {0};
+
+ nggso.num_vertices = LLVMConstInt(ctx->ac.i32, verts_per_prim, false);
+
+ LLVMValueRef vertexptr = ngg_gs_vertex_ptr(ctx, tid);
+ for (unsigned stream = 0; stream < 4; ++stream) {
+ if (!ctx->args->shader_info->gs.num_stream_output_components[stream])
+ continue;
+
+ tmp = LLVMBuildLoad(builder, ngg_gs_get_emit_primflag_ptr(ctx, vertexptr, stream), "");
+ tmp = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, "");
+ tmp2 = LLVMBuildICmp(builder, LLVMIntULT, tid, num_emit_threads, "");
+ nggso.prim_enable[stream] = LLVMBuildAnd(builder, tmp, tmp2, "");
+ }
+
+ for (unsigned i = 0; i < verts_per_prim; ++i) {
+ tmp = LLVMBuildSub(builder, tid, LLVMConstInt(ctx->ac.i32, verts_per_prim - i - 1, false),
+ "");
+ tmp = ngg_gs_vertex_ptr(ctx, tmp);
+ nggso.vertices[i] = ac_build_gep0(&ctx->ac, tmp, ctx->ac.i32_0);
+ }
+
+ build_streamout(ctx, &nggso);
+ }
+
+ /* Write shader query data. */
+ tmp = ac_get_arg(&ctx->ac, ctx->args->ngg_gs_state);
+ tmp = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, "");
+ ac_build_ifcc(&ctx->ac, tmp, 5109);
+ tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, LLVMConstInt(ctx->ac.i32, 4, false), "");
+ ac_build_ifcc(&ctx->ac, tmp, 5110);
+ {
+ tmp = LLVMBuildLoad(builder, ac_build_gep0(&ctx->ac, ctx->gs_ngg_scratch, tid), "");
+
+ ac_llvm_add_target_dep_function_attr(ctx->main_function, "amdgpu-gds-size", 256);
+
+ LLVMTypeRef gdsptr = LLVMPointerType(ctx->ac.i32, AC_ADDR_SPACE_GDS);
+ LLVMValueRef gdsbase = LLVMBuildIntToPtr(builder, ctx->ac.i32_0, gdsptr, "");
+
+ const char *sync_scope = LLVM_VERSION_MAJOR >= 9 ? "workgroup-one-as" : "workgroup";
+
+ /* Use a plain GDS atomic to accumulate the number of generated
+ * primitives.
+ */
+ ac_build_atomic_rmw(&ctx->ac, LLVMAtomicRMWBinOpAdd, gdsbase, tmp, sync_scope);
+ }
+ ac_build_endif(&ctx->ac, 5110);
+ ac_build_endif(&ctx->ac, 5109);
+
+ /* TODO: culling */
+
+ /* Determine vertex liveness. */
+ LLVMValueRef vertliveptr = ac_build_alloca(&ctx->ac, ctx->ac.i1, "vertexlive");
+
+ tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, num_emit_threads, "");
+ ac_build_ifcc(&ctx->ac, tmp, 5120);
+ {
+ for (unsigned i = 0; i < verts_per_prim; ++i) {
+ const LLVMValueRef primidx =
+ LLVMBuildAdd(builder, tid, LLVMConstInt(ctx->ac.i32, i, false), "");
+
+ if (i > 0) {
+ tmp = LLVMBuildICmp(builder, LLVMIntULT, primidx, num_emit_threads, "");
+ ac_build_ifcc(&ctx->ac, tmp, 5121 + i);
+ }
+
+ /* Load primitive liveness */
+ tmp = ngg_gs_vertex_ptr(ctx, primidx);
+ tmp = LLVMBuildLoad(builder, ngg_gs_get_emit_primflag_ptr(ctx, tmp, 0), "");
+ const LLVMValueRef primlive = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, "");
+
+ tmp = LLVMBuildLoad(builder, vertliveptr, "");
+ tmp = LLVMBuildOr(builder, tmp, primlive, ""), LLVMBuildStore(builder, tmp, vertliveptr);
+
+ if (i > 0)
+ ac_build_endif(&ctx->ac, 5121 + i);
+ }
+ }
+ ac_build_endif(&ctx->ac, 5120);
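/* Editorial sketch (not part of this patch): the liveness rule above in
 * scalar form. A vertex owned by thread tid is live if any of the
 * verts_per_prim primitives that could end at tid .. tid + V - 1 has its
 * "completes a primitive" bit (bit 0 of the prim flags) set. */
#include <stdbool.h>
#include <stdint.h>

static bool
ngg_gs_vertex_is_live(const uint8_t *primflags, unsigned tid, unsigned num_emit_threads,
                      unsigned verts_per_prim)
{
   bool live = false;
   for (unsigned i = 0; i < verts_per_prim; ++i) {
      unsigned primidx = tid + i;
      if (primidx < num_emit_threads)
         live |= (primflags[primidx] & 1) != 0;
   }
   return live;
}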
+
+ /* Inclusive scan addition across the current wave. */
+ LLVMValueRef vertlive = LLVMBuildLoad(builder, vertliveptr, "");
+ struct ac_wg_scan vertlive_scan = {0};
+ vertlive_scan.op = nir_op_iadd;
+ vertlive_scan.enable_reduce = true;
+ vertlive_scan.enable_exclusive = true;
+ vertlive_scan.src = vertlive;
+ vertlive_scan.scratch = ac_build_gep0(&ctx->ac, ctx->gs_ngg_scratch, ctx->ac.i32_0);
+ vertlive_scan.waveidx = get_wave_id_in_tg(ctx);
+ vertlive_scan.numwaves = get_tgsize(ctx);
+ vertlive_scan.maxwaves = 8;
+
+ ac_build_wg_scan(&ctx->ac, &vertlive_scan);
+
+ /* Skip all exports (including index exports) when possible. At least on
+ * early gfx10 revisions this is also to avoid hangs.
+ */
+ LLVMValueRef have_exports =
+ LLVMBuildICmp(builder, LLVMIntNE, vertlive_scan.result_reduce, ctx->ac.i32_0, "");
+ num_emit_threads = LLVMBuildSelect(builder, have_exports, num_emit_threads, ctx->ac.i32_0, "");
+
+ /* Allocate export space. Send this message as early as possible, to
+ * hide the latency of the SQ <-> SPI roundtrip.
+ *
+ * Note: We could consider compacting primitives for export as well.
+ * PA processes 1 non-null prim / clock, but it fetches 4 DW of
+ * prim data per clock and skips null primitives at no additional
+ * cost. So compacting primitives can only be beneficial when
+ * there are 4 or more contiguous null primitives in the export
+ * (in the common case of single-dword prim exports).
+ */
+ ac_build_sendmsg_gs_alloc_req(&ctx->ac, get_wave_id_in_tg(ctx), vertlive_scan.result_reduce,
+ num_emit_threads);
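/* Editorial sketch (not part of this patch): our reading of the message
 * payload that ac_build_sendmsg_gs_alloc_req() builds for the allocation
 * request above -- vertex count in the low bits of m0, primitive count
 * shifted up by 12. Treat the exact packing as an assumption; it lives in
 * the shared ac_llvm code, not in this file. */
#include <stdint.h>

static uint32_t
gs_alloc_req_m0(uint32_t vtx_cnt, uint32_t prim_cnt)
{
   return vtx_cnt | (prim_cnt << 12);
}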
+
+ /* Setup the reverse vertex compaction permutation. We re-use stream 1
+ * of the primitive liveness flags, relying on the fact that each
+ * threadgroup can have at most 256 threads. */
+ ac_build_ifcc(&ctx->ac, vertlive, 5130);
+ {
+ tmp = ngg_gs_vertex_ptr(ctx, vertlive_scan.result_exclusive);
+ tmp2 = LLVMBuildTrunc(builder, tid, ctx->ac.i8, "");
+ LLVMBuildStore(builder, tmp2, ngg_gs_get_emit_primflag_ptr(ctx, tmp, 1));
+ }
+ ac_build_endif(&ctx->ac, 5130);
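/* Editorial sketch (not part of this patch): the compaction permutation set
 * up above. Each live vertex records its original thread id at its compacted
 * position (the exclusive scan result); the export pass below then reads
 * that byte to find which source vertex to fetch. A uint8_t slot suffices
 * because a threadgroup has at most 256 threads. */
#include <stdbool.h>
#include <stdint.h>

static void
record_compaction_source(uint8_t *compact_src, bool vertlive, unsigned tid,
                         unsigned exclusive_count)
{
   if (vertlive)
      compact_src[exclusive_count] = (uint8_t)tid; /* read back as compact_src[compacted_tid] */
}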
+
+ ac_build_s_barrier(&ctx->ac);
+
+ /* Export primitive data */
+ tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, num_emit_threads, "");
+ ac_build_ifcc(&ctx->ac, tmp, 5140);
+ {
+ LLVMValueRef flags;
+ struct ac_ngg_prim prim = {0};
+ prim.num_vertices = verts_per_prim;
+
+ tmp = ngg_gs_vertex_ptr(ctx, tid);
+ flags = LLVMBuildLoad(builder, ngg_gs_get_emit_primflag_ptr(ctx, tmp, 0), "");
+ prim.isnull = LLVMBuildNot(builder, LLVMBuildTrunc(builder, flags, ctx->ac.i1, ""), "");
+
+ for (unsigned i = 0; i < verts_per_prim; ++i) {
+ prim.index[i] = LLVMBuildSub(builder, vertlive_scan.result_exclusive,
+ LLVMConstInt(ctx->ac.i32, verts_per_prim - i - 1, false), "");
+ prim.edgeflag[i] = ctx->ac.i1false;
+ }
+
+ /* Geometry shaders output triangle strips, but NGG expects
+ * triangles. We need to change the vertex order for odd
+ * triangles to get correct front/back facing by swapping 2
+ * vertex indices, but we also have to keep the provoking
+ * vertex in the same place.
+ */
+ if (verts_per_prim == 3) {
+ LLVMValueRef is_odd = LLVMBuildLShr(builder, flags, ctx->ac.i8_1, "");
+ is_odd = LLVMBuildTrunc(builder, is_odd, ctx->ac.i1, "");
+
+ struct ac_ngg_prim in = prim;
+ prim.index[0] = in.index[0];
+ prim.index[1] = LLVMBuildSelect(builder, is_odd, in.index[2], in.index[1], "");
+ prim.index[2] = LLVMBuildSelect(builder, is_odd, in.index[1], in.index[2], "");
+ }
+
+ ac_build_export_prim(&ctx->ac, &prim);
+ }
+ ac_build_endif(&ctx->ac, 5140);
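/* Editorial sketch (not part of this patch): the strip-to-triangle winding
 * fix above, on plain indices. Vertex 0 (the provoking vertex) stays put and
 * vertices 1 and 2 swap for odd triangles, which restores consistent
 * front/back facing. */
#include <stdbool.h>
#include <stdint.h>

static void
fix_strip_triangle_winding(uint32_t index[3], bool is_odd)
{
   if (is_odd) {
      uint32_t tmp = index[1];
      index[1] = index[2];
      index[2] = tmp;
   }
}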
+
+ /* Export position and parameter data */
+ tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, vertlive_scan.result_reduce, "");
+ ac_build_ifcc(&ctx->ac, tmp, 5145);
+ {
+ struct radv_vs_output_info *outinfo = &ctx->args->shader_info->vs.outinfo;
+ bool export_view_index = ctx->args->options->key.has_multiview_view_index;
+ struct radv_shader_output_values *outputs;
+ unsigned noutput = 0;
+
+ /* Allocate a temporary array for the output values. */
+ unsigned num_outputs = util_bitcount64(ctx->output_mask) + export_view_index;
+ outputs = calloc(num_outputs, sizeof(outputs[0]));
+
+ memset(outinfo->vs_output_param_offset, AC_EXP_PARAM_UNDEFINED,
+ sizeof(outinfo->vs_output_param_offset));
+ outinfo->pos_exports = 0;
+
+ tmp = ngg_gs_vertex_ptr(ctx, tid);
+ tmp = LLVMBuildLoad(builder, ngg_gs_get_emit_primflag_ptr(ctx, tmp, 1), "");
+ tmp = LLVMBuildZExt(builder, tmp, ctx->ac.i32, "");
+ const LLVMValueRef vertexptr = ngg_gs_vertex_ptr(ctx, tmp);
+
+ unsigned out_idx = 0;
+ for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
+ unsigned output_usage_mask = ctx->args->shader_info->gs.output_usage_mask[i];
+ int length = util_last_bit(output_usage_mask);
+
+ if (!(ctx->output_mask & (1ull << i)))
+ continue;
+
+ outputs[noutput].slot_name = i;
+ outputs[noutput].slot_index = i == VARYING_SLOT_CLIP_DIST1;
+ outputs[noutput].usage_mask = output_usage_mask;
+
+ for (unsigned j = 0; j < length; j++, out_idx++) {
+ if (!(output_usage_mask & (1 << j)))
+ continue;
+
+ tmp = ngg_gs_get_emit_output_ptr(ctx, vertexptr, out_idx);
+ tmp = LLVMBuildLoad(builder, tmp, "");
+
+ LLVMTypeRef type = LLVMGetAllocatedType(ctx->abi.outputs[ac_llvm_reg_index_soa(i, j)]);
+ if (ac_get_type_size(type) == 2) {
+ tmp = ac_to_integer(&ctx->ac, tmp);
+ tmp = LLVMBuildTrunc(ctx->ac.builder, tmp, ctx->ac.i16, "");
+ }
+
+ outputs[noutput].values[j] = ac_to_float(&ctx->ac, tmp);
+ }
+
+ for (unsigned j = length; j < 4; j++)
+ outputs[noutput].values[j] = LLVMGetUndef(ctx->ac.f32);
+
+ noutput++;
+ }
+
+ /* Export ViewIndex. */
+ if (export_view_index) {
+ outputs[noutput].slot_name = VARYING_SLOT_LAYER;
+ outputs[noutput].slot_index = 0;
+ outputs[noutput].usage_mask = 0x1;
+ outputs[noutput].values[0] =
+ ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.view_index));
+ for (unsigned j = 1; j < 4; j++)
+ outputs[noutput].values[j] = ctx->ac.f32_0;
+ noutput++;
+ }
+
+ radv_llvm_export_vs(ctx, outputs, noutput, outinfo,
+ ctx->args->options->key.vs_common_out.export_clip_dists);
+ FREE(outputs);
+ }
+ ac_build_endif(&ctx->ac, 5145);
+}
+
+static void
+gfx10_ngg_gs_emit_vertex(struct radv_shader_context *ctx, unsigned stream, LLVMValueRef vertexidx,
+ LLVMValueRef *addrs)
+{
+ LLVMBuilderRef builder = ctx->ac.builder;
+ LLVMValueRef tmp;
+
+ const LLVMValueRef vertexptr = ngg_gs_emit_vertex_ptr(ctx, get_thread_id_in_tg(ctx), vertexidx);
+ unsigned out_idx = 0;
+ for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
+ unsigned output_usage_mask = ctx->args->shader_info->gs.output_usage_mask[i];
+ uint8_t output_stream = ctx->args->shader_info->gs.output_streams[i];
+ LLVMValueRef *out_ptr = &addrs[i * 4];
+ int length = util_last_bit(output_usage_mask);
+
+ if (!(ctx->output_mask & (1ull << i)) || output_stream != stream)
+ continue;
+
+ for (unsigned j = 0; j < length; j++, out_idx++) {
+ if (!(output_usage_mask & (1 << j)))
+ continue;
+
+ LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, out_ptr[j], "");
+ out_val = ac_to_integer(&ctx->ac, out_val);
+ out_val = LLVMBuildZExtOrBitCast(ctx->ac.builder, out_val, ctx->ac.i32, "");
+
+ LLVMBuildStore(builder, out_val, ngg_gs_get_emit_output_ptr(ctx, vertexptr, out_idx));
+ }
+ }
+ assert(out_idx * 4 <= ctx->args->shader_info->gs.gsvs_vertex_size);
+
+ /* Store the current number of emitted vertices to zero out remaining
+ * primitive flags in case the geometry shader doesn't emit the maximum
+ * number of vertices.
+ */
+ tmp = LLVMBuildAdd(builder, vertexidx, ctx->ac.i32_1, "");
+ LLVMBuildStore(builder, tmp, ctx->gs_next_vertex[stream]);
+
+ /* Determine and store whether this vertex completed a primitive. */
+ const LLVMValueRef curverts = LLVMBuildLoad(builder, ctx->gs_curprim_verts[stream], "");
+
+ tmp = LLVMConstInt(
+ ctx->ac.i32, si_conv_gl_prim_to_vertices(ctx->shader->info.gs.output_primitive) - 1, false);
+ const LLVMValueRef iscompleteprim = LLVMBuildICmp(builder, LLVMIntUGE, curverts, tmp, "");
+
+ /* Since the geometry shader emits triangle strips, we need to
+ * track which primitive is odd and swap vertex indices to get
+ * the correct vertex order.
+ */
+ LLVMValueRef is_odd = ctx->ac.i1false;
+ if (stream == 0 && si_conv_gl_prim_to_vertices(ctx->shader->info.gs.output_primitive) == 3) {
+ tmp = LLVMBuildAnd(builder, curverts, ctx->ac.i32_1, "");
+ is_odd = LLVMBuildICmp(builder, LLVMIntEQ, tmp, ctx->ac.i32_1, "");
+ }
+
+ tmp = LLVMBuildAdd(builder, curverts, ctx->ac.i32_1, "");
+ LLVMBuildStore(builder, tmp, ctx->gs_curprim_verts[stream]);
+
+ /* The per-vertex primitive flag encoding:
+ * bit 0: whether this vertex finishes a primitive
+ * bit 1: whether the primitive is odd (if we are emitting triangle strips)
+ */
+ tmp = LLVMBuildZExt(builder, iscompleteprim, ctx->ac.i8, "");
+ tmp = LLVMBuildOr(
+ builder, tmp,
+ LLVMBuildShl(builder, LLVMBuildZExt(builder, is_odd, ctx->ac.i8, ""), ctx->ac.i8_1, ""), "");
+ LLVMBuildStore(builder, tmp, ngg_gs_get_emit_primflag_ptr(ctx, vertexptr, stream));
+
+ tmp = LLVMBuildLoad(builder, ctx->gs_generated_prims[stream], "");
+ tmp = LLVMBuildAdd(builder, tmp, LLVMBuildZExt(builder, iscompleteprim, ctx->ac.i32, ""), "");
+ LLVMBuildStore(builder, tmp, ctx->gs_generated_prims[stream]);
}
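/* Editorial sketch (not part of this patch): the per-vertex primitive-flag
 * byte described in gfx10_ngg_gs_emit_vertex() above, packed on plain
 * scalars. Bit 0 marks a vertex that completes a primitive; bit 1 marks the
 * completed primitive as odd (relevant for triangle strips only). */
#include <stdbool.h>
#include <stdint.h>

static uint8_t
ngg_gs_pack_primflag(bool completes_prim, bool prim_is_odd)
{
   return (uint8_t)completes_prim | ((uint8_t)prim_is_odd << 1);
}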
static bool
-si_export_mrt_color(struct radv_shader_context *ctx,
- LLVMValueRef *color, unsigned index,
- struct ac_export_args *args)
+si_export_mrt_color(struct radv_shader_context *ctx, LLVMValueRef *color, unsigned index,
+ struct ac_export_args *args)
{
- /* Export */
- si_llvm_init_export_args(ctx, color, 0xf,
- V_008DFC_SQ_EXP_MRT + index, args);
- if (!args->enabled_channels)
- return false; /* unnecessary NULL export */
+ /* Export */
+ si_llvm_init_export_args(ctx, color, 0xf, V_008DFC_SQ_EXP_MRT + index, args);
+ if (!args->enabled_channels)
+ return false; /* unnecessary NULL export */
- return true;
+ return true;
}
static void
-radv_export_mrt_z(struct radv_shader_context *ctx,
- LLVMValueRef depth, LLVMValueRef stencil,
- LLVMValueRef samplemask)
+radv_export_mrt_z(struct radv_shader_context *ctx, LLVMValueRef depth, LLVMValueRef stencil,
+ LLVMValueRef samplemask)
{
- struct ac_export_args args;
+ struct ac_export_args args;
- ac_export_mrt_z(&ctx->ac, depth, stencil, samplemask, &args);
+ ac_export_mrt_z(&ctx->ac, depth, stencil, samplemask, &args);
- ac_build_export(&ctx->ac, &args);
+ ac_build_export(&ctx->ac, &args);
}
static void
handle_fs_outputs_post(struct radv_shader_context *ctx)
{
- unsigned index = 0;
- LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
- struct ac_export_args color_args[8];
-
- for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
- LLVMValueRef values[4];
-
- if (!(ctx->output_mask & (1ull << i)))
- continue;
-
- if (i < FRAG_RESULT_DATA0)
- continue;
-
- for (unsigned j = 0; j < 4; j++)
- values[j] = ac_to_float(&ctx->ac,
- radv_load_output(ctx, i, j));
-
- bool ret = si_export_mrt_color(ctx, values,
- i - FRAG_RESULT_DATA0,
- &color_args[index]);
- if (ret)
- index++;
- }
-
- /* Process depth, stencil, samplemask. */
- if (ctx->args->shader_info->ps.writes_z) {
- depth = ac_to_float(&ctx->ac,
- radv_load_output(ctx, FRAG_RESULT_DEPTH, 0));
- }
- if (ctx->args->shader_info->ps.writes_stencil) {
- stencil = ac_to_float(&ctx->ac,
- radv_load_output(ctx, FRAG_RESULT_STENCIL, 0));
- }
- if (ctx->args->shader_info->ps.writes_sample_mask) {
- samplemask = ac_to_float(&ctx->ac,
- radv_load_output(ctx, FRAG_RESULT_SAMPLE_MASK, 0));
- }
-
- /* Set the DONE bit on last non-null color export only if Z isn't
- * exported.
- */
- if (index > 0 &&
- !ctx->args->shader_info->ps.writes_z &&
- !ctx->args->shader_info->ps.writes_stencil &&
- !ctx->args->shader_info->ps.writes_sample_mask) {
- unsigned last = index - 1;
-
- color_args[last].valid_mask = 1; /* whether the EXEC mask is valid */
- color_args[last].done = 1; /* DONE bit */
- }
-
- /* Export PS outputs. */
- for (unsigned i = 0; i < index; i++)
- ac_build_export(&ctx->ac, &color_args[i]);
-
- if (depth || stencil || samplemask)
- radv_export_mrt_z(ctx, depth, stencil, samplemask);
- else if (!index)
- ac_build_export_null(&ctx->ac);
+ unsigned index = 0;
+ LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
+ struct ac_export_args color_args[8];
+
+ for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
+ LLVMValueRef values[4];
+
+ if (!(ctx->output_mask & (1ull << i)))
+ continue;
+
+ if (i < FRAG_RESULT_DATA0)
+ continue;
+
+ for (unsigned j = 0; j < 4; j++)
+ values[j] = ac_to_float(&ctx->ac, radv_load_output(ctx, i, j));
+
+ bool ret = si_export_mrt_color(ctx, values, i - FRAG_RESULT_DATA0, &color_args[index]);
+ if (ret)
+ index++;
+ }
+
+ /* Process depth, stencil, samplemask. */
+ if (ctx->args->shader_info->ps.writes_z) {
+ depth = ac_to_float(&ctx->ac, radv_load_output(ctx, FRAG_RESULT_DEPTH, 0));
+ }
+ if (ctx->args->shader_info->ps.writes_stencil) {
+ stencil = ac_to_float(&ctx->ac, radv_load_output(ctx, FRAG_RESULT_STENCIL, 0));
+ }
+ if (ctx->args->shader_info->ps.writes_sample_mask) {
+ samplemask = ac_to_float(&ctx->ac, radv_load_output(ctx, FRAG_RESULT_SAMPLE_MASK, 0));
+ }
+
+ /* Set the DONE bit on last non-null color export only if Z isn't
+ * exported.
+ */
+ if (index > 0 && !ctx->args->shader_info->ps.writes_z &&
+ !ctx->args->shader_info->ps.writes_stencil &&
+ !ctx->args->shader_info->ps.writes_sample_mask) {
+ unsigned last = index - 1;
+
+ color_args[last].valid_mask = 1; /* whether the EXEC mask is valid */
+ color_args[last].done = 1; /* DONE bit */
+ }
+
+ /* Export PS outputs. */
+ for (unsigned i = 0; i < index; i++)
+ ac_build_export(&ctx->ac, &color_args[i]);
+
+ if (depth || stencil || samplemask)
+ radv_export_mrt_z(ctx, depth, stencil, samplemask);
+ else if (!index)
+ ac_build_export_null(&ctx->ac);
}
static void
emit_gs_epilogue(struct radv_shader_context *ctx)
{
- if (ctx->args->options->key.vs_common_out.as_ngg) {
- gfx10_ngg_gs_emit_epilogue_1(ctx);
- return;
- }
+ if (ctx->args->options->key.vs_common_out.as_ngg) {
+ gfx10_ngg_gs_emit_epilogue_1(ctx);
+ return;
+ }
- if (ctx->ac.chip_class >= GFX10)
- LLVMBuildFence(ctx->ac.builder, LLVMAtomicOrderingRelease, false, "");
+ if (ctx->ac.chip_class >= GFX10)
+ LLVMBuildFence(ctx->ac.builder, LLVMAtomicOrderingRelease, false, "");
- ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_NOP | AC_SENDMSG_GS_DONE, ctx->gs_wave_id);
+ ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_NOP | AC_SENDMSG_GS_DONE, ctx->gs_wave_id);
}
static void
-handle_shader_outputs_post(struct ac_shader_abi *abi, unsigned max_outputs,
- LLVMValueRef *addrs)
-{
- struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
-
- switch (ctx->stage) {
- case MESA_SHADER_VERTEX:
- if (ctx->args->options->key.vs_common_out.as_ls)
- break; /* Lowered in NIR */
- else if (ctx->args->options->key.vs_common_out.as_es)
- break; /* Lowered in NIR */
- else if (ctx->args->options->key.vs_common_out.as_ngg)
- handle_ngg_outputs_post_1(ctx);
- else
- handle_vs_outputs_post(ctx, ctx->args->options->key.vs_common_out.export_prim_id,
- ctx->args->options->key.vs_common_out.export_clip_dists,
- &ctx->args->shader_info->vs.outinfo);
- break;
- case MESA_SHADER_FRAGMENT:
- handle_fs_outputs_post(ctx);
- break;
- case MESA_SHADER_GEOMETRY:
- emit_gs_epilogue(ctx);
- break;
- case MESA_SHADER_TESS_CTRL:
- break; /* Lowered in NIR */
- case MESA_SHADER_TESS_EVAL:
- if (ctx->args->options->key.vs_common_out.as_es)
- break; /* Lowered in NIR */
- else if (ctx->args->options->key.vs_common_out.as_ngg)
- handle_ngg_outputs_post_1(ctx);
- else
- handle_vs_outputs_post(ctx, ctx->args->options->key.vs_common_out.export_prim_id,
- ctx->args->options->key.vs_common_out.export_clip_dists,
- &ctx->args->shader_info->tes.outinfo);
- break;
- default:
- break;
- }
-}
-
-static void ac_llvm_finalize_module(struct radv_shader_context *ctx,
- LLVMPassManagerRef passmgr,
- const struct radv_nir_compiler_options *options)
-{
- LLVMRunPassManager(passmgr, ctx->ac.module);
- LLVMDisposeBuilder(ctx->ac.builder);
-
- ac_llvm_context_dispose(&ctx->ac);
+handle_shader_outputs_post(struct ac_shader_abi *abi, unsigned max_outputs, LLVMValueRef *addrs)
+{
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+
+ switch (ctx->stage) {
+ case MESA_SHADER_VERTEX:
+ if (ctx->args->options->key.vs_common_out.as_ls)
+ break; /* Lowered in NIR */
+ else if (ctx->args->options->key.vs_common_out.as_es)
+ break; /* Lowered in NIR */
+ else if (ctx->args->options->key.vs_common_out.as_ngg)
+ handle_ngg_outputs_post_1(ctx);
+ else
+ handle_vs_outputs_post(ctx, ctx->args->options->key.vs_common_out.export_prim_id,
+ ctx->args->options->key.vs_common_out.export_clip_dists,
+ &ctx->args->shader_info->vs.outinfo);
+ break;
+ case MESA_SHADER_FRAGMENT:
+ handle_fs_outputs_post(ctx);
+ break;
+ case MESA_SHADER_GEOMETRY:
+ emit_gs_epilogue(ctx);
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ break; /* Lowered in NIR */
+ case MESA_SHADER_TESS_EVAL:
+ if (ctx->args->options->key.vs_common_out.as_es)
+ break; /* Lowered in NIR */
+ else if (ctx->args->options->key.vs_common_out.as_ngg)
+ handle_ngg_outputs_post_1(ctx);
+ else
+ handle_vs_outputs_post(ctx, ctx->args->options->key.vs_common_out.export_prim_id,
+ ctx->args->options->key.vs_common_out.export_clip_dists,
+ &ctx->args->shader_info->tes.outinfo);
+ break;
+ default:
+ break;
+ }
+}
+
+static void
+ac_llvm_finalize_module(struct radv_shader_context *ctx, LLVMPassManagerRef passmgr,
+ const struct radv_nir_compiler_options *options)
+{
+ LLVMRunPassManager(passmgr, ctx->ac.module);
+ LLVMDisposeBuilder(ctx->ac.builder);
+
+ ac_llvm_context_dispose(&ctx->ac);
}
static void
ac_nir_eliminate_const_vs_outputs(struct radv_shader_context *ctx)
{
- struct radv_vs_output_info *outinfo;
-
- switch (ctx->stage) {
- case MESA_SHADER_FRAGMENT:
- case MESA_SHADER_COMPUTE:
- case MESA_SHADER_TESS_CTRL:
- case MESA_SHADER_GEOMETRY:
- return;
- case MESA_SHADER_VERTEX:
- if (ctx->args->options->key.vs_common_out.as_ls ||
- ctx->args->options->key.vs_common_out.as_es)
- return;
- outinfo = &ctx->args->shader_info->vs.outinfo;
- break;
- case MESA_SHADER_TESS_EVAL:
- if (ctx->args->options->key.vs_common_out.as_es)
- return;
- outinfo = &ctx->args->shader_info->tes.outinfo;
- break;
- default:
- unreachable("Unhandled shader type");
- }
-
- ac_optimize_vs_outputs(&ctx->ac,
- ctx->main_function,
- outinfo->vs_output_param_offset,
- VARYING_SLOT_MAX, 0,
- &outinfo->param_exports);
+ struct radv_vs_output_info *outinfo;
+
+ switch (ctx->stage) {
+ case MESA_SHADER_FRAGMENT:
+ case MESA_SHADER_COMPUTE:
+ case MESA_SHADER_TESS_CTRL:
+ case MESA_SHADER_GEOMETRY:
+ return;
+ case MESA_SHADER_VERTEX:
+ if (ctx->args->options->key.vs_common_out.as_ls ||
+ ctx->args->options->key.vs_common_out.as_es)
+ return;
+ outinfo = &ctx->args->shader_info->vs.outinfo;
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ if (ctx->args->options->key.vs_common_out.as_es)
+ return;
+ outinfo = &ctx->args->shader_info->tes.outinfo;
+ break;
+ default:
+ unreachable("Unhandled shader type");
+ }
+
+ ac_optimize_vs_outputs(&ctx->ac, ctx->main_function, outinfo->vs_output_param_offset,
+ VARYING_SLOT_MAX, 0, &outinfo->param_exports);
}
static void
ac_setup_rings(struct radv_shader_context *ctx)
{
- if (ctx->args->options->chip_class <= GFX8 &&
- (ctx->stage == MESA_SHADER_GEOMETRY ||
- ctx->args->options->key.vs_common_out.as_es)) {
- unsigned ring = ctx->stage == MESA_SHADER_GEOMETRY ? RING_ESGS_GS
- : RING_ESGS_VS;
- LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, ring, false);
-
- ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac,
- ctx->ring_offsets,
- offset);
- }
-
- if (ctx->args->is_gs_copy_shader) {
- ctx->gsvs_ring[0] =
- ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets,
- LLVMConstInt(ctx->ac.i32,
- RING_GSVS_VS, false));
- }
-
- if (ctx->stage == MESA_SHADER_GEOMETRY) {
- /* The conceptual layout of the GSVS ring is
- * v0c0 .. vLv0 v0c1 .. vLc1 ..
- * but the real memory layout is swizzled across
- * threads:
- * t0v0c0 .. t15v0c0 t0v1c0 .. t15v1c0 ... t15vLcL
- * t16v0c0 ..
- * Override the buffer descriptor accordingly.
- */
- LLVMTypeRef v2i64 = LLVMVectorType(ctx->ac.i64, 2);
- uint64_t stream_offset = 0;
- unsigned num_records = ctx->ac.wave_size;
- LLVMValueRef base_ring;
-
- base_ring =
- ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets,
- LLVMConstInt(ctx->ac.i32,
- RING_GSVS_GS, false));
-
- for (unsigned stream = 0; stream < 4; stream++) {
- unsigned num_components, stride;
- LLVMValueRef ring, tmp;
-
- num_components =
- ctx->args->shader_info->gs.num_stream_output_components[stream];
-
- if (!num_components)
- continue;
-
- stride = 4 * num_components * ctx->shader->info.gs.vertices_out;
-
- /* Limit on the stride field for <= GFX7. */
- assert(stride < (1 << 14));
-
- ring = LLVMBuildBitCast(ctx->ac.builder,
- base_ring, v2i64, "");
- tmp = LLVMBuildExtractElement(ctx->ac.builder,
- ring, ctx->ac.i32_0, "");
- tmp = LLVMBuildAdd(ctx->ac.builder, tmp,
- LLVMConstInt(ctx->ac.i64,
- stream_offset, 0), "");
- ring = LLVMBuildInsertElement(ctx->ac.builder,
- ring, tmp, ctx->ac.i32_0, "");
-
- stream_offset += stride * ctx->ac.wave_size;
-
- ring = LLVMBuildBitCast(ctx->ac.builder, ring,
- ctx->ac.v4i32, "");
-
- tmp = LLVMBuildExtractElement(ctx->ac.builder, ring,
- ctx->ac.i32_1, "");
- tmp = LLVMBuildOr(ctx->ac.builder, tmp,
- LLVMConstInt(ctx->ac.i32,
- S_008F04_STRIDE(stride), false), "");
- ring = LLVMBuildInsertElement(ctx->ac.builder, ring, tmp,
- ctx->ac.i32_1, "");
-
- ring = LLVMBuildInsertElement(ctx->ac.builder, ring,
- LLVMConstInt(ctx->ac.i32,
- num_records, false),
- LLVMConstInt(ctx->ac.i32, 2, false), "");
-
- ctx->gsvs_ring[stream] = ring;
- }
- }
-
- if (ctx->stage == MESA_SHADER_TESS_CTRL ||
- ctx->stage == MESA_SHADER_TESS_EVAL) {
- ctx->hs_ring_tess_offchip = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_HS_TESS_OFFCHIP, false));
- ctx->hs_ring_tess_factor = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_HS_TESS_FACTOR, false));
- }
+ if (ctx->args->options->chip_class <= GFX8 &&
+ (ctx->stage == MESA_SHADER_GEOMETRY || ctx->args->options->key.vs_common_out.as_es)) {
+ unsigned ring = ctx->stage == MESA_SHADER_GEOMETRY ? RING_ESGS_GS : RING_ESGS_VS;
+ LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, ring, false);
+
+ ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, offset);
+ }
+
+ if (ctx->args->is_gs_copy_shader) {
+ ctx->gsvs_ring[0] = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets,
+ LLVMConstInt(ctx->ac.i32, RING_GSVS_VS, false));
+ }
+
+ if (ctx->stage == MESA_SHADER_GEOMETRY) {
+ /* The conceptual layout of the GSVS ring is
+ * v0c0 .. vLv0 v0c1 .. vLc1 ..
+ * but the real memory layout is swizzled across
+ * threads:
+ * t0v0c0 .. t15v0c0 t0v1c0 .. t15v1c0 ... t15vLcL
+ * t16v0c0 ..
+ * Override the buffer descriptor accordingly.
+ */
+ LLVMTypeRef v2i64 = LLVMVectorType(ctx->ac.i64, 2);
+ uint64_t stream_offset = 0;
+ unsigned num_records = ctx->ac.wave_size;
+ LLVMValueRef base_ring;
+
+ base_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets,
+ LLVMConstInt(ctx->ac.i32, RING_GSVS_GS, false));
+
+ for (unsigned stream = 0; stream < 4; stream++) {
+ unsigned num_components, stride;
+ LLVMValueRef ring, tmp;
+
+ num_components = ctx->args->shader_info->gs.num_stream_output_components[stream];
+
+ if (!num_components)
+ continue;
+
+ stride = 4 * num_components * ctx->shader->info.gs.vertices_out;
+
+ /* Limit on the stride field for <= GFX7. */
+ assert(stride < (1 << 14));
+
+ ring = LLVMBuildBitCast(ctx->ac.builder, base_ring, v2i64, "");
+ tmp = LLVMBuildExtractElement(ctx->ac.builder, ring, ctx->ac.i32_0, "");
+ tmp = LLVMBuildAdd(ctx->ac.builder, tmp, LLVMConstInt(ctx->ac.i64, stream_offset, 0), "");
+ ring = LLVMBuildInsertElement(ctx->ac.builder, ring, tmp, ctx->ac.i32_0, "");
+
+ stream_offset += stride * ctx->ac.wave_size;
+
+ ring = LLVMBuildBitCast(ctx->ac.builder, ring, ctx->ac.v4i32, "");
+
+ tmp = LLVMBuildExtractElement(ctx->ac.builder, ring, ctx->ac.i32_1, "");
+ tmp = LLVMBuildOr(ctx->ac.builder, tmp,
+ LLVMConstInt(ctx->ac.i32, S_008F04_STRIDE(stride), false), "");
+ ring = LLVMBuildInsertElement(ctx->ac.builder, ring, tmp, ctx->ac.i32_1, "");
+
+ ring = LLVMBuildInsertElement(ctx->ac.builder, ring,
+ LLVMConstInt(ctx->ac.i32, num_records, false),
+ LLVMConstInt(ctx->ac.i32, 2, false), "");
+
+ ctx->gsvs_ring[stream] = ring;
+ }
+ }
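/* Editorial sketch (not part of this patch): one way to read the swizzled
 * GSVS layout described in the comment above, limited to a single wave of
 * wave_size threads and dword-sized components. The real ring additionally
 * applies the per-stream base offset and the stride programmed into the
 * buffer descriptor, so treat this as an illustration of the thread
 * interleaving only. */
#include <stdint.h>

static uint64_t
gsvs_dword_index_in_wave(unsigned thread, unsigned vertex, unsigned component,
                         unsigned num_vertices, unsigned wave_size)
{
   /* Threads of a wave are interleaved innermost, then vertices, then
    * components: t0v0c0 .. t(W-1)v0c0, t0v1c0 .. t(W-1)v1c0, ... */
   return ((uint64_t)component * num_vertices + vertex) * wave_size + thread;
}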
+
+ if (ctx->stage == MESA_SHADER_TESS_CTRL || ctx->stage == MESA_SHADER_TESS_EVAL) {
+ ctx->hs_ring_tess_offchip = ac_build_load_to_sgpr(
+ &ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_HS_TESS_OFFCHIP, false));
+ ctx->hs_ring_tess_factor = ac_build_load_to_sgpr(
+ &ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_HS_TESS_FACTOR, false));
+ }
}
unsigned
-radv_nir_get_max_workgroup_size(enum chip_class chip_class,
- gl_shader_stage stage,
- const struct nir_shader *nir)
+radv_nir_get_max_workgroup_size(enum chip_class chip_class, gl_shader_stage stage,
+ const struct nir_shader *nir)
{
- const unsigned backup_sizes[] = {chip_class >= GFX9 ? 128 : 64, 1, 1};
- unsigned sizes[3];
- for (unsigned i = 0; i < 3; i++)
- sizes[i] = nir ? nir->info.cs.local_size[i] : backup_sizes[i];
- return radv_get_max_workgroup_size(chip_class, stage, sizes);
+ const unsigned backup_sizes[] = {chip_class >= GFX9 ? 128 : 64, 1, 1};
+ unsigned sizes[3];
+ for (unsigned i = 0; i < 3; i++)
+ sizes[i] = nir ? nir->info.cs.local_size[i] : backup_sizes[i];
+ return radv_get_max_workgroup_size(chip_class, stage, sizes);
}
/* Fixup the HW not emitting the TCS regs if there are no HS threads. */
-static void ac_nir_fixup_ls_hs_input_vgprs(struct radv_shader_context *ctx)
-{
- LLVMValueRef count =
- ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.merged_wave_info), 8, 8);
- LLVMValueRef hs_empty = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, count,
- ctx->ac.i32_0, "");
- ctx->abi.instance_id = LLVMBuildSelect(ctx->ac.builder, hs_empty,
- ac_get_arg(&ctx->ac, ctx->args->ac.vertex_id),
- ctx->abi.instance_id, "");
- ctx->vs_rel_patch_id = LLVMBuildSelect(ctx->ac.builder, hs_empty,
- ac_get_arg(&ctx->ac, ctx->args->ac.tcs_rel_ids),
- ctx->vs_rel_patch_id,
- "");
- ctx->abi.vertex_id = LLVMBuildSelect(ctx->ac.builder, hs_empty,
- ac_get_arg(&ctx->ac, ctx->args->ac.tcs_patch_id),
- ctx->abi.vertex_id, "");
-}
-
-static void prepare_gs_input_vgprs(struct radv_shader_context *ctx, bool merged)
-{
- if (merged) {
- for(int i = 5; i >= 0; --i) {
- ctx->gs_vtx_offset[i] =
- ac_unpack_param(&ctx->ac,
- ac_get_arg(&ctx->ac, ctx->args->ac.gs_vtx_offset[i & ~1]),
- (i & 1) * 16, 16);
- }
-
- ctx->gs_wave_id = ac_unpack_param(&ctx->ac,
- ac_get_arg(&ctx->ac, ctx->args->ac.merged_wave_info),
- 16, 8);
- } else {
- for (int i = 0; i < 6; i++)
- ctx->gs_vtx_offset[i] = ac_get_arg(&ctx->ac, ctx->args->ac.gs_vtx_offset[i]);
- ctx->gs_wave_id = ac_get_arg(&ctx->ac, ctx->args->ac.gs_wave_id);
- }
+static void
+ac_nir_fixup_ls_hs_input_vgprs(struct radv_shader_context *ctx)
+{
+ LLVMValueRef count =
+ ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.merged_wave_info), 8, 8);
+ LLVMValueRef hs_empty = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, count, ctx->ac.i32_0, "");
+ ctx->abi.instance_id =
+ LLVMBuildSelect(ctx->ac.builder, hs_empty, ac_get_arg(&ctx->ac, ctx->args->ac.vertex_id),
+ ctx->abi.instance_id, "");
+ ctx->vs_rel_patch_id =
+ LLVMBuildSelect(ctx->ac.builder, hs_empty, ac_get_arg(&ctx->ac, ctx->args->ac.tcs_rel_ids),
+ ctx->vs_rel_patch_id, "");
+ ctx->abi.vertex_id =
+ LLVMBuildSelect(ctx->ac.builder, hs_empty, ac_get_arg(&ctx->ac, ctx->args->ac.tcs_patch_id),
+ ctx->abi.vertex_id, "");
+}
+
+static void
+prepare_gs_input_vgprs(struct radv_shader_context *ctx, bool merged)
+{
+ if (merged) {
+ for (int i = 5; i >= 0; --i) {
+ ctx->gs_vtx_offset[i] = ac_unpack_param(
+ &ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.gs_vtx_offset[i & ~1]), (i & 1) * 16, 16);
+ }
+
+ ctx->gs_wave_id =
+ ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.merged_wave_info), 16, 8);
+ } else {
+ for (int i = 0; i < 6; i++)
+ ctx->gs_vtx_offset[i] = ac_get_arg(&ctx->ac, ctx->args->ac.gs_vtx_offset[i]);
+ ctx->gs_wave_id = ac_get_arg(&ctx->ac, ctx->args->ac.gs_wave_id);
+ }
}
/* Ensure that the esgs ring is declared.
@@ -3157,545 +2923,499 @@ static void prepare_gs_input_vgprs(struct radv_shader_context *ctx, bool merged)
* We declare it with 64KB alignment as a hint that the
* pointer value will always be 0.
*/
-static void declare_esgs_ring(struct radv_shader_context *ctx)
-{
- if (ctx->esgs_ring)
- return;
-
- assert(!LLVMGetNamedGlobal(ctx->ac.module, "esgs_ring"));
-
- ctx->esgs_ring = LLVMAddGlobalInAddressSpace(
- ctx->ac.module, LLVMArrayType(ctx->ac.i32, 0),
- "esgs_ring",
- AC_ADDR_SPACE_LDS);
- LLVMSetLinkage(ctx->esgs_ring, LLVMExternalLinkage);
- LLVMSetAlignment(ctx->esgs_ring, 64 * 1024);
-}
-
-static
-LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
- struct nir_shader *const *shaders,
- int shader_count,
- const struct radv_shader_args *args)
-{
- struct radv_shader_context ctx = {0};
- ctx.args = args;
-
- enum ac_float_mode float_mode = AC_FLOAT_MODE_DEFAULT;
-
- if (args->shader_info->float_controls_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32) {
- float_mode = AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO;
- }
-
- ac_llvm_context_init(&ctx.ac, ac_llvm, args->options->chip_class,
- args->options->family, args->options->info, float_mode,
- args->shader_info->wave_size,
- args->shader_info->ballot_bit_size);
- ctx.context = ctx.ac.context;
-
- ctx.max_workgroup_size = 0;
- for (int i = 0; i < shader_count; ++i) {
- ctx.max_workgroup_size = MAX2(ctx.max_workgroup_size,
- radv_nir_get_max_workgroup_size(args->options->chip_class,
- shaders[i]->info.stage,
- shaders[i]));
- }
-
- if (ctx.ac.chip_class >= GFX10) {
- if (is_pre_gs_stage(shaders[0]->info.stage) &&
- args->options->key.vs_common_out.as_ngg) {
- ctx.max_workgroup_size = 128;
- }
- }
-
- create_function(&ctx, shaders[shader_count - 1]->info.stage, shader_count >= 2);
-
- ctx.abi.inputs = &ctx.inputs[0];
- ctx.abi.emit_outputs = handle_shader_outputs_post;
- ctx.abi.emit_vertex_with_counter = visit_emit_vertex_with_counter;
- ctx.abi.load_ubo = radv_load_ubo;
- ctx.abi.load_ssbo = radv_load_ssbo;
- ctx.abi.load_sampler_desc = radv_get_sampler_desc;
- ctx.abi.load_resource = radv_load_resource;
- ctx.abi.load_ring_tess_factors = load_ring_tess_factors;
- ctx.abi.load_ring_tess_offchip = load_ring_tess_offchip;
- ctx.abi.load_ring_esgs = load_ring_esgs;
- ctx.abi.clamp_shadow_reference = false;
- ctx.abi.adjust_frag_coord_z = args->options->adjust_frag_coord_z;
- ctx.abi.robust_buffer_access = args->options->robust_buffer_access;
-
- bool is_ngg = is_pre_gs_stage(shaders[0]->info.stage) && args->options->key.vs_common_out.as_ngg;
- if (shader_count >= 2 || is_ngg)
- ac_init_exec_full_mask(&ctx.ac);
-
- if (args->ac.vertex_id.used)
- ctx.abi.vertex_id = ac_get_arg(&ctx.ac, args->ac.vertex_id);
- if (args->ac.vs_rel_patch_id.used)
- ctx.vs_rel_patch_id = ac_get_arg(&ctx.ac, args->ac.vs_rel_patch_id);
- if (args->ac.instance_id.used)
- ctx.abi.instance_id = ac_get_arg(&ctx.ac, args->ac.instance_id);
-
- if (args->options->has_ls_vgpr_init_bug &&
- shaders[shader_count - 1]->info.stage == MESA_SHADER_TESS_CTRL)
- ac_nir_fixup_ls_hs_input_vgprs(&ctx);
-
- if (is_ngg) {
- /* Declare scratch space base for streamout and vertex
- * compaction. Whether space is actually allocated is
- * determined during linking / PM4 creation.
- *
- * Add an extra dword per vertex to ensure an odd stride, which
- * avoids bank conflicts for SoA accesses.
- */
- if (!args->options->key.vs_common_out.as_ngg_passthrough)
- declare_esgs_ring(&ctx);
-
- /* This is really only needed when streamout and / or vertex
- * compaction is enabled.
- */
- if (args->shader_info->so.num_outputs) {
- LLVMTypeRef asi32 = LLVMArrayType(ctx.ac.i32, 8);
- ctx.gs_ngg_scratch = LLVMAddGlobalInAddressSpace(ctx.ac.module,
- asi32, "ngg_scratch", AC_ADDR_SPACE_LDS);
- LLVMSetInitializer(ctx.gs_ngg_scratch, LLVMGetUndef(asi32));
- LLVMSetAlignment(ctx.gs_ngg_scratch, 4);
- }
- }
-
- for(int shader_idx = 0; shader_idx < shader_count; ++shader_idx) {
- ctx.stage = shaders[shader_idx]->info.stage;
- ctx.shader = shaders[shader_idx];
- ctx.output_mask = 0;
-
- if (shaders[shader_idx]->info.stage == MESA_SHADER_GEOMETRY) {
- for (int i = 0; i < 4; i++) {
- ctx.gs_next_vertex[i] =
- ac_build_alloca(&ctx.ac, ctx.ac.i32, "");
- }
- if (args->options->key.vs_common_out.as_ngg) {
- for (unsigned i = 0; i < 4; ++i) {
- ctx.gs_curprim_verts[i] =
- ac_build_alloca(&ctx.ac, ctx.ac.i32, "");
- ctx.gs_generated_prims[i] =
- ac_build_alloca(&ctx.ac, ctx.ac.i32, "");
- }
-
- unsigned scratch_size = 8;
- if (args->shader_info->so.num_outputs)
- scratch_size = 44;
-
- LLVMTypeRef ai32 = LLVMArrayType(ctx.ac.i32, scratch_size);
- ctx.gs_ngg_scratch =
- LLVMAddGlobalInAddressSpace(ctx.ac.module,
- ai32, "ngg_scratch", AC_ADDR_SPACE_LDS);
- LLVMSetInitializer(ctx.gs_ngg_scratch, LLVMGetUndef(ai32));
- LLVMSetAlignment(ctx.gs_ngg_scratch, 4);
-
- ctx.gs_ngg_emit = LLVMAddGlobalInAddressSpace(ctx.ac.module,
- LLVMArrayType(ctx.ac.i32, 0), "ngg_emit", AC_ADDR_SPACE_LDS);
- LLVMSetLinkage(ctx.gs_ngg_emit, LLVMExternalLinkage);
- LLVMSetAlignment(ctx.gs_ngg_emit, 4);
- }
-
- ctx.abi.emit_primitive = visit_end_primitive;
- } else if (shaders[shader_idx]->info.stage == MESA_SHADER_TESS_EVAL) {
- ctx.abi.load_tess_coord = load_tess_coord;
- } else if (shaders[shader_idx]->info.stage == MESA_SHADER_VERTEX) {
- ctx.abi.load_base_vertex = radv_load_base_vertex;
- } else if (shaders[shader_idx]->info.stage == MESA_SHADER_FRAGMENT) {
- ctx.abi.load_sample_position = load_sample_position;
- ctx.abi.load_sample_mask_in = load_sample_mask_in;
- }
-
- if (shaders[shader_idx]->info.stage == MESA_SHADER_VERTEX &&
- args->options->key.vs_common_out.as_ngg &&
- args->options->key.vs_common_out.export_prim_id) {
- declare_esgs_ring(&ctx);
- }
-
- bool nested_barrier = false;
-
- if (shader_idx) {
- if (shaders[shader_idx]->info.stage == MESA_SHADER_GEOMETRY &&
- args->options->key.vs_common_out.as_ngg) {
- gfx10_ngg_gs_emit_prologue(&ctx);
- nested_barrier = false;
- } else {
- nested_barrier = true;
- }
- }
-
- if (nested_barrier) {
- /* Execute a barrier before the second shader in
- * a merged shader.
- *
- * Execute the barrier inside the conditional block,
- * so that empty waves can jump directly to s_endpgm,
- * which will also signal the barrier.
- *
- * This is possible in gfx9, because an empty wave
- * for the second shader does not participate in
- * the epilogue. With NGG, empty waves may still
- * be required to export data (e.g. GS output vertices),
- * so we cannot let them exit early.
- *
- * If the shader is TCS and the TCS epilog is present
- * and contains a barrier, it will wait there and then
- * reach s_endpgm.
- */
- ac_emit_barrier(&ctx.ac, ctx.stage);
- }
-
- nir_foreach_shader_out_variable(variable, shaders[shader_idx])
- scan_shader_output_decl(&ctx, variable, shaders[shader_idx], shaders[shader_idx]->info.stage);
-
- ac_setup_rings(&ctx);
-
- LLVMBasicBlockRef merge_block = NULL;
- if (shader_count >= 2 || is_ngg) {
- LLVMValueRef fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder));
- LLVMBasicBlockRef then_block = LLVMAppendBasicBlockInContext(ctx.ac.context, fn, "");
- merge_block = LLVMAppendBasicBlockInContext(ctx.ac.context, fn, "");
-
- LLVMValueRef count =
- ac_unpack_param(&ctx.ac,
- ac_get_arg(&ctx.ac, args->ac.merged_wave_info),
- 8 * shader_idx, 8);
- LLVMValueRef thread_id = ac_get_thread_id(&ctx.ac);
- LLVMValueRef cond = LLVMBuildICmp(ctx.ac.builder, LLVMIntULT,
- thread_id, count, "");
- LLVMBuildCondBr(ctx.ac.builder, cond, then_block, merge_block);
-
- LLVMPositionBuilderAtEnd(ctx.ac.builder, then_block);
- }
-
- if (shaders[shader_idx]->info.stage == MESA_SHADER_FRAGMENT)
- prepare_interp_optimize(&ctx, shaders[shader_idx]);
- else if(shaders[shader_idx]->info.stage == MESA_SHADER_VERTEX)
- handle_vs_inputs(&ctx, shaders[shader_idx]);
- else if(shaders[shader_idx]->info.stage == MESA_SHADER_GEOMETRY)
- prepare_gs_input_vgprs(&ctx, shader_count >= 2);
-
- ac_nir_translate(&ctx.ac, &ctx.abi, &args->ac, shaders[shader_idx]);
-
- if (shader_count >= 2 || is_ngg) {
- LLVMBuildBr(ctx.ac.builder, merge_block);
- LLVMPositionBuilderAtEnd(ctx.ac.builder, merge_block);
- }
-
- /* This needs to be outside the if wrapping the shader body, as sometimes
- * the HW generates waves with 0 es/vs threads. */
- if (is_pre_gs_stage(shaders[shader_idx]->info.stage) &&
- args->options->key.vs_common_out.as_ngg &&
- shader_idx == shader_count - 1) {
- handle_ngg_outputs_post_2(&ctx);
- } else if (shaders[shader_idx]->info.stage == MESA_SHADER_GEOMETRY &&
- args->options->key.vs_common_out.as_ngg) {
- gfx10_ngg_gs_emit_epilogue_2(&ctx);
- }
- }
-
- LLVMBuildRetVoid(ctx.ac.builder);
-
- if (args->options->dump_preoptir) {
- fprintf(stderr, "%s LLVM IR:\n\n",
- radv_get_shader_name(args->shader_info,
- shaders[shader_count - 1]->info.stage));
- ac_dump_module(ctx.ac.module);
- fprintf(stderr, "\n");
- }
-
- ac_llvm_finalize_module(&ctx, ac_llvm->passmgr, args->options);
-
- if (shader_count == 1)
- ac_nir_eliminate_const_vs_outputs(&ctx);
-
- if (args->options->dump_shader) {
- args->shader_info->private_mem_vgprs =
- ac_count_scratch_private_memory(ctx.main_function);
- }
-
- return ctx.ac.module;
-}
-
-static void ac_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
-{
- unsigned *retval = (unsigned *)context;
- LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
- char *description = LLVMGetDiagInfoDescription(di);
-
- if (severity == LLVMDSError) {
- *retval = 1;
- fprintf(stderr, "LLVM triggered Diagnostic Handler: %s\n",
- description);
- }
-
- LLVMDisposeMessage(description);
-}
-
-static unsigned radv_llvm_compile(LLVMModuleRef M,
- char **pelf_buffer, size_t *pelf_size,
- struct ac_llvm_compiler *ac_llvm)
-{
- unsigned retval = 0;
- LLVMContextRef llvm_ctx;
-
- /* Setup Diagnostic Handler*/
- llvm_ctx = LLVMGetModuleContext(M);
-
- LLVMContextSetDiagnosticHandler(llvm_ctx, ac_diagnostic_handler,
- &retval);
-
- /* Compile IR*/
- if (!radv_compile_to_elf(ac_llvm, M, pelf_buffer, pelf_size))
- retval = 1;
- return retval;
-}
-
-static void ac_compile_llvm_module(struct ac_llvm_compiler *ac_llvm,
- LLVMModuleRef llvm_module,
- struct radv_shader_binary **rbinary,
- gl_shader_stage stage,
- const char *name,
- const struct radv_nir_compiler_options *options)
-{
- char *elf_buffer = NULL;
- size_t elf_size = 0;
- char *llvm_ir_string = NULL;
-
- if (options->dump_shader) {
- fprintf(stderr, "%s LLVM IR:\n\n", name);
- ac_dump_module(llvm_module);
- fprintf(stderr, "\n");
- }
+static void
+declare_esgs_ring(struct radv_shader_context *ctx)
+{
+ if (ctx->esgs_ring)
+ return;
+
+ assert(!LLVMGetNamedGlobal(ctx->ac.module, "esgs_ring"));
+
+ ctx->esgs_ring = LLVMAddGlobalInAddressSpace(ctx->ac.module, LLVMArrayType(ctx->ac.i32, 0),
+ "esgs_ring", AC_ADDR_SPACE_LDS);
+ LLVMSetLinkage(ctx->esgs_ring, LLVMExternalLinkage);
+ LLVMSetAlignment(ctx->esgs_ring, 64 * 1024);
+}
+
+static LLVMModuleRef
+ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, struct nir_shader *const *shaders,
+ int shader_count, const struct radv_shader_args *args)
+{
+ struct radv_shader_context ctx = {0};
+ ctx.args = args;
+
+ enum ac_float_mode float_mode = AC_FLOAT_MODE_DEFAULT;
+
+ if (args->shader_info->float_controls_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32) {
+ float_mode = AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO;
+ }
+
+ ac_llvm_context_init(&ctx.ac, ac_llvm, args->options->chip_class, args->options->family,
+ args->options->info, float_mode, args->shader_info->wave_size,
+ args->shader_info->ballot_bit_size);
+ ctx.context = ctx.ac.context;
+
+ ctx.max_workgroup_size = 0;
+ for (int i = 0; i < shader_count; ++i) {
+ ctx.max_workgroup_size = MAX2(
+ ctx.max_workgroup_size, radv_nir_get_max_workgroup_size(
+ args->options->chip_class, shaders[i]->info.stage, shaders[i]));
+ }
+
+ if (ctx.ac.chip_class >= GFX10) {
+ if (is_pre_gs_stage(shaders[0]->info.stage) && args->options->key.vs_common_out.as_ngg) {
+ ctx.max_workgroup_size = 128;
+ }
+ }
+
+ create_function(&ctx, shaders[shader_count - 1]->info.stage, shader_count >= 2);
+
+ ctx.abi.inputs = &ctx.inputs[0];
+ ctx.abi.emit_outputs = handle_shader_outputs_post;
+ ctx.abi.emit_vertex_with_counter = visit_emit_vertex_with_counter;
+ ctx.abi.load_ubo = radv_load_ubo;
+ ctx.abi.load_ssbo = radv_load_ssbo;
+ ctx.abi.load_sampler_desc = radv_get_sampler_desc;
+ ctx.abi.load_resource = radv_load_resource;
+ ctx.abi.load_ring_tess_factors = load_ring_tess_factors;
+ ctx.abi.load_ring_tess_offchip = load_ring_tess_offchip;
+ ctx.abi.load_ring_esgs = load_ring_esgs;
+ ctx.abi.clamp_shadow_reference = false;
+ ctx.abi.adjust_frag_coord_z = args->options->adjust_frag_coord_z;
+ ctx.abi.robust_buffer_access = args->options->robust_buffer_access;
+
+ bool is_ngg = is_pre_gs_stage(shaders[0]->info.stage) && args->options->key.vs_common_out.as_ngg;
+ if (shader_count >= 2 || is_ngg)
+ ac_init_exec_full_mask(&ctx.ac);
+
+ if (args->ac.vertex_id.used)
+ ctx.abi.vertex_id = ac_get_arg(&ctx.ac, args->ac.vertex_id);
+ if (args->ac.vs_rel_patch_id.used)
+ ctx.vs_rel_patch_id = ac_get_arg(&ctx.ac, args->ac.vs_rel_patch_id);
+ if (args->ac.instance_id.used)
+ ctx.abi.instance_id = ac_get_arg(&ctx.ac, args->ac.instance_id);
+
+ if (args->options->has_ls_vgpr_init_bug &&
+ shaders[shader_count - 1]->info.stage == MESA_SHADER_TESS_CTRL)
+ ac_nir_fixup_ls_hs_input_vgprs(&ctx);
+
+ if (is_ngg) {
+ /* Declare scratch space base for streamout and vertex
+ * compaction. Whether space is actually allocated is
+ * determined during linking / PM4 creation.
+ *
+ * Add an extra dword per vertex to ensure an odd stride, which
+ * avoids bank conflicts for SoA accesses.
+ */
+ if (!args->options->key.vs_common_out.as_ngg_passthrough)
+ declare_esgs_ring(&ctx);
+
+ /* This is really only needed when streamout and / or vertex
+ * compaction is enabled.
+ */
+ if (args->shader_info->so.num_outputs) {
+ LLVMTypeRef asi32 = LLVMArrayType(ctx.ac.i32, 8);
+ ctx.gs_ngg_scratch =
+ LLVMAddGlobalInAddressSpace(ctx.ac.module, asi32, "ngg_scratch", AC_ADDR_SPACE_LDS);
+ LLVMSetInitializer(ctx.gs_ngg_scratch, LLVMGetUndef(asi32));
+ LLVMSetAlignment(ctx.gs_ngg_scratch, 4);
+ }
+ }
+
+ for (int shader_idx = 0; shader_idx < shader_count; ++shader_idx) {
+ ctx.stage = shaders[shader_idx]->info.stage;
+ ctx.shader = shaders[shader_idx];
+ ctx.output_mask = 0;
+
+ if (shaders[shader_idx]->info.stage == MESA_SHADER_GEOMETRY) {
+ for (int i = 0; i < 4; i++) {
+ ctx.gs_next_vertex[i] = ac_build_alloca(&ctx.ac, ctx.ac.i32, "");
+ }
+ if (args->options->key.vs_common_out.as_ngg) {
+ for (unsigned i = 0; i < 4; ++i) {
+ ctx.gs_curprim_verts[i] = ac_build_alloca(&ctx.ac, ctx.ac.i32, "");
+ ctx.gs_generated_prims[i] = ac_build_alloca(&ctx.ac, ctx.ac.i32, "");
+ }
+
+ unsigned scratch_size = 8;
+ if (args->shader_info->so.num_outputs)
+ scratch_size = 44;
+
+ LLVMTypeRef ai32 = LLVMArrayType(ctx.ac.i32, scratch_size);
+ ctx.gs_ngg_scratch =
+ LLVMAddGlobalInAddressSpace(ctx.ac.module, ai32, "ngg_scratch", AC_ADDR_SPACE_LDS);
+ LLVMSetInitializer(ctx.gs_ngg_scratch, LLVMGetUndef(ai32));
+ LLVMSetAlignment(ctx.gs_ngg_scratch, 4);
+
+ ctx.gs_ngg_emit = LLVMAddGlobalInAddressSpace(
+ ctx.ac.module, LLVMArrayType(ctx.ac.i32, 0), "ngg_emit", AC_ADDR_SPACE_LDS);
+ LLVMSetLinkage(ctx.gs_ngg_emit, LLVMExternalLinkage);
+ LLVMSetAlignment(ctx.gs_ngg_emit, 4);
+ }
+
+ ctx.abi.emit_primitive = visit_end_primitive;
+ } else if (shaders[shader_idx]->info.stage == MESA_SHADER_TESS_EVAL) {
+ ctx.abi.load_tess_coord = load_tess_coord;
+ } else if (shaders[shader_idx]->info.stage == MESA_SHADER_VERTEX) {
+ ctx.abi.load_base_vertex = radv_load_base_vertex;
+ } else if (shaders[shader_idx]->info.stage == MESA_SHADER_FRAGMENT) {
+ ctx.abi.load_sample_position = load_sample_position;
+ ctx.abi.load_sample_mask_in = load_sample_mask_in;
+ }
+
+ if (shaders[shader_idx]->info.stage == MESA_SHADER_VERTEX &&
+ args->options->key.vs_common_out.as_ngg &&
+ args->options->key.vs_common_out.export_prim_id) {
+ declare_esgs_ring(&ctx);
+ }
+
+ bool nested_barrier = false;
+
+ if (shader_idx) {
+ if (shaders[shader_idx]->info.stage == MESA_SHADER_GEOMETRY &&
+ args->options->key.vs_common_out.as_ngg) {
+ gfx10_ngg_gs_emit_prologue(&ctx);
+ nested_barrier = false;
+ } else {
+ nested_barrier = true;
+ }
+ }
+
+ if (nested_barrier) {
+ /* Execute a barrier before the second shader in
+ * a merged shader.
+ *
+ * Execute the barrier inside the conditional block,
+ * so that empty waves can jump directly to s_endpgm,
+ * which will also signal the barrier.
+ *
+ * This is possible in gfx9, because an empty wave
+ * for the second shader does not participate in
+ * the epilogue. With NGG, empty waves may still
+ * be required to export data (e.g. GS output vertices),
+ * so we cannot let them exit early.
+ *
+ * If the shader is TCS and the TCS epilog is present
+ * and contains a barrier, it will wait there and then
+ * reach s_endpgm.
+ */
+ ac_emit_barrier(&ctx.ac, ctx.stage);
+ }
+
+ nir_foreach_shader_out_variable(variable, shaders[shader_idx]) scan_shader_output_decl(
+ &ctx, variable, shaders[shader_idx], shaders[shader_idx]->info.stage);
+
+ ac_setup_rings(&ctx);
+
+ LLVMBasicBlockRef merge_block = NULL;
+ if (shader_count >= 2 || is_ngg) {
+ LLVMValueRef fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder));
+ LLVMBasicBlockRef then_block = LLVMAppendBasicBlockInContext(ctx.ac.context, fn, "");
+ merge_block = LLVMAppendBasicBlockInContext(ctx.ac.context, fn, "");
+
+ LLVMValueRef count = ac_unpack_param(
+ &ctx.ac, ac_get_arg(&ctx.ac, args->ac.merged_wave_info), 8 * shader_idx, 8);
+ LLVMValueRef thread_id = ac_get_thread_id(&ctx.ac);
+ LLVMValueRef cond = LLVMBuildICmp(ctx.ac.builder, LLVMIntULT, thread_id, count, "");
+ LLVMBuildCondBr(ctx.ac.builder, cond, then_block, merge_block);
+
+ LLVMPositionBuilderAtEnd(ctx.ac.builder, then_block);
+ }
+
+ if (shaders[shader_idx]->info.stage == MESA_SHADER_FRAGMENT)
+ prepare_interp_optimize(&ctx, shaders[shader_idx]);
+ else if (shaders[shader_idx]->info.stage == MESA_SHADER_VERTEX)
+ handle_vs_inputs(&ctx, shaders[shader_idx]);
+ else if (shaders[shader_idx]->info.stage == MESA_SHADER_GEOMETRY)
+ prepare_gs_input_vgprs(&ctx, shader_count >= 2);
+
+ ac_nir_translate(&ctx.ac, &ctx.abi, &args->ac, shaders[shader_idx]);
+
+ if (shader_count >= 2 || is_ngg) {
+ LLVMBuildBr(ctx.ac.builder, merge_block);
+ LLVMPositionBuilderAtEnd(ctx.ac.builder, merge_block);
+ }
+
+ /* This needs to be outside the if wrapping the shader body, as sometimes
+ * the HW generates waves with 0 es/vs threads. */
+ if (is_pre_gs_stage(shaders[shader_idx]->info.stage) &&
+ args->options->key.vs_common_out.as_ngg && shader_idx == shader_count - 1) {
+ handle_ngg_outputs_post_2(&ctx);
+ } else if (shaders[shader_idx]->info.stage == MESA_SHADER_GEOMETRY &&
+ args->options->key.vs_common_out.as_ngg) {
+ gfx10_ngg_gs_emit_epilogue_2(&ctx);
+ }
+ }
+
+ LLVMBuildRetVoid(ctx.ac.builder);
+
+ if (args->options->dump_preoptir) {
+ fprintf(stderr, "%s LLVM IR:\n\n",
+ radv_get_shader_name(args->shader_info, shaders[shader_count - 1]->info.stage));
+ ac_dump_module(ctx.ac.module);
+ fprintf(stderr, "\n");
+ }
+
+ ac_llvm_finalize_module(&ctx, ac_llvm->passmgr, args->options);
+
+ if (shader_count == 1)
+ ac_nir_eliminate_const_vs_outputs(&ctx);
+
+ if (args->options->dump_shader) {
+ args->shader_info->private_mem_vgprs = ac_count_scratch_private_memory(ctx.main_function);
+ }
+
+ return ctx.ac.module;
+}
+
+static void
+ac_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
+{
+ unsigned *retval = (unsigned *)context;
+ LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
+ char *description = LLVMGetDiagInfoDescription(di);
+
+ if (severity == LLVMDSError) {
+ *retval = 1;
+ fprintf(stderr, "LLVM triggered Diagnostic Handler: %s\n", description);
+ }
- if (options->record_ir) {
- char *llvm_ir = LLVMPrintModuleToString(llvm_module);
- llvm_ir_string = strdup(llvm_ir);
- LLVMDisposeMessage(llvm_ir);
- }
+ LLVMDisposeMessage(description);
+}
- int v = radv_llvm_compile(llvm_module, &elf_buffer, &elf_size, ac_llvm);
- if (v) {
- fprintf(stderr, "compile failed\n");
- }
+static unsigned
+radv_llvm_compile(LLVMModuleRef M, char **pelf_buffer, size_t *pelf_size,
+ struct ac_llvm_compiler *ac_llvm)
+{
+ unsigned retval = 0;
+ LLVMContextRef llvm_ctx;
- LLVMContextRef ctx = LLVMGetModuleContext(llvm_module);
- LLVMDisposeModule(llvm_module);
- LLVMContextDispose(ctx);
+   /* Set up the diagnostic handler. */
+ llvm_ctx = LLVMGetModuleContext(M);
- size_t llvm_ir_size = llvm_ir_string ? strlen(llvm_ir_string) : 0;
- size_t alloc_size = sizeof(struct radv_shader_binary_rtld) + elf_size + llvm_ir_size + 1;
- struct radv_shader_binary_rtld *rbin = calloc(1, alloc_size);
- memcpy(rbin->data, elf_buffer, elf_size);
- if (llvm_ir_string)
- memcpy(rbin->data + elf_size, llvm_ir_string, llvm_ir_size + 1);
+ LLVMContextSetDiagnosticHandler(llvm_ctx, ac_diagnostic_handler, &retval);
- rbin->base.type = RADV_BINARY_TYPE_RTLD;
- rbin->base.stage = stage;
- rbin->base.total_size = alloc_size;
- rbin->elf_size = elf_size;
- rbin->llvm_ir_size = llvm_ir_size;
- *rbinary = &rbin->base;
+   /* Compile the IR. */
+ if (!radv_compile_to_elf(ac_llvm, M, pelf_buffer, pelf_size))
+ retval = 1;
+ return retval;
+}
- free(llvm_ir_string);
- free(elf_buffer);
+static void
+ac_compile_llvm_module(struct ac_llvm_compiler *ac_llvm, LLVMModuleRef llvm_module,
+ struct radv_shader_binary **rbinary, gl_shader_stage stage, const char *name,
+ const struct radv_nir_compiler_options *options)
+{
+ char *elf_buffer = NULL;
+ size_t elf_size = 0;
+ char *llvm_ir_string = NULL;
+
+ if (options->dump_shader) {
+ fprintf(stderr, "%s LLVM IR:\n\n", name);
+ ac_dump_module(llvm_module);
+ fprintf(stderr, "\n");
+ }
+
+ if (options->record_ir) {
+ char *llvm_ir = LLVMPrintModuleToString(llvm_module);
+ llvm_ir_string = strdup(llvm_ir);
+ LLVMDisposeMessage(llvm_ir);
+ }
+
+ int v = radv_llvm_compile(llvm_module, &elf_buffer, &elf_size, ac_llvm);
+ if (v) {
+ fprintf(stderr, "compile failed\n");
+ }
+
+ LLVMContextRef ctx = LLVMGetModuleContext(llvm_module);
+ LLVMDisposeModule(llvm_module);
+ LLVMContextDispose(ctx);
+
+ size_t llvm_ir_size = llvm_ir_string ? strlen(llvm_ir_string) : 0;
+ size_t alloc_size = sizeof(struct radv_shader_binary_rtld) + elf_size + llvm_ir_size + 1;
+ struct radv_shader_binary_rtld *rbin = calloc(1, alloc_size);
+ memcpy(rbin->data, elf_buffer, elf_size);
+ if (llvm_ir_string)
+ memcpy(rbin->data + elf_size, llvm_ir_string, llvm_ir_size + 1);
+
+ rbin->base.type = RADV_BINARY_TYPE_RTLD;
+ rbin->base.stage = stage;
+ rbin->base.total_size = alloc_size;
+ rbin->elf_size = elf_size;
+ rbin->llvm_ir_size = llvm_ir_size;
+ *rbinary = &rbin->base;
+
+ free(llvm_ir_string);
+ free(elf_buffer);
}
static void
-radv_compile_nir_shader(struct ac_llvm_compiler *ac_llvm,
- struct radv_shader_binary **rbinary,
- const struct radv_shader_args *args,
- struct nir_shader *const *nir,
- int nir_count)
+radv_compile_nir_shader(struct ac_llvm_compiler *ac_llvm, struct radv_shader_binary **rbinary,
+ const struct radv_shader_args *args, struct nir_shader *const *nir,
+ int nir_count)
{
- LLVMModuleRef llvm_module;
+ LLVMModuleRef llvm_module;
- llvm_module = ac_translate_nir_to_llvm(ac_llvm, nir, nir_count, args);
+ llvm_module = ac_translate_nir_to_llvm(ac_llvm, nir, nir_count, args);
- ac_compile_llvm_module(ac_llvm, llvm_module, rbinary,
- nir[nir_count - 1]->info.stage,
- radv_get_shader_name(args->shader_info,
- nir[nir_count - 1]->info.stage),
- args->options);
+ ac_compile_llvm_module(ac_llvm, llvm_module, rbinary, nir[nir_count - 1]->info.stage,
+ radv_get_shader_name(args->shader_info, nir[nir_count - 1]->info.stage),
+ args->options);
- /* Determine the ES type (VS or TES) for the GS on GFX9. */
- if (args->options->chip_class >= GFX9) {
- if (nir_count == 2 &&
- nir[1]->info.stage == MESA_SHADER_GEOMETRY) {
- args->shader_info->gs.es_type = nir[0]->info.stage;
- }
- }
+ /* Determine the ES type (VS or TES) for the GS on GFX9. */
+ if (args->options->chip_class >= GFX9) {
+ if (nir_count == 2 && nir[1]->info.stage == MESA_SHADER_GEOMETRY) {
+ args->shader_info->gs.es_type = nir[0]->info.stage;
+ }
+ }
}
static void
ac_gs_copy_shader_emit(struct radv_shader_context *ctx)
{
- LLVMValueRef vtx_offset =
- LLVMBuildMul(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->ac.vertex_id),
- LLVMConstInt(ctx->ac.i32, 4, false), "");
- LLVMValueRef stream_id;
-
- /* Fetch the vertex stream ID. */
- if (!ctx->args->options->use_ngg_streamout &&
- ctx->args->shader_info->so.num_outputs) {
- stream_id =
- ac_unpack_param(&ctx->ac,
- ac_get_arg(&ctx->ac,
- ctx->args->ac.streamout_config),
- 24, 2);
- } else {
- stream_id = ctx->ac.i32_0;
- }
-
- LLVMBasicBlockRef end_bb;
- LLVMValueRef switch_inst;
-
- end_bb = LLVMAppendBasicBlockInContext(ctx->ac.context,
- ctx->main_function, "end");
- switch_inst = LLVMBuildSwitch(ctx->ac.builder, stream_id, end_bb, 4);
-
- for (unsigned stream = 0; stream < 4; stream++) {
- unsigned num_components =
- ctx->args->shader_info->gs.num_stream_output_components[stream];
- LLVMBasicBlockRef bb;
- unsigned offset;
-
- if (stream > 0 && !num_components)
- continue;
-
- if (stream > 0 && !ctx->args->shader_info->so.num_outputs)
- continue;
-
- bb = LLVMInsertBasicBlockInContext(ctx->ac.context, end_bb, "out");
- LLVMAddCase(switch_inst, LLVMConstInt(ctx->ac.i32, stream, 0), bb);
- LLVMPositionBuilderAtEnd(ctx->ac.builder, bb);
-
- offset = 0;
- for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
- unsigned output_usage_mask =
- ctx->args->shader_info->gs.output_usage_mask[i];
- unsigned output_stream =
- ctx->args->shader_info->gs.output_streams[i];
- int length = util_last_bit(output_usage_mask);
-
- if (!(ctx->output_mask & (1ull << i)) ||
- output_stream != stream)
- continue;
-
- for (unsigned j = 0; j < length; j++) {
- LLVMValueRef value, soffset;
-
- if (!(output_usage_mask & (1 << j)))
- continue;
-
- soffset = LLVMConstInt(ctx->ac.i32,
- offset *
- ctx->shader->info.gs.vertices_out * 16 * 4, false);
-
- offset++;
-
- value = ac_build_buffer_load(&ctx->ac,
- ctx->gsvs_ring[0],
- 1, ctx->ac.i32_0,
- vtx_offset, soffset,
- 0, ctx->ac.f32, ac_glc | ac_slc, true, false);
-
- LLVMTypeRef type = LLVMGetAllocatedType(ctx->abi.outputs[ac_llvm_reg_index_soa(i, j)]);
- if (ac_get_type_size(type) == 2) {
- value = LLVMBuildBitCast(ctx->ac.builder, value, ctx->ac.i32, "");
- value = LLVMBuildTrunc(ctx->ac.builder, value, ctx->ac.i16, "");
- }
-
- LLVMBuildStore(ctx->ac.builder,
- ac_to_float(&ctx->ac, value), ctx->abi.outputs[ac_llvm_reg_index_soa(i, j)]);
- }
- }
-
- if (!ctx->args->options->use_ngg_streamout &&
- ctx->args->shader_info->so.num_outputs)
- radv_emit_streamout(ctx, stream);
-
- if (stream == 0) {
- handle_vs_outputs_post(ctx, false, true,
- &ctx->args->shader_info->vs.outinfo);
- }
-
- LLVMBuildBr(ctx->ac.builder, end_bb);
- }
-
- LLVMPositionBuilderAtEnd(ctx->ac.builder, end_bb);
+ LLVMValueRef vtx_offset =
+ LLVMBuildMul(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->ac.vertex_id),
+ LLVMConstInt(ctx->ac.i32, 4, false), "");
+ LLVMValueRef stream_id;
+
+ /* Fetch the vertex stream ID. */
+ if (!ctx->args->options->use_ngg_streamout && ctx->args->shader_info->so.num_outputs) {
+ stream_id =
+ ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.streamout_config), 24, 2);
+ } else {
+ stream_id = ctx->ac.i32_0;
+ }
+
+ LLVMBasicBlockRef end_bb;
+ LLVMValueRef switch_inst;
+
+ end_bb = LLVMAppendBasicBlockInContext(ctx->ac.context, ctx->main_function, "end");
+ switch_inst = LLVMBuildSwitch(ctx->ac.builder, stream_id, end_bb, 4);
+
+ for (unsigned stream = 0; stream < 4; stream++) {
+ unsigned num_components = ctx->args->shader_info->gs.num_stream_output_components[stream];
+ LLVMBasicBlockRef bb;
+ unsigned offset;
+
+ if (stream > 0 && !num_components)
+ continue;
+
+ if (stream > 0 && !ctx->args->shader_info->so.num_outputs)
+ continue;
+
+ bb = LLVMInsertBasicBlockInContext(ctx->ac.context, end_bb, "out");
+ LLVMAddCase(switch_inst, LLVMConstInt(ctx->ac.i32, stream, 0), bb);
+ LLVMPositionBuilderAtEnd(ctx->ac.builder, bb);
+
+ offset = 0;
+ for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
+ unsigned output_usage_mask = ctx->args->shader_info->gs.output_usage_mask[i];
+ unsigned output_stream = ctx->args->shader_info->gs.output_streams[i];
+ int length = util_last_bit(output_usage_mask);
+
+ if (!(ctx->output_mask & (1ull << i)) || output_stream != stream)
+ continue;
+
+ for (unsigned j = 0; j < length; j++) {
+ LLVMValueRef value, soffset;
+
+ if (!(output_usage_mask & (1 << j)))
+ continue;
+
+ soffset = LLVMConstInt(ctx->ac.i32, offset * ctx->shader->info.gs.vertices_out * 16 * 4,
+ false);
+
+ offset++;
+
+ value = ac_build_buffer_load(&ctx->ac, ctx->gsvs_ring[0], 1, ctx->ac.i32_0, vtx_offset,
+ soffset, 0, ctx->ac.f32, ac_glc | ac_slc, true, false);
+
+ LLVMTypeRef type = LLVMGetAllocatedType(ctx->abi.outputs[ac_llvm_reg_index_soa(i, j)]);
+ if (ac_get_type_size(type) == 2) {
+ value = LLVMBuildBitCast(ctx->ac.builder, value, ctx->ac.i32, "");
+ value = LLVMBuildTrunc(ctx->ac.builder, value, ctx->ac.i16, "");
+ }
+
+ LLVMBuildStore(ctx->ac.builder, ac_to_float(&ctx->ac, value),
+ ctx->abi.outputs[ac_llvm_reg_index_soa(i, j)]);
+ }
+ }
+
+ if (!ctx->args->options->use_ngg_streamout && ctx->args->shader_info->so.num_outputs)
+ radv_emit_streamout(ctx, stream);
+
+ if (stream == 0) {
+ handle_vs_outputs_post(ctx, false, true, &ctx->args->shader_info->vs.outinfo);
+ }
+
+ LLVMBuildBr(ctx->ac.builder, end_bb);
+ }
+
+ LLVMPositionBuilderAtEnd(ctx->ac.builder, end_bb);
}
static void
-radv_compile_gs_copy_shader(struct ac_llvm_compiler *ac_llvm,
- struct nir_shader *geom_shader,
- struct radv_shader_binary **rbinary,
- const struct radv_shader_args *args)
+radv_compile_gs_copy_shader(struct ac_llvm_compiler *ac_llvm, struct nir_shader *geom_shader,
+ struct radv_shader_binary **rbinary,
+ const struct radv_shader_args *args)
{
- struct radv_shader_context ctx = {0};
- ctx.args = args;
-
- assert(args->is_gs_copy_shader);
+ struct radv_shader_context ctx = {0};
+ ctx.args = args;
- ac_llvm_context_init(&ctx.ac, ac_llvm, args->options->chip_class,
- args->options->family, args->options->info,
- AC_FLOAT_MODE_DEFAULT, 64, 64);
- ctx.context = ctx.ac.context;
+ assert(args->is_gs_copy_shader);
- ctx.stage = MESA_SHADER_VERTEX;
- ctx.shader = geom_shader;
+ ac_llvm_context_init(&ctx.ac, ac_llvm, args->options->chip_class, args->options->family,
+ args->options->info, AC_FLOAT_MODE_DEFAULT, 64, 64);
+ ctx.context = ctx.ac.context;
- create_function(&ctx, MESA_SHADER_VERTEX, false);
+ ctx.stage = MESA_SHADER_VERTEX;
+ ctx.shader = geom_shader;
- ac_setup_rings(&ctx);
+ create_function(&ctx, MESA_SHADER_VERTEX, false);
- nir_foreach_shader_out_variable(variable, geom_shader) {
- scan_shader_output_decl(&ctx, variable, geom_shader, MESA_SHADER_VERTEX);
- ac_handle_shader_output_decl(&ctx.ac, &ctx.abi, geom_shader,
- variable, MESA_SHADER_VERTEX);
- }
+ ac_setup_rings(&ctx);
- ac_gs_copy_shader_emit(&ctx);
+ nir_foreach_shader_out_variable(variable, geom_shader)
+ {
+ scan_shader_output_decl(&ctx, variable, geom_shader, MESA_SHADER_VERTEX);
+ ac_handle_shader_output_decl(&ctx.ac, &ctx.abi, geom_shader, variable, MESA_SHADER_VERTEX);
+ }
- LLVMBuildRetVoid(ctx.ac.builder);
+ ac_gs_copy_shader_emit(&ctx);
- ac_llvm_finalize_module(&ctx, ac_llvm->passmgr, args->options);
+ LLVMBuildRetVoid(ctx.ac.builder);
- ac_compile_llvm_module(ac_llvm, ctx.ac.module, rbinary,
- MESA_SHADER_VERTEX, "GS Copy Shader", args->options);
- (*rbinary)->is_gs_copy_shader = true;
+ ac_llvm_finalize_module(&ctx, ac_llvm->passmgr, args->options);
+ ac_compile_llvm_module(ac_llvm, ctx.ac.module, rbinary, MESA_SHADER_VERTEX, "GS Copy Shader",
+ args->options);
+ (*rbinary)->is_gs_copy_shader = true;
}
void
-llvm_compile_shader(struct radv_device *device,
- unsigned shader_count,
- struct nir_shader *const *shaders,
- struct radv_shader_binary **binary,
- struct radv_shader_args *args)
-{
- enum ac_target_machine_options tm_options = 0;
- struct ac_llvm_compiler ac_llvm;
- bool thread_compiler;
-
- tm_options |= AC_TM_SUPPORTS_SPILL;
- if (args->options->check_ir)
- tm_options |= AC_TM_CHECK_IR;
-
- thread_compiler = !(device->instance->debug_flags & RADV_DEBUG_NOTHREADLLVM);
-
- radv_init_llvm_compiler(&ac_llvm, thread_compiler,
- args->options->family, tm_options,
- args->shader_info->wave_size);
-
- if (args->is_gs_copy_shader) {
- radv_compile_gs_copy_shader(&ac_llvm, *shaders, binary, args);
- } else {
- radv_compile_nir_shader(&ac_llvm, binary, args,
- shaders, shader_count);
- }
-
- radv_destroy_llvm_compiler(&ac_llvm, thread_compiler);
+llvm_compile_shader(struct radv_device *device, unsigned shader_count,
+ struct nir_shader *const *shaders, struct radv_shader_binary **binary,
+ struct radv_shader_args *args)
+{
+ enum ac_target_machine_options tm_options = 0;
+ struct ac_llvm_compiler ac_llvm;
+ bool thread_compiler;
+
+ tm_options |= AC_TM_SUPPORTS_SPILL;
+ if (args->options->check_ir)
+ tm_options |= AC_TM_CHECK_IR;
+
+ thread_compiler = !(device->instance->debug_flags & RADV_DEBUG_NOTHREADLLVM);
+
+ radv_init_llvm_compiler(&ac_llvm, thread_compiler, args->options->family, tm_options,
+ args->shader_info->wave_size);
+
+ if (args->is_gs_copy_shader) {
+ radv_compile_gs_copy_shader(&ac_llvm, *shaders, binary, args);
+ } else {
+ radv_compile_nir_shader(&ac_llvm, binary, args, shaders, shader_count);
+ }
+
+ radv_destroy_llvm_compiler(&ac_llvm, thread_compiler);
}
diff --git a/src/amd/vulkan/radv_pass.c b/src/amd/vulkan/radv_pass.c
index 3e1db72df0b..117bb3569be 100644
--- a/src/amd/vulkan/radv_pass.c
+++ b/src/amd/vulkan/radv_pass.c
@@ -29,312 +29,291 @@
#include "vk_util.h"
static void
-radv_render_pass_add_subpass_dep(struct radv_render_pass *pass,
- const VkSubpassDependency2 *dep)
+radv_render_pass_add_subpass_dep(struct radv_render_pass *pass, const VkSubpassDependency2 *dep)
{
- uint32_t src = dep->srcSubpass;
- uint32_t dst = dep->dstSubpass;
-
- /* Ignore subpass self-dependencies as they allow the app to call
- * vkCmdPipelineBarrier() inside the render pass and the driver should
- * only do the barrier when called, not when starting the render pass.
- */
- if (src == dst)
- return;
-
- /* Accumulate all ingoing external dependencies to the first subpass. */
- if (src == VK_SUBPASS_EXTERNAL)
- dst = 0;
-
- if (dst == VK_SUBPASS_EXTERNAL) {
- if (dep->dstStageMask != VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)
- pass->end_barrier.src_stage_mask |= dep->srcStageMask;
- pass->end_barrier.src_access_mask |= dep->srcAccessMask;
- pass->end_barrier.dst_access_mask |= dep->dstAccessMask;
- } else {
- if (dep->dstStageMask != VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)
- pass->subpasses[dst].start_barrier.src_stage_mask |= dep->srcStageMask;
- pass->subpasses[dst].start_barrier.src_access_mask |= dep->srcAccessMask;
- pass->subpasses[dst].start_barrier.dst_access_mask |= dep->dstAccessMask;
- }
+ uint32_t src = dep->srcSubpass;
+ uint32_t dst = dep->dstSubpass;
+
+ /* Ignore subpass self-dependencies as they allow the app to call
+ * vkCmdPipelineBarrier() inside the render pass and the driver should
+ * only do the barrier when called, not when starting the render pass.
+ */
+ if (src == dst)
+ return;
+
+ /* Accumulate all ingoing external dependencies to the first subpass. */
+ if (src == VK_SUBPASS_EXTERNAL)
+ dst = 0;
+
+ if (dst == VK_SUBPASS_EXTERNAL) {
+ if (dep->dstStageMask != VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)
+ pass->end_barrier.src_stage_mask |= dep->srcStageMask;
+ pass->end_barrier.src_access_mask |= dep->srcAccessMask;
+ pass->end_barrier.dst_access_mask |= dep->dstAccessMask;
+ } else {
+ if (dep->dstStageMask != VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)
+ pass->subpasses[dst].start_barrier.src_stage_mask |= dep->srcStageMask;
+ pass->subpasses[dst].start_barrier.src_access_mask |= dep->srcAccessMask;
+ pass->subpasses[dst].start_barrier.dst_access_mask |= dep->dstAccessMask;
+ }
}
static void
radv_render_pass_add_implicit_deps(struct radv_render_pass *pass)
{
- /* From the Vulkan 1.0.39 spec:
- *
- * If there is no subpass dependency from VK_SUBPASS_EXTERNAL to the
- * first subpass that uses an attachment, then an implicit subpass
- * dependency exists from VK_SUBPASS_EXTERNAL to the first subpass it is
- * used in. The implicit subpass dependency only exists if there
- * exists an automatic layout transition away from initialLayout.
- * The subpass dependency operates as if defined with the
- * following parameters:
- *
- * VkSubpassDependency implicitDependency = {
- * .srcSubpass = VK_SUBPASS_EXTERNAL;
- * .dstSubpass = firstSubpass; // First subpass attachment is used in
- * .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
- * .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
- * .srcAccessMask = 0;
- * .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
- * VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
- * VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
- * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
- * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
- * .dependencyFlags = 0;
- * };
- *
- * Similarly, if there is no subpass dependency from the last subpass
- * that uses an attachment to VK_SUBPASS_EXTERNAL, then an implicit
- * subpass dependency exists from the last subpass it is used in to
- * VK_SUBPASS_EXTERNAL. The implicit subpass dependency only exists
- * if there exists an automatic layout transition into finalLayout.
- * The subpass dependency operates as if defined with the following
- * parameters:
- *
- * VkSubpassDependency implicitDependency = {
- * .srcSubpass = lastSubpass; // Last subpass attachment is used in
- * .dstSubpass = VK_SUBPASS_EXTERNAL;
- * .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
- * .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
- * .srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
- * VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
- * VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
- * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
- * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
- * .dstAccessMask = 0;
- * .dependencyFlags = 0;
- * };
- */
- for (uint32_t i = 0; i < pass->subpass_count; i++) {
- struct radv_subpass *subpass = &pass->subpasses[i];
- bool add_ingoing_dep = false, add_outgoing_dep = false;
-
- for (uint32_t j = 0; j < subpass->attachment_count; j++) {
- struct radv_subpass_attachment *subpass_att =
- &subpass->attachments[j];
- if (subpass_att->attachment == VK_ATTACHMENT_UNUSED)
- continue;
-
- struct radv_render_pass_attachment *pass_att =
- &pass->attachments[subpass_att->attachment];
- uint32_t initial_layout = pass_att->initial_layout;
- uint32_t stencil_initial_layout = pass_att->stencil_initial_layout;
- uint32_t final_layout = pass_att->final_layout;
- uint32_t stencil_final_layout = pass_att->stencil_final_layout;
-
- /* The implicit subpass dependency only exists if
- * there exists an automatic layout transition away
- * from initialLayout.
- */
- if (pass_att->first_subpass_idx == i &&
- !subpass->has_ingoing_dep &&
- ((subpass_att->layout != initial_layout) ||
- (subpass_att->layout != stencil_initial_layout))) {
- add_ingoing_dep = true;
- }
-
- /* The implicit subpass dependency only exists if
- * there exists an automatic layout transition into
- * finalLayout.
- */
- if (pass_att->last_subpass_idx == i &&
- !subpass->has_outgoing_dep &&
- ((subpass_att->layout != final_layout) ||
- (subpass_att->layout != stencil_final_layout))) {
- add_outgoing_dep = true;
- }
- }
-
- if (add_ingoing_dep) {
- const VkSubpassDependency2KHR implicit_ingoing_dep = {
- .srcSubpass = VK_SUBPASS_EXTERNAL,
- .dstSubpass = i, /* first subpass attachment is used in */
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
- VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
- VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
- VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
- VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
- .dependencyFlags = 0,
- };
-
- radv_render_pass_add_subpass_dep(pass,
- &implicit_ingoing_dep);
- }
-
- if (add_outgoing_dep) {
- const VkSubpassDependency2KHR implicit_outgoing_dep = {
- .srcSubpass = i, /* last subpass attachment is used in */
- .dstSubpass = VK_SUBPASS_EXTERNAL,
- .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
- VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
- VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
- VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
- VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
- .dstAccessMask = 0,
- .dependencyFlags = 0,
- };
-
- radv_render_pass_add_subpass_dep(pass,
- &implicit_outgoing_dep);
- }
- }
+ /* From the Vulkan 1.0.39 spec:
+ *
+ * If there is no subpass dependency from VK_SUBPASS_EXTERNAL to the
+ * first subpass that uses an attachment, then an implicit subpass
+ * dependency exists from VK_SUBPASS_EXTERNAL to the first subpass it is
+ * used in. The implicit subpass dependency only exists if there
+ * exists an automatic layout transition away from initialLayout.
+ * The subpass dependency operates as if defined with the
+ * following parameters:
+ *
+ * VkSubpassDependency implicitDependency = {
+ * .srcSubpass = VK_SUBPASS_EXTERNAL;
+ * .dstSubpass = firstSubpass; // First subpass attachment is used in
+ * .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
+ * .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
+ * .srcAccessMask = 0;
+ * .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
+ * VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
+ * VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
+ * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
+ * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+ * .dependencyFlags = 0;
+ * };
+ *
+ * Similarly, if there is no subpass dependency from the last subpass
+ * that uses an attachment to VK_SUBPASS_EXTERNAL, then an implicit
+ * subpass dependency exists from the last subpass it is used in to
+ * VK_SUBPASS_EXTERNAL. The implicit subpass dependency only exists
+ * if there exists an automatic layout transition into finalLayout.
+ * The subpass dependency operates as if defined with the following
+ * parameters:
+ *
+ * VkSubpassDependency implicitDependency = {
+ * .srcSubpass = lastSubpass; // Last subpass attachment is used in
+ * .dstSubpass = VK_SUBPASS_EXTERNAL;
+ * .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
+ * .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
+ * .srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
+ * VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
+ * VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
+ * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
+ * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+ * .dstAccessMask = 0;
+ * .dependencyFlags = 0;
+ * };
+ */
+ for (uint32_t i = 0; i < pass->subpass_count; i++) {
+ struct radv_subpass *subpass = &pass->subpasses[i];
+ bool add_ingoing_dep = false, add_outgoing_dep = false;
+
+ for (uint32_t j = 0; j < subpass->attachment_count; j++) {
+ struct radv_subpass_attachment *subpass_att = &subpass->attachments[j];
+ if (subpass_att->attachment == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ struct radv_render_pass_attachment *pass_att = &pass->attachments[subpass_att->attachment];
+ uint32_t initial_layout = pass_att->initial_layout;
+ uint32_t stencil_initial_layout = pass_att->stencil_initial_layout;
+ uint32_t final_layout = pass_att->final_layout;
+ uint32_t stencil_final_layout = pass_att->stencil_final_layout;
+
+ /* The implicit subpass dependency only exists if
+ * there exists an automatic layout transition away
+ * from initialLayout.
+ */
+ if (pass_att->first_subpass_idx == i && !subpass->has_ingoing_dep &&
+ ((subpass_att->layout != initial_layout) ||
+ (subpass_att->layout != stencil_initial_layout))) {
+ add_ingoing_dep = true;
+ }
+
+ /* The implicit subpass dependency only exists if
+ * there exists an automatic layout transition into
+ * finalLayout.
+ */
+ if (pass_att->last_subpass_idx == i && !subpass->has_outgoing_dep &&
+ ((subpass_att->layout != final_layout) ||
+ (subpass_att->layout != stencil_final_layout))) {
+ add_outgoing_dep = true;
+ }
+ }
+
+ if (add_ingoing_dep) {
+ const VkSubpassDependency2KHR implicit_ingoing_dep = {
+ .srcSubpass = VK_SUBPASS_EXTERNAL,
+ .dstSubpass = i, /* first subpass attachment is used in */
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask =
+ VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
+ VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
+ VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
+ .dependencyFlags = 0,
+ };
+
+ radv_render_pass_add_subpass_dep(pass, &implicit_ingoing_dep);
+ }
+
+ if (add_outgoing_dep) {
+ const VkSubpassDependency2KHR implicit_outgoing_dep = {
+ .srcSubpass = i, /* last subpass attachment is used in */
+ .dstSubpass = VK_SUBPASS_EXTERNAL,
+ .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask =
+ VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
+ VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
+ VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0,
+ };
+
+ radv_render_pass_add_subpass_dep(pass, &implicit_outgoing_dep);
+ }
+ }
}
static void
radv_render_pass_compile(struct radv_render_pass *pass)
{
- for (uint32_t i = 0; i < pass->subpass_count; i++) {
- struct radv_subpass *subpass = &pass->subpasses[i];
-
- for (uint32_t j = 0; j < subpass->attachment_count; j++) {
- struct radv_subpass_attachment *subpass_att =
- &subpass->attachments[j];
- if (subpass_att->attachment == VK_ATTACHMENT_UNUSED)
- continue;
-
- struct radv_render_pass_attachment *pass_att =
- &pass->attachments[subpass_att->attachment];
-
- pass_att->first_subpass_idx = VK_SUBPASS_EXTERNAL;
- pass_att->last_subpass_idx = VK_SUBPASS_EXTERNAL;
- }
- }
-
- for (uint32_t i = 0; i < pass->subpass_count; i++) {
- struct radv_subpass *subpass = &pass->subpasses[i];
- uint32_t color_sample_count = 1, depth_sample_count = 1;
-
- /* We don't allow depth_stencil_attachment to be non-NULL and
- * be VK_ATTACHMENT_UNUSED. This way something can just check
- * for NULL and be guaranteed that they have a valid
- * attachment.
- */
- if (subpass->depth_stencil_attachment &&
- subpass->depth_stencil_attachment->attachment == VK_ATTACHMENT_UNUSED)
- subpass->depth_stencil_attachment = NULL;
-
- if (subpass->ds_resolve_attachment &&
- subpass->ds_resolve_attachment->attachment == VK_ATTACHMENT_UNUSED)
- subpass->ds_resolve_attachment = NULL;
-
- for (uint32_t j = 0; j < subpass->attachment_count; j++) {
- struct radv_subpass_attachment *subpass_att =
- &subpass->attachments[j];
- if (subpass_att->attachment == VK_ATTACHMENT_UNUSED)
- continue;
-
- struct radv_render_pass_attachment *pass_att =
- &pass->attachments[subpass_att->attachment];
-
- if (i < pass_att->first_subpass_idx)
- pass_att->first_subpass_idx = i;
- pass_att->last_subpass_idx = i;
- }
-
- subpass->has_color_att = false;
- for (uint32_t j = 0; j < subpass->color_count; j++) {
- struct radv_subpass_attachment *subpass_att =
- &subpass->color_attachments[j];
- if (subpass_att->attachment == VK_ATTACHMENT_UNUSED)
- continue;
-
- subpass->has_color_att = true;
-
- struct radv_render_pass_attachment *pass_att =
- &pass->attachments[subpass_att->attachment];
-
- color_sample_count = pass_att->samples;
- }
-
- if (subpass->depth_stencil_attachment) {
- const uint32_t a =
- subpass->depth_stencil_attachment->attachment;
- struct radv_render_pass_attachment *pass_att =
- &pass->attachments[a];
- depth_sample_count = pass_att->samples;
- }
-
- subpass->max_sample_count = MAX2(color_sample_count,
- depth_sample_count);
- subpass->color_sample_count = color_sample_count;
- subpass->depth_sample_count = depth_sample_count;
-
- /* We have to handle resolve attachments specially */
- subpass->has_color_resolve = false;
- if (subpass->resolve_attachments) {
- for (uint32_t j = 0; j < subpass->color_count; j++) {
- struct radv_subpass_attachment *resolve_att =
- &subpass->resolve_attachments[j];
-
- if (resolve_att->attachment == VK_ATTACHMENT_UNUSED)
- continue;
-
- subpass->has_color_resolve = true;
- }
- }
-
- for (uint32_t j = 0; j < subpass->input_count; ++j) {
- if (subpass->input_attachments[j].attachment == VK_ATTACHMENT_UNUSED)
- continue;
-
- for (uint32_t k = 0; k < subpass->color_count; ++k) {
- if (subpass->color_attachments[k].attachment == subpass->input_attachments[j].attachment) {
- subpass->input_attachments[j].in_render_loop = true;
- subpass->color_attachments[k].in_render_loop = true;
- }
- }
-
- if (subpass->depth_stencil_attachment &&
- subpass->depth_stencil_attachment->attachment == subpass->input_attachments[j].attachment) {
- subpass->input_attachments[j].in_render_loop = true;
- subpass->depth_stencil_attachment->in_render_loop = true;
- }
- }
- }
+ for (uint32_t i = 0; i < pass->subpass_count; i++) {
+ struct radv_subpass *subpass = &pass->subpasses[i];
+
+ for (uint32_t j = 0; j < subpass->attachment_count; j++) {
+ struct radv_subpass_attachment *subpass_att = &subpass->attachments[j];
+ if (subpass_att->attachment == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ struct radv_render_pass_attachment *pass_att = &pass->attachments[subpass_att->attachment];
+
+ pass_att->first_subpass_idx = VK_SUBPASS_EXTERNAL;
+ pass_att->last_subpass_idx = VK_SUBPASS_EXTERNAL;
+ }
+ }
+
+ for (uint32_t i = 0; i < pass->subpass_count; i++) {
+ struct radv_subpass *subpass = &pass->subpasses[i];
+ uint32_t color_sample_count = 1, depth_sample_count = 1;
+
+      /* We don't allow depth_stencil_attachment to be non-NULL while
+       * referring to VK_ATTACHMENT_UNUSED. This way code can simply
+       * check for NULL and be guaranteed that a non-NULL pointer is a
+       * valid attachment.
+       */
+ if (subpass->depth_stencil_attachment &&
+ subpass->depth_stencil_attachment->attachment == VK_ATTACHMENT_UNUSED)
+ subpass->depth_stencil_attachment = NULL;
+
+ if (subpass->ds_resolve_attachment &&
+ subpass->ds_resolve_attachment->attachment == VK_ATTACHMENT_UNUSED)
+ subpass->ds_resolve_attachment = NULL;
+
+ for (uint32_t j = 0; j < subpass->attachment_count; j++) {
+ struct radv_subpass_attachment *subpass_att = &subpass->attachments[j];
+ if (subpass_att->attachment == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ struct radv_render_pass_attachment *pass_att = &pass->attachments[subpass_att->attachment];
+
+ if (i < pass_att->first_subpass_idx)
+ pass_att->first_subpass_idx = i;
+ pass_att->last_subpass_idx = i;
+ }
+
+ subpass->has_color_att = false;
+ for (uint32_t j = 0; j < subpass->color_count; j++) {
+ struct radv_subpass_attachment *subpass_att = &subpass->color_attachments[j];
+ if (subpass_att->attachment == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ subpass->has_color_att = true;
+
+ struct radv_render_pass_attachment *pass_att = &pass->attachments[subpass_att->attachment];
+
+ color_sample_count = pass_att->samples;
+ }
+
+ if (subpass->depth_stencil_attachment) {
+ const uint32_t a = subpass->depth_stencil_attachment->attachment;
+ struct radv_render_pass_attachment *pass_att = &pass->attachments[a];
+ depth_sample_count = pass_att->samples;
+ }
+
+ subpass->max_sample_count = MAX2(color_sample_count, depth_sample_count);
+ subpass->color_sample_count = color_sample_count;
+ subpass->depth_sample_count = depth_sample_count;
+
+ /* We have to handle resolve attachments specially */
+ subpass->has_color_resolve = false;
+ if (subpass->resolve_attachments) {
+ for (uint32_t j = 0; j < subpass->color_count; j++) {
+ struct radv_subpass_attachment *resolve_att = &subpass->resolve_attachments[j];
+
+ if (resolve_att->attachment == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ subpass->has_color_resolve = true;
+ }
+ }
+
+ for (uint32_t j = 0; j < subpass->input_count; ++j) {
+ if (subpass->input_attachments[j].attachment == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ for (uint32_t k = 0; k < subpass->color_count; ++k) {
+ if (subpass->color_attachments[k].attachment ==
+ subpass->input_attachments[j].attachment) {
+ subpass->input_attachments[j].in_render_loop = true;
+ subpass->color_attachments[k].in_render_loop = true;
+ }
+ }
+
+ if (subpass->depth_stencil_attachment && subpass->depth_stencil_attachment->attachment ==
+ subpass->input_attachments[j].attachment) {
+ subpass->input_attachments[j].in_render_loop = true;
+ subpass->depth_stencil_attachment->in_render_loop = true;
+ }
+ }
+ }
}
static void
-radv_destroy_render_pass(struct radv_device *device,
- const VkAllocationCallbacks *pAllocator,
- struct radv_render_pass *pass)
+radv_destroy_render_pass(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
+ struct radv_render_pass *pass)
{
- vk_object_base_finish(&pass->base);
- vk_free2(&device->vk.alloc, pAllocator, pass->subpass_attachments);
- vk_free2(&device->vk.alloc, pAllocator, pass);
+ vk_object_base_finish(&pass->base);
+ vk_free2(&device->vk.alloc, pAllocator, pass->subpass_attachments);
+ vk_free2(&device->vk.alloc, pAllocator, pass);
}
static unsigned
radv_num_subpass_attachments2(const VkSubpassDescription2 *desc)
{
- const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
- vk_find_struct_const(desc->pNext,
- SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE);
-
- return desc->inputAttachmentCount +
- desc->colorAttachmentCount +
- (desc->pResolveAttachments ? desc->colorAttachmentCount : 0) +
- (desc->pDepthStencilAttachment != NULL) +
- (ds_resolve && ds_resolve->pDepthStencilResolveAttachment);
+ const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
+ vk_find_struct_const(desc->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE);
+
+ return desc->inputAttachmentCount + desc->colorAttachmentCount +
+ (desc->pResolveAttachments ? desc->colorAttachmentCount : 0) +
+ (desc->pDepthStencilAttachment != NULL) +
+ (ds_resolve && ds_resolve->pDepthStencilResolveAttachment);
}
static bool
vk_image_layout_depth_only(VkImageLayout layout)
{
- switch (layout) {
- case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL:
- case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL:
- return true;
- default:
- return false;
- }
+ switch (layout) {
+ case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL:
+ case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL:
+ return true;
+ default:
+ return false;
+ }
}
/* From the Vulkan Specification 1.2.166 - VkAttachmentReference2:
@@ -348,16 +327,15 @@ vk_image_layout_depth_only(VkImageLayout layout)
static VkImageLayout
stencil_ref_layout(const VkAttachmentReference2 *att_ref)
{
- if (!vk_image_layout_depth_only(att_ref->layout))
- return att_ref->layout;
+ if (!vk_image_layout_depth_only(att_ref->layout))
+ return att_ref->layout;
- const VkAttachmentReferenceStencilLayoutKHR *stencil_ref =
- vk_find_struct_const(att_ref->pNext,
- ATTACHMENT_REFERENCE_STENCIL_LAYOUT_KHR);
- if (!stencil_ref)
- return VK_IMAGE_LAYOUT_UNDEFINED;
+ const VkAttachmentReferenceStencilLayoutKHR *stencil_ref =
+ vk_find_struct_const(att_ref->pNext, ATTACHMENT_REFERENCE_STENCIL_LAYOUT_KHR);
+ if (!stencil_ref)
+ return VK_IMAGE_LAYOUT_UNDEFINED;
- return stencil_ref->stencilLayout;
+ return stencil_ref->stencilLayout;
}
/* From the Vulkan Specification 1.2.166 - VkAttachmentDescription2:
@@ -372,212 +350,197 @@ stencil_ref_layout(const VkAttachmentReference2 *att_ref)
static VkImageLayout
stencil_desc_layout(const VkAttachmentDescription2KHR *att_desc, bool final)
{
- const struct util_format_description *desc = vk_format_description(att_desc->format);
- if (!util_format_has_stencil(desc))
- return VK_IMAGE_LAYOUT_UNDEFINED;
-
- const VkImageLayout main_layout =
- final ? att_desc->finalLayout : att_desc->initialLayout;
- if (!vk_image_layout_depth_only(main_layout))
- return main_layout;
-
- const VkAttachmentDescriptionStencilLayoutKHR *stencil_desc =
- vk_find_struct_const(att_desc->pNext,
- ATTACHMENT_DESCRIPTION_STENCIL_LAYOUT_KHR);
- assert(stencil_desc);
- return final ? stencil_desc->stencilFinalLayout : stencil_desc->stencilInitialLayout;
+ const struct util_format_description *desc = vk_format_description(att_desc->format);
+ if (!util_format_has_stencil(desc))
+ return VK_IMAGE_LAYOUT_UNDEFINED;
+
+ const VkImageLayout main_layout = final ? att_desc->finalLayout : att_desc->initialLayout;
+ if (!vk_image_layout_depth_only(main_layout))
+ return main_layout;
+
+ const VkAttachmentDescriptionStencilLayoutKHR *stencil_desc =
+ vk_find_struct_const(att_desc->pNext, ATTACHMENT_DESCRIPTION_STENCIL_LAYOUT_KHR);
+ assert(stencil_desc);
+ return final ? stencil_desc->stencilFinalLayout : stencil_desc->stencilInitialLayout;
}
-VkResult radv_CreateRenderPass2(
- VkDevice _device,
- const VkRenderPassCreateInfo2* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkRenderPass* pRenderPass)
+VkResult
+radv_CreateRenderPass2(VkDevice _device, const VkRenderPassCreateInfo2 *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkRenderPass *pRenderPass)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- struct radv_render_pass *pass;
- size_t size;
- size_t attachments_offset;
-
- assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2);
-
- size = sizeof(*pass);
- size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
- attachments_offset = size;
- size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);
-
- pass = vk_alloc2(&device->vk.alloc, pAllocator, size, 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (pass == NULL)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
-
- memset(pass, 0, size);
-
- vk_object_base_init(&device->vk, &pass->base,
- VK_OBJECT_TYPE_RENDER_PASS);
-
- pass->attachment_count = pCreateInfo->attachmentCount;
- pass->subpass_count = pCreateInfo->subpassCount;
- pass->attachments = (struct radv_render_pass_attachment *)((uint8_t *) pass + attachments_offset);
-
- for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
- struct radv_render_pass_attachment *att = &pass->attachments[i];
-
- att->format = pCreateInfo->pAttachments[i].format;
- att->samples = pCreateInfo->pAttachments[i].samples;
- att->load_op = pCreateInfo->pAttachments[i].loadOp;
- att->stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp;
- att->initial_layout = pCreateInfo->pAttachments[i].initialLayout;
- att->final_layout = pCreateInfo->pAttachments[i].finalLayout;
- att->stencil_initial_layout = stencil_desc_layout(&pCreateInfo->pAttachments[i], false);
- att->stencil_final_layout = stencil_desc_layout(&pCreateInfo->pAttachments[i], true);
- // att->store_op = pCreateInfo->pAttachments[i].storeOp;
- // att->stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp;
- }
- uint32_t subpass_attachment_count = 0;
- struct radv_subpass_attachment *p;
- for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
- subpass_attachment_count +=
- radv_num_subpass_attachments2(&pCreateInfo->pSubpasses[i]);
- }
-
- if (subpass_attachment_count) {
- pass->subpass_attachments =
- vk_alloc2(&device->vk.alloc, pAllocator,
- subpass_attachment_count * sizeof(struct radv_subpass_attachment), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (pass->subpass_attachments == NULL) {
- radv_destroy_render_pass(device, pAllocator, pass);
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- }
- } else
- pass->subpass_attachments = NULL;
-
- p = pass->subpass_attachments;
- for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
- const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
- struct radv_subpass *subpass = &pass->subpasses[i];
-
- subpass->input_count = desc->inputAttachmentCount;
- subpass->color_count = desc->colorAttachmentCount;
- subpass->attachment_count = radv_num_subpass_attachments2(desc);
- subpass->attachments = p;
- subpass->view_mask = desc->viewMask;
-
- if (desc->inputAttachmentCount > 0) {
- subpass->input_attachments = p;
- p += desc->inputAttachmentCount;
-
- for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
- subpass->input_attachments[j] = (struct radv_subpass_attachment) {
- .attachment = desc->pInputAttachments[j].attachment,
- .layout = desc->pInputAttachments[j].layout,
- .stencil_layout = stencil_ref_layout(&desc->pInputAttachments[j]),
- };
- }
- }
-
- if (desc->colorAttachmentCount > 0) {
- subpass->color_attachments = p;
- p += desc->colorAttachmentCount;
-
- for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
- subpass->color_attachments[j] = (struct radv_subpass_attachment) {
- .attachment = desc->pColorAttachments[j].attachment,
- .layout = desc->pColorAttachments[j].layout,
- };
- }
- }
-
- if (desc->pResolveAttachments) {
- subpass->resolve_attachments = p;
- p += desc->colorAttachmentCount;
-
- for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
- subpass->resolve_attachments[j] = (struct radv_subpass_attachment) {
- .attachment = desc->pResolveAttachments[j].attachment,
- .layout = desc->pResolveAttachments[j].layout,
- };
- }
- }
-
- if (desc->pDepthStencilAttachment) {
- subpass->depth_stencil_attachment = p++;
-
- *subpass->depth_stencil_attachment = (struct radv_subpass_attachment) {
- .attachment = desc->pDepthStencilAttachment->attachment,
- .layout = desc->pDepthStencilAttachment->layout,
- .stencil_layout = stencil_ref_layout(desc->pDepthStencilAttachment),
- };
- }
-
- const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
- vk_find_struct_const(desc->pNext,
- SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE);
-
- if (ds_resolve && ds_resolve->pDepthStencilResolveAttachment) {
- subpass->ds_resolve_attachment = p++;
-
- *subpass->ds_resolve_attachment = (struct radv_subpass_attachment) {
- .attachment = ds_resolve->pDepthStencilResolveAttachment->attachment,
- .layout = ds_resolve->pDepthStencilResolveAttachment->layout,
- .stencil_layout = stencil_ref_layout(ds_resolve->pDepthStencilResolveAttachment),
- };
-
- subpass->depth_resolve_mode = ds_resolve->depthResolveMode;
- subpass->stencil_resolve_mode = ds_resolve->stencilResolveMode;
- }
- }
-
- for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) {
- const VkSubpassDependency2 *dep = &pCreateInfo->pDependencies[i];
-
- radv_render_pass_add_subpass_dep(pass,
- &pCreateInfo->pDependencies[i]);
-
- /* Determine if the subpass has explicit dependencies from/to
- * VK_SUBPASS_EXTERNAL.
- */
- if (dep->srcSubpass == VK_SUBPASS_EXTERNAL &&
- dep->dstSubpass != VK_SUBPASS_EXTERNAL) {
- pass->subpasses[dep->dstSubpass].has_ingoing_dep = true;
- }
-
- if (dep->dstSubpass == VK_SUBPASS_EXTERNAL &&
- dep->srcSubpass != VK_SUBPASS_EXTERNAL) {
- pass->subpasses[dep->srcSubpass].has_outgoing_dep = true;
- }
- }
-
- radv_render_pass_compile(pass);
-
- radv_render_pass_add_implicit_deps(pass);
-
- *pRenderPass = radv_render_pass_to_handle(pass);
-
- return VK_SUCCESS;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ struct radv_render_pass *pass;
+ size_t size;
+ size_t attachments_offset;
+
+ assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2);
+
+ size = sizeof(*pass);
+ size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
+ attachments_offset = size;
+ size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);
+
+ pass = vk_alloc2(&device->vk.alloc, pAllocator, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (pass == NULL)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ memset(pass, 0, size);
+
+ vk_object_base_init(&device->vk, &pass->base, VK_OBJECT_TYPE_RENDER_PASS);
+
+ pass->attachment_count = pCreateInfo->attachmentCount;
+ pass->subpass_count = pCreateInfo->subpassCount;
+ pass->attachments = (struct radv_render_pass_attachment *)((uint8_t *)pass + attachments_offset);
+
+ for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
+ struct radv_render_pass_attachment *att = &pass->attachments[i];
+
+ att->format = pCreateInfo->pAttachments[i].format;
+ att->samples = pCreateInfo->pAttachments[i].samples;
+ att->load_op = pCreateInfo->pAttachments[i].loadOp;
+ att->stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp;
+ att->initial_layout = pCreateInfo->pAttachments[i].initialLayout;
+ att->final_layout = pCreateInfo->pAttachments[i].finalLayout;
+ att->stencil_initial_layout = stencil_desc_layout(&pCreateInfo->pAttachments[i], false);
+ att->stencil_final_layout = stencil_desc_layout(&pCreateInfo->pAttachments[i], true);
+ // att->store_op = pCreateInfo->pAttachments[i].storeOp;
+ // att->stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp;
+ }
+ uint32_t subpass_attachment_count = 0;
+ struct radv_subpass_attachment *p;
+ for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
+ subpass_attachment_count += radv_num_subpass_attachments2(&pCreateInfo->pSubpasses[i]);
+ }
+
+ if (subpass_attachment_count) {
+ pass->subpass_attachments =
+ vk_alloc2(&device->vk.alloc, pAllocator,
+ subpass_attachment_count * sizeof(struct radv_subpass_attachment), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (pass->subpass_attachments == NULL) {
+ radv_destroy_render_pass(device, pAllocator, pass);
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+ } else
+ pass->subpass_attachments = NULL;
+
+ p = pass->subpass_attachments;
+ for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
+ const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
+ struct radv_subpass *subpass = &pass->subpasses[i];
+
+ subpass->input_count = desc->inputAttachmentCount;
+ subpass->color_count = desc->colorAttachmentCount;
+ subpass->attachment_count = radv_num_subpass_attachments2(desc);
+ subpass->attachments = p;
+ subpass->view_mask = desc->viewMask;
+
+ if (desc->inputAttachmentCount > 0) {
+ subpass->input_attachments = p;
+ p += desc->inputAttachmentCount;
+
+ for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
+ subpass->input_attachments[j] = (struct radv_subpass_attachment){
+ .attachment = desc->pInputAttachments[j].attachment,
+ .layout = desc->pInputAttachments[j].layout,
+ .stencil_layout = stencil_ref_layout(&desc->pInputAttachments[j]),
+ };
+ }
+ }
+
+ if (desc->colorAttachmentCount > 0) {
+ subpass->color_attachments = p;
+ p += desc->colorAttachmentCount;
+
+ for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
+ subpass->color_attachments[j] = (struct radv_subpass_attachment){
+ .attachment = desc->pColorAttachments[j].attachment,
+ .layout = desc->pColorAttachments[j].layout,
+ };
+ }
+ }
+
+ if (desc->pResolveAttachments) {
+ subpass->resolve_attachments = p;
+ p += desc->colorAttachmentCount;
+
+ for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
+ subpass->resolve_attachments[j] = (struct radv_subpass_attachment){
+ .attachment = desc->pResolveAttachments[j].attachment,
+ .layout = desc->pResolveAttachments[j].layout,
+ };
+ }
+ }
+
+ if (desc->pDepthStencilAttachment) {
+ subpass->depth_stencil_attachment = p++;
+
+ *subpass->depth_stencil_attachment = (struct radv_subpass_attachment){
+ .attachment = desc->pDepthStencilAttachment->attachment,
+ .layout = desc->pDepthStencilAttachment->layout,
+ .stencil_layout = stencil_ref_layout(desc->pDepthStencilAttachment),
+ };
+ }
+
+ const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
+ vk_find_struct_const(desc->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE);
+
+ if (ds_resolve && ds_resolve->pDepthStencilResolveAttachment) {
+ subpass->ds_resolve_attachment = p++;
+
+ *subpass->ds_resolve_attachment = (struct radv_subpass_attachment){
+ .attachment = ds_resolve->pDepthStencilResolveAttachment->attachment,
+ .layout = ds_resolve->pDepthStencilResolveAttachment->layout,
+ .stencil_layout = stencil_ref_layout(ds_resolve->pDepthStencilResolveAttachment),
+ };
+
+ subpass->depth_resolve_mode = ds_resolve->depthResolveMode;
+ subpass->stencil_resolve_mode = ds_resolve->stencilResolveMode;
+ }
+ }
+
+ for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) {
+ const VkSubpassDependency2 *dep = &pCreateInfo->pDependencies[i];
+
+ radv_render_pass_add_subpass_dep(pass, &pCreateInfo->pDependencies[i]);
+
+ /* Determine if the subpass has explicit dependencies from/to
+ * VK_SUBPASS_EXTERNAL.
+ */
+ if (dep->srcSubpass == VK_SUBPASS_EXTERNAL && dep->dstSubpass != VK_SUBPASS_EXTERNAL) {
+ pass->subpasses[dep->dstSubpass].has_ingoing_dep = true;
+ }
+
+ if (dep->dstSubpass == VK_SUBPASS_EXTERNAL && dep->srcSubpass != VK_SUBPASS_EXTERNAL) {
+ pass->subpasses[dep->srcSubpass].has_outgoing_dep = true;
+ }
+ }
+
+ radv_render_pass_compile(pass);
+
+ radv_render_pass_add_implicit_deps(pass);
+
+ *pRenderPass = radv_render_pass_to_handle(pass);
+
+ return VK_SUCCESS;
}
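
The render pass creation above uses a single allocation for the pass object: the struct is followed in the same block by the subpass array, the attachment array sits at a pre-computed byte offset, and a second allocation is then walked with the bump pointer p to hand out per-subpass attachment references. Below is a minimal standalone sketch of the same "one block plus computed offset" layout; the names and sizes are illustrative only and are not part of RADV.

#include <stdint.h>
#include <stdlib.h>

struct item {
   uint32_t value;
};

struct container {
   uint32_t item_count;
   struct item *items; /* points into the same allocation */
};

static struct container *
container_create(uint32_t item_count)
{
   size_t size = sizeof(struct container);
   size_t items_offset = size;
   size += item_count * sizeof(struct item);

   /* One allocation covers the header and the trailing array, so a single
    * free() releases everything and the array needs no separate lifetime
    * tracking. */
   struct container *c = calloc(1, size);
   if (!c)
      return NULL;

   c->item_count = item_count;
   c->items = (struct item *)((uint8_t *)c + items_offset);
   return c;
}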
-void radv_DestroyRenderPass(
- VkDevice _device,
- VkRenderPass _pass,
- const VkAllocationCallbacks* pAllocator)
+void
+radv_DestroyRenderPass(VkDevice _device, VkRenderPass _pass,
+ const VkAllocationCallbacks *pAllocator)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_render_pass, pass, _pass);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_render_pass, pass, _pass);
- if (!_pass)
- return;
+ if (!_pass)
+ return;
- radv_destroy_render_pass(device, pAllocator, pass);
+ radv_destroy_render_pass(device, pAllocator, pass);
}
-void radv_GetRenderAreaGranularity(
- VkDevice device,
- VkRenderPass renderPass,
- VkExtent2D* pGranularity)
+void
+radv_GetRenderAreaGranularity(VkDevice device, VkRenderPass renderPass, VkExtent2D *pGranularity)
{
- pGranularity->width = 1;
- pGranularity->height = 1;
+ pGranularity->width = 1;
+ pGranularity->height = 1;
}
-
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index b47dc5ae4fe..71b2aaca605 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -25,559 +25,558 @@
* IN THE SOFTWARE.
*/
+#include "nir/nir.h"
+#include "nir/nir_builder.h"
+#include "nir/nir_xfb_info.h"
+#include "spirv/nir_spirv.h"
#include "util/disk_cache.h"
#include "util/mesa-sha1.h"
#include "util/u_atomic.h"
+#include "radv_cs.h"
#include "radv_debug.h"
#include "radv_private.h"
-#include "radv_cs.h"
#include "radv_shader.h"
-#include "nir/nir.h"
-#include "nir/nir_builder.h"
-#include "nir/nir_xfb_info.h"
-#include "spirv/nir_spirv.h"
#include "vk_util.h"
-#include "sid.h"
+#include "util/debug.h"
#include "ac_binary.h"
+#include "ac_exp_param.h"
#include "ac_llvm_util.h"
#include "ac_nir_to_llvm.h"
-#include "vk_format.h"
-#include "util/debug.h"
-#include "ac_exp_param.h"
#include "ac_shader_util.h"
+#include "aco_interface.h"
+#include "sid.h"
+#include "vk_format.h"
struct radv_blend_state {
- uint32_t blend_enable_4bit;
- uint32_t need_src_alpha;
+ uint32_t blend_enable_4bit;
+ uint32_t need_src_alpha;
- uint32_t cb_color_control;
- uint32_t cb_target_mask;
- uint32_t cb_target_enabled_4bit;
- uint32_t sx_mrt_blend_opt[8];
- uint32_t cb_blend_control[8];
+ uint32_t cb_color_control;
+ uint32_t cb_target_mask;
+ uint32_t cb_target_enabled_4bit;
+ uint32_t sx_mrt_blend_opt[8];
+ uint32_t cb_blend_control[8];
- uint32_t spi_shader_col_format;
- uint32_t col_format_is_int8;
- uint32_t col_format_is_int10;
- uint32_t cb_shader_mask;
- uint32_t db_alpha_to_mask;
+ uint32_t spi_shader_col_format;
+ uint32_t col_format_is_int8;
+ uint32_t col_format_is_int10;
+ uint32_t cb_shader_mask;
+ uint32_t db_alpha_to_mask;
- uint32_t commutative_4bit;
+ uint32_t commutative_4bit;
- bool single_cb_enable;
- bool mrt0_is_dual_src;
+ bool single_cb_enable;
+ bool mrt0_is_dual_src;
};
struct radv_dsa_order_invariance {
- /* Whether the final result in Z/S buffers is guaranteed to be
- * invariant under changes to the order in which fragments arrive.
- */
- bool zs;
-
- /* Whether the set of fragments that pass the combined Z/S test is
- * guaranteed to be invariant under changes to the order in which
- * fragments arrive.
- */
- bool pass_set;
+ /* Whether the final result in Z/S buffers is guaranteed to be
+ * invariant under changes to the order in which fragments arrive.
+ */
+ bool zs;
+
+ /* Whether the set of fragments that pass the combined Z/S test is
+ * guaranteed to be invariant under changes to the order in which
+ * fragments arrive.
+ */
+ bool pass_set;
};
static const VkPipelineMultisampleStateCreateInfo *
radv_pipeline_get_multisample_state(const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
- if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable)
- return pCreateInfo->pMultisampleState;
- return NULL;
+ if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable)
+ return pCreateInfo->pMultisampleState;
+ return NULL;
}
static const VkPipelineTessellationStateCreateInfo *
radv_pipeline_get_tessellation_state(const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
- for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
- if (pCreateInfo->pStages[i].stage == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT ||
- pCreateInfo->pStages[i].stage == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) {
- return pCreateInfo->pTessellationState;
- }
- }
- return NULL;
+ for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
+ if (pCreateInfo->pStages[i].stage == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT ||
+ pCreateInfo->pStages[i].stage == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) {
+ return pCreateInfo->pTessellationState;
+ }
+ }
+ return NULL;
}
static const VkPipelineDepthStencilStateCreateInfo *
radv_pipeline_get_depth_stencil_state(const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
- RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
- struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
+ RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
+ struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
- if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
- subpass->depth_stencil_attachment)
- return pCreateInfo->pDepthStencilState;
- return NULL;
+ if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
+ subpass->depth_stencil_attachment)
+ return pCreateInfo->pDepthStencilState;
+ return NULL;
}
static const VkPipelineColorBlendStateCreateInfo *
radv_pipeline_get_color_blend_state(const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
- RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
- struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
+ RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
+ struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
- if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
- subpass->has_color_att)
- return pCreateInfo->pColorBlendState;
- return NULL;
+ if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable && subpass->has_color_att)
+ return pCreateInfo->pColorBlendState;
+ return NULL;
}
-bool radv_pipeline_has_ngg(const struct radv_pipeline *pipeline)
+bool
+radv_pipeline_has_ngg(const struct radv_pipeline *pipeline)
{
- struct radv_shader_variant *variant = NULL;
- if (pipeline->shaders[MESA_SHADER_GEOMETRY])
- variant = pipeline->shaders[MESA_SHADER_GEOMETRY];
- else if (pipeline->shaders[MESA_SHADER_TESS_EVAL])
- variant = pipeline->shaders[MESA_SHADER_TESS_EVAL];
- else if (pipeline->shaders[MESA_SHADER_VERTEX])
- variant = pipeline->shaders[MESA_SHADER_VERTEX];
- else
- return false;
- return variant->info.is_ngg;
+ struct radv_shader_variant *variant = NULL;
+ if (pipeline->shaders[MESA_SHADER_GEOMETRY])
+ variant = pipeline->shaders[MESA_SHADER_GEOMETRY];
+ else if (pipeline->shaders[MESA_SHADER_TESS_EVAL])
+ variant = pipeline->shaders[MESA_SHADER_TESS_EVAL];
+ else if (pipeline->shaders[MESA_SHADER_VERTEX])
+ variant = pipeline->shaders[MESA_SHADER_VERTEX];
+ else
+ return false;
+ return variant->info.is_ngg;
}
-bool radv_pipeline_has_ngg_passthrough(const struct radv_pipeline *pipeline)
+bool
+radv_pipeline_has_ngg_passthrough(const struct radv_pipeline *pipeline)
{
- assert(radv_pipeline_has_ngg(pipeline));
-
- struct radv_shader_variant *variant = NULL;
- if (pipeline->shaders[MESA_SHADER_GEOMETRY])
- variant = pipeline->shaders[MESA_SHADER_GEOMETRY];
- else if (pipeline->shaders[MESA_SHADER_TESS_EVAL])
- variant = pipeline->shaders[MESA_SHADER_TESS_EVAL];
- else if (pipeline->shaders[MESA_SHADER_VERTEX])
- variant = pipeline->shaders[MESA_SHADER_VERTEX];
- else
- return false;
- return variant->info.is_ngg_passthrough;
+ assert(radv_pipeline_has_ngg(pipeline));
+
+ struct radv_shader_variant *variant = NULL;
+ if (pipeline->shaders[MESA_SHADER_GEOMETRY])
+ variant = pipeline->shaders[MESA_SHADER_GEOMETRY];
+ else if (pipeline->shaders[MESA_SHADER_TESS_EVAL])
+ variant = pipeline->shaders[MESA_SHADER_TESS_EVAL];
+ else if (pipeline->shaders[MESA_SHADER_VERTEX])
+ variant = pipeline->shaders[MESA_SHADER_VERTEX];
+ else
+ return false;
+ return variant->info.is_ngg_passthrough;
}
-bool radv_pipeline_has_gs_copy_shader(const struct radv_pipeline *pipeline)
+bool
+radv_pipeline_has_gs_copy_shader(const struct radv_pipeline *pipeline)
{
- if (!radv_pipeline_has_gs(pipeline))
- return false;
-
- /* The GS copy shader is required if the pipeline has GS on GFX6-GFX9.
- * On GFX10, it might be required in rare cases if it's not possible to
- * enable NGG.
- */
- if (radv_pipeline_has_ngg(pipeline))
- return false;
-
- assert(pipeline->gs_copy_shader);
- return true;
+ if (!radv_pipeline_has_gs(pipeline))
+ return false;
+
+ /* The GS copy shader is required if the pipeline has GS on GFX6-GFX9.
+ * On GFX10, it might be required in rare cases if it's not possible to
+ * enable NGG.
+ */
+ if (radv_pipeline_has_ngg(pipeline))
+ return false;
+
+ assert(pipeline->gs_copy_shader);
+ return true;
}
static void
-radv_pipeline_destroy(struct radv_device *device,
- struct radv_pipeline *pipeline,
- const VkAllocationCallbacks* allocator)
+radv_pipeline_destroy(struct radv_device *device, struct radv_pipeline *pipeline,
+ const VkAllocationCallbacks *allocator)
{
- for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i)
- if (pipeline->shaders[i])
- radv_shader_variant_destroy(device, pipeline->shaders[i]);
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i)
+ if (pipeline->shaders[i])
+ radv_shader_variant_destroy(device, pipeline->shaders[i]);
- if (pipeline->gs_copy_shader)
- radv_shader_variant_destroy(device, pipeline->gs_copy_shader);
+ if (pipeline->gs_copy_shader)
+ radv_shader_variant_destroy(device, pipeline->gs_copy_shader);
- if(pipeline->cs.buf)
- free(pipeline->cs.buf);
+ if (pipeline->cs.buf)
+ free(pipeline->cs.buf);
- vk_object_base_finish(&pipeline->base);
- vk_free2(&device->vk.alloc, allocator, pipeline);
+ vk_object_base_finish(&pipeline->base);
+ vk_free2(&device->vk.alloc, allocator, pipeline);
}
-void radv_DestroyPipeline(
- VkDevice _device,
- VkPipeline _pipeline,
- const VkAllocationCallbacks* pAllocator)
+void
+radv_DestroyPipeline(VkDevice _device, VkPipeline _pipeline,
+ const VkAllocationCallbacks *pAllocator)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
- if (!_pipeline)
- return;
+ if (!_pipeline)
+ return;
- radv_pipeline_destroy(device, pipeline, pAllocator);
+ radv_pipeline_destroy(device, pipeline, pAllocator);
}
-static uint32_t get_hash_flags(const struct radv_device *device, bool stats)
+static uint32_t
+get_hash_flags(const struct radv_device *device, bool stats)
{
- uint32_t hash_flags = 0;
-
- if (device->instance->debug_flags & RADV_DEBUG_NO_NGG)
- hash_flags |= RADV_HASH_SHADER_NO_NGG;
- if (device->physical_device->cs_wave_size == 32)
- hash_flags |= RADV_HASH_SHADER_CS_WAVE32;
- if (device->physical_device->ps_wave_size == 32)
- hash_flags |= RADV_HASH_SHADER_PS_WAVE32;
- if (device->physical_device->ge_wave_size == 32)
- hash_flags |= RADV_HASH_SHADER_GE_WAVE32;
- if (device->physical_device->use_llvm)
- hash_flags |= RADV_HASH_SHADER_LLVM;
- if (device->instance->debug_flags & RADV_DEBUG_DISCARD_TO_DEMOTE)
- hash_flags |= RADV_HASH_SHADER_DISCARD_TO_DEMOTE;
- if (device->instance->enable_mrt_output_nan_fixup)
- hash_flags |= RADV_HASH_SHADER_MRT_NAN_FIXUP;
- if (device->instance->debug_flags & RADV_DEBUG_INVARIANT_GEOM)
- hash_flags |= RADV_HASH_SHADER_INVARIANT_GEOM;
- if (stats)
- hash_flags |= RADV_HASH_SHADER_KEEP_STATISTICS;
- if (device->force_vrs != RADV_FORCE_VRS_2x2)
- hash_flags |= RADV_HASH_SHADER_FORCE_VRS_2x2;
- if (device->force_vrs != RADV_FORCE_VRS_2x1)
- hash_flags |= RADV_HASH_SHADER_FORCE_VRS_2x1;
- if (device->force_vrs != RADV_FORCE_VRS_1x2)
- hash_flags |= RADV_HASH_SHADER_FORCE_VRS_1x2;
- return hash_flags;
+ uint32_t hash_flags = 0;
+
+ if (device->instance->debug_flags & RADV_DEBUG_NO_NGG)
+ hash_flags |= RADV_HASH_SHADER_NO_NGG;
+ if (device->physical_device->cs_wave_size == 32)
+ hash_flags |= RADV_HASH_SHADER_CS_WAVE32;
+ if (device->physical_device->ps_wave_size == 32)
+ hash_flags |= RADV_HASH_SHADER_PS_WAVE32;
+ if (device->physical_device->ge_wave_size == 32)
+ hash_flags |= RADV_HASH_SHADER_GE_WAVE32;
+ if (device->physical_device->use_llvm)
+ hash_flags |= RADV_HASH_SHADER_LLVM;
+ if (device->instance->debug_flags & RADV_DEBUG_DISCARD_TO_DEMOTE)
+ hash_flags |= RADV_HASH_SHADER_DISCARD_TO_DEMOTE;
+ if (device->instance->enable_mrt_output_nan_fixup)
+ hash_flags |= RADV_HASH_SHADER_MRT_NAN_FIXUP;
+ if (device->instance->debug_flags & RADV_DEBUG_INVARIANT_GEOM)
+ hash_flags |= RADV_HASH_SHADER_INVARIANT_GEOM;
+ if (stats)
+ hash_flags |= RADV_HASH_SHADER_KEEP_STATISTICS;
+ if (device->force_vrs != RADV_FORCE_VRS_2x2)
+ hash_flags |= RADV_HASH_SHADER_FORCE_VRS_2x2;
+ if (device->force_vrs != RADV_FORCE_VRS_2x1)
+ hash_flags |= RADV_HASH_SHADER_FORCE_VRS_2x1;
+ if (device->force_vrs != RADV_FORCE_VRS_1x2)
+ hash_flags |= RADV_HASH_SHADER_FORCE_VRS_1x2;
+ return hash_flags;
}
static void
-radv_pipeline_init_scratch(const struct radv_device *device,
- struct radv_pipeline *pipeline)
+radv_pipeline_init_scratch(const struct radv_device *device, struct radv_pipeline *pipeline)
{
- unsigned scratch_bytes_per_wave = 0;
- unsigned max_waves = 0;
- unsigned min_waves = 1;
-
- for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
- if (pipeline->shaders[i] &&
- pipeline->shaders[i]->config.scratch_bytes_per_wave) {
- unsigned max_stage_waves = device->scratch_waves;
-
- scratch_bytes_per_wave = MAX2(scratch_bytes_per_wave,
- pipeline->shaders[i]->config.scratch_bytes_per_wave);
-
- max_stage_waves = MIN2(max_stage_waves,
- 4 * device->physical_device->rad_info.num_good_compute_units *
- (256 / pipeline->shaders[i]->config.num_vgprs));
- max_waves = MAX2(max_waves, max_stage_waves);
- }
- }
-
- if (pipeline->shaders[MESA_SHADER_COMPUTE]) {
- unsigned group_size = pipeline->shaders[MESA_SHADER_COMPUTE]->info.cs.block_size[0] *
- pipeline->shaders[MESA_SHADER_COMPUTE]->info.cs.block_size[1] *
- pipeline->shaders[MESA_SHADER_COMPUTE]->info.cs.block_size[2];
- min_waves = MAX2(min_waves, round_up_u32(group_size, 64));
- }
-
- pipeline->scratch_bytes_per_wave = scratch_bytes_per_wave;
- pipeline->max_waves = max_waves;
+ unsigned scratch_bytes_per_wave = 0;
+ unsigned max_waves = 0;
+ unsigned min_waves = 1;
+
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
+ if (pipeline->shaders[i] && pipeline->shaders[i]->config.scratch_bytes_per_wave) {
+ unsigned max_stage_waves = device->scratch_waves;
+
+ scratch_bytes_per_wave =
+ MAX2(scratch_bytes_per_wave, pipeline->shaders[i]->config.scratch_bytes_per_wave);
+
+ max_stage_waves =
+ MIN2(max_stage_waves, 4 * device->physical_device->rad_info.num_good_compute_units *
+ (256 / pipeline->shaders[i]->config.num_vgprs));
+ max_waves = MAX2(max_waves, max_stage_waves);
+ }
+ }
+
+ if (pipeline->shaders[MESA_SHADER_COMPUTE]) {
+ unsigned group_size = pipeline->shaders[MESA_SHADER_COMPUTE]->info.cs.block_size[0] *
+ pipeline->shaders[MESA_SHADER_COMPUTE]->info.cs.block_size[1] *
+ pipeline->shaders[MESA_SHADER_COMPUTE]->info.cs.block_size[2];
+ min_waves = MAX2(min_waves, round_up_u32(group_size, 64));
+ }
+
+ pipeline->scratch_bytes_per_wave = scratch_bytes_per_wave;
+ pipeline->max_waves = max_waves;
}
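
radv_pipeline_init_scratch caps the wave count of each scratch-using stage both by the device-wide scratch wave limit and by VGPR pressure: 4 * num_good_compute_units * (256 / num_vgprs), i.e. roughly four SIMDs per CU, each with a 256-entry VGPR budget per wave slot. A worked example with assumed numbers, not values from a real device:

#include <stdio.h>

int
main(void)
{
   unsigned scratch_waves = 1024; /* assumed device->scratch_waves */
   unsigned num_cu = 40;          /* assumed number of compute units */
   unsigned num_vgprs = 64;       /* assumed VGPRs used by one stage */

   unsigned by_vgprs = 4 * num_cu * (256 / num_vgprs); /* 4 * 40 * 4 = 640 */
   unsigned max_stage_waves = by_vgprs < scratch_waves ? by_vgprs : scratch_waves;

   printf("max_stage_waves = %u\n", max_stage_waves); /* 640 */
   return 0;
}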
-static uint32_t si_translate_blend_logic_op(VkLogicOp op)
+static uint32_t
+si_translate_blend_logic_op(VkLogicOp op)
{
- switch (op) {
- case VK_LOGIC_OP_CLEAR:
- return V_028808_ROP3_CLEAR;
- case VK_LOGIC_OP_AND:
- return V_028808_ROP3_AND;
- case VK_LOGIC_OP_AND_REVERSE:
- return V_028808_ROP3_AND_REVERSE;
- case VK_LOGIC_OP_COPY:
- return V_028808_ROP3_COPY;
- case VK_LOGIC_OP_AND_INVERTED:
- return V_028808_ROP3_AND_INVERTED;
- case VK_LOGIC_OP_NO_OP:
- return V_028808_ROP3_NO_OP;
- case VK_LOGIC_OP_XOR:
- return V_028808_ROP3_XOR;
- case VK_LOGIC_OP_OR:
- return V_028808_ROP3_OR;
- case VK_LOGIC_OP_NOR:
- return V_028808_ROP3_NOR;
- case VK_LOGIC_OP_EQUIVALENT:
- return V_028808_ROP3_EQUIVALENT;
- case VK_LOGIC_OP_INVERT:
- return V_028808_ROP3_INVERT;
- case VK_LOGIC_OP_OR_REVERSE:
- return V_028808_ROP3_OR_REVERSE;
- case VK_LOGIC_OP_COPY_INVERTED:
- return V_028808_ROP3_COPY_INVERTED;
- case VK_LOGIC_OP_OR_INVERTED:
- return V_028808_ROP3_OR_INVERTED;
- case VK_LOGIC_OP_NAND:
- return V_028808_ROP3_NAND;
- case VK_LOGIC_OP_SET:
- return V_028808_ROP3_SET;
- default:
- unreachable("Unhandled logic op");
- }
+ switch (op) {
+ case VK_LOGIC_OP_CLEAR:
+ return V_028808_ROP3_CLEAR;
+ case VK_LOGIC_OP_AND:
+ return V_028808_ROP3_AND;
+ case VK_LOGIC_OP_AND_REVERSE:
+ return V_028808_ROP3_AND_REVERSE;
+ case VK_LOGIC_OP_COPY:
+ return V_028808_ROP3_COPY;
+ case VK_LOGIC_OP_AND_INVERTED:
+ return V_028808_ROP3_AND_INVERTED;
+ case VK_LOGIC_OP_NO_OP:
+ return V_028808_ROP3_NO_OP;
+ case VK_LOGIC_OP_XOR:
+ return V_028808_ROP3_XOR;
+ case VK_LOGIC_OP_OR:
+ return V_028808_ROP3_OR;
+ case VK_LOGIC_OP_NOR:
+ return V_028808_ROP3_NOR;
+ case VK_LOGIC_OP_EQUIVALENT:
+ return V_028808_ROP3_EQUIVALENT;
+ case VK_LOGIC_OP_INVERT:
+ return V_028808_ROP3_INVERT;
+ case VK_LOGIC_OP_OR_REVERSE:
+ return V_028808_ROP3_OR_REVERSE;
+ case VK_LOGIC_OP_COPY_INVERTED:
+ return V_028808_ROP3_COPY_INVERTED;
+ case VK_LOGIC_OP_OR_INVERTED:
+ return V_028808_ROP3_OR_INVERTED;
+ case VK_LOGIC_OP_NAND:
+ return V_028808_ROP3_NAND;
+ case VK_LOGIC_OP_SET:
+ return V_028808_ROP3_SET;
+ default:
+ unreachable("Unhandled logic op");
+ }
}
-
-static uint32_t si_translate_blend_function(VkBlendOp op)
+static uint32_t
+si_translate_blend_function(VkBlendOp op)
{
- switch (op) {
- case VK_BLEND_OP_ADD:
- return V_028780_COMB_DST_PLUS_SRC;
- case VK_BLEND_OP_SUBTRACT:
- return V_028780_COMB_SRC_MINUS_DST;
- case VK_BLEND_OP_REVERSE_SUBTRACT:
- return V_028780_COMB_DST_MINUS_SRC;
- case VK_BLEND_OP_MIN:
- return V_028780_COMB_MIN_DST_SRC;
- case VK_BLEND_OP_MAX:
- return V_028780_COMB_MAX_DST_SRC;
- default:
- return 0;
- }
+ switch (op) {
+ case VK_BLEND_OP_ADD:
+ return V_028780_COMB_DST_PLUS_SRC;
+ case VK_BLEND_OP_SUBTRACT:
+ return V_028780_COMB_SRC_MINUS_DST;
+ case VK_BLEND_OP_REVERSE_SUBTRACT:
+ return V_028780_COMB_DST_MINUS_SRC;
+ case VK_BLEND_OP_MIN:
+ return V_028780_COMB_MIN_DST_SRC;
+ case VK_BLEND_OP_MAX:
+ return V_028780_COMB_MAX_DST_SRC;
+ default:
+ return 0;
+ }
}
-static uint32_t si_translate_blend_factor(VkBlendFactor factor)
+static uint32_t
+si_translate_blend_factor(VkBlendFactor factor)
{
- switch (factor) {
- case VK_BLEND_FACTOR_ZERO:
- return V_028780_BLEND_ZERO;
- case VK_BLEND_FACTOR_ONE:
- return V_028780_BLEND_ONE;
- case VK_BLEND_FACTOR_SRC_COLOR:
- return V_028780_BLEND_SRC_COLOR;
- case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
- return V_028780_BLEND_ONE_MINUS_SRC_COLOR;
- case VK_BLEND_FACTOR_DST_COLOR:
- return V_028780_BLEND_DST_COLOR;
- case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
- return V_028780_BLEND_ONE_MINUS_DST_COLOR;
- case VK_BLEND_FACTOR_SRC_ALPHA:
- return V_028780_BLEND_SRC_ALPHA;
- case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
- return V_028780_BLEND_ONE_MINUS_SRC_ALPHA;
- case VK_BLEND_FACTOR_DST_ALPHA:
- return V_028780_BLEND_DST_ALPHA;
- case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
- return V_028780_BLEND_ONE_MINUS_DST_ALPHA;
- case VK_BLEND_FACTOR_CONSTANT_COLOR:
- return V_028780_BLEND_CONSTANT_COLOR;
- case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
- return V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR;
- case VK_BLEND_FACTOR_CONSTANT_ALPHA:
- return V_028780_BLEND_CONSTANT_ALPHA;
- case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
- return V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA;
- case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
- return V_028780_BLEND_SRC_ALPHA_SATURATE;
- case VK_BLEND_FACTOR_SRC1_COLOR:
- return V_028780_BLEND_SRC1_COLOR;
- case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
- return V_028780_BLEND_INV_SRC1_COLOR;
- case VK_BLEND_FACTOR_SRC1_ALPHA:
- return V_028780_BLEND_SRC1_ALPHA;
- case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
- return V_028780_BLEND_INV_SRC1_ALPHA;
- default:
- return 0;
- }
+ switch (factor) {
+ case VK_BLEND_FACTOR_ZERO:
+ return V_028780_BLEND_ZERO;
+ case VK_BLEND_FACTOR_ONE:
+ return V_028780_BLEND_ONE;
+ case VK_BLEND_FACTOR_SRC_COLOR:
+ return V_028780_BLEND_SRC_COLOR;
+ case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
+ return V_028780_BLEND_ONE_MINUS_SRC_COLOR;
+ case VK_BLEND_FACTOR_DST_COLOR:
+ return V_028780_BLEND_DST_COLOR;
+ case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
+ return V_028780_BLEND_ONE_MINUS_DST_COLOR;
+ case VK_BLEND_FACTOR_SRC_ALPHA:
+ return V_028780_BLEND_SRC_ALPHA;
+ case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
+ return V_028780_BLEND_ONE_MINUS_SRC_ALPHA;
+ case VK_BLEND_FACTOR_DST_ALPHA:
+ return V_028780_BLEND_DST_ALPHA;
+ case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
+ return V_028780_BLEND_ONE_MINUS_DST_ALPHA;
+ case VK_BLEND_FACTOR_CONSTANT_COLOR:
+ return V_028780_BLEND_CONSTANT_COLOR;
+ case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
+ return V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR;
+ case VK_BLEND_FACTOR_CONSTANT_ALPHA:
+ return V_028780_BLEND_CONSTANT_ALPHA;
+ case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
+ return V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA;
+ case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
+ return V_028780_BLEND_SRC_ALPHA_SATURATE;
+ case VK_BLEND_FACTOR_SRC1_COLOR:
+ return V_028780_BLEND_SRC1_COLOR;
+ case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
+ return V_028780_BLEND_INV_SRC1_COLOR;
+ case VK_BLEND_FACTOR_SRC1_ALPHA:
+ return V_028780_BLEND_SRC1_ALPHA;
+ case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
+ return V_028780_BLEND_INV_SRC1_ALPHA;
+ default:
+ return 0;
+ }
}
-static uint32_t si_translate_blend_opt_function(VkBlendOp op)
+static uint32_t
+si_translate_blend_opt_function(VkBlendOp op)
{
- switch (op) {
- case VK_BLEND_OP_ADD:
- return V_028760_OPT_COMB_ADD;
- case VK_BLEND_OP_SUBTRACT:
- return V_028760_OPT_COMB_SUBTRACT;
- case VK_BLEND_OP_REVERSE_SUBTRACT:
- return V_028760_OPT_COMB_REVSUBTRACT;
- case VK_BLEND_OP_MIN:
- return V_028760_OPT_COMB_MIN;
- case VK_BLEND_OP_MAX:
- return V_028760_OPT_COMB_MAX;
- default:
- return V_028760_OPT_COMB_BLEND_DISABLED;
- }
+ switch (op) {
+ case VK_BLEND_OP_ADD:
+ return V_028760_OPT_COMB_ADD;
+ case VK_BLEND_OP_SUBTRACT:
+ return V_028760_OPT_COMB_SUBTRACT;
+ case VK_BLEND_OP_REVERSE_SUBTRACT:
+ return V_028760_OPT_COMB_REVSUBTRACT;
+ case VK_BLEND_OP_MIN:
+ return V_028760_OPT_COMB_MIN;
+ case VK_BLEND_OP_MAX:
+ return V_028760_OPT_COMB_MAX;
+ default:
+ return V_028760_OPT_COMB_BLEND_DISABLED;
+ }
}
-static uint32_t si_translate_blend_opt_factor(VkBlendFactor factor, bool is_alpha)
+static uint32_t
+si_translate_blend_opt_factor(VkBlendFactor factor, bool is_alpha)
{
- switch (factor) {
- case VK_BLEND_FACTOR_ZERO:
- return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_ALL;
- case VK_BLEND_FACTOR_ONE:
- return V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE;
- case VK_BLEND_FACTOR_SRC_COLOR:
- return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0
- : V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0;
- case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
- return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1
- : V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1;
- case VK_BLEND_FACTOR_SRC_ALPHA:
- return V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0;
- case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
- return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1;
- case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
- return is_alpha ? V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE
- : V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
- default:
- return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
- }
+ switch (factor) {
+ case VK_BLEND_FACTOR_ZERO:
+ return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_ALL;
+ case VK_BLEND_FACTOR_ONE:
+ return V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE;
+ case VK_BLEND_FACTOR_SRC_COLOR:
+ return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0
+ : V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0;
+ case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
+ return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1
+ : V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1;
+ case VK_BLEND_FACTOR_SRC_ALPHA:
+ return V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0;
+ case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
+ return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1;
+ case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
+ return is_alpha ? V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE
+ : V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
+ default:
+ return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
+ }
}
/**
* Get rid of DST in the blend factors by commuting the operands:
* func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
*/
-static void si_blend_remove_dst(VkBlendOp *func, VkBlendFactor *src_factor,
- VkBlendFactor *dst_factor, VkBlendFactor expected_dst,
- VkBlendFactor replacement_src)
+static void
+si_blend_remove_dst(VkBlendOp *func, VkBlendFactor *src_factor, VkBlendFactor *dst_factor,
+ VkBlendFactor expected_dst, VkBlendFactor replacement_src)
{
- if (*src_factor == expected_dst &&
- *dst_factor == VK_BLEND_FACTOR_ZERO) {
- *src_factor = VK_BLEND_FACTOR_ZERO;
- *dst_factor = replacement_src;
-
- /* Commuting the operands requires reversing subtractions. */
- if (*func == VK_BLEND_OP_SUBTRACT)
- *func = VK_BLEND_OP_REVERSE_SUBTRACT;
- else if (*func == VK_BLEND_OP_REVERSE_SUBTRACT)
- *func = VK_BLEND_OP_SUBTRACT;
- }
+ if (*src_factor == expected_dst && *dst_factor == VK_BLEND_FACTOR_ZERO) {
+ *src_factor = VK_BLEND_FACTOR_ZERO;
+ *dst_factor = replacement_src;
+
+ /* Commuting the operands requires reversing subtractions. */
+ if (*func == VK_BLEND_OP_SUBTRACT)
+ *func = VK_BLEND_OP_REVERSE_SUBTRACT;
+ else if (*func == VK_BLEND_OP_REVERSE_SUBTRACT)
+ *func = VK_BLEND_OP_SUBTRACT;
+ }
}
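
The comment above si_blend_remove_dst states the identity being exploited: when the destination factor is ZERO and the source factor equals the destination value, func(src * DST, dst * 0) can be rewritten as func(src * 0, dst * SRC), and SUBTRACT flips to REVERSE_SUBTRACT because the operands swap sides. A small scalar check of that identity, illustrative only and not RADV code:

#include <assert.h>

int
main(void)
{
   float src = 0.25f, dst = 0.5f;

   /* ADD: (src * DST) + (dst * 0)  ==  (src * 0) + (dst * SRC) */
   assert(src * dst + dst * 0.0f == src * 0.0f + dst * src);

   /* SUBTRACT becomes REVERSE_SUBTRACT once the operands swap:
    * (src * DST) - (dst * 0)  ==  (dst * SRC) - (src * 0) */
   assert(src * dst - dst * 0.0f == dst * src - src * 0.0f);
   return 0;
}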
-static bool si_blend_factor_uses_dst(VkBlendFactor factor)
+static bool
+si_blend_factor_uses_dst(VkBlendFactor factor)
{
- return factor == VK_BLEND_FACTOR_DST_COLOR ||
- factor == VK_BLEND_FACTOR_DST_ALPHA ||
- factor == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE ||
- factor == VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA ||
- factor == VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR;
+ return factor == VK_BLEND_FACTOR_DST_COLOR || factor == VK_BLEND_FACTOR_DST_ALPHA ||
+ factor == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE ||
+ factor == VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA ||
+ factor == VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR;
}
-static bool is_dual_src(VkBlendFactor factor)
+static bool
+is_dual_src(VkBlendFactor factor)
{
- switch (factor) {
- case VK_BLEND_FACTOR_SRC1_COLOR:
- case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
- case VK_BLEND_FACTOR_SRC1_ALPHA:
- case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
- return true;
- default:
- return false;
- }
+ switch (factor) {
+ case VK_BLEND_FACTOR_SRC1_COLOR:
+ case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
+ case VK_BLEND_FACTOR_SRC1_ALPHA:
+ case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
+ return true;
+ default:
+ return false;
+ }
}
-static unsigned radv_choose_spi_color_format(const struct radv_device *device,
- VkFormat vk_format,
- bool blend_enable,
- bool blend_need_alpha)
+static unsigned
+radv_choose_spi_color_format(const struct radv_device *device, VkFormat vk_format,
+ bool blend_enable, bool blend_need_alpha)
{
- const struct util_format_description *desc = vk_format_description(vk_format);
- bool use_rbplus = device->physical_device->rad_info.rbplus_allowed;
- struct ac_spi_color_formats formats = {0};
- unsigned format, ntype, swap;
-
- format = radv_translate_colorformat(vk_format);
- ntype = radv_translate_color_numformat(vk_format, desc,
- vk_format_get_first_non_void_channel(vk_format));
- swap = radv_translate_colorswap(vk_format, false);
-
- ac_choose_spi_color_formats(format, swap, ntype, false, use_rbplus,
- &formats);
-
- if (blend_enable && blend_need_alpha)
- return formats.blend_alpha;
- else if(blend_need_alpha)
- return formats.alpha;
- else if(blend_enable)
- return formats.blend;
- else
- return formats.normal;
+ const struct util_format_description *desc = vk_format_description(vk_format);
+ bool use_rbplus = device->physical_device->rad_info.rbplus_allowed;
+ struct ac_spi_color_formats formats = {0};
+ unsigned format, ntype, swap;
+
+ format = radv_translate_colorformat(vk_format);
+ ntype = radv_translate_color_numformat(vk_format, desc,
+ vk_format_get_first_non_void_channel(vk_format));
+ swap = radv_translate_colorswap(vk_format, false);
+
+ ac_choose_spi_color_formats(format, swap, ntype, false, use_rbplus, &formats);
+
+ if (blend_enable && blend_need_alpha)
+ return formats.blend_alpha;
+ else if (blend_need_alpha)
+ return formats.alpha;
+ else if (blend_enable)
+ return formats.blend;
+ else
+ return formats.normal;
}
static bool
format_is_int8(VkFormat format)
{
- const struct util_format_description *desc = vk_format_description(format);
- int channel = vk_format_get_first_non_void_channel(format);
+ const struct util_format_description *desc = vk_format_description(format);
+ int channel = vk_format_get_first_non_void_channel(format);
- return channel >= 0 && desc->channel[channel].pure_integer &&
- desc->channel[channel].size == 8;
+ return channel >= 0 && desc->channel[channel].pure_integer && desc->channel[channel].size == 8;
}
static bool
format_is_int10(VkFormat format)
{
- const struct util_format_description *desc = vk_format_description(format);
-
- if (desc->nr_channels != 4)
- return false;
- for (unsigned i = 0; i < 4; i++) {
- if (desc->channel[i].pure_integer && desc->channel[i].size == 10)
- return true;
- }
- return false;
+ const struct util_format_description *desc = vk_format_description(format);
+
+ if (desc->nr_channels != 4)
+ return false;
+ for (unsigned i = 0; i < 4; i++) {
+ if (desc->channel[i].pure_integer && desc->channel[i].size == 10)
+ return true;
+ }
+ return false;
}
static void
radv_pipeline_compute_spi_color_formats(const struct radv_pipeline *pipeline,
- const VkGraphicsPipelineCreateInfo *pCreateInfo,
- struct radv_blend_state *blend)
+ const VkGraphicsPipelineCreateInfo *pCreateInfo,
+ struct radv_blend_state *blend)
{
- RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
- struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
- unsigned col_format = 0, is_int8 = 0, is_int10 = 0;
- unsigned num_targets;
-
- for (unsigned i = 0; i < (blend->single_cb_enable ? 1 : subpass->color_count); ++i) {
- unsigned cf;
-
- if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED ||
- !(blend->cb_target_mask & (0xfu << (i * 4)))) {
- cf = V_028714_SPI_SHADER_ZERO;
- } else {
- struct radv_render_pass_attachment *attachment = pass->attachments + subpass->color_attachments[i].attachment;
- bool blend_enable =
- blend->blend_enable_4bit & (0xfu << (i * 4));
-
- cf = radv_choose_spi_color_format(pipeline->device,
- attachment->format,
- blend_enable,
- blend->need_src_alpha & (1 << i));
-
- if (format_is_int8(attachment->format))
- is_int8 |= 1 << i;
- if (format_is_int10(attachment->format))
- is_int10 |= 1 << i;
- }
-
- col_format |= cf << (4 * i);
- }
-
- if (!(col_format & 0xf) && blend->need_src_alpha & (1 << 0)) {
- /* When a subpass doesn't have any color attachments, write the
- * alpha channel of MRT0 when alpha coverage is enabled because
- * the depth attachment needs it.
- */
- col_format |= V_028714_SPI_SHADER_32_AR;
- }
-
- /* If the i-th target format is set, all previous target formats must
- * be non-zero to avoid hangs.
- */
- num_targets = (util_last_bit(col_format) + 3) / 4;
- for (unsigned i = 0; i < num_targets; i++) {
- if (!(col_format & (0xfu << (i * 4)))) {
- col_format |= V_028714_SPI_SHADER_32_R << (i * 4);
- }
- }
-
- /* The output for dual source blending should have the same format as
- * the first output.
- */
- if (blend->mrt0_is_dual_src) {
- assert(!(col_format >> 4));
- col_format |= (col_format & 0xf) << 4;
- }
-
- blend->cb_shader_mask = ac_get_cb_shader_mask(col_format);
- blend->spi_shader_col_format = col_format;
- blend->col_format_is_int8 = is_int8;
- blend->col_format_is_int10 = is_int10;
+ RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
+ struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
+ unsigned col_format = 0, is_int8 = 0, is_int10 = 0;
+ unsigned num_targets;
+
+ for (unsigned i = 0; i < (blend->single_cb_enable ? 1 : subpass->color_count); ++i) {
+ unsigned cf;
+
+ if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED ||
+ !(blend->cb_target_mask & (0xfu << (i * 4)))) {
+ cf = V_028714_SPI_SHADER_ZERO;
+ } else {
+ struct radv_render_pass_attachment *attachment =
+ pass->attachments + subpass->color_attachments[i].attachment;
+ bool blend_enable = blend->blend_enable_4bit & (0xfu << (i * 4));
+
+ cf = radv_choose_spi_color_format(pipeline->device, attachment->format, blend_enable,
+ blend->need_src_alpha & (1 << i));
+
+ if (format_is_int8(attachment->format))
+ is_int8 |= 1 << i;
+ if (format_is_int10(attachment->format))
+ is_int10 |= 1 << i;
+ }
+
+ col_format |= cf << (4 * i);
+ }
+
+ if (!(col_format & 0xf) && blend->need_src_alpha & (1 << 0)) {
+ /* When a subpass doesn't have any color attachments, write the
+ * alpha channel of MRT0 when alpha coverage is enabled because
+ * the depth attachment needs it.
+ */
+ col_format |= V_028714_SPI_SHADER_32_AR;
+ }
+
+ /* If the i-th target format is set, all previous target formats must
+ * be non-zero to avoid hangs.
+ */
+ num_targets = (util_last_bit(col_format) + 3) / 4;
+ for (unsigned i = 0; i < num_targets; i++) {
+ if (!(col_format & (0xfu << (i * 4)))) {
+ col_format |= V_028714_SPI_SHADER_32_R << (i * 4);
+ }
+ }
+
+ /* The output for dual source blending should have the same format as
+ * the first output.
+ */
+ if (blend->mrt0_is_dual_src) {
+ assert(!(col_format >> 4));
+ col_format |= (col_format & 0xf) << 4;
+ }
+
+ blend->cb_shader_mask = ac_get_cb_shader_mask(col_format);
+ blend->spi_shader_col_format = col_format;
+ blend->col_format_is_int8 = is_int8;
+ blend->col_format_is_int10 = is_int10;
}
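
radv_pipeline_compute_spi_color_formats packs one 4-bit export format per color target into col_format and then fills every hole below the highest used target with a dummy single-channel format, because the hardware can hang if a lower MRT slot exports nothing while a higher one does. The sketch below shows just that packing step; the format codes are made-up placeholders rather than the real V_028714_SPI_SHADER_* values, and __builtin_clz stands in for Mesa's util_last_bit.

#include <stdint.h>
#include <stdio.h>

#define FMT_32_R 0x4u /* placeholder, not the real SPI_SHADER_32_R value */

int
main(void)
{
   /* Suppose only MRT2 is used and was assigned format code 0x9. */
   uint32_t col_format = 0x9u << (4 * 2);

   /* Number of targets up to and including the highest used one. */
   unsigned num_targets = ((32 - __builtin_clz(col_format)) + 3) / 4;

   /* Fill the unused lower slots (MRT0, MRT1) with a dummy format. */
   for (unsigned i = 0; i < num_targets; i++) {
      if (!(col_format & (0xfu << (i * 4))))
         col_format |= FMT_32_R << (i * 4);
   }

   printf("col_format = 0x%x\n", col_format); /* prints 0x944 */
   return 0;
}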
/*
@@ -585,1965 +584,1874 @@ radv_pipeline_compute_spi_color_formats(const struct radv_pipeline *pipeline,
* radv_format_meta_fs_key(radv_fs_key_format_exemplars[i]) == i.
*/
const VkFormat radv_fs_key_format_exemplars[NUM_META_FS_KEYS] = {
- VK_FORMAT_R32_SFLOAT,
- VK_FORMAT_R32G32_SFLOAT,
- VK_FORMAT_R8G8B8A8_UNORM,
- VK_FORMAT_R16G16B16A16_UNORM,
- VK_FORMAT_R16G16B16A16_SNORM,
- VK_FORMAT_R16G16B16A16_UINT,
- VK_FORMAT_R16G16B16A16_SINT,
- VK_FORMAT_R32G32B32A32_SFLOAT,
- VK_FORMAT_R8G8B8A8_UINT,
- VK_FORMAT_R8G8B8A8_SINT,
- VK_FORMAT_A2R10G10B10_UINT_PACK32,
- VK_FORMAT_A2R10G10B10_SINT_PACK32,
+ VK_FORMAT_R32_SFLOAT,
+ VK_FORMAT_R32G32_SFLOAT,
+ VK_FORMAT_R8G8B8A8_UNORM,
+ VK_FORMAT_R16G16B16A16_UNORM,
+ VK_FORMAT_R16G16B16A16_SNORM,
+ VK_FORMAT_R16G16B16A16_UINT,
+ VK_FORMAT_R16G16B16A16_SINT,
+ VK_FORMAT_R32G32B32A32_SFLOAT,
+ VK_FORMAT_R8G8B8A8_UINT,
+ VK_FORMAT_R8G8B8A8_SINT,
+ VK_FORMAT_A2R10G10B10_UINT_PACK32,
+ VK_FORMAT_A2R10G10B10_SINT_PACK32,
};
-unsigned radv_format_meta_fs_key(struct radv_device *device, VkFormat format)
+unsigned
+radv_format_meta_fs_key(struct radv_device *device, VkFormat format)
{
- unsigned col_format = radv_choose_spi_color_format(device, format, false, false);
- assert(col_format != V_028714_SPI_SHADER_32_AR);
-
- bool is_int8 = format_is_int8(format);
- bool is_int10 = format_is_int10(format);
-
- if (col_format == V_028714_SPI_SHADER_UINT16_ABGR && is_int8)
- return 8;
- else if (col_format == V_028714_SPI_SHADER_SINT16_ABGR && is_int8)
- return 9;
- else if (col_format == V_028714_SPI_SHADER_UINT16_ABGR && is_int10)
- return 10;
- else if (col_format == V_028714_SPI_SHADER_SINT16_ABGR && is_int10)
- return 11;
- else {
- if (col_format >= V_028714_SPI_SHADER_32_AR)
- --col_format; /* Skip V_028714_SPI_SHADER_32_AR since there is no such VkFormat */
-
- --col_format; /* Skip V_028714_SPI_SHADER_ZERO */
- return col_format;
- }
+ unsigned col_format = radv_choose_spi_color_format(device, format, false, false);
+ assert(col_format != V_028714_SPI_SHADER_32_AR);
+
+ bool is_int8 = format_is_int8(format);
+ bool is_int10 = format_is_int10(format);
+
+ if (col_format == V_028714_SPI_SHADER_UINT16_ABGR && is_int8)
+ return 8;
+ else if (col_format == V_028714_SPI_SHADER_SINT16_ABGR && is_int8)
+ return 9;
+ else if (col_format == V_028714_SPI_SHADER_UINT16_ABGR && is_int10)
+ return 10;
+ else if (col_format == V_028714_SPI_SHADER_SINT16_ABGR && is_int10)
+ return 11;
+ else {
+ if (col_format >= V_028714_SPI_SHADER_32_AR)
+ --col_format; /* Skip V_028714_SPI_SHADER_32_AR since there is no such VkFormat */
+
+ --col_format; /* Skip V_028714_SPI_SHADER_ZERO */
+ return col_format;
+ }
}
static void
-radv_blend_check_commutativity(struct radv_blend_state *blend,
- VkBlendOp op, VkBlendFactor src,
- VkBlendFactor dst, unsigned chanmask)
+radv_blend_check_commutativity(struct radv_blend_state *blend, VkBlendOp op, VkBlendFactor src,
+ VkBlendFactor dst, unsigned chanmask)
{
- /* Src factor is allowed when it does not depend on Dst. */
- static const uint32_t src_allowed =
- (1u << VK_BLEND_FACTOR_ONE) |
- (1u << VK_BLEND_FACTOR_SRC_COLOR) |
- (1u << VK_BLEND_FACTOR_SRC_ALPHA) |
- (1u << VK_BLEND_FACTOR_SRC_ALPHA_SATURATE) |
- (1u << VK_BLEND_FACTOR_CONSTANT_COLOR) |
- (1u << VK_BLEND_FACTOR_CONSTANT_ALPHA) |
- (1u << VK_BLEND_FACTOR_SRC1_COLOR) |
- (1u << VK_BLEND_FACTOR_SRC1_ALPHA) |
- (1u << VK_BLEND_FACTOR_ZERO) |
- (1u << VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR) |
- (1u << VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA) |
- (1u << VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR) |
- (1u << VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA) |
- (1u << VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR) |
- (1u << VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA);
-
- if (dst == VK_BLEND_FACTOR_ONE &&
- (src_allowed & (1u << src))) {
- /* Addition is commutative, but floating point addition isn't
- * associative: subtle changes can be introduced via different
- * rounding. Be conservative, only enable for min and max.
- */
- if (op == VK_BLEND_OP_MAX || op == VK_BLEND_OP_MIN)
- blend->commutative_4bit |= chanmask;
- }
+ /* Src factor is allowed when it does not depend on Dst. */
+ static const uint32_t src_allowed =
+ (1u << VK_BLEND_FACTOR_ONE) | (1u << VK_BLEND_FACTOR_SRC_COLOR) |
+ (1u << VK_BLEND_FACTOR_SRC_ALPHA) | (1u << VK_BLEND_FACTOR_SRC_ALPHA_SATURATE) |
+ (1u << VK_BLEND_FACTOR_CONSTANT_COLOR) | (1u << VK_BLEND_FACTOR_CONSTANT_ALPHA) |
+ (1u << VK_BLEND_FACTOR_SRC1_COLOR) | (1u << VK_BLEND_FACTOR_SRC1_ALPHA) |
+ (1u << VK_BLEND_FACTOR_ZERO) | (1u << VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR) |
+ (1u << VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA) |
+ (1u << VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR) |
+ (1u << VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA) |
+ (1u << VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR) | (1u << VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA);
+
+ if (dst == VK_BLEND_FACTOR_ONE && (src_allowed & (1u << src))) {
+ /* Addition is commutative, but floating point addition isn't
+ * associative: subtle changes can be introduced via different
+ * rounding. Be conservative, only enable for min and max.
+ */
+ if (op == VK_BLEND_OP_MAX || op == VK_BLEND_OP_MIN)
+ blend->commutative_4bit |= chanmask;
+ }
}
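
radv_blend_check_commutativity only marks MIN and MAX blends as safely reorderable; as the comment explains, ADD is excluded even though addition commutes, because floating-point addition is not associative and accumulating fragments in a different order can round differently. A short demonstration of that effect:

#include <stdio.h>

int
main(void)
{
   float a = 1e8f, b = -1e8f, c = 1.0f;

   printf("(a + b) + c = %f\n", (a + b) + c); /* 1.000000 */
   printf("a + (b + c) = %f\n", a + (b + c)); /* 0.000000, c is absorbed in b + c */
   return 0;
}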
static struct radv_blend_state
radv_pipeline_init_blend_state(const struct radv_pipeline *pipeline,
- const VkGraphicsPipelineCreateInfo *pCreateInfo,
- const struct radv_graphics_pipeline_create_info *extra)
+ const VkGraphicsPipelineCreateInfo *pCreateInfo,
+ const struct radv_graphics_pipeline_create_info *extra)
{
- const VkPipelineColorBlendStateCreateInfo *vkblend = radv_pipeline_get_color_blend_state(pCreateInfo);
- const VkPipelineMultisampleStateCreateInfo *vkms = radv_pipeline_get_multisample_state(pCreateInfo);
- struct radv_blend_state blend = {0};
- unsigned mode = V_028808_CB_NORMAL;
- int i;
-
- if (extra && extra->custom_blend_mode) {
- blend.single_cb_enable = true;
- mode = extra->custom_blend_mode;
- }
-
- blend.cb_color_control = 0;
- if (vkblend) {
- if (vkblend->logicOpEnable)
- blend.cb_color_control |= S_028808_ROP3(si_translate_blend_logic_op(vkblend->logicOp));
- else
- blend.cb_color_control |= S_028808_ROP3(V_028808_ROP3_COPY);
- }
-
- blend.db_alpha_to_mask = S_028B70_ALPHA_TO_MASK_OFFSET0(3) |
- S_028B70_ALPHA_TO_MASK_OFFSET1(1) |
- S_028B70_ALPHA_TO_MASK_OFFSET2(0) |
- S_028B70_ALPHA_TO_MASK_OFFSET3(2) |
- S_028B70_OFFSET_ROUND(1);
-
- if (vkms && vkms->alphaToCoverageEnable) {
- blend.db_alpha_to_mask |= S_028B70_ALPHA_TO_MASK_ENABLE(1);
- blend.need_src_alpha |= 0x1;
- }
-
- blend.cb_target_mask = 0;
- if (vkblend) {
- for (i = 0; i < vkblend->attachmentCount; i++) {
- const VkPipelineColorBlendAttachmentState *att = &vkblend->pAttachments[i];
- unsigned blend_cntl = 0;
- unsigned srcRGB_opt, dstRGB_opt, srcA_opt, dstA_opt;
- VkBlendOp eqRGB = att->colorBlendOp;
- VkBlendFactor srcRGB = att->srcColorBlendFactor;
- VkBlendFactor dstRGB = att->dstColorBlendFactor;
- VkBlendOp eqA = att->alphaBlendOp;
- VkBlendFactor srcA = att->srcAlphaBlendFactor;
- VkBlendFactor dstA = att->dstAlphaBlendFactor;
-
- blend.sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) | S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED);
-
- if (!att->colorWriteMask)
- continue;
-
- /* Ignore other blend targets if dual-source blending
- * is enabled to prevent wrong behaviour.
- */
- if (blend.mrt0_is_dual_src)
- continue;
-
- blend.cb_target_mask |= (unsigned)att->colorWriteMask << (4 * i);
- blend.cb_target_enabled_4bit |= 0xfu << (4 * i);
- if (!att->blendEnable) {
- blend.cb_blend_control[i] = blend_cntl;
- continue;
- }
-
- if (is_dual_src(srcRGB) || is_dual_src(dstRGB) || is_dual_src(srcA) || is_dual_src(dstA))
- if (i == 0)
- blend.mrt0_is_dual_src = true;
-
- if (eqRGB == VK_BLEND_OP_MIN || eqRGB == VK_BLEND_OP_MAX) {
- srcRGB = VK_BLEND_FACTOR_ONE;
- dstRGB = VK_BLEND_FACTOR_ONE;
- }
- if (eqA == VK_BLEND_OP_MIN || eqA == VK_BLEND_OP_MAX) {
- srcA = VK_BLEND_FACTOR_ONE;
- dstA = VK_BLEND_FACTOR_ONE;
- }
-
- radv_blend_check_commutativity(&blend, eqRGB, srcRGB, dstRGB,
- 0x7u << (4 * i));
- radv_blend_check_commutativity(&blend, eqA, srcA, dstA,
- 0x8u << (4 * i));
-
- /* Blending optimizations for RB+.
- * These transformations don't change the behavior.
- *
- * First, get rid of DST in the blend factors:
- * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
- */
- si_blend_remove_dst(&eqRGB, &srcRGB, &dstRGB,
- VK_BLEND_FACTOR_DST_COLOR,
- VK_BLEND_FACTOR_SRC_COLOR);
-
- si_blend_remove_dst(&eqA, &srcA, &dstA,
- VK_BLEND_FACTOR_DST_COLOR,
- VK_BLEND_FACTOR_SRC_COLOR);
-
- si_blend_remove_dst(&eqA, &srcA, &dstA,
- VK_BLEND_FACTOR_DST_ALPHA,
- VK_BLEND_FACTOR_SRC_ALPHA);
-
- /* Look up the ideal settings from tables. */
- srcRGB_opt = si_translate_blend_opt_factor(srcRGB, false);
- dstRGB_opt = si_translate_blend_opt_factor(dstRGB, false);
- srcA_opt = si_translate_blend_opt_factor(srcA, true);
- dstA_opt = si_translate_blend_opt_factor(dstA, true);
-
- /* Handle interdependencies. */
- if (si_blend_factor_uses_dst(srcRGB))
- dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
- if (si_blend_factor_uses_dst(srcA))
- dstA_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
-
- if (srcRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE &&
- (dstRGB == VK_BLEND_FACTOR_ZERO ||
- dstRGB == VK_BLEND_FACTOR_SRC_ALPHA ||
- dstRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE))
- dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
-
- /* Set the final value. */
- blend.sx_mrt_blend_opt[i] =
- S_028760_COLOR_SRC_OPT(srcRGB_opt) |
- S_028760_COLOR_DST_OPT(dstRGB_opt) |
- S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(eqRGB)) |
- S_028760_ALPHA_SRC_OPT(srcA_opt) |
- S_028760_ALPHA_DST_OPT(dstA_opt) |
- S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(eqA));
- blend_cntl |= S_028780_ENABLE(1);
-
- blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB));
- blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(srcRGB));
- blend_cntl |= S_028780_COLOR_DESTBLEND(si_translate_blend_factor(dstRGB));
- if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) {
- blend_cntl |= S_028780_SEPARATE_ALPHA_BLEND(1);
- blend_cntl |= S_028780_ALPHA_COMB_FCN(si_translate_blend_function(eqA));
- blend_cntl |= S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(srcA));
- blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA));
- }
- blend.cb_blend_control[i] = blend_cntl;
-
- blend.blend_enable_4bit |= 0xfu << (i * 4);
-
- if (srcRGB == VK_BLEND_FACTOR_SRC_ALPHA ||
- dstRGB == VK_BLEND_FACTOR_SRC_ALPHA ||
- srcRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE ||
- dstRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE ||
- srcRGB == VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA ||
- dstRGB == VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA)
- blend.need_src_alpha |= 1 << i;
- }
- for (i = vkblend->attachmentCount; i < 8; i++) {
- blend.cb_blend_control[i] = 0;
- blend.sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) | S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED);
- }
- }
-
- if (pipeline->device->physical_device->rad_info.has_rbplus) {
- /* Disable RB+ blend optimizations for dual source blending. */
- if (blend.mrt0_is_dual_src) {
- for (i = 0; i < 8; i++) {
- blend.sx_mrt_blend_opt[i] =
- S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) |
- S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE);
- }
- }
-
- /* RB+ doesn't work with dual source blending, logic op and
- * RESOLVE.
- */
- if (blend.mrt0_is_dual_src ||
- (vkblend && vkblend->logicOpEnable) ||
- mode == V_028808_CB_RESOLVE)
- blend.cb_color_control |= S_028808_DISABLE_DUAL_QUAD(1);
- }
-
- if (blend.cb_target_mask)
- blend.cb_color_control |= S_028808_MODE(mode);
- else
- blend.cb_color_control |= S_028808_MODE(V_028808_CB_DISABLE);
-
- radv_pipeline_compute_spi_color_formats(pipeline, pCreateInfo, &blend);
- return blend;
+ const VkPipelineColorBlendStateCreateInfo *vkblend =
+ radv_pipeline_get_color_blend_state(pCreateInfo);
+ const VkPipelineMultisampleStateCreateInfo *vkms =
+ radv_pipeline_get_multisample_state(pCreateInfo);
+ struct radv_blend_state blend = {0};
+ unsigned mode = V_028808_CB_NORMAL;
+ int i;
+
+ if (extra && extra->custom_blend_mode) {
+ blend.single_cb_enable = true;
+ mode = extra->custom_blend_mode;
+ }
+
+ blend.cb_color_control = 0;
+ if (vkblend) {
+ if (vkblend->logicOpEnable)
+ blend.cb_color_control |= S_028808_ROP3(si_translate_blend_logic_op(vkblend->logicOp));
+ else
+ blend.cb_color_control |= S_028808_ROP3(V_028808_ROP3_COPY);
+ }
+
+ blend.db_alpha_to_mask = S_028B70_ALPHA_TO_MASK_OFFSET0(3) | S_028B70_ALPHA_TO_MASK_OFFSET1(1) |
+ S_028B70_ALPHA_TO_MASK_OFFSET2(0) | S_028B70_ALPHA_TO_MASK_OFFSET3(2) |
+ S_028B70_OFFSET_ROUND(1);
+
+ if (vkms && vkms->alphaToCoverageEnable) {
+ blend.db_alpha_to_mask |= S_028B70_ALPHA_TO_MASK_ENABLE(1);
+ blend.need_src_alpha |= 0x1;
+ }
+
+ blend.cb_target_mask = 0;
+ if (vkblend) {
+ for (i = 0; i < vkblend->attachmentCount; i++) {
+ const VkPipelineColorBlendAttachmentState *att = &vkblend->pAttachments[i];
+ unsigned blend_cntl = 0;
+ unsigned srcRGB_opt, dstRGB_opt, srcA_opt, dstA_opt;
+ VkBlendOp eqRGB = att->colorBlendOp;
+ VkBlendFactor srcRGB = att->srcColorBlendFactor;
+ VkBlendFactor dstRGB = att->dstColorBlendFactor;
+ VkBlendOp eqA = att->alphaBlendOp;
+ VkBlendFactor srcA = att->srcAlphaBlendFactor;
+ VkBlendFactor dstA = att->dstAlphaBlendFactor;
+
+ blend.sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) |
+ S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED);
+
+ if (!att->colorWriteMask)
+ continue;
+
+ /* Ignore other blend targets if dual-source blending
+ * is enabled to prevent wrong behaviour.
+ */
+ if (blend.mrt0_is_dual_src)
+ continue;
+
+ blend.cb_target_mask |= (unsigned)att->colorWriteMask << (4 * i);
+ blend.cb_target_enabled_4bit |= 0xfu << (4 * i);
+ if (!att->blendEnable) {
+ blend.cb_blend_control[i] = blend_cntl;
+ continue;
+ }
+
+ if (is_dual_src(srcRGB) || is_dual_src(dstRGB) || is_dual_src(srcA) || is_dual_src(dstA))
+ if (i == 0)
+ blend.mrt0_is_dual_src = true;
+
+ if (eqRGB == VK_BLEND_OP_MIN || eqRGB == VK_BLEND_OP_MAX) {
+ srcRGB = VK_BLEND_FACTOR_ONE;
+ dstRGB = VK_BLEND_FACTOR_ONE;
+ }
+ if (eqA == VK_BLEND_OP_MIN || eqA == VK_BLEND_OP_MAX) {
+ srcA = VK_BLEND_FACTOR_ONE;
+ dstA = VK_BLEND_FACTOR_ONE;
+ }
+
+ radv_blend_check_commutativity(&blend, eqRGB, srcRGB, dstRGB, 0x7u << (4 * i));
+ radv_blend_check_commutativity(&blend, eqA, srcA, dstA, 0x8u << (4 * i));
+
+ /* Blending optimizations for RB+.
+ * These transformations don't change the behavior.
+ *
+ * First, get rid of DST in the blend factors:
+ * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
+ */
+ si_blend_remove_dst(&eqRGB, &srcRGB, &dstRGB, VK_BLEND_FACTOR_DST_COLOR,
+ VK_BLEND_FACTOR_SRC_COLOR);
+
+ si_blend_remove_dst(&eqA, &srcA, &dstA, VK_BLEND_FACTOR_DST_COLOR,
+ VK_BLEND_FACTOR_SRC_COLOR);
+
+ si_blend_remove_dst(&eqA, &srcA, &dstA, VK_BLEND_FACTOR_DST_ALPHA,
+ VK_BLEND_FACTOR_SRC_ALPHA);
+
+ /* Look up the ideal settings from tables. */
+ srcRGB_opt = si_translate_blend_opt_factor(srcRGB, false);
+ dstRGB_opt = si_translate_blend_opt_factor(dstRGB, false);
+ srcA_opt = si_translate_blend_opt_factor(srcA, true);
+ dstA_opt = si_translate_blend_opt_factor(dstA, true);
+
+ /* Handle interdependencies. */
+ if (si_blend_factor_uses_dst(srcRGB))
+ dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
+ if (si_blend_factor_uses_dst(srcA))
+ dstA_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
+
+ if (srcRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE &&
+ (dstRGB == VK_BLEND_FACTOR_ZERO || dstRGB == VK_BLEND_FACTOR_SRC_ALPHA ||
+ dstRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE))
+ dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
+
+ /* Set the final value. */
+ blend.sx_mrt_blend_opt[i] =
+ S_028760_COLOR_SRC_OPT(srcRGB_opt) | S_028760_COLOR_DST_OPT(dstRGB_opt) |
+ S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(eqRGB)) |
+ S_028760_ALPHA_SRC_OPT(srcA_opt) | S_028760_ALPHA_DST_OPT(dstA_opt) |
+ S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(eqA));
+ blend_cntl |= S_028780_ENABLE(1);
+
+ blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB));
+ blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(srcRGB));
+ blend_cntl |= S_028780_COLOR_DESTBLEND(si_translate_blend_factor(dstRGB));
+ if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) {
+ blend_cntl |= S_028780_SEPARATE_ALPHA_BLEND(1);
+ blend_cntl |= S_028780_ALPHA_COMB_FCN(si_translate_blend_function(eqA));
+ blend_cntl |= S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(srcA));
+ blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA));
+ }
+ blend.cb_blend_control[i] = blend_cntl;
+
+ blend.blend_enable_4bit |= 0xfu << (i * 4);
+
+ if (srcRGB == VK_BLEND_FACTOR_SRC_ALPHA || dstRGB == VK_BLEND_FACTOR_SRC_ALPHA ||
+ srcRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE ||
+ dstRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE ||
+ srcRGB == VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA ||
+ dstRGB == VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA)
+ blend.need_src_alpha |= 1 << i;
+ }
+ for (i = vkblend->attachmentCount; i < 8; i++) {
+ blend.cb_blend_control[i] = 0;
+ blend.sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) |
+ S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED);
+ }
+ }
+
+ if (pipeline->device->physical_device->rad_info.has_rbplus) {
+ /* Disable RB+ blend optimizations for dual source blending. */
+ if (blend.mrt0_is_dual_src) {
+ for (i = 0; i < 8; i++) {
+ blend.sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) |
+ S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE);
+ }
+ }
+
+ /* RB+ doesn't work with dual source blending, logic op and
+ * RESOLVE.
+ */
+ if (blend.mrt0_is_dual_src || (vkblend && vkblend->logicOpEnable) ||
+ mode == V_028808_CB_RESOLVE)
+ blend.cb_color_control |= S_028808_DISABLE_DUAL_QUAD(1);
+ }
+
+ if (blend.cb_target_mask)
+ blend.cb_color_control |= S_028808_MODE(mode);
+ else
+ blend.cb_color_control |= S_028808_MODE(V_028808_CB_DISABLE);
+
+ radv_pipeline_compute_spi_color_formats(pipeline, pCreateInfo, &blend);
+ return blend;
}
-static uint32_t si_translate_fill(VkPolygonMode func)
+static uint32_t
+si_translate_fill(VkPolygonMode func)
{
- switch(func) {
- case VK_POLYGON_MODE_FILL:
- return V_028814_X_DRAW_TRIANGLES;
- case VK_POLYGON_MODE_LINE:
- return V_028814_X_DRAW_LINES;
- case VK_POLYGON_MODE_POINT:
- return V_028814_X_DRAW_POINTS;
- default:
- assert(0);
- return V_028814_X_DRAW_POINTS;
- }
+ switch (func) {
+ case VK_POLYGON_MODE_FILL:
+ return V_028814_X_DRAW_TRIANGLES;
+ case VK_POLYGON_MODE_LINE:
+ return V_028814_X_DRAW_LINES;
+ case VK_POLYGON_MODE_POINT:
+ return V_028814_X_DRAW_POINTS;
+ default:
+ assert(0);
+ return V_028814_X_DRAW_POINTS;
+ }
}
-static uint8_t radv_pipeline_get_ps_iter_samples(const VkGraphicsPipelineCreateInfo *pCreateInfo)
+static uint8_t
+radv_pipeline_get_ps_iter_samples(const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
- const VkPipelineMultisampleStateCreateInfo *vkms = pCreateInfo->pMultisampleState;
- RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
- struct radv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass];
- uint32_t ps_iter_samples = 1;
- uint32_t num_samples;
-
- /* From the Vulkan 1.1.129 spec, 26.7. Sample Shading:
- *
- * "If the VK_AMD_mixed_attachment_samples extension is enabled and the
- * subpass uses color attachments, totalSamples is the number of
- * samples of the color attachments. Otherwise, totalSamples is the
- * value of VkPipelineMultisampleStateCreateInfo::rasterizationSamples
- * specified at pipeline creation time."
- */
- if (subpass->has_color_att) {
- num_samples = subpass->color_sample_count;
- } else {
- num_samples = vkms->rasterizationSamples;
- }
-
- if (vkms->sampleShadingEnable) {
- ps_iter_samples = ceilf(vkms->minSampleShading * num_samples);
- ps_iter_samples = util_next_power_of_two(ps_iter_samples);
- }
- return ps_iter_samples;
+ const VkPipelineMultisampleStateCreateInfo *vkms = pCreateInfo->pMultisampleState;
+ RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
+ struct radv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass];
+ uint32_t ps_iter_samples = 1;
+ uint32_t num_samples;
+
+ /* From the Vulkan 1.1.129 spec, 26.7. Sample Shading:
+ *
+ * "If the VK_AMD_mixed_attachment_samples extension is enabled and the
+ * subpass uses color attachments, totalSamples is the number of
+ * samples of the color attachments. Otherwise, totalSamples is the
+ * value of VkPipelineMultisampleStateCreateInfo::rasterizationSamples
+ * specified at pipeline creation time."
+ */
+ if (subpass->has_color_att) {
+ num_samples = subpass->color_sample_count;
+ } else {
+ num_samples = vkms->rasterizationSamples;
+ }
+
+ if (vkms->sampleShadingEnable) {
+ ps_iter_samples = ceilf(vkms->minSampleShading * num_samples);
+ ps_iter_samples = util_next_power_of_two(ps_iter_samples);
+ }
+ return ps_iter_samples;
}
static bool
radv_is_depth_write_enabled(const VkPipelineDepthStencilStateCreateInfo *pCreateInfo)
{
- return pCreateInfo->depthTestEnable &&
- pCreateInfo->depthWriteEnable &&
- pCreateInfo->depthCompareOp != VK_COMPARE_OP_NEVER;
+ return pCreateInfo->depthTestEnable && pCreateInfo->depthWriteEnable &&
+ pCreateInfo->depthCompareOp != VK_COMPARE_OP_NEVER;
}
static bool
radv_writes_stencil(const VkStencilOpState *state)
{
- return state->writeMask &&
- (state->failOp != VK_STENCIL_OP_KEEP ||
- state->passOp != VK_STENCIL_OP_KEEP ||
- state->depthFailOp != VK_STENCIL_OP_KEEP);
+ return state->writeMask &&
+ (state->failOp != VK_STENCIL_OP_KEEP || state->passOp != VK_STENCIL_OP_KEEP ||
+ state->depthFailOp != VK_STENCIL_OP_KEEP);
}
static bool
radv_is_stencil_write_enabled(const VkPipelineDepthStencilStateCreateInfo *pCreateInfo)
{
- return pCreateInfo->stencilTestEnable &&
- (radv_writes_stencil(&pCreateInfo->front) ||
- radv_writes_stencil(&pCreateInfo->back));
+ return pCreateInfo->stencilTestEnable &&
+ (radv_writes_stencil(&pCreateInfo->front) || radv_writes_stencil(&pCreateInfo->back));
}
static bool
radv_is_ds_write_enabled(const VkPipelineDepthStencilStateCreateInfo *pCreateInfo)
{
- return radv_is_depth_write_enabled(pCreateInfo) ||
- radv_is_stencil_write_enabled(pCreateInfo);
+ return radv_is_depth_write_enabled(pCreateInfo) || radv_is_stencil_write_enabled(pCreateInfo);
}
static bool
radv_order_invariant_stencil_op(VkStencilOp op)
{
- /* REPLACE is normally order invariant, except when the stencil
- * reference value is written by the fragment shader. Tracking this
- * interaction does not seem worth the effort, so be conservative.
- */
- return op != VK_STENCIL_OP_INCREMENT_AND_CLAMP &&
- op != VK_STENCIL_OP_DECREMENT_AND_CLAMP &&
- op != VK_STENCIL_OP_REPLACE;
+ /* REPLACE is normally order invariant, except when the stencil
+ * reference value is written by the fragment shader. Tracking this
+ * interaction does not seem worth the effort, so be conservative.
+ */
+ return op != VK_STENCIL_OP_INCREMENT_AND_CLAMP && op != VK_STENCIL_OP_DECREMENT_AND_CLAMP &&
+ op != VK_STENCIL_OP_REPLACE;
}
static bool
radv_order_invariant_stencil_state(const VkStencilOpState *state)
{
- /* Compute whether, assuming Z writes are disabled, this stencil state
- * is order invariant in the sense that the set of passing fragments as
- * well as the final stencil buffer result does not depend on the order
- * of fragments.
- */
- return !state->writeMask ||
- /* The following assumes that Z writes are disabled. */
- (state->compareOp == VK_COMPARE_OP_ALWAYS &&
- radv_order_invariant_stencil_op(state->passOp) &&
- radv_order_invariant_stencil_op(state->depthFailOp)) ||
- (state->compareOp == VK_COMPARE_OP_NEVER &&
- radv_order_invariant_stencil_op(state->failOp));
+ /* Compute whether, assuming Z writes are disabled, this stencil state
+ * is order invariant in the sense that the set of passing fragments as
+ * well as the final stencil buffer result does not depend on the order
+ * of fragments.
+ */
+ return !state->writeMask ||
+ /* The following assumes that Z writes are disabled. */
+ (state->compareOp == VK_COMPARE_OP_ALWAYS &&
+ radv_order_invariant_stencil_op(state->passOp) &&
+ radv_order_invariant_stencil_op(state->depthFailOp)) ||
+ (state->compareOp == VK_COMPARE_OP_NEVER &&
+ radv_order_invariant_stencil_op(state->failOp));
}
static bool
-radv_is_state_dynamic(const VkGraphicsPipelineCreateInfo *pCreateInfo,
- VkDynamicState state)
+radv_is_state_dynamic(const VkGraphicsPipelineCreateInfo *pCreateInfo, VkDynamicState state)
{
- if (pCreateInfo->pDynamicState) {
- uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;
- for (uint32_t i = 0; i < count; i++) {
- if (pCreateInfo->pDynamicState->pDynamicStates[i] == state)
- return true;
- }
- }
-
- return false;
+ if (pCreateInfo->pDynamicState) {
+ uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;
+ for (uint32_t i = 0; i < count; i++) {
+ if (pCreateInfo->pDynamicState->pDynamicStates[i] == state)
+ return true;
+ }
+ }
+
+ return false;
}
static bool
radv_pipeline_has_dynamic_ds_states(const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
- VkDynamicState ds_states[] = {
- VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT,
- VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT,
- VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT,
- VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT,
- VK_DYNAMIC_STATE_STENCIL_OP_EXT,
- };
-
- for (uint32_t i = 0; i < ARRAY_SIZE(ds_states); i++) {
- if (radv_is_state_dynamic(pCreateInfo, ds_states[i]))
- return true;
- }
-
- return false;
+ VkDynamicState ds_states[] = {
+ VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT, VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT,
+ VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT, VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT,
+ VK_DYNAMIC_STATE_STENCIL_OP_EXT,
+ };
+
+ for (uint32_t i = 0; i < ARRAY_SIZE(ds_states); i++) {
+ if (radv_is_state_dynamic(pCreateInfo, ds_states[i]))
+ return true;
+ }
+
+ return false;
}
static bool
radv_pipeline_out_of_order_rast(struct radv_pipeline *pipeline,
- const struct radv_blend_state *blend,
- const VkGraphicsPipelineCreateInfo *pCreateInfo)
+ const struct radv_blend_state *blend,
+ const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
- RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
- struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
- const VkPipelineDepthStencilStateCreateInfo *vkds = radv_pipeline_get_depth_stencil_state(pCreateInfo);
- const VkPipelineColorBlendStateCreateInfo *vkblend = radv_pipeline_get_color_blend_state(pCreateInfo);
- unsigned colormask = blend->cb_target_enabled_4bit;
-
- if (!pipeline->device->physical_device->out_of_order_rast_allowed)
- return false;
-
- /* Be conservative if a logic operation is enabled with color buffers. */
- if (colormask && vkblend && vkblend->logicOpEnable)
- return false;
-
- /* Be conservative if an extended dynamic depth/stencil state is
- * enabled because the driver can't update out-of-order rasterization
- * dynamically.
- */
- if (radv_pipeline_has_dynamic_ds_states(pCreateInfo))
- return false;
-
- /* Default depth/stencil invariance when no attachment is bound. */
- struct radv_dsa_order_invariance dsa_order_invariant = {
- .zs = true, .pass_set = true
- };
-
- if (vkds) {
- struct radv_render_pass_attachment *attachment =
- pass->attachments + subpass->depth_stencil_attachment->attachment;
- bool has_stencil = vk_format_has_stencil(attachment->format);
- struct radv_dsa_order_invariance order_invariance[2];
- struct radv_shader_variant *ps =
- pipeline->shaders[MESA_SHADER_FRAGMENT];
-
- /* Compute depth/stencil order invariance in order to know if
- * it's safe to enable out-of-order.
- */
- bool zfunc_is_ordered =
- vkds->depthCompareOp == VK_COMPARE_OP_NEVER ||
- vkds->depthCompareOp == VK_COMPARE_OP_LESS ||
- vkds->depthCompareOp == VK_COMPARE_OP_LESS_OR_EQUAL ||
- vkds->depthCompareOp == VK_COMPARE_OP_GREATER ||
- vkds->depthCompareOp == VK_COMPARE_OP_GREATER_OR_EQUAL;
-
- bool nozwrite_and_order_invariant_stencil =
- !radv_is_ds_write_enabled(vkds) ||
- (!radv_is_depth_write_enabled(vkds) &&
- radv_order_invariant_stencil_state(&vkds->front) &&
- radv_order_invariant_stencil_state(&vkds->back));
-
- order_invariance[1].zs =
- nozwrite_and_order_invariant_stencil ||
- (!radv_is_stencil_write_enabled(vkds) &&
- zfunc_is_ordered);
- order_invariance[0].zs =
- !radv_is_depth_write_enabled(vkds) || zfunc_is_ordered;
-
- order_invariance[1].pass_set =
- nozwrite_and_order_invariant_stencil ||
- (!radv_is_stencil_write_enabled(vkds) &&
- (vkds->depthCompareOp == VK_COMPARE_OP_ALWAYS ||
- vkds->depthCompareOp == VK_COMPARE_OP_NEVER));
- order_invariance[0].pass_set =
- !radv_is_depth_write_enabled(vkds) ||
- (vkds->depthCompareOp == VK_COMPARE_OP_ALWAYS ||
- vkds->depthCompareOp == VK_COMPARE_OP_NEVER);
-
- dsa_order_invariant = order_invariance[has_stencil];
- if (!dsa_order_invariant.zs)
- return false;
-
- /* The set of PS invocations is always order invariant,
- * except when early Z/S tests are requested.
- */
- if (ps &&
- ps->info.ps.writes_memory &&
- ps->info.ps.early_fragment_test &&
- !dsa_order_invariant.pass_set)
- return false;
-
- /* Determine if out-of-order rasterization should be disabled
- * when occlusion queries are used.
- */
- pipeline->graphics.disable_out_of_order_rast_for_occlusion =
- !dsa_order_invariant.pass_set;
- }
-
- /* No color buffers are enabled for writing. */
- if (!colormask)
- return true;
-
- unsigned blendmask = colormask & blend->blend_enable_4bit;
-
- if (blendmask) {
- /* Only commutative blending. */
- if (blendmask & ~blend->commutative_4bit)
- return false;
-
- if (!dsa_order_invariant.pass_set)
- return false;
- }
-
- if (colormask & ~blendmask)
- return false;
-
- return true;
+ RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
+ struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
+ const VkPipelineDepthStencilStateCreateInfo *vkds =
+ radv_pipeline_get_depth_stencil_state(pCreateInfo);
+ const VkPipelineColorBlendStateCreateInfo *vkblend =
+ radv_pipeline_get_color_blend_state(pCreateInfo);
+ unsigned colormask = blend->cb_target_enabled_4bit;
+
+ if (!pipeline->device->physical_device->out_of_order_rast_allowed)
+ return false;
+
+ /* Be conservative if a logic operation is enabled with color buffers. */
+ if (colormask && vkblend && vkblend->logicOpEnable)
+ return false;
+
+ /* Be conservative if an extended dynamic depth/stencil state is
+ * enabled because the driver can't update out-of-order rasterization
+ * dynamically.
+ */
+ if (radv_pipeline_has_dynamic_ds_states(pCreateInfo))
+ return false;
+
+ /* Default depth/stencil invariance when no attachment is bound. */
+ struct radv_dsa_order_invariance dsa_order_invariant = {.zs = true, .pass_set = true};
+
+ if (vkds) {
+ struct radv_render_pass_attachment *attachment =
+ pass->attachments + subpass->depth_stencil_attachment->attachment;
+ bool has_stencil = vk_format_has_stencil(attachment->format);
+ struct radv_dsa_order_invariance order_invariance[2];
+ struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
+
+ /* Compute depth/stencil order invariance in order to know if
+ * it's safe to enable out-of-order.
+ */
+ bool zfunc_is_ordered = vkds->depthCompareOp == VK_COMPARE_OP_NEVER ||
+ vkds->depthCompareOp == VK_COMPARE_OP_LESS ||
+ vkds->depthCompareOp == VK_COMPARE_OP_LESS_OR_EQUAL ||
+ vkds->depthCompareOp == VK_COMPARE_OP_GREATER ||
+ vkds->depthCompareOp == VK_COMPARE_OP_GREATER_OR_EQUAL;
+
+ bool nozwrite_and_order_invariant_stencil =
+ !radv_is_ds_write_enabled(vkds) ||
+ (!radv_is_depth_write_enabled(vkds) && radv_order_invariant_stencil_state(&vkds->front) &&
+ radv_order_invariant_stencil_state(&vkds->back));
+
+ order_invariance[1].zs = nozwrite_and_order_invariant_stencil ||
+ (!radv_is_stencil_write_enabled(vkds) && zfunc_is_ordered);
+ order_invariance[0].zs = !radv_is_depth_write_enabled(vkds) || zfunc_is_ordered;
+
+ order_invariance[1].pass_set =
+ nozwrite_and_order_invariant_stencil ||
+ (!radv_is_stencil_write_enabled(vkds) && (vkds->depthCompareOp == VK_COMPARE_OP_ALWAYS ||
+ vkds->depthCompareOp == VK_COMPARE_OP_NEVER));
+ order_invariance[0].pass_set =
+ !radv_is_depth_write_enabled(vkds) || (vkds->depthCompareOp == VK_COMPARE_OP_ALWAYS ||
+ vkds->depthCompareOp == VK_COMPARE_OP_NEVER);
+
+ dsa_order_invariant = order_invariance[has_stencil];
+ if (!dsa_order_invariant.zs)
+ return false;
+
+ /* The set of PS invocations is always order invariant,
+ * except when early Z/S tests are requested.
+ */
+ if (ps && ps->info.ps.writes_memory && ps->info.ps.early_fragment_test &&
+ !dsa_order_invariant.pass_set)
+ return false;
+
+ /* Determine if out-of-order rasterization should be disabled
+ * when occlusion queries are used.
+ */
+ pipeline->graphics.disable_out_of_order_rast_for_occlusion = !dsa_order_invariant.pass_set;
+ }
+
+ /* No color buffers are enabled for writing. */
+ if (!colormask)
+ return true;
+
+ unsigned blendmask = colormask & blend->blend_enable_4bit;
+
+ if (blendmask) {
+ /* Only commutative blending. */
+ if (blendmask & ~blend->commutative_4bit)
+ return false;
+
+ if (!dsa_order_invariant.pass_set)
+ return false;
+ }
+
+ if (colormask & ~blendmask)
+ return false;
+
+ return true;
}
static const VkConservativeRasterizationModeEXT
radv_get_conservative_raster_mode(const VkPipelineRasterizationStateCreateInfo *pCreateInfo)
{
- const VkPipelineRasterizationConservativeStateCreateInfoEXT *conservative_raster =
- vk_find_struct_const(pCreateInfo->pNext, PIPELINE_RASTERIZATION_CONSERVATIVE_STATE_CREATE_INFO_EXT);
+ const VkPipelineRasterizationConservativeStateCreateInfoEXT *conservative_raster =
+ vk_find_struct_const(pCreateInfo->pNext,
+ PIPELINE_RASTERIZATION_CONSERVATIVE_STATE_CREATE_INFO_EXT);
- if (!conservative_raster)
- return VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT;
- return conservative_raster->conservativeRasterizationMode;
+ if (!conservative_raster)
+ return VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT;
+ return conservative_raster->conservativeRasterizationMode;
}
static void
radv_pipeline_init_multisample_state(struct radv_pipeline *pipeline,
- const struct radv_blend_state *blend,
- const VkGraphicsPipelineCreateInfo *pCreateInfo)
+ const struct radv_blend_state *blend,
+ const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
- const VkPipelineMultisampleStateCreateInfo *vkms = radv_pipeline_get_multisample_state(pCreateInfo);
- struct radv_multisample_state *ms = &pipeline->graphics.ms;
- unsigned num_tile_pipes = pipeline->device->physical_device->rad_info.num_tile_pipes;
- const VkConservativeRasterizationModeEXT mode =
- radv_get_conservative_raster_mode(pCreateInfo->pRasterizationState);
- bool out_of_order_rast = false;
- int ps_iter_samples = 1;
- uint32_t mask = 0xffff;
-
- if (vkms) {
- ms->num_samples = vkms->rasterizationSamples;
-
- /* From the Vulkan 1.1.129 spec, 26.7. Sample Shading:
- *
- * "Sample shading is enabled for a graphics pipeline:
- *
- * - If the interface of the fragment shader entry point of the
- * graphics pipeline includes an input variable decorated
- * with SampleId or SamplePosition. In this case
- * minSampleShadingFactor takes the value 1.0.
- * - Else if the sampleShadingEnable member of the
- * VkPipelineMultisampleStateCreateInfo structure specified
- * when creating the graphics pipeline is set to VK_TRUE. In
- * this case minSampleShadingFactor takes the value of
- * VkPipelineMultisampleStateCreateInfo::minSampleShading.
- *
- * Otherwise, sample shading is considered disabled."
- */
- if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.ps.uses_sample_shading) {
- ps_iter_samples = ms->num_samples;
- } else {
- ps_iter_samples = radv_pipeline_get_ps_iter_samples(pCreateInfo);
- }
- } else {
- ms->num_samples = 1;
- }
-
- const struct VkPipelineRasterizationStateRasterizationOrderAMD *raster_order =
- vk_find_struct_const(pCreateInfo->pRasterizationState->pNext, PIPELINE_RASTERIZATION_STATE_RASTERIZATION_ORDER_AMD);
- if (raster_order && raster_order->rasterizationOrder == VK_RASTERIZATION_ORDER_RELAXED_AMD) {
- /* Out-of-order rasterization is explicitly enabled by the
- * application.
- */
- out_of_order_rast = true;
- } else {
- /* Determine if the driver can enable out-of-order
- * rasterization internally.
- */
- out_of_order_rast =
- radv_pipeline_out_of_order_rast(pipeline, blend, pCreateInfo);
- }
-
- ms->pa_sc_aa_config = 0;
- ms->db_eqaa = S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
- S_028804_INCOHERENT_EQAA_READS(1) |
- S_028804_INTERPOLATE_COMP_Z(1) |
- S_028804_STATIC_ANCHOR_ASSOCIATIONS(1);
-
- /* Adjust MSAA state if conservative rasterization is enabled. */
- if (mode != VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT) {
- ms->pa_sc_aa_config |= S_028BE0_AA_MASK_CENTROID_DTMN(1);
-
- ms->db_eqaa |= S_028804_ENABLE_POSTZ_OVERRASTERIZATION(1) |
- S_028804_OVERRASTERIZATION_AMOUNT(4);
- }
-
- ms->pa_sc_mode_cntl_1 =
- S_028A4C_WALK_FENCE_ENABLE(1) | //TODO linear dst fixes
- S_028A4C_WALK_FENCE_SIZE(num_tile_pipes == 2 ? 2 : 3) |
- S_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(out_of_order_rast) |
- S_028A4C_OUT_OF_ORDER_WATER_MARK(0x7) |
- /* always 1: */
- S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(1) |
- S_028A4C_SUPERTILE_WALK_ORDER_ENABLE(1) |
- S_028A4C_TILE_WALK_ORDER_ENABLE(1) |
- S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(1) |
- S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
- S_028A4C_FORCE_EOV_REZ_ENABLE(1);
- ms->pa_sc_mode_cntl_0 = S_028A48_ALTERNATE_RBS_PER_TILE(pipeline->device->physical_device->rad_info.chip_class >= GFX9) |
- S_028A48_VPORT_SCISSOR_ENABLE(1);
-
- const VkPipelineRasterizationLineStateCreateInfoEXT *rast_line =
- vk_find_struct_const(pCreateInfo->pRasterizationState->pNext,
- PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT);
- if (rast_line) {
- ms->pa_sc_mode_cntl_0 |= S_028A48_LINE_STIPPLE_ENABLE(rast_line->stippledLineEnable);
- if (rast_line->lineRasterizationMode == VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT) {
- /* From the Vulkan spec 1.1.129:
- *
- * "When VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT lines
- * are being rasterized, sample locations may all be
- * treated as being at the pixel center (this may
- * affect attribute and depth interpolation)."
- */
- ms->num_samples = 1;
- }
- }
-
- if (ms->num_samples > 1) {
- RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
- struct radv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass];
- uint32_t z_samples = subpass->depth_stencil_attachment ? subpass->depth_sample_count : ms->num_samples;
- unsigned log_samples = util_logbase2(ms->num_samples);
- unsigned log_z_samples = util_logbase2(z_samples);
- unsigned log_ps_iter_samples = util_logbase2(ps_iter_samples);
- ms->pa_sc_mode_cntl_0 |= S_028A48_MSAA_ENABLE(1);
- ms->db_eqaa |= S_028804_MAX_ANCHOR_SAMPLES(log_z_samples) |
- S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) |
- S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) |
- S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples);
- ms->pa_sc_aa_config |= S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
- S_028BE0_MAX_SAMPLE_DIST(radv_get_default_max_sample_dist(log_samples)) |
- S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples) | /* CM_R_028BE0_PA_SC_AA_CONFIG */
- S_028BE0_COVERED_CENTROID_IS_CENTER(pipeline->device->physical_device->rad_info.chip_class >= GFX10_3);
- ms->pa_sc_mode_cntl_1 |= S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1);
- if (ps_iter_samples > 1)
- pipeline->graphics.spi_baryc_cntl |= S_0286E0_POS_FLOAT_LOCATION(2);
- }
-
- if (vkms && vkms->pSampleMask) {
- mask = vkms->pSampleMask[0] & 0xffff;
- }
-
- ms->pa_sc_aa_mask[0] = mask | (mask << 16);
- ms->pa_sc_aa_mask[1] = mask | (mask << 16);
+ const VkPipelineMultisampleStateCreateInfo *vkms =
+ radv_pipeline_get_multisample_state(pCreateInfo);
+ struct radv_multisample_state *ms = &pipeline->graphics.ms;
+ unsigned num_tile_pipes = pipeline->device->physical_device->rad_info.num_tile_pipes;
+ const VkConservativeRasterizationModeEXT mode =
+ radv_get_conservative_raster_mode(pCreateInfo->pRasterizationState);
+ bool out_of_order_rast = false;
+ int ps_iter_samples = 1;
+ uint32_t mask = 0xffff;
+
+ if (vkms) {
+ ms->num_samples = vkms->rasterizationSamples;
+
+ /* From the Vulkan 1.1.129 spec, 26.7. Sample Shading:
+ *
+ * "Sample shading is enabled for a graphics pipeline:
+ *
+ * - If the interface of the fragment shader entry point of the
+ * graphics pipeline includes an input variable decorated
+ * with SampleId or SamplePosition. In this case
+ * minSampleShadingFactor takes the value 1.0.
+ * - Else if the sampleShadingEnable member of the
+ * VkPipelineMultisampleStateCreateInfo structure specified
+ * when creating the graphics pipeline is set to VK_TRUE. In
+ * this case minSampleShadingFactor takes the value of
+ * VkPipelineMultisampleStateCreateInfo::minSampleShading.
+ *
+ * Otherwise, sample shading is considered disabled."
+ */
+ if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.ps.uses_sample_shading) {
+ ps_iter_samples = ms->num_samples;
+ } else {
+ ps_iter_samples = radv_pipeline_get_ps_iter_samples(pCreateInfo);
+ }
+ } else {
+ ms->num_samples = 1;
+ }
+
+ const struct VkPipelineRasterizationStateRasterizationOrderAMD *raster_order =
+ vk_find_struct_const(pCreateInfo->pRasterizationState->pNext,
+ PIPELINE_RASTERIZATION_STATE_RASTERIZATION_ORDER_AMD);
+ if (raster_order && raster_order->rasterizationOrder == VK_RASTERIZATION_ORDER_RELAXED_AMD) {
+ /* Out-of-order rasterization is explicitly enabled by the
+ * application.
+ */
+ out_of_order_rast = true;
+ } else {
+ /* Determine if the driver can enable out-of-order
+ * rasterization internally.
+ */
+ out_of_order_rast = radv_pipeline_out_of_order_rast(pipeline, blend, pCreateInfo);
+ }
+
+ ms->pa_sc_aa_config = 0;
+ ms->db_eqaa = S_028804_HIGH_QUALITY_INTERSECTIONS(1) | S_028804_INCOHERENT_EQAA_READS(1) |
+ S_028804_INTERPOLATE_COMP_Z(1) | S_028804_STATIC_ANCHOR_ASSOCIATIONS(1);
+
+ /* Adjust MSAA state if conservative rasterization is enabled. */
+ if (mode != VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT) {
+ ms->pa_sc_aa_config |= S_028BE0_AA_MASK_CENTROID_DTMN(1);
+
+ ms->db_eqaa |=
+ S_028804_ENABLE_POSTZ_OVERRASTERIZATION(1) | S_028804_OVERRASTERIZATION_AMOUNT(4);
+ }
+
+ ms->pa_sc_mode_cntl_1 =
+ S_028A4C_WALK_FENCE_ENABLE(1) | // TODO linear dst fixes
+ S_028A4C_WALK_FENCE_SIZE(num_tile_pipes == 2 ? 2 : 3) |
+ S_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(out_of_order_rast) |
+ S_028A4C_OUT_OF_ORDER_WATER_MARK(0x7) |
+ /* always 1: */
+ S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(1) | S_028A4C_SUPERTILE_WALK_ORDER_ENABLE(1) |
+ S_028A4C_TILE_WALK_ORDER_ENABLE(1) | S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(1) |
+ S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) | S_028A4C_FORCE_EOV_REZ_ENABLE(1);
+ ms->pa_sc_mode_cntl_0 = S_028A48_ALTERNATE_RBS_PER_TILE(
+ pipeline->device->physical_device->rad_info.chip_class >= GFX9) |
+ S_028A48_VPORT_SCISSOR_ENABLE(1);
+
+ const VkPipelineRasterizationLineStateCreateInfoEXT *rast_line = vk_find_struct_const(
+ pCreateInfo->pRasterizationState->pNext, PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT);
+ if (rast_line) {
+ ms->pa_sc_mode_cntl_0 |= S_028A48_LINE_STIPPLE_ENABLE(rast_line->stippledLineEnable);
+ if (rast_line->lineRasterizationMode == VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT) {
+ /* From the Vulkan spec 1.1.129:
+ *
+ * "When VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT lines
+ * are being rasterized, sample locations may all be
+ * treated as being at the pixel center (this may
+ * affect attribute and depth interpolation)."
+ */
+ ms->num_samples = 1;
+ }
+ }
+
+ if (ms->num_samples > 1) {
+ RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
+ struct radv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass];
+ uint32_t z_samples =
+ subpass->depth_stencil_attachment ? subpass->depth_sample_count : ms->num_samples;
+ unsigned log_samples = util_logbase2(ms->num_samples);
+ unsigned log_z_samples = util_logbase2(z_samples);
+ unsigned log_ps_iter_samples = util_logbase2(ps_iter_samples);
+ ms->pa_sc_mode_cntl_0 |= S_028A48_MSAA_ENABLE(1);
+ ms->db_eqaa |= S_028804_MAX_ANCHOR_SAMPLES(log_z_samples) |
+ S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) |
+ S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) |
+ S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples);
+ ms->pa_sc_aa_config |=
+ S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
+ S_028BE0_MAX_SAMPLE_DIST(radv_get_default_max_sample_dist(log_samples)) |
+ S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples) | /* CM_R_028BE0_PA_SC_AA_CONFIG */
+ S_028BE0_COVERED_CENTROID_IS_CENTER(
+ pipeline->device->physical_device->rad_info.chip_class >= GFX10_3);
+ ms->pa_sc_mode_cntl_1 |= S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1);
+ if (ps_iter_samples > 1)
+ pipeline->graphics.spi_baryc_cntl |= S_0286E0_POS_FLOAT_LOCATION(2);
+ }
+
+ if (vkms && vkms->pSampleMask) {
+ mask = vkms->pSampleMask[0] & 0xffff;
+ }
+
+ ms->pa_sc_aa_mask[0] = mask | (mask << 16);
+ ms->pa_sc_aa_mask[1] = mask | (mask << 16);
}
static void
gfx103_pipeline_init_vrs_state(struct radv_pipeline *pipeline,
- const VkGraphicsPipelineCreateInfo *pCreateInfo)
+ const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
- const VkPipelineMultisampleStateCreateInfo *vkms = radv_pipeline_get_multisample_state(pCreateInfo);
- struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
- struct radv_multisample_state *ms = &pipeline->graphics.ms;
- struct radv_vrs_state *vrs = &pipeline->graphics.vrs;
-
- if (vkms &&
- (vkms->sampleShadingEnable ||
- ps->info.ps.uses_sample_shading || ps->info.ps.reads_sample_mask_in)) {
- /* Disable VRS and use the rates from PS_ITER_SAMPLES if:
- *
- * 1) sample shading is enabled or per-sample interpolation is
- * used by the fragment shader
- * 2) the fragment shader reads gl_SampleMaskIn because the
- * 16-bit sample coverage mask isn't enough for MSAA8x and
- * 2x2 coarse shading isn't enough.
- */
- vrs->pa_cl_vrs_cntl =
- S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE);
-
- /* Make sure sample shading is enabled even if only MSAA1x is
- * used because the SAMPLE_ITER combiner is in passthrough
- * mode if PS_ITER_SAMPLE is 0, and it uses the per-draw rate.
- * The default VRS rate when sample shading is enabled is 1x1.
- */
- if (!G_028A4C_PS_ITER_SAMPLE(ms->pa_sc_mode_cntl_1))
- ms->pa_sc_mode_cntl_1 |= S_028A4C_PS_ITER_SAMPLE(1);
- } else {
- vrs->pa_cl_vrs_cntl =
- S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_VRS_COMB_MODE_PASSTHRU);
- }
-
- /* Primitive and HTILE combiners are always passthrough. */
- vrs->pa_cl_vrs_cntl |= S_028848_PRIMITIVE_RATE_COMBINER_MODE(V_028848_VRS_COMB_MODE_PASSTHRU) |
- S_028848_HTILE_RATE_COMBINER_MODE(V_028848_VRS_COMB_MODE_PASSTHRU);
+ const VkPipelineMultisampleStateCreateInfo *vkms =
+ radv_pipeline_get_multisample_state(pCreateInfo);
+ struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
+ struct radv_multisample_state *ms = &pipeline->graphics.ms;
+ struct radv_vrs_state *vrs = &pipeline->graphics.vrs;
+
+ if (vkms && (vkms->sampleShadingEnable || ps->info.ps.uses_sample_shading ||
+ ps->info.ps.reads_sample_mask_in)) {
+ /* Disable VRS and use the rates from PS_ITER_SAMPLES if:
+ *
+ * 1) sample shading is enabled or per-sample interpolation is
+ * used by the fragment shader
+ * 2) the fragment shader reads gl_SampleMaskIn because the
+ * 16-bit sample coverage mask isn't enough for MSAA8x and
+ * 2x2 coarse shading isn't enough.
+ */
+ vrs->pa_cl_vrs_cntl = S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE);
+
+ /* Make sure sample shading is enabled even if only MSAA1x is
+ * used because the SAMPLE_ITER combiner is in passthrough
+ * mode if PS_ITER_SAMPLE is 0, and it uses the per-draw rate.
+ * The default VRS rate when sample shading is enabled is 1x1.
+ */
+ if (!G_028A4C_PS_ITER_SAMPLE(ms->pa_sc_mode_cntl_1))
+ ms->pa_sc_mode_cntl_1 |= S_028A4C_PS_ITER_SAMPLE(1);
+ } else {
+ vrs->pa_cl_vrs_cntl = S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_VRS_COMB_MODE_PASSTHRU);
+ }
+
+ /* Primitive and HTILE combiners are always passthrough. */
+ vrs->pa_cl_vrs_cntl |= S_028848_PRIMITIVE_RATE_COMBINER_MODE(V_028848_VRS_COMB_MODE_PASSTHRU) |
+ S_028848_HTILE_RATE_COMBINER_MODE(V_028848_VRS_COMB_MODE_PASSTHRU);
}
static bool
radv_prim_can_use_guardband(enum VkPrimitiveTopology topology)
{
- switch (topology) {
- case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
- case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
- case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
- case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
- case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
- return false;
- case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
- case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
- case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
- case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
- case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
- case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
- return true;
- default:
- unreachable("unhandled primitive type");
- }
+ switch (topology) {
+ case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
+ case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
+ case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
+ case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
+ case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
+ return false;
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
+ case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
+ return true;
+ default:
+ unreachable("unhandled primitive type");
+ }
}
static uint32_t
si_conv_gl_prim_to_gs_out(unsigned gl_prim)
{
- switch (gl_prim) {
- case 0: /* GL_POINTS */
- return V_028A6C_POINTLIST;
- case 1: /* GL_LINES */
- case 3: /* GL_LINE_STRIP */
- case 0xA: /* GL_LINE_STRIP_ADJACENCY_ARB */
- case 0x8E7A: /* GL_ISOLINES */
- return V_028A6C_LINESTRIP;
-
- case 4: /* GL_TRIANGLES */
- case 0xc: /* GL_TRIANGLES_ADJACENCY_ARB */
- case 5: /* GL_TRIANGLE_STRIP */
- case 7: /* GL_QUADS */
- return V_028A6C_TRISTRIP;
- default:
- assert(0);
- return 0;
- }
+ switch (gl_prim) {
+ case 0: /* GL_POINTS */
+ return V_028A6C_POINTLIST;
+ case 1: /* GL_LINES */
+ case 3: /* GL_LINE_STRIP */
+ case 0xA: /* GL_LINE_STRIP_ADJACENCY_ARB */
+ case 0x8E7A: /* GL_ISOLINES */
+ return V_028A6C_LINESTRIP;
+
+ case 4: /* GL_TRIANGLES */
+ case 0xc: /* GL_TRIANGLES_ADJACENCY_ARB */
+ case 5: /* GL_TRIANGLE_STRIP */
+ case 7: /* GL_QUADS */
+ return V_028A6C_TRISTRIP;
+ default:
+ assert(0);
+ return 0;
+ }
}
static uint32_t
si_conv_prim_to_gs_out(enum VkPrimitiveTopology topology)
{
- switch (topology) {
- case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
- case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
- return V_028A6C_POINTLIST;
- case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
- case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
- case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
- case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
- return V_028A6C_LINESTRIP;
- case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
- case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
- case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
- case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
- case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
- return V_028A6C_TRISTRIP;
- default:
- assert(0);
- return 0;
- }
+ switch (topology) {
+ case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
+ case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
+ return V_028A6C_POINTLIST;
+ case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
+ case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
+ case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
+ case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
+ return V_028A6C_LINESTRIP;
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
+ return V_028A6C_TRISTRIP;
+ default:
+ assert(0);
+ return 0;
+ }
}
-static uint64_t radv_dynamic_state_mask(VkDynamicState state)
+static uint64_t
+radv_dynamic_state_mask(VkDynamicState state)
{
- switch(state) {
- case VK_DYNAMIC_STATE_VIEWPORT:
- case VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT_EXT:
- return RADV_DYNAMIC_VIEWPORT;
- case VK_DYNAMIC_STATE_SCISSOR:
- case VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT_EXT:
- return RADV_DYNAMIC_SCISSOR;
- case VK_DYNAMIC_STATE_LINE_WIDTH:
- return RADV_DYNAMIC_LINE_WIDTH;
- case VK_DYNAMIC_STATE_DEPTH_BIAS:
- return RADV_DYNAMIC_DEPTH_BIAS;
- case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
- return RADV_DYNAMIC_BLEND_CONSTANTS;
- case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
- return RADV_DYNAMIC_DEPTH_BOUNDS;
- case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
- return RADV_DYNAMIC_STENCIL_COMPARE_MASK;
- case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
- return RADV_DYNAMIC_STENCIL_WRITE_MASK;
- case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
- return RADV_DYNAMIC_STENCIL_REFERENCE;
- case VK_DYNAMIC_STATE_DISCARD_RECTANGLE_EXT:
- return RADV_DYNAMIC_DISCARD_RECTANGLE;
- case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT:
- return RADV_DYNAMIC_SAMPLE_LOCATIONS;
- case VK_DYNAMIC_STATE_LINE_STIPPLE_EXT:
- return RADV_DYNAMIC_LINE_STIPPLE;
- case VK_DYNAMIC_STATE_CULL_MODE_EXT:
- return RADV_DYNAMIC_CULL_MODE;
- case VK_DYNAMIC_STATE_FRONT_FACE_EXT:
- return RADV_DYNAMIC_FRONT_FACE;
- case VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT:
- return RADV_DYNAMIC_PRIMITIVE_TOPOLOGY;
- case VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT:
- return RADV_DYNAMIC_DEPTH_TEST_ENABLE;
- case VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT:
- return RADV_DYNAMIC_DEPTH_WRITE_ENABLE;
- case VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT:
- return RADV_DYNAMIC_DEPTH_COMPARE_OP;
- case VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT:
- return RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE;
- case VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT:
- return RADV_DYNAMIC_STENCIL_TEST_ENABLE;
- case VK_DYNAMIC_STATE_STENCIL_OP_EXT:
- return RADV_DYNAMIC_STENCIL_OP;
- case VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT:
- return RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE;
- case VK_DYNAMIC_STATE_FRAGMENT_SHADING_RATE_KHR:
- return RADV_DYNAMIC_FRAGMENT_SHADING_RATE;
- default:
- unreachable("Unhandled dynamic state");
- }
+ switch (state) {
+ case VK_DYNAMIC_STATE_VIEWPORT:
+ case VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT_EXT:
+ return RADV_DYNAMIC_VIEWPORT;
+ case VK_DYNAMIC_STATE_SCISSOR:
+ case VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT_EXT:
+ return RADV_DYNAMIC_SCISSOR;
+ case VK_DYNAMIC_STATE_LINE_WIDTH:
+ return RADV_DYNAMIC_LINE_WIDTH;
+ case VK_DYNAMIC_STATE_DEPTH_BIAS:
+ return RADV_DYNAMIC_DEPTH_BIAS;
+ case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
+ return RADV_DYNAMIC_BLEND_CONSTANTS;
+ case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
+ return RADV_DYNAMIC_DEPTH_BOUNDS;
+ case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
+ return RADV_DYNAMIC_STENCIL_COMPARE_MASK;
+ case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
+ return RADV_DYNAMIC_STENCIL_WRITE_MASK;
+ case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
+ return RADV_DYNAMIC_STENCIL_REFERENCE;
+ case VK_DYNAMIC_STATE_DISCARD_RECTANGLE_EXT:
+ return RADV_DYNAMIC_DISCARD_RECTANGLE;
+ case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT:
+ return RADV_DYNAMIC_SAMPLE_LOCATIONS;
+ case VK_DYNAMIC_STATE_LINE_STIPPLE_EXT:
+ return RADV_DYNAMIC_LINE_STIPPLE;
+ case VK_DYNAMIC_STATE_CULL_MODE_EXT:
+ return RADV_DYNAMIC_CULL_MODE;
+ case VK_DYNAMIC_STATE_FRONT_FACE_EXT:
+ return RADV_DYNAMIC_FRONT_FACE;
+ case VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT:
+ return RADV_DYNAMIC_PRIMITIVE_TOPOLOGY;
+ case VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT:
+ return RADV_DYNAMIC_DEPTH_TEST_ENABLE;
+ case VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT:
+ return RADV_DYNAMIC_DEPTH_WRITE_ENABLE;
+ case VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT:
+ return RADV_DYNAMIC_DEPTH_COMPARE_OP;
+ case VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT:
+ return RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE;
+ case VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT:
+ return RADV_DYNAMIC_STENCIL_TEST_ENABLE;
+ case VK_DYNAMIC_STATE_STENCIL_OP_EXT:
+ return RADV_DYNAMIC_STENCIL_OP;
+ case VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT:
+ return RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE;
+ case VK_DYNAMIC_STATE_FRAGMENT_SHADING_RATE_KHR:
+ return RADV_DYNAMIC_FRAGMENT_SHADING_RATE;
+ default:
+ unreachable("Unhandled dynamic state");
+ }
}
-static uint64_t radv_pipeline_needed_dynamic_state(const VkGraphicsPipelineCreateInfo *pCreateInfo)
+static uint64_t
+radv_pipeline_needed_dynamic_state(const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
- uint64_t states = RADV_DYNAMIC_ALL;
-
- /* If rasterization is disabled we do not care about any of the
- * dynamic states, since they are all rasterization related only,
- * except primitive topology and vertex binding stride.
- */
- if (pCreateInfo->pRasterizationState->rasterizerDiscardEnable)
- return RADV_DYNAMIC_PRIMITIVE_TOPOLOGY |
- RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE;
-
- if (!pCreateInfo->pRasterizationState->depthBiasEnable)
- states &= ~RADV_DYNAMIC_DEPTH_BIAS;
-
- if (!pCreateInfo->pDepthStencilState ||
- (!pCreateInfo->pDepthStencilState->depthBoundsTestEnable &&
- !radv_is_state_dynamic(pCreateInfo, VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT)))
- states &= ~RADV_DYNAMIC_DEPTH_BOUNDS;
-
- if (!pCreateInfo->pDepthStencilState ||
- (!pCreateInfo->pDepthStencilState->stencilTestEnable &&
- !radv_is_state_dynamic(pCreateInfo, VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT)))
- states &= ~(RADV_DYNAMIC_STENCIL_COMPARE_MASK |
- RADV_DYNAMIC_STENCIL_WRITE_MASK |
- RADV_DYNAMIC_STENCIL_REFERENCE);
-
- if (!vk_find_struct_const(pCreateInfo->pNext, PIPELINE_DISCARD_RECTANGLE_STATE_CREATE_INFO_EXT))
- states &= ~RADV_DYNAMIC_DISCARD_RECTANGLE;
-
- if (!pCreateInfo->pMultisampleState ||
- !vk_find_struct_const(pCreateInfo->pMultisampleState->pNext,
- PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT))
- states &= ~RADV_DYNAMIC_SAMPLE_LOCATIONS;
-
- if (!pCreateInfo->pRasterizationState ||
- !vk_find_struct_const(pCreateInfo->pRasterizationState->pNext,
- PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT))
- states &= ~RADV_DYNAMIC_LINE_STIPPLE;
-
- if (!vk_find_struct_const(pCreateInfo->pNext,
- PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR))
- states &= ~RADV_DYNAMIC_FRAGMENT_SHADING_RATE;
-
- /* TODO: blend constants & line width. */
-
- return states;
+ uint64_t states = RADV_DYNAMIC_ALL;
+
+ /* If rasterization is disabled we do not care about any of the
+ * dynamic states, since they are all rasterization related only,
+ * except primitive topology and vertex binding stride.
+ */
+ if (pCreateInfo->pRasterizationState->rasterizerDiscardEnable)
+ return RADV_DYNAMIC_PRIMITIVE_TOPOLOGY | RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE;
+
+ if (!pCreateInfo->pRasterizationState->depthBiasEnable)
+ states &= ~RADV_DYNAMIC_DEPTH_BIAS;
+
+ if (!pCreateInfo->pDepthStencilState ||
+ (!pCreateInfo->pDepthStencilState->depthBoundsTestEnable &&
+ !radv_is_state_dynamic(pCreateInfo, VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT)))
+ states &= ~RADV_DYNAMIC_DEPTH_BOUNDS;
+
+ if (!pCreateInfo->pDepthStencilState ||
+ (!pCreateInfo->pDepthStencilState->stencilTestEnable &&
+ !radv_is_state_dynamic(pCreateInfo, VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT)))
+ states &= ~(RADV_DYNAMIC_STENCIL_COMPARE_MASK | RADV_DYNAMIC_STENCIL_WRITE_MASK |
+ RADV_DYNAMIC_STENCIL_REFERENCE);
+
+ if (!vk_find_struct_const(pCreateInfo->pNext, PIPELINE_DISCARD_RECTANGLE_STATE_CREATE_INFO_EXT))
+ states &= ~RADV_DYNAMIC_DISCARD_RECTANGLE;
+
+ if (!pCreateInfo->pMultisampleState ||
+ !vk_find_struct_const(pCreateInfo->pMultisampleState->pNext,
+ PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT))
+ states &= ~RADV_DYNAMIC_SAMPLE_LOCATIONS;
+
+ if (!pCreateInfo->pRasterizationState ||
+ !vk_find_struct_const(pCreateInfo->pRasterizationState->pNext,
+ PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT))
+ states &= ~RADV_DYNAMIC_LINE_STIPPLE;
+
+ if (!vk_find_struct_const(pCreateInfo->pNext,
+ PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR))
+ states &= ~RADV_DYNAMIC_FRAGMENT_SHADING_RATE;
+
+ /* TODO: blend constants & line width. */
+
+ return states;
}
static struct radv_ia_multi_vgt_param_helpers
radv_compute_ia_multi_vgt_param_helpers(struct radv_pipeline *pipeline)
{
- struct radv_ia_multi_vgt_param_helpers ia_multi_vgt_param = {0};
- const struct radv_device *device = pipeline->device;
-
- if (radv_pipeline_has_tess(pipeline))
- ia_multi_vgt_param.primgroup_size = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.num_tess_patches;
- else if (radv_pipeline_has_gs(pipeline))
- ia_multi_vgt_param.primgroup_size = 64;
- else
- ia_multi_vgt_param.primgroup_size = 128; /* recommended without a GS */
-
- /* GS requirement. */
- ia_multi_vgt_param.partial_es_wave = false;
- if (radv_pipeline_has_gs(pipeline) && device->physical_device->rad_info.chip_class <= GFX8)
- if (SI_GS_PER_ES / ia_multi_vgt_param.primgroup_size >= pipeline->device->gs_table_depth - 3)
- ia_multi_vgt_param.partial_es_wave = true;
-
- ia_multi_vgt_param.ia_switch_on_eoi = false;
- if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.ps.prim_id_input)
- ia_multi_vgt_param.ia_switch_on_eoi = true;
- if (radv_pipeline_has_gs(pipeline) &&
- pipeline->shaders[MESA_SHADER_GEOMETRY]->info.uses_prim_id)
- ia_multi_vgt_param.ia_switch_on_eoi = true;
- if (radv_pipeline_has_tess(pipeline)) {
- /* SWITCH_ON_EOI must be set if PrimID is used. */
- if (pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.uses_prim_id ||
- radv_get_shader(pipeline, MESA_SHADER_TESS_EVAL)->info.uses_prim_id)
- ia_multi_vgt_param.ia_switch_on_eoi = true;
- }
-
- ia_multi_vgt_param.partial_vs_wave = false;
- if (radv_pipeline_has_tess(pipeline)) {
- /* Bug with tessellation and GS on Bonaire and older 2 SE chips. */
- if ((device->physical_device->rad_info.family == CHIP_TAHITI ||
- device->physical_device->rad_info.family == CHIP_PITCAIRN ||
- device->physical_device->rad_info.family == CHIP_BONAIRE) &&
- radv_pipeline_has_gs(pipeline))
- ia_multi_vgt_param.partial_vs_wave = true;
- /* Needed for 028B6C_DISTRIBUTION_MODE != 0 */
- if (device->physical_device->rad_info.has_distributed_tess) {
- if (radv_pipeline_has_gs(pipeline)) {
- if (device->physical_device->rad_info.chip_class <= GFX8)
- ia_multi_vgt_param.partial_es_wave = true;
- } else {
- ia_multi_vgt_param.partial_vs_wave = true;
- }
- }
- }
-
- if (radv_pipeline_has_gs(pipeline)) {
- /* On these chips there is the possibility of a hang if the
- * pipeline uses a GS and partial_vs_wave is not set.
- *
- * This mostly does not hit 4-SE chips, as those typically set
- * ia_switch_on_eoi and then partial_vs_wave is set for pipelines
- * with GS due to another workaround.
- *
- * Reproducer: https://bugs.freedesktop.org/show_bug.cgi?id=109242
- */
- if (device->physical_device->rad_info.family == CHIP_TONGA ||
- device->physical_device->rad_info.family == CHIP_FIJI ||
- device->physical_device->rad_info.family == CHIP_POLARIS10 ||
- device->physical_device->rad_info.family == CHIP_POLARIS11 ||
- device->physical_device->rad_info.family == CHIP_POLARIS12 ||
- device->physical_device->rad_info.family == CHIP_VEGAM) {
- ia_multi_vgt_param.partial_vs_wave = true;
- }
- }
-
- ia_multi_vgt_param.base =
- S_028AA8_PRIMGROUP_SIZE(ia_multi_vgt_param.primgroup_size - 1) |
- /* The following field was moved to VGT_SHADER_STAGES_EN in GFX9. */
- S_028AA8_MAX_PRIMGRP_IN_WAVE(device->physical_device->rad_info.chip_class == GFX8 ? 2 : 0) |
- S_030960_EN_INST_OPT_BASIC(device->physical_device->rad_info.chip_class >= GFX9) |
- S_030960_EN_INST_OPT_ADV(device->physical_device->rad_info.chip_class >= GFX9);
-
- return ia_multi_vgt_param;
+ struct radv_ia_multi_vgt_param_helpers ia_multi_vgt_param = {0};
+ const struct radv_device *device = pipeline->device;
+
+ if (radv_pipeline_has_tess(pipeline))
+ ia_multi_vgt_param.primgroup_size =
+ pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.num_tess_patches;
+ else if (radv_pipeline_has_gs(pipeline))
+ ia_multi_vgt_param.primgroup_size = 64;
+ else
+ ia_multi_vgt_param.primgroup_size = 128; /* recommended without a GS */
+
+ /* GS requirement. */
+ ia_multi_vgt_param.partial_es_wave = false;
+ if (radv_pipeline_has_gs(pipeline) && device->physical_device->rad_info.chip_class <= GFX8)
+ if (SI_GS_PER_ES / ia_multi_vgt_param.primgroup_size >= pipeline->device->gs_table_depth - 3)
+ ia_multi_vgt_param.partial_es_wave = true;
+
+ ia_multi_vgt_param.ia_switch_on_eoi = false;
+ if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.ps.prim_id_input)
+ ia_multi_vgt_param.ia_switch_on_eoi = true;
+ if (radv_pipeline_has_gs(pipeline) && pipeline->shaders[MESA_SHADER_GEOMETRY]->info.uses_prim_id)
+ ia_multi_vgt_param.ia_switch_on_eoi = true;
+ if (radv_pipeline_has_tess(pipeline)) {
+ /* SWITCH_ON_EOI must be set if PrimID is used. */
+ if (pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.uses_prim_id ||
+ radv_get_shader(pipeline, MESA_SHADER_TESS_EVAL)->info.uses_prim_id)
+ ia_multi_vgt_param.ia_switch_on_eoi = true;
+ }
+
+ ia_multi_vgt_param.partial_vs_wave = false;
+ if (radv_pipeline_has_tess(pipeline)) {
+ /* Bug with tessellation and GS on Bonaire and older 2 SE chips. */
+ if ((device->physical_device->rad_info.family == CHIP_TAHITI ||
+ device->physical_device->rad_info.family == CHIP_PITCAIRN ||
+ device->physical_device->rad_info.family == CHIP_BONAIRE) &&
+ radv_pipeline_has_gs(pipeline))
+ ia_multi_vgt_param.partial_vs_wave = true;
+ /* Needed for 028B6C_DISTRIBUTION_MODE != 0 */
+ if (device->physical_device->rad_info.has_distributed_tess) {
+ if (radv_pipeline_has_gs(pipeline)) {
+ if (device->physical_device->rad_info.chip_class <= GFX8)
+ ia_multi_vgt_param.partial_es_wave = true;
+ } else {
+ ia_multi_vgt_param.partial_vs_wave = true;
+ }
+ }
+ }
+
+ if (radv_pipeline_has_gs(pipeline)) {
+ /* On these chips there is the possibility of a hang if the
+ * pipeline uses a GS and partial_vs_wave is not set.
+ *
+ * This mostly does not hit 4-SE chips, as those typically set
+ * ia_switch_on_eoi and then partial_vs_wave is set for pipelines
+ * with GS due to another workaround.
+ *
+ * Reproducer: https://bugs.freedesktop.org/show_bug.cgi?id=109242
+ */
+ if (device->physical_device->rad_info.family == CHIP_TONGA ||
+ device->physical_device->rad_info.family == CHIP_FIJI ||
+ device->physical_device->rad_info.family == CHIP_POLARIS10 ||
+ device->physical_device->rad_info.family == CHIP_POLARIS11 ||
+ device->physical_device->rad_info.family == CHIP_POLARIS12 ||
+ device->physical_device->rad_info.family == CHIP_VEGAM) {
+ ia_multi_vgt_param.partial_vs_wave = true;
+ }
+ }
+
+ ia_multi_vgt_param.base =
+ S_028AA8_PRIMGROUP_SIZE(ia_multi_vgt_param.primgroup_size - 1) |
+ /* The following field was moved to VGT_SHADER_STAGES_EN in GFX9. */
+ S_028AA8_MAX_PRIMGRP_IN_WAVE(device->physical_device->rad_info.chip_class == GFX8 ? 2 : 0) |
+ S_030960_EN_INST_OPT_BASIC(device->physical_device->rad_info.chip_class >= GFX9) |
+ S_030960_EN_INST_OPT_ADV(device->physical_device->rad_info.chip_class >= GFX9);
+
+ return ia_multi_vgt_param;
}
static void
radv_pipeline_init_input_assembly_state(struct radv_pipeline *pipeline,
- const VkGraphicsPipelineCreateInfo *pCreateInfo,
- const struct radv_graphics_pipeline_create_info *extra)
+ const VkGraphicsPipelineCreateInfo *pCreateInfo,
+ const struct radv_graphics_pipeline_create_info *extra)
{
- const VkPipelineInputAssemblyStateCreateInfo *ia_state = pCreateInfo->pInputAssemblyState;
- struct radv_shader_variant *tes = pipeline->shaders[MESA_SHADER_TESS_EVAL];
- struct radv_shader_variant *gs = pipeline->shaders[MESA_SHADER_GEOMETRY];
-
- pipeline->graphics.prim_restart_enable = !!ia_state->primitiveRestartEnable;
- pipeline->graphics.can_use_guardband = radv_prim_can_use_guardband(ia_state->topology);
-
- if (radv_pipeline_has_gs(pipeline)) {
- if (si_conv_gl_prim_to_gs_out(gs->info.gs.output_prim) == V_028A6C_TRISTRIP)
- pipeline->graphics.can_use_guardband = true;
- } else if (radv_pipeline_has_tess(pipeline)) {
- if (!tes->info.tes.point_mode &&
- si_conv_gl_prim_to_gs_out(tes->info.tes.primitive_mode) == V_028A6C_TRISTRIP)
- pipeline->graphics.can_use_guardband = true;
- }
-
- if (extra && extra->use_rectlist) {
- pipeline->graphics.can_use_guardband = true;
- }
-
- pipeline->graphics.ia_multi_vgt_param =
- radv_compute_ia_multi_vgt_param_helpers(pipeline);
+ const VkPipelineInputAssemblyStateCreateInfo *ia_state = pCreateInfo->pInputAssemblyState;
+ struct radv_shader_variant *tes = pipeline->shaders[MESA_SHADER_TESS_EVAL];
+ struct radv_shader_variant *gs = pipeline->shaders[MESA_SHADER_GEOMETRY];
+
+ pipeline->graphics.prim_restart_enable = !!ia_state->primitiveRestartEnable;
+ pipeline->graphics.can_use_guardband = radv_prim_can_use_guardband(ia_state->topology);
+
+ if (radv_pipeline_has_gs(pipeline)) {
+ if (si_conv_gl_prim_to_gs_out(gs->info.gs.output_prim) == V_028A6C_TRISTRIP)
+ pipeline->graphics.can_use_guardband = true;
+ } else if (radv_pipeline_has_tess(pipeline)) {
+ if (!tes->info.tes.point_mode &&
+ si_conv_gl_prim_to_gs_out(tes->info.tes.primitive_mode) == V_028A6C_TRISTRIP)
+ pipeline->graphics.can_use_guardband = true;
+ }
+
+ if (extra && extra->use_rectlist) {
+ pipeline->graphics.can_use_guardband = true;
+ }
+
+ pipeline->graphics.ia_multi_vgt_param = radv_compute_ia_multi_vgt_param_helpers(pipeline);
}
static void
radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
- const VkGraphicsPipelineCreateInfo *pCreateInfo,
- const struct radv_graphics_pipeline_create_info *extra)
+ const VkGraphicsPipelineCreateInfo *pCreateInfo,
+ const struct radv_graphics_pipeline_create_info *extra)
{
- uint64_t needed_states = radv_pipeline_needed_dynamic_state(pCreateInfo);
- uint64_t states = needed_states;
- RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
- struct radv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass];
-
- pipeline->dynamic_state = default_dynamic_state;
- pipeline->graphics.needed_dynamic_state = needed_states;
-
- if (pCreateInfo->pDynamicState) {
- /* Remove all of the states that are marked as dynamic */
- uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;
- for (uint32_t s = 0; s < count; s++)
- states &= ~radv_dynamic_state_mask(pCreateInfo->pDynamicState->pDynamicStates[s]);
- }
-
- struct radv_dynamic_state *dynamic = &pipeline->dynamic_state;
-
- if (needed_states & RADV_DYNAMIC_VIEWPORT) {
- assert(pCreateInfo->pViewportState);
-
- dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount;
- if (states & RADV_DYNAMIC_VIEWPORT) {
- typed_memcpy(dynamic->viewport.viewports,
- pCreateInfo->pViewportState->pViewports,
- pCreateInfo->pViewportState->viewportCount);
- }
- }
-
- if (needed_states & RADV_DYNAMIC_SCISSOR) {
- dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount;
- if (states & RADV_DYNAMIC_SCISSOR) {
- typed_memcpy(dynamic->scissor.scissors,
- pCreateInfo->pViewportState->pScissors,
- pCreateInfo->pViewportState->scissorCount);
- }
- }
-
- if (states & RADV_DYNAMIC_LINE_WIDTH) {
- assert(pCreateInfo->pRasterizationState);
- dynamic->line_width = pCreateInfo->pRasterizationState->lineWidth;
- }
-
- if (states & RADV_DYNAMIC_DEPTH_BIAS) {
- assert(pCreateInfo->pRasterizationState);
- dynamic->depth_bias.bias =
- pCreateInfo->pRasterizationState->depthBiasConstantFactor;
- dynamic->depth_bias.clamp =
- pCreateInfo->pRasterizationState->depthBiasClamp;
- dynamic->depth_bias.slope =
- pCreateInfo->pRasterizationState->depthBiasSlopeFactor;
- }
-
- /* Section 9.2 of the Vulkan 1.0.15 spec says:
- *
- * pColorBlendState is [...] NULL if the pipeline has rasterization
- * disabled or if the subpass of the render pass the pipeline is
- * created against does not use any color attachments.
- */
- if (subpass->has_color_att && states & RADV_DYNAMIC_BLEND_CONSTANTS) {
- assert(pCreateInfo->pColorBlendState);
- typed_memcpy(dynamic->blend_constants,
- pCreateInfo->pColorBlendState->blendConstants, 4);
- }
-
- if (states & RADV_DYNAMIC_CULL_MODE) {
- dynamic->cull_mode =
- pCreateInfo->pRasterizationState->cullMode;
- }
-
- if (states & RADV_DYNAMIC_FRONT_FACE) {
- dynamic->front_face =
- pCreateInfo->pRasterizationState->frontFace;
- }
-
- if (states & RADV_DYNAMIC_PRIMITIVE_TOPOLOGY) {
- dynamic->primitive_topology =
- si_translate_prim(pCreateInfo->pInputAssemblyState->topology);
- if (extra && extra->use_rectlist) {
- dynamic->primitive_topology = V_008958_DI_PT_RECTLIST;
- }
- }
-
- /* If there is no depthstencil attachment, then don't read
- * pDepthStencilState. The Vulkan spec states that pDepthStencilState may
- * be NULL in this case. Even if pDepthStencilState is non-NULL, there is
- * no need to override the depthstencil defaults in
- * radv_pipeline::dynamic_state when there is no depthstencil attachment.
- *
- * Section 9.2 of the Vulkan 1.0.15 spec says:
- *
- * pDepthStencilState is [...] NULL if the pipeline has rasterization
- * disabled or if the subpass of the render pass the pipeline is created
- * against does not use a depth/stencil attachment.
- */
- if (needed_states && subpass->depth_stencil_attachment) {
- assert(pCreateInfo->pDepthStencilState);
-
- if (states & RADV_DYNAMIC_DEPTH_BOUNDS) {
- dynamic->depth_bounds.min =
- pCreateInfo->pDepthStencilState->minDepthBounds;
- dynamic->depth_bounds.max =
- pCreateInfo->pDepthStencilState->maxDepthBounds;
- }
-
- if (states & RADV_DYNAMIC_STENCIL_COMPARE_MASK) {
- dynamic->stencil_compare_mask.front =
- pCreateInfo->pDepthStencilState->front.compareMask;
- dynamic->stencil_compare_mask.back =
- pCreateInfo->pDepthStencilState->back.compareMask;
- }
-
- if (states & RADV_DYNAMIC_STENCIL_WRITE_MASK) {
- dynamic->stencil_write_mask.front =
- pCreateInfo->pDepthStencilState->front.writeMask;
- dynamic->stencil_write_mask.back =
- pCreateInfo->pDepthStencilState->back.writeMask;
- }
-
- if (states & RADV_DYNAMIC_STENCIL_REFERENCE) {
- dynamic->stencil_reference.front =
- pCreateInfo->pDepthStencilState->front.reference;
- dynamic->stencil_reference.back =
- pCreateInfo->pDepthStencilState->back.reference;
- }
-
- if (states & RADV_DYNAMIC_DEPTH_TEST_ENABLE) {
- dynamic->depth_test_enable =
- pCreateInfo->pDepthStencilState->depthTestEnable;
- }
-
- if (states & RADV_DYNAMIC_DEPTH_WRITE_ENABLE) {
- dynamic->depth_write_enable =
- pCreateInfo->pDepthStencilState->depthWriteEnable;
- }
-
- if (states & RADV_DYNAMIC_DEPTH_COMPARE_OP) {
- dynamic->depth_compare_op =
- pCreateInfo->pDepthStencilState->depthCompareOp;
- }
-
- if (states & RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE) {
- dynamic->depth_bounds_test_enable =
- pCreateInfo->pDepthStencilState->depthBoundsTestEnable;
- }
-
- if (states & RADV_DYNAMIC_STENCIL_TEST_ENABLE) {
- dynamic->stencil_test_enable =
- pCreateInfo->pDepthStencilState->stencilTestEnable;
- }
-
- if (states & RADV_DYNAMIC_STENCIL_OP) {
- dynamic->stencil_op.front.compare_op =
- pCreateInfo->pDepthStencilState->front.compareOp;
- dynamic->stencil_op.front.fail_op =
- pCreateInfo->pDepthStencilState->front.failOp;
- dynamic->stencil_op.front.pass_op =
- pCreateInfo->pDepthStencilState->front.passOp;
- dynamic->stencil_op.front.depth_fail_op =
- pCreateInfo->pDepthStencilState->front.depthFailOp;
-
- dynamic->stencil_op.back.compare_op =
- pCreateInfo->pDepthStencilState->back.compareOp;
- dynamic->stencil_op.back.fail_op =
- pCreateInfo->pDepthStencilState->back.failOp;
- dynamic->stencil_op.back.pass_op =
- pCreateInfo->pDepthStencilState->back.passOp;
- dynamic->stencil_op.back.depth_fail_op =
- pCreateInfo->pDepthStencilState->back.depthFailOp;
- }
- }
-
- const VkPipelineDiscardRectangleStateCreateInfoEXT *discard_rectangle_info =
- vk_find_struct_const(pCreateInfo->pNext, PIPELINE_DISCARD_RECTANGLE_STATE_CREATE_INFO_EXT);
- if (needed_states & RADV_DYNAMIC_DISCARD_RECTANGLE) {
- dynamic->discard_rectangle.count = discard_rectangle_info->discardRectangleCount;
- if (states & RADV_DYNAMIC_DISCARD_RECTANGLE) {
- typed_memcpy(dynamic->discard_rectangle.rectangles,
- discard_rectangle_info->pDiscardRectangles,
- discard_rectangle_info->discardRectangleCount);
- }
- }
-
- if (needed_states & RADV_DYNAMIC_SAMPLE_LOCATIONS) {
- const VkPipelineSampleLocationsStateCreateInfoEXT *sample_location_info =
- vk_find_struct_const(pCreateInfo->pMultisampleState->pNext,
- PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT);
- /* If sampleLocationsEnable is VK_FALSE, the default sample
- * locations are used and the values specified in
- * sampleLocationsInfo are ignored.
- */
- if (sample_location_info->sampleLocationsEnable) {
- const VkSampleLocationsInfoEXT *pSampleLocationsInfo =
- &sample_location_info->sampleLocationsInfo;
-
- assert(pSampleLocationsInfo->sampleLocationsCount <= MAX_SAMPLE_LOCATIONS);
-
- dynamic->sample_location.per_pixel = pSampleLocationsInfo->sampleLocationsPerPixel;
- dynamic->sample_location.grid_size = pSampleLocationsInfo->sampleLocationGridSize;
- dynamic->sample_location.count = pSampleLocationsInfo->sampleLocationsCount;
- typed_memcpy(&dynamic->sample_location.locations[0],
- pSampleLocationsInfo->pSampleLocations,
- pSampleLocationsInfo->sampleLocationsCount);
- }
- }
-
- const VkPipelineRasterizationLineStateCreateInfoEXT *rast_line_info =
- vk_find_struct_const(pCreateInfo->pRasterizationState->pNext,
- PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT);
- if (needed_states & RADV_DYNAMIC_LINE_STIPPLE) {
- dynamic->line_stipple.factor = rast_line_info->lineStippleFactor;
- dynamic->line_stipple.pattern = rast_line_info->lineStipplePattern;
- }
-
- if (!(states & RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE))
- pipeline->graphics.uses_dynamic_stride = true;
-
- const VkPipelineFragmentShadingRateStateCreateInfoKHR *shading_rate =
- vk_find_struct_const(pCreateInfo->pNext, PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR);
- if (needed_states & RADV_DYNAMIC_FRAGMENT_SHADING_RATE) {
- dynamic->fragment_shading_rate.size = shading_rate->fragmentSize;
- for (int i = 0; i < 2; i++)
- dynamic->fragment_shading_rate.combiner_ops[i] = shading_rate->combinerOps[i];
- }
-
- pipeline->dynamic_state.mask = states;
+ uint64_t needed_states = radv_pipeline_needed_dynamic_state(pCreateInfo);
+ uint64_t states = needed_states;
+ RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
+ struct radv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass];
+
+ pipeline->dynamic_state = default_dynamic_state;
+ pipeline->graphics.needed_dynamic_state = needed_states;
+
+ if (pCreateInfo->pDynamicState) {
+ /* Remove all of the states that are marked as dynamic */
+ uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;
+ for (uint32_t s = 0; s < count; s++)
+ states &= ~radv_dynamic_state_mask(pCreateInfo->pDynamicState->pDynamicStates[s]);
+ }
+
+ struct radv_dynamic_state *dynamic = &pipeline->dynamic_state;
+
+ if (needed_states & RADV_DYNAMIC_VIEWPORT) {
+ assert(pCreateInfo->pViewportState);
+
+ dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount;
+ if (states & RADV_DYNAMIC_VIEWPORT) {
+ typed_memcpy(dynamic->viewport.viewports, pCreateInfo->pViewportState->pViewports,
+ pCreateInfo->pViewportState->viewportCount);
+ }
+ }
+
+ if (needed_states & RADV_DYNAMIC_SCISSOR) {
+ dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount;
+ if (states & RADV_DYNAMIC_SCISSOR) {
+ typed_memcpy(dynamic->scissor.scissors, pCreateInfo->pViewportState->pScissors,
+ pCreateInfo->pViewportState->scissorCount);
+ }
+ }
+
+ if (states & RADV_DYNAMIC_LINE_WIDTH) {
+ assert(pCreateInfo->pRasterizationState);
+ dynamic->line_width = pCreateInfo->pRasterizationState->lineWidth;
+ }
+
+ if (states & RADV_DYNAMIC_DEPTH_BIAS) {
+ assert(pCreateInfo->pRasterizationState);
+ dynamic->depth_bias.bias = pCreateInfo->pRasterizationState->depthBiasConstantFactor;
+ dynamic->depth_bias.clamp = pCreateInfo->pRasterizationState->depthBiasClamp;
+ dynamic->depth_bias.slope = pCreateInfo->pRasterizationState->depthBiasSlopeFactor;
+ }
+
+ /* Section 9.2 of the Vulkan 1.0.15 spec says:
+ *
+ * pColorBlendState is [...] NULL if the pipeline has rasterization
+ * disabled or if the subpass of the render pass the pipeline is
+ * created against does not use any color attachments.
+ */
+ if (subpass->has_color_att && states & RADV_DYNAMIC_BLEND_CONSTANTS) {
+ assert(pCreateInfo->pColorBlendState);
+ typed_memcpy(dynamic->blend_constants, pCreateInfo->pColorBlendState->blendConstants, 4);
+ }
+
+ if (states & RADV_DYNAMIC_CULL_MODE) {
+ dynamic->cull_mode = pCreateInfo->pRasterizationState->cullMode;
+ }
+
+ if (states & RADV_DYNAMIC_FRONT_FACE) {
+ dynamic->front_face = pCreateInfo->pRasterizationState->frontFace;
+ }
+
+ if (states & RADV_DYNAMIC_PRIMITIVE_TOPOLOGY) {
+ dynamic->primitive_topology = si_translate_prim(pCreateInfo->pInputAssemblyState->topology);
+ if (extra && extra->use_rectlist) {
+ dynamic->primitive_topology = V_008958_DI_PT_RECTLIST;
+ }
+ }
+
+ /* If there is no depthstencil attachment, then don't read
+ * pDepthStencilState. The Vulkan spec states that pDepthStencilState may
+ * be NULL in this case. Even if pDepthStencilState is non-NULL, there is
+ * no need to override the depthstencil defaults in
+ * radv_pipeline::dynamic_state when there is no depthstencil attachment.
+ *
+ * Section 9.2 of the Vulkan 1.0.15 spec says:
+ *
+ * pDepthStencilState is [...] NULL if the pipeline has rasterization
+ * disabled or if the subpass of the render pass the pipeline is created
+ * against does not use a depth/stencil attachment.
+ */
+ if (needed_states && subpass->depth_stencil_attachment) {
+ assert(pCreateInfo->pDepthStencilState);
+
+ if (states & RADV_DYNAMIC_DEPTH_BOUNDS) {
+ dynamic->depth_bounds.min = pCreateInfo->pDepthStencilState->minDepthBounds;
+ dynamic->depth_bounds.max = pCreateInfo->pDepthStencilState->maxDepthBounds;
+ }
+
+ if (states & RADV_DYNAMIC_STENCIL_COMPARE_MASK) {
+ dynamic->stencil_compare_mask.front = pCreateInfo->pDepthStencilState->front.compareMask;
+ dynamic->stencil_compare_mask.back = pCreateInfo->pDepthStencilState->back.compareMask;
+ }
+
+ if (states & RADV_DYNAMIC_STENCIL_WRITE_MASK) {
+ dynamic->stencil_write_mask.front = pCreateInfo->pDepthStencilState->front.writeMask;
+ dynamic->stencil_write_mask.back = pCreateInfo->pDepthStencilState->back.writeMask;
+ }
+
+ if (states & RADV_DYNAMIC_STENCIL_REFERENCE) {
+ dynamic->stencil_reference.front = pCreateInfo->pDepthStencilState->front.reference;
+ dynamic->stencil_reference.back = pCreateInfo->pDepthStencilState->back.reference;
+ }
+
+ if (states & RADV_DYNAMIC_DEPTH_TEST_ENABLE) {
+ dynamic->depth_test_enable = pCreateInfo->pDepthStencilState->depthTestEnable;
+ }
+
+ if (states & RADV_DYNAMIC_DEPTH_WRITE_ENABLE) {
+ dynamic->depth_write_enable = pCreateInfo->pDepthStencilState->depthWriteEnable;
+ }
+
+ if (states & RADV_DYNAMIC_DEPTH_COMPARE_OP) {
+ dynamic->depth_compare_op = pCreateInfo->pDepthStencilState->depthCompareOp;
+ }
+
+ if (states & RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE) {
+ dynamic->depth_bounds_test_enable = pCreateInfo->pDepthStencilState->depthBoundsTestEnable;
+ }
+
+ if (states & RADV_DYNAMIC_STENCIL_TEST_ENABLE) {
+ dynamic->stencil_test_enable = pCreateInfo->pDepthStencilState->stencilTestEnable;
+ }
+
+ if (states & RADV_DYNAMIC_STENCIL_OP) {
+ dynamic->stencil_op.front.compare_op = pCreateInfo->pDepthStencilState->front.compareOp;
+ dynamic->stencil_op.front.fail_op = pCreateInfo->pDepthStencilState->front.failOp;
+ dynamic->stencil_op.front.pass_op = pCreateInfo->pDepthStencilState->front.passOp;
+ dynamic->stencil_op.front.depth_fail_op =
+ pCreateInfo->pDepthStencilState->front.depthFailOp;
+
+ dynamic->stencil_op.back.compare_op = pCreateInfo->pDepthStencilState->back.compareOp;
+ dynamic->stencil_op.back.fail_op = pCreateInfo->pDepthStencilState->back.failOp;
+ dynamic->stencil_op.back.pass_op = pCreateInfo->pDepthStencilState->back.passOp;
+ dynamic->stencil_op.back.depth_fail_op = pCreateInfo->pDepthStencilState->back.depthFailOp;
+ }
+ }
+
+ const VkPipelineDiscardRectangleStateCreateInfoEXT *discard_rectangle_info =
+ vk_find_struct_const(pCreateInfo->pNext, PIPELINE_DISCARD_RECTANGLE_STATE_CREATE_INFO_EXT);
+ if (needed_states & RADV_DYNAMIC_DISCARD_RECTANGLE) {
+ dynamic->discard_rectangle.count = discard_rectangle_info->discardRectangleCount;
+ if (states & RADV_DYNAMIC_DISCARD_RECTANGLE) {
+ typed_memcpy(dynamic->discard_rectangle.rectangles,
+ discard_rectangle_info->pDiscardRectangles,
+ discard_rectangle_info->discardRectangleCount);
+ }
+ }
+
+ if (needed_states & RADV_DYNAMIC_SAMPLE_LOCATIONS) {
+ const VkPipelineSampleLocationsStateCreateInfoEXT *sample_location_info =
+ vk_find_struct_const(pCreateInfo->pMultisampleState->pNext,
+ PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT);
+ /* If sampleLocationsEnable is VK_FALSE, the default sample
+ * locations are used and the values specified in
+ * sampleLocationsInfo are ignored.
+ */
+ if (sample_location_info->sampleLocationsEnable) {
+ const VkSampleLocationsInfoEXT *pSampleLocationsInfo =
+ &sample_location_info->sampleLocationsInfo;
+
+ assert(pSampleLocationsInfo->sampleLocationsCount <= MAX_SAMPLE_LOCATIONS);
+
+ dynamic->sample_location.per_pixel = pSampleLocationsInfo->sampleLocationsPerPixel;
+ dynamic->sample_location.grid_size = pSampleLocationsInfo->sampleLocationGridSize;
+ dynamic->sample_location.count = pSampleLocationsInfo->sampleLocationsCount;
+ typed_memcpy(&dynamic->sample_location.locations[0],
+ pSampleLocationsInfo->pSampleLocations,
+ pSampleLocationsInfo->sampleLocationsCount);
+ }
+ }
+
+ const VkPipelineRasterizationLineStateCreateInfoEXT *rast_line_info = vk_find_struct_const(
+ pCreateInfo->pRasterizationState->pNext, PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT);
+ if (needed_states & RADV_DYNAMIC_LINE_STIPPLE) {
+ dynamic->line_stipple.factor = rast_line_info->lineStippleFactor;
+ dynamic->line_stipple.pattern = rast_line_info->lineStipplePattern;
+ }
+
+ if (!(states & RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE))
+ pipeline->graphics.uses_dynamic_stride = true;
+
+ const VkPipelineFragmentShadingRateStateCreateInfoKHR *shading_rate = vk_find_struct_const(
+ pCreateInfo->pNext, PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR);
+ if (needed_states & RADV_DYNAMIC_FRAGMENT_SHADING_RATE) {
+ dynamic->fragment_shading_rate.size = shading_rate->fragmentSize;
+ for (int i = 0; i < 2; i++)
+ dynamic->fragment_shading_rate.combiner_ops[i] = shading_rate->combinerOps[i];
+ }
+
+ pipeline->dynamic_state.mask = states;
}
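
The heart of the rewritten radv_pipeline_init_dynamic_state() is the mask arithmetic: needed_states lists every piece of state this pipeline could consume, and each VK_DYNAMIC_STATE_* the application declares clears the matching bit, so only statically known values are copied out of pCreateInfo and dynamic_state.mask records what stays baked in. A minimal standalone sketch of that filtering follows; the DYN_* bits and dynamic_state_mask() helper are simplified stand-ins, not the driver's real RADV_DYNAMIC_* flags or radv_dynamic_state_mask().

   #include <stdint.h>
   #include <stdio.h>

   /* Simplified stand-ins for the driver's per-state bits. */
   #define DYN_VIEWPORT   (1ull << 0)
   #define DYN_SCISSOR    (1ull << 1)
   #define DYN_LINE_WIDTH (1ull << 2)

   /* Hypothetical translation of a VkDynamicState value to a driver bit. */
   static uint64_t dynamic_state_mask(int vk_dynamic_state)
   {
      switch (vk_dynamic_state) {
      case 0: return DYN_VIEWPORT;   /* VK_DYNAMIC_STATE_VIEWPORT */
      case 1: return DYN_SCISSOR;    /* VK_DYNAMIC_STATE_SCISSOR */
      case 2: return DYN_LINE_WIDTH; /* VK_DYNAMIC_STATE_LINE_WIDTH */
      default: return 0;
      }
   }

   int main(void)
   {
      uint64_t needed = DYN_VIEWPORT | DYN_SCISSOR | DYN_LINE_WIDTH;
      uint64_t states = needed;
      int app_dynamic[] = {0, 2}; /* app makes viewport and line width dynamic */

      for (unsigned i = 0; i < 2; i++)
         states &= ~dynamic_state_mask(app_dynamic[i]);

      /* Only the scissor remains static and would be baked into the pipeline. */
      printf("static states: 0x%llx\n", (unsigned long long)states); /* 0x2 */
      return 0;
   }
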
static void
radv_pipeline_init_raster_state(struct radv_pipeline *pipeline,
- const VkGraphicsPipelineCreateInfo *pCreateInfo)
+ const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
- const VkPipelineRasterizationStateCreateInfo *raster_info =
- pCreateInfo->pRasterizationState;
-
- pipeline->graphics.pa_su_sc_mode_cntl =
- S_028814_FACE(raster_info->frontFace) |
- S_028814_CULL_FRONT(!!(raster_info->cullMode & VK_CULL_MODE_FRONT_BIT)) |
- S_028814_CULL_BACK(!!(raster_info->cullMode & VK_CULL_MODE_BACK_BIT)) |
- S_028814_POLY_MODE(raster_info->polygonMode != VK_POLYGON_MODE_FILL) |
- S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(raster_info->polygonMode)) |
- S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(raster_info->polygonMode)) |
- S_028814_POLY_OFFSET_FRONT_ENABLE(raster_info->depthBiasEnable ? 1 : 0) |
- S_028814_POLY_OFFSET_BACK_ENABLE(raster_info->depthBiasEnable ? 1 : 0) |
- S_028814_POLY_OFFSET_PARA_ENABLE(raster_info->depthBiasEnable ? 1 : 0);
-
- if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
- /* It should also be set if PERPENDICULAR_ENDCAP_ENA is set. */
- pipeline->graphics.pa_su_sc_mode_cntl |=
- S_028814_KEEP_TOGETHER_ENABLE(raster_info->polygonMode != VK_POLYGON_MODE_FILL);
- }
+ const VkPipelineRasterizationStateCreateInfo *raster_info = pCreateInfo->pRasterizationState;
+
+ pipeline->graphics.pa_su_sc_mode_cntl =
+ S_028814_FACE(raster_info->frontFace) |
+ S_028814_CULL_FRONT(!!(raster_info->cullMode & VK_CULL_MODE_FRONT_BIT)) |
+ S_028814_CULL_BACK(!!(raster_info->cullMode & VK_CULL_MODE_BACK_BIT)) |
+ S_028814_POLY_MODE(raster_info->polygonMode != VK_POLYGON_MODE_FILL) |
+ S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(raster_info->polygonMode)) |
+ S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(raster_info->polygonMode)) |
+ S_028814_POLY_OFFSET_FRONT_ENABLE(raster_info->depthBiasEnable ? 1 : 0) |
+ S_028814_POLY_OFFSET_BACK_ENABLE(raster_info->depthBiasEnable ? 1 : 0) |
+ S_028814_POLY_OFFSET_PARA_ENABLE(raster_info->depthBiasEnable ? 1 : 0);
+
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
+ /* It should also be set if PERPENDICULAR_ENDCAP_ENA is set. */
+ pipeline->graphics.pa_su_sc_mode_cntl |=
+ S_028814_KEEP_TOGETHER_ENABLE(raster_info->polygonMode != VK_POLYGON_MODE_FILL);
+ }
}
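
PA_SU_SC_MODE_CNTL is assembled by OR-ing independent bit-fields, each produced by a generated S_028814_*() packing helper. The sketch below shows the general shape of such helpers with made-up shifts and widths (the real register layout differs): with cullMode = VK_CULL_MODE_BACK_BIT and a non-FILL polygon mode, only the back-face cull field and the poly-mode field end up set.

   #include <stdio.h>

   /* Invented field layout in the style of the generated S_028814_*() macros. */
   #define S_DEMO_CULL_FRONT(x) (((unsigned)(x) & 0x1) << 0)
   #define S_DEMO_CULL_BACK(x)  (((unsigned)(x) & 0x1) << 1)
   #define S_DEMO_POLY_MODE(x)  (((unsigned)(x) & 0x1) << 2)

   int main(void)
   {
      unsigned cull_mode = 0x2; /* VK_CULL_MODE_BACK_BIT */
      int not_fill = 1;         /* polygonMode != VK_POLYGON_MODE_FILL */

      unsigned cntl = S_DEMO_CULL_FRONT(!!(cull_mode & 0x1)) |
                      S_DEMO_CULL_BACK(!!(cull_mode & 0x2)) |
                      S_DEMO_POLY_MODE(not_fill);

      printf("demo mode cntl = 0x%x\n", cntl); /* 0x6 */
      return 0;
   }
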
static void
radv_pipeline_init_depth_stencil_state(struct radv_pipeline *pipeline,
- const VkGraphicsPipelineCreateInfo *pCreateInfo)
+ const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
- const VkPipelineDepthStencilStateCreateInfo *ds_info
- = radv_pipeline_get_depth_stencil_state(pCreateInfo);
- RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
- struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
- struct radv_render_pass_attachment *attachment = NULL;
- uint32_t db_depth_control = 0;
-
- if (subpass->depth_stencil_attachment)
- attachment = pass->attachments + subpass->depth_stencil_attachment->attachment;
-
- bool has_depth_attachment = attachment && vk_format_has_depth(attachment->format);
- bool has_stencil_attachment = attachment && vk_format_has_stencil(attachment->format);
-
- if (ds_info) {
- if (has_depth_attachment) {
- db_depth_control = S_028800_Z_ENABLE(ds_info->depthTestEnable ? 1 : 0) |
- S_028800_Z_WRITE_ENABLE(ds_info->depthWriteEnable ? 1 : 0) |
- S_028800_ZFUNC(ds_info->depthCompareOp) |
- S_028800_DEPTH_BOUNDS_ENABLE(ds_info->depthBoundsTestEnable ? 1 : 0);
- }
-
- if (has_stencil_attachment && ds_info->stencilTestEnable) {
- db_depth_control |= S_028800_STENCIL_ENABLE(1) | S_028800_BACKFACE_ENABLE(1);
- db_depth_control |= S_028800_STENCILFUNC(ds_info->front.compareOp);
- db_depth_control |= S_028800_STENCILFUNC_BF(ds_info->back.compareOp);
- }
- }
-
- pipeline->graphics.db_depth_control = db_depth_control;
+ const VkPipelineDepthStencilStateCreateInfo *ds_info =
+ radv_pipeline_get_depth_stencil_state(pCreateInfo);
+ RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
+ struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
+ struct radv_render_pass_attachment *attachment = NULL;
+ uint32_t db_depth_control = 0;
+
+ if (subpass->depth_stencil_attachment)
+ attachment = pass->attachments + subpass->depth_stencil_attachment->attachment;
+
+ bool has_depth_attachment = attachment && vk_format_has_depth(attachment->format);
+ bool has_stencil_attachment = attachment && vk_format_has_stencil(attachment->format);
+
+ if (ds_info) {
+ if (has_depth_attachment) {
+ db_depth_control = S_028800_Z_ENABLE(ds_info->depthTestEnable ? 1 : 0) |
+ S_028800_Z_WRITE_ENABLE(ds_info->depthWriteEnable ? 1 : 0) |
+ S_028800_ZFUNC(ds_info->depthCompareOp) |
+ S_028800_DEPTH_BOUNDS_ENABLE(ds_info->depthBoundsTestEnable ? 1 : 0);
+ }
+
+ if (has_stencil_attachment && ds_info->stencilTestEnable) {
+ db_depth_control |= S_028800_STENCIL_ENABLE(1) | S_028800_BACKFACE_ENABLE(1);
+ db_depth_control |= S_028800_STENCILFUNC(ds_info->front.compareOp);
+ db_depth_control |= S_028800_STENCILFUNC_BF(ds_info->back.compareOp);
+ }
+ }
+
+ pipeline->graphics.db_depth_control = db_depth_control;
}
static void
-gfx9_get_gs_info(const struct radv_pipeline_key *key,
- const struct radv_pipeline *pipeline,
- nir_shader **nir,
- struct radv_shader_info *infos,
- struct gfx9_gs_info *out)
+gfx9_get_gs_info(const struct radv_pipeline_key *key, const struct radv_pipeline *pipeline,
+ nir_shader **nir, struct radv_shader_info *infos, struct gfx9_gs_info *out)
{
- struct radv_shader_info *gs_info = &infos[MESA_SHADER_GEOMETRY];
- struct radv_es_output_info *es_info;
- if (pipeline->device->physical_device->rad_info.chip_class >= GFX9)
- es_info = nir[MESA_SHADER_TESS_CTRL] ? &gs_info->tes.es_info : &gs_info->vs.es_info;
- else
- es_info = nir[MESA_SHADER_TESS_CTRL] ?
- &infos[MESA_SHADER_TESS_EVAL].tes.es_info :
- &infos[MESA_SHADER_VERTEX].vs.es_info;
-
- unsigned gs_num_invocations = MAX2(gs_info->gs.invocations, 1);
- bool uses_adjacency;
- switch(key->topology) {
- case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
- case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
- case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
- case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
- uses_adjacency = true;
- break;
- default:
- uses_adjacency = false;
- break;
- }
-
- /* All these are in dwords: */
- /* We can't allow using the whole LDS, because GS waves compete with
- * other shader stages for LDS space. */
- const unsigned max_lds_size = 8 * 1024;
- const unsigned esgs_itemsize = es_info->esgs_itemsize / 4;
- unsigned esgs_lds_size;
-
- /* All these are per subgroup: */
- const unsigned max_out_prims = 32 * 1024;
- const unsigned max_es_verts = 255;
- const unsigned ideal_gs_prims = 64;
- unsigned max_gs_prims, gs_prims;
- unsigned min_es_verts, es_verts, worst_case_es_verts;
-
- if (uses_adjacency || gs_num_invocations > 1)
- max_gs_prims = 127 / gs_num_invocations;
- else
- max_gs_prims = 255;
-
- /* MAX_PRIMS_PER_SUBGROUP = gs_prims * max_vert_out * gs_invocations.
- * Make sure we don't go over the maximum value.
- */
- if (gs_info->gs.vertices_out > 0) {
- max_gs_prims = MIN2(max_gs_prims,
- max_out_prims /
- (gs_info->gs.vertices_out * gs_num_invocations));
- }
- assert(max_gs_prims > 0);
-
- /* If the primitive has adjacency, halve the number of vertices
- * that will be reused in multiple primitives.
- */
- min_es_verts = gs_info->gs.vertices_in / (uses_adjacency ? 2 : 1);
-
- gs_prims = MIN2(ideal_gs_prims, max_gs_prims);
- worst_case_es_verts = MIN2(min_es_verts * gs_prims, max_es_verts);
-
- /* Compute ESGS LDS size based on the worst case number of ES vertices
- * needed to create the target number of GS prims per subgroup.
- */
- esgs_lds_size = esgs_itemsize * worst_case_es_verts;
-
- /* If total LDS usage is too big, refactor partitions based on ratio
- * of ESGS item sizes.
- */
- if (esgs_lds_size > max_lds_size) {
- /* Our target GS Prims Per Subgroup was too large. Calculate
- * the maximum number of GS Prims Per Subgroup that will fit
- * into LDS, capped by the maximum that the hardware can support.
- */
- gs_prims = MIN2((max_lds_size / (esgs_itemsize * min_es_verts)),
- max_gs_prims);
- assert(gs_prims > 0);
- worst_case_es_verts = MIN2(min_es_verts * gs_prims,
- max_es_verts);
-
- esgs_lds_size = esgs_itemsize * worst_case_es_verts;
- assert(esgs_lds_size <= max_lds_size);
- }
-
- /* Now calculate remaining ESGS information. */
- if (esgs_lds_size)
- es_verts = MIN2(esgs_lds_size / esgs_itemsize, max_es_verts);
- else
- es_verts = max_es_verts;
-
- /* Vertices for adjacency primitives are not always reused, so restore
- * it for ES_VERTS_PER_SUBGRP.
- */
- min_es_verts = gs_info->gs.vertices_in;
-
- /* For normal primitives, the VGT only checks if they are past the ES
- * verts per subgroup after allocating a full GS primitive and if they
- * are, kick off a new subgroup. But if those additional ES verts are
- * unique (e.g. not reused) we need to make sure there is enough LDS
- * space to account for those ES verts beyond ES_VERTS_PER_SUBGRP.
- */
- es_verts -= min_es_verts - 1;
-
- uint32_t es_verts_per_subgroup = es_verts;
- uint32_t gs_prims_per_subgroup = gs_prims;
- uint32_t gs_inst_prims_in_subgroup = gs_prims * gs_num_invocations;
- uint32_t max_prims_per_subgroup = gs_inst_prims_in_subgroup * gs_info->gs.vertices_out;
- out->lds_size = align(esgs_lds_size, 128) / 128;
- out->vgt_gs_onchip_cntl = S_028A44_ES_VERTS_PER_SUBGRP(es_verts_per_subgroup) |
- S_028A44_GS_PRIMS_PER_SUBGRP(gs_prims_per_subgroup) |
- S_028A44_GS_INST_PRIMS_IN_SUBGRP(gs_inst_prims_in_subgroup);
- out->vgt_gs_max_prims_per_subgroup = S_028A94_MAX_PRIMS_PER_SUBGROUP(max_prims_per_subgroup);
- out->vgt_esgs_ring_itemsize = esgs_itemsize;
- assert(max_prims_per_subgroup <= max_out_prims);
+ struct radv_shader_info *gs_info = &infos[MESA_SHADER_GEOMETRY];
+ struct radv_es_output_info *es_info;
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX9)
+ es_info = nir[MESA_SHADER_TESS_CTRL] ? &gs_info->tes.es_info : &gs_info->vs.es_info;
+ else
+ es_info = nir[MESA_SHADER_TESS_CTRL] ? &infos[MESA_SHADER_TESS_EVAL].tes.es_info
+ : &infos[MESA_SHADER_VERTEX].vs.es_info;
+
+ unsigned gs_num_invocations = MAX2(gs_info->gs.invocations, 1);
+ bool uses_adjacency;
+ switch (key->topology) {
+ case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
+ case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
+ uses_adjacency = true;
+ break;
+ default:
+ uses_adjacency = false;
+ break;
+ }
+
+ /* All these are in dwords: */
+ /* We can't allow using the whole LDS, because GS waves compete with
+ * other shader stages for LDS space. */
+ const unsigned max_lds_size = 8 * 1024;
+ const unsigned esgs_itemsize = es_info->esgs_itemsize / 4;
+ unsigned esgs_lds_size;
+
+ /* All these are per subgroup: */
+ const unsigned max_out_prims = 32 * 1024;
+ const unsigned max_es_verts = 255;
+ const unsigned ideal_gs_prims = 64;
+ unsigned max_gs_prims, gs_prims;
+ unsigned min_es_verts, es_verts, worst_case_es_verts;
+
+ if (uses_adjacency || gs_num_invocations > 1)
+ max_gs_prims = 127 / gs_num_invocations;
+ else
+ max_gs_prims = 255;
+
+ /* MAX_PRIMS_PER_SUBGROUP = gs_prims * max_vert_out * gs_invocations.
+ * Make sure we don't go over the maximum value.
+ */
+ if (gs_info->gs.vertices_out > 0) {
+ max_gs_prims =
+ MIN2(max_gs_prims, max_out_prims / (gs_info->gs.vertices_out * gs_num_invocations));
+ }
+ assert(max_gs_prims > 0);
+
+ /* If the primitive has adjacency, halve the number of vertices
+ * that will be reused in multiple primitives.
+ */
+ min_es_verts = gs_info->gs.vertices_in / (uses_adjacency ? 2 : 1);
+
+ gs_prims = MIN2(ideal_gs_prims, max_gs_prims);
+ worst_case_es_verts = MIN2(min_es_verts * gs_prims, max_es_verts);
+
+ /* Compute ESGS LDS size based on the worst case number of ES vertices
+ * needed to create the target number of GS prims per subgroup.
+ */
+ esgs_lds_size = esgs_itemsize * worst_case_es_verts;
+
+ /* If total LDS usage is too big, refactor partitions based on ratio
+ * of ESGS item sizes.
+ */
+ if (esgs_lds_size > max_lds_size) {
+ /* Our target GS Prims Per Subgroup was too large. Calculate
+ * the maximum number of GS Prims Per Subgroup that will fit
+ * into LDS, capped by the maximum that the hardware can support.
+ */
+ gs_prims = MIN2((max_lds_size / (esgs_itemsize * min_es_verts)), max_gs_prims);
+ assert(gs_prims > 0);
+ worst_case_es_verts = MIN2(min_es_verts * gs_prims, max_es_verts);
+
+ esgs_lds_size = esgs_itemsize * worst_case_es_verts;
+ assert(esgs_lds_size <= max_lds_size);
+ }
+
+ /* Now calculate remaining ESGS information. */
+ if (esgs_lds_size)
+ es_verts = MIN2(esgs_lds_size / esgs_itemsize, max_es_verts);
+ else
+ es_verts = max_es_verts;
+
+ /* Vertices for adjacency primitives are not always reused, so restore
+ * it for ES_VERTS_PER_SUBGRP.
+ */
+ min_es_verts = gs_info->gs.vertices_in;
+
+ /* For normal primitives, the VGT only checks if they are past the ES
+ * verts per subgroup after allocating a full GS primitive and if they
+ * are, kick off a new subgroup. But if those additional ES verts are
+    * unique (i.e. not reused) we need to make sure there is enough LDS
+ * space to account for those ES verts beyond ES_VERTS_PER_SUBGRP.
+ */
+ es_verts -= min_es_verts - 1;
+
+ uint32_t es_verts_per_subgroup = es_verts;
+ uint32_t gs_prims_per_subgroup = gs_prims;
+ uint32_t gs_inst_prims_in_subgroup = gs_prims * gs_num_invocations;
+ uint32_t max_prims_per_subgroup = gs_inst_prims_in_subgroup * gs_info->gs.vertices_out;
+ out->lds_size = align(esgs_lds_size, 128) / 128;
+ out->vgt_gs_onchip_cntl = S_028A44_ES_VERTS_PER_SUBGRP(es_verts_per_subgroup) |
+ S_028A44_GS_PRIMS_PER_SUBGRP(gs_prims_per_subgroup) |
+ S_028A44_GS_INST_PRIMS_IN_SUBGRP(gs_inst_prims_in_subgroup);
+ out->vgt_gs_max_prims_per_subgroup = S_028A94_MAX_PRIMS_PER_SUBGROUP(max_prims_per_subgroup);
+ out->vgt_esgs_ring_itemsize = esgs_itemsize;
+ assert(max_prims_per_subgroup <= max_out_prims);
}
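
A worked run of the sizing above may help. With invented shader parameters (triangle input without adjacency, one GS invocation, four output vertices per primitive, a 4-dword ESGS item), the 768-dword worst case fits well under the 8K-dword budget, so the re-partitioning branch never triggers and the result is 190 ES vertices and 64 GS primitives per subgroup in six 128-dword LDS granules:

   #include <stdio.h>

   #define MIN2(a, b)      ((a) < (b) ? (a) : (b))
   #define ALIGN_POT(x, a) (((x) + (a)-1) & ~((a)-1))

   int main(void)
   {
      const unsigned esgs_itemsize = 4, vertices_in = 3, vertices_out = 4;
      const unsigned max_es_verts = 255, ideal_gs_prims = 64;

      unsigned max_gs_prims = MIN2(255u, (32 * 1024) / vertices_out);        /* 255 */
      unsigned min_es_verts = vertices_in;                                   /* no adjacency */
      unsigned gs_prims = MIN2(ideal_gs_prims, max_gs_prims);                /* 64 */
      unsigned worst_es_verts = MIN2(min_es_verts * gs_prims, max_es_verts); /* 192 */
      unsigned esgs_lds = esgs_itemsize * worst_es_verts;                    /* 768 <= 8192 */
      unsigned es_verts = MIN2(esgs_lds / esgs_itemsize, max_es_verts) - (vertices_in - 1);

      printf("ES_VERTS_PER_SUBGRP=%u GS_PRIMS_PER_SUBGRP=%u lds_size=%u\n",
             es_verts, gs_prims, ALIGN_POT(esgs_lds, 128) / 128); /* 190 64 6 */
      return 0;
   }
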
-static void clamp_gsprims_to_esverts(unsigned *max_gsprims, unsigned max_esverts,
- unsigned min_verts_per_prim, bool use_adjacency)
+static void
+clamp_gsprims_to_esverts(unsigned *max_gsprims, unsigned max_esverts, unsigned min_verts_per_prim,
+ bool use_adjacency)
{
- unsigned max_reuse = max_esverts - min_verts_per_prim;
- if (use_adjacency)
- max_reuse /= 2;
- *max_gsprims = MIN2(*max_gsprims, 1 + max_reuse);
+ unsigned max_reuse = max_esverts - min_verts_per_prim;
+ if (use_adjacency)
+ max_reuse /= 2;
+ *max_gsprims = MIN2(*max_gsprims, 1 + max_reuse);
}
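
For concreteness, with 96 ES vertices available and triangles (three vertices per primitive; the numbers are invented), at most 93 vertices can come from reuse, so at most 94 primitives fit in the subgroup; with adjacency only half of that reuse is assumed and the cap drops to 47. The snippet below just replays the clamp with those fixed inputs:

   #include <stdio.h>

   int main(void)
   {
      unsigned max_esverts = 96, min_verts_per_prim = 3;
      unsigned max_reuse = max_esverts - min_verts_per_prim;    /* 93 */

      printf("cap without adjacency: %u\n", 1 + max_reuse);     /* 94 */
      printf("cap with adjacency:    %u\n", 1 + max_reuse / 2); /* 47 */
      return 0;
   }
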
static unsigned
radv_get_num_input_vertices(nir_shader **nir)
{
- if (nir[MESA_SHADER_GEOMETRY]) {
- nir_shader *gs = nir[MESA_SHADER_GEOMETRY];
+ if (nir[MESA_SHADER_GEOMETRY]) {
+ nir_shader *gs = nir[MESA_SHADER_GEOMETRY];
- return gs->info.gs.vertices_in;
- }
+ return gs->info.gs.vertices_in;
+ }
- if (nir[MESA_SHADER_TESS_CTRL]) {
- nir_shader *tes = nir[MESA_SHADER_TESS_EVAL];
+ if (nir[MESA_SHADER_TESS_CTRL]) {
+ nir_shader *tes = nir[MESA_SHADER_TESS_EVAL];
- if (tes->info.tess.point_mode)
- return 1;
- if (tes->info.tess.primitive_mode == GL_ISOLINES)
- return 2;
- return 3;
- }
+ if (tes->info.tess.point_mode)
+ return 1;
+ if (tes->info.tess.primitive_mode == GL_ISOLINES)
+ return 2;
+ return 3;
+ }
- return 3;
+ return 3;
}
static void
-gfx10_get_ngg_info(const struct radv_pipeline_key *key,
- struct radv_pipeline *pipeline,
- nir_shader **nir,
- struct radv_shader_info *infos,
- struct gfx10_ngg_info *ngg)
+gfx10_get_ngg_info(const struct radv_pipeline_key *key, struct radv_pipeline *pipeline,
+ nir_shader **nir, struct radv_shader_info *infos, struct gfx10_ngg_info *ngg)
{
- struct radv_shader_info *gs_info = &infos[MESA_SHADER_GEOMETRY];
- struct radv_es_output_info *es_info =
- nir[MESA_SHADER_TESS_CTRL] ? &gs_info->tes.es_info : &gs_info->vs.es_info;
- unsigned gs_type = nir[MESA_SHADER_GEOMETRY] ? MESA_SHADER_GEOMETRY : MESA_SHADER_VERTEX;
- unsigned max_verts_per_prim = radv_get_num_input_vertices(nir);
- unsigned min_verts_per_prim =
- gs_type == MESA_SHADER_GEOMETRY ? max_verts_per_prim : 1;
- unsigned gs_num_invocations = nir[MESA_SHADER_GEOMETRY] ? MAX2(gs_info->gs.invocations, 1) : 1;
- bool uses_adjacency;
- switch(key->topology) {
- case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
- case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
- case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
- case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
- uses_adjacency = true;
- break;
- default:
- uses_adjacency = false;
- break;
- }
-
- /* All these are in dwords: */
- /* We can't allow using the whole LDS, because GS waves compete with
- * other shader stages for LDS space.
- *
- * TODO: We should really take the shader's internal LDS use into
- * account. The linker will fail if the size is greater than
- * 8K dwords.
- */
- const unsigned max_lds_size = 8 * 1024 - 768;
- const unsigned target_lds_size = max_lds_size;
- unsigned esvert_lds_size = 0;
- unsigned gsprim_lds_size = 0;
-
- /* All these are per subgroup: */
- const unsigned min_esverts = pipeline->device->physical_device->rad_info.chip_class >= GFX10_3 ? 29 : 24;
- bool max_vert_out_per_gs_instance = false;
- unsigned max_esverts_base = 256;
- unsigned max_gsprims_base = 128; /* default prim group size clamp */
-
- /* Hardware has the following non-natural restrictions on the value
- * of GE_CNTL.VERT_GRP_SIZE based on based on the primitive type of
- * the draw:
- * - at most 252 for any line input primitive type
- * - at most 251 for any quad input primitive type
- * - at most 251 for triangle strips with adjacency (this happens to
- * be the natural limit for triangle *lists* with adjacency)
- */
- max_esverts_base = MIN2(max_esverts_base, 251 + max_verts_per_prim - 1);
-
- if (gs_type == MESA_SHADER_GEOMETRY) {
- unsigned max_out_verts_per_gsprim =
- gs_info->gs.vertices_out * gs_num_invocations;
-
- if (max_out_verts_per_gsprim <= 256) {
- if (max_out_verts_per_gsprim) {
- max_gsprims_base = MIN2(max_gsprims_base,
- 256 / max_out_verts_per_gsprim);
- }
- } else {
- /* Use special multi-cycling mode in which each GS
- * instance gets its own subgroup. Does not work with
- * tessellation. */
- max_vert_out_per_gs_instance = true;
- max_gsprims_base = 1;
- max_out_verts_per_gsprim = gs_info->gs.vertices_out;
- }
-
- esvert_lds_size = es_info->esgs_itemsize / 4;
- gsprim_lds_size = (gs_info->gs.gsvs_vertex_size / 4 + 1) * max_out_verts_per_gsprim;
- } else {
- /* VS and TES. */
- /* LDS size for passing data from GS to ES. */
- struct radv_streamout_info *so_info = nir[MESA_SHADER_TESS_CTRL]
- ? &infos[MESA_SHADER_TESS_EVAL].so
- : &infos[MESA_SHADER_VERTEX].so;
-
- if (so_info->num_outputs)
- esvert_lds_size = 4 * so_info->num_outputs + 1;
-
- /* GS stores Primitive IDs (one DWORD) into LDS at the address
- * corresponding to the ES thread of the provoking vertex. All
- * ES threads load and export PrimitiveID for their thread.
- */
- if (!nir[MESA_SHADER_TESS_CTRL] &&
- infos[MESA_SHADER_VERTEX].vs.outinfo.export_prim_id)
- esvert_lds_size = MAX2(esvert_lds_size, 1);
- }
-
- unsigned max_gsprims = max_gsprims_base;
- unsigned max_esverts = max_esverts_base;
-
- if (esvert_lds_size)
- max_esverts = MIN2(max_esverts, target_lds_size / esvert_lds_size);
- if (gsprim_lds_size)
- max_gsprims = MIN2(max_gsprims, target_lds_size / gsprim_lds_size);
-
- max_esverts = MIN2(max_esverts, max_gsprims * max_verts_per_prim);
- clamp_gsprims_to_esverts(&max_gsprims, max_esverts, min_verts_per_prim, uses_adjacency);
- assert(max_esverts >= max_verts_per_prim && max_gsprims >= 1);
-
- if (esvert_lds_size || gsprim_lds_size) {
- /* Now that we have a rough proportionality between esverts
- * and gsprims based on the primitive type, scale both of them
- * down simultaneously based on required LDS space.
- *
- * We could be smarter about this if we knew how much vertex
- * reuse to expect.
- */
- unsigned lds_total = max_esverts * esvert_lds_size +
- max_gsprims * gsprim_lds_size;
- if (lds_total > target_lds_size) {
- max_esverts = max_esverts * target_lds_size / lds_total;
- max_gsprims = max_gsprims * target_lds_size / lds_total;
-
- max_esverts = MIN2(max_esverts, max_gsprims * max_verts_per_prim);
- clamp_gsprims_to_esverts(&max_gsprims, max_esverts,
- min_verts_per_prim, uses_adjacency);
- assert(max_esverts >= max_verts_per_prim && max_gsprims >= 1);
- }
- }
-
- /* Round up towards full wave sizes for better ALU utilization. */
- if (!max_vert_out_per_gs_instance) {
- unsigned orig_max_esverts;
- unsigned orig_max_gsprims;
- unsigned wavesize;
-
- if (gs_type == MESA_SHADER_GEOMETRY) {
- wavesize = gs_info->wave_size;
- } else {
- wavesize = nir[MESA_SHADER_TESS_CTRL]
- ? infos[MESA_SHADER_TESS_EVAL].wave_size
- : infos[MESA_SHADER_VERTEX].wave_size;
- }
-
- do {
- orig_max_esverts = max_esverts;
- orig_max_gsprims = max_gsprims;
-
- max_esverts = align(max_esverts, wavesize);
- max_esverts = MIN2(max_esverts, max_esverts_base);
- if (esvert_lds_size)
- max_esverts = MIN2(max_esverts,
- (max_lds_size - max_gsprims * gsprim_lds_size) /
- esvert_lds_size);
- max_esverts = MIN2(max_esverts, max_gsprims * max_verts_per_prim);
-
- /* Hardware restriction: minimum value of max_esverts */
- if (pipeline->device->physical_device->rad_info.chip_class == GFX10)
- max_esverts = MAX2(max_esverts, min_esverts - 1 + max_verts_per_prim);
- else
- max_esverts = MAX2(max_esverts, min_esverts);
-
- max_gsprims = align(max_gsprims, wavesize);
- max_gsprims = MIN2(max_gsprims, max_gsprims_base);
- if (gsprim_lds_size) {
- /* Don't count unusable vertices to the LDS
- * size. Those are vertices above the maximum
- * number of vertices that can occur in the
- * workgroup, which is e.g. max_gsprims * 3
- * for triangles.
- */
- unsigned usable_esverts = MIN2(max_esverts, max_gsprims * max_verts_per_prim);
- max_gsprims =
- MIN2(max_gsprims, (max_lds_size - usable_esverts * esvert_lds_size) / gsprim_lds_size);
- }
- clamp_gsprims_to_esverts(&max_gsprims, max_esverts,
- min_verts_per_prim, uses_adjacency);
- assert(max_esverts >= max_verts_per_prim && max_gsprims >= 1);
- } while (orig_max_esverts != max_esverts || orig_max_gsprims != max_gsprims);
-
- /* Verify the restriction. */
- if (pipeline->device->physical_device->rad_info.chip_class == GFX10)
- assert(max_esverts >= min_esverts - 1 + max_verts_per_prim);
- else
- assert(max_esverts >= min_esverts);
- } else {
- /* Hardware restriction: minimum value of max_esverts */
- if (pipeline->device->physical_device->rad_info.chip_class == GFX10)
- max_esverts = MAX2(max_esverts, min_esverts - 1 + max_verts_per_prim);
- else
- max_esverts = MAX2(max_esverts, min_esverts);
- }
-
- unsigned max_out_vertices =
- max_vert_out_per_gs_instance ? gs_info->gs.vertices_out :
- gs_type == MESA_SHADER_GEOMETRY ?
- max_gsprims * gs_num_invocations * gs_info->gs.vertices_out :
- max_esverts;
- assert(max_out_vertices <= 256);
-
- unsigned prim_amp_factor = 1;
- if (gs_type == MESA_SHADER_GEOMETRY) {
- /* Number of output primitives per GS input primitive after
- * GS instancing. */
- prim_amp_factor = gs_info->gs.vertices_out;
- }
-
- /* On Gfx10, the GE only checks against the maximum number of ES verts
- * after allocating a full GS primitive. So we need to ensure that
- * whenever this check passes, there is enough space for a full
- * primitive without vertex reuse.
- */
- if (pipeline->device->physical_device->rad_info.chip_class == GFX10)
- ngg->hw_max_esverts = max_esverts - max_verts_per_prim + 1;
- else
- ngg->hw_max_esverts = max_esverts;
-
- ngg->max_gsprims = max_gsprims;
- ngg->max_out_verts = max_out_vertices;
- ngg->prim_amp_factor = prim_amp_factor;
- ngg->max_vert_out_per_gs_instance = max_vert_out_per_gs_instance;
- ngg->ngg_emit_size = max_gsprims * gsprim_lds_size;
-
- /* Don't count unusable vertices. */
- ngg->esgs_ring_size =
- MIN2(max_esverts, max_gsprims * max_verts_per_prim) * esvert_lds_size * 4;
-
- if (gs_type == MESA_SHADER_GEOMETRY) {
- ngg->vgt_esgs_ring_itemsize = es_info->esgs_itemsize / 4;
- } else {
- ngg->vgt_esgs_ring_itemsize = 1;
- }
-
- assert(ngg->hw_max_esverts >= min_esverts); /* HW limitation */
+ struct radv_shader_info *gs_info = &infos[MESA_SHADER_GEOMETRY];
+ struct radv_es_output_info *es_info =
+ nir[MESA_SHADER_TESS_CTRL] ? &gs_info->tes.es_info : &gs_info->vs.es_info;
+ unsigned gs_type = nir[MESA_SHADER_GEOMETRY] ? MESA_SHADER_GEOMETRY : MESA_SHADER_VERTEX;
+ unsigned max_verts_per_prim = radv_get_num_input_vertices(nir);
+ unsigned min_verts_per_prim = gs_type == MESA_SHADER_GEOMETRY ? max_verts_per_prim : 1;
+ unsigned gs_num_invocations = nir[MESA_SHADER_GEOMETRY] ? MAX2(gs_info->gs.invocations, 1) : 1;
+ bool uses_adjacency;
+ switch (key->topology) {
+ case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
+ case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
+ uses_adjacency = true;
+ break;
+ default:
+ uses_adjacency = false;
+ break;
+ }
+
+ /* All these are in dwords: */
+ /* We can't allow using the whole LDS, because GS waves compete with
+ * other shader stages for LDS space.
+ *
+ * TODO: We should really take the shader's internal LDS use into
+ * account. The linker will fail if the size is greater than
+ * 8K dwords.
+ */
+ const unsigned max_lds_size = 8 * 1024 - 768;
+ const unsigned target_lds_size = max_lds_size;
+ unsigned esvert_lds_size = 0;
+ unsigned gsprim_lds_size = 0;
+
+ /* All these are per subgroup: */
+ const unsigned min_esverts =
+ pipeline->device->physical_device->rad_info.chip_class >= GFX10_3 ? 29 : 24;
+ bool max_vert_out_per_gs_instance = false;
+ unsigned max_esverts_base = 256;
+ unsigned max_gsprims_base = 128; /* default prim group size clamp */
+
+ /* Hardware has the following non-natural restrictions on the value
+    * of GE_CNTL.VERT_GRP_SIZE based on the primitive type of
+ * the draw:
+ * - at most 252 for any line input primitive type
+ * - at most 251 for any quad input primitive type
+ * - at most 251 for triangle strips with adjacency (this happens to
+ * be the natural limit for triangle *lists* with adjacency)
+ */
+ max_esverts_base = MIN2(max_esverts_base, 251 + max_verts_per_prim - 1);
+
+ if (gs_type == MESA_SHADER_GEOMETRY) {
+ unsigned max_out_verts_per_gsprim = gs_info->gs.vertices_out * gs_num_invocations;
+
+ if (max_out_verts_per_gsprim <= 256) {
+ if (max_out_verts_per_gsprim) {
+ max_gsprims_base = MIN2(max_gsprims_base, 256 / max_out_verts_per_gsprim);
+ }
+ } else {
+ /* Use special multi-cycling mode in which each GS
+ * instance gets its own subgroup. Does not work with
+ * tessellation. */
+ max_vert_out_per_gs_instance = true;
+ max_gsprims_base = 1;
+ max_out_verts_per_gsprim = gs_info->gs.vertices_out;
+ }
+
+ esvert_lds_size = es_info->esgs_itemsize / 4;
+ gsprim_lds_size = (gs_info->gs.gsvs_vertex_size / 4 + 1) * max_out_verts_per_gsprim;
+ } else {
+ /* VS and TES. */
+ /* LDS size for passing data from GS to ES. */
+ struct radv_streamout_info *so_info = nir[MESA_SHADER_TESS_CTRL]
+ ? &infos[MESA_SHADER_TESS_EVAL].so
+ : &infos[MESA_SHADER_VERTEX].so;
+
+ if (so_info->num_outputs)
+ esvert_lds_size = 4 * so_info->num_outputs + 1;
+
+ /* GS stores Primitive IDs (one DWORD) into LDS at the address
+ * corresponding to the ES thread of the provoking vertex. All
+ * ES threads load and export PrimitiveID for their thread.
+ */
+ if (!nir[MESA_SHADER_TESS_CTRL] && infos[MESA_SHADER_VERTEX].vs.outinfo.export_prim_id)
+ esvert_lds_size = MAX2(esvert_lds_size, 1);
+ }
+
+ unsigned max_gsprims = max_gsprims_base;
+ unsigned max_esverts = max_esverts_base;
+
+ if (esvert_lds_size)
+ max_esverts = MIN2(max_esverts, target_lds_size / esvert_lds_size);
+ if (gsprim_lds_size)
+ max_gsprims = MIN2(max_gsprims, target_lds_size / gsprim_lds_size);
+
+ max_esverts = MIN2(max_esverts, max_gsprims * max_verts_per_prim);
+ clamp_gsprims_to_esverts(&max_gsprims, max_esverts, min_verts_per_prim, uses_adjacency);
+ assert(max_esverts >= max_verts_per_prim && max_gsprims >= 1);
+
+ if (esvert_lds_size || gsprim_lds_size) {
+ /* Now that we have a rough proportionality between esverts
+ * and gsprims based on the primitive type, scale both of them
+ * down simultaneously based on required LDS space.
+ *
+ * We could be smarter about this if we knew how much vertex
+ * reuse to expect.
+ */
+ unsigned lds_total = max_esverts * esvert_lds_size + max_gsprims * gsprim_lds_size;
+ if (lds_total > target_lds_size) {
+ max_esverts = max_esverts * target_lds_size / lds_total;
+ max_gsprims = max_gsprims * target_lds_size / lds_total;
+
+ max_esverts = MIN2(max_esverts, max_gsprims * max_verts_per_prim);
+ clamp_gsprims_to_esverts(&max_gsprims, max_esverts, min_verts_per_prim, uses_adjacency);
+ assert(max_esverts >= max_verts_per_prim && max_gsprims >= 1);
+ }
+ }
+
+ /* Round up towards full wave sizes for better ALU utilization. */
+ if (!max_vert_out_per_gs_instance) {
+ unsigned orig_max_esverts;
+ unsigned orig_max_gsprims;
+ unsigned wavesize;
+
+ if (gs_type == MESA_SHADER_GEOMETRY) {
+ wavesize = gs_info->wave_size;
+ } else {
+ wavesize = nir[MESA_SHADER_TESS_CTRL] ? infos[MESA_SHADER_TESS_EVAL].wave_size
+ : infos[MESA_SHADER_VERTEX].wave_size;
+ }
+
+ do {
+ orig_max_esverts = max_esverts;
+ orig_max_gsprims = max_gsprims;
+
+ max_esverts = align(max_esverts, wavesize);
+ max_esverts = MIN2(max_esverts, max_esverts_base);
+ if (esvert_lds_size)
+ max_esverts =
+ MIN2(max_esverts, (max_lds_size - max_gsprims * gsprim_lds_size) / esvert_lds_size);
+ max_esverts = MIN2(max_esverts, max_gsprims * max_verts_per_prim);
+
+ /* Hardware restriction: minimum value of max_esverts */
+ if (pipeline->device->physical_device->rad_info.chip_class == GFX10)
+ max_esverts = MAX2(max_esverts, min_esverts - 1 + max_verts_per_prim);
+ else
+ max_esverts = MAX2(max_esverts, min_esverts);
+
+ max_gsprims = align(max_gsprims, wavesize);
+ max_gsprims = MIN2(max_gsprims, max_gsprims_base);
+ if (gsprim_lds_size) {
+ /* Don't count unusable vertices to the LDS
+ * size. Those are vertices above the maximum
+ * number of vertices that can occur in the
+ * workgroup, which is e.g. max_gsprims * 3
+ * for triangles.
+ */
+ unsigned usable_esverts = MIN2(max_esverts, max_gsprims * max_verts_per_prim);
+ max_gsprims = MIN2(max_gsprims,
+ (max_lds_size - usable_esverts * esvert_lds_size) / gsprim_lds_size);
+ }
+ clamp_gsprims_to_esverts(&max_gsprims, max_esverts, min_verts_per_prim, uses_adjacency);
+ assert(max_esverts >= max_verts_per_prim && max_gsprims >= 1);
+ } while (orig_max_esverts != max_esverts || orig_max_gsprims != max_gsprims);
+
+ /* Verify the restriction. */
+ if (pipeline->device->physical_device->rad_info.chip_class == GFX10)
+ assert(max_esverts >= min_esverts - 1 + max_verts_per_prim);
+ else
+ assert(max_esverts >= min_esverts);
+ } else {
+ /* Hardware restriction: minimum value of max_esverts */
+ if (pipeline->device->physical_device->rad_info.chip_class == GFX10)
+ max_esverts = MAX2(max_esverts, min_esverts - 1 + max_verts_per_prim);
+ else
+ max_esverts = MAX2(max_esverts, min_esverts);
+ }
+
+ unsigned max_out_vertices = max_vert_out_per_gs_instance ? gs_info->gs.vertices_out
+ : gs_type == MESA_SHADER_GEOMETRY
+ ? max_gsprims * gs_num_invocations * gs_info->gs.vertices_out
+ : max_esverts;
+ assert(max_out_vertices <= 256);
+
+ unsigned prim_amp_factor = 1;
+ if (gs_type == MESA_SHADER_GEOMETRY) {
+ /* Number of output primitives per GS input primitive after
+ * GS instancing. */
+ prim_amp_factor = gs_info->gs.vertices_out;
+ }
+
+ /* On Gfx10, the GE only checks against the maximum number of ES verts
+ * after allocating a full GS primitive. So we need to ensure that
+ * whenever this check passes, there is enough space for a full
+ * primitive without vertex reuse.
+ */
+ if (pipeline->device->physical_device->rad_info.chip_class == GFX10)
+ ngg->hw_max_esverts = max_esverts - max_verts_per_prim + 1;
+ else
+ ngg->hw_max_esverts = max_esverts;
+
+ ngg->max_gsprims = max_gsprims;
+ ngg->max_out_verts = max_out_vertices;
+ ngg->prim_amp_factor = prim_amp_factor;
+ ngg->max_vert_out_per_gs_instance = max_vert_out_per_gs_instance;
+ ngg->ngg_emit_size = max_gsprims * gsprim_lds_size;
+
+ /* Don't count unusable vertices. */
+ ngg->esgs_ring_size = MIN2(max_esverts, max_gsprims * max_verts_per_prim) * esvert_lds_size * 4;
+
+ if (gs_type == MESA_SHADER_GEOMETRY) {
+ ngg->vgt_esgs_ring_itemsize = es_info->esgs_itemsize / 4;
+ } else {
+ ngg->vgt_esgs_ring_itemsize = 1;
+ }
+
+ assert(ngg->hw_max_esverts >= min_esverts); /* HW limitation */
}
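
The proportional scale-down in the middle of gfx10_get_ngg_info() is easiest to see with numbers. The sketch below uses invented LDS costs (16 dwords per ES vertex, 48 dwords per GS primitive) and skips the surrounding per-resource clamps and wave-size rounding, so it only illustrates the joint rescaling step, not the whole algorithm:

   #include <stdio.h>

   int main(void)
   {
      const unsigned target = 8 * 1024 - 768; /* 7424 dwords of usable LDS */
      unsigned esvert_lds = 16, gsprim_lds = 48;
      unsigned max_esverts = 253, max_gsprims = 128;

      unsigned total = max_esverts * esvert_lds + max_gsprims * gsprim_lds; /* 10192 */
      if (total > target) {
         max_esverts = max_esverts * target / total; /* 184 */
         max_gsprims = max_gsprims * target / total; /* 93 */
      }

      printf("scaled to %u ES verts and %u GS prims (%u dwords of LDS)\n",
             max_esverts, max_gsprims,
             max_esverts * esvert_lds + max_gsprims * gsprim_lds); /* 7408 <= 7424 */
      return 0;
   }
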
static void
-radv_pipeline_init_gs_ring_state(struct radv_pipeline *pipeline,
- const struct gfx9_gs_info *gs)
+radv_pipeline_init_gs_ring_state(struct radv_pipeline *pipeline, const struct gfx9_gs_info *gs)
{
- struct radv_device *device = pipeline->device;
- unsigned num_se = device->physical_device->rad_info.max_se;
- unsigned wave_size = 64;
- unsigned max_gs_waves = 32 * num_se; /* max 32 per SE on GCN */
- /* On GFX6-GFX7, the value comes from VGT_GS_VERTEX_REUSE = 16.
- * On GFX8+, the value comes from VGT_VERTEX_REUSE_BLOCK_CNTL = 30 (+2).
- */
- unsigned gs_vertex_reuse =
- (device->physical_device->rad_info.chip_class >= GFX8 ? 32 : 16) * num_se;
- unsigned alignment = 256 * num_se;
- /* The maximum size is 63.999 MB per SE. */
- unsigned max_size = ((unsigned)(63.999 * 1024 * 1024) & ~255) * num_se;
- struct radv_shader_info *gs_info = &pipeline->shaders[MESA_SHADER_GEOMETRY]->info;
-
- /* Calculate the minimum size. */
- unsigned min_esgs_ring_size = align(gs->vgt_esgs_ring_itemsize * 4 * gs_vertex_reuse *
- wave_size, alignment);
- /* These are recommended sizes, not minimum sizes. */
- unsigned esgs_ring_size = max_gs_waves * 2 * wave_size *
- gs->vgt_esgs_ring_itemsize * 4 * gs_info->gs.vertices_in;
- unsigned gsvs_ring_size = max_gs_waves * 2 * wave_size *
- gs_info->gs.max_gsvs_emit_size;
-
- min_esgs_ring_size = align(min_esgs_ring_size, alignment);
- esgs_ring_size = align(esgs_ring_size, alignment);
- gsvs_ring_size = align(gsvs_ring_size, alignment);
-
- if (pipeline->device->physical_device->rad_info.chip_class <= GFX8)
- pipeline->graphics.esgs_ring_size = CLAMP(esgs_ring_size, min_esgs_ring_size, max_size);
-
- pipeline->graphics.gsvs_ring_size = MIN2(gsvs_ring_size, max_size);
+ struct radv_device *device = pipeline->device;
+ unsigned num_se = device->physical_device->rad_info.max_se;
+ unsigned wave_size = 64;
+ unsigned max_gs_waves = 32 * num_se; /* max 32 per SE on GCN */
+ /* On GFX6-GFX7, the value comes from VGT_GS_VERTEX_REUSE = 16.
+ * On GFX8+, the value comes from VGT_VERTEX_REUSE_BLOCK_CNTL = 30 (+2).
+ */
+ unsigned gs_vertex_reuse =
+ (device->physical_device->rad_info.chip_class >= GFX8 ? 32 : 16) * num_se;
+ unsigned alignment = 256 * num_se;
+ /* The maximum size is 63.999 MB per SE. */
+ unsigned max_size = ((unsigned)(63.999 * 1024 * 1024) & ~255) * num_se;
+ struct radv_shader_info *gs_info = &pipeline->shaders[MESA_SHADER_GEOMETRY]->info;
+
+ /* Calculate the minimum size. */
+ unsigned min_esgs_ring_size =
+ align(gs->vgt_esgs_ring_itemsize * 4 * gs_vertex_reuse * wave_size, alignment);
+ /* These are recommended sizes, not minimum sizes. */
+ unsigned esgs_ring_size =
+ max_gs_waves * 2 * wave_size * gs->vgt_esgs_ring_itemsize * 4 * gs_info->gs.vertices_in;
+ unsigned gsvs_ring_size = max_gs_waves * 2 * wave_size * gs_info->gs.max_gsvs_emit_size;
+
+ min_esgs_ring_size = align(min_esgs_ring_size, alignment);
+ esgs_ring_size = align(esgs_ring_size, alignment);
+ gsvs_ring_size = align(gsvs_ring_size, alignment);
+
+ if (pipeline->device->physical_device->rad_info.chip_class <= GFX8)
+ pipeline->graphics.esgs_ring_size = CLAMP(esgs_ring_size, min_esgs_ring_size, max_size);
+
+ pipeline->graphics.gsvs_ring_size = MIN2(gsvs_ring_size, max_size);
}
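
A worked example of the ring sizing, with invented parameters: two shader engines on a GFX8-class part (so gs_vertex_reuse = 64), a 4-dword ESGS item, triangle input and a 256-byte worst-case GSVS emit size. All three results already meet the 512-byte alignment for this configuration, so the align() calls are no-ops here:

   #include <stdio.h>

   int main(void)
   {
      const unsigned num_se = 2, wave_size = 64, max_gs_waves = 32 * num_se;
      const unsigned gs_vertex_reuse = 32 * num_se, itemsize_bytes = 4 * 4;

      unsigned min_esgs = itemsize_bytes * gs_vertex_reuse * wave_size;  /* 65536   (64 KiB) */
      unsigned esgs = max_gs_waves * 2 * wave_size * itemsize_bytes * 3; /* 393216  (384 KiB) */
      unsigned gsvs = max_gs_waves * 2 * wave_size * 256;                /* 2097152 (2 MiB) */

      printf("min_esgs=%u esgs=%u gsvs=%u\n", min_esgs, esgs, gsvs);
      return 0;
   }
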
struct radv_shader_variant *
-radv_get_shader(const struct radv_pipeline *pipeline,
- gl_shader_stage stage)
+radv_get_shader(const struct radv_pipeline *pipeline, gl_shader_stage stage)
{
- if (stage == MESA_SHADER_VERTEX) {
- if (pipeline->shaders[MESA_SHADER_VERTEX])
- return pipeline->shaders[MESA_SHADER_VERTEX];
- if (pipeline->shaders[MESA_SHADER_TESS_CTRL])
- return pipeline->shaders[MESA_SHADER_TESS_CTRL];
- if (pipeline->shaders[MESA_SHADER_GEOMETRY])
- return pipeline->shaders[MESA_SHADER_GEOMETRY];
- } else if (stage == MESA_SHADER_TESS_EVAL) {
- if (!radv_pipeline_has_tess(pipeline))
- return NULL;
- if (pipeline->shaders[MESA_SHADER_TESS_EVAL])
- return pipeline->shaders[MESA_SHADER_TESS_EVAL];
- if (pipeline->shaders[MESA_SHADER_GEOMETRY])
- return pipeline->shaders[MESA_SHADER_GEOMETRY];
- }
- return pipeline->shaders[stage];
+ if (stage == MESA_SHADER_VERTEX) {
+ if (pipeline->shaders[MESA_SHADER_VERTEX])
+ return pipeline->shaders[MESA_SHADER_VERTEX];
+ if (pipeline->shaders[MESA_SHADER_TESS_CTRL])
+ return pipeline->shaders[MESA_SHADER_TESS_CTRL];
+ if (pipeline->shaders[MESA_SHADER_GEOMETRY])
+ return pipeline->shaders[MESA_SHADER_GEOMETRY];
+ } else if (stage == MESA_SHADER_TESS_EVAL) {
+ if (!radv_pipeline_has_tess(pipeline))
+ return NULL;
+ if (pipeline->shaders[MESA_SHADER_TESS_EVAL])
+ return pipeline->shaders[MESA_SHADER_TESS_EVAL];
+ if (pipeline->shaders[MESA_SHADER_GEOMETRY])
+ return pipeline->shaders[MESA_SHADER_GEOMETRY];
+ }
+ return pipeline->shaders[stage];
}
-static const struct radv_vs_output_info *get_vs_output_info(const struct radv_pipeline *pipeline)
+static const struct radv_vs_output_info *
+get_vs_output_info(const struct radv_pipeline *pipeline)
{
- if (radv_pipeline_has_gs(pipeline))
- if (radv_pipeline_has_ngg(pipeline))
- return &pipeline->shaders[MESA_SHADER_GEOMETRY]->info.vs.outinfo;
- else
- return &pipeline->gs_copy_shader->info.vs.outinfo;
- else if (radv_pipeline_has_tess(pipeline))
- return &pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.tes.outinfo;
- else
- return &pipeline->shaders[MESA_SHADER_VERTEX]->info.vs.outinfo;
+ if (radv_pipeline_has_gs(pipeline))
+ if (radv_pipeline_has_ngg(pipeline))
+ return &pipeline->shaders[MESA_SHADER_GEOMETRY]->info.vs.outinfo;
+ else
+ return &pipeline->gs_copy_shader->info.vs.outinfo;
+ else if (radv_pipeline_has_tess(pipeline))
+ return &pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.tes.outinfo;
+ else
+ return &pipeline->shaders[MESA_SHADER_VERTEX]->info.vs.outinfo;
}
static void
radv_link_shaders(struct radv_pipeline *pipeline, nir_shader **shaders,
- bool optimize_conservatively)
+ bool optimize_conservatively)
{
- nir_shader* ordered_shaders[MESA_SHADER_STAGES];
- int shader_count = 0;
-
- if(shaders[MESA_SHADER_FRAGMENT]) {
- ordered_shaders[shader_count++] = shaders[MESA_SHADER_FRAGMENT];
- }
- if(shaders[MESA_SHADER_GEOMETRY]) {
- ordered_shaders[shader_count++] = shaders[MESA_SHADER_GEOMETRY];
- }
- if(shaders[MESA_SHADER_TESS_EVAL]) {
- ordered_shaders[shader_count++] = shaders[MESA_SHADER_TESS_EVAL];
- }
- if(shaders[MESA_SHADER_TESS_CTRL]) {
- ordered_shaders[shader_count++] = shaders[MESA_SHADER_TESS_CTRL];
- }
- if(shaders[MESA_SHADER_VERTEX]) {
- ordered_shaders[shader_count++] = shaders[MESA_SHADER_VERTEX];
- }
- if(shaders[MESA_SHADER_COMPUTE]) {
- ordered_shaders[shader_count++] = shaders[MESA_SHADER_COMPUTE];
- }
-
- bool has_geom_tess = shaders[MESA_SHADER_GEOMETRY] || shaders[MESA_SHADER_TESS_CTRL];
- bool merged_gs = shaders[MESA_SHADER_GEOMETRY] &&
- pipeline->device->physical_device->rad_info.chip_class >= GFX9;
-
- if (!optimize_conservatively && shader_count > 1) {
- unsigned first = ordered_shaders[shader_count - 1]->info.stage;
- unsigned last = ordered_shaders[0]->info.stage;
-
- if (ordered_shaders[0]->info.stage == MESA_SHADER_FRAGMENT &&
- ordered_shaders[1]->info.has_transform_feedback_varyings)
- nir_link_xfb_varyings(ordered_shaders[1], ordered_shaders[0]);
-
- for (int i = 1; i < shader_count; ++i) {
- nir_lower_io_arrays_to_elements(ordered_shaders[i],
- ordered_shaders[i - 1]);
- }
-
- for (int i = 0; i < shader_count; ++i) {
- nir_variable_mode mask = 0;
-
- if (ordered_shaders[i]->info.stage != first)
- mask = mask | nir_var_shader_in;
-
- if (ordered_shaders[i]->info.stage != last)
- mask = mask | nir_var_shader_out;
-
- if (nir_lower_io_to_scalar_early(ordered_shaders[i], mask)) {
- /* Optimize the new vector code and then remove dead vars */
- nir_copy_prop(ordered_shaders[i]);
- nir_opt_shrink_vectors(ordered_shaders[i],
- !pipeline->device->instance->disable_shrink_image_store);
-
- if (ordered_shaders[i]->info.stage != last) {
- /* Optimize swizzled movs of load_const for
- * nir_link_opt_varyings's constant propagation
- */
- nir_opt_constant_folding(ordered_shaders[i]);
- /* For nir_link_opt_varyings's duplicate input opt */
- nir_opt_cse(ordered_shaders[i]);
- }
-
- /* Run copy-propagation to help remove dead
- * output variables (some shaders have useless
- * copies to/from an output), so compaction
- * later will be more effective.
- *
- * This will have been done earlier but it might
- * not have worked because the outputs were vector.
- */
- if (ordered_shaders[i]->info.stage == MESA_SHADER_TESS_CTRL)
- nir_opt_copy_prop_vars(ordered_shaders[i]);
-
- nir_opt_dce(ordered_shaders[i]);
- nir_remove_dead_variables(ordered_shaders[i],
- nir_var_function_temp | nir_var_shader_in | nir_var_shader_out, NULL);
- }
- }
- }
-
- for (int i = 1; !optimize_conservatively && (i < shader_count); ++i) {
- if (nir_link_opt_varyings(ordered_shaders[i], ordered_shaders[i - 1])) {
- nir_opt_constant_folding(ordered_shaders[i - 1]);
- nir_opt_algebraic(ordered_shaders[i - 1]);
- nir_opt_dce(ordered_shaders[i - 1]);
- }
-
- nir_remove_dead_variables(ordered_shaders[i],
- nir_var_shader_out, NULL);
- nir_remove_dead_variables(ordered_shaders[i - 1],
- nir_var_shader_in, NULL);
-
- bool progress = nir_remove_unused_varyings(ordered_shaders[i],
- ordered_shaders[i - 1]);
-
- nir_compact_varyings(ordered_shaders[i],
- ordered_shaders[i - 1], true);
-
- if (ordered_shaders[i]->info.stage == MESA_SHADER_TESS_CTRL ||
- (ordered_shaders[i]->info.stage == MESA_SHADER_VERTEX && has_geom_tess) ||
- (ordered_shaders[i]->info.stage == MESA_SHADER_TESS_EVAL && merged_gs)) {
- nir_lower_io_to_vector(ordered_shaders[i], nir_var_shader_out);
- if (ordered_shaders[i]->info.stage == MESA_SHADER_TESS_CTRL)
- nir_vectorize_tess_levels(ordered_shaders[i]);
- nir_opt_combine_stores(ordered_shaders[i], nir_var_shader_out);
- }
- if (ordered_shaders[i - 1]->info.stage == MESA_SHADER_GEOMETRY ||
- ordered_shaders[i - 1]->info.stage == MESA_SHADER_TESS_CTRL ||
- ordered_shaders[i - 1]->info.stage == MESA_SHADER_TESS_EVAL) {
- nir_lower_io_to_vector(ordered_shaders[i - 1], nir_var_shader_in);
- }
-
- if (progress) {
- if (nir_lower_global_vars_to_local(ordered_shaders[i])) {
- ac_lower_indirect_derefs(ordered_shaders[i],
- pipeline->device->physical_device->rad_info.chip_class);
- /* remove dead writes, which can remove input loads */
- nir_lower_vars_to_ssa(ordered_shaders[i]);
- nir_opt_dce(ordered_shaders[i]);
- }
-
- if (nir_lower_global_vars_to_local(ordered_shaders[i - 1])) {
- ac_lower_indirect_derefs(ordered_shaders[i - 1],
- pipeline->device->physical_device->rad_info.chip_class);
- }
- }
- }
+ nir_shader *ordered_shaders[MESA_SHADER_STAGES];
+ int shader_count = 0;
+
+ if (shaders[MESA_SHADER_FRAGMENT]) {
+ ordered_shaders[shader_count++] = shaders[MESA_SHADER_FRAGMENT];
+ }
+ if (shaders[MESA_SHADER_GEOMETRY]) {
+ ordered_shaders[shader_count++] = shaders[MESA_SHADER_GEOMETRY];
+ }
+ if (shaders[MESA_SHADER_TESS_EVAL]) {
+ ordered_shaders[shader_count++] = shaders[MESA_SHADER_TESS_EVAL];
+ }
+ if (shaders[MESA_SHADER_TESS_CTRL]) {
+ ordered_shaders[shader_count++] = shaders[MESA_SHADER_TESS_CTRL];
+ }
+ if (shaders[MESA_SHADER_VERTEX]) {
+ ordered_shaders[shader_count++] = shaders[MESA_SHADER_VERTEX];
+ }
+ if (shaders[MESA_SHADER_COMPUTE]) {
+ ordered_shaders[shader_count++] = shaders[MESA_SHADER_COMPUTE];
+ }
+
+ bool has_geom_tess = shaders[MESA_SHADER_GEOMETRY] || shaders[MESA_SHADER_TESS_CTRL];
+ bool merged_gs = shaders[MESA_SHADER_GEOMETRY] &&
+ pipeline->device->physical_device->rad_info.chip_class >= GFX9;
+
+ if (!optimize_conservatively && shader_count > 1) {
+ unsigned first = ordered_shaders[shader_count - 1]->info.stage;
+ unsigned last = ordered_shaders[0]->info.stage;
+
+ if (ordered_shaders[0]->info.stage == MESA_SHADER_FRAGMENT &&
+ ordered_shaders[1]->info.has_transform_feedback_varyings)
+ nir_link_xfb_varyings(ordered_shaders[1], ordered_shaders[0]);
+
+ for (int i = 1; i < shader_count; ++i) {
+ nir_lower_io_arrays_to_elements(ordered_shaders[i], ordered_shaders[i - 1]);
+ }
+
+ for (int i = 0; i < shader_count; ++i) {
+ nir_variable_mode mask = 0;
+
+ if (ordered_shaders[i]->info.stage != first)
+ mask = mask | nir_var_shader_in;
+
+ if (ordered_shaders[i]->info.stage != last)
+ mask = mask | nir_var_shader_out;
+
+ if (nir_lower_io_to_scalar_early(ordered_shaders[i], mask)) {
+ /* Optimize the new vector code and then remove dead vars */
+ nir_copy_prop(ordered_shaders[i]);
+ nir_opt_shrink_vectors(ordered_shaders[i],
+ !pipeline->device->instance->disable_shrink_image_store);
+
+ if (ordered_shaders[i]->info.stage != last) {
+ /* Optimize swizzled movs of load_const for
+ * nir_link_opt_varyings's constant propagation
+ */
+ nir_opt_constant_folding(ordered_shaders[i]);
+ /* For nir_link_opt_varyings's duplicate input opt */
+ nir_opt_cse(ordered_shaders[i]);
+ }
+
+ /* Run copy-propagation to help remove dead
+ * output variables (some shaders have useless
+ * copies to/from an output), so compaction
+ * later will be more effective.
+ *
+ * This will have been done earlier but it might
+ * not have worked because the outputs were vector.
+ */
+ if (ordered_shaders[i]->info.stage == MESA_SHADER_TESS_CTRL)
+ nir_opt_copy_prop_vars(ordered_shaders[i]);
+
+ nir_opt_dce(ordered_shaders[i]);
+ nir_remove_dead_variables(
+ ordered_shaders[i], nir_var_function_temp | nir_var_shader_in | nir_var_shader_out,
+ NULL);
+ }
+ }
+ }
+
+ for (int i = 1; !optimize_conservatively && (i < shader_count); ++i) {
+ if (nir_link_opt_varyings(ordered_shaders[i], ordered_shaders[i - 1])) {
+ nir_opt_constant_folding(ordered_shaders[i - 1]);
+ nir_opt_algebraic(ordered_shaders[i - 1]);
+ nir_opt_dce(ordered_shaders[i - 1]);
+ }
+
+ nir_remove_dead_variables(ordered_shaders[i], nir_var_shader_out, NULL);
+ nir_remove_dead_variables(ordered_shaders[i - 1], nir_var_shader_in, NULL);
+
+ bool progress = nir_remove_unused_varyings(ordered_shaders[i], ordered_shaders[i - 1]);
+
+ nir_compact_varyings(ordered_shaders[i], ordered_shaders[i - 1], true);
+
+ if (ordered_shaders[i]->info.stage == MESA_SHADER_TESS_CTRL ||
+ (ordered_shaders[i]->info.stage == MESA_SHADER_VERTEX && has_geom_tess) ||
+ (ordered_shaders[i]->info.stage == MESA_SHADER_TESS_EVAL && merged_gs)) {
+ nir_lower_io_to_vector(ordered_shaders[i], nir_var_shader_out);
+ if (ordered_shaders[i]->info.stage == MESA_SHADER_TESS_CTRL)
+ nir_vectorize_tess_levels(ordered_shaders[i]);
+ nir_opt_combine_stores(ordered_shaders[i], nir_var_shader_out);
+ }
+ if (ordered_shaders[i - 1]->info.stage == MESA_SHADER_GEOMETRY ||
+ ordered_shaders[i - 1]->info.stage == MESA_SHADER_TESS_CTRL ||
+ ordered_shaders[i - 1]->info.stage == MESA_SHADER_TESS_EVAL) {
+ nir_lower_io_to_vector(ordered_shaders[i - 1], nir_var_shader_in);
+ }
+
+ if (progress) {
+ if (nir_lower_global_vars_to_local(ordered_shaders[i])) {
+ ac_lower_indirect_derefs(ordered_shaders[i],
+ pipeline->device->physical_device->rad_info.chip_class);
+ /* remove dead writes, which can remove input loads */
+ nir_lower_vars_to_ssa(ordered_shaders[i]);
+ nir_opt_dce(ordered_shaders[i]);
+ }
+
+ if (nir_lower_global_vars_to_local(ordered_shaders[i - 1])) {
+ ac_lower_indirect_derefs(ordered_shaders[i - 1],
+ pipeline->device->physical_device->rad_info.chip_class);
+ }
+ }
+ }
}
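For orientation: ordered_shaders[] above is filled last-stage-first (the fragment shader sits at index 0, the first active stage at shader_count - 1), so the loops pair a producer at index i with its consumer at index i - 1. A standalone sketch of that iteration order, using made-up stage names rather than the real gl_shader_stage values:

#include <stdio.h>

/* Simplified stage list ordered last-to-first, mirroring how
 * ordered_shaders[] is populated (the consumer sits at the lower index). */
static const char *ordered_stages[] = {"fragment", "geometry", "tess_eval",
                                       "tess_ctrl", "vertex"};

int
main(void)
{
   const int shader_count = 5;

   /* Same direction as the varying-linking loops: producer i feeds consumer i - 1. */
   for (int i = 1; i < shader_count; ++i)
      printf("link outputs of %s to inputs of %s\n", ordered_stages[i],
             ordered_stages[i - 1]);
   return 0;
}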
static void
radv_set_driver_locations(struct radv_pipeline *pipeline, nir_shader **shaders,
- struct radv_shader_info infos[MESA_SHADER_STAGES])
+ struct radv_shader_info infos[MESA_SHADER_STAGES])
{
- if (shaders[MESA_SHADER_FRAGMENT]) {
- nir_foreach_shader_out_variable(var, shaders[MESA_SHADER_FRAGMENT])
- {
- var->data.driver_location = var->data.location + var->data.index;
- }
- }
-
- if (!shaders[MESA_SHADER_VERTEX])
- return;
-
- bool has_tess = shaders[MESA_SHADER_TESS_CTRL];
- bool has_gs = shaders[MESA_SHADER_GEOMETRY];
-
- /* Merged stage for VS and TES */
- unsigned vs_info_idx = MESA_SHADER_VERTEX;
- unsigned tes_info_idx = MESA_SHADER_TESS_EVAL;
-
- /* Which stage is the last in the vertex, tess, geometry pipeline */
- unsigned last_vtg_stage = MESA_SHADER_VERTEX;
-
- if (pipeline->device->physical_device->rad_info.chip_class >= GFX9) {
- /* These are merged into the next stage */
- vs_info_idx = has_tess ? MESA_SHADER_TESS_CTRL : MESA_SHADER_GEOMETRY;
- tes_info_idx = has_gs ? MESA_SHADER_GEOMETRY : MESA_SHADER_TESS_EVAL;
- }
-
- nir_foreach_shader_in_variable(var, shaders[MESA_SHADER_VERTEX]) {
- var->data.driver_location = var->data.location;
- }
-
- if (has_tess) {
- nir_linked_io_var_info vs2tcs =
- nir_assign_linked_io_var_locations(shaders[MESA_SHADER_VERTEX], shaders[MESA_SHADER_TESS_CTRL]);
- nir_linked_io_var_info tcs2tes =
- nir_assign_linked_io_var_locations(shaders[MESA_SHADER_TESS_CTRL], shaders[MESA_SHADER_TESS_EVAL]);
-
- infos[MESA_SHADER_VERTEX].vs.num_linked_outputs = vs2tcs.num_linked_io_vars;
- infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_inputs = vs2tcs.num_linked_io_vars;
- infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_outputs = tcs2tes.num_linked_io_vars;
- infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_patch_outputs = tcs2tes.num_linked_patch_io_vars;
- infos[MESA_SHADER_TESS_EVAL].tes.num_linked_inputs = tcs2tes.num_linked_io_vars;
- infos[MESA_SHADER_TESS_EVAL].tes.num_linked_patch_inputs = tcs2tes.num_linked_patch_io_vars;
-
- /* Copy data to merged stage */
- infos[vs_info_idx].vs.num_linked_outputs = vs2tcs.num_linked_io_vars;
- infos[tes_info_idx].tes.num_linked_inputs = tcs2tes.num_linked_io_vars;
- infos[tes_info_idx].tes.num_linked_patch_inputs = tcs2tes.num_linked_patch_io_vars;
-
- if (has_gs) {
- nir_linked_io_var_info tes2gs =
- nir_assign_linked_io_var_locations(shaders[MESA_SHADER_TESS_EVAL], shaders[MESA_SHADER_GEOMETRY]);
-
- infos[MESA_SHADER_TESS_EVAL].tes.num_linked_outputs = tes2gs.num_linked_io_vars;
- infos[MESA_SHADER_GEOMETRY].gs.num_linked_inputs = tes2gs.num_linked_io_vars;
-
- /* Copy data to merged stage */
- infos[tes_info_idx].tes.num_linked_outputs = tes2gs.num_linked_io_vars;
-
- last_vtg_stage = MESA_SHADER_GEOMETRY;
-
- } else {
- last_vtg_stage = MESA_SHADER_TESS_EVAL;
- }
- } else if (has_gs) {
- nir_linked_io_var_info vs2gs =
- nir_assign_linked_io_var_locations(shaders[MESA_SHADER_VERTEX], shaders[MESA_SHADER_GEOMETRY]);
-
- infos[MESA_SHADER_VERTEX].vs.num_linked_outputs = vs2gs.num_linked_io_vars;
- infos[MESA_SHADER_GEOMETRY].gs.num_linked_inputs = vs2gs.num_linked_io_vars;
- last_vtg_stage = MESA_SHADER_GEOMETRY;
-
- /* Copy data to merged stage */
- infos[vs_info_idx].vs.num_linked_outputs = vs2gs.num_linked_io_vars;
- }
-
- nir_foreach_shader_out_variable(var, shaders[last_vtg_stage]) {
- var->data.driver_location = var->data.location;
- }
+ if (shaders[MESA_SHADER_FRAGMENT]) {
+ nir_foreach_shader_out_variable(var, shaders[MESA_SHADER_FRAGMENT])
+ {
+ var->data.driver_location = var->data.location + var->data.index;
+ }
+ }
+
+ if (!shaders[MESA_SHADER_VERTEX])
+ return;
+
+ bool has_tess = shaders[MESA_SHADER_TESS_CTRL];
+ bool has_gs = shaders[MESA_SHADER_GEOMETRY];
+
+ /* Merged stage for VS and TES */
+ unsigned vs_info_idx = MESA_SHADER_VERTEX;
+ unsigned tes_info_idx = MESA_SHADER_TESS_EVAL;
+
+ /* Which stage is the last in the vertex, tess, geometry pipeline */
+ unsigned last_vtg_stage = MESA_SHADER_VERTEX;
+
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX9) {
+ /* These are merged into the next stage */
+ vs_info_idx = has_tess ? MESA_SHADER_TESS_CTRL : MESA_SHADER_GEOMETRY;
+ tes_info_idx = has_gs ? MESA_SHADER_GEOMETRY : MESA_SHADER_TESS_EVAL;
+ }
+
+ nir_foreach_shader_in_variable (var, shaders[MESA_SHADER_VERTEX]) {
+ var->data.driver_location = var->data.location;
+ }
+
+ if (has_tess) {
+ nir_linked_io_var_info vs2tcs = nir_assign_linked_io_var_locations(
+ shaders[MESA_SHADER_VERTEX], shaders[MESA_SHADER_TESS_CTRL]);
+ nir_linked_io_var_info tcs2tes = nir_assign_linked_io_var_locations(
+ shaders[MESA_SHADER_TESS_CTRL], shaders[MESA_SHADER_TESS_EVAL]);
+
+ infos[MESA_SHADER_VERTEX].vs.num_linked_outputs = vs2tcs.num_linked_io_vars;
+ infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_inputs = vs2tcs.num_linked_io_vars;
+ infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_outputs = tcs2tes.num_linked_io_vars;
+ infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_patch_outputs = tcs2tes.num_linked_patch_io_vars;
+ infos[MESA_SHADER_TESS_EVAL].tes.num_linked_inputs = tcs2tes.num_linked_io_vars;
+ infos[MESA_SHADER_TESS_EVAL].tes.num_linked_patch_inputs = tcs2tes.num_linked_patch_io_vars;
+
+ /* Copy data to merged stage */
+ infos[vs_info_idx].vs.num_linked_outputs = vs2tcs.num_linked_io_vars;
+ infos[tes_info_idx].tes.num_linked_inputs = tcs2tes.num_linked_io_vars;
+ infos[tes_info_idx].tes.num_linked_patch_inputs = tcs2tes.num_linked_patch_io_vars;
+
+ if (has_gs) {
+ nir_linked_io_var_info tes2gs = nir_assign_linked_io_var_locations(
+ shaders[MESA_SHADER_TESS_EVAL], shaders[MESA_SHADER_GEOMETRY]);
+
+ infos[MESA_SHADER_TESS_EVAL].tes.num_linked_outputs = tes2gs.num_linked_io_vars;
+ infos[MESA_SHADER_GEOMETRY].gs.num_linked_inputs = tes2gs.num_linked_io_vars;
+
+ /* Copy data to merged stage */
+ infos[tes_info_idx].tes.num_linked_outputs = tes2gs.num_linked_io_vars;
+
+ last_vtg_stage = MESA_SHADER_GEOMETRY;
+
+ } else {
+ last_vtg_stage = MESA_SHADER_TESS_EVAL;
+ }
+ } else if (has_gs) {
+ nir_linked_io_var_info vs2gs = nir_assign_linked_io_var_locations(
+ shaders[MESA_SHADER_VERTEX], shaders[MESA_SHADER_GEOMETRY]);
+
+ infos[MESA_SHADER_VERTEX].vs.num_linked_outputs = vs2gs.num_linked_io_vars;
+ infos[MESA_SHADER_GEOMETRY].gs.num_linked_inputs = vs2gs.num_linked_io_vars;
+ last_vtg_stage = MESA_SHADER_GEOMETRY;
+
+ /* Copy data to merged stage */
+ infos[vs_info_idx].vs.num_linked_outputs = vs2gs.num_linked_io_vars;
+ }
+
+ nir_foreach_shader_out_variable(var, shaders[last_vtg_stage])
+ {
+ var->data.driver_location = var->data.location;
+ }
}
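On GFX9+ the VS is merged into the TCS (or the GS) and the TES into the GS, which is why the linked-I/O counts above are also copied into the vs_info_idx / tes_info_idx slots. A standalone sketch of that slot mapping, with simplified stage names standing in for the MESA_SHADER_* enums:

enum stage { VS, TCS, TES, GS };

/* Which infos[] slot a stage's linked-I/O counts are mirrored into once
 * stages are merged (mirrors vs_info_idx / tes_info_idx above). */
static enum stage
merged_info_slot(enum stage s, int has_tess, int has_gs)
{
   if (s == VS)
      return has_tess ? TCS : GS; /* the GS slot is only consumed when a GS exists */
   if (s == TES)
      return has_gs ? GS : TES;
   return s;
}

int
main(void)
{
   /* VS+TCS+TES+GS pipeline: VS data is mirrored under TCS, TES data under GS. */
   return (merged_info_slot(VS, 1, 1) == TCS && merged_info_slot(TES, 1, 1) == GS) ? 0 : 1;
}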
static uint32_t
radv_get_attrib_stride(const VkPipelineVertexInputStateCreateInfo *input_state,
- uint32_t attrib_binding)
+ uint32_t attrib_binding)
{
- for (uint32_t i = 0; i < input_state->vertexBindingDescriptionCount; i++) {
- const VkVertexInputBindingDescription *input_binding =
- &input_state->pVertexBindingDescriptions[i];
+ for (uint32_t i = 0; i < input_state->vertexBindingDescriptionCount; i++) {
+ const VkVertexInputBindingDescription *input_binding =
+ &input_state->pVertexBindingDescriptions[i];
- if (input_binding->binding == attrib_binding)
- return input_binding->stride;
- }
+ if (input_binding->binding == attrib_binding)
+ return input_binding->stride;
+ }
- return 0;
+ return 0;
}
static struct radv_pipeline_key
@@ -2551,692 +2459,666 @@ radv_generate_graphics_pipeline_key(const struct radv_pipeline *pipeline,
const VkGraphicsPipelineCreateInfo *pCreateInfo,
const struct radv_blend_state *blend)
{
- RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
- struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
- const VkPipelineVertexInputStateCreateInfo *input_state =
- pCreateInfo->pVertexInputState;
- const VkPipelineVertexInputDivisorStateCreateInfoEXT *divisor_state =
- vk_find_struct_const(input_state->pNext, PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);
- bool uses_dynamic_stride = false;
-
- struct radv_pipeline_key key;
- memset(&key, 0, sizeof(key));
-
- if (pCreateInfo->flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT)
- key.optimisations_disabled = 1;
-
- key.has_multiview_view_index = !!subpass->view_mask;
-
- uint32_t binding_input_rate = 0;
- uint32_t instance_rate_divisors[MAX_VERTEX_ATTRIBS];
- for (unsigned i = 0; i < input_state->vertexBindingDescriptionCount; ++i) {
- if (input_state->pVertexBindingDescriptions[i].inputRate) {
- unsigned binding = input_state->pVertexBindingDescriptions[i].binding;
- binding_input_rate |= 1u << binding;
- instance_rate_divisors[binding] = 1;
- }
- }
- if (divisor_state) {
- for (unsigned i = 0; i < divisor_state->vertexBindingDivisorCount; ++i) {
- instance_rate_divisors[divisor_state->pVertexBindingDivisors[i].binding] =
- divisor_state->pVertexBindingDivisors[i].divisor;
- }
- }
-
- if (pCreateInfo->pDynamicState) {
- uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;
- for (uint32_t i = 0; i < count; i++) {
- if (pCreateInfo->pDynamicState->pDynamicStates[i] == VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT) {
- uses_dynamic_stride = true;
- break;
- }
- }
- }
-
- for (unsigned i = 0; i < input_state->vertexAttributeDescriptionCount; ++i) {
- const VkVertexInputAttributeDescription *desc =
- &input_state->pVertexAttributeDescriptions[i];
- const struct util_format_description *format_desc;
- unsigned location = desc->location;
- unsigned binding = desc->binding;
- unsigned num_format, data_format;
- int first_non_void;
-
- if (binding_input_rate & (1u << binding)) {
- key.instance_rate_inputs |= 1u << location;
- key.instance_rate_divisors[location] = instance_rate_divisors[binding];
- }
-
- format_desc = vk_format_description(desc->format);
- first_non_void = vk_format_get_first_non_void_channel(desc->format);
-
- num_format = radv_translate_buffer_numformat(format_desc, first_non_void);
- data_format = radv_translate_buffer_dataformat(format_desc, first_non_void);
-
- key.vertex_attribute_formats[location] = data_format | (num_format << 4);
- key.vertex_attribute_bindings[location] = desc->binding;
- key.vertex_attribute_offsets[location] = desc->offset;
-
- if (!uses_dynamic_stride) {
- /* From the Vulkan spec 1.2.157:
- *
- * "If the bound pipeline state object was created
- * with the
- * VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT
- * dynamic state enabled then pStrides[i] specifies
- * the distance in bytes between two consecutive
- * elements within the corresponding buffer. In this
- * case the VkVertexInputBindingDescription::stride
- * state from the pipeline state object is ignored."
- *
- * Make sure the vertex attribute stride is zero to
- * avoid computing a wrong offset if it's initialized
- * to something other than zero.
- */
- key.vertex_attribute_strides[location] =
- radv_get_attrib_stride(input_state, desc->binding);
- }
-
- enum ac_fetch_format adjust = AC_FETCH_FORMAT_NONE;
- if (pipeline->device->physical_device->rad_info.chip_class <= GFX8 &&
- pipeline->device->physical_device->rad_info.family != CHIP_STONEY) {
- VkFormat format = input_state->pVertexAttributeDescriptions[i].format;
- switch(format) {
- case VK_FORMAT_A2R10G10B10_SNORM_PACK32:
- case VK_FORMAT_A2B10G10R10_SNORM_PACK32:
- adjust = AC_FETCH_FORMAT_SNORM;
- break;
- case VK_FORMAT_A2R10G10B10_SSCALED_PACK32:
- case VK_FORMAT_A2B10G10R10_SSCALED_PACK32:
- adjust = AC_FETCH_FORMAT_SSCALED;
- break;
- case VK_FORMAT_A2R10G10B10_SINT_PACK32:
- case VK_FORMAT_A2B10G10R10_SINT_PACK32:
- adjust = AC_FETCH_FORMAT_SINT;
- break;
- default:
- break;
- }
- }
- key.vertex_alpha_adjust[location] = adjust;
-
- switch (desc->format) {
- case VK_FORMAT_B8G8R8A8_UNORM:
- case VK_FORMAT_B8G8R8A8_SNORM:
- case VK_FORMAT_B8G8R8A8_USCALED:
- case VK_FORMAT_B8G8R8A8_SSCALED:
- case VK_FORMAT_B8G8R8A8_UINT:
- case VK_FORMAT_B8G8R8A8_SINT:
- case VK_FORMAT_B8G8R8A8_SRGB:
- case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
- case VK_FORMAT_A2R10G10B10_SNORM_PACK32:
- case VK_FORMAT_A2R10G10B10_USCALED_PACK32:
- case VK_FORMAT_A2R10G10B10_SSCALED_PACK32:
- case VK_FORMAT_A2R10G10B10_UINT_PACK32:
- case VK_FORMAT_A2R10G10B10_SINT_PACK32:
- key.vertex_post_shuffle |= 1 << location;
- break;
- default:
- break;
- }
- }
-
- const VkPipelineTessellationStateCreateInfo *tess =
- radv_pipeline_get_tessellation_state(pCreateInfo);
- if (tess)
- key.tess_input_vertices = tess->patchControlPoints;
-
- const VkPipelineMultisampleStateCreateInfo *vkms =
- radv_pipeline_get_multisample_state(pCreateInfo);
- if (vkms && vkms->rasterizationSamples > 1) {
- uint32_t num_samples = vkms->rasterizationSamples;
- uint32_t ps_iter_samples = radv_pipeline_get_ps_iter_samples(pCreateInfo);
- key.num_samples = num_samples;
- key.log2_ps_iter_samples = util_logbase2(ps_iter_samples);
- }
-
- key.col_format = blend->spi_shader_col_format;
- if (pipeline->device->physical_device->rad_info.chip_class < GFX8) {
- key.is_int8 = blend->col_format_is_int8;
- key.is_int10 = blend->col_format_is_int10;
- }
-
- if (pipeline->device->physical_device->rad_info.chip_class >= GFX10)
- key.topology = pCreateInfo->pInputAssemblyState->topology;
-
- return key;
+ RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
+ struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
+ const VkPipelineVertexInputStateCreateInfo *input_state = pCreateInfo->pVertexInputState;
+ const VkPipelineVertexInputDivisorStateCreateInfoEXT *divisor_state =
+ vk_find_struct_const(input_state->pNext, PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);
+ bool uses_dynamic_stride = false;
+
+ struct radv_pipeline_key key;
+ memset(&key, 0, sizeof(key));
+
+ if (pCreateInfo->flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT)
+ key.optimisations_disabled = 1;
+
+ key.has_multiview_view_index = !!subpass->view_mask;
+
+ uint32_t binding_input_rate = 0;
+ uint32_t instance_rate_divisors[MAX_VERTEX_ATTRIBS];
+ for (unsigned i = 0; i < input_state->vertexBindingDescriptionCount; ++i) {
+ if (input_state->pVertexBindingDescriptions[i].inputRate) {
+ unsigned binding = input_state->pVertexBindingDescriptions[i].binding;
+ binding_input_rate |= 1u << binding;
+ instance_rate_divisors[binding] = 1;
+ }
+ }
+ if (divisor_state) {
+ for (unsigned i = 0; i < divisor_state->vertexBindingDivisorCount; ++i) {
+ instance_rate_divisors[divisor_state->pVertexBindingDivisors[i].binding] =
+ divisor_state->pVertexBindingDivisors[i].divisor;
+ }
+ }
+
+ if (pCreateInfo->pDynamicState) {
+ uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;
+ for (uint32_t i = 0; i < count; i++) {
+ if (pCreateInfo->pDynamicState->pDynamicStates[i] ==
+ VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT) {
+ uses_dynamic_stride = true;
+ break;
+ }
+ }
+ }
+
+ for (unsigned i = 0; i < input_state->vertexAttributeDescriptionCount; ++i) {
+ const VkVertexInputAttributeDescription *desc = &input_state->pVertexAttributeDescriptions[i];
+ const struct util_format_description *format_desc;
+ unsigned location = desc->location;
+ unsigned binding = desc->binding;
+ unsigned num_format, data_format;
+ int first_non_void;
+
+ if (binding_input_rate & (1u << binding)) {
+ key.instance_rate_inputs |= 1u << location;
+ key.instance_rate_divisors[location] = instance_rate_divisors[binding];
+ }
+
+ format_desc = vk_format_description(desc->format);
+ first_non_void = vk_format_get_first_non_void_channel(desc->format);
+
+ num_format = radv_translate_buffer_numformat(format_desc, first_non_void);
+ data_format = radv_translate_buffer_dataformat(format_desc, first_non_void);
+
+ key.vertex_attribute_formats[location] = data_format | (num_format << 4);
+ key.vertex_attribute_bindings[location] = desc->binding;
+ key.vertex_attribute_offsets[location] = desc->offset;
+
+ if (!uses_dynamic_stride) {
+ /* From the Vulkan spec 1.2.157:
+ *
+ * "If the bound pipeline state object was created
+ * with the
+ * VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT
+ * dynamic state enabled then pStrides[i] specifies
+ * the distance in bytes between two consecutive
+ * elements within the corresponding buffer. In this
+ * case the VkVertexInputBindingDescription::stride
+ * state from the pipeline state object is ignored."
+ *
+ * Make sure the vertex attribute stride is zero to
+ * avoid computing a wrong offset if it's initialized
+ * to something other than zero.
+ */
+ key.vertex_attribute_strides[location] =
+ radv_get_attrib_stride(input_state, desc->binding);
+ }
+
+ enum ac_fetch_format adjust = AC_FETCH_FORMAT_NONE;
+ if (pipeline->device->physical_device->rad_info.chip_class <= GFX8 &&
+ pipeline->device->physical_device->rad_info.family != CHIP_STONEY) {
+ VkFormat format = input_state->pVertexAttributeDescriptions[i].format;
+ switch (format) {
+ case VK_FORMAT_A2R10G10B10_SNORM_PACK32:
+ case VK_FORMAT_A2B10G10R10_SNORM_PACK32:
+ adjust = AC_FETCH_FORMAT_SNORM;
+ break;
+ case VK_FORMAT_A2R10G10B10_SSCALED_PACK32:
+ case VK_FORMAT_A2B10G10R10_SSCALED_PACK32:
+ adjust = AC_FETCH_FORMAT_SSCALED;
+ break;
+ case VK_FORMAT_A2R10G10B10_SINT_PACK32:
+ case VK_FORMAT_A2B10G10R10_SINT_PACK32:
+ adjust = AC_FETCH_FORMAT_SINT;
+ break;
+ default:
+ break;
+ }
+ }
+ key.vertex_alpha_adjust[location] = adjust;
+
+ switch (desc->format) {
+ case VK_FORMAT_B8G8R8A8_UNORM:
+ case VK_FORMAT_B8G8R8A8_SNORM:
+ case VK_FORMAT_B8G8R8A8_USCALED:
+ case VK_FORMAT_B8G8R8A8_SSCALED:
+ case VK_FORMAT_B8G8R8A8_UINT:
+ case VK_FORMAT_B8G8R8A8_SINT:
+ case VK_FORMAT_B8G8R8A8_SRGB:
+ case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
+ case VK_FORMAT_A2R10G10B10_SNORM_PACK32:
+ case VK_FORMAT_A2R10G10B10_USCALED_PACK32:
+ case VK_FORMAT_A2R10G10B10_SSCALED_PACK32:
+ case VK_FORMAT_A2R10G10B10_UINT_PACK32:
+ case VK_FORMAT_A2R10G10B10_SINT_PACK32:
+ key.vertex_post_shuffle |= 1 << location;
+ break;
+ default:
+ break;
+ }
+ }
+
+ const VkPipelineTessellationStateCreateInfo *tess =
+ radv_pipeline_get_tessellation_state(pCreateInfo);
+ if (tess)
+ key.tess_input_vertices = tess->patchControlPoints;
+
+ const VkPipelineMultisampleStateCreateInfo *vkms =
+ radv_pipeline_get_multisample_state(pCreateInfo);
+ if (vkms && vkms->rasterizationSamples > 1) {
+ uint32_t num_samples = vkms->rasterizationSamples;
+ uint32_t ps_iter_samples = radv_pipeline_get_ps_iter_samples(pCreateInfo);
+ key.num_samples = num_samples;
+ key.log2_ps_iter_samples = util_logbase2(ps_iter_samples);
+ }
+
+ key.col_format = blend->spi_shader_col_format;
+ if (pipeline->device->physical_device->rad_info.chip_class < GFX8) {
+ key.is_int8 = blend->col_format_is_int8;
+ key.is_int10 = blend->col_format_is_int10;
+ }
+
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX10)
+ key.topology = pCreateInfo->pInputAssemblyState->topology;
+
+ return key;
}
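Two encodings in the key above are easy to miss: the per-attribute format packs the data format in the low four bits with the numeric format above it, and the sample-shading rate is stored as a log2. A standalone sketch with invented numeric values (the real values come from radv_translate_buffer_dataformat/_numformat and the multisample state):

#include <assert.h>
#include <stdint.h>

static unsigned
ilog2(unsigned v)
{
   unsigned r = 0;
   while (v >>= 1)
      r++;
   return r;
}

int
main(void)
{
   unsigned data_format = 11, num_format = 4;        /* arbitrary example values */
   uint8_t packed = data_format | (num_format << 4); /* key.vertex_attribute_formats[] */
   assert((packed & 0xf) == data_format && (packed >> 4) == num_format);

   assert(ilog2(8) == 3); /* 8 shading samples -> log2_ps_iter_samples == 3 */
   return 0;
}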
static bool
radv_nir_stage_uses_xfb(const nir_shader *nir)
{
- nir_xfb_info *xfb = nir_gather_xfb_info(nir, NULL);
- bool uses_xfb = !!xfb;
+ nir_xfb_info *xfb = nir_gather_xfb_info(nir, NULL);
+ bool uses_xfb = !!xfb;
- ralloc_free(xfb);
- return uses_xfb;
+ ralloc_free(xfb);
+ return uses_xfb;
}
static void
-radv_fill_shader_keys(struct radv_device *device,
- struct radv_shader_variant_key *keys,
- const struct radv_pipeline_key *key,
- nir_shader **nir)
+radv_fill_shader_keys(struct radv_device *device, struct radv_shader_variant_key *keys,
+ const struct radv_pipeline_key *key, nir_shader **nir)
{
- keys[MESA_SHADER_VERTEX].vs.instance_rate_inputs = key->instance_rate_inputs;
- keys[MESA_SHADER_VERTEX].vs.post_shuffle = key->vertex_post_shuffle;
- for (unsigned i = 0; i < MAX_VERTEX_ATTRIBS; ++i) {
- keys[MESA_SHADER_VERTEX].vs.instance_rate_divisors[i] = key->instance_rate_divisors[i];
- keys[MESA_SHADER_VERTEX].vs.vertex_attribute_formats[i] = key->vertex_attribute_formats[i];
- keys[MESA_SHADER_VERTEX].vs.vertex_attribute_bindings[i] = key->vertex_attribute_bindings[i];
- keys[MESA_SHADER_VERTEX].vs.vertex_attribute_offsets[i] = key->vertex_attribute_offsets[i];
- keys[MESA_SHADER_VERTEX].vs.vertex_attribute_strides[i] = key->vertex_attribute_strides[i];
- keys[MESA_SHADER_VERTEX].vs.alpha_adjust[i] = key->vertex_alpha_adjust[i];
- }
- keys[MESA_SHADER_VERTEX].vs.outprim = si_conv_prim_to_gs_out(key->topology);
-
- if (nir[MESA_SHADER_TESS_CTRL]) {
- keys[MESA_SHADER_VERTEX].vs_common_out.as_ls = true;
- keys[MESA_SHADER_TESS_CTRL].tcs.input_vertices = key->tess_input_vertices;
- keys[MESA_SHADER_TESS_CTRL].tcs.primitive_mode = nir[MESA_SHADER_TESS_EVAL]->info.tess.primitive_mode;
- }
-
- if (nir[MESA_SHADER_GEOMETRY]) {
- if (nir[MESA_SHADER_TESS_CTRL])
- keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_es = true;
- else
- keys[MESA_SHADER_VERTEX].vs_common_out.as_es = true;
- }
-
- if (device->physical_device->use_ngg) {
- if (nir[MESA_SHADER_TESS_CTRL]) {
- keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg = true;
- } else {
- keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg = true;
- }
-
- if (nir[MESA_SHADER_TESS_CTRL] &&
- nir[MESA_SHADER_GEOMETRY] &&
- nir[MESA_SHADER_GEOMETRY]->info.gs.invocations *
- nir[MESA_SHADER_GEOMETRY]->info.gs.vertices_out > 256) {
- /* Fall back to the legacy path if tessellation is
- * enabled with extreme geometry because
- * EN_MAX_VERT_OUT_PER_GS_INSTANCE doesn't work and it
- * might hang.
- */
- keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg = false;
- }
-
- gl_shader_stage last_xfb_stage = MESA_SHADER_VERTEX;
-
- for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
- if (nir[i])
- last_xfb_stage = i;
- }
-
- bool uses_xfb = nir[last_xfb_stage] &&
- radv_nir_stage_uses_xfb(nir[last_xfb_stage]);
-
- if (!device->physical_device->use_ngg_streamout && uses_xfb) {
- if (nir[MESA_SHADER_TESS_CTRL])
- keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg = false;
- else
- keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg = false;
- }
-
- /* Determine if the pipeline is eligible for the NGG passthrough
- * mode. It can't be enabled for geometry shaders, for NGG
- * streamout or for vertex shaders that export the primitive ID
- * (this is checked later because we don't have the info here).
- */
- if (!nir[MESA_SHADER_GEOMETRY] && !uses_xfb) {
- if (nir[MESA_SHADER_TESS_CTRL] &&
- keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg) {
- keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg_passthrough = true;
- } else if (nir[MESA_SHADER_VERTEX] &&
- keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg) {
- keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg_passthrough = true;
- }
- }
- }
-
- for(int i = 0; i < MESA_SHADER_STAGES; ++i)
- keys[i].has_multiview_view_index = key->has_multiview_view_index;
-
- keys[MESA_SHADER_FRAGMENT].fs.col_format = key->col_format;
- keys[MESA_SHADER_FRAGMENT].fs.is_int8 = key->is_int8;
- keys[MESA_SHADER_FRAGMENT].fs.is_int10 = key->is_int10;
- keys[MESA_SHADER_FRAGMENT].fs.log2_ps_iter_samples = key->log2_ps_iter_samples;
- keys[MESA_SHADER_FRAGMENT].fs.num_samples = key->num_samples;
-
- if (nir[MESA_SHADER_COMPUTE]) {
- unsigned subgroup_size = key->compute_subgroup_size;
- unsigned req_subgroup_size = subgroup_size;
- bool require_full_subgroups = key->require_full_subgroups;
-
- if (!subgroup_size)
- subgroup_size = device->physical_device->cs_wave_size;
-
- unsigned local_size = nir[MESA_SHADER_COMPUTE]->info.cs.local_size[0] *
- nir[MESA_SHADER_COMPUTE]->info.cs.local_size[1] *
- nir[MESA_SHADER_COMPUTE]->info.cs.local_size[2];
-
- /* Games don't always request full subgroups when they should,
- * which can cause bugs if cswave32 is enabled.
- */
- if (device->physical_device->cs_wave_size == 32 &&
- nir[MESA_SHADER_COMPUTE]->info.cs.uses_wide_subgroup_intrinsics &&
- !req_subgroup_size && local_size % RADV_SUBGROUP_SIZE == 0)
- require_full_subgroups = true;
-
- if (require_full_subgroups && !req_subgroup_size) {
- /* don't use wave32 pretending to be wave64 */
- subgroup_size = RADV_SUBGROUP_SIZE;
- }
-
- keys[MESA_SHADER_COMPUTE].cs.subgroup_size = subgroup_size;
- }
+ keys[MESA_SHADER_VERTEX].vs.instance_rate_inputs = key->instance_rate_inputs;
+ keys[MESA_SHADER_VERTEX].vs.post_shuffle = key->vertex_post_shuffle;
+ for (unsigned i = 0; i < MAX_VERTEX_ATTRIBS; ++i) {
+ keys[MESA_SHADER_VERTEX].vs.instance_rate_divisors[i] = key->instance_rate_divisors[i];
+ keys[MESA_SHADER_VERTEX].vs.vertex_attribute_formats[i] = key->vertex_attribute_formats[i];
+ keys[MESA_SHADER_VERTEX].vs.vertex_attribute_bindings[i] = key->vertex_attribute_bindings[i];
+ keys[MESA_SHADER_VERTEX].vs.vertex_attribute_offsets[i] = key->vertex_attribute_offsets[i];
+ keys[MESA_SHADER_VERTEX].vs.vertex_attribute_strides[i] = key->vertex_attribute_strides[i];
+ keys[MESA_SHADER_VERTEX].vs.alpha_adjust[i] = key->vertex_alpha_adjust[i];
+ }
+ keys[MESA_SHADER_VERTEX].vs.outprim = si_conv_prim_to_gs_out(key->topology);
+
+ if (nir[MESA_SHADER_TESS_CTRL]) {
+ keys[MESA_SHADER_VERTEX].vs_common_out.as_ls = true;
+ keys[MESA_SHADER_TESS_CTRL].tcs.input_vertices = key->tess_input_vertices;
+ keys[MESA_SHADER_TESS_CTRL].tcs.primitive_mode =
+ nir[MESA_SHADER_TESS_EVAL]->info.tess.primitive_mode;
+ }
+
+ if (nir[MESA_SHADER_GEOMETRY]) {
+ if (nir[MESA_SHADER_TESS_CTRL])
+ keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_es = true;
+ else
+ keys[MESA_SHADER_VERTEX].vs_common_out.as_es = true;
+ }
+
+ if (device->physical_device->use_ngg) {
+ if (nir[MESA_SHADER_TESS_CTRL]) {
+ keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg = true;
+ } else {
+ keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg = true;
+ }
+
+ if (nir[MESA_SHADER_TESS_CTRL] && nir[MESA_SHADER_GEOMETRY] &&
+ nir[MESA_SHADER_GEOMETRY]->info.gs.invocations *
+ nir[MESA_SHADER_GEOMETRY]->info.gs.vertices_out >
+ 256) {
+ /* Fall back to the legacy path if tessellation is
+ * enabled with extreme geometry because
+ * EN_MAX_VERT_OUT_PER_GS_INSTANCE doesn't work and it
+ * might hang.
+ */
+ keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg = false;
+ }
+
+ gl_shader_stage last_xfb_stage = MESA_SHADER_VERTEX;
+
+ for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
+ if (nir[i])
+ last_xfb_stage = i;
+ }
+
+ bool uses_xfb = nir[last_xfb_stage] && radv_nir_stage_uses_xfb(nir[last_xfb_stage]);
+
+ if (!device->physical_device->use_ngg_streamout && uses_xfb) {
+ if (nir[MESA_SHADER_TESS_CTRL])
+ keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg = false;
+ else
+ keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg = false;
+ }
+
+ /* Determine if the pipeline is eligible for the NGG passthrough
+ * mode. It can't be enabled for geometry shaders, for NGG
+ * streamout or for vertex shaders that export the primitive ID
+ * (this is checked later because we don't have the info here).
+ */
+ if (!nir[MESA_SHADER_GEOMETRY] && !uses_xfb) {
+ if (nir[MESA_SHADER_TESS_CTRL] && keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg) {
+ keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg_passthrough = true;
+ } else if (nir[MESA_SHADER_VERTEX] && keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg) {
+ keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg_passthrough = true;
+ }
+ }
+ }
+
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i)
+ keys[i].has_multiview_view_index = key->has_multiview_view_index;
+
+ keys[MESA_SHADER_FRAGMENT].fs.col_format = key->col_format;
+ keys[MESA_SHADER_FRAGMENT].fs.is_int8 = key->is_int8;
+ keys[MESA_SHADER_FRAGMENT].fs.is_int10 = key->is_int10;
+ keys[MESA_SHADER_FRAGMENT].fs.log2_ps_iter_samples = key->log2_ps_iter_samples;
+ keys[MESA_SHADER_FRAGMENT].fs.num_samples = key->num_samples;
+
+ if (nir[MESA_SHADER_COMPUTE]) {
+ unsigned subgroup_size = key->compute_subgroup_size;
+ unsigned req_subgroup_size = subgroup_size;
+ bool require_full_subgroups = key->require_full_subgroups;
+
+ if (!subgroup_size)
+ subgroup_size = device->physical_device->cs_wave_size;
+
+ unsigned local_size = nir[MESA_SHADER_COMPUTE]->info.cs.local_size[0] *
+ nir[MESA_SHADER_COMPUTE]->info.cs.local_size[1] *
+ nir[MESA_SHADER_COMPUTE]->info.cs.local_size[2];
+
+ /* Games don't always request full subgroups when they should,
+ * which can cause bugs if cswave32 is enabled.
+ */
+ if (device->physical_device->cs_wave_size == 32 &&
+ nir[MESA_SHADER_COMPUTE]->info.cs.uses_wide_subgroup_intrinsics && !req_subgroup_size &&
+ local_size % RADV_SUBGROUP_SIZE == 0)
+ require_full_subgroups = true;
+
+ if (require_full_subgroups && !req_subgroup_size) {
+ /* don't use wave32 pretending to be wave64 */
+ subgroup_size = RADV_SUBGROUP_SIZE;
+ }
+
+ keys[MESA_SHADER_COMPUTE].cs.subgroup_size = subgroup_size;
+ }
}
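The NGG decisions above reduce to a few predicates: NGG is requested for the last VS/TES stage, dropped again for the "extreme geometry" tessellation case and for transform feedback without NGG streamout, and passthrough additionally requires no GS and no XFB (plus the primitive-ID check applied later). A condensed standalone sketch of that logic, with plain booleans standing in for the real per-stage state:

#include <assert.h>
#include <stdbool.h>

static bool
wants_ngg(bool use_ngg, bool extreme_tess_gs, bool uses_xfb, bool ngg_streamout)
{
   if (!use_ngg)
      return false;
   if (extreme_tess_gs) /* tess + GS with invocations * vertices_out > 256 */
      return false;
   if (uses_xfb && !ngg_streamout)
      return false;
   return true;
}

static bool
wants_ngg_passthrough(bool ngg, bool has_gs, bool uses_xfb)
{
   return ngg && !has_gs && !uses_xfb;
}

int
main(void)
{
   assert(!wants_ngg(true, false, true, false));       /* XFB without NGG streamout */
   assert(wants_ngg_passthrough(true, false, false));  /* e.g. a plain VS+FS pipeline */
   return 0;
}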
static uint8_t
-radv_get_wave_size(struct radv_device *device,
- const VkPipelineShaderStageCreateInfo *pStage,
- gl_shader_stage stage,
- const struct radv_shader_variant_key *key)
+radv_get_wave_size(struct radv_device *device, const VkPipelineShaderStageCreateInfo *pStage,
+ gl_shader_stage stage, const struct radv_shader_variant_key *key)
{
- if (stage == MESA_SHADER_GEOMETRY && !key->vs_common_out.as_ngg)
- return 64;
- else if (stage == MESA_SHADER_COMPUTE) {
- return key->cs.subgroup_size;
- }
- else if (stage == MESA_SHADER_FRAGMENT)
- return device->physical_device->ps_wave_size;
- else
- return device->physical_device->ge_wave_size;
+ if (stage == MESA_SHADER_GEOMETRY && !key->vs_common_out.as_ngg)
+ return 64;
+ else if (stage == MESA_SHADER_COMPUTE) {
+ return key->cs.subgroup_size;
+ } else if (stage == MESA_SHADER_FRAGMENT)
+ return device->physical_device->ps_wave_size;
+ else
+ return device->physical_device->ge_wave_size;
}
static uint8_t
-radv_get_ballot_bit_size(struct radv_device *device,
- const VkPipelineShaderStageCreateInfo *pStage,
- gl_shader_stage stage,
- const struct radv_shader_variant_key *key)
+radv_get_ballot_bit_size(struct radv_device *device, const VkPipelineShaderStageCreateInfo *pStage,
+ gl_shader_stage stage, const struct radv_shader_variant_key *key)
{
- if (stage == MESA_SHADER_COMPUTE && key->cs.subgroup_size)
- return key->cs.subgroup_size;
- return 64;
+ if (stage == MESA_SHADER_COMPUTE && key->cs.subgroup_size)
+ return key->cs.subgroup_size;
+ return 64;
}
static void
radv_fill_shader_info(struct radv_pipeline *pipeline,
- const VkPipelineShaderStageCreateInfo **pStages,
- struct radv_shader_variant_key *keys,
- struct radv_shader_info *infos,
+ const VkPipelineShaderStageCreateInfo **pStages,
+ struct radv_shader_variant_key *keys, struct radv_shader_info *infos,
nir_shader **nir)
{
- unsigned active_stages = 0;
- unsigned filled_stages = 0;
-
- for (int i = 0; i < MESA_SHADER_STAGES; i++) {
- if (nir[i])
- active_stages |= (1 << i);
- }
-
- if (nir[MESA_SHADER_FRAGMENT]) {
- radv_nir_shader_info_init(&infos[MESA_SHADER_FRAGMENT]);
- radv_nir_shader_info_pass(nir[MESA_SHADER_FRAGMENT],
- pipeline->layout,
- &keys[MESA_SHADER_FRAGMENT],
- &infos[MESA_SHADER_FRAGMENT]);
-
- /* TODO: These are no longer used as keys; we should refactor this. */
- keys[MESA_SHADER_VERTEX].vs_common_out.export_prim_id =
- infos[MESA_SHADER_FRAGMENT].ps.prim_id_input;
- keys[MESA_SHADER_VERTEX].vs_common_out.export_layer_id =
- infos[MESA_SHADER_FRAGMENT].ps.layer_input;
- keys[MESA_SHADER_VERTEX].vs_common_out.export_clip_dists =
- !!infos[MESA_SHADER_FRAGMENT].ps.num_input_clips_culls;
- keys[MESA_SHADER_VERTEX].vs_common_out.export_viewport_index =
- infos[MESA_SHADER_FRAGMENT].ps.viewport_index_input;
- keys[MESA_SHADER_TESS_EVAL].vs_common_out.export_prim_id =
- infos[MESA_SHADER_FRAGMENT].ps.prim_id_input;
- keys[MESA_SHADER_TESS_EVAL].vs_common_out.export_layer_id =
- infos[MESA_SHADER_FRAGMENT].ps.layer_input;
- keys[MESA_SHADER_TESS_EVAL].vs_common_out.export_clip_dists =
- !!infos[MESA_SHADER_FRAGMENT].ps.num_input_clips_culls;
- keys[MESA_SHADER_TESS_EVAL].vs_common_out.export_viewport_index =
- infos[MESA_SHADER_FRAGMENT].ps.viewport_index_input;
-
- /* NGG passthrough mode can't be enabled for vertex shaders
- * that export the primitive ID.
- *
- * TODO: I should really refactor the keys logic.
- */
- if (nir[MESA_SHADER_VERTEX] &&
- keys[MESA_SHADER_VERTEX].vs_common_out.export_prim_id) {
- keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg_passthrough = false;
- }
-
- filled_stages |= (1 << MESA_SHADER_FRAGMENT);
- }
-
- if (pipeline->device->physical_device->rad_info.chip_class >= GFX9 &&
- nir[MESA_SHADER_TESS_CTRL]) {
- struct nir_shader *combined_nir[] = {nir[MESA_SHADER_VERTEX], nir[MESA_SHADER_TESS_CTRL]};
- struct radv_shader_variant_key key = keys[MESA_SHADER_TESS_CTRL];
- key.tcs.vs_key = keys[MESA_SHADER_VERTEX].vs;
-
- radv_nir_shader_info_init(&infos[MESA_SHADER_TESS_CTRL]);
-
- for (int i = 0; i < 2; i++) {
- radv_nir_shader_info_pass(combined_nir[i],
- pipeline->layout, &key,
- &infos[MESA_SHADER_TESS_CTRL]);
- }
-
- filled_stages |= (1 << MESA_SHADER_VERTEX);
- filled_stages |= (1 << MESA_SHADER_TESS_CTRL);
- }
-
- if (pipeline->device->physical_device->rad_info.chip_class >= GFX9 &&
- nir[MESA_SHADER_GEOMETRY]) {
- gl_shader_stage pre_stage = nir[MESA_SHADER_TESS_EVAL] ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
- struct nir_shader *combined_nir[] = {nir[pre_stage], nir[MESA_SHADER_GEOMETRY]};
-
- radv_nir_shader_info_init(&infos[MESA_SHADER_GEOMETRY]);
-
- for (int i = 0; i < 2; i++) {
- radv_nir_shader_info_pass(combined_nir[i],
- pipeline->layout,
- &keys[pre_stage],
- &infos[MESA_SHADER_GEOMETRY]);
- }
-
- filled_stages |= (1 << pre_stage);
- filled_stages |= (1 << MESA_SHADER_GEOMETRY);
- }
-
- active_stages ^= filled_stages;
- while (active_stages) {
- int i = u_bit_scan(&active_stages);
- radv_nir_shader_info_init(&infos[i]);
- radv_nir_shader_info_pass(nir[i], pipeline->layout,
- &keys[i], &infos[i]);
- }
-
- for (int i = 0; i < MESA_SHADER_STAGES; i++) {
- if (nir[i]) {
- infos[i].wave_size =
- radv_get_wave_size(pipeline->device, pStages[i],
- i, &keys[i]);
- infos[i].ballot_bit_size =
- radv_get_ballot_bit_size(pipeline->device,
- pStages[i], i,
- &keys[i]);
- }
- }
+ unsigned active_stages = 0;
+ unsigned filled_stages = 0;
+
+ for (int i = 0; i < MESA_SHADER_STAGES; i++) {
+ if (nir[i])
+ active_stages |= (1 << i);
+ }
+
+ if (nir[MESA_SHADER_FRAGMENT]) {
+ radv_nir_shader_info_init(&infos[MESA_SHADER_FRAGMENT]);
+ radv_nir_shader_info_pass(nir[MESA_SHADER_FRAGMENT], pipeline->layout,
+ &keys[MESA_SHADER_FRAGMENT], &infos[MESA_SHADER_FRAGMENT]);
+
+ /* TODO: These are no longer used as keys; we should refactor this. */
+ keys[MESA_SHADER_VERTEX].vs_common_out.export_prim_id =
+ infos[MESA_SHADER_FRAGMENT].ps.prim_id_input;
+ keys[MESA_SHADER_VERTEX].vs_common_out.export_layer_id =
+ infos[MESA_SHADER_FRAGMENT].ps.layer_input;
+ keys[MESA_SHADER_VERTEX].vs_common_out.export_clip_dists =
+ !!infos[MESA_SHADER_FRAGMENT].ps.num_input_clips_culls;
+ keys[MESA_SHADER_VERTEX].vs_common_out.export_viewport_index =
+ infos[MESA_SHADER_FRAGMENT].ps.viewport_index_input;
+ keys[MESA_SHADER_TESS_EVAL].vs_common_out.export_prim_id =
+ infos[MESA_SHADER_FRAGMENT].ps.prim_id_input;
+ keys[MESA_SHADER_TESS_EVAL].vs_common_out.export_layer_id =
+ infos[MESA_SHADER_FRAGMENT].ps.layer_input;
+ keys[MESA_SHADER_TESS_EVAL].vs_common_out.export_clip_dists =
+ !!infos[MESA_SHADER_FRAGMENT].ps.num_input_clips_culls;
+ keys[MESA_SHADER_TESS_EVAL].vs_common_out.export_viewport_index =
+ infos[MESA_SHADER_FRAGMENT].ps.viewport_index_input;
+
+ /* NGG passthrough mode can't be enabled for vertex shaders
+ * that export the primitive ID.
+ *
+ * TODO: I should really refactor the keys logic.
+ */
+ if (nir[MESA_SHADER_VERTEX] && keys[MESA_SHADER_VERTEX].vs_common_out.export_prim_id) {
+ keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg_passthrough = false;
+ }
+
+ filled_stages |= (1 << MESA_SHADER_FRAGMENT);
+ }
+
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX9 &&
+ nir[MESA_SHADER_TESS_CTRL]) {
+ struct nir_shader *combined_nir[] = {nir[MESA_SHADER_VERTEX], nir[MESA_SHADER_TESS_CTRL]};
+ struct radv_shader_variant_key key = keys[MESA_SHADER_TESS_CTRL];
+ key.tcs.vs_key = keys[MESA_SHADER_VERTEX].vs;
+
+ radv_nir_shader_info_init(&infos[MESA_SHADER_TESS_CTRL]);
+
+ for (int i = 0; i < 2; i++) {
+ radv_nir_shader_info_pass(combined_nir[i], pipeline->layout, &key,
+ &infos[MESA_SHADER_TESS_CTRL]);
+ }
+
+ filled_stages |= (1 << MESA_SHADER_VERTEX);
+ filled_stages |= (1 << MESA_SHADER_TESS_CTRL);
+ }
+
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX9 &&
+ nir[MESA_SHADER_GEOMETRY]) {
+ gl_shader_stage pre_stage =
+ nir[MESA_SHADER_TESS_EVAL] ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
+ struct nir_shader *combined_nir[] = {nir[pre_stage], nir[MESA_SHADER_GEOMETRY]};
+
+ radv_nir_shader_info_init(&infos[MESA_SHADER_GEOMETRY]);
+
+ for (int i = 0; i < 2; i++) {
+ radv_nir_shader_info_pass(combined_nir[i], pipeline->layout, &keys[pre_stage],
+ &infos[MESA_SHADER_GEOMETRY]);
+ }
+
+ filled_stages |= (1 << pre_stage);
+ filled_stages |= (1 << MESA_SHADER_GEOMETRY);
+ }
+
+ active_stages ^= filled_stages;
+ while (active_stages) {
+ int i = u_bit_scan(&active_stages);
+ radv_nir_shader_info_init(&infos[i]);
+ radv_nir_shader_info_pass(nir[i], pipeline->layout, &keys[i], &infos[i]);
+ }
+
+ for (int i = 0; i < MESA_SHADER_STAGES; i++) {
+ if (nir[i]) {
+ infos[i].wave_size = radv_get_wave_size(pipeline->device, pStages[i], i, &keys[i]);
+ infos[i].ballot_bit_size =
+ radv_get_ballot_bit_size(pipeline->device, pStages[i], i, &keys[i]);
+ }
+ }
}
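The "remaining stages" walk above is a standard bitmask idiom: XOR out the stages already handled through the merged-stage paths, then pop set bits one at a time (u_bit_scan() returns the lowest set bit's index and clears it). A standalone equivalent using a compiler builtin instead of the Mesa helper:

#include <stdio.h>

static int
pop_lowest_bit(unsigned *mask)
{
   int i = __builtin_ctz(*mask); /* index of the lowest set bit */
   *mask &= *mask - 1;           /* clear it */
   return i;
}

int
main(void)
{
   unsigned active = 0x3f; /* say, six stages present */
   unsigned filled = 0x21; /* two already filled via the merged-stage paths */

   active ^= filled;
   while (active)
      printf("fill stage %d\n", pop_lowest_bit(&active)); /* prints 1, 2, 3, 4 */
   return 0;
}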
static void
-merge_tess_info(struct shader_info *tes_info,
- struct shader_info *tcs_info)
+merge_tess_info(struct shader_info *tes_info, struct shader_info *tcs_info)
{
- /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
- *
- * "PointMode. Controls generation of points rather than triangles
- * or lines. This functionality defaults to disabled, and is
- * enabled if either shader stage includes the execution mode.
- *
- * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
- * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
- * and OutputVertices, it says:
- *
- * "One mode must be set in at least one of the tessellation
- * shader stages."
- *
- * So, the fields can be set in either the TCS or TES, but they must
- * agree if set in both. Our backend looks at TES, so bitwise-or in
- * the values from the TCS.
- */
- assert(tcs_info->tess.tcs_vertices_out == 0 ||
- tes_info->tess.tcs_vertices_out == 0 ||
- tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
- tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;
-
- assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
- tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
- tcs_info->tess.spacing == tes_info->tess.spacing);
- tes_info->tess.spacing |= tcs_info->tess.spacing;
-
- assert(tcs_info->tess.primitive_mode == 0 ||
- tes_info->tess.primitive_mode == 0 ||
- tcs_info->tess.primitive_mode == tes_info->tess.primitive_mode);
- tes_info->tess.primitive_mode |= tcs_info->tess.primitive_mode;
- tes_info->tess.ccw |= tcs_info->tess.ccw;
- tes_info->tess.point_mode |= tcs_info->tess.point_mode;
-
- /* Copy the merged info back to the TCS */
- tcs_info->tess.tcs_vertices_out = tes_info->tess.tcs_vertices_out;
- tcs_info->tess.spacing = tes_info->tess.spacing;
- tcs_info->tess.primitive_mode = tes_info->tess.primitive_mode;
- tcs_info->tess.ccw = tes_info->tess.ccw;
- tcs_info->tess.point_mode = tes_info->tess.point_mode;
+ /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
+ *
+ * "PointMode. Controls generation of points rather than triangles
+ * or lines. This functionality defaults to disabled, and is
+ * enabled if either shader stage includes the execution mode.
+ *
+ * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
+ * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
+ * and OutputVertices, it says:
+ *
+ * "One mode must be set in at least one of the tessellation
+ * shader stages."
+ *
+ * So, the fields can be set in either the TCS or TES, but they must
+ * agree if set in both. Our backend looks at TES, so bitwise-or in
+ * the values from the TCS.
+ */
+ assert(tcs_info->tess.tcs_vertices_out == 0 || tes_info->tess.tcs_vertices_out == 0 ||
+ tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
+ tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;
+
+ assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
+ tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
+ tcs_info->tess.spacing == tes_info->tess.spacing);
+ tes_info->tess.spacing |= tcs_info->tess.spacing;
+
+ assert(tcs_info->tess.primitive_mode == 0 || tes_info->tess.primitive_mode == 0 ||
+ tcs_info->tess.primitive_mode == tes_info->tess.primitive_mode);
+ tes_info->tess.primitive_mode |= tcs_info->tess.primitive_mode;
+ tes_info->tess.ccw |= tcs_info->tess.ccw;
+ tes_info->tess.point_mode |= tcs_info->tess.point_mode;
+
+ /* Copy the merged info back to the TCS */
+ tcs_info->tess.tcs_vertices_out = tes_info->tess.tcs_vertices_out;
+ tcs_info->tess.spacing = tes_info->tess.spacing;
+ tcs_info->tess.primitive_mode = tes_info->tess.primitive_mode;
+ tcs_info->tess.ccw = tes_info->tess.ccw;
+ tcs_info->tess.point_mode = tes_info->tess.point_mode;
}
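Because each tessellation execution mode may be declared in either stage but must agree when declared in both, the merge above is just an OR over "zero means unset" fields, followed by copying the result back so TCS and TES end up identical. A toy standalone version with two fields:

#include <assert.h>

struct toy_tess_info {
   unsigned vertices_out; /* 0 = unspecified */
   unsigned spacing;      /* 0 = unspecified */
};

int
main(void)
{
   struct toy_tess_info tcs = {.vertices_out = 3, .spacing = 0};
   struct toy_tess_info tes = {.vertices_out = 0, .spacing = 2};

   assert(!tcs.vertices_out || !tes.vertices_out ||
          tcs.vertices_out == tes.vertices_out);
   tes.vertices_out |= tcs.vertices_out;
   tes.spacing |= tcs.spacing;
   tcs = tes; /* copy the merged info back to the TCS */

   assert(tcs.vertices_out == 3 && tcs.spacing == 2 && tes.vertices_out == 3);
   return 0;
}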
static void
-gather_tess_info(struct radv_device *device,
- nir_shader **nir, struct radv_shader_info *infos,
+gather_tess_info(struct radv_device *device, nir_shader **nir, struct radv_shader_info *infos,
const struct radv_pipeline_key *pipeline_key)
{
- merge_tess_info(&nir[MESA_SHADER_TESS_EVAL]->info, &nir[MESA_SHADER_TESS_CTRL]->info);
-
- /* Number of tessellation patches per workgroup processed by the current pipeline. */
- unsigned num_patches =
- get_tcs_num_patches(
- pipeline_key->tess_input_vertices,
- nir[MESA_SHADER_TESS_CTRL]->info.tess.tcs_vertices_out,
- infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_inputs,
- infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_outputs,
- infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_patch_outputs,
- device->tess_offchip_block_dw_size,
- device->physical_device->rad_info.chip_class,
- device->physical_device->rad_info.family);
-
- /* LDS size used by VS+TCS for storing TCS inputs and outputs. */
- unsigned tcs_lds_size =
- calculate_tess_lds_size(
- device->physical_device->rad_info.chip_class,
- pipeline_key->tess_input_vertices,
- nir[MESA_SHADER_TESS_CTRL]->info.tess.tcs_vertices_out,
- infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_inputs,
- num_patches,
- infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_outputs,
- infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_patch_outputs);
-
- infos[MESA_SHADER_TESS_CTRL].num_tess_patches = num_patches;
- infos[MESA_SHADER_TESS_CTRL].tcs.num_lds_blocks = tcs_lds_size;
- infos[MESA_SHADER_TESS_CTRL].tcs.tes_reads_tess_factors = !!(nir[MESA_SHADER_TESS_EVAL]->info.inputs_read & (VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER));
- infos[MESA_SHADER_TESS_CTRL].tcs.tes_inputs_read = nir[MESA_SHADER_TESS_EVAL]->info.inputs_read;
- infos[MESA_SHADER_TESS_CTRL].tcs.tes_patch_inputs_read = nir[MESA_SHADER_TESS_EVAL]->info.patch_inputs_read;
-
- infos[MESA_SHADER_TESS_EVAL].num_tess_patches = num_patches;
- infos[MESA_SHADER_GEOMETRY].num_tess_patches = num_patches;
-
- if (!radv_use_llvm_for_stage(device, MESA_SHADER_VERTEX)) {
- /* When the number of TCS input and output vertices are the same (typically 3):
- * - There is an equal amount of LS and HS invocations
- * - In case of merged LSHS shaders, the LS and HS halves of the shader
- * always process the exact same vertex. We can use this knowledge to optimize them.
- *
- * We don't set tcs_in_out_eq if the float controls differ because that might
- * involve different float modes for the same block and our optimizer
- * doesn't handle an instruction dominating another with a different mode.
- */
- infos[MESA_SHADER_VERTEX].vs.tcs_in_out_eq =
- device->physical_device->rad_info.chip_class >= GFX9 &&
- pipeline_key->tess_input_vertices == nir[MESA_SHADER_TESS_CTRL]->info.tess.tcs_vertices_out &&
- nir[MESA_SHADER_VERTEX]->info.float_controls_execution_mode == nir[MESA_SHADER_TESS_CTRL]->info.float_controls_execution_mode;
-
- if (infos[MESA_SHADER_VERTEX].vs.tcs_in_out_eq)
- infos[MESA_SHADER_VERTEX].vs.tcs_temp_only_input_mask =
- nir[MESA_SHADER_TESS_CTRL]->info.inputs_read &
- nir[MESA_SHADER_VERTEX]->info.outputs_written &
- ~nir[MESA_SHADER_TESS_CTRL]->info.tess.tcs_cross_invocation_inputs_read &
- ~nir[MESA_SHADER_TESS_CTRL]->info.inputs_read_indirectly &
- ~nir[MESA_SHADER_VERTEX]->info.outputs_accessed_indirectly;
-
- /* Copy data to TCS so it can be accessed by the backend if they are merged. */
- infos[MESA_SHADER_TESS_CTRL].vs.tcs_in_out_eq = infos[MESA_SHADER_VERTEX].vs.tcs_in_out_eq;
- infos[MESA_SHADER_TESS_CTRL].vs.tcs_temp_only_input_mask = infos[MESA_SHADER_VERTEX].vs.tcs_temp_only_input_mask;
- }
+ merge_tess_info(&nir[MESA_SHADER_TESS_EVAL]->info, &nir[MESA_SHADER_TESS_CTRL]->info);
+
+ /* Number of tessellation patches per workgroup processed by the current pipeline. */
+ unsigned num_patches = get_tcs_num_patches(
+ pipeline_key->tess_input_vertices, nir[MESA_SHADER_TESS_CTRL]->info.tess.tcs_vertices_out,
+ infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_inputs,
+ infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_outputs,
+ infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_patch_outputs, device->tess_offchip_block_dw_size,
+ device->physical_device->rad_info.chip_class, device->physical_device->rad_info.family);
+
+ /* LDS size used by VS+TCS for storing TCS inputs and outputs. */
+ unsigned tcs_lds_size = calculate_tess_lds_size(
+ device->physical_device->rad_info.chip_class, pipeline_key->tess_input_vertices,
+ nir[MESA_SHADER_TESS_CTRL]->info.tess.tcs_vertices_out,
+ infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_inputs, num_patches,
+ infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_outputs,
+ infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_patch_outputs);
+
+ infos[MESA_SHADER_TESS_CTRL].num_tess_patches = num_patches;
+ infos[MESA_SHADER_TESS_CTRL].tcs.num_lds_blocks = tcs_lds_size;
+ infos[MESA_SHADER_TESS_CTRL].tcs.tes_reads_tess_factors =
+ !!(nir[MESA_SHADER_TESS_EVAL]->info.inputs_read &
+ (VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER));
+ infos[MESA_SHADER_TESS_CTRL].tcs.tes_inputs_read = nir[MESA_SHADER_TESS_EVAL]->info.inputs_read;
+ infos[MESA_SHADER_TESS_CTRL].tcs.tes_patch_inputs_read =
+ nir[MESA_SHADER_TESS_EVAL]->info.patch_inputs_read;
+
+ infos[MESA_SHADER_TESS_EVAL].num_tess_patches = num_patches;
+ infos[MESA_SHADER_GEOMETRY].num_tess_patches = num_patches;
+
+ if (!radv_use_llvm_for_stage(device, MESA_SHADER_VERTEX)) {
+ /* When the number of TCS input and output vertices are the same (typically 3):
+ * - There is an equal amount of LS and HS invocations
+ * - In case of merged LSHS shaders, the LS and HS halves of the shader
+ * always process the exact same vertex. We can use this knowledge to optimize them.
+ *
+ * We don't set tcs_in_out_eq if the float controls differ because that might
+ * involve different float modes for the same block and our optimizer
+ * doesn't handle an instruction dominating another with a different mode.
+ */
+ infos[MESA_SHADER_VERTEX].vs.tcs_in_out_eq =
+ device->physical_device->rad_info.chip_class >= GFX9 &&
+ pipeline_key->tess_input_vertices ==
+ nir[MESA_SHADER_TESS_CTRL]->info.tess.tcs_vertices_out &&
+ nir[MESA_SHADER_VERTEX]->info.float_controls_execution_mode ==
+ nir[MESA_SHADER_TESS_CTRL]->info.float_controls_execution_mode;
+
+ if (infos[MESA_SHADER_VERTEX].vs.tcs_in_out_eq)
+ infos[MESA_SHADER_VERTEX].vs.tcs_temp_only_input_mask =
+ nir[MESA_SHADER_TESS_CTRL]->info.inputs_read &
+ nir[MESA_SHADER_VERTEX]->info.outputs_written &
+ ~nir[MESA_SHADER_TESS_CTRL]->info.tess.tcs_cross_invocation_inputs_read &
+ ~nir[MESA_SHADER_TESS_CTRL]->info.inputs_read_indirectly &
+ ~nir[MESA_SHADER_VERTEX]->info.outputs_accessed_indirectly;
+
+ /* Copy data to TCS so it can be accessed by the backend if they are merged. */
+ infos[MESA_SHADER_TESS_CTRL].vs.tcs_in_out_eq = infos[MESA_SHADER_VERTEX].vs.tcs_in_out_eq;
+ infos[MESA_SHADER_TESS_CTRL].vs.tcs_temp_only_input_mask =
+ infos[MESA_SHADER_VERTEX].vs.tcs_temp_only_input_mask;
+ }
}
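Roughly, tcs_temp_only_input_mask above keeps only the slots that the VS writes and the TCS reads from its own invocation, directly; anything read cross-invocation or indirectly still goes through the usual LDS path. A standalone bitmask example with invented slot masks:

#include <assert.h>
#include <stdint.h>

int
main(void)
{
   uint64_t tcs_inputs_read    = 0x0f; /* slots 0-3 read by the TCS */
   uint64_t vs_outputs_written = 0x0e; /* slots 1-3 written by the VS */
   uint64_t cross_invocation   = 0x04; /* slot 2 read across invocations */
   uint64_t indirect           = 0x08; /* slot 3 accessed indirectly */

   uint64_t temp_only = tcs_inputs_read & vs_outputs_written &
                        ~cross_invocation & ~indirect;

   assert(temp_only == 0x02); /* only slot 1 can stay in temporaries */
   return 0;
}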
-static
-void radv_init_feedback(const VkPipelineCreationFeedbackCreateInfoEXT *ext)
+static void
+radv_init_feedback(const VkPipelineCreationFeedbackCreateInfoEXT *ext)
{
- if (!ext)
- return;
-
- if (ext->pPipelineCreationFeedback) {
- ext->pPipelineCreationFeedback->flags = 0;
- ext->pPipelineCreationFeedback->duration = 0;
- }
-
- for (unsigned i = 0; i < ext->pipelineStageCreationFeedbackCount; ++i) {
- ext->pPipelineStageCreationFeedbacks[i].flags = 0;
- ext->pPipelineStageCreationFeedbacks[i].duration = 0;
- }
+ if (!ext)
+ return;
+
+ if (ext->pPipelineCreationFeedback) {
+ ext->pPipelineCreationFeedback->flags = 0;
+ ext->pPipelineCreationFeedback->duration = 0;
+ }
+
+ for (unsigned i = 0; i < ext->pipelineStageCreationFeedbackCount; ++i) {
+ ext->pPipelineStageCreationFeedbacks[i].flags = 0;
+ ext->pPipelineStageCreationFeedbacks[i].duration = 0;
+ }
}
-static
-void radv_start_feedback(VkPipelineCreationFeedbackEXT *feedback)
+static void
+radv_start_feedback(VkPipelineCreationFeedbackEXT *feedback)
{
- if (!feedback)
- return;
+ if (!feedback)
+ return;
- feedback->duration -= radv_get_current_time();
- feedback ->flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
+ feedback->duration -= radv_get_current_time();
+ feedback->flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
}
-static
-void radv_stop_feedback(VkPipelineCreationFeedbackEXT *feedback, bool cache_hit)
+static void
+radv_stop_feedback(VkPipelineCreationFeedbackEXT *feedback, bool cache_hit)
{
- if (!feedback)
- return;
+ if (!feedback)
+ return;
- feedback->duration += radv_get_current_time();
- feedback ->flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT |
- (cache_hit ? VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT : 0);
+ feedback->duration += radv_get_current_time();
+ feedback->flags =
+ VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT |
+ (cache_hit ? VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT : 0);
}
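The start/stop helpers above record elapsed time without storing a separate start timestamp: subtracting the start time and later adding the end time leaves end - start accumulated in duration. A standalone example with toy timestamps:

#include <assert.h>
#include <stdint.h>

int
main(void)
{
   int64_t duration = 0;
   int64_t t_start = 1000, t_end = 1250; /* hypothetical nanosecond stamps */

   duration -= t_start; /* radv_start_feedback() */
   duration += t_end;   /* radv_stop_feedback() */

   assert(duration == 250);
   return 0;
}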
static bool
-mem_vectorize_callback(unsigned align_mul, unsigned align_offset,
- unsigned bit_size,
- unsigned num_components,
- nir_intrinsic_instr *low, nir_intrinsic_instr *high,
+mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigned bit_size,
+ unsigned num_components, nir_intrinsic_instr *low, nir_intrinsic_instr *high,
void *data)
{
- if (num_components > 4)
- return false;
-
- /* >128 bit loads are split except with SMEM */
- if (bit_size * num_components > 128)
- return false;
-
- uint32_t align;
- if (align_offset)
- align = 1 << (ffs(align_offset) - 1);
- else
- align = align_mul;
-
- switch (low->intrinsic) {
- case nir_intrinsic_load_global:
- case nir_intrinsic_store_global:
- case nir_intrinsic_store_ssbo:
- case nir_intrinsic_load_ssbo:
- case nir_intrinsic_load_ubo:
- case nir_intrinsic_load_push_constant:
- return align % (bit_size == 8 ? 2 : 4) == 0;
- case nir_intrinsic_load_deref:
- case nir_intrinsic_store_deref:
- assert(nir_deref_mode_is(nir_src_as_deref(low->src[0]),
- nir_var_mem_shared));
- FALLTHROUGH;
- case nir_intrinsic_load_shared:
- case nir_intrinsic_store_shared:
- if (bit_size * num_components == 96) /* 96 bit loads require 128 bit alignment and are split otherwise */
- return align % 16 == 0;
- else if (bit_size * num_components == 128) /* 128 bit loads require 64 bit alignment and are split otherwise */
- return align % 8 == 0;
- else
- return align % (bit_size == 8 ? 2 : 4) == 0;
- default:
- return false;
- }
- return false;
+ if (num_components > 4)
+ return false;
+
+ /* >128 bit loads are split except with SMEM */
+ if (bit_size * num_components > 128)
+ return false;
+
+ uint32_t align;
+ if (align_offset)
+ align = 1 << (ffs(align_offset) - 1);
+ else
+ align = align_mul;
+
+ switch (low->intrinsic) {
+ case nir_intrinsic_load_global:
+ case nir_intrinsic_store_global:
+ case nir_intrinsic_store_ssbo:
+ case nir_intrinsic_load_ssbo:
+ case nir_intrinsic_load_ubo:
+ case nir_intrinsic_load_push_constant:
+ return align % (bit_size == 8 ? 2 : 4) == 0;
+ case nir_intrinsic_load_deref:
+ case nir_intrinsic_store_deref:
+ assert(nir_deref_mode_is(nir_src_as_deref(low->src[0]), nir_var_mem_shared));
+ FALLTHROUGH;
+ case nir_intrinsic_load_shared:
+ case nir_intrinsic_store_shared:
+ if (bit_size * num_components ==
+ 96) /* 96 bit loads require 128 bit alignment and are split otherwise */
+ return align % 16 == 0;
+ else if (bit_size * num_components ==
+ 128) /* 128 bit loads require 64 bit alignment and are split otherwise */
+ return align % 8 == 0;
+ else
+ return align % (bit_size == 8 ? 2 : 4) == 0;
+ default:
+ return false;
+ }
+ return false;
}
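The alignment derivation above is the only non-obvious arithmetic in the callback: when the combined access sits at a non-zero offset from its aligned base, only the lowest set bit of that offset still counts as guaranteed alignment. A standalone check with example numbers:

#include <assert.h>
#include <strings.h> /* ffs() */

int
main(void)
{
   unsigned align_mul = 16, align_offset = 4; /* base is 16-aligned, access sits at +4 */
   unsigned align = align_offset ? 1u << (ffs(align_offset) - 1) : align_mul;

   assert(align == 4);     /* effective alignment of the access */
   assert(align % 4 == 0); /* enough for a 32-bit-element vectorized load/store */
   return 0;
}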
static unsigned
lower_bit_size_callback(const nir_instr *instr, void *_)
{
- struct radv_device *device = _;
- enum chip_class chip = device->physical_device->rad_info.chip_class;
-
- if (instr->type != nir_instr_type_alu)
- return 0;
- nir_alu_instr *alu = nir_instr_as_alu(instr);
-
- if (alu->dest.dest.ssa.bit_size & (8 | 16)) {
- unsigned bit_size = alu->dest.dest.ssa.bit_size;
- switch (alu->op) {
- case nir_op_iabs:
- case nir_op_bitfield_select:
- case nir_op_udiv:
- case nir_op_idiv:
- case nir_op_umod:
- case nir_op_imod:
- case nir_op_imul_high:
- case nir_op_umul_high:
- case nir_op_ineg:
- case nir_op_irem:
- case nir_op_isign:
- return 32;
- case nir_op_imax:
- case nir_op_umax:
- case nir_op_imin:
- case nir_op_umin:
- case nir_op_ishr:
- case nir_op_ushr:
- case nir_op_ishl:
- case nir_op_uadd_sat:
- return (bit_size == 8 ||
- !(chip >= GFX8 && nir_dest_is_divergent(alu->dest.dest))) ? 32 : 0;
- default:
- return 0;
- }
- }
-
- if (nir_src_bit_size(alu->src[0].src) & (8 | 16)) {
- unsigned bit_size = nir_src_bit_size(alu->src[0].src);
- switch (alu->op) {
- case nir_op_bit_count:
- case nir_op_find_lsb:
- case nir_op_ufind_msb:
- case nir_op_i2b1:
- return 32;
- case nir_op_ilt:
- case nir_op_ige:
- case nir_op_ieq:
- case nir_op_ine:
- case nir_op_ult:
- case nir_op_uge:
- return (bit_size == 8 ||
- !(chip >= GFX8 && nir_dest_is_divergent(alu->dest.dest))) ? 32 : 0;
- default:
- return 0;
- }
- }
-
- return 0;
+ struct radv_device *device = _;
+ enum chip_class chip = device->physical_device->rad_info.chip_class;
+
+ if (instr->type != nir_instr_type_alu)
+ return 0;
+ nir_alu_instr *alu = nir_instr_as_alu(instr);
+
+ if (alu->dest.dest.ssa.bit_size & (8 | 16)) {
+ unsigned bit_size = alu->dest.dest.ssa.bit_size;
+ switch (alu->op) {
+ case nir_op_iabs:
+ case nir_op_bitfield_select:
+ case nir_op_udiv:
+ case nir_op_idiv:
+ case nir_op_umod:
+ case nir_op_imod:
+ case nir_op_imul_high:
+ case nir_op_umul_high:
+ case nir_op_ineg:
+ case nir_op_irem:
+ case nir_op_isign:
+ return 32;
+ case nir_op_imax:
+ case nir_op_umax:
+ case nir_op_imin:
+ case nir_op_umin:
+ case nir_op_ishr:
+ case nir_op_ushr:
+ case nir_op_ishl:
+ case nir_op_uadd_sat:
+ return (bit_size == 8 || !(chip >= GFX8 && nir_dest_is_divergent(alu->dest.dest))) ? 32
+ : 0;
+ default:
+ return 0;
+ }
+ }
+
+ if (nir_src_bit_size(alu->src[0].src) & (8 | 16)) {
+ unsigned bit_size = nir_src_bit_size(alu->src[0].src);
+ switch (alu->op) {
+ case nir_op_bit_count:
+ case nir_op_find_lsb:
+ case nir_op_ufind_msb:
+ case nir_op_i2b1:
+ return 32;
+ case nir_op_ilt:
+ case nir_op_ige:
+ case nir_op_ieq:
+ case nir_op_ine:
+ case nir_op_ult:
+ case nir_op_uge:
+ return (bit_size == 8 || !(chip >= GFX8 && nir_dest_is_divergent(alu->dest.dest))) ? 32
+ : 0;
+ default:
+ return 0;
+ }
+ }
+
+ return 0;
}
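Returning 32 here tells the bit-size lowering pass to perform the flagged 8/16-bit ALU op at 32 bits and narrow the result back. On plain integers the equivalence looks like this (exact for ops such as udiv):

#include <assert.h>
#include <stdint.h>

int
main(void)
{
   uint8_t a = 200, b = 7;

   /* The "widened" form: operate at 32 bits, then truncate the result. */
   uint8_t widened = (uint8_t)((uint32_t)a / (uint32_t)b);

   assert(widened == (uint8_t)(a / b)); /* matches the native 8-bit division */
   return 0;
}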
static bool
@@ -3272,1017 +3154,991 @@ opt_vectorize_callback(const nir_instr *instr, void *_)
}
}
-VkResult radv_create_shaders(struct radv_pipeline *pipeline,
- struct radv_device *device,
- struct radv_pipeline_cache *cache,
- const struct radv_pipeline_key *pipeline_key,
- const VkPipelineShaderStageCreateInfo **pStages,
- const VkPipelineCreateFlags flags,
- VkPipelineCreationFeedbackEXT *pipeline_feedback,
- VkPipelineCreationFeedbackEXT **stage_feedbacks)
+VkResult
+radv_create_shaders(struct radv_pipeline *pipeline, struct radv_device *device,
+ struct radv_pipeline_cache *cache, const struct radv_pipeline_key *pipeline_key,
+ const VkPipelineShaderStageCreateInfo **pStages,
+ const VkPipelineCreateFlags flags,
+ VkPipelineCreationFeedbackEXT *pipeline_feedback,
+ VkPipelineCreationFeedbackEXT **stage_feedbacks)
{
- struct vk_shader_module fs_m = {0};
- struct vk_shader_module *modules[MESA_SHADER_STAGES] = { 0, };
- nir_shader *nir[MESA_SHADER_STAGES] = {0};
- struct radv_shader_binary *binaries[MESA_SHADER_STAGES] = {NULL};
- struct radv_shader_variant_key keys[MESA_SHADER_STAGES] = {{{{{0}}}}};
- struct radv_shader_info infos[MESA_SHADER_STAGES] = {0};
- unsigned char hash[20], gs_copy_hash[20];
- bool keep_executable_info = (flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR) || device->keep_shader_info;
- bool keep_statistic_info = (flags & VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR) ||
- (device->instance->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) ||
- device->keep_shader_info;
- bool disable_optimizations = flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT;
-
- radv_start_feedback(pipeline_feedback);
-
- for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) {
- if (pStages[i]) {
- modules[i] = vk_shader_module_from_handle(pStages[i]->module);
- if (modules[i]->nir)
- _mesa_sha1_compute(modules[i]->nir->info.name,
- strlen(modules[i]->nir->info.name),
- modules[i]->sha1);
-
- pipeline->active_stages |= mesa_to_vk_shader_stage(i);
- }
- }
-
- radv_hash_shaders(hash, pStages, pipeline->layout, pipeline_key,
- get_hash_flags(device, keep_statistic_info));
- memcpy(gs_copy_hash, hash, 20);
- gs_copy_hash[0] ^= 1;
-
- pipeline->pipeline_hash = *(uint64_t *)hash;
-
- bool found_in_application_cache = true;
- if (modules[MESA_SHADER_GEOMETRY] && !keep_executable_info) {
- struct radv_shader_variant *variants[MESA_SHADER_STAGES] = {0};
- radv_create_shader_variants_from_pipeline_cache(device, cache, gs_copy_hash, variants,
- &found_in_application_cache);
- pipeline->gs_copy_shader = variants[MESA_SHADER_GEOMETRY];
- }
-
- if (!keep_executable_info &&
- radv_create_shader_variants_from_pipeline_cache(device, cache, hash, pipeline->shaders,
- &found_in_application_cache) &&
- (!modules[MESA_SHADER_GEOMETRY] || pipeline->gs_copy_shader)) {
- radv_stop_feedback(pipeline_feedback, found_in_application_cache);
- return VK_SUCCESS;
- }
-
- if (flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT) {
- radv_stop_feedback(pipeline_feedback, found_in_application_cache);
- return VK_PIPELINE_COMPILE_REQUIRED_EXT;
- }
-
- if (!modules[MESA_SHADER_FRAGMENT] && !modules[MESA_SHADER_COMPUTE]) {
- nir_builder fs_b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, "noop_fs");
- fs_m = vk_shader_module_from_nir(fs_b.shader);
- modules[MESA_SHADER_FRAGMENT] = &fs_m;
- }
-
- for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) {
- const VkPipelineShaderStageCreateInfo *stage = pStages[i];
-
- if (!modules[i])
- continue;
-
- radv_start_feedback(stage_feedbacks[i]);
-
- nir[i] = radv_shader_compile_to_nir(device, modules[i],
- stage ? stage->pName : "main", i,
- stage ? stage->pSpecializationInfo : NULL,
- flags, pipeline->layout,
- pipeline_key);
-
- /* We don't want to alter meta shaders IR directly so clone it
- * first.
- */
- if (nir[i]->info.name) {
- nir[i] = nir_shader_clone(NULL, nir[i]);
- }
-
- radv_stop_feedback(stage_feedbacks[i], false);
- }
-
- bool optimize_conservatively = flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT;
-
- radv_link_shaders(pipeline, nir, optimize_conservatively);
- radv_set_driver_locations(pipeline, nir, infos);
-
- for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
- if (nir[i]) {
- radv_start_feedback(stage_feedbacks[i]);
- radv_optimize_nir(device, nir[i], optimize_conservatively, false);
-
- /* Gather info again, information such as outputs_read can be out-of-date. */
- nir_shader_gather_info(nir[i], nir_shader_get_entrypoint(nir[i]));
- radv_lower_io(device, nir[i]);
-
- radv_stop_feedback(stage_feedbacks[i], false);
- }
- }
-
- infos[MESA_SHADER_VERTEX].vs.as_ls = !!nir[MESA_SHADER_TESS_CTRL];
- infos[MESA_SHADER_VERTEX].vs.as_es = !!nir[MESA_SHADER_GEOMETRY] && !nir[MESA_SHADER_TESS_CTRL];
- infos[MESA_SHADER_TESS_EVAL].tes.as_es = !!nir[MESA_SHADER_GEOMETRY] && !!nir[MESA_SHADER_TESS_CTRL];
-
- if (nir[MESA_SHADER_TESS_CTRL]) {
- nir_lower_patch_vertices(nir[MESA_SHADER_TESS_EVAL], nir[MESA_SHADER_TESS_CTRL]->info.tess.tcs_vertices_out, NULL);
- gather_tess_info(device, nir, infos, pipeline_key);
- }
-
- for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
- if (nir[i]) {
- radv_start_feedback(stage_feedbacks[i]);
-
- if (!radv_use_llvm_for_stage(device, i)) {
- NIR_PASS_V(nir[i], nir_lower_non_uniform_access,
- nir_lower_non_uniform_ubo_access |
- nir_lower_non_uniform_ssbo_access |
- nir_lower_non_uniform_texture_access |
- nir_lower_non_uniform_image_access);
- }
- NIR_PASS_V(nir[i], nir_lower_memory_model);
-
- bool lower_to_scalar = false;
-
- nir_load_store_vectorize_options vectorize_opts = {
- .modes = nir_var_mem_ssbo | nir_var_mem_ubo |
- nir_var_mem_push_const | nir_var_mem_shared |
- nir_var_mem_global,
- .callback = mem_vectorize_callback,
- .robust_modes = 0,
- };
-
- if (device->robust_buffer_access) {
- vectorize_opts.robust_modes = nir_var_mem_ubo |
- nir_var_mem_ssbo |
- nir_var_mem_global |
- nir_var_mem_push_const;
- }
-
- if (nir_opt_load_store_vectorize(nir[i], &vectorize_opts)) {
- lower_to_scalar = true;
-
- /* Gather info again, to update whether 8/16-bit are used. */
- nir_shader_gather_info(nir[i], nir_shader_get_entrypoint(nir[i]));
- }
-
- lower_to_scalar |= nir_opt_shrink_vectors(nir[i],
- !device->instance->disable_shrink_image_store);
-
- if (lower_to_scalar)
- nir_lower_alu_to_scalar(nir[i], NULL, NULL);
-
- /* lower ALU operations */
- /* TODO: Some 64-bit tests crash inside LLVM. */
- if (!radv_use_llvm_for_stage(device, i))
- nir_lower_int64(nir[i]);
-
- /* TODO: Implement nir_op_uadd_sat with LLVM. */
- if (!radv_use_llvm_for_stage(device, i))
- nir_opt_idiv_const(nir[i], 8);
- nir_lower_idiv(nir[i], nir_lower_idiv_precise);
-
- nir_opt_sink(nir[i], nir_move_load_input | nir_move_const_undef | nir_move_copies);
- nir_opt_move(nir[i], nir_move_load_input | nir_move_const_undef | nir_move_copies);
-
- /* Lower I/O intrinsics to memory instructions. */
- bool io_to_mem = radv_lower_io_to_mem(device, nir[i], &infos[i], pipeline_key);
-
- /* optimize the lowered ALU operations */
- bool more_algebraic = true;
- while (more_algebraic) {
- more_algebraic = false;
- NIR_PASS_V(nir[i], nir_copy_prop);
- NIR_PASS_V(nir[i], nir_opt_dce);
- NIR_PASS_V(nir[i], nir_opt_constant_folding);
- NIR_PASS_V(nir[i], nir_opt_cse);
- NIR_PASS(more_algebraic, nir[i], nir_opt_algebraic);
- }
-
- if (io_to_mem || i == MESA_SHADER_COMPUTE)
- NIR_PASS_V(nir[i], nir_opt_offsets);
-
- /* Do late algebraic optimization to turn add(a,
- * neg(b)) back into subs, then the mandatory cleanup
- * after algebraic. Note that it may produce fnegs,
- * and if so then we need to keep running to squash
- * fneg(fneg(a)).
- */
- bool more_late_algebraic = true;
- while (more_late_algebraic) {
- more_late_algebraic = false;
- NIR_PASS(more_late_algebraic, nir[i], nir_opt_algebraic_late);
- NIR_PASS_V(nir[i], nir_opt_constant_folding);
- NIR_PASS_V(nir[i], nir_copy_prop);
- NIR_PASS_V(nir[i], nir_opt_dce);
- NIR_PASS_V(nir[i], nir_opt_cse);
- }
-
- if (nir[i]->info.bit_sizes_int & (8 | 16)) {
- if (device->physical_device->rad_info.chip_class >= GFX8) {
- nir_convert_to_lcssa(nir[i], true, true);
- nir_divergence_analysis(nir[i]);
- }
-
- if (nir_lower_bit_size(nir[i], lower_bit_size_callback, device)) {
- // TODO: lower idiv beforehand
- if (nir_lower_idiv(nir[i], nir_lower_idiv_precise))
- NIR_PASS_V(nir[i], nir_opt_algebraic_late); /* needed for removing ineg again */
- NIR_PASS_V(nir[i], nir_opt_constant_folding);
- NIR_PASS_V(nir[i], nir_opt_dce);
- }
-
- if (device->physical_device->rad_info.chip_class >= GFX8)
- nir_opt_remove_phis(nir[i]); /* cleanup LCSSA phis */
- if (device->physical_device->rad_info.chip_class >= GFX9)
- NIR_PASS_V(nir[i], nir_opt_vectorize, opt_vectorize_callback, NULL);
- }
-
- /* cleanup passes */
- nir_lower_load_const_to_scalar(nir[i]);
- nir_move_options move_opts =
- nir_move_const_undef | nir_move_load_ubo | nir_move_load_input |
- nir_move_comparisons | nir_move_copies;
- nir_opt_sink(nir[i], move_opts | nir_move_load_ssbo);
- nir_opt_move(nir[i], move_opts);
-
- radv_stop_feedback(stage_feedbacks[i], false);
- }
- }
-
- for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
- if (radv_can_dump_shader(device, modules[i], false))
- nir_print_shader(nir[i], stderr);
- }
-
- radv_fill_shader_keys(device, keys, pipeline_key, nir);
-
- radv_fill_shader_info(pipeline, pStages, keys, infos, nir);
-
- if ((nir[MESA_SHADER_VERTEX] &&
- keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg) ||
- (nir[MESA_SHADER_TESS_EVAL] &&
- keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg)) {
- struct gfx10_ngg_info *ngg_info;
-
- if (nir[MESA_SHADER_GEOMETRY])
- ngg_info = &infos[MESA_SHADER_GEOMETRY].ngg_info;
- else if (nir[MESA_SHADER_TESS_CTRL])
- ngg_info = &infos[MESA_SHADER_TESS_EVAL].ngg_info;
- else
- ngg_info = &infos[MESA_SHADER_VERTEX].ngg_info;
-
- gfx10_get_ngg_info(pipeline_key, pipeline, nir, infos, ngg_info);
- } else if (nir[MESA_SHADER_GEOMETRY]) {
- struct gfx9_gs_info *gs_info =
- &infos[MESA_SHADER_GEOMETRY].gs_ring_info;
-
- gfx9_get_gs_info(pipeline_key, pipeline, nir, infos, gs_info);
- }
-
- if(modules[MESA_SHADER_GEOMETRY]) {
- struct radv_shader_binary *gs_copy_binary = NULL;
- if (!pipeline->gs_copy_shader &&
- !radv_pipeline_has_ngg(pipeline)) {
- struct radv_shader_info info = {0};
- struct radv_shader_variant_key key = {0};
-
- key.has_multiview_view_index =
- keys[MESA_SHADER_GEOMETRY].has_multiview_view_index;
-
- radv_nir_shader_info_pass(nir[MESA_SHADER_GEOMETRY],
- pipeline->layout, &key,
- &info);
- info.wave_size = 64; /* Wave32 not supported. */
- info.ballot_bit_size = 64;
-
- pipeline->gs_copy_shader = radv_create_gs_copy_shader(
- device, nir[MESA_SHADER_GEOMETRY], &info,
- &gs_copy_binary, keep_executable_info, keep_statistic_info,
- keys[MESA_SHADER_GEOMETRY].has_multiview_view_index,
- disable_optimizations);
- }
-
- if (!keep_executable_info && pipeline->gs_copy_shader) {
- struct radv_shader_binary *gs_binaries[MESA_SHADER_STAGES] = {NULL};
- struct radv_shader_variant *gs_variants[MESA_SHADER_STAGES] = {0};
-
- gs_binaries[MESA_SHADER_GEOMETRY] = gs_copy_binary;
- gs_variants[MESA_SHADER_GEOMETRY] = pipeline->gs_copy_shader;
-
- radv_pipeline_cache_insert_shaders(device, cache,
- gs_copy_hash,
- gs_variants,
- gs_binaries);
- }
- free(gs_copy_binary);
- }
-
- if (nir[MESA_SHADER_FRAGMENT]) {
- if (!pipeline->shaders[MESA_SHADER_FRAGMENT]) {
- radv_start_feedback(stage_feedbacks[MESA_SHADER_FRAGMENT]);
-
- pipeline->shaders[MESA_SHADER_FRAGMENT] =
- radv_shader_variant_compile(device, modules[MESA_SHADER_FRAGMENT], &nir[MESA_SHADER_FRAGMENT], 1,
- pipeline->layout, keys + MESA_SHADER_FRAGMENT,
- infos + MESA_SHADER_FRAGMENT,
- keep_executable_info, keep_statistic_info,
- disable_optimizations,
- &binaries[MESA_SHADER_FRAGMENT]);
-
- radv_stop_feedback(stage_feedbacks[MESA_SHADER_FRAGMENT], false);
- }
- }
-
- if (device->physical_device->rad_info.chip_class >= GFX9 && modules[MESA_SHADER_TESS_CTRL]) {
- if (!pipeline->shaders[MESA_SHADER_TESS_CTRL]) {
- struct nir_shader *combined_nir[] = {nir[MESA_SHADER_VERTEX], nir[MESA_SHADER_TESS_CTRL]};
- struct radv_shader_variant_key key = keys[MESA_SHADER_TESS_CTRL];
- key.tcs.vs_key = keys[MESA_SHADER_VERTEX].vs;
-
- radv_start_feedback(stage_feedbacks[MESA_SHADER_TESS_CTRL]);
-
- pipeline->shaders[MESA_SHADER_TESS_CTRL] = radv_shader_variant_compile(device, modules[MESA_SHADER_TESS_CTRL], combined_nir, 2,
- pipeline->layout,
- &key, &infos[MESA_SHADER_TESS_CTRL], keep_executable_info,
- keep_statistic_info,
- disable_optimizations,
- &binaries[MESA_SHADER_TESS_CTRL]);
-
- radv_stop_feedback(stage_feedbacks[MESA_SHADER_TESS_CTRL], false);
- }
- modules[MESA_SHADER_VERTEX] = NULL;
- }
-
- if (device->physical_device->rad_info.chip_class >= GFX9 && modules[MESA_SHADER_GEOMETRY]) {
- gl_shader_stage pre_stage = modules[MESA_SHADER_TESS_EVAL] ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
- if (!pipeline->shaders[MESA_SHADER_GEOMETRY]) {
- struct nir_shader *combined_nir[] = {nir[pre_stage], nir[MESA_SHADER_GEOMETRY]};
-
- radv_start_feedback(stage_feedbacks[MESA_SHADER_GEOMETRY]);
-
- pipeline->shaders[MESA_SHADER_GEOMETRY] = radv_shader_variant_compile(device, modules[MESA_SHADER_GEOMETRY], combined_nir, 2,
- pipeline->layout,
- &keys[pre_stage], &infos[MESA_SHADER_GEOMETRY], keep_executable_info,
- keep_statistic_info,
- disable_optimizations,
- &binaries[MESA_SHADER_GEOMETRY]);
-
- radv_stop_feedback(stage_feedbacks[MESA_SHADER_GEOMETRY], false);
- }
- modules[pre_stage] = NULL;
- }
-
- for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
- if(modules[i] && !pipeline->shaders[i]) {
- radv_start_feedback(stage_feedbacks[i]);
-
- pipeline->shaders[i] = radv_shader_variant_compile(device, modules[i], &nir[i], 1,
- pipeline->layout,
- keys + i, infos + i, keep_executable_info,
- keep_statistic_info,
- disable_optimizations,
- &binaries[i]);
-
- radv_stop_feedback(stage_feedbacks[i], false);
- }
- }
-
- if (!keep_executable_info) {
- radv_pipeline_cache_insert_shaders(device, cache, hash, pipeline->shaders,
- binaries);
- }
-
- for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
- free(binaries[i]);
- if (nir[i]) {
- ralloc_free(nir[i]);
-
- if (radv_can_dump_shader_stats(device, modules[i])) {
- radv_dump_shader_stats(device, pipeline, i, stderr);
- }
- }
- }
-
- if (fs_m.nir)
- ralloc_free(fs_m.nir);
-
- radv_stop_feedback(pipeline_feedback, false);
- return VK_SUCCESS;
+ struct vk_shader_module fs_m = {0};
+ struct vk_shader_module *modules[MESA_SHADER_STAGES] = {
+ 0,
+ };
+ nir_shader *nir[MESA_SHADER_STAGES] = {0};
+ struct radv_shader_binary *binaries[MESA_SHADER_STAGES] = {NULL};
+ struct radv_shader_variant_key keys[MESA_SHADER_STAGES] = {{{{{0}}}}};
+ struct radv_shader_info infos[MESA_SHADER_STAGES] = {0};
+ unsigned char hash[20], gs_copy_hash[20];
+ bool keep_executable_info =
+ (flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR) ||
+ device->keep_shader_info;
+ bool keep_statistic_info = (flags & VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR) ||
+ (device->instance->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) ||
+ device->keep_shader_info;
+ bool disable_optimizations = flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT;
+
+ radv_start_feedback(pipeline_feedback);
+
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) {
+ if (pStages[i]) {
+ modules[i] = vk_shader_module_from_handle(pStages[i]->module);
+ if (modules[i]->nir)
+ _mesa_sha1_compute(modules[i]->nir->info.name, strlen(modules[i]->nir->info.name),
+ modules[i]->sha1);
+
+ pipeline->active_stages |= mesa_to_vk_shader_stage(i);
+ }
+ }
+
+ radv_hash_shaders(hash, pStages, pipeline->layout, pipeline_key,
+ get_hash_flags(device, keep_statistic_info));
+ memcpy(gs_copy_hash, hash, 20);
+ gs_copy_hash[0] ^= 1;
+
+ pipeline->pipeline_hash = *(uint64_t *)hash;
+
+ bool found_in_application_cache = true;
+ if (modules[MESA_SHADER_GEOMETRY] && !keep_executable_info) {
+ struct radv_shader_variant *variants[MESA_SHADER_STAGES] = {0};
+ radv_create_shader_variants_from_pipeline_cache(device, cache, gs_copy_hash, variants,
+ &found_in_application_cache);
+ pipeline->gs_copy_shader = variants[MESA_SHADER_GEOMETRY];
+ }
+
+ if (!keep_executable_info &&
+ radv_create_shader_variants_from_pipeline_cache(device, cache, hash, pipeline->shaders,
+ &found_in_application_cache) &&
+ (!modules[MESA_SHADER_GEOMETRY] || pipeline->gs_copy_shader)) {
+ radv_stop_feedback(pipeline_feedback, found_in_application_cache);
+ return VK_SUCCESS;
+ }
+
+ if (flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT) {
+ radv_stop_feedback(pipeline_feedback, found_in_application_cache);
+ return VK_PIPELINE_COMPILE_REQUIRED_EXT;
+ }
+
+ if (!modules[MESA_SHADER_FRAGMENT] && !modules[MESA_SHADER_COMPUTE]) {
+ nir_builder fs_b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, "noop_fs");
+ fs_m = vk_shader_module_from_nir(fs_b.shader);
+ modules[MESA_SHADER_FRAGMENT] = &fs_m;
+ }
+
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) {
+ const VkPipelineShaderStageCreateInfo *stage = pStages[i];
+
+ if (!modules[i])
+ continue;
+
+ radv_start_feedback(stage_feedbacks[i]);
+
+ nir[i] = radv_shader_compile_to_nir(device, modules[i], stage ? stage->pName : "main", i,
+ stage ? stage->pSpecializationInfo : NULL, flags,
+ pipeline->layout, pipeline_key);
+
+ /* We don't want to alter meta shaders IR directly so clone it
+ * first.
+ */
+ if (nir[i]->info.name) {
+ nir[i] = nir_shader_clone(NULL, nir[i]);
+ }
+
+ radv_stop_feedback(stage_feedbacks[i], false);
+ }
+
+ bool optimize_conservatively = flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT;
+
+ radv_link_shaders(pipeline, nir, optimize_conservatively);
+ radv_set_driver_locations(pipeline, nir, infos);
+
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
+ if (nir[i]) {
+ radv_start_feedback(stage_feedbacks[i]);
+ radv_optimize_nir(device, nir[i], optimize_conservatively, false);
+
+ /* Gather info again, information such as outputs_read can be out-of-date. */
+ nir_shader_gather_info(nir[i], nir_shader_get_entrypoint(nir[i]));
+ radv_lower_io(device, nir[i]);
+
+ radv_stop_feedback(stage_feedbacks[i], false);
+ }
+ }
+
+ infos[MESA_SHADER_VERTEX].vs.as_ls = !!nir[MESA_SHADER_TESS_CTRL];
+ infos[MESA_SHADER_VERTEX].vs.as_es = !!nir[MESA_SHADER_GEOMETRY] && !nir[MESA_SHADER_TESS_CTRL];
+ infos[MESA_SHADER_TESS_EVAL].tes.as_es =
+ !!nir[MESA_SHADER_GEOMETRY] && !!nir[MESA_SHADER_TESS_CTRL];
+
+ if (nir[MESA_SHADER_TESS_CTRL]) {
+ nir_lower_patch_vertices(nir[MESA_SHADER_TESS_EVAL],
+ nir[MESA_SHADER_TESS_CTRL]->info.tess.tcs_vertices_out, NULL);
+ gather_tess_info(device, nir, infos, pipeline_key);
+ }
+
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
+ if (nir[i]) {
+ radv_start_feedback(stage_feedbacks[i]);
+
+ if (!radv_use_llvm_for_stage(device, i)) {
+ NIR_PASS_V(nir[i], nir_lower_non_uniform_access,
+ nir_lower_non_uniform_ubo_access | nir_lower_non_uniform_ssbo_access |
+ nir_lower_non_uniform_texture_access |
+ nir_lower_non_uniform_image_access);
+ }
+ NIR_PASS_V(nir[i], nir_lower_memory_model);
+
+ bool lower_to_scalar = false;
+
+ nir_load_store_vectorize_options vectorize_opts = {
+ .modes = nir_var_mem_ssbo | nir_var_mem_ubo | nir_var_mem_push_const |
+ nir_var_mem_shared | nir_var_mem_global,
+ .callback = mem_vectorize_callback,
+ .robust_modes = 0,
+ };
+
+ if (device->robust_buffer_access) {
+ vectorize_opts.robust_modes =
+ nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_global | nir_var_mem_push_const;
+ }
+
+ if (nir_opt_load_store_vectorize(nir[i], &vectorize_opts)) {
+ lower_to_scalar = true;
+
+ /* Gather info again, to update whether 8/16-bit are used. */
+ nir_shader_gather_info(nir[i], nir_shader_get_entrypoint(nir[i]));
+ }
+
+ lower_to_scalar |=
+ nir_opt_shrink_vectors(nir[i], !device->instance->disable_shrink_image_store);
+
+ if (lower_to_scalar)
+ nir_lower_alu_to_scalar(nir[i], NULL, NULL);
+
+ /* lower ALU operations */
+ /* TODO: Some 64-bit tests crash inside LLVM. */
+ if (!radv_use_llvm_for_stage(device, i))
+ nir_lower_int64(nir[i]);
+
+ /* TODO: Implement nir_op_uadd_sat with LLVM. */
+ if (!radv_use_llvm_for_stage(device, i))
+ nir_opt_idiv_const(nir[i], 8);
+ nir_lower_idiv(nir[i], nir_lower_idiv_precise);
+
+ nir_opt_sink(nir[i], nir_move_load_input | nir_move_const_undef | nir_move_copies);
+ nir_opt_move(nir[i], nir_move_load_input | nir_move_const_undef | nir_move_copies);
+
+ /* Lower I/O intrinsics to memory instructions. */
+ bool io_to_mem = radv_lower_io_to_mem(device, nir[i], &infos[i], pipeline_key);
+
+ /* optimize the lowered ALU operations */
+ bool more_algebraic = true;
+ while (more_algebraic) {
+ more_algebraic = false;
+ NIR_PASS_V(nir[i], nir_copy_prop);
+ NIR_PASS_V(nir[i], nir_opt_dce);
+ NIR_PASS_V(nir[i], nir_opt_constant_folding);
+ NIR_PASS_V(nir[i], nir_opt_cse);
+ NIR_PASS(more_algebraic, nir[i], nir_opt_algebraic);
+ }
+
+ if (io_to_mem || i == MESA_SHADER_COMPUTE)
+ NIR_PASS_V(nir[i], nir_opt_offsets);
+
+ /* Do late algebraic optimization to turn add(a,
+ * neg(b)) back into subs, then the mandatory cleanup
+ * after algebraic. Note that it may produce fnegs,
+ * and if so then we need to keep running to squash
+ * fneg(fneg(a)).
+ */
+ bool more_late_algebraic = true;
+ while (more_late_algebraic) {
+ more_late_algebraic = false;
+ NIR_PASS(more_late_algebraic, nir[i], nir_opt_algebraic_late);
+ NIR_PASS_V(nir[i], nir_opt_constant_folding);
+ NIR_PASS_V(nir[i], nir_copy_prop);
+ NIR_PASS_V(nir[i], nir_opt_dce);
+ NIR_PASS_V(nir[i], nir_opt_cse);
+ }
+
+ if (nir[i]->info.bit_sizes_int & (8 | 16)) {
+ if (device->physical_device->rad_info.chip_class >= GFX8) {
+ nir_convert_to_lcssa(nir[i], true, true);
+ nir_divergence_analysis(nir[i]);
+ }
+
+ if (nir_lower_bit_size(nir[i], lower_bit_size_callback, device)) {
+ // TODO: lower idiv beforehand
+ if (nir_lower_idiv(nir[i], nir_lower_idiv_precise))
+ NIR_PASS_V(nir[i], nir_opt_algebraic_late); /* needed for removing ineg again */
+ NIR_PASS_V(nir[i], nir_opt_constant_folding);
+ NIR_PASS_V(nir[i], nir_opt_dce);
+ }
+
+ if (device->physical_device->rad_info.chip_class >= GFX8)
+ nir_opt_remove_phis(nir[i]); /* cleanup LCSSA phis */
+ if (device->physical_device->rad_info.chip_class >= GFX9)
+ NIR_PASS_V(nir[i], nir_opt_vectorize, opt_vectorize_callback, NULL);
+ }
+
+ /* cleanup passes */
+ nir_lower_load_const_to_scalar(nir[i]);
+ nir_move_options move_opts = nir_move_const_undef | nir_move_load_ubo |
+ nir_move_load_input | nir_move_comparisons | nir_move_copies;
+ nir_opt_sink(nir[i], move_opts | nir_move_load_ssbo);
+ nir_opt_move(nir[i], move_opts);
+
+ radv_stop_feedback(stage_feedbacks[i], false);
+ }
+ }
+
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
+ if (radv_can_dump_shader(device, modules[i], false))
+ nir_print_shader(nir[i], stderr);
+ }
+
+ radv_fill_shader_keys(device, keys, pipeline_key, nir);
+
+ radv_fill_shader_info(pipeline, pStages, keys, infos, nir);
+
+ if ((nir[MESA_SHADER_VERTEX] && keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg) ||
+ (nir[MESA_SHADER_TESS_EVAL] && keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg)) {
+ struct gfx10_ngg_info *ngg_info;
+
+ if (nir[MESA_SHADER_GEOMETRY])
+ ngg_info = &infos[MESA_SHADER_GEOMETRY].ngg_info;
+ else if (nir[MESA_SHADER_TESS_CTRL])
+ ngg_info = &infos[MESA_SHADER_TESS_EVAL].ngg_info;
+ else
+ ngg_info = &infos[MESA_SHADER_VERTEX].ngg_info;
+
+ gfx10_get_ngg_info(pipeline_key, pipeline, nir, infos, ngg_info);
+ } else if (nir[MESA_SHADER_GEOMETRY]) {
+ struct gfx9_gs_info *gs_info = &infos[MESA_SHADER_GEOMETRY].gs_ring_info;
+
+ gfx9_get_gs_info(pipeline_key, pipeline, nir, infos, gs_info);
+ }
+
+ if (modules[MESA_SHADER_GEOMETRY]) {
+ struct radv_shader_binary *gs_copy_binary = NULL;
+ if (!pipeline->gs_copy_shader && !radv_pipeline_has_ngg(pipeline)) {
+ struct radv_shader_info info = {0};
+ struct radv_shader_variant_key key = {0};
+
+ key.has_multiview_view_index = keys[MESA_SHADER_GEOMETRY].has_multiview_view_index;
+
+ radv_nir_shader_info_pass(nir[MESA_SHADER_GEOMETRY], pipeline->layout, &key, &info);
+ info.wave_size = 64; /* Wave32 not supported. */
+ info.ballot_bit_size = 64;
+
+ pipeline->gs_copy_shader = radv_create_gs_copy_shader(
+ device, nir[MESA_SHADER_GEOMETRY], &info, &gs_copy_binary, keep_executable_info,
+ keep_statistic_info, keys[MESA_SHADER_GEOMETRY].has_multiview_view_index,
+ disable_optimizations);
+ }
+
+ if (!keep_executable_info && pipeline->gs_copy_shader) {
+ struct radv_shader_binary *gs_binaries[MESA_SHADER_STAGES] = {NULL};
+ struct radv_shader_variant *gs_variants[MESA_SHADER_STAGES] = {0};
+
+ gs_binaries[MESA_SHADER_GEOMETRY] = gs_copy_binary;
+ gs_variants[MESA_SHADER_GEOMETRY] = pipeline->gs_copy_shader;
+
+ radv_pipeline_cache_insert_shaders(device, cache, gs_copy_hash, gs_variants, gs_binaries);
+ }
+ free(gs_copy_binary);
+ }
+
+ if (nir[MESA_SHADER_FRAGMENT]) {
+ if (!pipeline->shaders[MESA_SHADER_FRAGMENT]) {
+ radv_start_feedback(stage_feedbacks[MESA_SHADER_FRAGMENT]);
+
+ pipeline->shaders[MESA_SHADER_FRAGMENT] = radv_shader_variant_compile(
+ device, modules[MESA_SHADER_FRAGMENT], &nir[MESA_SHADER_FRAGMENT], 1, pipeline->layout,
+ keys + MESA_SHADER_FRAGMENT, infos + MESA_SHADER_FRAGMENT, keep_executable_info,
+ keep_statistic_info, disable_optimizations, &binaries[MESA_SHADER_FRAGMENT]);
+
+ radv_stop_feedback(stage_feedbacks[MESA_SHADER_FRAGMENT], false);
+ }
+ }
+
+ if (device->physical_device->rad_info.chip_class >= GFX9 && modules[MESA_SHADER_TESS_CTRL]) {
+ if (!pipeline->shaders[MESA_SHADER_TESS_CTRL]) {
+ struct nir_shader *combined_nir[] = {nir[MESA_SHADER_VERTEX], nir[MESA_SHADER_TESS_CTRL]};
+ struct radv_shader_variant_key key = keys[MESA_SHADER_TESS_CTRL];
+ key.tcs.vs_key = keys[MESA_SHADER_VERTEX].vs;
+
+ radv_start_feedback(stage_feedbacks[MESA_SHADER_TESS_CTRL]);
+
+ pipeline->shaders[MESA_SHADER_TESS_CTRL] = radv_shader_variant_compile(
+ device, modules[MESA_SHADER_TESS_CTRL], combined_nir, 2, pipeline->layout, &key,
+ &infos[MESA_SHADER_TESS_CTRL], keep_executable_info, keep_statistic_info,
+ disable_optimizations, &binaries[MESA_SHADER_TESS_CTRL]);
+
+ radv_stop_feedback(stage_feedbacks[MESA_SHADER_TESS_CTRL], false);
+ }
+ modules[MESA_SHADER_VERTEX] = NULL;
+ }
+
+ if (device->physical_device->rad_info.chip_class >= GFX9 && modules[MESA_SHADER_GEOMETRY]) {
+ gl_shader_stage pre_stage =
+ modules[MESA_SHADER_TESS_EVAL] ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
+ if (!pipeline->shaders[MESA_SHADER_GEOMETRY]) {
+ struct nir_shader *combined_nir[] = {nir[pre_stage], nir[MESA_SHADER_GEOMETRY]};
+
+ radv_start_feedback(stage_feedbacks[MESA_SHADER_GEOMETRY]);
+
+ pipeline->shaders[MESA_SHADER_GEOMETRY] = radv_shader_variant_compile(
+ device, modules[MESA_SHADER_GEOMETRY], combined_nir, 2, pipeline->layout,
+ &keys[pre_stage], &infos[MESA_SHADER_GEOMETRY], keep_executable_info,
+ keep_statistic_info, disable_optimizations, &binaries[MESA_SHADER_GEOMETRY]);
+
+ radv_stop_feedback(stage_feedbacks[MESA_SHADER_GEOMETRY], false);
+ }
+ modules[pre_stage] = NULL;
+ }
+
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
+ if (modules[i] && !pipeline->shaders[i]) {
+ radv_start_feedback(stage_feedbacks[i]);
+
+ pipeline->shaders[i] = radv_shader_variant_compile(
+ device, modules[i], &nir[i], 1, pipeline->layout, keys + i, infos + i,
+ keep_executable_info, keep_statistic_info, disable_optimizations, &binaries[i]);
+
+ radv_stop_feedback(stage_feedbacks[i], false);
+ }
+ }
+
+ if (!keep_executable_info) {
+ radv_pipeline_cache_insert_shaders(device, cache, hash, pipeline->shaders, binaries);
+ }
+
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
+ free(binaries[i]);
+ if (nir[i]) {
+ ralloc_free(nir[i]);
+
+ if (radv_can_dump_shader_stats(device, modules[i])) {
+ radv_dump_shader_stats(device, pipeline, i, stderr);
+ }
+ }
+ }
+
+ if (fs_m.nir)
+ ralloc_free(fs_m.nir);
+
+ radv_stop_feedback(pipeline_feedback, false);
+ return VK_SUCCESS;
}

static uint32_t
-radv_pipeline_stage_to_user_data_0(struct radv_pipeline *pipeline,
- gl_shader_stage stage, enum chip_class chip_class)
+radv_pipeline_stage_to_user_data_0(struct radv_pipeline *pipeline, gl_shader_stage stage,
+ enum chip_class chip_class)
{
- bool has_gs = radv_pipeline_has_gs(pipeline);
- bool has_tess = radv_pipeline_has_tess(pipeline);
- bool has_ngg = radv_pipeline_has_ngg(pipeline);
-
- switch (stage) {
- case MESA_SHADER_FRAGMENT:
- return R_00B030_SPI_SHADER_USER_DATA_PS_0;
- case MESA_SHADER_VERTEX:
- if (has_tess) {
- if (chip_class >= GFX10) {
- return R_00B430_SPI_SHADER_USER_DATA_HS_0;
- } else if (chip_class == GFX9) {
- return R_00B430_SPI_SHADER_USER_DATA_LS_0;
- } else {
- return R_00B530_SPI_SHADER_USER_DATA_LS_0;
- }
-
- }
-
- if (has_gs) {
- if (chip_class >= GFX10) {
- return R_00B230_SPI_SHADER_USER_DATA_GS_0;
- } else {
- return R_00B330_SPI_SHADER_USER_DATA_ES_0;
- }
- }
-
- if (has_ngg)
- return R_00B230_SPI_SHADER_USER_DATA_GS_0;
-
- return R_00B130_SPI_SHADER_USER_DATA_VS_0;
- case MESA_SHADER_GEOMETRY:
- return chip_class == GFX9 ? R_00B330_SPI_SHADER_USER_DATA_ES_0 :
- R_00B230_SPI_SHADER_USER_DATA_GS_0;
- case MESA_SHADER_COMPUTE:
- return R_00B900_COMPUTE_USER_DATA_0;
- case MESA_SHADER_TESS_CTRL:
- return chip_class == GFX9 ? R_00B430_SPI_SHADER_USER_DATA_LS_0 :
- R_00B430_SPI_SHADER_USER_DATA_HS_0;
- case MESA_SHADER_TESS_EVAL:
- if (has_gs) {
- return chip_class >= GFX10 ? R_00B230_SPI_SHADER_USER_DATA_GS_0 :
- R_00B330_SPI_SHADER_USER_DATA_ES_0;
- } else if (has_ngg) {
- return R_00B230_SPI_SHADER_USER_DATA_GS_0;
- } else {
- return R_00B130_SPI_SHADER_USER_DATA_VS_0;
- }
- default:
- unreachable("unknown shader");
- }
+ bool has_gs = radv_pipeline_has_gs(pipeline);
+ bool has_tess = radv_pipeline_has_tess(pipeline);
+ bool has_ngg = radv_pipeline_has_ngg(pipeline);
+
+ switch (stage) {
+ case MESA_SHADER_FRAGMENT:
+ return R_00B030_SPI_SHADER_USER_DATA_PS_0;
+ case MESA_SHADER_VERTEX:
+ if (has_tess) {
+ if (chip_class >= GFX10) {
+ return R_00B430_SPI_SHADER_USER_DATA_HS_0;
+ } else if (chip_class == GFX9) {
+ return R_00B430_SPI_SHADER_USER_DATA_LS_0;
+ } else {
+ return R_00B530_SPI_SHADER_USER_DATA_LS_0;
+ }
+ }
+
+ if (has_gs) {
+ if (chip_class >= GFX10) {
+ return R_00B230_SPI_SHADER_USER_DATA_GS_0;
+ } else {
+ return R_00B330_SPI_SHADER_USER_DATA_ES_0;
+ }
+ }
+
+ if (has_ngg)
+ return R_00B230_SPI_SHADER_USER_DATA_GS_0;
+
+ return R_00B130_SPI_SHADER_USER_DATA_VS_0;
+ case MESA_SHADER_GEOMETRY:
+ return chip_class == GFX9 ? R_00B330_SPI_SHADER_USER_DATA_ES_0
+ : R_00B230_SPI_SHADER_USER_DATA_GS_0;
+ case MESA_SHADER_COMPUTE:
+ return R_00B900_COMPUTE_USER_DATA_0;
+ case MESA_SHADER_TESS_CTRL:
+ return chip_class == GFX9 ? R_00B430_SPI_SHADER_USER_DATA_LS_0
+ : R_00B430_SPI_SHADER_USER_DATA_HS_0;
+ case MESA_SHADER_TESS_EVAL:
+ if (has_gs) {
+ return chip_class >= GFX10 ? R_00B230_SPI_SHADER_USER_DATA_GS_0
+ : R_00B330_SPI_SHADER_USER_DATA_ES_0;
+ } else if (has_ngg) {
+ return R_00B230_SPI_SHADER_USER_DATA_GS_0;
+ } else {
+ return R_00B130_SPI_SHADER_USER_DATA_VS_0;
+ }
+ default:
+ unreachable("unknown shader");
+ }
}

struct radv_bin_size_entry {
- unsigned bpp;
- VkExtent2D extent;
+ unsigned bpp;
+ VkExtent2D extent;
};

static VkExtent2D
-radv_gfx9_compute_bin_size(const struct radv_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pCreateInfo)
-{
- static const struct radv_bin_size_entry color_size_table[][3][9] = {
- {
- /* One RB / SE */
- {
- /* One shader engine */
- { 0, {128, 128}},
- { 1, { 64, 128}},
- { 2, { 32, 128}},
- { 3, { 16, 128}},
- { 17, { 0, 0}},
- { UINT_MAX, { 0, 0}},
- },
- {
- /* Two shader engines */
- { 0, {128, 128}},
- { 2, { 64, 128}},
- { 3, { 32, 128}},
- { 5, { 16, 128}},
- { 17, { 0, 0}},
- { UINT_MAX, { 0, 0}},
- },
- {
- /* Four shader engines */
- { 0, {128, 128}},
- { 3, { 64, 128}},
- { 5, { 16, 128}},
- { 17, { 0, 0}},
- { UINT_MAX, { 0, 0}},
- },
- },
- {
- /* Two RB / SE */
- {
- /* One shader engine */
- { 0, {128, 128}},
- { 2, { 64, 128}},
- { 3, { 32, 128}},
- { 5, { 16, 128}},
- { 33, { 0, 0}},
- { UINT_MAX, { 0, 0}},
- },
- {
- /* Two shader engines */
- { 0, {128, 128}},
- { 3, { 64, 128}},
- { 5, { 32, 128}},
- { 9, { 16, 128}},
- { 33, { 0, 0}},
- { UINT_MAX, { 0, 0}},
- },
- {
- /* Four shader engines */
- { 0, {256, 256}},
- { 2, {128, 256}},
- { 3, {128, 128}},
- { 5, { 64, 128}},
- { 9, { 16, 128}},
- { 33, { 0, 0}},
- { UINT_MAX, { 0, 0}},
- },
- },
- {
- /* Four RB / SE */
- {
- /* One shader engine */
- { 0, {128, 256}},
- { 2, {128, 128}},
- { 3, { 64, 128}},
- { 5, { 32, 128}},
- { 9, { 16, 128}},
- { 33, { 0, 0}},
- { UINT_MAX, { 0, 0}},
- },
- {
- /* Two shader engines */
- { 0, {256, 256}},
- { 2, {128, 256}},
- { 3, {128, 128}},
- { 5, { 64, 128}},
- { 9, { 32, 128}},
- { 17, { 16, 128}},
- { 33, { 0, 0}},
- { UINT_MAX, { 0, 0}},
- },
- {
- /* Four shader engines */
- { 0, {256, 512}},
- { 2, {256, 256}},
- { 3, {128, 256}},
- { 5, {128, 128}},
- { 9, { 64, 128}},
- { 17, { 16, 128}},
- { 33, { 0, 0}},
- { UINT_MAX, { 0, 0}},
- },
- },
- };
- static const struct radv_bin_size_entry ds_size_table[][3][9] = {
- {
- // One RB / SE
- {
- // One shader engine
- { 0, {128, 256}},
- { 2, {128, 128}},
- { 4, { 64, 128}},
- { 7, { 32, 128}},
- { 13, { 16, 128}},
- { 49, { 0, 0}},
- { UINT_MAX, { 0, 0}},
- },
- {
- // Two shader engines
- { 0, {256, 256}},
- { 2, {128, 256}},
- { 4, {128, 128}},
- { 7, { 64, 128}},
- { 13, { 32, 128}},
- { 25, { 16, 128}},
- { 49, { 0, 0}},
- { UINT_MAX, { 0, 0}},
- },
- {
- // Four shader engines
- { 0, {256, 512}},
- { 2, {256, 256}},
- { 4, {128, 256}},
- { 7, {128, 128}},
- { 13, { 64, 128}},
- { 25, { 16, 128}},
- { 49, { 0, 0}},
- { UINT_MAX, { 0, 0}},
- },
- },
- {
- // Two RB / SE
- {
- // One shader engine
- { 0, {256, 256}},
- { 2, {128, 256}},
- { 4, {128, 128}},
- { 7, { 64, 128}},
- { 13, { 32, 128}},
- { 25, { 16, 128}},
- { 97, { 0, 0}},
- { UINT_MAX, { 0, 0}},
- },
- {
- // Two shader engines
- { 0, {256, 512}},
- { 2, {256, 256}},
- { 4, {128, 256}},
- { 7, {128, 128}},
- { 13, { 64, 128}},
- { 25, { 32, 128}},
- { 49, { 16, 128}},
- { 97, { 0, 0}},
- { UINT_MAX, { 0, 0}},
- },
- {
- // Four shader engines
- { 0, {512, 512}},
- { 2, {256, 512}},
- { 4, {256, 256}},
- { 7, {128, 256}},
- { 13, {128, 128}},
- { 25, { 64, 128}},
- { 49, { 16, 128}},
- { 97, { 0, 0}},
- { UINT_MAX, { 0, 0}},
- },
- },
- {
- // Four RB / SE
- {
- // One shader engine
- { 0, {256, 512}},
- { 2, {256, 256}},
- { 4, {128, 256}},
- { 7, {128, 128}},
- { 13, { 64, 128}},
- { 25, { 32, 128}},
- { 49, { 16, 128}},
- { UINT_MAX, { 0, 0}},
- },
- {
- // Two shader engines
- { 0, {512, 512}},
- { 2, {256, 512}},
- { 4, {256, 256}},
- { 7, {128, 256}},
- { 13, {128, 128}},
- { 25, { 64, 128}},
- { 49, { 32, 128}},
- { 97, { 16, 128}},
- { UINT_MAX, { 0, 0}},
- },
- {
- // Four shader engines
- { 0, {512, 512}},
- { 4, {256, 512}},
- { 7, {256, 256}},
- { 13, {128, 256}},
- { 25, {128, 128}},
- { 49, { 64, 128}},
- { 97, { 16, 128}},
- { UINT_MAX, { 0, 0}},
- },
- },
- };
-
- RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
- struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
- VkExtent2D extent = {512, 512};
-
- unsigned log_num_rb_per_se =
- util_logbase2_ceil(pipeline->device->physical_device->rad_info.max_render_backends /
- pipeline->device->physical_device->rad_info.max_se);
- unsigned log_num_se = util_logbase2_ceil(pipeline->device->physical_device->rad_info.max_se);
-
- unsigned total_samples = 1u << G_028BE0_MSAA_NUM_SAMPLES(pipeline->graphics.ms.pa_sc_aa_config);
- unsigned ps_iter_samples = 1u << G_028804_PS_ITER_SAMPLES(pipeline->graphics.ms.db_eqaa);
- unsigned effective_samples = total_samples;
- unsigned color_bytes_per_pixel = 0;
-
- const VkPipelineColorBlendStateCreateInfo *vkblend =
- radv_pipeline_get_color_blend_state(pCreateInfo);
- if (vkblend) {
- for (unsigned i = 0; i < subpass->color_count; i++) {
- if (!vkblend->pAttachments[i].colorWriteMask)
- continue;
-
- if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED)
- continue;
-
- VkFormat format = pass->attachments[subpass->color_attachments[i].attachment].format;
- color_bytes_per_pixel += vk_format_get_blocksize(format);
- }
-
- /* MSAA images typically don't use all samples all the time. */
- if (effective_samples >= 2 && ps_iter_samples <= 1)
- effective_samples = 2;
- color_bytes_per_pixel *= effective_samples;
- }
-
- const struct radv_bin_size_entry *color_entry = color_size_table[log_num_rb_per_se][log_num_se];
- while(color_entry[1].bpp <= color_bytes_per_pixel)
- ++color_entry;
-
- extent = color_entry->extent;
-
- if (subpass->depth_stencil_attachment) {
- struct radv_render_pass_attachment *attachment = pass->attachments + subpass->depth_stencil_attachment->attachment;
-
- /* Coefficients taken from AMDVLK */
- unsigned depth_coeff = vk_format_has_depth(attachment->format) ? 5 : 0;
- unsigned stencil_coeff = vk_format_has_stencil(attachment->format) ? 1 : 0;
- unsigned ds_bytes_per_pixel = 4 * (depth_coeff + stencil_coeff) * total_samples;
-
- const struct radv_bin_size_entry *ds_entry = ds_size_table[log_num_rb_per_se][log_num_se];
- while(ds_entry[1].bpp <= ds_bytes_per_pixel)
- ++ds_entry;
-
- if (ds_entry->extent.width * ds_entry->extent.height < extent.width * extent.height)
- extent = ds_entry->extent;
- }
-
- return extent;
-}
-
-static VkExtent2D
-radv_gfx10_compute_bin_size(const struct radv_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pCreateInfo)
+radv_gfx9_compute_bin_size(const struct radv_pipeline *pipeline,
+ const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
- RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
- struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
- VkExtent2D extent = {512, 512};
-
- const unsigned db_tag_size = 64;
- const unsigned db_tag_count = 312;
- const unsigned color_tag_size = 1024;
- const unsigned color_tag_count = 31;
- const unsigned fmask_tag_size = 256;
- const unsigned fmask_tag_count = 44;
-
- const unsigned rb_count = pipeline->device->physical_device->rad_info.max_render_backends;
- const unsigned pipe_count = MAX2(rb_count, pipeline->device->physical_device->rad_info.num_tcc_blocks);
+ static const struct radv_bin_size_entry color_size_table[][3][9] = {
+ {
+ /* One RB / SE */
+ {
+ /* One shader engine */
+ {0, {128, 128}},
+ {1, {64, 128}},
+ {2, {32, 128}},
+ {3, {16, 128}},
+ {17, {0, 0}},
+ {UINT_MAX, {0, 0}},
+ },
+ {
+ /* Two shader engines */
+ {0, {128, 128}},
+ {2, {64, 128}},
+ {3, {32, 128}},
+ {5, {16, 128}},
+ {17, {0, 0}},
+ {UINT_MAX, {0, 0}},
+ },
+ {
+ /* Four shader engines */
+ {0, {128, 128}},
+ {3, {64, 128}},
+ {5, {16, 128}},
+ {17, {0, 0}},
+ {UINT_MAX, {0, 0}},
+ },
+ },
+ {
+ /* Two RB / SE */
+ {
+ /* One shader engine */
+ {0, {128, 128}},
+ {2, {64, 128}},
+ {3, {32, 128}},
+ {5, {16, 128}},
+ {33, {0, 0}},
+ {UINT_MAX, {0, 0}},
+ },
+ {
+ /* Two shader engines */
+ {0, {128, 128}},
+ {3, {64, 128}},
+ {5, {32, 128}},
+ {9, {16, 128}},
+ {33, {0, 0}},
+ {UINT_MAX, {0, 0}},
+ },
+ {
+ /* Four shader engines */
+ {0, {256, 256}},
+ {2, {128, 256}},
+ {3, {128, 128}},
+ {5, {64, 128}},
+ {9, {16, 128}},
+ {33, {0, 0}},
+ {UINT_MAX, {0, 0}},
+ },
+ },
+ {
+ /* Four RB / SE */
+ {
+ /* One shader engine */
+ {0, {128, 256}},
+ {2, {128, 128}},
+ {3, {64, 128}},
+ {5, {32, 128}},
+ {9, {16, 128}},
+ {33, {0, 0}},
+ {UINT_MAX, {0, 0}},
+ },
+ {
+ /* Two shader engines */
+ {0, {256, 256}},
+ {2, {128, 256}},
+ {3, {128, 128}},
+ {5, {64, 128}},
+ {9, {32, 128}},
+ {17, {16, 128}},
+ {33, {0, 0}},
+ {UINT_MAX, {0, 0}},
+ },
+ {
+ /* Four shader engines */
+ {0, {256, 512}},
+ {2, {256, 256}},
+ {3, {128, 256}},
+ {5, {128, 128}},
+ {9, {64, 128}},
+ {17, {16, 128}},
+ {33, {0, 0}},
+ {UINT_MAX, {0, 0}},
+ },
+ },
+ };
+ static const struct radv_bin_size_entry ds_size_table[][3][9] = {
+ {
+ // One RB / SE
+ {
+ // One shader engine
+ {0, {128, 256}},
+ {2, {128, 128}},
+ {4, {64, 128}},
+ {7, {32, 128}},
+ {13, {16, 128}},
+ {49, {0, 0}},
+ {UINT_MAX, {0, 0}},
+ },
+ {
+ // Two shader engines
+ {0, {256, 256}},
+ {2, {128, 256}},
+ {4, {128, 128}},
+ {7, {64, 128}},
+ {13, {32, 128}},
+ {25, {16, 128}},
+ {49, {0, 0}},
+ {UINT_MAX, {0, 0}},
+ },
+ {
+ // Four shader engines
+ {0, {256, 512}},
+ {2, {256, 256}},
+ {4, {128, 256}},
+ {7, {128, 128}},
+ {13, {64, 128}},
+ {25, {16, 128}},
+ {49, {0, 0}},
+ {UINT_MAX, {0, 0}},
+ },
+ },
+ {
+ // Two RB / SE
+ {
+ // One shader engine
+ {0, {256, 256}},
+ {2, {128, 256}},
+ {4, {128, 128}},
+ {7, {64, 128}},
+ {13, {32, 128}},
+ {25, {16, 128}},
+ {97, {0, 0}},
+ {UINT_MAX, {0, 0}},
+ },
+ {
+ // Two shader engines
+ {0, {256, 512}},
+ {2, {256, 256}},
+ {4, {128, 256}},
+ {7, {128, 128}},
+ {13, {64, 128}},
+ {25, {32, 128}},
+ {49, {16, 128}},
+ {97, {0, 0}},
+ {UINT_MAX, {0, 0}},
+ },
+ {
+ // Four shader engines
+ {0, {512, 512}},
+ {2, {256, 512}},
+ {4, {256, 256}},
+ {7, {128, 256}},
+ {13, {128, 128}},
+ {25, {64, 128}},
+ {49, {16, 128}},
+ {97, {0, 0}},
+ {UINT_MAX, {0, 0}},
+ },
+ },
+ {
+ // Four RB / SE
+ {
+ // One shader engine
+ {0, {256, 512}},
+ {2, {256, 256}},
+ {4, {128, 256}},
+ {7, {128, 128}},
+ {13, {64, 128}},
+ {25, {32, 128}},
+ {49, {16, 128}},
+ {UINT_MAX, {0, 0}},
+ },
+ {
+ // Two shader engines
+ {0, {512, 512}},
+ {2, {256, 512}},
+ {4, {256, 256}},
+ {7, {128, 256}},
+ {13, {128, 128}},
+ {25, {64, 128}},
+ {49, {32, 128}},
+ {97, {16, 128}},
+ {UINT_MAX, {0, 0}},
+ },
+ {
+ // Four shader engines
+ {0, {512, 512}},
+ {4, {256, 512}},
+ {7, {256, 256}},
+ {13, {128, 256}},
+ {25, {128, 128}},
+ {49, {64, 128}},
+ {97, {16, 128}},
+ {UINT_MAX, {0, 0}},
+ },
+ },
+ };
+
+ RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
+ struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
+ VkExtent2D extent = {512, 512};
+
+ unsigned log_num_rb_per_se =
+ util_logbase2_ceil(pipeline->device->physical_device->rad_info.max_render_backends /
+ pipeline->device->physical_device->rad_info.max_se);
+ unsigned log_num_se = util_logbase2_ceil(pipeline->device->physical_device->rad_info.max_se);
+
+ unsigned total_samples = 1u << G_028BE0_MSAA_NUM_SAMPLES(pipeline->graphics.ms.pa_sc_aa_config);
+ unsigned ps_iter_samples = 1u << G_028804_PS_ITER_SAMPLES(pipeline->graphics.ms.db_eqaa);
+ unsigned effective_samples = total_samples;
+ unsigned color_bytes_per_pixel = 0;
+
+ const VkPipelineColorBlendStateCreateInfo *vkblend =
+ radv_pipeline_get_color_blend_state(pCreateInfo);
+ if (vkblend) {
+ for (unsigned i = 0; i < subpass->color_count; i++) {
+ if (!vkblend->pAttachments[i].colorWriteMask)
+ continue;
+
+ if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ VkFormat format = pass->attachments[subpass->color_attachments[i].attachment].format;
+ color_bytes_per_pixel += vk_format_get_blocksize(format);
+ }
+
+ /* MSAA images typically don't use all samples all the time. */
+ if (effective_samples >= 2 && ps_iter_samples <= 1)
+ effective_samples = 2;
+ color_bytes_per_pixel *= effective_samples;
+ }
- const unsigned db_tag_part = (db_tag_count * rb_count / pipe_count) * db_tag_size * pipe_count;
- const unsigned color_tag_part = (color_tag_count * rb_count / pipe_count) * color_tag_size * pipe_count;
- const unsigned fmask_tag_part = (fmask_tag_count * rb_count / pipe_count) * fmask_tag_size * pipe_count;
+ const struct radv_bin_size_entry *color_entry = color_size_table[log_num_rb_per_se][log_num_se];
+ while (color_entry[1].bpp <= color_bytes_per_pixel)
+ ++color_entry;
- const unsigned total_samples = 1u << G_028BE0_MSAA_NUM_SAMPLES(pipeline->graphics.ms.pa_sc_aa_config);
- const unsigned samples_log = util_logbase2_ceil(total_samples);
+ extent = color_entry->extent;
- unsigned color_bytes_per_pixel = 0;
- unsigned fmask_bytes_per_pixel = 0;
+ if (subpass->depth_stencil_attachment) {
+ struct radv_render_pass_attachment *attachment =
+ pass->attachments + subpass->depth_stencil_attachment->attachment;
- const VkPipelineColorBlendStateCreateInfo *vkblend =
- radv_pipeline_get_color_blend_state(pCreateInfo);
- if (vkblend) {
- for (unsigned i = 0; i < subpass->color_count; i++) {
- if (!vkblend->pAttachments[i].colorWriteMask)
- continue;
+ /* Coefficients taken from AMDVLK */
+ unsigned depth_coeff = vk_format_has_depth(attachment->format) ? 5 : 0;
+ unsigned stencil_coeff = vk_format_has_stencil(attachment->format) ? 1 : 0;
+ unsigned ds_bytes_per_pixel = 4 * (depth_coeff + stencil_coeff) * total_samples;
- if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED)
- continue;
+ const struct radv_bin_size_entry *ds_entry = ds_size_table[log_num_rb_per_se][log_num_se];
+ while (ds_entry[1].bpp <= ds_bytes_per_pixel)
+ ++ds_entry;
- VkFormat format = pass->attachments[subpass->color_attachments[i].attachment].format;
- color_bytes_per_pixel += vk_format_get_blocksize(format);
+ if (ds_entry->extent.width * ds_entry->extent.height < extent.width * extent.height)
+ extent = ds_entry->extent;
+ }
- if (total_samples > 1) {
- assert(samples_log <= 3);
- const unsigned fmask_array[] = {0, 1, 1, 4};
- fmask_bytes_per_pixel += fmask_array[samples_log];
- }
- }
+ return extent;
+}
- color_bytes_per_pixel *= total_samples;
- }
- color_bytes_per_pixel = MAX2(color_bytes_per_pixel, 1);
+static VkExtent2D
+radv_gfx10_compute_bin_size(const struct radv_pipeline *pipeline,
+ const VkGraphicsPipelineCreateInfo *pCreateInfo)
+{
+ RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
+ struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
+ VkExtent2D extent = {512, 512};
+
+ const unsigned db_tag_size = 64;
+ const unsigned db_tag_count = 312;
+ const unsigned color_tag_size = 1024;
+ const unsigned color_tag_count = 31;
+ const unsigned fmask_tag_size = 256;
+ const unsigned fmask_tag_count = 44;
+
+ const unsigned rb_count = pipeline->device->physical_device->rad_info.max_render_backends;
+ const unsigned pipe_count =
+ MAX2(rb_count, pipeline->device->physical_device->rad_info.num_tcc_blocks);
+
+ const unsigned db_tag_part = (db_tag_count * rb_count / pipe_count) * db_tag_size * pipe_count;
+ const unsigned color_tag_part =
+ (color_tag_count * rb_count / pipe_count) * color_tag_size * pipe_count;
+ const unsigned fmask_tag_part =
+ (fmask_tag_count * rb_count / pipe_count) * fmask_tag_size * pipe_count;
+
+ const unsigned total_samples =
+ 1u << G_028BE0_MSAA_NUM_SAMPLES(pipeline->graphics.ms.pa_sc_aa_config);
+ const unsigned samples_log = util_logbase2_ceil(total_samples);
+
+ unsigned color_bytes_per_pixel = 0;
+ unsigned fmask_bytes_per_pixel = 0;
+
+ const VkPipelineColorBlendStateCreateInfo *vkblend =
+ radv_pipeline_get_color_blend_state(pCreateInfo);
+ if (vkblend) {
+ for (unsigned i = 0; i < subpass->color_count; i++) {
+ if (!vkblend->pAttachments[i].colorWriteMask)
+ continue;
+
+ if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ VkFormat format = pass->attachments[subpass->color_attachments[i].attachment].format;
+ color_bytes_per_pixel += vk_format_get_blocksize(format);
+
+ if (total_samples > 1) {
+ assert(samples_log <= 3);
+ const unsigned fmask_array[] = {0, 1, 1, 4};
+ fmask_bytes_per_pixel += fmask_array[samples_log];
+ }
+ }
+
+ color_bytes_per_pixel *= total_samples;
+ }
+ color_bytes_per_pixel = MAX2(color_bytes_per_pixel, 1);
- const unsigned color_pixel_count_log = util_logbase2(color_tag_part / color_bytes_per_pixel);
- extent.width = 1ull << ((color_pixel_count_log + 1) / 2);
- extent.height = 1ull << (color_pixel_count_log / 2);
+ const unsigned color_pixel_count_log = util_logbase2(color_tag_part / color_bytes_per_pixel);
+ extent.width = 1ull << ((color_pixel_count_log + 1) / 2);
+ extent.height = 1ull << (color_pixel_count_log / 2);
- if (fmask_bytes_per_pixel) {
- const unsigned fmask_pixel_count_log = util_logbase2(fmask_tag_part / fmask_bytes_per_pixel);
+ if (fmask_bytes_per_pixel) {
+ const unsigned fmask_pixel_count_log = util_logbase2(fmask_tag_part / fmask_bytes_per_pixel);
- const VkExtent2D fmask_extent = (VkExtent2D){
- .width = 1ull << ((fmask_pixel_count_log + 1) / 2),
- .height = 1ull << (color_pixel_count_log / 2)
- };
+ const VkExtent2D fmask_extent =
+ (VkExtent2D){.width = 1ull << ((fmask_pixel_count_log + 1) / 2),
+ .height = 1ull << (color_pixel_count_log / 2)};
- if (fmask_extent.width * fmask_extent.height < extent.width * extent.height)
- extent = fmask_extent;
- }
+ if (fmask_extent.width * fmask_extent.height < extent.width * extent.height)
+ extent = fmask_extent;
+ }
- if (subpass->depth_stencil_attachment) {
- struct radv_render_pass_attachment *attachment = pass->attachments + subpass->depth_stencil_attachment->attachment;
+ if (subpass->depth_stencil_attachment) {
+ struct radv_render_pass_attachment *attachment =
+ pass->attachments + subpass->depth_stencil_attachment->attachment;
- /* Coefficients taken from AMDVLK */
- unsigned depth_coeff = vk_format_has_depth(attachment->format) ? 5 : 0;
- unsigned stencil_coeff = vk_format_has_stencil(attachment->format) ? 1 : 0;
- unsigned db_bytes_per_pixel = (depth_coeff + stencil_coeff) * total_samples;
+ /* Coefficients taken from AMDVLK */
+ unsigned depth_coeff = vk_format_has_depth(attachment->format) ? 5 : 0;
+ unsigned stencil_coeff = vk_format_has_stencil(attachment->format) ? 1 : 0;
+ unsigned db_bytes_per_pixel = (depth_coeff + stencil_coeff) * total_samples;
- const unsigned db_pixel_count_log = util_logbase2(db_tag_part / db_bytes_per_pixel);
+ const unsigned db_pixel_count_log = util_logbase2(db_tag_part / db_bytes_per_pixel);
- const VkExtent2D db_extent = (VkExtent2D){
- .width = 1ull << ((db_pixel_count_log + 1) / 2),
- .height = 1ull << (color_pixel_count_log / 2)
- };
+ const VkExtent2D db_extent = (VkExtent2D){.width = 1ull << ((db_pixel_count_log + 1) / 2),
+ .height = 1ull << (color_pixel_count_log / 2)};
- if (db_extent.width * db_extent.height < extent.width * extent.height)
- extent = db_extent;
- }
+ if (db_extent.width * db_extent.height < extent.width * extent.height)
+ extent = db_extent;
+ }
- extent.width = MAX2(extent.width, 128);
- extent.height = MAX2(extent.width, 64);
+ extent.width = MAX2(extent.width, 128);
+ extent.height = MAX2(extent.width, 64);
- return extent;
+ return extent;
}

static void
radv_pipeline_init_disabled_binning_state(struct radv_pipeline *pipeline,
- const VkGraphicsPipelineCreateInfo *pCreateInfo)
+ const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
- uint32_t pa_sc_binner_cntl_0 =
- S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC) |
- S_028C44_DISABLE_START_OF_PRIM(1);
- uint32_t db_dfsm_control = S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF);
-
- if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
- RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
- struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
- const VkPipelineColorBlendStateCreateInfo *vkblend =
- radv_pipeline_get_color_blend_state(pCreateInfo);
- unsigned min_bytes_per_pixel = 0;
-
- if (vkblend) {
- for (unsigned i = 0; i < subpass->color_count; i++) {
- if (!vkblend->pAttachments[i].colorWriteMask)
- continue;
-
- if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED)
- continue;
-
- VkFormat format = pass->attachments[subpass->color_attachments[i].attachment].format;
- unsigned bytes = vk_format_get_blocksize(format);
- if (!min_bytes_per_pixel || bytes < min_bytes_per_pixel)
- min_bytes_per_pixel = bytes;
- }
- }
-
- pa_sc_binner_cntl_0 =
- S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_NEW_SC) |
- S_028C44_BIN_SIZE_X(0) |
- S_028C44_BIN_SIZE_Y(0) |
- S_028C44_BIN_SIZE_X_EXTEND(2) | /* 128 */
- S_028C44_BIN_SIZE_Y_EXTEND(min_bytes_per_pixel <= 4 ? 2 : 1) | /* 128 or 64 */
- S_028C44_DISABLE_START_OF_PRIM(1);
- }
-
- pipeline->graphics.binning.pa_sc_binner_cntl_0 = pa_sc_binner_cntl_0;
- pipeline->graphics.binning.db_dfsm_control = db_dfsm_control;
+ uint32_t pa_sc_binner_cntl_0 = S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC) |
+ S_028C44_DISABLE_START_OF_PRIM(1);
+ uint32_t db_dfsm_control = S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF);
+
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
+ RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
+ struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
+ const VkPipelineColorBlendStateCreateInfo *vkblend =
+ radv_pipeline_get_color_blend_state(pCreateInfo);
+ unsigned min_bytes_per_pixel = 0;
+
+ if (vkblend) {
+ for (unsigned i = 0; i < subpass->color_count; i++) {
+ if (!vkblend->pAttachments[i].colorWriteMask)
+ continue;
+
+ if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ VkFormat format = pass->attachments[subpass->color_attachments[i].attachment].format;
+ unsigned bytes = vk_format_get_blocksize(format);
+ if (!min_bytes_per_pixel || bytes < min_bytes_per_pixel)
+ min_bytes_per_pixel = bytes;
+ }
+ }
+
+ pa_sc_binner_cntl_0 =
+ S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_NEW_SC) | S_028C44_BIN_SIZE_X(0) |
+ S_028C44_BIN_SIZE_Y(0) | S_028C44_BIN_SIZE_X_EXTEND(2) | /* 128 */
+ S_028C44_BIN_SIZE_Y_EXTEND(min_bytes_per_pixel <= 4 ? 2 : 1) | /* 128 or 64 */
+ S_028C44_DISABLE_START_OF_PRIM(1);
+ }
+
+ pipeline->graphics.binning.pa_sc_binner_cntl_0 = pa_sc_binner_cntl_0;
+ pipeline->graphics.binning.db_dfsm_control = db_dfsm_control;
}

struct radv_binning_settings
radv_get_binning_settings(const struct radv_physical_device *pdev)
{
- struct radv_binning_settings settings;
- if (pdev->rad_info.has_dedicated_vram) {
- if (pdev->rad_info.max_render_backends > 4) {
- settings.context_states_per_bin = 1;
- settings.persistent_states_per_bin = 1;
- } else {
- settings.context_states_per_bin = 3;
- settings.persistent_states_per_bin = 8;
- }
- settings.fpovs_per_batch = 63;
- } else {
- /* The context states are affected by the scissor bug. */
- settings.context_states_per_bin = 6;
- /* 32 causes hangs for RAVEN. */
- settings.persistent_states_per_bin = 16;
- settings.fpovs_per_batch = 63;
- }
-
- if (pdev->rad_info.has_gfx9_scissor_bug)
- settings.context_states_per_bin = 1;
-
- return settings;
+ struct radv_binning_settings settings;
+ if (pdev->rad_info.has_dedicated_vram) {
+ if (pdev->rad_info.max_render_backends > 4) {
+ settings.context_states_per_bin = 1;
+ settings.persistent_states_per_bin = 1;
+ } else {
+ settings.context_states_per_bin = 3;
+ settings.persistent_states_per_bin = 8;
+ }
+ settings.fpovs_per_batch = 63;
+ } else {
+ /* The context states are affected by the scissor bug. */
+ settings.context_states_per_bin = 6;
+ /* 32 causes hangs for RAVEN. */
+ settings.persistent_states_per_bin = 16;
+ settings.fpovs_per_batch = 63;
+ }
+
+ if (pdev->rad_info.has_gfx9_scissor_bug)
+ settings.context_states_per_bin = 1;
+
+ return settings;
}

static void
radv_pipeline_init_binning_state(struct radv_pipeline *pipeline,
- const VkGraphicsPipelineCreateInfo *pCreateInfo,
- const struct radv_blend_state *blend)
+ const VkGraphicsPipelineCreateInfo *pCreateInfo,
+ const struct radv_blend_state *blend)
{
- if (pipeline->device->physical_device->rad_info.chip_class < GFX9)
- return;
-
- VkExtent2D bin_size;
- if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
- bin_size = radv_gfx10_compute_bin_size(pipeline, pCreateInfo);
- } else if (pipeline->device->physical_device->rad_info.chip_class == GFX9) {
- bin_size = radv_gfx9_compute_bin_size(pipeline, pCreateInfo);
- } else
- unreachable("Unhandled generation for binning bin size calculation");
-
- if (pipeline->device->pbb_allowed && bin_size.width && bin_size.height) {
- struct radv_binning_settings settings =
- radv_get_binning_settings(pipeline->device->physical_device);
-
- bool disable_start_of_prim = true;
- uint32_t db_dfsm_control = S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF);
-
- const struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
-
- if (pipeline->device->dfsm_allowed && ps &&
- !ps->info.ps.can_discard &&
- !ps->info.ps.writes_memory &&
- blend->cb_target_enabled_4bit) {
- db_dfsm_control = S_028060_PUNCHOUT_MODE(V_028060_AUTO);
- disable_start_of_prim = (blend->blend_enable_4bit & blend->cb_target_enabled_4bit) != 0;
- }
-
- const uint32_t pa_sc_binner_cntl_0 =
- S_028C44_BINNING_MODE(V_028C44_BINNING_ALLOWED) |
- S_028C44_BIN_SIZE_X(bin_size.width == 16) |
- S_028C44_BIN_SIZE_Y(bin_size.height == 16) |
- S_028C44_BIN_SIZE_X_EXTEND(util_logbase2(MAX2(bin_size.width, 32)) - 5) |
- S_028C44_BIN_SIZE_Y_EXTEND(util_logbase2(MAX2(bin_size.height, 32)) - 5) |
- S_028C44_CONTEXT_STATES_PER_BIN(settings.context_states_per_bin - 1) |
- S_028C44_PERSISTENT_STATES_PER_BIN(settings.persistent_states_per_bin - 1) |
- S_028C44_DISABLE_START_OF_PRIM(disable_start_of_prim) |
- S_028C44_FPOVS_PER_BATCH(settings.fpovs_per_batch) |
- S_028C44_OPTIMAL_BIN_SELECTION(1);
-
- pipeline->graphics.binning.pa_sc_binner_cntl_0 = pa_sc_binner_cntl_0;
- pipeline->graphics.binning.db_dfsm_control = db_dfsm_control;
- } else
- radv_pipeline_init_disabled_binning_state(pipeline, pCreateInfo);
+ if (pipeline->device->physical_device->rad_info.chip_class < GFX9)
+ return;
+
+ VkExtent2D bin_size;
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
+ bin_size = radv_gfx10_compute_bin_size(pipeline, pCreateInfo);
+ } else if (pipeline->device->physical_device->rad_info.chip_class == GFX9) {
+ bin_size = radv_gfx9_compute_bin_size(pipeline, pCreateInfo);
+ } else
+ unreachable("Unhandled generation for binning bin size calculation");
+
+ if (pipeline->device->pbb_allowed && bin_size.width && bin_size.height) {
+ struct radv_binning_settings settings =
+ radv_get_binning_settings(pipeline->device->physical_device);
+
+ bool disable_start_of_prim = true;
+ uint32_t db_dfsm_control = S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF);
+
+ const struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
+
+ if (pipeline->device->dfsm_allowed && ps && !ps->info.ps.can_discard &&
+ !ps->info.ps.writes_memory && blend->cb_target_enabled_4bit) {
+ db_dfsm_control = S_028060_PUNCHOUT_MODE(V_028060_AUTO);
+ disable_start_of_prim = (blend->blend_enable_4bit & blend->cb_target_enabled_4bit) != 0;
+ }
+
+ const uint32_t pa_sc_binner_cntl_0 =
+ S_028C44_BINNING_MODE(V_028C44_BINNING_ALLOWED) |
+ S_028C44_BIN_SIZE_X(bin_size.width == 16) | S_028C44_BIN_SIZE_Y(bin_size.height == 16) |
+ S_028C44_BIN_SIZE_X_EXTEND(util_logbase2(MAX2(bin_size.width, 32)) - 5) |
+ S_028C44_BIN_SIZE_Y_EXTEND(util_logbase2(MAX2(bin_size.height, 32)) - 5) |
+ S_028C44_CONTEXT_STATES_PER_BIN(settings.context_states_per_bin - 1) |
+ S_028C44_PERSISTENT_STATES_PER_BIN(settings.persistent_states_per_bin - 1) |
+ S_028C44_DISABLE_START_OF_PRIM(disable_start_of_prim) |
+ S_028C44_FPOVS_PER_BATCH(settings.fpovs_per_batch) | S_028C44_OPTIMAL_BIN_SELECTION(1);
+
+ pipeline->graphics.binning.pa_sc_binner_cntl_0 = pa_sc_binner_cntl_0;
+ pipeline->graphics.binning.db_dfsm_control = db_dfsm_control;
+ } else
+ radv_pipeline_init_disabled_binning_state(pipeline, pCreateInfo);
}
-
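For readers tracking the register math in radv_pipeline_init_binning_state above: PA_SC_BINNER_CNTL_0 splits each bin dimension across two fields, a one-bit BIN_SIZE_{X,Y} flag that is set only for 16-pixel bins, and a BIN_SIZE_{X,Y}_EXTEND field that encodes 32..512-pixel bins as log2(size) - 5. A minimal sketch of that encoding, using a hypothetical helper name that does not exist in radv (the real code inlines the expression):

   /* Illustration only: how the BIN_SIZE fields are derived from a bin
    * dimension in pixels. util_logbase2() and MAX2() are the same Mesa
    * util helpers used in the code above.
    */
   static unsigned
   radv_bin_size_extend(unsigned size_px)
   {
      /* 16 px is signalled through BIN_SIZE_{X,Y} == 1 instead;
       * 32, 64, 128, 256, 512 px map to 0, 1, 2, 3, 4. */
      return util_logbase2(MAX2(size_px, 32)) - 5;
   }

   /* e.g. S_028C44_BIN_SIZE_X(bin_size.width == 16) |
    *      S_028C44_BIN_SIZE_X_EXTEND(radv_bin_size_extend(bin_size.width))
    */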
static void
radv_pipeline_generate_depth_stencil_state(struct radeon_cmdbuf *ctx_cs,
const struct radv_pipeline *pipeline,
const VkGraphicsPipelineCreateInfo *pCreateInfo,
const struct radv_graphics_pipeline_create_info *extra)
{
- const VkPipelineDepthStencilStateCreateInfo *vkds = radv_pipeline_get_depth_stencil_state(pCreateInfo);
- RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
- struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
- struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
- struct radv_render_pass_attachment *attachment = NULL;
- uint32_t db_render_control = 0, db_render_override2 = 0;
- uint32_t db_render_override = 0;
-
- if (subpass->depth_stencil_attachment)
- attachment = pass->attachments + subpass->depth_stencil_attachment->attachment;
-
- bool has_depth_attachment = attachment && vk_format_has_depth(attachment->format);
-
- if (vkds && has_depth_attachment) {
- /* from amdvlk: For 4xAA and 8xAA need to decompress on flush for better performance */
- db_render_override2 |= S_028010_DECOMPRESS_Z_ON_FLUSH(attachment->samples > 2);
-
- if (pipeline->device->physical_device->rad_info.chip_class >= GFX10_3)
- db_render_override2 |= S_028010_CENTROID_COMPUTATION_MODE(1);
- }
-
- if (attachment && extra) {
- db_render_control |= S_028000_DEPTH_CLEAR_ENABLE(extra->db_depth_clear);
- db_render_control |= S_028000_STENCIL_CLEAR_ENABLE(extra->db_stencil_clear);
-
- db_render_control |= S_028000_RESUMMARIZE_ENABLE(extra->resummarize_enable);
- db_render_control |= S_028000_DEPTH_COMPRESS_DISABLE(extra->depth_compress_disable);
- db_render_control |= S_028000_STENCIL_COMPRESS_DISABLE(extra->stencil_compress_disable);
- db_render_override2 |= S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(extra->db_depth_disable_expclear);
- db_render_override2 |= S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(extra->db_stencil_disable_expclear);
- }
-
- db_render_override |= S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
- S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE);
-
- if (!pCreateInfo->pRasterizationState->depthClampEnable &&
- ps->info.ps.writes_z) {
- /* From VK_EXT_depth_range_unrestricted spec:
- *
- * "The behavior described in Primitive Clipping still applies.
- * If depth clamping is disabled the depth values are still
- * clipped to 0 ≤ zc ≤ wc before the viewport transform. If
- * depth clamping is enabled the above equation is ignored and
- * the depth values are instead clamped to the VkViewport
- * minDepth and maxDepth values, which in the case of this
- * extension can be outside of the 0.0 to 1.0 range."
- */
- db_render_override |= S_02800C_DISABLE_VIEWPORT_CLAMP(1);
- }
-
- radeon_set_context_reg(ctx_cs, R_028000_DB_RENDER_CONTROL, db_render_control);
- radeon_set_context_reg(ctx_cs, R_02800C_DB_RENDER_OVERRIDE, db_render_override);
- radeon_set_context_reg(ctx_cs, R_028010_DB_RENDER_OVERRIDE2, db_render_override2);
+ const VkPipelineDepthStencilStateCreateInfo *vkds =
+ radv_pipeline_get_depth_stencil_state(pCreateInfo);
+ RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
+ struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
+ struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
+ struct radv_render_pass_attachment *attachment = NULL;
+ uint32_t db_render_control = 0, db_render_override2 = 0;
+ uint32_t db_render_override = 0;
+
+ if (subpass->depth_stencil_attachment)
+ attachment = pass->attachments + subpass->depth_stencil_attachment->attachment;
+
+ bool has_depth_attachment = attachment && vk_format_has_depth(attachment->format);
+
+ if (vkds && has_depth_attachment) {
+ /* from amdvlk: For 4xAA and 8xAA need to decompress on flush for better performance */
+ db_render_override2 |= S_028010_DECOMPRESS_Z_ON_FLUSH(attachment->samples > 2);
+
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX10_3)
+ db_render_override2 |= S_028010_CENTROID_COMPUTATION_MODE(1);
+ }
+
+ if (attachment && extra) {
+ db_render_control |= S_028000_DEPTH_CLEAR_ENABLE(extra->db_depth_clear);
+ db_render_control |= S_028000_STENCIL_CLEAR_ENABLE(extra->db_stencil_clear);
+
+ db_render_control |= S_028000_RESUMMARIZE_ENABLE(extra->resummarize_enable);
+ db_render_control |= S_028000_DEPTH_COMPRESS_DISABLE(extra->depth_compress_disable);
+ db_render_control |= S_028000_STENCIL_COMPRESS_DISABLE(extra->stencil_compress_disable);
+ db_render_override2 |=
+ S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(extra->db_depth_disable_expclear);
+ db_render_override2 |=
+ S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(extra->db_stencil_disable_expclear);
+ }
+
+ db_render_override |= S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
+ S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE);
+
+ if (!pCreateInfo->pRasterizationState->depthClampEnable && ps->info.ps.writes_z) {
+ /* From VK_EXT_depth_range_unrestricted spec:
+ *
+ * "The behavior described in Primitive Clipping still applies.
+ * If depth clamping is disabled the depth values are still
+ * clipped to 0 ≤ zc ≤ wc before the viewport transform. If
+ * depth clamping is enabled the above equation is ignored and
+ * the depth values are instead clamped to the VkViewport
+ * minDepth and maxDepth values, which in the case of this
+ * extension can be outside of the 0.0 to 1.0 range."
+ */
+ db_render_override |= S_02800C_DISABLE_VIEWPORT_CLAMP(1);
+ }
+
+ radeon_set_context_reg(ctx_cs, R_028000_DB_RENDER_CONTROL, db_render_control);
+ radeon_set_context_reg(ctx_cs, R_02800C_DB_RENDER_OVERRIDE, db_render_override);
+ radeon_set_context_reg(ctx_cs, R_028010_DB_RENDER_OVERRIDE2, db_render_override2);
}
static void
@@ -4290,1083 +4146,1044 @@ radv_pipeline_generate_blend_state(struct radeon_cmdbuf *ctx_cs,
const struct radv_pipeline *pipeline,
const struct radv_blend_state *blend)
{
- radeon_set_context_reg_seq(ctx_cs, R_028780_CB_BLEND0_CONTROL, 8);
- radeon_emit_array(ctx_cs, blend->cb_blend_control,
- 8);
- radeon_set_context_reg(ctx_cs, R_028808_CB_COLOR_CONTROL, blend->cb_color_control);
- radeon_set_context_reg(ctx_cs, R_028B70_DB_ALPHA_TO_MASK, blend->db_alpha_to_mask);
+ radeon_set_context_reg_seq(ctx_cs, R_028780_CB_BLEND0_CONTROL, 8);
+ radeon_emit_array(ctx_cs, blend->cb_blend_control, 8);
+ radeon_set_context_reg(ctx_cs, R_028808_CB_COLOR_CONTROL, blend->cb_color_control);
+ radeon_set_context_reg(ctx_cs, R_028B70_DB_ALPHA_TO_MASK, blend->db_alpha_to_mask);
- if (pipeline->device->physical_device->rad_info.has_rbplus) {
+ if (pipeline->device->physical_device->rad_info.has_rbplus) {
- radeon_set_context_reg_seq(ctx_cs, R_028760_SX_MRT0_BLEND_OPT, 8);
- radeon_emit_array(ctx_cs, blend->sx_mrt_blend_opt, 8);
- }
+ radeon_set_context_reg_seq(ctx_cs, R_028760_SX_MRT0_BLEND_OPT, 8);
+ radeon_emit_array(ctx_cs, blend->sx_mrt_blend_opt, 8);
+ }
- radeon_set_context_reg(ctx_cs, R_028714_SPI_SHADER_COL_FORMAT, blend->spi_shader_col_format);
+ radeon_set_context_reg(ctx_cs, R_028714_SPI_SHADER_COL_FORMAT, blend->spi_shader_col_format);
- radeon_set_context_reg(ctx_cs, R_028238_CB_TARGET_MASK, blend->cb_target_mask);
- radeon_set_context_reg(ctx_cs, R_02823C_CB_SHADER_MASK, blend->cb_shader_mask);
+ radeon_set_context_reg(ctx_cs, R_028238_CB_TARGET_MASK, blend->cb_target_mask);
+ radeon_set_context_reg(ctx_cs, R_02823C_CB_SHADER_MASK, blend->cb_shader_mask);
}
static void
radv_pipeline_generate_raster_state(struct radeon_cmdbuf *ctx_cs,
- const struct radv_pipeline *pipeline,
+ const struct radv_pipeline *pipeline,
const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
- const VkPipelineRasterizationStateCreateInfo *vkraster = pCreateInfo->pRasterizationState;
- const VkConservativeRasterizationModeEXT mode =
- radv_get_conservative_raster_mode(vkraster);
- uint32_t pa_sc_conservative_rast = S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1);
- bool depth_clip_disable = vkraster->depthClampEnable;
-
- const VkPipelineRasterizationDepthClipStateCreateInfoEXT *depth_clip_state =
- vk_find_struct_const(vkraster->pNext, PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT);
- if (depth_clip_state) {
- depth_clip_disable = !depth_clip_state->depthClipEnable;
- }
-
- radeon_set_context_reg(ctx_cs, R_028810_PA_CL_CLIP_CNTL,
- S_028810_DX_CLIP_SPACE_DEF(1) | // vulkan uses DX conventions.
- S_028810_ZCLIP_NEAR_DISABLE(depth_clip_disable ? 1 : 0) |
- S_028810_ZCLIP_FAR_DISABLE(depth_clip_disable ? 1 : 0) |
- S_028810_DX_RASTERIZATION_KILL(vkraster->rasterizerDiscardEnable ? 1 : 0) |
- S_028810_DX_LINEAR_ATTR_CLIP_ENA(1));
-
- radeon_set_context_reg(ctx_cs, R_028BDC_PA_SC_LINE_CNTL,
- S_028BDC_DX10_DIAMOND_TEST_ENA(1));
-
- /* Conservative rasterization. */
- if (mode != VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT) {
- pa_sc_conservative_rast = S_028C4C_PREZ_AA_MASK_ENABLE(1) |
- S_028C4C_POSTZ_AA_MASK_ENABLE(1) |
- S_028C4C_CENTROID_SAMPLE_OVERRIDE(1);
-
- if (mode == VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT) {
- pa_sc_conservative_rast |=
- S_028C4C_OVER_RAST_ENABLE(1) |
- S_028C4C_OVER_RAST_SAMPLE_SELECT(0) |
- S_028C4C_UNDER_RAST_ENABLE(0) |
- S_028C4C_UNDER_RAST_SAMPLE_SELECT(1) |
- S_028C4C_PBB_UNCERTAINTY_REGION_ENABLE(1);
- } else {
- assert(mode == VK_CONSERVATIVE_RASTERIZATION_MODE_UNDERESTIMATE_EXT);
- pa_sc_conservative_rast |=
- S_028C4C_OVER_RAST_ENABLE(0) |
- S_028C4C_OVER_RAST_SAMPLE_SELECT(1) |
- S_028C4C_UNDER_RAST_ENABLE(1) |
- S_028C4C_UNDER_RAST_SAMPLE_SELECT(0) |
- S_028C4C_PBB_UNCERTAINTY_REGION_ENABLE(0);
- }
- }
-
- radeon_set_context_reg(ctx_cs, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL,
- pa_sc_conservative_rast);
-}
+ const VkPipelineRasterizationStateCreateInfo *vkraster = pCreateInfo->pRasterizationState;
+ const VkConservativeRasterizationModeEXT mode = radv_get_conservative_raster_mode(vkraster);
+ uint32_t pa_sc_conservative_rast = S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1);
+ bool depth_clip_disable = vkraster->depthClampEnable;
+
+ const VkPipelineRasterizationDepthClipStateCreateInfoEXT *depth_clip_state =
+ vk_find_struct_const(vkraster->pNext,
+ PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT);
+ if (depth_clip_state) {
+ depth_clip_disable = !depth_clip_state->depthClipEnable;
+ }
+
+ radeon_set_context_reg(
+ ctx_cs, R_028810_PA_CL_CLIP_CNTL,
+ S_028810_DX_CLIP_SPACE_DEF(1) | // vulkan uses DX conventions.
+ S_028810_ZCLIP_NEAR_DISABLE(depth_clip_disable ? 1 : 0) |
+ S_028810_ZCLIP_FAR_DISABLE(depth_clip_disable ? 1 : 0) |
+ S_028810_DX_RASTERIZATION_KILL(vkraster->rasterizerDiscardEnable ? 1 : 0) |
+ S_028810_DX_LINEAR_ATTR_CLIP_ENA(1));
+
+ radeon_set_context_reg(ctx_cs, R_028BDC_PA_SC_LINE_CNTL, S_028BDC_DX10_DIAMOND_TEST_ENA(1));
+
+ /* Conservative rasterization. */
+ if (mode != VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT) {
+ pa_sc_conservative_rast = S_028C4C_PREZ_AA_MASK_ENABLE(1) | S_028C4C_POSTZ_AA_MASK_ENABLE(1) |
+ S_028C4C_CENTROID_SAMPLE_OVERRIDE(1);
+
+ if (mode == VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT) {
+ pa_sc_conservative_rast |=
+ S_028C4C_OVER_RAST_ENABLE(1) | S_028C4C_OVER_RAST_SAMPLE_SELECT(0) |
+ S_028C4C_UNDER_RAST_ENABLE(0) | S_028C4C_UNDER_RAST_SAMPLE_SELECT(1) |
+ S_028C4C_PBB_UNCERTAINTY_REGION_ENABLE(1);
+ } else {
+ assert(mode == VK_CONSERVATIVE_RASTERIZATION_MODE_UNDERESTIMATE_EXT);
+ pa_sc_conservative_rast |=
+ S_028C4C_OVER_RAST_ENABLE(0) | S_028C4C_OVER_RAST_SAMPLE_SELECT(1) |
+ S_028C4C_UNDER_RAST_ENABLE(1) | S_028C4C_UNDER_RAST_SAMPLE_SELECT(0) |
+ S_028C4C_PBB_UNCERTAINTY_REGION_ENABLE(0);
+ }
+ }
+ radeon_set_context_reg(ctx_cs, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL,
+ pa_sc_conservative_rast);
+}
static void
radv_pipeline_generate_multisample_state(struct radeon_cmdbuf *ctx_cs,
const struct radv_pipeline *pipeline)
{
- const struct radv_multisample_state *ms = &pipeline->graphics.ms;
-
- radeon_set_context_reg_seq(ctx_cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
- radeon_emit(ctx_cs, ms->pa_sc_aa_mask[0]);
- radeon_emit(ctx_cs, ms->pa_sc_aa_mask[1]);
-
- radeon_set_context_reg(ctx_cs, R_028804_DB_EQAA, ms->db_eqaa);
- radeon_set_context_reg(ctx_cs, R_028A48_PA_SC_MODE_CNTL_0, ms->pa_sc_mode_cntl_0);
- radeon_set_context_reg(ctx_cs, R_028A4C_PA_SC_MODE_CNTL_1, ms->pa_sc_mode_cntl_1);
- radeon_set_context_reg(ctx_cs, R_028BE0_PA_SC_AA_CONFIG, ms->pa_sc_aa_config);
-
- /* The exclusion bits can be set to improve rasterization efficiency
- * if no sample lies on the pixel boundary (-8 sample offset). It's
- * currently always TRUE because the driver doesn't support 16 samples.
- */
- bool exclusion = pipeline->device->physical_device->rad_info.chip_class >= GFX7;
- radeon_set_context_reg(ctx_cs, R_02882C_PA_SU_PRIM_FILTER_CNTL,
- S_02882C_XMAX_RIGHT_EXCLUSION(exclusion) |
- S_02882C_YMAX_BOTTOM_EXCLUSION(exclusion));
-
- /* GFX9: Flush DFSM when the AA mode changes. */
- if (pipeline->device->dfsm_allowed) {
- radeon_emit(ctx_cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(ctx_cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
- }
+ const struct radv_multisample_state *ms = &pipeline->graphics.ms;
+
+ radeon_set_context_reg_seq(ctx_cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
+ radeon_emit(ctx_cs, ms->pa_sc_aa_mask[0]);
+ radeon_emit(ctx_cs, ms->pa_sc_aa_mask[1]);
+
+ radeon_set_context_reg(ctx_cs, R_028804_DB_EQAA, ms->db_eqaa);
+ radeon_set_context_reg(ctx_cs, R_028A48_PA_SC_MODE_CNTL_0, ms->pa_sc_mode_cntl_0);
+ radeon_set_context_reg(ctx_cs, R_028A4C_PA_SC_MODE_CNTL_1, ms->pa_sc_mode_cntl_1);
+ radeon_set_context_reg(ctx_cs, R_028BE0_PA_SC_AA_CONFIG, ms->pa_sc_aa_config);
+
+ /* The exclusion bits can be set to improve rasterization efficiency
+ * if no sample lies on the pixel boundary (-8 sample offset). It's
+ * currently always TRUE because the driver doesn't support 16 samples.
+ */
+ bool exclusion = pipeline->device->physical_device->rad_info.chip_class >= GFX7;
+ radeon_set_context_reg(
+ ctx_cs, R_02882C_PA_SU_PRIM_FILTER_CNTL,
+ S_02882C_XMAX_RIGHT_EXCLUSION(exclusion) | S_02882C_YMAX_BOTTOM_EXCLUSION(exclusion));
+
+ /* GFX9: Flush DFSM when the AA mode changes. */
+ if (pipeline->device->dfsm_allowed) {
+ radeon_emit(ctx_cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(ctx_cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
+ }
}
static void
radv_pipeline_generate_vgt_gs_mode(struct radeon_cmdbuf *ctx_cs,
const struct radv_pipeline *pipeline)
{
- const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline);
- const struct radv_shader_variant *vs =
- pipeline->shaders[MESA_SHADER_TESS_EVAL] ?
- pipeline->shaders[MESA_SHADER_TESS_EVAL] :
- pipeline->shaders[MESA_SHADER_VERTEX];
- unsigned vgt_primitiveid_en = 0;
- uint32_t vgt_gs_mode = 0;
-
- if (radv_pipeline_has_ngg(pipeline))
- return;
-
- if (radv_pipeline_has_gs(pipeline)) {
- const struct radv_shader_variant *gs =
- pipeline->shaders[MESA_SHADER_GEOMETRY];
-
- vgt_gs_mode = ac_vgt_gs_mode(gs->info.gs.vertices_out,
- pipeline->device->physical_device->rad_info.chip_class);
- } else if (outinfo->export_prim_id || vs->info.uses_prim_id) {
- vgt_gs_mode = S_028A40_MODE(V_028A40_GS_SCENARIO_A);
- vgt_primitiveid_en |= S_028A84_PRIMITIVEID_EN(1);
- }
-
- radeon_set_context_reg(ctx_cs, R_028A84_VGT_PRIMITIVEID_EN, vgt_primitiveid_en);
- radeon_set_context_reg(ctx_cs, R_028A40_VGT_GS_MODE, vgt_gs_mode);
+ const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline);
+ const struct radv_shader_variant *vs = pipeline->shaders[MESA_SHADER_TESS_EVAL]
+ ? pipeline->shaders[MESA_SHADER_TESS_EVAL]
+ : pipeline->shaders[MESA_SHADER_VERTEX];
+ unsigned vgt_primitiveid_en = 0;
+ uint32_t vgt_gs_mode = 0;
+
+ if (radv_pipeline_has_ngg(pipeline))
+ return;
+
+ if (radv_pipeline_has_gs(pipeline)) {
+ const struct radv_shader_variant *gs = pipeline->shaders[MESA_SHADER_GEOMETRY];
+
+ vgt_gs_mode = ac_vgt_gs_mode(gs->info.gs.vertices_out,
+ pipeline->device->physical_device->rad_info.chip_class);
+ } else if (outinfo->export_prim_id || vs->info.uses_prim_id) {
+ vgt_gs_mode = S_028A40_MODE(V_028A40_GS_SCENARIO_A);
+ vgt_primitiveid_en |= S_028A84_PRIMITIVEID_EN(1);
+ }
+
+ radeon_set_context_reg(ctx_cs, R_028A84_VGT_PRIMITIVEID_EN, vgt_primitiveid_en);
+ radeon_set_context_reg(ctx_cs, R_028A40_VGT_GS_MODE, vgt_gs_mode);
}
static void
-radv_pipeline_generate_hw_vs(struct radeon_cmdbuf *ctx_cs,
- struct radeon_cmdbuf *cs,
- const struct radv_pipeline *pipeline,
- const struct radv_shader_variant *shader)
+radv_pipeline_generate_hw_vs(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
+ const struct radv_pipeline *pipeline,
+ const struct radv_shader_variant *shader)
{
- uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
-
- radeon_set_sh_reg_seq(cs, R_00B120_SPI_SHADER_PGM_LO_VS, 4);
- radeon_emit(cs, va >> 8);
- radeon_emit(cs, S_00B124_MEM_BASE(va >> 40));
- radeon_emit(cs, shader->config.rsrc1);
- radeon_emit(cs, shader->config.rsrc2);
-
- const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline);
- unsigned clip_dist_mask, cull_dist_mask, total_mask;
- clip_dist_mask = outinfo->clip_dist_mask;
- cull_dist_mask = outinfo->cull_dist_mask;
- total_mask = clip_dist_mask | cull_dist_mask;
-
- bool writes_primitive_shading_rate = outinfo->writes_primitive_shading_rate ||
- pipeline->device->force_vrs != RADV_FORCE_VRS_NONE;
- bool misc_vec_ena = outinfo->writes_pointsize ||
- outinfo->writes_layer ||
- outinfo->writes_viewport_index ||
- writes_primitive_shading_rate;
- unsigned spi_vs_out_config, nparams;
-
- /* VS is required to export at least one param. */
- nparams = MAX2(outinfo->param_exports, 1);
- spi_vs_out_config = S_0286C4_VS_EXPORT_COUNT(nparams - 1);
-
- if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
- spi_vs_out_config |= S_0286C4_NO_PC_EXPORT(outinfo->param_exports == 0);
- }
-
- radeon_set_context_reg(ctx_cs, R_0286C4_SPI_VS_OUT_CONFIG, spi_vs_out_config);
-
- radeon_set_context_reg(ctx_cs, R_02870C_SPI_SHADER_POS_FORMAT,
- S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) |
- S_02870C_POS1_EXPORT_FORMAT(outinfo->pos_exports > 1 ?
- V_02870C_SPI_SHADER_4COMP :
- V_02870C_SPI_SHADER_NONE) |
- S_02870C_POS2_EXPORT_FORMAT(outinfo->pos_exports > 2 ?
- V_02870C_SPI_SHADER_4COMP :
- V_02870C_SPI_SHADER_NONE) |
- S_02870C_POS3_EXPORT_FORMAT(outinfo->pos_exports > 3 ?
- V_02870C_SPI_SHADER_4COMP :
- V_02870C_SPI_SHADER_NONE));
-
- radeon_set_context_reg(ctx_cs, R_02881C_PA_CL_VS_OUT_CNTL,
- S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) |
- S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) |
- S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) |
- S_02881C_USE_VTX_VRS_RATE(writes_primitive_shading_rate) |
- S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |
- S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) |
- S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) |
- S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) |
- cull_dist_mask << 8 |
- clip_dist_mask);
-
- if (pipeline->device->physical_device->rad_info.chip_class <= GFX8)
- radeon_set_context_reg(ctx_cs, R_028AB4_VGT_REUSE_OFF,
- outinfo->writes_viewport_index);
+ uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
+
+ radeon_set_sh_reg_seq(cs, R_00B120_SPI_SHADER_PGM_LO_VS, 4);
+ radeon_emit(cs, va >> 8);
+ radeon_emit(cs, S_00B124_MEM_BASE(va >> 40));
+ radeon_emit(cs, shader->config.rsrc1);
+ radeon_emit(cs, shader->config.rsrc2);
+
+ const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline);
+ unsigned clip_dist_mask, cull_dist_mask, total_mask;
+ clip_dist_mask = outinfo->clip_dist_mask;
+ cull_dist_mask = outinfo->cull_dist_mask;
+ total_mask = clip_dist_mask | cull_dist_mask;
+
+ bool writes_primitive_shading_rate =
+ outinfo->writes_primitive_shading_rate || pipeline->device->force_vrs != RADV_FORCE_VRS_NONE;
+ bool misc_vec_ena = outinfo->writes_pointsize || outinfo->writes_layer ||
+ outinfo->writes_viewport_index || writes_primitive_shading_rate;
+ unsigned spi_vs_out_config, nparams;
+
+ /* VS is required to export at least one param. */
+ nparams = MAX2(outinfo->param_exports, 1);
+ spi_vs_out_config = S_0286C4_VS_EXPORT_COUNT(nparams - 1);
+
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
+ spi_vs_out_config |= S_0286C4_NO_PC_EXPORT(outinfo->param_exports == 0);
+ }
+
+ radeon_set_context_reg(ctx_cs, R_0286C4_SPI_VS_OUT_CONFIG, spi_vs_out_config);
+
+ radeon_set_context_reg(
+ ctx_cs, R_02870C_SPI_SHADER_POS_FORMAT,
+ S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) |
+ S_02870C_POS1_EXPORT_FORMAT(outinfo->pos_exports > 1 ? V_02870C_SPI_SHADER_4COMP
+ : V_02870C_SPI_SHADER_NONE) |
+ S_02870C_POS2_EXPORT_FORMAT(outinfo->pos_exports > 2 ? V_02870C_SPI_SHADER_4COMP
+ : V_02870C_SPI_SHADER_NONE) |
+ S_02870C_POS3_EXPORT_FORMAT(outinfo->pos_exports > 3 ? V_02870C_SPI_SHADER_4COMP
+ : V_02870C_SPI_SHADER_NONE));
+
+ radeon_set_context_reg(ctx_cs, R_02881C_PA_CL_VS_OUT_CNTL,
+ S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) |
+ S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) |
+ S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) |
+ S_02881C_USE_VTX_VRS_RATE(writes_primitive_shading_rate) |
+ S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |
+ S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) |
+ S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) |
+ S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) |
+ cull_dist_mask << 8 | clip_dist_mask);
+
+ if (pipeline->device->physical_device->rad_info.chip_class <= GFX8)
+ radeon_set_context_reg(ctx_cs, R_028AB4_VGT_REUSE_OFF, outinfo->writes_viewport_index);
}
static void
-radv_pipeline_generate_hw_es(struct radeon_cmdbuf *cs,
- const struct radv_pipeline *pipeline,
- const struct radv_shader_variant *shader)
+radv_pipeline_generate_hw_es(struct radeon_cmdbuf *cs, const struct radv_pipeline *pipeline,
+ const struct radv_shader_variant *shader)
{
- uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
+ uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
- radeon_set_sh_reg_seq(cs, R_00B320_SPI_SHADER_PGM_LO_ES, 4);
- radeon_emit(cs, va >> 8);
- radeon_emit(cs, S_00B324_MEM_BASE(va >> 40));
- radeon_emit(cs, shader->config.rsrc1);
- radeon_emit(cs, shader->config.rsrc2);
+ radeon_set_sh_reg_seq(cs, R_00B320_SPI_SHADER_PGM_LO_ES, 4);
+ radeon_emit(cs, va >> 8);
+ radeon_emit(cs, S_00B324_MEM_BASE(va >> 40));
+ radeon_emit(cs, shader->config.rsrc1);
+ radeon_emit(cs, shader->config.rsrc2);
}
static void
-radv_pipeline_generate_hw_ls(struct radeon_cmdbuf *cs,
- const struct radv_pipeline *pipeline,
- const struct radv_shader_variant *shader)
+radv_pipeline_generate_hw_ls(struct radeon_cmdbuf *cs, const struct radv_pipeline *pipeline,
+ const struct radv_shader_variant *shader)
{
- unsigned num_lds_blocks = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.num_lds_blocks;
- uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
- uint32_t rsrc2 = shader->config.rsrc2;
-
- radeon_set_sh_reg_seq(cs, R_00B520_SPI_SHADER_PGM_LO_LS, 2);
- radeon_emit(cs, va >> 8);
- radeon_emit(cs, S_00B524_MEM_BASE(va >> 40));
-
- rsrc2 |= S_00B52C_LDS_SIZE(num_lds_blocks);
- if (pipeline->device->physical_device->rad_info.chip_class == GFX7 &&
- pipeline->device->physical_device->rad_info.family != CHIP_HAWAII)
- radeon_set_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, rsrc2);
-
- radeon_set_sh_reg_seq(cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2);
- radeon_emit(cs, shader->config.rsrc1);
- radeon_emit(cs, rsrc2);
+ unsigned num_lds_blocks = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.num_lds_blocks;
+ uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
+ uint32_t rsrc2 = shader->config.rsrc2;
+
+ radeon_set_sh_reg_seq(cs, R_00B520_SPI_SHADER_PGM_LO_LS, 2);
+ radeon_emit(cs, va >> 8);
+ radeon_emit(cs, S_00B524_MEM_BASE(va >> 40));
+
+ rsrc2 |= S_00B52C_LDS_SIZE(num_lds_blocks);
+ if (pipeline->device->physical_device->rad_info.chip_class == GFX7 &&
+ pipeline->device->physical_device->rad_info.family != CHIP_HAWAII)
+ radeon_set_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, rsrc2);
+
+ radeon_set_sh_reg_seq(cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2);
+ radeon_emit(cs, shader->config.rsrc1);
+ radeon_emit(cs, rsrc2);
}
static void
-radv_pipeline_generate_hw_ngg(struct radeon_cmdbuf *ctx_cs,
- struct radeon_cmdbuf *cs,
- const struct radv_pipeline *pipeline,
- const struct radv_shader_variant *shader)
+radv_pipeline_generate_hw_ngg(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
+ const struct radv_pipeline *pipeline,
+ const struct radv_shader_variant *shader)
{
- uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
- gl_shader_stage es_type =
- radv_pipeline_has_tess(pipeline) ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
- struct radv_shader_variant *es =
- es_type == MESA_SHADER_TESS_EVAL ? pipeline->shaders[MESA_SHADER_TESS_EVAL] : pipeline->shaders[MESA_SHADER_VERTEX];
- const struct gfx10_ngg_info *ngg_state = &shader->info.ngg_info;
-
- radeon_set_sh_reg_seq(cs, R_00B320_SPI_SHADER_PGM_LO_ES, 2);
- radeon_emit(cs, va >> 8);
- radeon_emit(cs, S_00B324_MEM_BASE(va >> 40));
- radeon_set_sh_reg_seq(cs, R_00B228_SPI_SHADER_PGM_RSRC1_GS, 2);
- radeon_emit(cs, shader->config.rsrc1);
- radeon_emit(cs, shader->config.rsrc2);
-
- const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline);
- unsigned clip_dist_mask, cull_dist_mask, total_mask;
- clip_dist_mask = outinfo->clip_dist_mask;
- cull_dist_mask = outinfo->cull_dist_mask;
- total_mask = clip_dist_mask | cull_dist_mask;
-
- bool writes_primitive_shading_rate = outinfo->writes_primitive_shading_rate ||
- pipeline->device->force_vrs != RADV_FORCE_VRS_NONE;
- bool misc_vec_ena = outinfo->writes_pointsize ||
- outinfo->writes_layer ||
- outinfo->writes_viewport_index ||
- writes_primitive_shading_rate;
- bool es_enable_prim_id = outinfo->export_prim_id ||
- (es && es->info.uses_prim_id);
- bool break_wave_at_eoi = false;
- unsigned ge_cntl;
- unsigned nparams;
-
- if (es_type == MESA_SHADER_TESS_EVAL) {
- struct radv_shader_variant *gs =
- pipeline->shaders[MESA_SHADER_GEOMETRY];
-
- if (es_enable_prim_id || (gs && gs->info.uses_prim_id))
- break_wave_at_eoi = true;
- }
-
- nparams = MAX2(outinfo->param_exports, 1);
- radeon_set_context_reg(ctx_cs, R_0286C4_SPI_VS_OUT_CONFIG,
- S_0286C4_VS_EXPORT_COUNT(nparams - 1) |
- S_0286C4_NO_PC_EXPORT(outinfo->param_exports == 0));
-
- radeon_set_context_reg(ctx_cs, R_028708_SPI_SHADER_IDX_FORMAT,
- S_028708_IDX0_EXPORT_FORMAT(V_028708_SPI_SHADER_1COMP));
- radeon_set_context_reg(ctx_cs, R_02870C_SPI_SHADER_POS_FORMAT,
- S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) |
- S_02870C_POS1_EXPORT_FORMAT(outinfo->pos_exports > 1 ?
- V_02870C_SPI_SHADER_4COMP :
- V_02870C_SPI_SHADER_NONE) |
- S_02870C_POS2_EXPORT_FORMAT(outinfo->pos_exports > 2 ?
- V_02870C_SPI_SHADER_4COMP :
- V_02870C_SPI_SHADER_NONE) |
- S_02870C_POS3_EXPORT_FORMAT(outinfo->pos_exports > 3 ?
- V_02870C_SPI_SHADER_4COMP :
- V_02870C_SPI_SHADER_NONE));
-
- radeon_set_context_reg(ctx_cs, R_02881C_PA_CL_VS_OUT_CNTL,
- S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) |
- S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) |
- S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) |
- S_02881C_USE_VTX_VRS_RATE(writes_primitive_shading_rate) |
- S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |
- S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) |
- S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) |
- S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) |
- cull_dist_mask << 8 |
- clip_dist_mask);
-
- radeon_set_context_reg(ctx_cs, R_028A84_VGT_PRIMITIVEID_EN,
- S_028A84_PRIMITIVEID_EN(es_enable_prim_id) |
- S_028A84_NGG_DISABLE_PROVOK_REUSE(outinfo->export_prim_id));
-
- radeon_set_context_reg(ctx_cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
- ngg_state->vgt_esgs_ring_itemsize);
-
- /* NGG specific registers. */
- struct radv_shader_variant *gs = pipeline->shaders[MESA_SHADER_GEOMETRY];
- uint32_t gs_num_invocations = gs ? gs->info.gs.invocations : 1;
-
- radeon_set_context_reg(ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL,
- S_028A44_ES_VERTS_PER_SUBGRP(ngg_state->hw_max_esverts) |
- S_028A44_GS_PRIMS_PER_SUBGRP(ngg_state->max_gsprims) |
- S_028A44_GS_INST_PRIMS_IN_SUBGRP(ngg_state->max_gsprims * gs_num_invocations));
- radeon_set_context_reg(ctx_cs, R_0287FC_GE_MAX_OUTPUT_PER_SUBGROUP,
- S_0287FC_MAX_VERTS_PER_SUBGROUP(ngg_state->max_out_verts));
- radeon_set_context_reg(ctx_cs, R_028B4C_GE_NGG_SUBGRP_CNTL,
- S_028B4C_PRIM_AMP_FACTOR(ngg_state->prim_amp_factor) |
- S_028B4C_THDS_PER_SUBGRP(0)); /* for fast launch */
- radeon_set_context_reg(ctx_cs, R_028B90_VGT_GS_INSTANCE_CNT,
- S_028B90_CNT(gs_num_invocations) |
- S_028B90_ENABLE(gs_num_invocations > 1) |
- S_028B90_EN_MAX_VERT_OUT_PER_GS_INSTANCE(ngg_state->max_vert_out_per_gs_instance));
-
- /* User edge flags are set by the pos exports. If user edge flags are
- * not used, we must use hw-generated edge flags and pass them via
- * the prim export to prevent drawing lines on internal edges of
- * decomposed primitives (such as quads) with polygon mode = lines.
- *
- * TODO: We should combine hw-generated edge flags with user edge
- * flags in the shader.
- */
- radeon_set_context_reg(ctx_cs, R_028838_PA_CL_NGG_CNTL,
- S_028838_INDEX_BUF_EDGE_FLAG_ENA(!radv_pipeline_has_tess(pipeline) &&
- !radv_pipeline_has_gs(pipeline)) |
- /* Reuse for NGG. */
- S_028838_VERTEX_REUSE_DEPTH(pipeline->device->physical_device->rad_info.chip_class >= GFX10_3 ? 30 : 0));
-
- ge_cntl = S_03096C_PRIM_GRP_SIZE(ngg_state->max_gsprims) |
- S_03096C_VERT_GRP_SIZE(256) | /* 256 = disable vertex grouping */
- S_03096C_BREAK_WAVE_AT_EOI(break_wave_at_eoi);
-
- /* Bug workaround for a possible hang with non-tessellation cases.
- * Tessellation always sets GE_CNTL.VERT_GRP_SIZE = 0
- *
- * Requirement: GE_CNTL.VERT_GRP_SIZE = VGT_GS_ONCHIP_CNTL.ES_VERTS_PER_SUBGRP - 5
- */
- if (pipeline->device->physical_device->rad_info.chip_class == GFX10 &&
- !radv_pipeline_has_tess(pipeline) &&
- ngg_state->hw_max_esverts != 256) {
- ge_cntl &= C_03096C_VERT_GRP_SIZE;
-
- if (ngg_state->hw_max_esverts > 5) {
- ge_cntl |= S_03096C_VERT_GRP_SIZE(ngg_state->hw_max_esverts - 5);
- }
- }
-
- radeon_set_uconfig_reg(ctx_cs, R_03096C_GE_CNTL, ge_cntl);
+ uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
+ gl_shader_stage es_type =
+ radv_pipeline_has_tess(pipeline) ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
+ struct radv_shader_variant *es = es_type == MESA_SHADER_TESS_EVAL
+ ? pipeline->shaders[MESA_SHADER_TESS_EVAL]
+ : pipeline->shaders[MESA_SHADER_VERTEX];
+ const struct gfx10_ngg_info *ngg_state = &shader->info.ngg_info;
+
+ radeon_set_sh_reg_seq(cs, R_00B320_SPI_SHADER_PGM_LO_ES, 2);
+ radeon_emit(cs, va >> 8);
+ radeon_emit(cs, S_00B324_MEM_BASE(va >> 40));
+ radeon_set_sh_reg_seq(cs, R_00B228_SPI_SHADER_PGM_RSRC1_GS, 2);
+ radeon_emit(cs, shader->config.rsrc1);
+ radeon_emit(cs, shader->config.rsrc2);
+
+ const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline);
+ unsigned clip_dist_mask, cull_dist_mask, total_mask;
+ clip_dist_mask = outinfo->clip_dist_mask;
+ cull_dist_mask = outinfo->cull_dist_mask;
+ total_mask = clip_dist_mask | cull_dist_mask;
+
+ bool writes_primitive_shading_rate =
+ outinfo->writes_primitive_shading_rate || pipeline->device->force_vrs != RADV_FORCE_VRS_NONE;
+ bool misc_vec_ena = outinfo->writes_pointsize || outinfo->writes_layer ||
+ outinfo->writes_viewport_index || writes_primitive_shading_rate;
+ bool es_enable_prim_id = outinfo->export_prim_id || (es && es->info.uses_prim_id);
+ bool break_wave_at_eoi = false;
+ unsigned ge_cntl;
+ unsigned nparams;
+
+ if (es_type == MESA_SHADER_TESS_EVAL) {
+ struct radv_shader_variant *gs = pipeline->shaders[MESA_SHADER_GEOMETRY];
+
+ if (es_enable_prim_id || (gs && gs->info.uses_prim_id))
+ break_wave_at_eoi = true;
+ }
+
+ nparams = MAX2(outinfo->param_exports, 1);
+ radeon_set_context_reg(
+ ctx_cs, R_0286C4_SPI_VS_OUT_CONFIG,
+ S_0286C4_VS_EXPORT_COUNT(nparams - 1) | S_0286C4_NO_PC_EXPORT(outinfo->param_exports == 0));
+
+ radeon_set_context_reg(ctx_cs, R_028708_SPI_SHADER_IDX_FORMAT,
+ S_028708_IDX0_EXPORT_FORMAT(V_028708_SPI_SHADER_1COMP));
+ radeon_set_context_reg(
+ ctx_cs, R_02870C_SPI_SHADER_POS_FORMAT,
+ S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) |
+ S_02870C_POS1_EXPORT_FORMAT(outinfo->pos_exports > 1 ? V_02870C_SPI_SHADER_4COMP
+ : V_02870C_SPI_SHADER_NONE) |
+ S_02870C_POS2_EXPORT_FORMAT(outinfo->pos_exports > 2 ? V_02870C_SPI_SHADER_4COMP
+ : V_02870C_SPI_SHADER_NONE) |
+ S_02870C_POS3_EXPORT_FORMAT(outinfo->pos_exports > 3 ? V_02870C_SPI_SHADER_4COMP
+ : V_02870C_SPI_SHADER_NONE));
+
+ radeon_set_context_reg(ctx_cs, R_02881C_PA_CL_VS_OUT_CNTL,
+ S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) |
+ S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) |
+ S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) |
+ S_02881C_USE_VTX_VRS_RATE(writes_primitive_shading_rate) |
+ S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |
+ S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) |
+ S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) |
+ S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) |
+ cull_dist_mask << 8 | clip_dist_mask);
+
+ radeon_set_context_reg(ctx_cs, R_028A84_VGT_PRIMITIVEID_EN,
+ S_028A84_PRIMITIVEID_EN(es_enable_prim_id) |
+ S_028A84_NGG_DISABLE_PROVOK_REUSE(outinfo->export_prim_id));
+
+ radeon_set_context_reg(ctx_cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
+ ngg_state->vgt_esgs_ring_itemsize);
+
+ /* NGG specific registers. */
+ struct radv_shader_variant *gs = pipeline->shaders[MESA_SHADER_GEOMETRY];
+ uint32_t gs_num_invocations = gs ? gs->info.gs.invocations : 1;
+
+ radeon_set_context_reg(
+ ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL,
+ S_028A44_ES_VERTS_PER_SUBGRP(ngg_state->hw_max_esverts) |
+ S_028A44_GS_PRIMS_PER_SUBGRP(ngg_state->max_gsprims) |
+ S_028A44_GS_INST_PRIMS_IN_SUBGRP(ngg_state->max_gsprims * gs_num_invocations));
+ radeon_set_context_reg(ctx_cs, R_0287FC_GE_MAX_OUTPUT_PER_SUBGROUP,
+ S_0287FC_MAX_VERTS_PER_SUBGROUP(ngg_state->max_out_verts));
+ radeon_set_context_reg(ctx_cs, R_028B4C_GE_NGG_SUBGRP_CNTL,
+ S_028B4C_PRIM_AMP_FACTOR(ngg_state->prim_amp_factor) |
+ S_028B4C_THDS_PER_SUBGRP(0)); /* for fast launch */
+ radeon_set_context_reg(
+ ctx_cs, R_028B90_VGT_GS_INSTANCE_CNT,
+ S_028B90_CNT(gs_num_invocations) | S_028B90_ENABLE(gs_num_invocations > 1) |
+ S_028B90_EN_MAX_VERT_OUT_PER_GS_INSTANCE(ngg_state->max_vert_out_per_gs_instance));
+
+ /* User edge flags are set by the pos exports. If user edge flags are
+ * not used, we must use hw-generated edge flags and pass them via
+ * the prim export to prevent drawing lines on internal edges of
+ * decomposed primitives (such as quads) with polygon mode = lines.
+ *
+ * TODO: We should combine hw-generated edge flags with user edge
+ * flags in the shader.
+ */
+ radeon_set_context_reg(
+ ctx_cs, R_028838_PA_CL_NGG_CNTL,
+ S_028838_INDEX_BUF_EDGE_FLAG_ENA(!radv_pipeline_has_tess(pipeline) &&
+ !radv_pipeline_has_gs(pipeline)) |
+ /* Reuse for NGG. */
+ S_028838_VERTEX_REUSE_DEPTH(
+ pipeline->device->physical_device->rad_info.chip_class >= GFX10_3 ? 30 : 0));
+
+ ge_cntl = S_03096C_PRIM_GRP_SIZE(ngg_state->max_gsprims) |
+ S_03096C_VERT_GRP_SIZE(256) | /* 256 = disable vertex grouping */
+ S_03096C_BREAK_WAVE_AT_EOI(break_wave_at_eoi);
+
+ /* Bug workaround for a possible hang with non-tessellation cases.
+ * Tessellation always sets GE_CNTL.VERT_GRP_SIZE = 0
+ *
+ * Requirement: GE_CNTL.VERT_GRP_SIZE = VGT_GS_ONCHIP_CNTL.ES_VERTS_PER_SUBGRP - 5
+ */
+ if (pipeline->device->physical_device->rad_info.chip_class == GFX10 &&
+ !radv_pipeline_has_tess(pipeline) && ngg_state->hw_max_esverts != 256) {
+ ge_cntl &= C_03096C_VERT_GRP_SIZE;
+
+ if (ngg_state->hw_max_esverts > 5) {
+ ge_cntl |= S_03096C_VERT_GRP_SIZE(ngg_state->hw_max_esverts - 5);
+ }
+ }
+
+ radeon_set_uconfig_reg(ctx_cs, R_03096C_GE_CNTL, ge_cntl);
}
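To put a number on the GE_CNTL workaround above: with an illustrative hw_max_esverts of 192 on GFX10 without tessellation (an example value, the code does not fix one), the fixup replaces the "vertex grouping disabled" value of 256 as follows:

   /* Illustrative instantiation of the VERT_GRP_SIZE workaround above;
    * 192 stands in for ngg_state->hw_max_esverts. */
   ge_cntl &= C_03096C_VERT_GRP_SIZE;          /* clear VERT_GRP_SIZE(256)            */
   ge_cntl |= S_03096C_VERT_GRP_SIZE(192 - 5); /* ES_VERTS_PER_SUBGRP - 5, i.e. 187   */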
static void
-radv_pipeline_generate_hw_hs(struct radeon_cmdbuf *cs,
- const struct radv_pipeline *pipeline,
- const struct radv_shader_variant *shader)
+radv_pipeline_generate_hw_hs(struct radeon_cmdbuf *cs, const struct radv_pipeline *pipeline,
+ const struct radv_shader_variant *shader)
{
- uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
-
- if (pipeline->device->physical_device->rad_info.chip_class >= GFX9) {
- if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
- radeon_set_sh_reg_seq(cs, R_00B520_SPI_SHADER_PGM_LO_LS, 2);
- radeon_emit(cs, va >> 8);
- radeon_emit(cs, S_00B524_MEM_BASE(va >> 40));
- } else {
- radeon_set_sh_reg_seq(cs, R_00B410_SPI_SHADER_PGM_LO_LS, 2);
- radeon_emit(cs, va >> 8);
- radeon_emit(cs, S_00B414_MEM_BASE(va >> 40));
- }
-
- radeon_set_sh_reg_seq(cs, R_00B428_SPI_SHADER_PGM_RSRC1_HS, 2);
- radeon_emit(cs, shader->config.rsrc1);
- radeon_emit(cs, shader->config.rsrc2);
- } else {
- radeon_set_sh_reg_seq(cs, R_00B420_SPI_SHADER_PGM_LO_HS, 4);
- radeon_emit(cs, va >> 8);
- radeon_emit(cs, S_00B424_MEM_BASE(va >> 40));
- radeon_emit(cs, shader->config.rsrc1);
- radeon_emit(cs, shader->config.rsrc2);
- }
+ uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
+
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX9) {
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
+ radeon_set_sh_reg_seq(cs, R_00B520_SPI_SHADER_PGM_LO_LS, 2);
+ radeon_emit(cs, va >> 8);
+ radeon_emit(cs, S_00B524_MEM_BASE(va >> 40));
+ } else {
+ radeon_set_sh_reg_seq(cs, R_00B410_SPI_SHADER_PGM_LO_LS, 2);
+ radeon_emit(cs, va >> 8);
+ radeon_emit(cs, S_00B414_MEM_BASE(va >> 40));
+ }
+
+ radeon_set_sh_reg_seq(cs, R_00B428_SPI_SHADER_PGM_RSRC1_HS, 2);
+ radeon_emit(cs, shader->config.rsrc1);
+ radeon_emit(cs, shader->config.rsrc2);
+ } else {
+ radeon_set_sh_reg_seq(cs, R_00B420_SPI_SHADER_PGM_LO_HS, 4);
+ radeon_emit(cs, va >> 8);
+ radeon_emit(cs, S_00B424_MEM_BASE(va >> 40));
+ radeon_emit(cs, shader->config.rsrc1);
+ radeon_emit(cs, shader->config.rsrc2);
+ }
}
static void
-radv_pipeline_generate_vertex_shader(struct radeon_cmdbuf *ctx_cs,
- struct radeon_cmdbuf *cs,
- const struct radv_pipeline *pipeline)
+radv_pipeline_generate_vertex_shader(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
+ const struct radv_pipeline *pipeline)
{
- struct radv_shader_variant *vs;
-
- /* Skip shaders merged into HS/GS */
- vs = pipeline->shaders[MESA_SHADER_VERTEX];
- if (!vs)
- return;
-
- if (vs->info.vs.as_ls)
- radv_pipeline_generate_hw_ls(cs, pipeline, vs);
- else if (vs->info.vs.as_es)
- radv_pipeline_generate_hw_es(cs, pipeline, vs);
- else if (vs->info.is_ngg)
- radv_pipeline_generate_hw_ngg(ctx_cs, cs, pipeline, vs);
- else
- radv_pipeline_generate_hw_vs(ctx_cs, cs, pipeline, vs);
+ struct radv_shader_variant *vs;
+
+ /* Skip shaders merged into HS/GS */
+ vs = pipeline->shaders[MESA_SHADER_VERTEX];
+ if (!vs)
+ return;
+
+ if (vs->info.vs.as_ls)
+ radv_pipeline_generate_hw_ls(cs, pipeline, vs);
+ else if (vs->info.vs.as_es)
+ radv_pipeline_generate_hw_es(cs, pipeline, vs);
+ else if (vs->info.is_ngg)
+ radv_pipeline_generate_hw_ngg(ctx_cs, cs, pipeline, vs);
+ else
+ radv_pipeline_generate_hw_vs(ctx_cs, cs, pipeline, vs);
}
static void
-radv_pipeline_generate_tess_shaders(struct radeon_cmdbuf *ctx_cs,
- struct radeon_cmdbuf *cs,
- const struct radv_pipeline *pipeline)
+radv_pipeline_generate_tess_shaders(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
+ const struct radv_pipeline *pipeline)
{
- struct radv_shader_variant *tes, *tcs;
-
- tcs = pipeline->shaders[MESA_SHADER_TESS_CTRL];
- tes = pipeline->shaders[MESA_SHADER_TESS_EVAL];
-
- if (tes) {
- if (tes->info.is_ngg) {
- radv_pipeline_generate_hw_ngg(ctx_cs, cs, pipeline, tes);
- } else if (tes->info.tes.as_es)
- radv_pipeline_generate_hw_es(cs, pipeline, tes);
- else
- radv_pipeline_generate_hw_vs(ctx_cs, cs, pipeline, tes);
- }
-
- radv_pipeline_generate_hw_hs(cs, pipeline, tcs);
-
- if (pipeline->device->physical_device->rad_info.chip_class >= GFX10 &&
- !radv_pipeline_has_gs(pipeline) && !radv_pipeline_has_ngg(pipeline)) {
- radeon_set_context_reg(ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL,
- S_028A44_ES_VERTS_PER_SUBGRP(250) |
- S_028A44_GS_PRIMS_PER_SUBGRP(126) |
- S_028A44_GS_INST_PRIMS_IN_SUBGRP(126));
- }
+ struct radv_shader_variant *tes, *tcs;
+
+ tcs = pipeline->shaders[MESA_SHADER_TESS_CTRL];
+ tes = pipeline->shaders[MESA_SHADER_TESS_EVAL];
+
+ if (tes) {
+ if (tes->info.is_ngg) {
+ radv_pipeline_generate_hw_ngg(ctx_cs, cs, pipeline, tes);
+ } else if (tes->info.tes.as_es)
+ radv_pipeline_generate_hw_es(cs, pipeline, tes);
+ else
+ radv_pipeline_generate_hw_vs(ctx_cs, cs, pipeline, tes);
+ }
+
+ radv_pipeline_generate_hw_hs(cs, pipeline, tcs);
+
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX10 &&
+ !radv_pipeline_has_gs(pipeline) && !radv_pipeline_has_ngg(pipeline)) {
+ radeon_set_context_reg(ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL,
+ S_028A44_ES_VERTS_PER_SUBGRP(250) | S_028A44_GS_PRIMS_PER_SUBGRP(126) |
+ S_028A44_GS_INST_PRIMS_IN_SUBGRP(126));
+ }
}
static void
radv_pipeline_generate_tess_state(struct radeon_cmdbuf *ctx_cs,
- const struct radv_pipeline *pipeline,
- const VkGraphicsPipelineCreateInfo *pCreateInfo)
+ const struct radv_pipeline *pipeline,
+ const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
- struct radv_shader_variant *tes = radv_get_shader(pipeline, MESA_SHADER_TESS_EVAL);
- unsigned type = 0, partitioning = 0, topology = 0, distribution_mode = 0;
- unsigned num_tcs_input_cp, num_tcs_output_cp, num_patches;
- unsigned ls_hs_config;
-
- num_tcs_input_cp = pCreateInfo->pTessellationState->patchControlPoints;
- num_tcs_output_cp = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.tcs_vertices_out; //TCS VERTICES OUT
- num_patches = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.num_tess_patches;
-
- ls_hs_config = S_028B58_NUM_PATCHES(num_patches) |
- S_028B58_HS_NUM_INPUT_CP(num_tcs_input_cp) |
- S_028B58_HS_NUM_OUTPUT_CP(num_tcs_output_cp);
-
- if (pipeline->device->physical_device->rad_info.chip_class >= GFX7) {
- radeon_set_context_reg_idx(ctx_cs, R_028B58_VGT_LS_HS_CONFIG,
- 2, ls_hs_config);
- } else {
- radeon_set_context_reg(ctx_cs, R_028B58_VGT_LS_HS_CONFIG,
- ls_hs_config);
- }
-
- switch (tes->info.tes.primitive_mode) {
- case GL_TRIANGLES:
- type = V_028B6C_TESS_TRIANGLE;
- break;
- case GL_QUADS:
- type = V_028B6C_TESS_QUAD;
- break;
- case GL_ISOLINES:
- type = V_028B6C_TESS_ISOLINE;
- break;
- }
-
- switch (tes->info.tes.spacing) {
- case TESS_SPACING_EQUAL:
- partitioning = V_028B6C_PART_INTEGER;
- break;
- case TESS_SPACING_FRACTIONAL_ODD:
- partitioning = V_028B6C_PART_FRAC_ODD;
- break;
- case TESS_SPACING_FRACTIONAL_EVEN:
- partitioning = V_028B6C_PART_FRAC_EVEN;
- break;
- default:
- break;
- }
-
- bool ccw = tes->info.tes.ccw;
- const VkPipelineTessellationDomainOriginStateCreateInfo *domain_origin_state =
- vk_find_struct_const(pCreateInfo->pTessellationState,
- PIPELINE_TESSELLATION_DOMAIN_ORIGIN_STATE_CREATE_INFO);
-
- if (domain_origin_state && domain_origin_state->domainOrigin != VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT)
- ccw = !ccw;
-
- if (tes->info.tes.point_mode)
- topology = V_028B6C_OUTPUT_POINT;
- else if (tes->info.tes.primitive_mode == GL_ISOLINES)
- topology = V_028B6C_OUTPUT_LINE;
- else if (ccw)
- topology = V_028B6C_OUTPUT_TRIANGLE_CCW;
- else
- topology = V_028B6C_OUTPUT_TRIANGLE_CW;
-
- if (pipeline->device->physical_device->rad_info.has_distributed_tess) {
- if (pipeline->device->physical_device->rad_info.family == CHIP_FIJI ||
- pipeline->device->physical_device->rad_info.family >= CHIP_POLARIS10)
- distribution_mode = V_028B6C_TRAPEZOIDS;
- else
- distribution_mode = V_028B6C_DONUTS;
- } else
- distribution_mode = V_028B6C_NO_DIST;
-
- radeon_set_context_reg(ctx_cs, R_028B6C_VGT_TF_PARAM,
- S_028B6C_TYPE(type) |
- S_028B6C_PARTITIONING(partitioning) |
- S_028B6C_TOPOLOGY(topology) |
- S_028B6C_DISTRIBUTION_MODE(distribution_mode));
+ struct radv_shader_variant *tes = radv_get_shader(pipeline, MESA_SHADER_TESS_EVAL);
+ unsigned type = 0, partitioning = 0, topology = 0, distribution_mode = 0;
+ unsigned num_tcs_input_cp, num_tcs_output_cp, num_patches;
+ unsigned ls_hs_config;
+
+ num_tcs_input_cp = pCreateInfo->pTessellationState->patchControlPoints;
+ num_tcs_output_cp =
+ pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.tcs_vertices_out; // TCS VERTICES OUT
+ num_patches = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.num_tess_patches;
+
+ ls_hs_config = S_028B58_NUM_PATCHES(num_patches) | S_028B58_HS_NUM_INPUT_CP(num_tcs_input_cp) |
+ S_028B58_HS_NUM_OUTPUT_CP(num_tcs_output_cp);
+
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX7) {
+ radeon_set_context_reg_idx(ctx_cs, R_028B58_VGT_LS_HS_CONFIG, 2, ls_hs_config);
+ } else {
+ radeon_set_context_reg(ctx_cs, R_028B58_VGT_LS_HS_CONFIG, ls_hs_config);
+ }
+
+ switch (tes->info.tes.primitive_mode) {
+ case GL_TRIANGLES:
+ type = V_028B6C_TESS_TRIANGLE;
+ break;
+ case GL_QUADS:
+ type = V_028B6C_TESS_QUAD;
+ break;
+ case GL_ISOLINES:
+ type = V_028B6C_TESS_ISOLINE;
+ break;
+ }
+
+ switch (tes->info.tes.spacing) {
+ case TESS_SPACING_EQUAL:
+ partitioning = V_028B6C_PART_INTEGER;
+ break;
+ case TESS_SPACING_FRACTIONAL_ODD:
+ partitioning = V_028B6C_PART_FRAC_ODD;
+ break;
+ case TESS_SPACING_FRACTIONAL_EVEN:
+ partitioning = V_028B6C_PART_FRAC_EVEN;
+ break;
+ default:
+ break;
+ }
+
+ bool ccw = tes->info.tes.ccw;
+ const VkPipelineTessellationDomainOriginStateCreateInfo *domain_origin_state =
+ vk_find_struct_const(pCreateInfo->pTessellationState,
+ PIPELINE_TESSELLATION_DOMAIN_ORIGIN_STATE_CREATE_INFO);
+
+ if (domain_origin_state &&
+ domain_origin_state->domainOrigin != VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT)
+ ccw = !ccw;
+
+ if (tes->info.tes.point_mode)
+ topology = V_028B6C_OUTPUT_POINT;
+ else if (tes->info.tes.primitive_mode == GL_ISOLINES)
+ topology = V_028B6C_OUTPUT_LINE;
+ else if (ccw)
+ topology = V_028B6C_OUTPUT_TRIANGLE_CCW;
+ else
+ topology = V_028B6C_OUTPUT_TRIANGLE_CW;
+
+ if (pipeline->device->physical_device->rad_info.has_distributed_tess) {
+ if (pipeline->device->physical_device->rad_info.family == CHIP_FIJI ||
+ pipeline->device->physical_device->rad_info.family >= CHIP_POLARIS10)
+ distribution_mode = V_028B6C_TRAPEZOIDS;
+ else
+ distribution_mode = V_028B6C_DONUTS;
+ } else
+ distribution_mode = V_028B6C_NO_DIST;
+
+ radeon_set_context_reg(ctx_cs, R_028B6C_VGT_TF_PARAM,
+ S_028B6C_TYPE(type) | S_028B6C_PARTITIONING(partitioning) |
+ S_028B6C_TOPOLOGY(topology) |
+ S_028B6C_DISTRIBUTION_MODE(distribution_mode));
}
static void
-radv_pipeline_generate_hw_gs(struct radeon_cmdbuf *ctx_cs,
- struct radeon_cmdbuf *cs,
- const struct radv_pipeline *pipeline,
- const struct radv_shader_variant *gs)
+radv_pipeline_generate_hw_gs(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
+ const struct radv_pipeline *pipeline,
+ const struct radv_shader_variant *gs)
{
- const struct gfx9_gs_info *gs_state = &gs->info.gs_ring_info;
- unsigned gs_max_out_vertices;
- const uint8_t *num_components;
- uint8_t max_stream;
- unsigned offset;
- uint64_t va;
-
- gs_max_out_vertices = gs->info.gs.vertices_out;
- max_stream = gs->info.gs.max_stream;
- num_components = gs->info.gs.num_stream_output_components;
-
- offset = num_components[0] * gs_max_out_vertices;
-
- radeon_set_context_reg_seq(ctx_cs, R_028A60_VGT_GSVS_RING_OFFSET_1, 3);
- radeon_emit(ctx_cs, offset);
- if (max_stream >= 1)
- offset += num_components[1] * gs_max_out_vertices;
- radeon_emit(ctx_cs, offset);
- if (max_stream >= 2)
- offset += num_components[2] * gs_max_out_vertices;
- radeon_emit(ctx_cs, offset);
- if (max_stream >= 3)
- offset += num_components[3] * gs_max_out_vertices;
- radeon_set_context_reg(ctx_cs, R_028AB0_VGT_GSVS_RING_ITEMSIZE, offset);
-
- radeon_set_context_reg_seq(ctx_cs, R_028B5C_VGT_GS_VERT_ITEMSIZE, 4);
- radeon_emit(ctx_cs, num_components[0]);
- radeon_emit(ctx_cs, (max_stream >= 1) ? num_components[1] : 0);
- radeon_emit(ctx_cs, (max_stream >= 2) ? num_components[2] : 0);
- radeon_emit(ctx_cs, (max_stream >= 3) ? num_components[3] : 0);
-
- uint32_t gs_num_invocations = gs->info.gs.invocations;
- radeon_set_context_reg(ctx_cs, R_028B90_VGT_GS_INSTANCE_CNT,
- S_028B90_CNT(MIN2(gs_num_invocations, 127)) |
- S_028B90_ENABLE(gs_num_invocations > 0));
-
- radeon_set_context_reg(ctx_cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
- gs_state->vgt_esgs_ring_itemsize);
-
- va = radv_buffer_get_va(gs->bo) + gs->bo_offset;
-
- if (pipeline->device->physical_device->rad_info.chip_class >= GFX9) {
- if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
- radeon_set_sh_reg_seq(cs, R_00B320_SPI_SHADER_PGM_LO_ES, 2);
- radeon_emit(cs, va >> 8);
- radeon_emit(cs, S_00B324_MEM_BASE(va >> 40));
- } else {
- radeon_set_sh_reg_seq(cs, R_00B210_SPI_SHADER_PGM_LO_ES, 2);
- radeon_emit(cs, va >> 8);
- radeon_emit(cs, S_00B214_MEM_BASE(va >> 40));
- }
-
- radeon_set_sh_reg_seq(cs, R_00B228_SPI_SHADER_PGM_RSRC1_GS, 2);
- radeon_emit(cs, gs->config.rsrc1);
- radeon_emit(cs, gs->config.rsrc2 | S_00B22C_LDS_SIZE(gs_state->lds_size));
-
- radeon_set_context_reg(ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL, gs_state->vgt_gs_onchip_cntl);
- radeon_set_context_reg(ctx_cs, R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP, gs_state->vgt_gs_max_prims_per_subgroup);
- } else {
- radeon_set_sh_reg_seq(cs, R_00B220_SPI_SHADER_PGM_LO_GS, 4);
- radeon_emit(cs, va >> 8);
- radeon_emit(cs, S_00B224_MEM_BASE(va >> 40));
- radeon_emit(cs, gs->config.rsrc1);
- radeon_emit(cs, gs->config.rsrc2);
- }
-
- radv_pipeline_generate_hw_vs(ctx_cs, cs, pipeline, pipeline->gs_copy_shader);
+ const struct gfx9_gs_info *gs_state = &gs->info.gs_ring_info;
+ unsigned gs_max_out_vertices;
+ const uint8_t *num_components;
+ uint8_t max_stream;
+ unsigned offset;
+ uint64_t va;
+
+ gs_max_out_vertices = gs->info.gs.vertices_out;
+ max_stream = gs->info.gs.max_stream;
+ num_components = gs->info.gs.num_stream_output_components;
+
+ offset = num_components[0] * gs_max_out_vertices;
+
+ radeon_set_context_reg_seq(ctx_cs, R_028A60_VGT_GSVS_RING_OFFSET_1, 3);
+ radeon_emit(ctx_cs, offset);
+ if (max_stream >= 1)
+ offset += num_components[1] * gs_max_out_vertices;
+ radeon_emit(ctx_cs, offset);
+ if (max_stream >= 2)
+ offset += num_components[2] * gs_max_out_vertices;
+ radeon_emit(ctx_cs, offset);
+ if (max_stream >= 3)
+ offset += num_components[3] * gs_max_out_vertices;
+ radeon_set_context_reg(ctx_cs, R_028AB0_VGT_GSVS_RING_ITEMSIZE, offset);
+
+ radeon_set_context_reg_seq(ctx_cs, R_028B5C_VGT_GS_VERT_ITEMSIZE, 4);
+ radeon_emit(ctx_cs, num_components[0]);
+ radeon_emit(ctx_cs, (max_stream >= 1) ? num_components[1] : 0);
+ radeon_emit(ctx_cs, (max_stream >= 2) ? num_components[2] : 0);
+ radeon_emit(ctx_cs, (max_stream >= 3) ? num_components[3] : 0);
+
+ uint32_t gs_num_invocations = gs->info.gs.invocations;
+ radeon_set_context_reg(
+ ctx_cs, R_028B90_VGT_GS_INSTANCE_CNT,
+ S_028B90_CNT(MIN2(gs_num_invocations, 127)) | S_028B90_ENABLE(gs_num_invocations > 0));
+
+ radeon_set_context_reg(ctx_cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
+ gs_state->vgt_esgs_ring_itemsize);
+
+ va = radv_buffer_get_va(gs->bo) + gs->bo_offset;
+
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX9) {
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
+ radeon_set_sh_reg_seq(cs, R_00B320_SPI_SHADER_PGM_LO_ES, 2);
+ radeon_emit(cs, va >> 8);
+ radeon_emit(cs, S_00B324_MEM_BASE(va >> 40));
+ } else {
+ radeon_set_sh_reg_seq(cs, R_00B210_SPI_SHADER_PGM_LO_ES, 2);
+ radeon_emit(cs, va >> 8);
+ radeon_emit(cs, S_00B214_MEM_BASE(va >> 40));
+ }
+
+ radeon_set_sh_reg_seq(cs, R_00B228_SPI_SHADER_PGM_RSRC1_GS, 2);
+ radeon_emit(cs, gs->config.rsrc1);
+ radeon_emit(cs, gs->config.rsrc2 | S_00B22C_LDS_SIZE(gs_state->lds_size));
+
+ radeon_set_context_reg(ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL, gs_state->vgt_gs_onchip_cntl);
+ radeon_set_context_reg(ctx_cs, R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP,
+ gs_state->vgt_gs_max_prims_per_subgroup);
+ } else {
+ radeon_set_sh_reg_seq(cs, R_00B220_SPI_SHADER_PGM_LO_GS, 4);
+ radeon_emit(cs, va >> 8);
+ radeon_emit(cs, S_00B224_MEM_BASE(va >> 40));
+ radeon_emit(cs, gs->config.rsrc1);
+ radeon_emit(cs, gs->config.rsrc2);
+ }
+
+ radv_pipeline_generate_hw_vs(ctx_cs, cs, pipeline, pipeline->gs_copy_shader);
}
static void
-radv_pipeline_generate_geometry_shader(struct radeon_cmdbuf *ctx_cs,
- struct radeon_cmdbuf *cs,
- const struct radv_pipeline *pipeline)
+radv_pipeline_generate_geometry_shader(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
+ const struct radv_pipeline *pipeline)
{
- struct radv_shader_variant *gs;
+ struct radv_shader_variant *gs;
- gs = pipeline->shaders[MESA_SHADER_GEOMETRY];
- if (!gs)
- return;
+ gs = pipeline->shaders[MESA_SHADER_GEOMETRY];
+ if (!gs)
+ return;
- if (gs->info.is_ngg)
- radv_pipeline_generate_hw_ngg(ctx_cs, cs, pipeline, gs);
- else
- radv_pipeline_generate_hw_gs(ctx_cs, cs, pipeline, gs);
+ if (gs->info.is_ngg)
+ radv_pipeline_generate_hw_ngg(ctx_cs, cs, pipeline, gs);
+ else
+ radv_pipeline_generate_hw_gs(ctx_cs, cs, pipeline, gs);
- radeon_set_context_reg(ctx_cs, R_028B38_VGT_GS_MAX_VERT_OUT,
- gs->info.gs.vertices_out);
+ radeon_set_context_reg(ctx_cs, R_028B38_VGT_GS_MAX_VERT_OUT, gs->info.gs.vertices_out);
}
-static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade,
- bool explicit, bool float16)
+static uint32_t
+offset_to_ps_input(uint32_t offset, bool flat_shade, bool explicit, bool float16)
{
- uint32_t ps_input_cntl;
- if (offset <= AC_EXP_PARAM_OFFSET_31) {
- ps_input_cntl = S_028644_OFFSET(offset);
- if (flat_shade || explicit)
- ps_input_cntl |= S_028644_FLAT_SHADE(1);
- if (explicit) {
- /* Force parameter cache to be read in passthrough
- * mode.
- */
- ps_input_cntl |= S_028644_OFFSET(1 << 5);
- }
- if (float16) {
- ps_input_cntl |= S_028644_FP16_INTERP_MODE(1) |
- S_028644_ATTR0_VALID(1);
- }
- } else {
- /* The input is a DEFAULT_VAL constant. */
- assert(offset >= AC_EXP_PARAM_DEFAULT_VAL_0000 &&
- offset <= AC_EXP_PARAM_DEFAULT_VAL_1111);
- offset -= AC_EXP_PARAM_DEFAULT_VAL_0000;
- ps_input_cntl = S_028644_OFFSET(0x20) |
- S_028644_DEFAULT_VAL(offset);
- }
- return ps_input_cntl;
+ uint32_t ps_input_cntl;
+ if (offset <= AC_EXP_PARAM_OFFSET_31) {
+ ps_input_cntl = S_028644_OFFSET(offset);
+ if (flat_shade || explicit)
+ ps_input_cntl |= S_028644_FLAT_SHADE(1);
+ if (explicit) {
+ /* Force parameter cache to be read in passthrough
+ * mode.
+ */
+ ps_input_cntl |= S_028644_OFFSET(1 << 5);
+ }
+ if (float16) {
+ ps_input_cntl |= S_028644_FP16_INTERP_MODE(1) | S_028644_ATTR0_VALID(1);
+ }
+ } else {
+ /* The input is a DEFAULT_VAL constant. */
+ assert(offset >= AC_EXP_PARAM_DEFAULT_VAL_0000 && offset <= AC_EXP_PARAM_DEFAULT_VAL_1111);
+ offset -= AC_EXP_PARAM_DEFAULT_VAL_0000;
+ ps_input_cntl = S_028644_OFFSET(0x20) | S_028644_DEFAULT_VAL(offset);
+ }
+ return ps_input_cntl;
}
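As a usage sketch for the helper above (values spelled out for illustration, not lines from this commit):

   /* What offset_to_ps_input() produces for two common cases. */
   uint32_t flat = offset_to_ps_input(5, true, false, false);
   /* == S_028644_OFFSET(5) | S_028644_FLAT_SHADE(1) */

   uint32_t zero = offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, false, false, false);
   /* == S_028644_OFFSET(0x20) | S_028644_DEFAULT_VAL(0): the input reads the
    * hardware constant (0, 0, 0, 0) instead of a parameter-cache slot. */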
static void
-radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs,
- const struct radv_pipeline *pipeline)
+radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs, const struct radv_pipeline *pipeline)
{
- struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
- const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline);
- uint32_t ps_input_cntl[32];
-
- unsigned ps_offset = 0;
-
- if (ps->info.ps.prim_id_input) {
- unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID];
- if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
- ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false, false);
- ++ps_offset;
- }
- }
-
- if (ps->info.ps.layer_input ||
- ps->info.needs_multiview_view_index) {
- unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_LAYER];
- if (vs_offset != AC_EXP_PARAM_UNDEFINED)
- ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false, false);
- else
- ps_input_cntl[ps_offset] = offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, true, false, false);
- ++ps_offset;
- }
-
- if (ps->info.ps.viewport_index_input) {
- unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_VIEWPORT];
- if (vs_offset != AC_EXP_PARAM_UNDEFINED)
- ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false, false);
- else
- ps_input_cntl[ps_offset] = offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, true, false, false);
- ++ps_offset;
- }
-
- if (ps->info.ps.has_pcoord) {
- unsigned val;
- val = S_028644_PT_SPRITE_TEX(1) | S_028644_OFFSET(0x20);
- ps_input_cntl[ps_offset] = val;
- ps_offset++;
- }
-
- if (ps->info.ps.num_input_clips_culls) {
- unsigned vs_offset;
-
- vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0];
- if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
- ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false, false, false);
- ++ps_offset;
- }
-
- vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1];
- if (vs_offset != AC_EXP_PARAM_UNDEFINED &&
- ps->info.ps.num_input_clips_culls > 4) {
- ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false, false, false);
- ++ps_offset;
- }
- }
-
- for (unsigned i = 0; i < 32 && (1u << i) <= ps->info.ps.input_mask; ++i) {
- unsigned vs_offset;
- bool flat_shade;
- bool explicit;
- bool float16;
- if (!(ps->info.ps.input_mask & (1u << i)))
- continue;
-
- vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_VAR0 + i];
- if (vs_offset == AC_EXP_PARAM_UNDEFINED) {
- ps_input_cntl[ps_offset] = S_028644_OFFSET(0x20);
- ++ps_offset;
- continue;
- }
-
- flat_shade = !!(ps->info.ps.flat_shaded_mask & (1u << ps_offset));
- explicit = !!(ps->info.ps.explicit_shaded_mask & (1u << ps_offset));
- float16 = !!(ps->info.ps.float16_shaded_mask & (1u << ps_offset));
-
- ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, flat_shade, explicit, float16);
- ++ps_offset;
- }
-
- if (ps_offset) {
- radeon_set_context_reg_seq(ctx_cs, R_028644_SPI_PS_INPUT_CNTL_0, ps_offset);
- for (unsigned i = 0; i < ps_offset; i++) {
- radeon_emit(ctx_cs, ps_input_cntl[i]);
- }
- }
+ struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
+ const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline);
+ uint32_t ps_input_cntl[32];
+
+ unsigned ps_offset = 0;
+
+ if (ps->info.ps.prim_id_input) {
+ unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID];
+ if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
+ ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false, false);
+ ++ps_offset;
+ }
+ }
+
+ if (ps->info.ps.layer_input || ps->info.needs_multiview_view_index) {
+ unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_LAYER];
+ if (vs_offset != AC_EXP_PARAM_UNDEFINED)
+ ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false, false);
+ else
+ ps_input_cntl[ps_offset] =
+ offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, true, false, false);
+ ++ps_offset;
+ }
+
+ if (ps->info.ps.viewport_index_input) {
+ unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_VIEWPORT];
+ if (vs_offset != AC_EXP_PARAM_UNDEFINED)
+ ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false, false);
+ else
+ ps_input_cntl[ps_offset] =
+ offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, true, false, false);
+ ++ps_offset;
+ }
+
+ if (ps->info.ps.has_pcoord) {
+ unsigned val;
+ val = S_028644_PT_SPRITE_TEX(1) | S_028644_OFFSET(0x20);
+ ps_input_cntl[ps_offset] = val;
+ ps_offset++;
+ }
+
+ if (ps->info.ps.num_input_clips_culls) {
+ unsigned vs_offset;
+
+ vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0];
+ if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
+ ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false, false, false);
+ ++ps_offset;
+ }
+
+ vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1];
+ if (vs_offset != AC_EXP_PARAM_UNDEFINED && ps->info.ps.num_input_clips_culls > 4) {
+ ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false, false, false);
+ ++ps_offset;
+ }
+ }
+
+ for (unsigned i = 0; i < 32 && (1u << i) <= ps->info.ps.input_mask; ++i) {
+ unsigned vs_offset;
+ bool flat_shade;
+ bool explicit;
+ bool float16;
+ if (!(ps->info.ps.input_mask & (1u << i)))
+ continue;
+
+ vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_VAR0 + i];
+ if (vs_offset == AC_EXP_PARAM_UNDEFINED) {
+ ps_input_cntl[ps_offset] = S_028644_OFFSET(0x20);
+ ++ps_offset;
+ continue;
+ }
+
+ flat_shade = !!(ps->info.ps.flat_shaded_mask & (1u << ps_offset));
+ explicit = !!(ps->info.ps.explicit_shaded_mask & (1u << ps_offset));
+ float16 = !!(ps->info.ps.float16_shaded_mask & (1u << ps_offset));
+
+ ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, flat_shade, explicit, float16);
+ ++ps_offset;
+ }
+
+ if (ps_offset) {
+ radeon_set_context_reg_seq(ctx_cs, R_028644_SPI_PS_INPUT_CNTL_0, ps_offset);
+ for (unsigned i = 0; i < ps_offset; i++) {
+ radeon_emit(ctx_cs, ps_input_cntl[i]);
+ }
+ }
}
static uint32_t
radv_compute_db_shader_control(const struct radv_device *device,
- const struct radv_pipeline *pipeline,
+ const struct radv_pipeline *pipeline,
const struct radv_shader_variant *ps)
{
- unsigned conservative_z_export = V_02880C_EXPORT_ANY_Z;
- unsigned z_order;
- if (ps->info.ps.early_fragment_test || !ps->info.ps.writes_memory)
- z_order = V_02880C_EARLY_Z_THEN_LATE_Z;
- else
- z_order = V_02880C_LATE_Z;
-
- if (ps->info.ps.depth_layout == FRAG_DEPTH_LAYOUT_GREATER)
- conservative_z_export = V_02880C_EXPORT_GREATER_THAN_Z;
- else if (ps->info.ps.depth_layout == FRAG_DEPTH_LAYOUT_LESS)
- conservative_z_export = V_02880C_EXPORT_LESS_THAN_Z;
-
- bool disable_rbplus = device->physical_device->rad_info.has_rbplus &&
- !device->physical_device->rad_info.rbplus_allowed;
-
- /* It shouldn't be needed to export gl_SampleMask when MSAA is disabled
- * but this appears to break Project Cars (DXVK). See
- * https://bugs.freedesktop.org/show_bug.cgi?id=109401
- */
- bool mask_export_enable = ps->info.ps.writes_sample_mask;
-
- return S_02880C_Z_EXPORT_ENABLE(ps->info.ps.writes_z) |
- S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(ps->info.ps.writes_stencil) |
- S_02880C_KILL_ENABLE(!!ps->info.ps.can_discard) |
- S_02880C_MASK_EXPORT_ENABLE(mask_export_enable) |
- S_02880C_CONSERVATIVE_Z_EXPORT(conservative_z_export) |
- S_02880C_Z_ORDER(z_order) |
- S_02880C_DEPTH_BEFORE_SHADER(ps->info.ps.early_fragment_test) |
- S_02880C_PRE_SHADER_DEPTH_COVERAGE_ENABLE(ps->info.ps.post_depth_coverage) |
- S_02880C_EXEC_ON_HIER_FAIL(ps->info.ps.writes_memory) |
- S_02880C_EXEC_ON_NOOP(ps->info.ps.writes_memory) |
- S_02880C_DUAL_QUAD_DISABLE(disable_rbplus);
+ unsigned conservative_z_export = V_02880C_EXPORT_ANY_Z;
+ unsigned z_order;
+ if (ps->info.ps.early_fragment_test || !ps->info.ps.writes_memory)
+ z_order = V_02880C_EARLY_Z_THEN_LATE_Z;
+ else
+ z_order = V_02880C_LATE_Z;
+
+ if (ps->info.ps.depth_layout == FRAG_DEPTH_LAYOUT_GREATER)
+ conservative_z_export = V_02880C_EXPORT_GREATER_THAN_Z;
+ else if (ps->info.ps.depth_layout == FRAG_DEPTH_LAYOUT_LESS)
+ conservative_z_export = V_02880C_EXPORT_LESS_THAN_Z;
+
+ bool disable_rbplus = device->physical_device->rad_info.has_rbplus &&
+ !device->physical_device->rad_info.rbplus_allowed;
+
+ /* It shouldn't be needed to export gl_SampleMask when MSAA is disabled
+ * but this appears to break Project Cars (DXVK). See
+ * https://bugs.freedesktop.org/show_bug.cgi?id=109401
+ */
+ bool mask_export_enable = ps->info.ps.writes_sample_mask;
+
+ return S_02880C_Z_EXPORT_ENABLE(ps->info.ps.writes_z) |
+ S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(ps->info.ps.writes_stencil) |
+ S_02880C_KILL_ENABLE(!!ps->info.ps.can_discard) |
+ S_02880C_MASK_EXPORT_ENABLE(mask_export_enable) |
+ S_02880C_CONSERVATIVE_Z_EXPORT(conservative_z_export) | S_02880C_Z_ORDER(z_order) |
+ S_02880C_DEPTH_BEFORE_SHADER(ps->info.ps.early_fragment_test) |
+ S_02880C_PRE_SHADER_DEPTH_COVERAGE_ENABLE(ps->info.ps.post_depth_coverage) |
+ S_02880C_EXEC_ON_HIER_FAIL(ps->info.ps.writes_memory) |
+ S_02880C_EXEC_ON_NOOP(ps->info.ps.writes_memory) |
+ S_02880C_DUAL_QUAD_DISABLE(disable_rbplus);
}
static void
-radv_pipeline_generate_fragment_shader(struct radeon_cmdbuf *ctx_cs,
- struct radeon_cmdbuf *cs,
- struct radv_pipeline *pipeline)
+radv_pipeline_generate_fragment_shader(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
+ struct radv_pipeline *pipeline)
{
- struct radv_shader_variant *ps;
- uint64_t va;
- assert (pipeline->shaders[MESA_SHADER_FRAGMENT]);
+ struct radv_shader_variant *ps;
+ uint64_t va;
+ assert(pipeline->shaders[MESA_SHADER_FRAGMENT]);
- ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
- va = radv_buffer_get_va(ps->bo) + ps->bo_offset;
+ ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
+ va = radv_buffer_get_va(ps->bo) + ps->bo_offset;
- radeon_set_sh_reg_seq(cs, R_00B020_SPI_SHADER_PGM_LO_PS, 4);
- radeon_emit(cs, va >> 8);
- radeon_emit(cs, S_00B024_MEM_BASE(va >> 40));
- radeon_emit(cs, ps->config.rsrc1);
- radeon_emit(cs, ps->config.rsrc2);
+ radeon_set_sh_reg_seq(cs, R_00B020_SPI_SHADER_PGM_LO_PS, 4);
+ radeon_emit(cs, va >> 8);
+ radeon_emit(cs, S_00B024_MEM_BASE(va >> 40));
+ radeon_emit(cs, ps->config.rsrc1);
+ radeon_emit(cs, ps->config.rsrc2);
- radeon_set_context_reg(ctx_cs, R_02880C_DB_SHADER_CONTROL,
- radv_compute_db_shader_control(pipeline->device,
- pipeline, ps));
+ radeon_set_context_reg(ctx_cs, R_02880C_DB_SHADER_CONTROL,
+ radv_compute_db_shader_control(pipeline->device, pipeline, ps));
- radeon_set_context_reg(ctx_cs, R_0286CC_SPI_PS_INPUT_ENA,
- ps->config.spi_ps_input_ena);
+ radeon_set_context_reg(ctx_cs, R_0286CC_SPI_PS_INPUT_ENA, ps->config.spi_ps_input_ena);
- radeon_set_context_reg(ctx_cs, R_0286D0_SPI_PS_INPUT_ADDR,
- ps->config.spi_ps_input_addr);
+ radeon_set_context_reg(ctx_cs, R_0286D0_SPI_PS_INPUT_ADDR, ps->config.spi_ps_input_addr);
- radeon_set_context_reg(ctx_cs, R_0286D8_SPI_PS_IN_CONTROL,
- S_0286D8_NUM_INTERP(ps->info.ps.num_interp) |
- S_0286D8_PS_W32_EN(ps->info.wave_size == 32));
+ radeon_set_context_reg(
+ ctx_cs, R_0286D8_SPI_PS_IN_CONTROL,
+ S_0286D8_NUM_INTERP(ps->info.ps.num_interp) | S_0286D8_PS_W32_EN(ps->info.wave_size == 32));
- radeon_set_context_reg(ctx_cs, R_0286E0_SPI_BARYC_CNTL, pipeline->graphics.spi_baryc_cntl);
+ radeon_set_context_reg(ctx_cs, R_0286E0_SPI_BARYC_CNTL, pipeline->graphics.spi_baryc_cntl);
- radeon_set_context_reg(ctx_cs, R_028710_SPI_SHADER_Z_FORMAT,
- ac_get_spi_shader_z_format(ps->info.ps.writes_z,
- ps->info.ps.writes_stencil,
- ps->info.ps.writes_sample_mask));
+ radeon_set_context_reg(
+ ctx_cs, R_028710_SPI_SHADER_Z_FORMAT,
+ ac_get_spi_shader_z_format(ps->info.ps.writes_z, ps->info.ps.writes_stencil,
+ ps->info.ps.writes_sample_mask));
- if (pipeline->device->dfsm_allowed) {
- /* optimise this? */
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
- }
+ if (pipeline->device->dfsm_allowed) {
+ /* optimise this? */
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
+ }
}
static void
radv_pipeline_generate_vgt_vertex_reuse(struct radeon_cmdbuf *ctx_cs,
- const struct radv_pipeline *pipeline)
+ const struct radv_pipeline *pipeline)
{
- if (pipeline->device->physical_device->rad_info.family < CHIP_POLARIS10 ||
- pipeline->device->physical_device->rad_info.chip_class >= GFX10)
- return;
-
- unsigned vtx_reuse_depth = 30;
- if (radv_pipeline_has_tess(pipeline) &&
- radv_get_shader(pipeline, MESA_SHADER_TESS_EVAL)->info.tes.spacing == TESS_SPACING_FRACTIONAL_ODD) {
- vtx_reuse_depth = 14;
- }
- radeon_set_context_reg(ctx_cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL,
- S_028C58_VTX_REUSE_DEPTH(vtx_reuse_depth));
+ if (pipeline->device->physical_device->rad_info.family < CHIP_POLARIS10 ||
+ pipeline->device->physical_device->rad_info.chip_class >= GFX10)
+ return;
+
+ unsigned vtx_reuse_depth = 30;
+ if (radv_pipeline_has_tess(pipeline) &&
+ radv_get_shader(pipeline, MESA_SHADER_TESS_EVAL)->info.tes.spacing ==
+ TESS_SPACING_FRACTIONAL_ODD) {
+ vtx_reuse_depth = 14;
+ }
+ radeon_set_context_reg(ctx_cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL,
+ S_028C58_VTX_REUSE_DEPTH(vtx_reuse_depth));
}
static void
radv_pipeline_generate_vgt_shader_config(struct radeon_cmdbuf *ctx_cs,
- const struct radv_pipeline *pipeline)
+ const struct radv_pipeline *pipeline)
{
- uint32_t stages = 0;
- if (radv_pipeline_has_tess(pipeline)) {
- stages |= S_028B54_LS_EN(V_028B54_LS_STAGE_ON) |
- S_028B54_HS_EN(1) | S_028B54_DYNAMIC_HS(1);
-
- if (radv_pipeline_has_gs(pipeline))
- stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS) |
- S_028B54_GS_EN(1);
- else if (radv_pipeline_has_ngg(pipeline))
- stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS);
- else
- stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_DS);
- } else if (radv_pipeline_has_gs(pipeline)) {
- stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) |
- S_028B54_GS_EN(1);
- } else if (radv_pipeline_has_ngg(pipeline)) {
- stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL);
- }
-
- if (radv_pipeline_has_ngg(pipeline)) {
- stages |= S_028B54_PRIMGEN_EN(1);
- if (pipeline->streamout_shader)
- stages |= S_028B54_NGG_WAVE_ID_EN(1);
- if (radv_pipeline_has_ngg_passthrough(pipeline))
- stages |= S_028B54_PRIMGEN_PASSTHRU_EN(1);
- } else if (radv_pipeline_has_gs(pipeline)) {
- stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
- }
-
- if (pipeline->device->physical_device->rad_info.chip_class >= GFX9)
- stages |= S_028B54_MAX_PRIMGRP_IN_WAVE(2);
-
- if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
- uint8_t hs_size = 64, gs_size = 64, vs_size = 64;
-
- if (radv_pipeline_has_tess(pipeline))
- hs_size = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.wave_size;
-
- if (pipeline->shaders[MESA_SHADER_GEOMETRY]) {
- vs_size = gs_size = pipeline->shaders[MESA_SHADER_GEOMETRY]->info.wave_size;
- if (pipeline->gs_copy_shader)
- vs_size = pipeline->gs_copy_shader->info.wave_size;
- } else if (pipeline->shaders[MESA_SHADER_TESS_EVAL])
- vs_size = pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.wave_size;
- else if (pipeline->shaders[MESA_SHADER_VERTEX])
- vs_size = pipeline->shaders[MESA_SHADER_VERTEX]->info.wave_size;
-
- if (radv_pipeline_has_ngg(pipeline))
- gs_size = vs_size;
-
- /* legacy GS only supports Wave64 */
- stages |= S_028B54_HS_W32_EN(hs_size == 32 ? 1 : 0) |
- S_028B54_GS_W32_EN(gs_size == 32 ? 1 : 0) |
- S_028B54_VS_W32_EN(vs_size == 32 ? 1 : 0);
- }
-
- radeon_set_context_reg(ctx_cs, R_028B54_VGT_SHADER_STAGES_EN, stages);
+ uint32_t stages = 0;
+ if (radv_pipeline_has_tess(pipeline)) {
+ stages |= S_028B54_LS_EN(V_028B54_LS_STAGE_ON) | S_028B54_HS_EN(1) | S_028B54_DYNAMIC_HS(1);
+
+ if (radv_pipeline_has_gs(pipeline))
+ stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS) | S_028B54_GS_EN(1);
+ else if (radv_pipeline_has_ngg(pipeline))
+ stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS);
+ else
+ stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_DS);
+ } else if (radv_pipeline_has_gs(pipeline)) {
+ stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) | S_028B54_GS_EN(1);
+ } else if (radv_pipeline_has_ngg(pipeline)) {
+ stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL);
+ }
+
+ if (radv_pipeline_has_ngg(pipeline)) {
+ stages |= S_028B54_PRIMGEN_EN(1);
+ if (pipeline->streamout_shader)
+ stages |= S_028B54_NGG_WAVE_ID_EN(1);
+ if (radv_pipeline_has_ngg_passthrough(pipeline))
+ stages |= S_028B54_PRIMGEN_PASSTHRU_EN(1);
+ } else if (radv_pipeline_has_gs(pipeline)) {
+ stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
+ }
+
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX9)
+ stages |= S_028B54_MAX_PRIMGRP_IN_WAVE(2);
+
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
+ uint8_t hs_size = 64, gs_size = 64, vs_size = 64;
+
+ if (radv_pipeline_has_tess(pipeline))
+ hs_size = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.wave_size;
+
+ if (pipeline->shaders[MESA_SHADER_GEOMETRY]) {
+ vs_size = gs_size = pipeline->shaders[MESA_SHADER_GEOMETRY]->info.wave_size;
+ if (pipeline->gs_copy_shader)
+ vs_size = pipeline->gs_copy_shader->info.wave_size;
+ } else if (pipeline->shaders[MESA_SHADER_TESS_EVAL])
+ vs_size = pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.wave_size;
+ else if (pipeline->shaders[MESA_SHADER_VERTEX])
+ vs_size = pipeline->shaders[MESA_SHADER_VERTEX]->info.wave_size;
+
+ if (radv_pipeline_has_ngg(pipeline))
+ gs_size = vs_size;
+
+ /* legacy GS only supports Wave64 */
+ stages |= S_028B54_HS_W32_EN(hs_size == 32 ? 1 : 0) |
+ S_028B54_GS_W32_EN(gs_size == 32 ? 1 : 0) |
+ S_028B54_VS_W32_EN(vs_size == 32 ? 1 : 0);
+ }
+
+ radeon_set_context_reg(ctx_cs, R_028B54_VGT_SHADER_STAGES_EN, stages);
}
static void
radv_pipeline_generate_cliprect_rule(struct radeon_cmdbuf *ctx_cs,
- const VkGraphicsPipelineCreateInfo *pCreateInfo)
+ const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
- const VkPipelineDiscardRectangleStateCreateInfoEXT *discard_rectangle_info =
- vk_find_struct_const(pCreateInfo->pNext, PIPELINE_DISCARD_RECTANGLE_STATE_CREATE_INFO_EXT);
- uint32_t cliprect_rule = 0;
-
- if (!discard_rectangle_info) {
- cliprect_rule = 0xffff;
- } else {
- for (unsigned i = 0; i < (1u << MAX_DISCARD_RECTANGLES); ++i) {
- /* Interpret i as a bitmask, and then set the bit in
- * the mask if that combination of rectangles in which
- * the pixel is contained should pass the cliprect
- * test.
- */
- unsigned relevant_subset = i & ((1u << discard_rectangle_info->discardRectangleCount) - 1);
-
- if (discard_rectangle_info->discardRectangleMode == VK_DISCARD_RECTANGLE_MODE_INCLUSIVE_EXT &&
- !relevant_subset)
- continue;
-
- if (discard_rectangle_info->discardRectangleMode == VK_DISCARD_RECTANGLE_MODE_EXCLUSIVE_EXT &&
- relevant_subset)
- continue;
-
- cliprect_rule |= 1u << i;
- }
- }
-
- radeon_set_context_reg(ctx_cs, R_02820C_PA_SC_CLIPRECT_RULE, cliprect_rule);
+ const VkPipelineDiscardRectangleStateCreateInfoEXT *discard_rectangle_info =
+ vk_find_struct_const(pCreateInfo->pNext, PIPELINE_DISCARD_RECTANGLE_STATE_CREATE_INFO_EXT);
+ uint32_t cliprect_rule = 0;
+
+ if (!discard_rectangle_info) {
+ cliprect_rule = 0xffff;
+ } else {
+ for (unsigned i = 0; i < (1u << MAX_DISCARD_RECTANGLES); ++i) {
+ /* Interpret i as a bitmask, and then set the bit in
+ * the mask if that combination of rectangles in which
+ * the pixel is contained should pass the cliprect
+ * test.
+ */
+ unsigned relevant_subset = i & ((1u << discard_rectangle_info->discardRectangleCount) - 1);
+
+ if (discard_rectangle_info->discardRectangleMode ==
+ VK_DISCARD_RECTANGLE_MODE_INCLUSIVE_EXT &&
+ !relevant_subset)
+ continue;
+
+ if (discard_rectangle_info->discardRectangleMode ==
+ VK_DISCARD_RECTANGLE_MODE_EXCLUSIVE_EXT &&
+ relevant_subset)
+ continue;
+
+ cliprect_rule |= 1u << i;
+ }
+ }
+
+ radeon_set_context_reg(ctx_cs, R_02820C_PA_SC_CLIPRECT_RULE, cliprect_rule);
}
static void
-gfx10_pipeline_generate_ge_cntl(struct radeon_cmdbuf *ctx_cs,
- struct radv_pipeline *pipeline)
+gfx10_pipeline_generate_ge_cntl(struct radeon_cmdbuf *ctx_cs, struct radv_pipeline *pipeline)
{
- bool break_wave_at_eoi = false;
- unsigned primgroup_size;
- unsigned vertgroup_size = 256; /* 256 = disable vertex grouping */
-
- if (radv_pipeline_has_tess(pipeline)) {
- primgroup_size = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.num_tess_patches;
- } else if (radv_pipeline_has_gs(pipeline)) {
- const struct gfx9_gs_info *gs_state =
- &pipeline->shaders[MESA_SHADER_GEOMETRY]->info.gs_ring_info;
- unsigned vgt_gs_onchip_cntl = gs_state->vgt_gs_onchip_cntl;
- primgroup_size = G_028A44_GS_PRIMS_PER_SUBGRP(vgt_gs_onchip_cntl);
- } else {
- primgroup_size = 128; /* recommended without a GS and tess */
- }
-
- if (radv_pipeline_has_tess(pipeline)) {
- if (pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.uses_prim_id ||
- radv_get_shader(pipeline, MESA_SHADER_TESS_EVAL)->info.uses_prim_id)
- break_wave_at_eoi = true;
- }
-
- radeon_set_uconfig_reg(ctx_cs, R_03096C_GE_CNTL,
- S_03096C_PRIM_GRP_SIZE(primgroup_size) |
- S_03096C_VERT_GRP_SIZE(vertgroup_size) |
- S_03096C_PACKET_TO_ONE_PA(0) /* line stipple */ |
- S_03096C_BREAK_WAVE_AT_EOI(break_wave_at_eoi));
+ bool break_wave_at_eoi = false;
+ unsigned primgroup_size;
+ unsigned vertgroup_size = 256; /* 256 = disable vertex grouping */
+
+ if (radv_pipeline_has_tess(pipeline)) {
+ primgroup_size = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.num_tess_patches;
+ } else if (radv_pipeline_has_gs(pipeline)) {
+ const struct gfx9_gs_info *gs_state =
+ &pipeline->shaders[MESA_SHADER_GEOMETRY]->info.gs_ring_info;
+ unsigned vgt_gs_onchip_cntl = gs_state->vgt_gs_onchip_cntl;
+ primgroup_size = G_028A44_GS_PRIMS_PER_SUBGRP(vgt_gs_onchip_cntl);
+ } else {
+ primgroup_size = 128; /* recommended without a GS and tess */
+ }
+
+ if (radv_pipeline_has_tess(pipeline)) {
+ if (pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.uses_prim_id ||
+ radv_get_shader(pipeline, MESA_SHADER_TESS_EVAL)->info.uses_prim_id)
+ break_wave_at_eoi = true;
+ }
+
+ radeon_set_uconfig_reg(ctx_cs, R_03096C_GE_CNTL,
+ S_03096C_PRIM_GRP_SIZE(primgroup_size) |
+ S_03096C_VERT_GRP_SIZE(vertgroup_size) |
+ S_03096C_PACKET_TO_ONE_PA(0) /* line stipple */ |
+ S_03096C_BREAK_WAVE_AT_EOI(break_wave_at_eoi));
}
static void
radv_pipeline_generate_vgt_gs_out(struct radeon_cmdbuf *ctx_cs,
- const struct radv_pipeline *pipeline,
- const VkGraphicsPipelineCreateInfo *pCreateInfo,
- const struct radv_graphics_pipeline_create_info *extra)
+ const struct radv_pipeline *pipeline,
+ const VkGraphicsPipelineCreateInfo *pCreateInfo,
+ const struct radv_graphics_pipeline_create_info *extra)
{
- uint32_t gs_out;
-
- if (radv_pipeline_has_gs(pipeline)) {
- gs_out = si_conv_gl_prim_to_gs_out(pipeline->shaders[MESA_SHADER_GEOMETRY]->info.gs.output_prim);
- } else if (radv_pipeline_has_tess(pipeline)) {
- if (pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.tes.point_mode) {
- gs_out = V_028A6C_POINTLIST;
- } else {
- gs_out = si_conv_gl_prim_to_gs_out(pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.tes.primitive_mode);
- }
- } else {
- gs_out = si_conv_prim_to_gs_out(pCreateInfo->pInputAssemblyState->topology);
- }
-
- if (extra && extra->use_rectlist) {
- gs_out = V_028A6C_TRISTRIP;
- if (radv_pipeline_has_ngg(pipeline))
- gs_out = V_028A6C_RECTLIST;
- }
-
- radeon_set_context_reg(ctx_cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, gs_out);
+ uint32_t gs_out;
+
+ if (radv_pipeline_has_gs(pipeline)) {
+ gs_out =
+ si_conv_gl_prim_to_gs_out(pipeline->shaders[MESA_SHADER_GEOMETRY]->info.gs.output_prim);
+ } else if (radv_pipeline_has_tess(pipeline)) {
+ if (pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.tes.point_mode) {
+ gs_out = V_028A6C_POINTLIST;
+ } else {
+ gs_out = si_conv_gl_prim_to_gs_out(
+ pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.tes.primitive_mode);
+ }
+ } else {
+ gs_out = si_conv_prim_to_gs_out(pCreateInfo->pInputAssemblyState->topology);
+ }
+
+ if (extra && extra->use_rectlist) {
+ gs_out = V_028A6C_TRISTRIP;
+ if (radv_pipeline_has_ngg(pipeline))
+ gs_out = V_028A6C_RECTLIST;
+ }
+
+ radeon_set_context_reg(ctx_cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, gs_out);
}
static bool
gfx103_pipeline_vrs_coarse_shading(const struct radv_pipeline *pipeline)
{
- struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
- struct radv_device *device = pipeline->device;
+ struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
+ struct radv_device *device = pipeline->device;
- if (device->instance->debug_flags & RADV_DEBUG_NO_VRS_FLAT_SHADING)
- return false;
+ if (device->instance->debug_flags & RADV_DEBUG_NO_VRS_FLAT_SHADING)
+ return false;
- if (!ps->info.ps.allow_flat_shading)
- return false;
+ if (!ps->info.ps.allow_flat_shading)
+ return false;
- return true;
+ return true;
}
static void
gfx103_pipeline_generate_vrs_state(struct radeon_cmdbuf *ctx_cs,
- const struct radv_pipeline *pipeline,
- const VkGraphicsPipelineCreateInfo *pCreateInfo)
+ const struct radv_pipeline *pipeline,
+ const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
- uint32_t mode = V_028064_VRS_COMB_MODE_PASSTHRU;
- uint8_t rate_x = 0, rate_y = 0;
- bool enable_vrs = false;
-
- if (vk_find_struct_const(pCreateInfo->pNext, PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR) ||
- radv_is_state_dynamic(pCreateInfo, VK_DYNAMIC_STATE_FRAGMENT_SHADING_RATE_KHR)) {
- /* Enable draw call VRS because it's explicitly requested. */
- enable_vrs = true;
- } else if (gfx103_pipeline_vrs_coarse_shading(pipeline)) {
- /* Enable VRS coarse shading 2x2 if the driver determined that
- * it's safe to enable.
- */
- mode = V_028064_VRS_COMB_MODE_OVERRIDE;
- rate_x = rate_y = 1;
- } else if (pipeline->device->force_vrs != RADV_FORCE_VRS_NONE) {
- /* Force enable vertex VRS if requested by the user. */
- radeon_set_context_reg(ctx_cs, R_028848_PA_CL_VRS_CNTL,
- S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE) |
- S_028848_VERTEX_RATE_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE));
-
- /* If the shader is using discard, turn off coarse shading
- * because discard at 2x2 pixel granularity degrades quality
- * too much. MIN allows sample shading but not coarse shading.
- */
- struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
-
- mode = ps->info.ps.can_discard ? V_028064_VRS_COMB_MODE_MIN
- : V_028064_VRS_COMB_MODE_PASSTHRU;
- }
-
- radeon_set_context_reg(ctx_cs, R_028A98_VGT_DRAW_PAYLOAD_CNTL,
- S_028A98_EN_VRS_RATE(enable_vrs));
-
- radeon_set_context_reg(ctx_cs, R_028064_DB_VRS_OVERRIDE_CNTL,
- S_028064_VRS_OVERRIDE_RATE_COMBINER_MODE(mode) |
- S_028064_VRS_OVERRIDE_RATE_X(rate_x) |
- S_028064_VRS_OVERRIDE_RATE_Y(rate_y));
+ uint32_t mode = V_028064_VRS_COMB_MODE_PASSTHRU;
+ uint8_t rate_x = 0, rate_y = 0;
+ bool enable_vrs = false;
+
+ if (vk_find_struct_const(pCreateInfo->pNext,
+ PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR) ||
+ radv_is_state_dynamic(pCreateInfo, VK_DYNAMIC_STATE_FRAGMENT_SHADING_RATE_KHR)) {
+ /* Enable draw call VRS because it's explicitly requested. */
+ enable_vrs = true;
+ } else if (gfx103_pipeline_vrs_coarse_shading(pipeline)) {
+ /* Enable VRS coarse shading 2x2 if the driver determined that
+ * it's safe to enable.
+ */
+ mode = V_028064_VRS_COMB_MODE_OVERRIDE;
+ rate_x = rate_y = 1;
+ } else if (pipeline->device->force_vrs != RADV_FORCE_VRS_NONE) {
+ /* Force enable vertex VRS if requested by the user. */
+ radeon_set_context_reg(
+ ctx_cs, R_028848_PA_CL_VRS_CNTL,
+ S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE) |
+ S_028848_VERTEX_RATE_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE));
+
+ /* If the shader is using discard, turn off coarse shading
+ * because discard at 2x2 pixel granularity degrades quality
+ * too much. MIN allows sample shading but not coarse shading.
+ */
+ struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
+
+ mode = ps->info.ps.can_discard ? V_028064_VRS_COMB_MODE_MIN : V_028064_VRS_COMB_MODE_PASSTHRU;
+ }
+
+ radeon_set_context_reg(ctx_cs, R_028A98_VGT_DRAW_PAYLOAD_CNTL, S_028A98_EN_VRS_RATE(enable_vrs));
+
+ radeon_set_context_reg(ctx_cs, R_028064_DB_VRS_OVERRIDE_CNTL,
+ S_028064_VRS_OVERRIDE_RATE_COMBINER_MODE(mode) |
+ S_028064_VRS_OVERRIDE_RATE_X(rate_x) |
+ S_028064_VRS_OVERRIDE_RATE_Y(rate_y));
}
static void
@@ -5375,802 +5192,780 @@ radv_pipeline_generate_pm4(struct radv_pipeline *pipeline,
const struct radv_graphics_pipeline_create_info *extra,
const struct radv_blend_state *blend)
{
- struct radeon_cmdbuf *ctx_cs = &pipeline->ctx_cs;
- struct radeon_cmdbuf *cs = &pipeline->cs;
-
- cs->max_dw = 64;
- ctx_cs->max_dw = 256;
- cs->buf = malloc(4 * (cs->max_dw + ctx_cs->max_dw));
- ctx_cs->buf = cs->buf + cs->max_dw;
-
- radv_pipeline_generate_depth_stencil_state(ctx_cs, pipeline, pCreateInfo, extra);
- radv_pipeline_generate_blend_state(ctx_cs, pipeline, blend);
- radv_pipeline_generate_raster_state(ctx_cs, pipeline, pCreateInfo);
- radv_pipeline_generate_multisample_state(ctx_cs, pipeline);
- radv_pipeline_generate_vgt_gs_mode(ctx_cs, pipeline);
- radv_pipeline_generate_vertex_shader(ctx_cs, cs, pipeline);
-
- if (radv_pipeline_has_tess(pipeline)) {
- radv_pipeline_generate_tess_shaders(ctx_cs, cs, pipeline);
- radv_pipeline_generate_tess_state(ctx_cs, pipeline, pCreateInfo);
- }
-
- radv_pipeline_generate_geometry_shader(ctx_cs, cs, pipeline);
- radv_pipeline_generate_fragment_shader(ctx_cs, cs, pipeline);
- radv_pipeline_generate_ps_inputs(ctx_cs, pipeline);
- radv_pipeline_generate_vgt_vertex_reuse(ctx_cs, pipeline);
- radv_pipeline_generate_vgt_shader_config(ctx_cs, pipeline);
- radv_pipeline_generate_cliprect_rule(ctx_cs, pCreateInfo);
- radv_pipeline_generate_vgt_gs_out(ctx_cs, pipeline, pCreateInfo, extra);
-
- if (pipeline->device->physical_device->rad_info.chip_class >= GFX10 && !radv_pipeline_has_ngg(pipeline))
- gfx10_pipeline_generate_ge_cntl(ctx_cs, pipeline);
-
- if (pipeline->device->physical_device->rad_info.chip_class >= GFX10_3)
- gfx103_pipeline_generate_vrs_state(ctx_cs, pipeline, pCreateInfo);
-
- pipeline->ctx_cs_hash = _mesa_hash_data(ctx_cs->buf, ctx_cs->cdw * 4);
-
- assert(ctx_cs->cdw <= ctx_cs->max_dw);
- assert(cs->cdw <= cs->max_dw);
+ struct radeon_cmdbuf *ctx_cs = &pipeline->ctx_cs;
+ struct radeon_cmdbuf *cs = &pipeline->cs;
+
+ cs->max_dw = 64;
+ ctx_cs->max_dw = 256;
+ cs->buf = malloc(4 * (cs->max_dw + ctx_cs->max_dw));
+ ctx_cs->buf = cs->buf + cs->max_dw;
+
+ radv_pipeline_generate_depth_stencil_state(ctx_cs, pipeline, pCreateInfo, extra);
+ radv_pipeline_generate_blend_state(ctx_cs, pipeline, blend);
+ radv_pipeline_generate_raster_state(ctx_cs, pipeline, pCreateInfo);
+ radv_pipeline_generate_multisample_state(ctx_cs, pipeline);
+ radv_pipeline_generate_vgt_gs_mode(ctx_cs, pipeline);
+ radv_pipeline_generate_vertex_shader(ctx_cs, cs, pipeline);
+
+ if (radv_pipeline_has_tess(pipeline)) {
+ radv_pipeline_generate_tess_shaders(ctx_cs, cs, pipeline);
+ radv_pipeline_generate_tess_state(ctx_cs, pipeline, pCreateInfo);
+ }
+
+ radv_pipeline_generate_geometry_shader(ctx_cs, cs, pipeline);
+ radv_pipeline_generate_fragment_shader(ctx_cs, cs, pipeline);
+ radv_pipeline_generate_ps_inputs(ctx_cs, pipeline);
+ radv_pipeline_generate_vgt_vertex_reuse(ctx_cs, pipeline);
+ radv_pipeline_generate_vgt_shader_config(ctx_cs, pipeline);
+ radv_pipeline_generate_cliprect_rule(ctx_cs, pCreateInfo);
+ radv_pipeline_generate_vgt_gs_out(ctx_cs, pipeline, pCreateInfo, extra);
+
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX10 &&
+ !radv_pipeline_has_ngg(pipeline))
+ gfx10_pipeline_generate_ge_cntl(ctx_cs, pipeline);
+
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX10_3)
+ gfx103_pipeline_generate_vrs_state(ctx_cs, pipeline, pCreateInfo);
+
+ pipeline->ctx_cs_hash = _mesa_hash_data(ctx_cs->buf, ctx_cs->cdw * 4);
+
+ assert(ctx_cs->cdw <= ctx_cs->max_dw);
+ assert(cs->cdw <= cs->max_dw);
}
static void
radv_pipeline_init_vertex_input_state(struct radv_pipeline *pipeline,
- const VkGraphicsPipelineCreateInfo *pCreateInfo)
+ const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
- const VkPipelineVertexInputStateCreateInfo *vi_info =
- pCreateInfo->pVertexInputState;
+ const VkPipelineVertexInputStateCreateInfo *vi_info = pCreateInfo->pVertexInputState;
- for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
- const VkVertexInputBindingDescription *desc =
- &vi_info->pVertexBindingDescriptions[i];
+ for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
+ const VkVertexInputBindingDescription *desc = &vi_info->pVertexBindingDescriptions[i];
- pipeline->binding_stride[desc->binding] = desc->stride;
- pipeline->num_vertex_bindings =
- MAX2(pipeline->num_vertex_bindings, desc->binding + 1);
- }
+ pipeline->binding_stride[desc->binding] = desc->stride;
+ pipeline->num_vertex_bindings = MAX2(pipeline->num_vertex_bindings, desc->binding + 1);
+ }
}
static struct radv_shader_variant *
radv_pipeline_get_streamout_shader(struct radv_pipeline *pipeline)
{
- int i;
+ int i;
- for (i = MESA_SHADER_GEOMETRY; i >= MESA_SHADER_VERTEX; i--) {
- struct radv_shader_variant *shader =
- radv_get_shader(pipeline, i);
+ for (i = MESA_SHADER_GEOMETRY; i >= MESA_SHADER_VERTEX; i--) {
+ struct radv_shader_variant *shader = radv_get_shader(pipeline, i);
- if (shader && shader->info.so.num_outputs > 0)
- return shader;
- }
+ if (shader && shader->info.so.num_outputs > 0)
+ return shader;
+ }
- return NULL;
+ return NULL;
}
static void
radv_pipeline_init_shader_stages_state(struct radv_pipeline *pipeline)
{
- struct radv_device *device = pipeline->device;
-
- for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
- pipeline->user_data_0[i] =
- radv_pipeline_stage_to_user_data_0(pipeline, i,
- device->physical_device->rad_info.chip_class);
-
- if (pipeline->shaders[i]) {
- pipeline->need_indirect_descriptor_sets |= pipeline->shaders[i]->info.need_indirect_descriptor_sets;
- }
- }
-
- struct radv_userdata_info *loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_VERTEX,
- AC_UD_VS_BASE_VERTEX_START_INSTANCE);
- if (loc->sgpr_idx != -1) {
- pipeline->graphics.vtx_base_sgpr = pipeline->user_data_0[MESA_SHADER_VERTEX];
- pipeline->graphics.vtx_base_sgpr += loc->sgpr_idx * 4;
- pipeline->graphics.vtx_emit_num = loc->num_sgprs;
- pipeline->graphics.uses_drawid = radv_get_shader(pipeline, MESA_SHADER_VERTEX)->info.vs.needs_draw_id;
- pipeline->graphics.uses_baseinstance = radv_get_shader(pipeline, MESA_SHADER_VERTEX)->info.vs.needs_base_instance;
- }
+ struct radv_device *device = pipeline->device;
+
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+ pipeline->user_data_0[i] = radv_pipeline_stage_to_user_data_0(
+ pipeline, i, device->physical_device->rad_info.chip_class);
+
+ if (pipeline->shaders[i]) {
+ pipeline->need_indirect_descriptor_sets |=
+ pipeline->shaders[i]->info.need_indirect_descriptor_sets;
+ }
+ }
+
+ struct radv_userdata_info *loc =
+ radv_lookup_user_sgpr(pipeline, MESA_SHADER_VERTEX, AC_UD_VS_BASE_VERTEX_START_INSTANCE);
+ if (loc->sgpr_idx != -1) {
+ pipeline->graphics.vtx_base_sgpr = pipeline->user_data_0[MESA_SHADER_VERTEX];
+ pipeline->graphics.vtx_base_sgpr += loc->sgpr_idx * 4;
+ pipeline->graphics.vtx_emit_num = loc->num_sgprs;
+ pipeline->graphics.uses_drawid =
+ radv_get_shader(pipeline, MESA_SHADER_VERTEX)->info.vs.needs_draw_id;
+ pipeline->graphics.uses_baseinstance =
+ radv_get_shader(pipeline, MESA_SHADER_VERTEX)->info.vs.needs_base_instance;
+ }
}
static VkResult
-radv_pipeline_init(struct radv_pipeline *pipeline,
- struct radv_device *device,
- struct radv_pipeline_cache *cache,
- const VkGraphicsPipelineCreateInfo *pCreateInfo,
- const struct radv_graphics_pipeline_create_info *extra)
+radv_pipeline_init(struct radv_pipeline *pipeline, struct radv_device *device,
+ struct radv_pipeline_cache *cache,
+ const VkGraphicsPipelineCreateInfo *pCreateInfo,
+ const struct radv_graphics_pipeline_create_info *extra)
{
- VkResult result;
-
- pipeline->device = device;
- pipeline->layout = radv_pipeline_layout_from_handle(pCreateInfo->layout);
- assert(pipeline->layout);
-
- struct radv_blend_state blend = radv_pipeline_init_blend_state(pipeline, pCreateInfo, extra);
-
- const VkPipelineCreationFeedbackCreateInfoEXT *creation_feedback =
- vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
- radv_init_feedback(creation_feedback);
-
- VkPipelineCreationFeedbackEXT *pipeline_feedback = creation_feedback ? creation_feedback->pPipelineCreationFeedback : NULL;
-
- const VkPipelineShaderStageCreateInfo *pStages[MESA_SHADER_STAGES] = { 0, };
- VkPipelineCreationFeedbackEXT *stage_feedbacks[MESA_SHADER_STAGES] = { 0 };
- for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
- gl_shader_stage stage = ffs(pCreateInfo->pStages[i].stage) - 1;
- pStages[stage] = &pCreateInfo->pStages[i];
- if(creation_feedback)
- stage_feedbacks[stage] = &creation_feedback->pPipelineStageCreationFeedbacks[i];
- }
-
- struct radv_pipeline_key key = radv_generate_graphics_pipeline_key(pipeline, pCreateInfo, &blend);
-
- result = radv_create_shaders(pipeline, device, cache, &key, pStages,
- pCreateInfo->flags, pipeline_feedback,
- stage_feedbacks);
- if (result != VK_SUCCESS)
- return result;
-
- pipeline->graphics.spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
- radv_pipeline_init_multisample_state(pipeline, &blend, pCreateInfo);
- radv_pipeline_init_input_assembly_state(pipeline, pCreateInfo, extra);
- radv_pipeline_init_dynamic_state(pipeline, pCreateInfo, extra);
- radv_pipeline_init_raster_state(pipeline, pCreateInfo);
- radv_pipeline_init_depth_stencil_state(pipeline, pCreateInfo);
-
- if (pipeline->device->physical_device->rad_info.chip_class >= GFX10_3)
- gfx103_pipeline_init_vrs_state(pipeline, pCreateInfo);
-
- /* Ensure that some export memory is always allocated, for two reasons:
- *
- * 1) Correctness: The hardware ignores the EXEC mask if no export
- * memory is allocated, so KILL and alpha test do not work correctly
- * without this.
- * 2) Performance: Every shader needs at least a NULL export, even when
- * it writes no color/depth output. The NULL export instruction
- * stalls without this setting.
- *
- * Don't add this to CB_SHADER_MASK.
- *
- * GFX10 supports pixel shaders without exports by setting both the
- * color and Z formats to SPI_SHADER_ZERO. The hw will skip export
- * instructions if any are present.
- */
- struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
- if ((pipeline->device->physical_device->rad_info.chip_class <= GFX9 ||
- ps->info.ps.can_discard) &&
- !blend.spi_shader_col_format) {
- if (!ps->info.ps.writes_z &&
- !ps->info.ps.writes_stencil &&
- !ps->info.ps.writes_sample_mask)
- blend.spi_shader_col_format = V_028714_SPI_SHADER_32_R;
- }
-
- if (extra &&
- (extra->custom_blend_mode == V_028808_CB_ELIMINATE_FAST_CLEAR ||
- extra->custom_blend_mode == V_028808_CB_FMASK_DECOMPRESS ||
- extra->custom_blend_mode == V_028808_CB_DCC_DECOMPRESS ||
- extra->custom_blend_mode == V_028808_CB_RESOLVE)) {
- /* According to the CB spec states, CB_SHADER_MASK should be
- * set to enable writes to all four channels of MRT0.
- */
- blend.cb_shader_mask = 0xf;
- }
-
- pipeline->graphics.col_format = blend.spi_shader_col_format;
- pipeline->graphics.cb_target_mask = blend.cb_target_mask;
-
- if (radv_pipeline_has_gs(pipeline) && !radv_pipeline_has_ngg(pipeline)) {
- struct radv_shader_variant *gs =
- pipeline->shaders[MESA_SHADER_GEOMETRY];
-
- radv_pipeline_init_gs_ring_state(pipeline, &gs->info.gs_ring_info);
- }
-
- if (radv_pipeline_has_tess(pipeline)) {
- pipeline->graphics.tess_patch_control_points =
- pCreateInfo->pTessellationState->patchControlPoints;
- }
-
- radv_pipeline_init_vertex_input_state(pipeline, pCreateInfo);
- radv_pipeline_init_binning_state(pipeline, pCreateInfo, &blend);
- radv_pipeline_init_shader_stages_state(pipeline);
- radv_pipeline_init_scratch(device, pipeline);
-
- /* Find the last vertex shader stage that eventually uses streamout. */
- pipeline->streamout_shader = radv_pipeline_get_streamout_shader(pipeline);
-
- radv_pipeline_generate_pm4(pipeline, pCreateInfo, extra, &blend);
-
- return result;
-}
+ VkResult result;
+
+ pipeline->device = device;
+ pipeline->layout = radv_pipeline_layout_from_handle(pCreateInfo->layout);
+ assert(pipeline->layout);
+
+ struct radv_blend_state blend = radv_pipeline_init_blend_state(pipeline, pCreateInfo, extra);
+
+ const VkPipelineCreationFeedbackCreateInfoEXT *creation_feedback =
+ vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
+ radv_init_feedback(creation_feedback);
+
+ VkPipelineCreationFeedbackEXT *pipeline_feedback =
+ creation_feedback ? creation_feedback->pPipelineCreationFeedback : NULL;
+
+ const VkPipelineShaderStageCreateInfo *pStages[MESA_SHADER_STAGES] = {
+ 0,
+ };
+ VkPipelineCreationFeedbackEXT *stage_feedbacks[MESA_SHADER_STAGES] = {0};
+ for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
+ gl_shader_stage stage = ffs(pCreateInfo->pStages[i].stage) - 1;
+ pStages[stage] = &pCreateInfo->pStages[i];
+ if (creation_feedback)
+ stage_feedbacks[stage] = &creation_feedback->pPipelineStageCreationFeedbacks[i];
+ }
-VkResult
-radv_graphics_pipeline_create(
- VkDevice _device,
- VkPipelineCache _cache,
- const VkGraphicsPipelineCreateInfo *pCreateInfo,
- const struct radv_graphics_pipeline_create_info *extra,
- const VkAllocationCallbacks *pAllocator,
- VkPipeline *pPipeline)
-{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
- struct radv_pipeline *pipeline;
- VkResult result;
+ struct radv_pipeline_key key =
+ radv_generate_graphics_pipeline_key(pipeline, pCreateInfo, &blend);
+
+ result = radv_create_shaders(pipeline, device, cache, &key, pStages, pCreateInfo->flags,
+ pipeline_feedback, stage_feedbacks);
+ if (result != VK_SUCCESS)
+ return result;
+
+ pipeline->graphics.spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
+ radv_pipeline_init_multisample_state(pipeline, &blend, pCreateInfo);
+ radv_pipeline_init_input_assembly_state(pipeline, pCreateInfo, extra);
+ radv_pipeline_init_dynamic_state(pipeline, pCreateInfo, extra);
+ radv_pipeline_init_raster_state(pipeline, pCreateInfo);
+ radv_pipeline_init_depth_stencil_state(pipeline, pCreateInfo);
+
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX10_3)
+ gfx103_pipeline_init_vrs_state(pipeline, pCreateInfo);
+
+ /* Ensure that some export memory is always allocated, for two reasons:
+ *
+ * 1) Correctness: The hardware ignores the EXEC mask if no export
+ * memory is allocated, so KILL and alpha test do not work correctly
+ * without this.
+ * 2) Performance: Every shader needs at least a NULL export, even when
+ * it writes no color/depth output. The NULL export instruction
+ * stalls without this setting.
+ *
+ * Don't add this to CB_SHADER_MASK.
+ *
+ * GFX10 supports pixel shaders without exports by setting both the
+ * color and Z formats to SPI_SHADER_ZERO. The hw will skip export
+ * instructions if any are present.
+ */
+ struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
+ if ((pipeline->device->physical_device->rad_info.chip_class <= GFX9 ||
+ ps->info.ps.can_discard) &&
+ !blend.spi_shader_col_format) {
+ if (!ps->info.ps.writes_z && !ps->info.ps.writes_stencil && !ps->info.ps.writes_sample_mask)
+ blend.spi_shader_col_format = V_028714_SPI_SHADER_32_R;
+ }
+
+ if (extra && (extra->custom_blend_mode == V_028808_CB_ELIMINATE_FAST_CLEAR ||
+ extra->custom_blend_mode == V_028808_CB_FMASK_DECOMPRESS ||
+ extra->custom_blend_mode == V_028808_CB_DCC_DECOMPRESS ||
+ extra->custom_blend_mode == V_028808_CB_RESOLVE)) {
+ /* According to the CB spec states, CB_SHADER_MASK should be
+ * set to enable writes to all four channels of MRT0.
+ */
+ blend.cb_shader_mask = 0xf;
+ }
+
+ pipeline->graphics.col_format = blend.spi_shader_col_format;
+ pipeline->graphics.cb_target_mask = blend.cb_target_mask;
+
+ if (radv_pipeline_has_gs(pipeline) && !radv_pipeline_has_ngg(pipeline)) {
+ struct radv_shader_variant *gs = pipeline->shaders[MESA_SHADER_GEOMETRY];
+
+ radv_pipeline_init_gs_ring_state(pipeline, &gs->info.gs_ring_info);
+ }
+
+ if (radv_pipeline_has_tess(pipeline)) {
+ pipeline->graphics.tess_patch_control_points =
+ pCreateInfo->pTessellationState->patchControlPoints;
+ }
- pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (pipeline == NULL)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ radv_pipeline_init_vertex_input_state(pipeline, pCreateInfo);
+ radv_pipeline_init_binning_state(pipeline, pCreateInfo, &blend);
+ radv_pipeline_init_shader_stages_state(pipeline);
+ radv_pipeline_init_scratch(device, pipeline);
- vk_object_base_init(&device->vk, &pipeline->base,
- VK_OBJECT_TYPE_PIPELINE);
+ /* Find the last vertex shader stage that eventually uses streamout. */
+ pipeline->streamout_shader = radv_pipeline_get_streamout_shader(pipeline);
- result = radv_pipeline_init(pipeline, device, cache,
- pCreateInfo, extra);
- if (result != VK_SUCCESS) {
- radv_pipeline_destroy(device, pipeline, pAllocator);
- return result;
- }
+ radv_pipeline_generate_pm4(pipeline, pCreateInfo, extra, &blend);
- *pPipeline = radv_pipeline_to_handle(pipeline);
+ return result;
+}
- return VK_SUCCESS;
+VkResult
+radv_graphics_pipeline_create(VkDevice _device, VkPipelineCache _cache,
+ const VkGraphicsPipelineCreateInfo *pCreateInfo,
+ const struct radv_graphics_pipeline_create_info *extra,
+ const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
+ struct radv_pipeline *pipeline;
+ VkResult result;
+
+ pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (pipeline == NULL)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ vk_object_base_init(&device->vk, &pipeline->base, VK_OBJECT_TYPE_PIPELINE);
+
+ result = radv_pipeline_init(pipeline, device, cache, pCreateInfo, extra);
+ if (result != VK_SUCCESS) {
+ radv_pipeline_destroy(device, pipeline, pAllocator);
+ return result;
+ }
+
+ *pPipeline = radv_pipeline_to_handle(pipeline);
+
+ return VK_SUCCESS;
}
-VkResult radv_CreateGraphicsPipelines(
- VkDevice _device,
- VkPipelineCache pipelineCache,
- uint32_t count,
- const VkGraphicsPipelineCreateInfo* pCreateInfos,
- const VkAllocationCallbacks* pAllocator,
- VkPipeline* pPipelines)
+VkResult
+radv_CreateGraphicsPipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count,
+ const VkGraphicsPipelineCreateInfo *pCreateInfos,
+ const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines)
{
- VkResult result = VK_SUCCESS;
- unsigned i = 0;
-
- for (; i < count; i++) {
- VkResult r;
- r = radv_graphics_pipeline_create(_device,
- pipelineCache,
- &pCreateInfos[i],
- NULL, pAllocator, &pPipelines[i]);
- if (r != VK_SUCCESS) {
- result = r;
- pPipelines[i] = VK_NULL_HANDLE;
-
- if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT)
- break;
- }
- }
-
- for (; i < count; ++i)
- pPipelines[i] = VK_NULL_HANDLE;
-
- return result;
+ VkResult result = VK_SUCCESS;
+ unsigned i = 0;
+
+ for (; i < count; i++) {
+ VkResult r;
+ r = radv_graphics_pipeline_create(_device, pipelineCache, &pCreateInfos[i], NULL, pAllocator,
+ &pPipelines[i]);
+ if (r != VK_SUCCESS) {
+ result = r;
+ pPipelines[i] = VK_NULL_HANDLE;
+
+ if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT)
+ break;
+ }
+ }
+
+ for (; i < count; ++i)
+ pPipelines[i] = VK_NULL_HANDLE;
+
+ return result;
}
static void
-radv_pipeline_generate_hw_cs(struct radeon_cmdbuf *cs,
- const struct radv_pipeline *pipeline)
+radv_pipeline_generate_hw_cs(struct radeon_cmdbuf *cs, const struct radv_pipeline *pipeline)
{
- struct radv_shader_variant *shader = pipeline->shaders[MESA_SHADER_COMPUTE];
- uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
- struct radv_device *device = pipeline->device;
-
- radeon_set_sh_reg_seq(cs, R_00B830_COMPUTE_PGM_LO, 2);
- radeon_emit(cs, va >> 8);
- radeon_emit(cs, S_00B834_DATA(va >> 40));
-
- radeon_set_sh_reg_seq(cs, R_00B848_COMPUTE_PGM_RSRC1, 2);
- radeon_emit(cs, shader->config.rsrc1);
- radeon_emit(cs, shader->config.rsrc2);
- if (device->physical_device->rad_info.chip_class >= GFX10) {
- radeon_set_sh_reg(cs, R_00B8A0_COMPUTE_PGM_RSRC3, shader->config.rsrc3);
- }
+ struct radv_shader_variant *shader = pipeline->shaders[MESA_SHADER_COMPUTE];
+ uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
+ struct radv_device *device = pipeline->device;
+
+ radeon_set_sh_reg_seq(cs, R_00B830_COMPUTE_PGM_LO, 2);
+ radeon_emit(cs, va >> 8);
+ radeon_emit(cs, S_00B834_DATA(va >> 40));
+
+ radeon_set_sh_reg_seq(cs, R_00B848_COMPUTE_PGM_RSRC1, 2);
+ radeon_emit(cs, shader->config.rsrc1);
+ radeon_emit(cs, shader->config.rsrc2);
+ if (device->physical_device->rad_info.chip_class >= GFX10) {
+ radeon_set_sh_reg(cs, R_00B8A0_COMPUTE_PGM_RSRC3, shader->config.rsrc3);
+ }
}
static void
-radv_pipeline_generate_compute_state(struct radeon_cmdbuf *cs,
- const struct radv_pipeline *pipeline)
+radv_pipeline_generate_compute_state(struct radeon_cmdbuf *cs, const struct radv_pipeline *pipeline)
{
- struct radv_shader_variant *shader = pipeline->shaders[MESA_SHADER_COMPUTE];
- struct radv_device *device = pipeline->device;
- unsigned threads_per_threadgroup;
- unsigned threadgroups_per_cu = 1;
- unsigned waves_per_threadgroup;
- unsigned max_waves_per_sh = 0;
-
- /* Calculate best compute resource limits. */
- threads_per_threadgroup = shader->info.cs.block_size[0] *
- shader->info.cs.block_size[1] *
- shader->info.cs.block_size[2];
- waves_per_threadgroup = DIV_ROUND_UP(threads_per_threadgroup,
- shader->info.wave_size);
-
- if (device->physical_device->rad_info.chip_class >= GFX10 &&
- waves_per_threadgroup == 1)
- threadgroups_per_cu = 2;
-
- radeon_set_sh_reg(cs, R_00B854_COMPUTE_RESOURCE_LIMITS,
- ac_get_compute_resource_limits(&device->physical_device->rad_info,
- waves_per_threadgroup,
- max_waves_per_sh,
- threadgroups_per_cu));
-
- radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
- radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[0]));
- radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[1]));
- radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[2]));
+ struct radv_shader_variant *shader = pipeline->shaders[MESA_SHADER_COMPUTE];
+ struct radv_device *device = pipeline->device;
+ unsigned threads_per_threadgroup;
+ unsigned threadgroups_per_cu = 1;
+ unsigned waves_per_threadgroup;
+ unsigned max_waves_per_sh = 0;
+
+ /* Calculate best compute resource limits. */
+ threads_per_threadgroup =
+ shader->info.cs.block_size[0] * shader->info.cs.block_size[1] * shader->info.cs.block_size[2];
+ waves_per_threadgroup = DIV_ROUND_UP(threads_per_threadgroup, shader->info.wave_size);
+
+ if (device->physical_device->rad_info.chip_class >= GFX10 && waves_per_threadgroup == 1)
+ threadgroups_per_cu = 2;
+
+ radeon_set_sh_reg(
+ cs, R_00B854_COMPUTE_RESOURCE_LIMITS,
+ ac_get_compute_resource_limits(&device->physical_device->rad_info, waves_per_threadgroup,
+ max_waves_per_sh, threadgroups_per_cu));
+
+ radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
+ radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[0]));
+ radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[1]));
+ radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[2]));
}
static void
radv_compute_generate_pm4(struct radv_pipeline *pipeline)
{
- struct radv_device *device = pipeline->device;
- struct radeon_cmdbuf *cs = &pipeline->cs;
+ struct radv_device *device = pipeline->device;
+ struct radeon_cmdbuf *cs = &pipeline->cs;
- cs->max_dw = device->physical_device->rad_info.chip_class >= GFX10 ? 19 : 16;
- cs->buf = malloc(cs->max_dw * 4);
+ cs->max_dw = device->physical_device->rad_info.chip_class >= GFX10 ? 19 : 16;
+ cs->buf = malloc(cs->max_dw * 4);
- radv_pipeline_generate_hw_cs(cs, pipeline);
- radv_pipeline_generate_compute_state(cs, pipeline);
+ radv_pipeline_generate_hw_cs(cs, pipeline);
+ radv_pipeline_generate_compute_state(cs, pipeline);
- assert(pipeline->cs.cdw <= pipeline->cs.max_dw);
+ assert(pipeline->cs.cdw <= pipeline->cs.max_dw);
}
static struct radv_pipeline_key
radv_generate_compute_pipeline_key(struct radv_pipeline *pipeline,
- const VkComputePipelineCreateInfo *pCreateInfo)
+ const VkComputePipelineCreateInfo *pCreateInfo)
{
- const VkPipelineShaderStageCreateInfo *stage = &pCreateInfo->stage;
- struct radv_pipeline_key key;
- memset(&key, 0, sizeof(key));
-
- if (pCreateInfo->flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT)
- key.optimisations_disabled = 1;
-
- const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *subgroup_size =
- vk_find_struct_const(stage->pNext,
- PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT);
-
- if (subgroup_size) {
- assert(subgroup_size->requiredSubgroupSize == 32 ||
- subgroup_size->requiredSubgroupSize == 64);
- key.compute_subgroup_size = subgroup_size->requiredSubgroupSize;
- } else if (stage->flags & VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT) {
- key.require_full_subgroups = true;
- }
-
- return key;
+ const VkPipelineShaderStageCreateInfo *stage = &pCreateInfo->stage;
+ struct radv_pipeline_key key;
+ memset(&key, 0, sizeof(key));
+
+ if (pCreateInfo->flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT)
+ key.optimisations_disabled = 1;
+
+ const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *subgroup_size =
+ vk_find_struct_const(stage->pNext,
+ PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT);
+
+ if (subgroup_size) {
+ assert(subgroup_size->requiredSubgroupSize == 32 ||
+ subgroup_size->requiredSubgroupSize == 64);
+ key.compute_subgroup_size = subgroup_size->requiredSubgroupSize;
+ } else if (stage->flags & VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT) {
+ key.require_full_subgroups = true;
+ }
+
+ return key;
}
-static VkResult radv_compute_pipeline_create(
- VkDevice _device,
- VkPipelineCache _cache,
- const VkComputePipelineCreateInfo* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkPipeline* pPipeline)
+static VkResult
+radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache,
+ const VkComputePipelineCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
- const VkPipelineShaderStageCreateInfo *pStages[MESA_SHADER_STAGES] = { 0, };
- VkPipelineCreationFeedbackEXT *stage_feedbacks[MESA_SHADER_STAGES] = { 0 };
- struct radv_pipeline *pipeline;
- VkResult result;
-
- pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (pipeline == NULL)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
-
- vk_object_base_init(&device->vk, &pipeline->base,
- VK_OBJECT_TYPE_PIPELINE);
-
- pipeline->device = device;
- pipeline->layout = radv_pipeline_layout_from_handle(pCreateInfo->layout);
- assert(pipeline->layout);
-
- const VkPipelineCreationFeedbackCreateInfoEXT *creation_feedback =
- vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
- radv_init_feedback(creation_feedback);
-
- VkPipelineCreationFeedbackEXT *pipeline_feedback = creation_feedback ? creation_feedback->pPipelineCreationFeedback : NULL;
- if (creation_feedback)
- stage_feedbacks[MESA_SHADER_COMPUTE] = &creation_feedback->pPipelineStageCreationFeedbacks[0];
-
- pStages[MESA_SHADER_COMPUTE] = &pCreateInfo->stage;
-
- struct radv_pipeline_key key =
- radv_generate_compute_pipeline_key(pipeline, pCreateInfo);
-
- result = radv_create_shaders(pipeline, device, cache, &key, pStages,
- pCreateInfo->flags, pipeline_feedback,
- stage_feedbacks);
- if (result != VK_SUCCESS) {
- radv_pipeline_destroy(device, pipeline, pAllocator);
- return result;
- }
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
+ const VkPipelineShaderStageCreateInfo *pStages[MESA_SHADER_STAGES] = {
+ 0,
+ };
+ VkPipelineCreationFeedbackEXT *stage_feedbacks[MESA_SHADER_STAGES] = {0};
+ struct radv_pipeline *pipeline;
+ VkResult result;
+
+ pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (pipeline == NULL)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ vk_object_base_init(&device->vk, &pipeline->base, VK_OBJECT_TYPE_PIPELINE);
+
+ pipeline->device = device;
+ pipeline->layout = radv_pipeline_layout_from_handle(pCreateInfo->layout);
+ assert(pipeline->layout);
+
+ const VkPipelineCreationFeedbackCreateInfoEXT *creation_feedback =
+ vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
+ radv_init_feedback(creation_feedback);
+
+ VkPipelineCreationFeedbackEXT *pipeline_feedback =
+ creation_feedback ? creation_feedback->pPipelineCreationFeedback : NULL;
+ if (creation_feedback)
+ stage_feedbacks[MESA_SHADER_COMPUTE] = &creation_feedback->pPipelineStageCreationFeedbacks[0];
+
+ pStages[MESA_SHADER_COMPUTE] = &pCreateInfo->stage;
+
+ struct radv_pipeline_key key = radv_generate_compute_pipeline_key(pipeline, pCreateInfo);
+
+ result = radv_create_shaders(pipeline, device, cache, &key, pStages, pCreateInfo->flags,
+ pipeline_feedback, stage_feedbacks);
+ if (result != VK_SUCCESS) {
+ radv_pipeline_destroy(device, pipeline, pAllocator);
+ return result;
+ }
- pipeline->user_data_0[MESA_SHADER_COMPUTE] = radv_pipeline_stage_to_user_data_0(pipeline, MESA_SHADER_COMPUTE, device->physical_device->rad_info.chip_class);
- pipeline->need_indirect_descriptor_sets |= pipeline->shaders[MESA_SHADER_COMPUTE]->info.need_indirect_descriptor_sets;
- radv_pipeline_init_scratch(device, pipeline);
+ pipeline->user_data_0[MESA_SHADER_COMPUTE] = radv_pipeline_stage_to_user_data_0(
+ pipeline, MESA_SHADER_COMPUTE, device->physical_device->rad_info.chip_class);
+ pipeline->need_indirect_descriptor_sets |=
+ pipeline->shaders[MESA_SHADER_COMPUTE]->info.need_indirect_descriptor_sets;
+ radv_pipeline_init_scratch(device, pipeline);
- radv_compute_generate_pm4(pipeline);
+ radv_compute_generate_pm4(pipeline);
- *pPipeline = radv_pipeline_to_handle(pipeline);
+ *pPipeline = radv_pipeline_to_handle(pipeline);
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
-VkResult radv_CreateComputePipelines(
- VkDevice _device,
- VkPipelineCache pipelineCache,
- uint32_t count,
- const VkComputePipelineCreateInfo* pCreateInfos,
- const VkAllocationCallbacks* pAllocator,
- VkPipeline* pPipelines)
+VkResult
+radv_CreateComputePipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count,
+ const VkComputePipelineCreateInfo *pCreateInfos,
+ const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines)
{
- VkResult result = VK_SUCCESS;
-
- unsigned i = 0;
- for (; i < count; i++) {
- VkResult r;
- r = radv_compute_pipeline_create(_device, pipelineCache,
- &pCreateInfos[i],
- pAllocator, &pPipelines[i]);
- if (r != VK_SUCCESS) {
- result = r;
- pPipelines[i] = VK_NULL_HANDLE;
-
- if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT)
- break;
- }
- }
-
- for (; i < count; ++i)
- pPipelines[i] = VK_NULL_HANDLE;
-
- return result;
-}
+ VkResult result = VK_SUCCESS;
+
+ unsigned i = 0;
+ for (; i < count; i++) {
+ VkResult r;
+ r = radv_compute_pipeline_create(_device, pipelineCache, &pCreateInfos[i], pAllocator,
+ &pPipelines[i]);
+ if (r != VK_SUCCESS) {
+ result = r;
+ pPipelines[i] = VK_NULL_HANDLE;
+
+ if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT)
+ break;
+ }
+ }
+ for (; i < count; ++i)
+ pPipelines[i] = VK_NULL_HANDLE;
-static uint32_t radv_get_executable_count(const struct radv_pipeline *pipeline)
+ return result;
+}
+
+static uint32_t
+radv_get_executable_count(const struct radv_pipeline *pipeline)
{
- uint32_t ret = 0;
- for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
- if (!pipeline->shaders[i])
- continue;
-
- if (i == MESA_SHADER_GEOMETRY &&
- !radv_pipeline_has_ngg(pipeline)) {
- ret += 2u;
- } else {
- ret += 1u;
- }
-
- }
- return ret;
+ uint32_t ret = 0;
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
+ if (!pipeline->shaders[i])
+ continue;
+
+ if (i == MESA_SHADER_GEOMETRY && !radv_pipeline_has_ngg(pipeline)) {
+ ret += 2u;
+ } else {
+ ret += 1u;
+ }
+ }
+ return ret;
}
static struct radv_shader_variant *
-radv_get_shader_from_executable_index(const struct radv_pipeline *pipeline, int index, gl_shader_stage *stage)
+radv_get_shader_from_executable_index(const struct radv_pipeline *pipeline, int index,
+ gl_shader_stage *stage)
{
- for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
- if (!pipeline->shaders[i])
- continue;
- if (!index) {
- *stage = i;
- return pipeline->shaders[i];
- }
-
- --index;
-
- if (i == MESA_SHADER_GEOMETRY &&
- !radv_pipeline_has_ngg(pipeline)) {
- if (!index) {
- *stage = i;
- return pipeline->gs_copy_shader;
- }
- --index;
- }
- }
-
- *stage = -1;
- return NULL;
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
+ if (!pipeline->shaders[i])
+ continue;
+ if (!index) {
+ *stage = i;
+ return pipeline->shaders[i];
+ }
+
+ --index;
+
+ if (i == MESA_SHADER_GEOMETRY && !radv_pipeline_has_ngg(pipeline)) {
+ if (!index) {
+ *stage = i;
+ return pipeline->gs_copy_shader;
+ }
+ --index;
+ }
+ }
+
+ *stage = -1;
+ return NULL;
}
/* Basically strlcpy (which does not exist on linux) specialized for
* descriptions. */
-static void desc_copy(char *desc, const char *src) {
- int len = strlen(src);
- assert(len < VK_MAX_DESCRIPTION_SIZE);
- memcpy(desc, src, len);
- memset(desc + len, 0, VK_MAX_DESCRIPTION_SIZE - len);
+static void
+desc_copy(char *desc, const char *src)
+{
+ int len = strlen(src);
+ assert(len < VK_MAX_DESCRIPTION_SIZE);
+ memcpy(desc, src, len);
+ memset(desc + len, 0, VK_MAX_DESCRIPTION_SIZE - len);
}
-VkResult radv_GetPipelineExecutablePropertiesKHR(
- VkDevice _device,
- const VkPipelineInfoKHR* pPipelineInfo,
- uint32_t* pExecutableCount,
- VkPipelineExecutablePropertiesKHR* pProperties)
+VkResult
+radv_GetPipelineExecutablePropertiesKHR(VkDevice _device, const VkPipelineInfoKHR *pPipelineInfo,
+ uint32_t *pExecutableCount,
+ VkPipelineExecutablePropertiesKHR *pProperties)
{
- RADV_FROM_HANDLE(radv_pipeline, pipeline, pPipelineInfo->pipeline);
- const uint32_t total_count = radv_get_executable_count(pipeline);
-
- if (!pProperties) {
- *pExecutableCount = total_count;
- return VK_SUCCESS;
- }
-
- const uint32_t count = MIN2(total_count, *pExecutableCount);
- for (unsigned i = 0, executable_idx = 0;
- i < MESA_SHADER_STAGES && executable_idx < count; ++i) {
- if (!pipeline->shaders[i])
- continue;
- pProperties[executable_idx].stages = mesa_to_vk_shader_stage(i);
- const char *name = NULL;
- const char *description = NULL;
- switch(i) {
- case MESA_SHADER_VERTEX:
- name = "Vertex Shader";
- description = "Vulkan Vertex Shader";
- break;
- case MESA_SHADER_TESS_CTRL:
- if (!pipeline->shaders[MESA_SHADER_VERTEX]) {
- pProperties[executable_idx].stages |= VK_SHADER_STAGE_VERTEX_BIT;
- name = "Vertex + Tessellation Control Shaders";
- description = "Combined Vulkan Vertex and Tessellation Control Shaders";
- } else {
- name = "Tessellation Control Shader";
- description = "Vulkan Tessellation Control Shader";
- }
- break;
- case MESA_SHADER_TESS_EVAL:
- name = "Tessellation Evaluation Shader";
- description = "Vulkan Tessellation Evaluation Shader";
- break;
- case MESA_SHADER_GEOMETRY:
- if (radv_pipeline_has_tess(pipeline) && !pipeline->shaders[MESA_SHADER_TESS_EVAL]) {
- pProperties[executable_idx].stages |= VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
- name = "Tessellation Evaluation + Geometry Shaders";
- description = "Combined Vulkan Tessellation Evaluation and Geometry Shaders";
- } else if (!radv_pipeline_has_tess(pipeline) && !pipeline->shaders[MESA_SHADER_VERTEX]) {
- pProperties[executable_idx].stages |= VK_SHADER_STAGE_VERTEX_BIT;
- name = "Vertex + Geometry Shader";
- description = "Combined Vulkan Vertex and Geometry Shaders";
- } else {
- name = "Geometry Shader";
- description = "Vulkan Geometry Shader";
- }
- break;
- case MESA_SHADER_FRAGMENT:
- name = "Fragment Shader";
- description = "Vulkan Fragment Shader";
- break;
- case MESA_SHADER_COMPUTE:
- name = "Compute Shader";
- description = "Vulkan Compute Shader";
- break;
- }
-
- pProperties[executable_idx].subgroupSize = pipeline->shaders[i]->info.wave_size;
- desc_copy(pProperties[executable_idx].name, name);
- desc_copy(pProperties[executable_idx].description, description);
-
- ++executable_idx;
- if (i == MESA_SHADER_GEOMETRY &&
- !radv_pipeline_has_ngg(pipeline)) {
- assert(pipeline->gs_copy_shader);
- if (executable_idx >= count)
- break;
-
- pProperties[executable_idx].stages = VK_SHADER_STAGE_GEOMETRY_BIT;
- pProperties[executable_idx].subgroupSize = 64;
- desc_copy(pProperties[executable_idx].name, "GS Copy Shader");
- desc_copy(pProperties[executable_idx].description,
- "Extra shader stage that loads the GS output ringbuffer into the rasterizer");
-
- ++executable_idx;
- }
- }
-
- VkResult result = *pExecutableCount < total_count ? VK_INCOMPLETE : VK_SUCCESS;
- *pExecutableCount = count;
- return result;
+ RADV_FROM_HANDLE(radv_pipeline, pipeline, pPipelineInfo->pipeline);
+ const uint32_t total_count = radv_get_executable_count(pipeline);
+
+ if (!pProperties) {
+ *pExecutableCount = total_count;
+ return VK_SUCCESS;
+ }
+
+ const uint32_t count = MIN2(total_count, *pExecutableCount);
+ for (unsigned i = 0, executable_idx = 0; i < MESA_SHADER_STAGES && executable_idx < count; ++i) {
+ if (!pipeline->shaders[i])
+ continue;
+ pProperties[executable_idx].stages = mesa_to_vk_shader_stage(i);
+ const char *name = NULL;
+ const char *description = NULL;
+ switch (i) {
+ case MESA_SHADER_VERTEX:
+ name = "Vertex Shader";
+ description = "Vulkan Vertex Shader";
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ if (!pipeline->shaders[MESA_SHADER_VERTEX]) {
+ pProperties[executable_idx].stages |= VK_SHADER_STAGE_VERTEX_BIT;
+ name = "Vertex + Tessellation Control Shaders";
+ description = "Combined Vulkan Vertex and Tessellation Control Shaders";
+ } else {
+ name = "Tessellation Control Shader";
+ description = "Vulkan Tessellation Control Shader";
+ }
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ name = "Tessellation Evaluation Shader";
+ description = "Vulkan Tessellation Evaluation Shader";
+ break;
+ case MESA_SHADER_GEOMETRY:
+ if (radv_pipeline_has_tess(pipeline) && !pipeline->shaders[MESA_SHADER_TESS_EVAL]) {
+ pProperties[executable_idx].stages |= VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
+ name = "Tessellation Evaluation + Geometry Shaders";
+ description = "Combined Vulkan Tessellation Evaluation and Geometry Shaders";
+ } else if (!radv_pipeline_has_tess(pipeline) && !pipeline->shaders[MESA_SHADER_VERTEX]) {
+ pProperties[executable_idx].stages |= VK_SHADER_STAGE_VERTEX_BIT;
+ name = "Vertex + Geometry Shader";
+ description = "Combined Vulkan Vertex and Geometry Shaders";
+ } else {
+ name = "Geometry Shader";
+ description = "Vulkan Geometry Shader";
+ }
+ break;
+ case MESA_SHADER_FRAGMENT:
+ name = "Fragment Shader";
+ description = "Vulkan Fragment Shader";
+ break;
+ case MESA_SHADER_COMPUTE:
+ name = "Compute Shader";
+ description = "Vulkan Compute Shader";
+ break;
+ }
+
+ pProperties[executable_idx].subgroupSize = pipeline->shaders[i]->info.wave_size;
+ desc_copy(pProperties[executable_idx].name, name);
+ desc_copy(pProperties[executable_idx].description, description);
+
+ ++executable_idx;
+ if (i == MESA_SHADER_GEOMETRY && !radv_pipeline_has_ngg(pipeline)) {
+ assert(pipeline->gs_copy_shader);
+ if (executable_idx >= count)
+ break;
+
+ pProperties[executable_idx].stages = VK_SHADER_STAGE_GEOMETRY_BIT;
+ pProperties[executable_idx].subgroupSize = 64;
+ desc_copy(pProperties[executable_idx].name, "GS Copy Shader");
+ desc_copy(pProperties[executable_idx].description,
+ "Extra shader stage that loads the GS output ringbuffer into the rasterizer");
+
+ ++executable_idx;
+ }
+ }
+
+ VkResult result = *pExecutableCount < total_count ? VK_INCOMPLETE : VK_SUCCESS;
+ *pExecutableCount = count;
+ return result;
}
-VkResult radv_GetPipelineExecutableStatisticsKHR(
- VkDevice _device,
- const VkPipelineExecutableInfoKHR* pExecutableInfo,
- uint32_t* pStatisticCount,
- VkPipelineExecutableStatisticKHR* pStatistics)
+VkResult
+radv_GetPipelineExecutableStatisticsKHR(VkDevice _device,
+ const VkPipelineExecutableInfoKHR *pExecutableInfo,
+ uint32_t *pStatisticCount,
+ VkPipelineExecutableStatisticKHR *pStatistics)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_pipeline, pipeline, pExecutableInfo->pipeline);
- gl_shader_stage stage;
- struct radv_shader_variant *shader = radv_get_shader_from_executable_index(pipeline, pExecutableInfo->executableIndex, &stage);
-
- enum chip_class chip_class = device->physical_device->rad_info.chip_class;
- unsigned lds_increment = chip_class >= GFX7 ? 512 : 256;
- unsigned max_waves = radv_get_max_waves(device, shader, stage);
-
- VkPipelineExecutableStatisticKHR *s = pStatistics;
- VkPipelineExecutableStatisticKHR *end = s + (pStatistics ? *pStatisticCount : 0);
- VkResult result = VK_SUCCESS;
-
- if (s < end) {
- desc_copy(s->name, "SGPRs");
- desc_copy(s->description, "Number of SGPR registers allocated per subgroup");
- s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
- s->value.u64 = shader->config.num_sgprs;
- }
- ++s;
-
- if (s < end) {
- desc_copy(s->name, "VGPRs");
- desc_copy(s->description, "Number of VGPR registers allocated per subgroup");
- s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
- s->value.u64 = shader->config.num_vgprs;
- }
- ++s;
-
- if (s < end) {
- desc_copy(s->name, "Spilled SGPRs");
- desc_copy(s->description, "Number of SGPR registers spilled per subgroup");
- s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
- s->value.u64 = shader->config.spilled_sgprs;
- }
- ++s;
-
- if (s < end) {
- desc_copy(s->name, "Spilled VGPRs");
- desc_copy(s->description, "Number of VGPR registers spilled per subgroup");
- s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
- s->value.u64 = shader->config.spilled_vgprs;
- }
- ++s;
-
- if (s < end) {
- desc_copy(s->name, "PrivMem VGPRs");
- desc_copy(s->description, "Number of VGPRs stored in private memory per subgroup");
- s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
- s->value.u64 = shader->info.private_mem_vgprs;
- }
- ++s;
-
- if (s < end) {
- desc_copy(s->name, "Code size");
- desc_copy(s->description, "Code size in bytes");
- s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
- s->value.u64 = shader->exec_size;
- }
- ++s;
-
- if (s < end) {
- desc_copy(s->name, "LDS size");
- desc_copy(s->description, "LDS size in bytes per workgroup");
- s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
- s->value.u64 = shader->config.lds_size * lds_increment;
- }
- ++s;
-
- if (s < end) {
- desc_copy(s->name, "Scratch size");
- desc_copy(s->description, "Private memory in bytes per subgroup");
- s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
- s->value.u64 = shader->config.scratch_bytes_per_wave;
- }
- ++s;
-
- if (s < end) {
- desc_copy(s->name, "Subgroups per SIMD");
- desc_copy(s->description, "The maximum number of subgroups in flight on a SIMD unit");
- s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
- s->value.u64 = max_waves;
- }
- ++s;
-
- if (shader->statistics) {
- for (unsigned i = 0; i < aco_num_statistics; i++) {
- const struct aco_compiler_statistic_info *info = &aco_statistic_infos[i];
- if (s < end) {
- desc_copy(s->name, info->name);
- desc_copy(s->description, info->desc);
- s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
- s->value.u64 = shader->statistics[i];
- }
- ++s;
- }
- }
-
- if (!pStatistics)
- *pStatisticCount = s - pStatistics;
- else if (s > end) {
- *pStatisticCount = end - pStatistics;
- result = VK_INCOMPLETE;
- } else {
- *pStatisticCount = s - pStatistics;
- }
-
- return result;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_pipeline, pipeline, pExecutableInfo->pipeline);
+ gl_shader_stage stage;
+ struct radv_shader_variant *shader =
+ radv_get_shader_from_executable_index(pipeline, pExecutableInfo->executableIndex, &stage);
+
+ enum chip_class chip_class = device->physical_device->rad_info.chip_class;
+ unsigned lds_increment = chip_class >= GFX7 ? 512 : 256;
+ unsigned max_waves = radv_get_max_waves(device, shader, stage);
+
+ VkPipelineExecutableStatisticKHR *s = pStatistics;
+ VkPipelineExecutableStatisticKHR *end = s + (pStatistics ? *pStatisticCount : 0);
+ VkResult result = VK_SUCCESS;
+
+ if (s < end) {
+ desc_copy(s->name, "SGPRs");
+ desc_copy(s->description, "Number of SGPR registers allocated per subgroup");
+ s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+ s->value.u64 = shader->config.num_sgprs;
+ }
+ ++s;
+
+ if (s < end) {
+ desc_copy(s->name, "VGPRs");
+ desc_copy(s->description, "Number of VGPR registers allocated per subgroup");
+ s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+ s->value.u64 = shader->config.num_vgprs;
+ }
+ ++s;
+
+ if (s < end) {
+ desc_copy(s->name, "Spilled SGPRs");
+ desc_copy(s->description, "Number of SGPR registers spilled per subgroup");
+ s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+ s->value.u64 = shader->config.spilled_sgprs;
+ }
+ ++s;
+
+ if (s < end) {
+ desc_copy(s->name, "Spilled VGPRs");
+ desc_copy(s->description, "Number of VGPR registers spilled per subgroup");
+ s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+ s->value.u64 = shader->config.spilled_vgprs;
+ }
+ ++s;
+
+ if (s < end) {
+ desc_copy(s->name, "PrivMem VGPRs");
+ desc_copy(s->description, "Number of VGPRs stored in private memory per subgroup");
+ s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+ s->value.u64 = shader->info.private_mem_vgprs;
+ }
+ ++s;
+
+ if (s < end) {
+ desc_copy(s->name, "Code size");
+ desc_copy(s->description, "Code size in bytes");
+ s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+ s->value.u64 = shader->exec_size;
+ }
+ ++s;
+
+ if (s < end) {
+ desc_copy(s->name, "LDS size");
+ desc_copy(s->description, "LDS size in bytes per workgroup");
+ s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+ s->value.u64 = shader->config.lds_size * lds_increment;
+ }
+ ++s;
+
+ if (s < end) {
+ desc_copy(s->name, "Scratch size");
+ desc_copy(s->description, "Private memory in bytes per subgroup");
+ s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+ s->value.u64 = shader->config.scratch_bytes_per_wave;
+ }
+ ++s;
+
+ if (s < end) {
+ desc_copy(s->name, "Subgroups per SIMD");
+ desc_copy(s->description, "The maximum number of subgroups in flight on a SIMD unit");
+ s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+ s->value.u64 = max_waves;
+ }
+ ++s;
+
+ if (shader->statistics) {
+ for (unsigned i = 0; i < aco_num_statistics; i++) {
+ const struct aco_compiler_statistic_info *info = &aco_statistic_infos[i];
+ if (s < end) {
+ desc_copy(s->name, info->name);
+ desc_copy(s->description, info->desc);
+ s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+ s->value.u64 = shader->statistics[i];
+ }
+ ++s;
+ }
+ }
+
+ if (!pStatistics)
+ *pStatisticCount = s - pStatistics;
+ else if (s > end) {
+ *pStatisticCount = end - pStatistics;
+ result = VK_INCOMPLETE;
+ } else {
+ *pStatisticCount = s - pStatistics;
+ }
+
+ return result;
}
-static VkResult radv_copy_representation(void *data, size_t *data_size, const char *src)
+static VkResult
+radv_copy_representation(void *data, size_t *data_size, const char *src)
{
- size_t total_size = strlen(src) + 1;
+ size_t total_size = strlen(src) + 1;
- if (!data) {
- *data_size = total_size;
- return VK_SUCCESS;
- }
+ if (!data) {
+ *data_size = total_size;
+ return VK_SUCCESS;
+ }
- size_t size = MIN2(total_size, *data_size);
+ size_t size = MIN2(total_size, *data_size);
- memcpy(data, src, size);
- if (size)
- *((char*)data + size - 1) = 0;
- return size < total_size ? VK_INCOMPLETE : VK_SUCCESS;
+ memcpy(data, src, size);
+ if (size)
+ *((char *)data + size - 1) = 0;
+ return size < total_size ? VK_INCOMPLETE : VK_SUCCESS;
}
-VkResult radv_GetPipelineExecutableInternalRepresentationsKHR(
- VkDevice device,
- const VkPipelineExecutableInfoKHR* pExecutableInfo,
- uint32_t* pInternalRepresentationCount,
- VkPipelineExecutableInternalRepresentationKHR* pInternalRepresentations)
+VkResult
+radv_GetPipelineExecutableInternalRepresentationsKHR(
+ VkDevice device, const VkPipelineExecutableInfoKHR *pExecutableInfo,
+ uint32_t *pInternalRepresentationCount,
+ VkPipelineExecutableInternalRepresentationKHR *pInternalRepresentations)
{
- RADV_FROM_HANDLE(radv_pipeline, pipeline, pExecutableInfo->pipeline);
- gl_shader_stage stage;
- struct radv_shader_variant *shader = radv_get_shader_from_executable_index(pipeline, pExecutableInfo->executableIndex, &stage);
-
- VkPipelineExecutableInternalRepresentationKHR *p = pInternalRepresentations;
- VkPipelineExecutableInternalRepresentationKHR *end = p + (pInternalRepresentations ? *pInternalRepresentationCount : 0);
- VkResult result = VK_SUCCESS;
- /* optimized NIR */
- if (p < end) {
- p->isText = true;
- desc_copy(p->name, "NIR Shader(s)");
- desc_copy(p->description, "The optimized NIR shader(s)");
- if (radv_copy_representation(p->pData, &p->dataSize, shader->nir_string) != VK_SUCCESS)
- result = VK_INCOMPLETE;
- }
- ++p;
-
- /* backend IR */
- if (p < end) {
- p->isText = true;
- if (radv_use_llvm_for_stage(pipeline->device, stage)) {
- desc_copy(p->name, "LLVM IR");
- desc_copy(p->description, "The LLVM IR after some optimizations");
- } else {
- desc_copy(p->name, "ACO IR");
- desc_copy(p->description, "The ACO IR after some optimizations");
- }
- if (radv_copy_representation(p->pData, &p->dataSize, shader->ir_string) != VK_SUCCESS)
- result = VK_INCOMPLETE;
- }
- ++p;
-
- /* Disassembler */
- if (p < end) {
- p->isText = true;
- desc_copy(p->name, "Assembly");
- desc_copy(p->description, "Final Assembly");
- if (radv_copy_representation(p->pData, &p->dataSize, shader->disasm_string) != VK_SUCCESS)
- result = VK_INCOMPLETE;
- }
- ++p;
-
- if (!pInternalRepresentations)
- *pInternalRepresentationCount = p - pInternalRepresentations;
- else if(p > end) {
- result = VK_INCOMPLETE;
- *pInternalRepresentationCount = end - pInternalRepresentations;
- } else {
- *pInternalRepresentationCount = p - pInternalRepresentations;
- }
-
- return result;
+ RADV_FROM_HANDLE(radv_pipeline, pipeline, pExecutableInfo->pipeline);
+ gl_shader_stage stage;
+ struct radv_shader_variant *shader =
+ radv_get_shader_from_executable_index(pipeline, pExecutableInfo->executableIndex, &stage);
+
+ VkPipelineExecutableInternalRepresentationKHR *p = pInternalRepresentations;
+ VkPipelineExecutableInternalRepresentationKHR *end =
+ p + (pInternalRepresentations ? *pInternalRepresentationCount : 0);
+ VkResult result = VK_SUCCESS;
+ /* optimized NIR */
+ if (p < end) {
+ p->isText = true;
+ desc_copy(p->name, "NIR Shader(s)");
+ desc_copy(p->description, "The optimized NIR shader(s)");
+ if (radv_copy_representation(p->pData, &p->dataSize, shader->nir_string) != VK_SUCCESS)
+ result = VK_INCOMPLETE;
+ }
+ ++p;
+
+ /* backend IR */
+ if (p < end) {
+ p->isText = true;
+ if (radv_use_llvm_for_stage(pipeline->device, stage)) {
+ desc_copy(p->name, "LLVM IR");
+ desc_copy(p->description, "The LLVM IR after some optimizations");
+ } else {
+ desc_copy(p->name, "ACO IR");
+ desc_copy(p->description, "The ACO IR after some optimizations");
+ }
+ if (radv_copy_representation(p->pData, &p->dataSize, shader->ir_string) != VK_SUCCESS)
+ result = VK_INCOMPLETE;
+ }
+ ++p;
+
+ /* Disassembler */
+ if (p < end) {
+ p->isText = true;
+ desc_copy(p->name, "Assembly");
+ desc_copy(p->description, "Final Assembly");
+ if (radv_copy_representation(p->pData, &p->dataSize, shader->disasm_string) != VK_SUCCESS)
+ result = VK_INCOMPLETE;
+ }
+ ++p;
+
+ if (!pInternalRepresentations)
+ *pInternalRepresentationCount = p - pInternalRepresentations;
+ else if (p > end) {
+ result = VK_INCOMPLETE;
+ *pInternalRepresentationCount = end - pInternalRepresentations;
+ } else {
+ *pInternalRepresentationCount = p - pInternalRepresentations;
+ }
+
+ return result;
}
diff --git a/src/amd/vulkan/radv_pipeline_cache.c b/src/amd/vulkan/radv_pipeline_cache.c
index d25a12f5151..dafe9b43b74 100644
--- a/src/amd/vulkan/radv_pipeline_cache.c
+++ b/src/amd/vulkan/radv_pipeline_cache.c
@@ -21,629 +21,594 @@
* IN THE SOFTWARE.
*/
-#include "util/macros.h"
-#include "util/mesa-sha1.h"
#include "util/debug.h"
#include "util/disk_cache.h"
+#include "util/macros.h"
+#include "util/mesa-sha1.h"
#include "util/u_atomic.h"
+#include "vulkan/util/vk_util.h"
#include "radv_debug.h"
#include "radv_private.h"
#include "radv_shader.h"
-#include "vulkan/util/vk_util.h"
#include "ac_nir_to_llvm.h"
struct cache_entry {
- union {
- unsigned char sha1[20];
- uint32_t sha1_dw[5];
- };
- uint32_t binary_sizes[MESA_SHADER_STAGES];
- struct radv_shader_variant *variants[MESA_SHADER_STAGES];
- char code[0];
+ union {
+ unsigned char sha1[20];
+ uint32_t sha1_dw[5];
+ };
+ uint32_t binary_sizes[MESA_SHADER_STAGES];
+ struct radv_shader_variant *variants[MESA_SHADER_STAGES];
+ char code[0];
};
static void
radv_pipeline_cache_lock(struct radv_pipeline_cache *cache)
{
- if (cache->flags & VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT_EXT)
- return;
+ if (cache->flags & VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT_EXT)
+ return;
- mtx_lock(&cache->mutex);
+ mtx_lock(&cache->mutex);
}
static void
radv_pipeline_cache_unlock(struct radv_pipeline_cache *cache)
{
- if (cache->flags & VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT_EXT)
- return;
+ if (cache->flags & VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT_EXT)
+ return;
- mtx_unlock(&cache->mutex);
+ mtx_unlock(&cache->mutex);
}
void
-radv_pipeline_cache_init(struct radv_pipeline_cache *cache,
- struct radv_device *device)
+radv_pipeline_cache_init(struct radv_pipeline_cache *cache, struct radv_device *device)
{
- cache->device = device;
- mtx_init(&cache->mutex, mtx_plain);
- cache->flags = 0;
-
- cache->modified = false;
- cache->kernel_count = 0;
- cache->total_size = 0;
- cache->table_size = 1024;
- const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
- cache->hash_table = malloc(byte_size);
-
- /* We don't consider allocation failure fatal, we just start with a 0-sized
- * cache. Disable caching when we want to keep shader debug info, since
- * we don't get the debug info on cached shaders. */
- if (cache->hash_table == NULL ||
- (device->instance->debug_flags & RADV_DEBUG_NO_CACHE))
- cache->table_size = 0;
- else
- memset(cache->hash_table, 0, byte_size);
+ cache->device = device;
+ mtx_init(&cache->mutex, mtx_plain);
+ cache->flags = 0;
+
+ cache->modified = false;
+ cache->kernel_count = 0;
+ cache->total_size = 0;
+ cache->table_size = 1024;
+ const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
+ cache->hash_table = malloc(byte_size);
+
+ /* We don't consider allocation failure fatal, we just start with a 0-sized
+ * cache. Disable caching when we want to keep shader debug info, since
+ * we don't get the debug info on cached shaders. */
+ if (cache->hash_table == NULL || (device->instance->debug_flags & RADV_DEBUG_NO_CACHE))
+ cache->table_size = 0;
+ else
+ memset(cache->hash_table, 0, byte_size);
}
void
radv_pipeline_cache_finish(struct radv_pipeline_cache *cache)
{
- for (unsigned i = 0; i < cache->table_size; ++i)
- if (cache->hash_table[i]) {
- for(int j = 0; j < MESA_SHADER_STAGES; ++j) {
- if (cache->hash_table[i]->variants[j])
- radv_shader_variant_destroy(cache->device,
- cache->hash_table[i]->variants[j]);
- }
- vk_free(&cache->alloc, cache->hash_table[i]);
- }
- mtx_destroy(&cache->mutex);
- free(cache->hash_table);
+ for (unsigned i = 0; i < cache->table_size; ++i)
+ if (cache->hash_table[i]) {
+ for (int j = 0; j < MESA_SHADER_STAGES; ++j) {
+ if (cache->hash_table[i]->variants[j])
+ radv_shader_variant_destroy(cache->device, cache->hash_table[i]->variants[j]);
+ }
+ vk_free(&cache->alloc, cache->hash_table[i]);
+ }
+ mtx_destroy(&cache->mutex);
+ free(cache->hash_table);
}
static uint32_t
entry_size(struct cache_entry *entry)
{
- size_t ret = sizeof(*entry);
- for (int i = 0; i < MESA_SHADER_STAGES; ++i)
- if (entry->binary_sizes[i])
- ret += entry->binary_sizes[i];
- ret = align(ret, alignof(struct cache_entry));
- return ret;
+ size_t ret = sizeof(*entry);
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i)
+ if (entry->binary_sizes[i])
+ ret += entry->binary_sizes[i];
+ ret = align(ret, alignof(struct cache_entry));
+ return ret;
}
void
-radv_hash_shaders(unsigned char *hash,
- const VkPipelineShaderStageCreateInfo **stages,
- const struct radv_pipeline_layout *layout,
- const struct radv_pipeline_key *key,
- uint32_t flags)
+radv_hash_shaders(unsigned char *hash, const VkPipelineShaderStageCreateInfo **stages,
+ const struct radv_pipeline_layout *layout, const struct radv_pipeline_key *key,
+ uint32_t flags)
{
- struct mesa_sha1 ctx;
-
- _mesa_sha1_init(&ctx);
- if (key)
- _mesa_sha1_update(&ctx, key, sizeof(*key));
- if (layout)
- _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
-
- for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
- if (stages[i]) {
- RADV_FROM_HANDLE(vk_shader_module, module, stages[i]->module);
- const VkSpecializationInfo *spec_info = stages[i]->pSpecializationInfo;
-
- _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
- _mesa_sha1_update(&ctx, stages[i]->pName, strlen(stages[i]->pName));
- if (spec_info && spec_info->mapEntryCount) {
- _mesa_sha1_update(&ctx, spec_info->pMapEntries,
- spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
- _mesa_sha1_update(&ctx, spec_info->pData, spec_info->dataSize);
- }
- }
- }
- _mesa_sha1_update(&ctx, &flags, 4);
- _mesa_sha1_final(&ctx, hash);
+ struct mesa_sha1 ctx;
+
+ _mesa_sha1_init(&ctx);
+ if (key)
+ _mesa_sha1_update(&ctx, key, sizeof(*key));
+ if (layout)
+ _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
+
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
+ if (stages[i]) {
+ RADV_FROM_HANDLE(vk_shader_module, module, stages[i]->module);
+ const VkSpecializationInfo *spec_info = stages[i]->pSpecializationInfo;
+
+ _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
+ _mesa_sha1_update(&ctx, stages[i]->pName, strlen(stages[i]->pName));
+ if (spec_info && spec_info->mapEntryCount) {
+ _mesa_sha1_update(&ctx, spec_info->pMapEntries,
+ spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
+ _mesa_sha1_update(&ctx, spec_info->pData, spec_info->dataSize);
+ }
+ }
+ }
+ _mesa_sha1_update(&ctx, &flags, 4);
+ _mesa_sha1_final(&ctx, hash);
}
-
static struct cache_entry *
-radv_pipeline_cache_search_unlocked(struct radv_pipeline_cache *cache,
- const unsigned char *sha1)
+radv_pipeline_cache_search_unlocked(struct radv_pipeline_cache *cache, const unsigned char *sha1)
{
- const uint32_t mask = cache->table_size - 1;
- const uint32_t start = (*(uint32_t *) sha1);
+ const uint32_t mask = cache->table_size - 1;
+ const uint32_t start = (*(uint32_t *)sha1);
- if (cache->table_size == 0)
- return NULL;
+ if (cache->table_size == 0)
+ return NULL;
- for (uint32_t i = 0; i < cache->table_size; i++) {
- const uint32_t index = (start + i) & mask;
- struct cache_entry *entry = cache->hash_table[index];
+ for (uint32_t i = 0; i < cache->table_size; i++) {
+ const uint32_t index = (start + i) & mask;
+ struct cache_entry *entry = cache->hash_table[index];
- if (!entry)
- return NULL;
+ if (!entry)
+ return NULL;
- if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
- return entry;
- }
- }
+ if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
+ return entry;
+ }
+ }
- unreachable("hash table should never be full");
+ unreachable("hash table should never be full");
}
static struct cache_entry *
-radv_pipeline_cache_search(struct radv_pipeline_cache *cache,
- const unsigned char *sha1)
+radv_pipeline_cache_search(struct radv_pipeline_cache *cache, const unsigned char *sha1)
{
- struct cache_entry *entry;
+ struct cache_entry *entry;
- radv_pipeline_cache_lock(cache);
+ radv_pipeline_cache_lock(cache);
- entry = radv_pipeline_cache_search_unlocked(cache, sha1);
+ entry = radv_pipeline_cache_search_unlocked(cache, sha1);
- radv_pipeline_cache_unlock(cache);
+ radv_pipeline_cache_unlock(cache);
- return entry;
+ return entry;
}
static void
-radv_pipeline_cache_set_entry(struct radv_pipeline_cache *cache,
- struct cache_entry *entry)
+radv_pipeline_cache_set_entry(struct radv_pipeline_cache *cache, struct cache_entry *entry)
{
- const uint32_t mask = cache->table_size - 1;
- const uint32_t start = entry->sha1_dw[0];
-
- /* We'll always be able to insert when we get here. */
- assert(cache->kernel_count < cache->table_size / 2);
-
- for (uint32_t i = 0; i < cache->table_size; i++) {
- const uint32_t index = (start + i) & mask;
- if (!cache->hash_table[index]) {
- cache->hash_table[index] = entry;
- break;
- }
- }
-
- cache->total_size += entry_size(entry);
- cache->kernel_count++;
+ const uint32_t mask = cache->table_size - 1;
+ const uint32_t start = entry->sha1_dw[0];
+
+ /* We'll always be able to insert when we get here. */
+ assert(cache->kernel_count < cache->table_size / 2);
+
+ for (uint32_t i = 0; i < cache->table_size; i++) {
+ const uint32_t index = (start + i) & mask;
+ if (!cache->hash_table[index]) {
+ cache->hash_table[index] = entry;
+ break;
+ }
+ }
+
+ cache->total_size += entry_size(entry);
+ cache->kernel_count++;
}
-
static VkResult
radv_pipeline_cache_grow(struct radv_pipeline_cache *cache)
{
- const uint32_t table_size = cache->table_size * 2;
- const uint32_t old_table_size = cache->table_size;
- const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
- struct cache_entry **table;
- struct cache_entry **old_table = cache->hash_table;
+ const uint32_t table_size = cache->table_size * 2;
+ const uint32_t old_table_size = cache->table_size;
+ const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
+ struct cache_entry **table;
+ struct cache_entry **old_table = cache->hash_table;
- table = malloc(byte_size);
- if (table == NULL)
- return vk_error(cache->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ table = malloc(byte_size);
+ if (table == NULL)
+ return vk_error(cache->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- cache->hash_table = table;
- cache->table_size = table_size;
- cache->kernel_count = 0;
- cache->total_size = 0;
+ cache->hash_table = table;
+ cache->table_size = table_size;
+ cache->kernel_count = 0;
+ cache->total_size = 0;
- memset(cache->hash_table, 0, byte_size);
- for (uint32_t i = 0; i < old_table_size; i++) {
- struct cache_entry *entry = old_table[i];
- if (!entry)
- continue;
+ memset(cache->hash_table, 0, byte_size);
+ for (uint32_t i = 0; i < old_table_size; i++) {
+ struct cache_entry *entry = old_table[i];
+ if (!entry)
+ continue;
- radv_pipeline_cache_set_entry(cache, entry);
- }
+ radv_pipeline_cache_set_entry(cache, entry);
+ }
- free(old_table);
+ free(old_table);
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
static void
-radv_pipeline_cache_add_entry(struct radv_pipeline_cache *cache,
- struct cache_entry *entry)
+radv_pipeline_cache_add_entry(struct radv_pipeline_cache *cache, struct cache_entry *entry)
{
- if (cache->kernel_count == cache->table_size / 2)
- radv_pipeline_cache_grow(cache);
-
- /* Failing to grow that hash table isn't fatal, but may mean we don't
- * have enough space to add this new kernel. Only add it if there's room.
- */
- if (cache->kernel_count < cache->table_size / 2)
- radv_pipeline_cache_set_entry(cache, entry);
+ if (cache->kernel_count == cache->table_size / 2)
+ radv_pipeline_cache_grow(cache);
+
+ /* Failing to grow that hash table isn't fatal, but may mean we don't
+ * have enough space to add this new kernel. Only add it if there's room.
+ */
+ if (cache->kernel_count < cache->table_size / 2)
+ radv_pipeline_cache_set_entry(cache, entry);
}
static bool
radv_is_cache_disabled(struct radv_device *device)
{
- /* Pipeline caches can be disabled with RADV_DEBUG=nocache, with
- * MESA_GLSL_CACHE_DISABLE=1, and when VK_AMD_shader_info is requested.
- */
- return (device->instance->debug_flags & RADV_DEBUG_NO_CACHE);
+ /* Pipeline caches can be disabled with RADV_DEBUG=nocache, with
+ * MESA_GLSL_CACHE_DISABLE=1, and when VK_AMD_shader_info is requested.
+ */
+ return (device->instance->debug_flags & RADV_DEBUG_NO_CACHE);
}
bool
radv_create_shader_variants_from_pipeline_cache(struct radv_device *device,
- struct radv_pipeline_cache *cache,
- const unsigned char *sha1,
- struct radv_shader_variant **variants,
- bool *found_in_application_cache)
+ struct radv_pipeline_cache *cache,
+ const unsigned char *sha1,
+ struct radv_shader_variant **variants,
+ bool *found_in_application_cache)
{
- struct cache_entry *entry;
-
- if (!cache) {
- cache = device->mem_cache;
- *found_in_application_cache = false;
- }
-
- radv_pipeline_cache_lock(cache);
-
- entry = radv_pipeline_cache_search_unlocked(cache, sha1);
-
- if (!entry) {
- *found_in_application_cache = false;
-
- /* Don't cache when we want debug info, since this isn't
- * present in the cache.
- */
- if (radv_is_cache_disabled(device) || !device->physical_device->disk_cache) {
- radv_pipeline_cache_unlock(cache);
- return false;
- }
-
- uint8_t disk_sha1[20];
- disk_cache_compute_key(device->physical_device->disk_cache,
- sha1, 20, disk_sha1);
-
- entry = (struct cache_entry *)
- disk_cache_get(device->physical_device->disk_cache,
- disk_sha1, NULL);
- if (!entry) {
- radv_pipeline_cache_unlock(cache);
- return false;
- } else {
- size_t size = entry_size(entry);
- struct cache_entry *new_entry = vk_alloc(&cache->alloc, size, 8,
- VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
- if (!new_entry) {
- free(entry);
- radv_pipeline_cache_unlock(cache);
- return false;
- }
-
- memcpy(new_entry, entry, entry_size(entry));
- free(entry);
- entry = new_entry;
-
- if (!(device->instance->debug_flags & RADV_DEBUG_NO_MEMORY_CACHE) ||
- cache != device->mem_cache)
- radv_pipeline_cache_add_entry(cache, new_entry);
- }
- }
-
- char *p = entry->code;
- for(int i = 0; i < MESA_SHADER_STAGES; ++i) {
- if (!entry->variants[i] && entry->binary_sizes[i]) {
- struct radv_shader_binary *binary = calloc(1, entry->binary_sizes[i]);
- memcpy(binary, p, entry->binary_sizes[i]);
- p += entry->binary_sizes[i];
-
- entry->variants[i] = radv_shader_variant_create(device, binary, false);
- free(binary);
- } else if (entry->binary_sizes[i]) {
- p += entry->binary_sizes[i];
- }
-
- }
-
- memcpy(variants, entry->variants, sizeof(entry->variants));
-
- if (device->instance->debug_flags & RADV_DEBUG_NO_MEMORY_CACHE &&
- cache == device->mem_cache)
- vk_free(&cache->alloc, entry);
- else {
- for (int i = 0; i < MESA_SHADER_STAGES; ++i)
- if (entry->variants[i])
- p_atomic_inc(&entry->variants[i]->ref_count);
- }
-
- radv_pipeline_cache_unlock(cache);
- return true;
+ struct cache_entry *entry;
+
+ if (!cache) {
+ cache = device->mem_cache;
+ *found_in_application_cache = false;
+ }
+
+ radv_pipeline_cache_lock(cache);
+
+ entry = radv_pipeline_cache_search_unlocked(cache, sha1);
+
+ if (!entry) {
+ *found_in_application_cache = false;
+
+ /* Don't cache when we want debug info, since this isn't
+ * present in the cache.
+ */
+ if (radv_is_cache_disabled(device) || !device->physical_device->disk_cache) {
+ radv_pipeline_cache_unlock(cache);
+ return false;
+ }
+
+ uint8_t disk_sha1[20];
+ disk_cache_compute_key(device->physical_device->disk_cache, sha1, 20, disk_sha1);
+
+ entry =
+ (struct cache_entry *)disk_cache_get(device->physical_device->disk_cache, disk_sha1, NULL);
+ if (!entry) {
+ radv_pipeline_cache_unlock(cache);
+ return false;
+ } else {
+ size_t size = entry_size(entry);
+ struct cache_entry *new_entry =
+ vk_alloc(&cache->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
+ if (!new_entry) {
+ free(entry);
+ radv_pipeline_cache_unlock(cache);
+ return false;
+ }
+
+ memcpy(new_entry, entry, entry_size(entry));
+ free(entry);
+ entry = new_entry;
+
+ if (!(device->instance->debug_flags & RADV_DEBUG_NO_MEMORY_CACHE) ||
+ cache != device->mem_cache)
+ radv_pipeline_cache_add_entry(cache, new_entry);
+ }
+ }
+
+ char *p = entry->code;
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
+ if (!entry->variants[i] && entry->binary_sizes[i]) {
+ struct radv_shader_binary *binary = calloc(1, entry->binary_sizes[i]);
+ memcpy(binary, p, entry->binary_sizes[i]);
+ p += entry->binary_sizes[i];
+
+ entry->variants[i] = radv_shader_variant_create(device, binary, false);
+ free(binary);
+ } else if (entry->binary_sizes[i]) {
+ p += entry->binary_sizes[i];
+ }
+ }
+
+ memcpy(variants, entry->variants, sizeof(entry->variants));
+
+ if (device->instance->debug_flags & RADV_DEBUG_NO_MEMORY_CACHE && cache == device->mem_cache)
+ vk_free(&cache->alloc, entry);
+ else {
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i)
+ if (entry->variants[i])
+ p_atomic_inc(&entry->variants[i]->ref_count);
+ }
+
+ radv_pipeline_cache_unlock(cache);
+ return true;
}
void
-radv_pipeline_cache_insert_shaders(struct radv_device *device,
- struct radv_pipeline_cache *cache,
- const unsigned char *sha1,
- struct radv_shader_variant **variants,
- struct radv_shader_binary *const *binaries)
+radv_pipeline_cache_insert_shaders(struct radv_device *device, struct radv_pipeline_cache *cache,
+ const unsigned char *sha1, struct radv_shader_variant **variants,
+ struct radv_shader_binary *const *binaries)
{
- if (!cache)
- cache = device->mem_cache;
-
- radv_pipeline_cache_lock(cache);
- struct cache_entry *entry = radv_pipeline_cache_search_unlocked(cache, sha1);
- if (entry) {
- for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
- if (entry->variants[i]) {
- radv_shader_variant_destroy(cache->device, variants[i]);
- variants[i] = entry->variants[i];
- } else {
- entry->variants[i] = variants[i];
- }
- if (variants[i])
- p_atomic_inc(&variants[i]->ref_count);
- }
- radv_pipeline_cache_unlock(cache);
- return;
- }
-
- /* Don't cache when we want debug info, since this isn't
- * present in the cache.
- */
- if (radv_is_cache_disabled(device)) {
- radv_pipeline_cache_unlock(cache);
- return;
- }
-
- size_t size = sizeof(*entry);
- for (int i = 0; i < MESA_SHADER_STAGES; ++i)
- if (variants[i])
- size += binaries[i]->total_size;
- const size_t size_without_align = size;
- size = align(size_without_align, alignof(struct cache_entry));
-
- entry = vk_alloc(&cache->alloc, size, 8,
- VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
- if (!entry) {
- radv_pipeline_cache_unlock(cache);
- return;
- }
-
- memset(entry, 0, sizeof(*entry));
- memcpy(entry->sha1, sha1, 20);
-
- char* p = entry->code;
-
- for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
- if (!variants[i])
- continue;
-
- entry->binary_sizes[i] = binaries[i]->total_size;
-
- memcpy(p, binaries[i], binaries[i]->total_size);
- p += binaries[i]->total_size;
- }
-
- // Make valgrind happy by filling the alignment hole at the end.
- assert(p == (char*)entry + size_without_align);
- assert(sizeof(*entry) + (p - entry->code) == size_without_align);
- memset((char*)entry + size_without_align, 0, size - size_without_align);
-
- /* Always add cache items to disk. This will allow collection of
- * compiled shaders by third parties such as steam, even if the app
- * implements its own pipeline cache.
- */
- if (device->physical_device->disk_cache) {
- uint8_t disk_sha1[20];
- disk_cache_compute_key(device->physical_device->disk_cache, sha1, 20,
- disk_sha1);
-
- disk_cache_put(device->physical_device->disk_cache, disk_sha1,
- entry, entry_size(entry), NULL);
- }
-
- if (device->instance->debug_flags & RADV_DEBUG_NO_MEMORY_CACHE &&
- cache == device->mem_cache) {
- vk_free2(&cache->alloc, NULL, entry);
- radv_pipeline_cache_unlock(cache);
- return;
- }
-
- /* We delay setting the variant so we have reproducible disk cache
- * items.
- */
- for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
- if (!variants[i])
- continue;
-
- entry->variants[i] = variants[i];
- p_atomic_inc(&variants[i]->ref_count);
- }
-
- radv_pipeline_cache_add_entry(cache, entry);
-
- cache->modified = true;
- radv_pipeline_cache_unlock(cache);
- return;
+ if (!cache)
+ cache = device->mem_cache;
+
+ radv_pipeline_cache_lock(cache);
+ struct cache_entry *entry = radv_pipeline_cache_search_unlocked(cache, sha1);
+ if (entry) {
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
+ if (entry->variants[i]) {
+ radv_shader_variant_destroy(cache->device, variants[i]);
+ variants[i] = entry->variants[i];
+ } else {
+ entry->variants[i] = variants[i];
+ }
+ if (variants[i])
+ p_atomic_inc(&variants[i]->ref_count);
+ }
+ radv_pipeline_cache_unlock(cache);
+ return;
+ }
+
+ /* Don't cache when we want debug info, since this isn't
+ * present in the cache.
+ */
+ if (radv_is_cache_disabled(device)) {
+ radv_pipeline_cache_unlock(cache);
+ return;
+ }
+
+ size_t size = sizeof(*entry);
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i)
+ if (variants[i])
+ size += binaries[i]->total_size;
+ const size_t size_without_align = size;
+ size = align(size_without_align, alignof(struct cache_entry));
+
+ entry = vk_alloc(&cache->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
+ if (!entry) {
+ radv_pipeline_cache_unlock(cache);
+ return;
+ }
+
+ memset(entry, 0, sizeof(*entry));
+ memcpy(entry->sha1, sha1, 20);
+
+ char *p = entry->code;
+
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
+ if (!variants[i])
+ continue;
+
+ entry->binary_sizes[i] = binaries[i]->total_size;
+
+ memcpy(p, binaries[i], binaries[i]->total_size);
+ p += binaries[i]->total_size;
+ }
+
+ // Make valgrind happy by filling the alignment hole at the end.
+ assert(p == (char *)entry + size_without_align);
+ assert(sizeof(*entry) + (p - entry->code) == size_without_align);
+ memset((char *)entry + size_without_align, 0, size - size_without_align);
+
+ /* Always add cache items to disk. This will allow collection of
+ * compiled shaders by third parties such as steam, even if the app
+ * implements its own pipeline cache.
+ */
+ if (device->physical_device->disk_cache) {
+ uint8_t disk_sha1[20];
+ disk_cache_compute_key(device->physical_device->disk_cache, sha1, 20, disk_sha1);
+
+ disk_cache_put(device->physical_device->disk_cache, disk_sha1, entry, entry_size(entry),
+ NULL);
+ }
+
+ if (device->instance->debug_flags & RADV_DEBUG_NO_MEMORY_CACHE && cache == device->mem_cache) {
+ vk_free2(&cache->alloc, NULL, entry);
+ radv_pipeline_cache_unlock(cache);
+ return;
+ }
+
+ /* We delay setting the variant so we have reproducible disk cache
+ * items.
+ */
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
+ if (!variants[i])
+ continue;
+
+ entry->variants[i] = variants[i];
+ p_atomic_inc(&variants[i]->ref_count);
+ }
+
+ radv_pipeline_cache_add_entry(cache, entry);
+
+ cache->modified = true;
+ radv_pipeline_cache_unlock(cache);
+ return;
}
bool
-radv_pipeline_cache_load(struct radv_pipeline_cache *cache,
- const void *data, size_t size)
+radv_pipeline_cache_load(struct radv_pipeline_cache *cache, const void *data, size_t size)
{
- struct radv_device *device = cache->device;
- struct vk_pipeline_cache_header header;
-
- if (size < sizeof(header))
- return false;
- memcpy(&header, data, sizeof(header));
- if (header.header_size < sizeof(header))
- return false;
- if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
- return false;
- if (header.vendor_id != ATI_VENDOR_ID)
- return false;
- if (header.device_id != device->physical_device->rad_info.pci_id)
- return false;
- if (memcmp(header.uuid, device->physical_device->cache_uuid, VK_UUID_SIZE) != 0)
- return false;
-
- char *end = (char *) data + size;
- char *p = (char *) data + header.header_size;
-
- while (end - p >= sizeof(struct cache_entry)) {
- struct cache_entry *entry = (struct cache_entry*)p;
- struct cache_entry *dest_entry;
- size_t size_of_entry = entry_size(entry);
- if(end - p < size_of_entry)
- break;
-
- dest_entry = vk_alloc(&cache->alloc, size_of_entry,
- 8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
- if (dest_entry) {
- memcpy(dest_entry, entry, size_of_entry);
- for (int i = 0; i < MESA_SHADER_STAGES; ++i)
- dest_entry->variants[i] = NULL;
- radv_pipeline_cache_add_entry(cache, dest_entry);
- }
- p += size_of_entry;
- }
-
- return true;
+ struct radv_device *device = cache->device;
+ struct vk_pipeline_cache_header header;
+
+ if (size < sizeof(header))
+ return false;
+ memcpy(&header, data, sizeof(header));
+ if (header.header_size < sizeof(header))
+ return false;
+ if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
+ return false;
+ if (header.vendor_id != ATI_VENDOR_ID)
+ return false;
+ if (header.device_id != device->physical_device->rad_info.pci_id)
+ return false;
+ if (memcmp(header.uuid, device->physical_device->cache_uuid, VK_UUID_SIZE) != 0)
+ return false;
+
+ char *end = (char *)data + size;
+ char *p = (char *)data + header.header_size;
+
+ while (end - p >= sizeof(struct cache_entry)) {
+ struct cache_entry *entry = (struct cache_entry *)p;
+ struct cache_entry *dest_entry;
+ size_t size_of_entry = entry_size(entry);
+ if (end - p < size_of_entry)
+ break;
+
+ dest_entry = vk_alloc(&cache->alloc, size_of_entry, 8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
+ if (dest_entry) {
+ memcpy(dest_entry, entry, size_of_entry);
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i)
+ dest_entry->variants[i] = NULL;
+ radv_pipeline_cache_add_entry(cache, dest_entry);
+ }
+ p += size_of_entry;
+ }
+
+ return true;
}
-VkResult radv_CreatePipelineCache(
- VkDevice _device,
- const VkPipelineCacheCreateInfo* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkPipelineCache* pPipelineCache)
+VkResult
+radv_CreatePipelineCache(VkDevice _device, const VkPipelineCacheCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkPipelineCache *pPipelineCache)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- struct radv_pipeline_cache *cache;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ struct radv_pipeline_cache *cache;
- assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
+ assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
- cache = vk_alloc2(&device->vk.alloc, pAllocator,
- sizeof(*cache), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (cache == NULL)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ cache = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*cache), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (cache == NULL)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- vk_object_base_init(&device->vk, &cache->base,
- VK_OBJECT_TYPE_PIPELINE_CACHE);
+ vk_object_base_init(&device->vk, &cache->base, VK_OBJECT_TYPE_PIPELINE_CACHE);
- if (pAllocator)
- cache->alloc = *pAllocator;
- else
- cache->alloc = device->vk.alloc;
+ if (pAllocator)
+ cache->alloc = *pAllocator;
+ else
+ cache->alloc = device->vk.alloc;
- radv_pipeline_cache_init(cache, device);
- cache->flags = pCreateInfo->flags;
+ radv_pipeline_cache_init(cache, device);
+ cache->flags = pCreateInfo->flags;
- if (pCreateInfo->initialDataSize > 0) {
- radv_pipeline_cache_load(cache,
- pCreateInfo->pInitialData,
- pCreateInfo->initialDataSize);
- }
+ if (pCreateInfo->initialDataSize > 0) {
+ radv_pipeline_cache_load(cache, pCreateInfo->pInitialData, pCreateInfo->initialDataSize);
+ }
- *pPipelineCache = radv_pipeline_cache_to_handle(cache);
+ *pPipelineCache = radv_pipeline_cache_to_handle(cache);
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
-void radv_DestroyPipelineCache(
- VkDevice _device,
- VkPipelineCache _cache,
- const VkAllocationCallbacks* pAllocator)
+void
+radv_DestroyPipelineCache(VkDevice _device, VkPipelineCache _cache,
+ const VkAllocationCallbacks *pAllocator)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
- if (!cache)
- return;
- radv_pipeline_cache_finish(cache);
+ if (!cache)
+ return;
+ radv_pipeline_cache_finish(cache);
- vk_object_base_finish(&cache->base);
- vk_free2(&device->vk.alloc, pAllocator, cache);
+ vk_object_base_finish(&cache->base);
+ vk_free2(&device->vk.alloc, pAllocator, cache);
}
-VkResult radv_GetPipelineCacheData(
- VkDevice _device,
- VkPipelineCache _cache,
- size_t* pDataSize,
- void* pData)
+VkResult
+radv_GetPipelineCacheData(VkDevice _device, VkPipelineCache _cache, size_t *pDataSize, void *pData)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
- struct vk_pipeline_cache_header *header;
- VkResult result = VK_SUCCESS;
-
- radv_pipeline_cache_lock(cache);
-
- const size_t size = sizeof(*header) + cache->total_size;
- if (pData == NULL) {
- radv_pipeline_cache_unlock(cache);
- *pDataSize = size;
- return VK_SUCCESS;
- }
- if (*pDataSize < sizeof(*header)) {
- radv_pipeline_cache_unlock(cache);
- *pDataSize = 0;
- return VK_INCOMPLETE;
- }
- void *p = pData, *end = (char *) pData + *pDataSize;
- header = p;
- header->header_size = align(sizeof(*header), alignof(struct cache_entry));
- header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
- header->vendor_id = ATI_VENDOR_ID;
- header->device_id = device->physical_device->rad_info.pci_id;
- memcpy(header->uuid, device->physical_device->cache_uuid, VK_UUID_SIZE);
- p = (char *)p + header->header_size;
-
- struct cache_entry *entry;
- for (uint32_t i = 0; i < cache->table_size; i++) {
- if (!cache->hash_table[i])
- continue;
- entry = cache->hash_table[i];
- const uint32_t size_of_entry = entry_size(entry);
- if ((char *)end < (char *)p + size_of_entry) {
- result = VK_INCOMPLETE;
- break;
- }
-
- memcpy(p, entry, size_of_entry);
- for(int j = 0; j < MESA_SHADER_STAGES; ++j)
- ((struct cache_entry*)p)->variants[j] = NULL;
- p = (char *)p + size_of_entry;
- }
- *pDataSize = (char *)p - (char *)pData;
-
- radv_pipeline_cache_unlock(cache);
- return result;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
+ struct vk_pipeline_cache_header *header;
+ VkResult result = VK_SUCCESS;
+
+ radv_pipeline_cache_lock(cache);
+
+ const size_t size = sizeof(*header) + cache->total_size;
+ if (pData == NULL) {
+ radv_pipeline_cache_unlock(cache);
+ *pDataSize = size;
+ return VK_SUCCESS;
+ }
+ if (*pDataSize < sizeof(*header)) {
+ radv_pipeline_cache_unlock(cache);
+ *pDataSize = 0;
+ return VK_INCOMPLETE;
+ }
+ void *p = pData, *end = (char *)pData + *pDataSize;
+ header = p;
+ header->header_size = align(sizeof(*header), alignof(struct cache_entry));
+ header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
+ header->vendor_id = ATI_VENDOR_ID;
+ header->device_id = device->physical_device->rad_info.pci_id;
+ memcpy(header->uuid, device->physical_device->cache_uuid, VK_UUID_SIZE);
+ p = (char *)p + header->header_size;
+
+ struct cache_entry *entry;
+ for (uint32_t i = 0; i < cache->table_size; i++) {
+ if (!cache->hash_table[i])
+ continue;
+ entry = cache->hash_table[i];
+ const uint32_t size_of_entry = entry_size(entry);
+ if ((char *)end < (char *)p + size_of_entry) {
+ result = VK_INCOMPLETE;
+ break;
+ }
+
+ memcpy(p, entry, size_of_entry);
+ for (int j = 0; j < MESA_SHADER_STAGES; ++j)
+ ((struct cache_entry *)p)->variants[j] = NULL;
+ p = (char *)p + size_of_entry;
+ }
+ *pDataSize = (char *)p - (char *)pData;
+
+ radv_pipeline_cache_unlock(cache);
+ return result;
}
static void
-radv_pipeline_cache_merge(struct radv_pipeline_cache *dst,
- struct radv_pipeline_cache *src)
+radv_pipeline_cache_merge(struct radv_pipeline_cache *dst, struct radv_pipeline_cache *src)
{
- for (uint32_t i = 0; i < src->table_size; i++) {
- struct cache_entry *entry = src->hash_table[i];
- if (!entry || radv_pipeline_cache_search(dst, entry->sha1))
- continue;
+ for (uint32_t i = 0; i < src->table_size; i++) {
+ struct cache_entry *entry = src->hash_table[i];
+ if (!entry || radv_pipeline_cache_search(dst, entry->sha1))
+ continue;
- radv_pipeline_cache_add_entry(dst, entry);
+ radv_pipeline_cache_add_entry(dst, entry);
- src->hash_table[i] = NULL;
- }
+ src->hash_table[i] = NULL;
+ }
}
-VkResult radv_MergePipelineCaches(
- VkDevice _device,
- VkPipelineCache destCache,
- uint32_t srcCacheCount,
- const VkPipelineCache* pSrcCaches)
+VkResult
+radv_MergePipelineCaches(VkDevice _device, VkPipelineCache destCache, uint32_t srcCacheCount,
+ const VkPipelineCache *pSrcCaches)
{
- RADV_FROM_HANDLE(radv_pipeline_cache, dst, destCache);
+ RADV_FROM_HANDLE(radv_pipeline_cache, dst, destCache);
- for (uint32_t i = 0; i < srcCacheCount; i++) {
- RADV_FROM_HANDLE(radv_pipeline_cache, src, pSrcCaches[i]);
+ for (uint32_t i = 0; i < srcCacheCount; i++) {
+ RADV_FROM_HANDLE(radv_pipeline_cache, src, pSrcCaches[i]);
- radv_pipeline_cache_merge(dst, src);
- }
+ radv_pipeline_cache_merge(dst, src);
+ }
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 02368fa1e07..15ae7ebb36c 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -28,15 +28,15 @@
#ifndef RADV_PRIVATE_H
#define RADV_PRIVATE_H
-#include <stdlib.h>
-#include <stdio.h>
-#include <stdbool.h>
#include <assert.h>
+#include <stdbool.h>
#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
#include <string.h>
#ifdef HAVE_VALGRIND
-#include <valgrind.h>
#include <memcheck.h>
+#include <valgrind.h>
#define VG(x) x
#else
#define VG(x) ((void)0)
@@ -50,30 +50,30 @@
#include "compiler/shader_enums.h"
#include "util/bitscan.h"
#include "util/cnd_monotonic.h"
-#include "util/macros.h"
#include "util/list.h"
+#include "util/macros.h"
#include "util/rwlock.h"
#include "util/xmlconfig.h"
#include "vk_alloc.h"
#include "vk_debug_report.h"
#include "vk_device.h"
-#include "vk_instance.h"
#include "vk_format.h"
+#include "vk_instance.h"
#include "vk_physical_device.h"
#include "vk_shader_module.h"
#include "vk_util.h"
-#include "radv_radeon_winsys.h"
#include "ac_binary.h"
-#include "ac_nir_to_llvm.h"
#include "ac_gpu_info.h"
-#include "ac_surface.h"
#include "ac_llvm_build.h"
#include "ac_llvm_util.h"
+#include "ac_nir_to_llvm.h"
+#include "ac_sqtt.h"
+#include "ac_surface.h"
#include "radv_constants.h"
#include "radv_descriptor_set.h"
+#include "radv_radeon_winsys.h"
#include "sid.h"
-#include "ac_sqtt.h"
/* Pre-declarations needed for WSI entrypoints */
struct wl_surface;
@@ -82,10 +82,10 @@ typedef struct xcb_connection_t xcb_connection_t;
typedef uint32_t xcb_visualid_t;
typedef uint32_t xcb_window_t;
+#include <vulkan/vk_android_native_buffer.h>
+#include <vulkan/vk_icd.h>
#include <vulkan/vulkan.h>
#include <vulkan/vulkan_android.h>
-#include <vulkan/vk_icd.h>
-#include <vulkan/vk_android_native_buffer.h>
#include "radv_entrypoints.h"
@@ -118,80 +118,80 @@ typedef uint32_t xcb_window_t;
static inline uint32_t
align_u32(uint32_t v, uint32_t a)
{
- assert(a != 0 && a == (a & -a));
- return (v + a - 1) & ~(a - 1);
+ assert(a != 0 && a == (a & -a));
+ return (v + a - 1) & ~(a - 1);
}
static inline uint32_t
align_u32_npot(uint32_t v, uint32_t a)
{
- return (v + a - 1) / a * a;
+ return (v + a - 1) / a * a;
}
static inline uint64_t
align_u64(uint64_t v, uint64_t a)
{
- assert(a != 0 && a == (a & -a));
- return (v + a - 1) & ~(a - 1);
+ assert(a != 0 && a == (a & -a));
+ return (v + a - 1) & ~(a - 1);
}
static inline int32_t
align_i32(int32_t v, int32_t a)
{
- assert(a != 0 && a == (a & -a));
- return (v + a - 1) & ~(a - 1);
+ assert(a != 0 && a == (a & -a));
+ return (v + a - 1) & ~(a - 1);
}
/** Alignment must be a power of 2. */
static inline bool
radv_is_aligned(uintmax_t n, uintmax_t a)
{
- assert(a == (a & -a));
- return (n & (a - 1)) == 0;
+ assert(a == (a & -a));
+ return (n & (a - 1)) == 0;
}
static inline uint32_t
round_up_u32(uint32_t v, uint32_t a)
{
- return (v + a - 1) / a;
+ return (v + a - 1) / a;
}
static inline uint64_t
round_up_u64(uint64_t v, uint64_t a)
{
- return (v + a - 1) / a;
+ return (v + a - 1) / a;
}
static inline uint32_t
radv_minify(uint32_t n, uint32_t levels)
{
- if (unlikely(n == 0))
- return 0;
- else
- return MAX2(n >> levels, 1);
+ if (unlikely(n == 0))
+ return 0;
+ else
+ return MAX2(n >> levels, 1);
}
static inline float
radv_clamp_f(float f, float min, float max)
{
- assert(min < max);
-
- if (f > max)
- return max;
- else if (f < min)
- return min;
- else
- return f;
+ assert(min < max);
+
+ if (f > max)
+ return max;
+ else if (f < min)
+ return min;
+ else
+ return f;
}
static inline bool
radv_clear_mask(uint32_t *inout_mask, uint32_t clear_mask)
{
- if (*inout_mask & clear_mask) {
- *inout_mask &= ~clear_mask;
- return true;
- } else {
- return false;
- }
+ if (*inout_mask & clear_mask) {
+ *inout_mask &= ~clear_mask;
+ return true;
+ } else {
+ return false;
+ }
}
/* Whenever we generate an error, pass it through this function. Useful for
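The inline alignment and mask helpers reformatted in the hunk above are small enough that a few concrete values pin down their behaviour. The spot checks below are illustrative only (they assume radv_private.h is in scope) and are not part of this patch.

/* Illustrative spot checks for the helpers above; assumes radv_private.h is
 * included. Not part of the formatting change. */
#include <assert.h>
#include <stdint.h>

static void
radv_align_helper_examples(void)
{
   /* align_u32() rounds up to a power-of-two multiple. */
   assert(align_u32(13, 8) == 16);
   assert(align_u32(16, 8) == 16);

   /* align_u32_npot() accepts any alignment, not just powers of two. */
   assert(align_u32_npot(13, 6) == 18);

   /* radv_minify() yields mip-level extents and never drops below 1. */
   assert(radv_minify(1024, 3) == 128);
   assert(radv_minify(4, 10) == 1);

   /* radv_clear_mask() clears the requested bits and reports whether any
    * of them were set beforehand. */
   uint32_t mask = 0x5;
   assert(radv_clear_mask(&mask, 0x1) && mask == 0x4);
   assert(!radv_clear_mask(&mask, 0x2));
}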
@@ -203,26 +203,21 @@ struct radv_image_view;
struct radv_instance;
VkResult __vk_errorv(struct radv_instance *instance, const void *object,
- VkDebugReportObjectTypeEXT type, VkResult error,
- const char *file, int line, const char *format,
- va_list args);
+ VkDebugReportObjectTypeEXT type, VkResult error, const char *file, int line,
+ const char *format, va_list args);
VkResult __vk_errorf(struct radv_instance *instance, const void *object,
- VkDebugReportObjectTypeEXT type, VkResult error,
- const char *file, int line, const char *format, ...)
- radv_printflike(7, 8);
-
-#define vk_error(instance, error) \
- __vk_errorf(instance, NULL, \
- VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT, \
- error, __FILE__, __LINE__, NULL);
-#define vk_errorf(instance, error, format, ...) \
- __vk_errorf(instance, NULL, \
- VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT, \
- error, __FILE__, __LINE__, format, ## __VA_ARGS__);
-
-void __radv_finishme(const char *file, int line, const char *format, ...)
- radv_printflike(3, 4);
+ VkDebugReportObjectTypeEXT type, VkResult error, const char *file, int line,
+ const char *format, ...) radv_printflike(7, 8);
+
+#define vk_error(instance, error) \
+ __vk_errorf(instance, NULL, VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT, error, __FILE__, __LINE__, \
+ NULL);
+#define vk_errorf(instance, error, format, ...) \
+ __vk_errorf(instance, NULL, VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT, error, __FILE__, __LINE__, \
+ format, ##__VA_ARGS__);
+
+void __radv_finishme(const char *file, int line, const char *format, ...) radv_printflike(3, 4);
void radv_loge(const char *format, ...) radv_printflike(1, 2);
void radv_loge_v(const char *format, va_list va);
void radv_logi(const char *format, ...) radv_printflike(1, 2);
@@ -231,23 +226,26 @@ void radv_logi_v(const char *format, va_list va);
/**
* Print a FINISHME message, including its source location.
*/
-#define radv_finishme(format, ...) \
- do { \
- static bool reported = false; \
- if (!reported) { \
- __radv_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__); \
- reported = true; \
- } \
- } while (0)
+#define radv_finishme(format, ...) \
+ do { \
+ static bool reported = false; \
+ if (!reported) { \
+ __radv_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__); \
+ reported = true; \
+ } \
+ } while (0)
/* A non-fatal assert. Useful for debugging. */
#ifdef NDEBUG
-#define radv_assert(x) do {} while(0)
+#define radv_assert(x) \
+ do { \
+ } while (0)
#else
-#define radv_assert(x) do { \
- if (unlikely(!(x))) \
- fprintf(stderr, "%s:%d ASSERT: %s\n", __FILE__, __LINE__, #x); \
-} while (0)
+#define radv_assert(x) \
+ do { \
+ if (unlikely(!(x))) \
+ fprintf(stderr, "%s:%d ASSERT: %s\n", __FILE__, __LINE__, #x); \
+ } while (0)
#endif
int radv_get_instance_entrypoint_index(const char *name);
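The macro hunk above only changes line wrapping; the semantics are unchanged: radv_finishme() reports a given call site at most once per process, and radv_assert() is a non-fatal check that compiles to nothing under NDEBUG. A small illustrative use follows; the function and values are placeholders, not code from this patch.

/* Illustrative placeholder showing how the macros above are meant to be used.
 * In a debug build the assert prints a non-fatal message to stderr; under
 * NDEBUG it is a no-op. The finishme message is printed only once. */
static void
radv_example_sample_count_check(int samples)
{
   radv_assert(samples >= 1);

   if (samples > 8)
      radv_finishme("unsupported sample count %d", samples);
}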
@@ -259,78 +257,78 @@ const char *radv_get_physical_device_entry_name(int index);
const char *radv_get_device_entry_name(int index);
struct radv_physical_device {
- struct vk_physical_device vk;
+ struct vk_physical_device vk;
- /* Link in radv_instance::physical_devices */
- struct list_head link;
+ /* Link in radv_instance::physical_devices */
+ struct list_head link;
- struct radv_instance * instance;
+ struct radv_instance *instance;
- struct radeon_winsys *ws;
- struct radeon_info rad_info;
- char name[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE];
- uint8_t driver_uuid[VK_UUID_SIZE];
- uint8_t device_uuid[VK_UUID_SIZE];
- uint8_t cache_uuid[VK_UUID_SIZE];
+ struct radeon_winsys *ws;
+ struct radeon_info rad_info;
+ char name[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE];
+ uint8_t driver_uuid[VK_UUID_SIZE];
+ uint8_t device_uuid[VK_UUID_SIZE];
+ uint8_t cache_uuid[VK_UUID_SIZE];
- int local_fd;
- int master_fd;
- struct wsi_device wsi_device;
+ int local_fd;
+ int master_fd;
+ struct wsi_device wsi_device;
- bool out_of_order_rast_allowed;
+ bool out_of_order_rast_allowed;
- /* Whether DCC should be enabled for MSAA textures. */
- bool dcc_msaa_allowed;
+ /* Whether DCC should be enabled for MSAA textures. */
+ bool dcc_msaa_allowed;
- /* Whether to enable NGG. */
- bool use_ngg;
+ /* Whether to enable NGG. */
+ bool use_ngg;
- /* Whether to enable NGG streamout. */
- bool use_ngg_streamout;
+ /* Whether to enable NGG streamout. */
+ bool use_ngg_streamout;
- /* Number of threads per wave. */
- uint8_t ps_wave_size;
- uint8_t cs_wave_size;
- uint8_t ge_wave_size;
+ /* Number of threads per wave. */
+ uint8_t ps_wave_size;
+ uint8_t cs_wave_size;
+ uint8_t ge_wave_size;
- /* Whether to use the LLVM compiler backend */
- bool use_llvm;
+ /* Whether to use the LLVM compiler backend */
+ bool use_llvm;
-   /* This is the driver's on-disk cache used as a fallback as opposed to
- * the pipeline cache defined by apps.
- */
- struct disk_cache * disk_cache;
+   /* This is the driver's on-disk cache used as a fallback as opposed to
+ * the pipeline cache defined by apps.
+ */
+ struct disk_cache *disk_cache;
- VkPhysicalDeviceMemoryProperties memory_properties;
- enum radeon_bo_domain memory_domains[VK_MAX_MEMORY_TYPES];
- enum radeon_bo_flag memory_flags[VK_MAX_MEMORY_TYPES];
- unsigned heaps;
+ VkPhysicalDeviceMemoryProperties memory_properties;
+ enum radeon_bo_domain memory_domains[VK_MAX_MEMORY_TYPES];
+ enum radeon_bo_flag memory_flags[VK_MAX_MEMORY_TYPES];
+ unsigned heaps;
#ifndef _WIN32
- drmPciBusInfo bus_info;
+ drmPciBusInfo bus_info;
#endif
};
struct radv_instance {
- struct vk_instance vk;
+ struct vk_instance vk;
- VkAllocationCallbacks alloc;
+ VkAllocationCallbacks alloc;
- uint64_t debug_flags;
- uint64_t perftest_flags;
+ uint64_t debug_flags;
+ uint64_t perftest_flags;
- bool physical_devices_enumerated;
- struct list_head physical_devices;
+ bool physical_devices_enumerated;
+ struct list_head physical_devices;
- struct driOptionCache dri_options;
- struct driOptionCache available_dri_options;
+ struct driOptionCache dri_options;
+ struct driOptionCache available_dri_options;
- /**
- * Workarounds for game bugs.
- */
- bool enable_mrt_output_nan_fixup;
- bool disable_tc_compat_htile_in_general;
- bool disable_shrink_image_store;
+ /**
+ * Workarounds for game bugs.
+ */
+ bool enable_mrt_output_nan_fixup;
+ bool disable_tc_compat_htile_in_general;
+ bool disable_shrink_image_store;
};
VkResult radv_init_wsi(struct radv_physical_device *physical_device);
@@ -339,327 +337,328 @@ void radv_finish_wsi(struct radv_physical_device *physical_device);
struct cache_entry;
struct radv_pipeline_cache {
- struct vk_object_base base;
- struct radv_device * device;
- mtx_t mutex;
- VkPipelineCacheCreateFlags flags;
+ struct vk_object_base base;
+ struct radv_device *device;
+ mtx_t mutex;
+ VkPipelineCacheCreateFlags flags;
- uint32_t total_size;
- uint32_t table_size;
- uint32_t kernel_count;
- struct cache_entry ** hash_table;
- bool modified;
+ uint32_t total_size;
+ uint32_t table_size;
+ uint32_t kernel_count;
+ struct cache_entry **hash_table;
+ bool modified;
- VkAllocationCallbacks alloc;
+ VkAllocationCallbacks alloc;
};
struct radv_pipeline_key {
- uint32_t instance_rate_inputs;
- uint32_t instance_rate_divisors[MAX_VERTEX_ATTRIBS];
- uint8_t vertex_attribute_formats[MAX_VERTEX_ATTRIBS];
- uint32_t vertex_attribute_bindings[MAX_VERTEX_ATTRIBS];
- uint32_t vertex_attribute_offsets[MAX_VERTEX_ATTRIBS];
- uint32_t vertex_attribute_strides[MAX_VERTEX_ATTRIBS];
- enum ac_fetch_format vertex_alpha_adjust[MAX_VERTEX_ATTRIBS];
- uint32_t vertex_post_shuffle;
- unsigned tess_input_vertices;
- uint32_t col_format;
- uint32_t is_int8;
- uint32_t is_int10;
- uint8_t log2_ps_iter_samples;
- uint8_t num_samples;
- uint32_t has_multiview_view_index : 1;
- uint32_t optimisations_disabled : 1;
- uint8_t topology;
-
- /* Non-zero if a required subgroup size is specified via
- * VK_EXT_subgroup_size_control.
- */
- uint8_t compute_subgroup_size;
- bool require_full_subgroups;
+ uint32_t instance_rate_inputs;
+ uint32_t instance_rate_divisors[MAX_VERTEX_ATTRIBS];
+ uint8_t vertex_attribute_formats[MAX_VERTEX_ATTRIBS];
+ uint32_t vertex_attribute_bindings[MAX_VERTEX_ATTRIBS];
+ uint32_t vertex_attribute_offsets[MAX_VERTEX_ATTRIBS];
+ uint32_t vertex_attribute_strides[MAX_VERTEX_ATTRIBS];
+ enum ac_fetch_format vertex_alpha_adjust[MAX_VERTEX_ATTRIBS];
+ uint32_t vertex_post_shuffle;
+ unsigned tess_input_vertices;
+ uint32_t col_format;
+ uint32_t is_int8;
+ uint32_t is_int10;
+ uint8_t log2_ps_iter_samples;
+ uint8_t num_samples;
+ uint32_t has_multiview_view_index : 1;
+ uint32_t optimisations_disabled : 1;
+ uint8_t topology;
+
+ /* Non-zero if a required subgroup size is specified via
+ * VK_EXT_subgroup_size_control.
+ */
+ uint8_t compute_subgroup_size;
+ bool require_full_subgroups;
};
struct radv_shader_binary;
struct radv_shader_variant;
-void
-radv_pipeline_cache_init(struct radv_pipeline_cache *cache,
- struct radv_device *device);
-void
-radv_pipeline_cache_finish(struct radv_pipeline_cache *cache);
-bool
-radv_pipeline_cache_load(struct radv_pipeline_cache *cache,
- const void *data, size_t size);
-
-bool
-radv_create_shader_variants_from_pipeline_cache(struct radv_device *device,
- struct radv_pipeline_cache *cache,
- const unsigned char *sha1,
- struct radv_shader_variant **variants,
- bool *found_in_application_cache);
-
-void
-radv_pipeline_cache_insert_shaders(struct radv_device *device,
- struct radv_pipeline_cache *cache,
- const unsigned char *sha1,
- struct radv_shader_variant **variants,
- struct radv_shader_binary *const *binaries);
+void radv_pipeline_cache_init(struct radv_pipeline_cache *cache, struct radv_device *device);
+void radv_pipeline_cache_finish(struct radv_pipeline_cache *cache);
+bool radv_pipeline_cache_load(struct radv_pipeline_cache *cache, const void *data, size_t size);
+
+bool radv_create_shader_variants_from_pipeline_cache(struct radv_device *device,
+ struct radv_pipeline_cache *cache,
+ const unsigned char *sha1,
+ struct radv_shader_variant **variants,
+ bool *found_in_application_cache);
+
+void radv_pipeline_cache_insert_shaders(struct radv_device *device,
+ struct radv_pipeline_cache *cache,
+ const unsigned char *sha1,
+ struct radv_shader_variant **variants,
+ struct radv_shader_binary *const *binaries);
enum radv_blit_ds_layout {
- RADV_BLIT_DS_LAYOUT_TILE_ENABLE,
- RADV_BLIT_DS_LAYOUT_TILE_DISABLE,
- RADV_BLIT_DS_LAYOUT_COUNT,
+ RADV_BLIT_DS_LAYOUT_TILE_ENABLE,
+ RADV_BLIT_DS_LAYOUT_TILE_DISABLE,
+ RADV_BLIT_DS_LAYOUT_COUNT,
};
-static inline enum radv_blit_ds_layout radv_meta_blit_ds_to_type(VkImageLayout layout)
+static inline enum radv_blit_ds_layout
+radv_meta_blit_ds_to_type(VkImageLayout layout)
{
- return (layout == VK_IMAGE_LAYOUT_GENERAL) ? RADV_BLIT_DS_LAYOUT_TILE_DISABLE : RADV_BLIT_DS_LAYOUT_TILE_ENABLE;
+ return (layout == VK_IMAGE_LAYOUT_GENERAL) ? RADV_BLIT_DS_LAYOUT_TILE_DISABLE
+ : RADV_BLIT_DS_LAYOUT_TILE_ENABLE;
}
-static inline VkImageLayout radv_meta_blit_ds_to_layout(enum radv_blit_ds_layout ds_layout)
+static inline VkImageLayout
+radv_meta_blit_ds_to_layout(enum radv_blit_ds_layout ds_layout)
{
- return ds_layout == RADV_BLIT_DS_LAYOUT_TILE_ENABLE ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL : VK_IMAGE_LAYOUT_GENERAL;
+ return ds_layout == RADV_BLIT_DS_LAYOUT_TILE_ENABLE ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
+ : VK_IMAGE_LAYOUT_GENERAL;
}
enum radv_meta_dst_layout {
- RADV_META_DST_LAYOUT_GENERAL,
- RADV_META_DST_LAYOUT_OPTIMAL,
- RADV_META_DST_LAYOUT_COUNT,
+ RADV_META_DST_LAYOUT_GENERAL,
+ RADV_META_DST_LAYOUT_OPTIMAL,
+ RADV_META_DST_LAYOUT_COUNT,
};
-static inline enum radv_meta_dst_layout radv_meta_dst_layout_from_layout(VkImageLayout layout)
+static inline enum radv_meta_dst_layout
+radv_meta_dst_layout_from_layout(VkImageLayout layout)
{
- return (layout == VK_IMAGE_LAYOUT_GENERAL) ? RADV_META_DST_LAYOUT_GENERAL : RADV_META_DST_LAYOUT_OPTIMAL;
+ return (layout == VK_IMAGE_LAYOUT_GENERAL) ? RADV_META_DST_LAYOUT_GENERAL
+ : RADV_META_DST_LAYOUT_OPTIMAL;
}
-static inline VkImageLayout radv_meta_dst_layout_to_layout(enum radv_meta_dst_layout layout)
+static inline VkImageLayout
+radv_meta_dst_layout_to_layout(enum radv_meta_dst_layout layout)
{
- return layout == RADV_META_DST_LAYOUT_OPTIMAL ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL : VK_IMAGE_LAYOUT_GENERAL;
+ return layout == RADV_META_DST_LAYOUT_OPTIMAL ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
+ : VK_IMAGE_LAYOUT_GENERAL;
}
struct radv_meta_state {
- VkAllocationCallbacks alloc;
-
- struct radv_pipeline_cache cache;
-
- /*
- * For on-demand pipeline creation, makes sure that
- * only one thread tries to build a pipeline at the same time.
- */
- mtx_t mtx;
-
- /**
- * Use array element `i` for images with `2^i` samples.
- */
- struct {
- VkRenderPass render_pass[NUM_META_FS_KEYS];
- VkPipeline color_pipelines[NUM_META_FS_KEYS];
-
- VkRenderPass depthstencil_rp;
- VkPipeline depth_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
- VkPipeline stencil_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
- VkPipeline depthstencil_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
-
- VkPipeline depth_only_unrestricted_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
- VkPipeline stencil_only_unrestricted_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
- VkPipeline depthstencil_unrestricted_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
- } clear[MAX_SAMPLES_LOG2];
-
- VkPipelineLayout clear_color_p_layout;
- VkPipelineLayout clear_depth_p_layout;
- VkPipelineLayout clear_depth_unrestricted_p_layout;
-
- /* Optimized compute fast HTILE clear for stencil or depth only. */
- VkPipeline clear_htile_mask_pipeline;
- VkPipelineLayout clear_htile_mask_p_layout;
- VkDescriptorSetLayout clear_htile_mask_ds_layout;
-
- struct {
- VkRenderPass render_pass[NUM_META_FS_KEYS][RADV_META_DST_LAYOUT_COUNT];
-
- /** Pipeline that blits from a 1D image. */
- VkPipeline pipeline_1d_src[NUM_META_FS_KEYS];
-
- /** Pipeline that blits from a 2D image. */
- VkPipeline pipeline_2d_src[NUM_META_FS_KEYS];
-
- /** Pipeline that blits from a 3D image. */
- VkPipeline pipeline_3d_src[NUM_META_FS_KEYS];
-
- VkRenderPass depth_only_rp[RADV_BLIT_DS_LAYOUT_COUNT];
- VkPipeline depth_only_1d_pipeline;
- VkPipeline depth_only_2d_pipeline;
- VkPipeline depth_only_3d_pipeline;
-
- VkRenderPass stencil_only_rp[RADV_BLIT_DS_LAYOUT_COUNT];
- VkPipeline stencil_only_1d_pipeline;
- VkPipeline stencil_only_2d_pipeline;
- VkPipeline stencil_only_3d_pipeline;
- VkPipelineLayout pipeline_layout;
- VkDescriptorSetLayout ds_layout;
- } blit;
-
- struct {
- VkPipelineLayout p_layouts[5];
- VkDescriptorSetLayout ds_layouts[5];
- VkPipeline pipelines[5][NUM_META_FS_KEYS];
-
- VkPipeline depth_only_pipeline[5];
-
- VkPipeline stencil_only_pipeline[5];
- } blit2d[MAX_SAMPLES_LOG2];
-
- VkRenderPass blit2d_render_passes[NUM_META_FS_KEYS][RADV_META_DST_LAYOUT_COUNT];
- VkRenderPass blit2d_depth_only_rp[RADV_BLIT_DS_LAYOUT_COUNT];
- VkRenderPass blit2d_stencil_only_rp[RADV_BLIT_DS_LAYOUT_COUNT];
-
- struct {
- VkPipelineLayout img_p_layout;
- VkDescriptorSetLayout img_ds_layout;
- VkPipeline pipeline;
- VkPipeline pipeline_3d;
- } itob;
- struct {
- VkPipelineLayout img_p_layout;
- VkDescriptorSetLayout img_ds_layout;
- VkPipeline pipeline;
- VkPipeline pipeline_3d;
- } btoi;
- struct {
- VkPipelineLayout img_p_layout;
- VkDescriptorSetLayout img_ds_layout;
- VkPipeline pipeline;
- } btoi_r32g32b32;
- struct {
- VkPipelineLayout img_p_layout;
- VkDescriptorSetLayout img_ds_layout;
- VkPipeline pipeline[MAX_SAMPLES_LOG2];
- VkPipeline pipeline_3d;
- } itoi;
- struct {
- VkPipelineLayout img_p_layout;
- VkDescriptorSetLayout img_ds_layout;
- VkPipeline pipeline;
- } itoi_r32g32b32;
- struct {
- VkPipelineLayout img_p_layout;
- VkDescriptorSetLayout img_ds_layout;
- VkPipeline pipeline[MAX_SAMPLES_LOG2];
- VkPipeline pipeline_3d;
- } cleari;
- struct {
- VkPipelineLayout img_p_layout;
- VkDescriptorSetLayout img_ds_layout;
- VkPipeline pipeline;
- } cleari_r32g32b32;
-
- struct {
- VkPipelineLayout p_layout;
- VkPipeline pipeline[NUM_META_FS_KEYS];
- VkRenderPass pass[NUM_META_FS_KEYS];
- } resolve;
-
- struct {
- VkDescriptorSetLayout ds_layout;
- VkPipelineLayout p_layout;
- struct {
- VkPipeline pipeline;
- VkPipeline i_pipeline;
- VkPipeline srgb_pipeline;
- } rc[MAX_SAMPLES_LOG2];
-
- VkPipeline depth_zero_pipeline;
- struct {
- VkPipeline average_pipeline;
- VkPipeline max_pipeline;
- VkPipeline min_pipeline;
- } depth[MAX_SAMPLES_LOG2];
-
- VkPipeline stencil_zero_pipeline;
- struct {
- VkPipeline max_pipeline;
- VkPipeline min_pipeline;
- } stencil[MAX_SAMPLES_LOG2];
- } resolve_compute;
-
- struct {
- VkDescriptorSetLayout ds_layout;
- VkPipelineLayout p_layout;
-
- struct {
- VkRenderPass render_pass[NUM_META_FS_KEYS][RADV_META_DST_LAYOUT_COUNT];
- VkPipeline pipeline[NUM_META_FS_KEYS];
- } rc[MAX_SAMPLES_LOG2];
-
- VkRenderPass depth_render_pass;
- VkPipeline depth_zero_pipeline;
- struct {
- VkPipeline average_pipeline;
- VkPipeline max_pipeline;
- VkPipeline min_pipeline;
- } depth[MAX_SAMPLES_LOG2];
-
- VkRenderPass stencil_render_pass;
- VkPipeline stencil_zero_pipeline;
- struct {
- VkPipeline max_pipeline;
- VkPipeline min_pipeline;
- } stencil[MAX_SAMPLES_LOG2];
- } resolve_fragment;
-
- struct {
- VkPipelineLayout p_layout;
- VkPipeline decompress_pipeline[NUM_DEPTH_DECOMPRESS_PIPELINES];
- VkPipeline resummarize_pipeline;
- VkRenderPass pass;
- } depth_decomp[MAX_SAMPLES_LOG2];
-
- struct {
- VkPipelineLayout p_layout;
- VkPipeline cmask_eliminate_pipeline;
- VkPipeline fmask_decompress_pipeline;
- VkPipeline dcc_decompress_pipeline;
- VkRenderPass pass;
-
- VkDescriptorSetLayout dcc_decompress_compute_ds_layout;
- VkPipelineLayout dcc_decompress_compute_p_layout;
- VkPipeline dcc_decompress_compute_pipeline;
- } fast_clear_flush;
-
- struct {
- VkPipelineLayout fill_p_layout;
- VkPipelineLayout copy_p_layout;
- VkDescriptorSetLayout fill_ds_layout;
- VkDescriptorSetLayout copy_ds_layout;
- VkPipeline fill_pipeline;
- VkPipeline copy_pipeline;
- } buffer;
-
- struct {
- VkDescriptorSetLayout ds_layout;
- VkPipelineLayout p_layout;
- VkPipeline occlusion_query_pipeline;
- VkPipeline pipeline_statistics_query_pipeline;
- VkPipeline tfb_query_pipeline;
- VkPipeline timestamp_query_pipeline;
- } query;
-
- struct {
- VkDescriptorSetLayout ds_layout;
- VkPipelineLayout p_layout;
- VkPipeline pipeline[MAX_SAMPLES_LOG2];
- } fmask_expand;
-
- struct {
- VkDescriptorSetLayout ds_layout;
- VkPipelineLayout p_layout;
- VkPipeline pipeline;
- } dcc_retile;
+ VkAllocationCallbacks alloc;
+
+ struct radv_pipeline_cache cache;
+
+ /*
+ * For on-demand pipeline creation, makes sure that
+ * only one thread tries to build a pipeline at the same time.
+ */
+ mtx_t mtx;
+
+ /**
+ * Use array element `i` for images with `2^i` samples.
+ */
+ struct {
+ VkRenderPass render_pass[NUM_META_FS_KEYS];
+ VkPipeline color_pipelines[NUM_META_FS_KEYS];
+
+ VkRenderPass depthstencil_rp;
+ VkPipeline depth_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
+ VkPipeline stencil_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
+ VkPipeline depthstencil_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
+
+ VkPipeline depth_only_unrestricted_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
+ VkPipeline stencil_only_unrestricted_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
+ VkPipeline depthstencil_unrestricted_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
+ } clear[MAX_SAMPLES_LOG2];
+
+ VkPipelineLayout clear_color_p_layout;
+ VkPipelineLayout clear_depth_p_layout;
+ VkPipelineLayout clear_depth_unrestricted_p_layout;
+
+ /* Optimized compute fast HTILE clear for stencil or depth only. */
+ VkPipeline clear_htile_mask_pipeline;
+ VkPipelineLayout clear_htile_mask_p_layout;
+ VkDescriptorSetLayout clear_htile_mask_ds_layout;
+
+ struct {
+ VkRenderPass render_pass[NUM_META_FS_KEYS][RADV_META_DST_LAYOUT_COUNT];
+
+ /** Pipeline that blits from a 1D image. */
+ VkPipeline pipeline_1d_src[NUM_META_FS_KEYS];
+
+ /** Pipeline that blits from a 2D image. */
+ VkPipeline pipeline_2d_src[NUM_META_FS_KEYS];
+
+ /** Pipeline that blits from a 3D image. */
+ VkPipeline pipeline_3d_src[NUM_META_FS_KEYS];
+
+ VkRenderPass depth_only_rp[RADV_BLIT_DS_LAYOUT_COUNT];
+ VkPipeline depth_only_1d_pipeline;
+ VkPipeline depth_only_2d_pipeline;
+ VkPipeline depth_only_3d_pipeline;
+
+ VkRenderPass stencil_only_rp[RADV_BLIT_DS_LAYOUT_COUNT];
+ VkPipeline stencil_only_1d_pipeline;
+ VkPipeline stencil_only_2d_pipeline;
+ VkPipeline stencil_only_3d_pipeline;
+ VkPipelineLayout pipeline_layout;
+ VkDescriptorSetLayout ds_layout;
+ } blit;
+
+ struct {
+ VkPipelineLayout p_layouts[5];
+ VkDescriptorSetLayout ds_layouts[5];
+ VkPipeline pipelines[5][NUM_META_FS_KEYS];
+
+ VkPipeline depth_only_pipeline[5];
+
+ VkPipeline stencil_only_pipeline[5];
+ } blit2d[MAX_SAMPLES_LOG2];
+
+ VkRenderPass blit2d_render_passes[NUM_META_FS_KEYS][RADV_META_DST_LAYOUT_COUNT];
+ VkRenderPass blit2d_depth_only_rp[RADV_BLIT_DS_LAYOUT_COUNT];
+ VkRenderPass blit2d_stencil_only_rp[RADV_BLIT_DS_LAYOUT_COUNT];
+
+ struct {
+ VkPipelineLayout img_p_layout;
+ VkDescriptorSetLayout img_ds_layout;
+ VkPipeline pipeline;
+ VkPipeline pipeline_3d;
+ } itob;
+ struct {
+ VkPipelineLayout img_p_layout;
+ VkDescriptorSetLayout img_ds_layout;
+ VkPipeline pipeline;
+ VkPipeline pipeline_3d;
+ } btoi;
+ struct {
+ VkPipelineLayout img_p_layout;
+ VkDescriptorSetLayout img_ds_layout;
+ VkPipeline pipeline;
+ } btoi_r32g32b32;
+ struct {
+ VkPipelineLayout img_p_layout;
+ VkDescriptorSetLayout img_ds_layout;
+ VkPipeline pipeline[MAX_SAMPLES_LOG2];
+ VkPipeline pipeline_3d;
+ } itoi;
+ struct {
+ VkPipelineLayout img_p_layout;
+ VkDescriptorSetLayout img_ds_layout;
+ VkPipeline pipeline;
+ } itoi_r32g32b32;
+ struct {
+ VkPipelineLayout img_p_layout;
+ VkDescriptorSetLayout img_ds_layout;
+ VkPipeline pipeline[MAX_SAMPLES_LOG2];
+ VkPipeline pipeline_3d;
+ } cleari;
+ struct {
+ VkPipelineLayout img_p_layout;
+ VkDescriptorSetLayout img_ds_layout;
+ VkPipeline pipeline;
+ } cleari_r32g32b32;
+
+ struct {
+ VkPipelineLayout p_layout;
+ VkPipeline pipeline[NUM_META_FS_KEYS];
+ VkRenderPass pass[NUM_META_FS_KEYS];
+ } resolve;
+
+ struct {
+ VkDescriptorSetLayout ds_layout;
+ VkPipelineLayout p_layout;
+ struct {
+ VkPipeline pipeline;
+ VkPipeline i_pipeline;
+ VkPipeline srgb_pipeline;
+ } rc[MAX_SAMPLES_LOG2];
+
+ VkPipeline depth_zero_pipeline;
+ struct {
+ VkPipeline average_pipeline;
+ VkPipeline max_pipeline;
+ VkPipeline min_pipeline;
+ } depth[MAX_SAMPLES_LOG2];
+
+ VkPipeline stencil_zero_pipeline;
+ struct {
+ VkPipeline max_pipeline;
+ VkPipeline min_pipeline;
+ } stencil[MAX_SAMPLES_LOG2];
+ } resolve_compute;
+
+ struct {
+ VkDescriptorSetLayout ds_layout;
+ VkPipelineLayout p_layout;
+
+ struct {
+ VkRenderPass render_pass[NUM_META_FS_KEYS][RADV_META_DST_LAYOUT_COUNT];
+ VkPipeline pipeline[NUM_META_FS_KEYS];
+ } rc[MAX_SAMPLES_LOG2];
+
+ VkRenderPass depth_render_pass;
+ VkPipeline depth_zero_pipeline;
+ struct {
+ VkPipeline average_pipeline;
+ VkPipeline max_pipeline;
+ VkPipeline min_pipeline;
+ } depth[MAX_SAMPLES_LOG2];
+
+ VkRenderPass stencil_render_pass;
+ VkPipeline stencil_zero_pipeline;
+ struct {
+ VkPipeline max_pipeline;
+ VkPipeline min_pipeline;
+ } stencil[MAX_SAMPLES_LOG2];
+ } resolve_fragment;
+
+ struct {
+ VkPipelineLayout p_layout;
+ VkPipeline decompress_pipeline[NUM_DEPTH_DECOMPRESS_PIPELINES];
+ VkPipeline resummarize_pipeline;
+ VkRenderPass pass;
+ } depth_decomp[MAX_SAMPLES_LOG2];
+
+ struct {
+ VkPipelineLayout p_layout;
+ VkPipeline cmask_eliminate_pipeline;
+ VkPipeline fmask_decompress_pipeline;
+ VkPipeline dcc_decompress_pipeline;
+ VkRenderPass pass;
+
+ VkDescriptorSetLayout dcc_decompress_compute_ds_layout;
+ VkPipelineLayout dcc_decompress_compute_p_layout;
+ VkPipeline dcc_decompress_compute_pipeline;
+ } fast_clear_flush;
+
+ struct {
+ VkPipelineLayout fill_p_layout;
+ VkPipelineLayout copy_p_layout;
+ VkDescriptorSetLayout fill_ds_layout;
+ VkDescriptorSetLayout copy_ds_layout;
+ VkPipeline fill_pipeline;
+ VkPipeline copy_pipeline;
+ } buffer;
+
+ struct {
+ VkDescriptorSetLayout ds_layout;
+ VkPipelineLayout p_layout;
+ VkPipeline occlusion_query_pipeline;
+ VkPipeline pipeline_statistics_query_pipeline;
+ VkPipeline tfb_query_pipeline;
+ VkPipeline timestamp_query_pipeline;
+ } query;
+
+ struct {
+ VkDescriptorSetLayout ds_layout;
+ VkPipelineLayout p_layout;
+ VkPipeline pipeline[MAX_SAMPLES_LOG2];
+ } fmask_expand;
+
+ struct {
+ VkDescriptorSetLayout ds_layout;
+ VkPipelineLayout p_layout;
+ VkPipeline pipeline;
+ } dcc_retile;
};
/* queue types */
-#define RADV_QUEUE_GENERAL 0
-#define RADV_QUEUE_COMPUTE 1
+#define RADV_QUEUE_GENERAL 0
+#define RADV_QUEUE_COMPUTE 1
#define RADV_QUEUE_TRANSFER 2
/* Not a real queue family */
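The hunk above also reformats radv_meta_dst_layout_from_layout() and radv_meta_blit_ds_to_type(), which collapse a VkImageLayout into the small enums that index the per-layout arrays in radv_meta_state. A sketch of the intended lookup pattern follows; the function name and fs_key index are placeholders, not code from this patch.

/* Illustrative lookup pattern (not part of the patch): collapse the layout to
 * the small enum, then use it to index the matching meta render pass. */
static VkRenderPass
radv_example_pick_blit2d_pass(const struct radv_meta_state *state,
                              VkImageLayout dst_layout, unsigned fs_key)
{
   enum radv_meta_dst_layout dst = radv_meta_dst_layout_from_layout(dst_layout);
   return state->blit2d_render_passes[fs_key][dst];
}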
@@ -674,588 +673,574 @@ struct radv_deferred_queue_submission;
enum ring_type radv_queue_family_to_ring(int f);
struct radv_queue {
- struct vk_object_base base;
- struct radv_device * device;
- struct radeon_winsys_ctx *hw_ctx;
- enum radeon_ctx_priority priority;
- uint32_t queue_family_index;
- int queue_idx;
- VkDeviceQueueCreateFlags flags;
-
- uint32_t scratch_size_per_wave;
- uint32_t scratch_waves;
- uint32_t compute_scratch_size_per_wave;
- uint32_t compute_scratch_waves;
- uint32_t esgs_ring_size;
- uint32_t gsvs_ring_size;
- bool has_tess_rings;
- bool has_gds;
- bool has_gds_oa;
- bool has_sample_positions;
-
- struct radeon_winsys_bo *scratch_bo;
- struct radeon_winsys_bo *descriptor_bo;
- struct radeon_winsys_bo *compute_scratch_bo;
- struct radeon_winsys_bo *esgs_ring_bo;
- struct radeon_winsys_bo *gsvs_ring_bo;
- struct radeon_winsys_bo *tess_rings_bo;
- struct radeon_winsys_bo *gds_bo;
- struct radeon_winsys_bo *gds_oa_bo;
- struct radeon_cmdbuf *initial_preamble_cs;
- struct radeon_cmdbuf *initial_full_flush_preamble_cs;
- struct radeon_cmdbuf *continue_preamble_cs;
-
- struct list_head pending_submissions;
- mtx_t pending_mutex;
-
- mtx_t thread_mutex;
- struct u_cnd_monotonic thread_cond;
- struct radv_deferred_queue_submission *thread_submission;
- thrd_t submission_thread;
- bool thread_exit;
- bool thread_running;
- bool cond_created;
+ struct vk_object_base base;
+ struct radv_device *device;
+ struct radeon_winsys_ctx *hw_ctx;
+ enum radeon_ctx_priority priority;
+ uint32_t queue_family_index;
+ int queue_idx;
+ VkDeviceQueueCreateFlags flags;
+
+ uint32_t scratch_size_per_wave;
+ uint32_t scratch_waves;
+ uint32_t compute_scratch_size_per_wave;
+ uint32_t compute_scratch_waves;
+ uint32_t esgs_ring_size;
+ uint32_t gsvs_ring_size;
+ bool has_tess_rings;
+ bool has_gds;
+ bool has_gds_oa;
+ bool has_sample_positions;
+
+ struct radeon_winsys_bo *scratch_bo;
+ struct radeon_winsys_bo *descriptor_bo;
+ struct radeon_winsys_bo *compute_scratch_bo;
+ struct radeon_winsys_bo *esgs_ring_bo;
+ struct radeon_winsys_bo *gsvs_ring_bo;
+ struct radeon_winsys_bo *tess_rings_bo;
+ struct radeon_winsys_bo *gds_bo;
+ struct radeon_winsys_bo *gds_oa_bo;
+ struct radeon_cmdbuf *initial_preamble_cs;
+ struct radeon_cmdbuf *initial_full_flush_preamble_cs;
+ struct radeon_cmdbuf *continue_preamble_cs;
+
+ struct list_head pending_submissions;
+ mtx_t pending_mutex;
+
+ mtx_t thread_mutex;
+ struct u_cnd_monotonic thread_cond;
+ struct radv_deferred_queue_submission *thread_submission;
+ thrd_t submission_thread;
+ bool thread_exit;
+ bool thread_running;
+ bool cond_created;
};
#define RADV_BORDER_COLOR_COUNT 4096
#define RADV_BORDER_COLOR_BUFFER_SIZE (sizeof(VkClearColorValue) * RADV_BORDER_COLOR_COUNT)
struct radv_device_border_color_data {
- bool used[RADV_BORDER_COLOR_COUNT];
+ bool used[RADV_BORDER_COLOR_COUNT];
- struct radeon_winsys_bo *bo;
- VkClearColorValue *colors_gpu_ptr;
+ struct radeon_winsys_bo *bo;
+ VkClearColorValue *colors_gpu_ptr;
- /* Mutex is required to guarantee vkCreateSampler thread safety
- * given that we are writing to a buffer and checking color occupation */
- mtx_t mutex;
+ /* Mutex is required to guarantee vkCreateSampler thread safety
+ * given that we are writing to a buffer and checking color occupation */
+ mtx_t mutex;
};
-enum radv_force_vrs
-{
- RADV_FORCE_VRS_NONE = 0,
- RADV_FORCE_VRS_2x2,
- RADV_FORCE_VRS_2x1,
- RADV_FORCE_VRS_1x2,
+enum radv_force_vrs {
+ RADV_FORCE_VRS_NONE = 0,
+ RADV_FORCE_VRS_2x2,
+ RADV_FORCE_VRS_2x1,
+ RADV_FORCE_VRS_1x2,
};
struct radv_device {
- struct vk_device vk;
+ struct vk_device vk;
- struct radv_instance * instance;
- struct radeon_winsys *ws;
+ struct radv_instance *instance;
+ struct radeon_winsys *ws;
- struct radeon_winsys_ctx *hw_ctx[RADV_NUM_HW_CTX];
- struct radv_meta_state meta_state;
+ struct radeon_winsys_ctx *hw_ctx[RADV_NUM_HW_CTX];
+ struct radv_meta_state meta_state;
- struct radv_queue *queues[RADV_MAX_QUEUE_FAMILIES];
- int queue_count[RADV_MAX_QUEUE_FAMILIES];
- struct radeon_cmdbuf *empty_cs[RADV_MAX_QUEUE_FAMILIES];
+ struct radv_queue *queues[RADV_MAX_QUEUE_FAMILIES];
+ int queue_count[RADV_MAX_QUEUE_FAMILIES];
+ struct radeon_cmdbuf *empty_cs[RADV_MAX_QUEUE_FAMILIES];
- bool pbb_allowed;
- bool dfsm_allowed;
- uint32_t tess_offchip_block_dw_size;
- uint32_t scratch_waves;
- uint32_t dispatch_initiator;
+ bool pbb_allowed;
+ bool dfsm_allowed;
+ uint32_t tess_offchip_block_dw_size;
+ uint32_t scratch_waves;
+ uint32_t dispatch_initiator;
- uint32_t gs_table_depth;
+ uint32_t gs_table_depth;
- /* MSAA sample locations.
- * The first index is the sample index.
- * The second index is the coordinate: X, Y. */
- float sample_locations_1x[1][2];
- float sample_locations_2x[2][2];
- float sample_locations_4x[4][2];
- float sample_locations_8x[8][2];
+ /* MSAA sample locations.
+ * The first index is the sample index.
+ * The second index is the coordinate: X, Y. */
+ float sample_locations_1x[1][2];
+ float sample_locations_2x[2][2];
+ float sample_locations_4x[4][2];
+ float sample_locations_8x[8][2];
- /* GFX7 and later */
- uint32_t gfx_init_size_dw;
- struct radeon_winsys_bo *gfx_init;
+ /* GFX7 and later */
+ uint32_t gfx_init_size_dw;
+ struct radeon_winsys_bo *gfx_init;
- struct radeon_winsys_bo *trace_bo;
- uint32_t *trace_id_ptr;
+ struct radeon_winsys_bo *trace_bo;
+ uint32_t *trace_id_ptr;
- /* Whether to keep shader debug info, for tracing or VK_AMD_shader_info */
- bool keep_shader_info;
+ /* Whether to keep shader debug info, for tracing or VK_AMD_shader_info */
+ bool keep_shader_info;
- struct radv_physical_device *physical_device;
+ struct radv_physical_device *physical_device;
- /* Backup in-memory cache to be used if the app doesn't provide one */
- struct radv_pipeline_cache * mem_cache;
+ /* Backup in-memory cache to be used if the app doesn't provide one */
+ struct radv_pipeline_cache *mem_cache;
- /*
- * use different counters so MSAA MRTs get consecutive surface indices,
- * even if MASK is allocated in between.
- */
- uint32_t image_mrt_offset_counter;
- uint32_t fmask_mrt_offset_counter;
- struct list_head shader_slabs;
- mtx_t shader_slab_mutex;
+ /*
+ * use different counters so MSAA MRTs get consecutive surface indices,
+ * even if MASK is allocated in between.
+ */
+ uint32_t image_mrt_offset_counter;
+ uint32_t fmask_mrt_offset_counter;
+ struct list_head shader_slabs;
+ mtx_t shader_slab_mutex;
- /* For detecting VM faults reported by dmesg. */
- uint64_t dmesg_timestamp;
+ /* For detecting VM faults reported by dmesg. */
+ uint64_t dmesg_timestamp;
- /* Whether the app has enabled the robustBufferAccess/robustBufferAccess2 features. */
- bool robust_buffer_access;
- bool robust_buffer_access2;
+ /* Whether the app has enabled the robustBufferAccess/robustBufferAccess2 features. */
+ bool robust_buffer_access;
+ bool robust_buffer_access2;
- /* Whether gl_FragCoord.z should be adjusted for VRS due to a hw bug
- * on some GFX10.3 chips.
- */
- bool adjust_frag_coord_z;
+ /* Whether gl_FragCoord.z should be adjusted for VRS due to a hw bug
+ * on some GFX10.3 chips.
+ */
+ bool adjust_frag_coord_z;
- /* Whether the driver uses a global BO list. */
- bool use_global_bo_list;
+ /* Whether the driver uses a global BO list. */
+ bool use_global_bo_list;
- /* Whether anisotropy is forced with RADV_TEX_ANISO (-1 is disabled). */
- int force_aniso;
+ /* Whether anisotropy is forced with RADV_TEX_ANISO (-1 is disabled). */
+ int force_aniso;
- struct radv_device_border_color_data border_color_data;
+ struct radv_device_border_color_data border_color_data;
- /* Condition variable for legacy timelines, to notify waiters when a
- * new point gets submitted. */
- struct u_cnd_monotonic timeline_cond;
+ /* Condition variable for legacy timelines, to notify waiters when a
+ * new point gets submitted. */
+ struct u_cnd_monotonic timeline_cond;
- /* Thread trace. */
- struct ac_thread_trace_data thread_trace;
+ /* Thread trace. */
+ struct ac_thread_trace_data thread_trace;
- /* Trap handler. */
- struct radv_shader_variant *trap_handler_shader;
- struct radeon_winsys_bo *tma_bo; /* Trap Memory Address */
- uint32_t *tma_ptr;
+ /* Trap handler. */
+ struct radv_shader_variant *trap_handler_shader;
+ struct radeon_winsys_bo *tma_bo; /* Trap Memory Address */
+ uint32_t *tma_ptr;
- /* Overallocation. */
- bool overallocation_disallowed;
- uint64_t allocated_memory_size[VK_MAX_MEMORY_HEAPS];
- mtx_t overallocation_mutex;
+ /* Overallocation. */
+ bool overallocation_disallowed;
+ uint64_t allocated_memory_size[VK_MAX_MEMORY_HEAPS];
+ mtx_t overallocation_mutex;
-   /* Track the number of device losses. */
- int lost;
+   /* Track the number of device losses. */
+ int lost;
- /* Whether the user forced VRS rates on GFX10.3+. */
- enum radv_force_vrs force_vrs;
+ /* Whether the user forced VRS rates on GFX10.3+. */
+ enum radv_force_vrs force_vrs;
};
-VkResult _radv_device_set_lost(struct radv_device *device,
- const char *file, int line,
- const char *msg, ...)
- radv_printflike(4, 5);
+VkResult _radv_device_set_lost(struct radv_device *device, const char *file, int line,
+ const char *msg, ...) radv_printflike(4, 5);
-#define radv_device_set_lost(dev, ...) \
- _radv_device_set_lost(dev, __FILE__, __LINE__, __VA_ARGS__)
+#define radv_device_set_lost(dev, ...) _radv_device_set_lost(dev, __FILE__, __LINE__, __VA_ARGS__)
static inline bool
radv_device_is_lost(const struct radv_device *device)
{
- return unlikely(p_atomic_read(&device->lost));
+ return unlikely(p_atomic_read(&device->lost));
}
struct radv_device_memory {
- struct vk_object_base base;
- struct radeon_winsys_bo *bo;
- /* for dedicated allocations */
- struct radv_image *image;
- struct radv_buffer *buffer;
- uint32_t heap_index;
- uint64_t alloc_size;
- void * map;
- void * user_ptr;
+ struct vk_object_base base;
+ struct radeon_winsys_bo *bo;
+ /* for dedicated allocations */
+ struct radv_image *image;
+ struct radv_buffer *buffer;
+ uint32_t heap_index;
+ uint64_t alloc_size;
+ void *map;
+ void *user_ptr;
#if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER
- struct AHardwareBuffer * android_hardware_buffer;
+ struct AHardwareBuffer *android_hardware_buffer;
#endif
};
-
struct radv_descriptor_range {
- uint64_t va;
- uint32_t size;
+ uint64_t va;
+ uint32_t size;
};
struct radv_descriptor_set_header {
- struct vk_object_base base;
- const struct radv_descriptor_set_layout *layout;
- uint32_t size;
- uint32_t buffer_count;
+ struct vk_object_base base;
+ const struct radv_descriptor_set_layout *layout;
+ uint32_t size;
+ uint32_t buffer_count;
- struct radeon_winsys_bo *bo;
- uint64_t va;
- uint32_t *mapped_ptr;
- struct radv_descriptor_range *dynamic_descriptors;
+ struct radeon_winsys_bo *bo;
+ uint64_t va;
+ uint32_t *mapped_ptr;
+ struct radv_descriptor_range *dynamic_descriptors;
};
struct radv_descriptor_set {
- struct radv_descriptor_set_header header;
+ struct radv_descriptor_set_header header;
- struct radeon_winsys_bo *descriptors[];
+ struct radeon_winsys_bo *descriptors[];
};
-struct radv_push_descriptor_set
-{
- struct radv_descriptor_set_header set;
- uint32_t capacity;
+struct radv_push_descriptor_set {
+ struct radv_descriptor_set_header set;
+ uint32_t capacity;
};
struct radv_descriptor_pool_entry {
- uint32_t offset;
- uint32_t size;
- struct radv_descriptor_set *set;
+ uint32_t offset;
+ uint32_t size;
+ struct radv_descriptor_set *set;
};
struct radv_descriptor_pool {
- struct vk_object_base base;
- struct radeon_winsys_bo *bo;
- uint8_t *host_bo;
- uint8_t *mapped_ptr;
- uint64_t current_offset;
- uint64_t size;
+ struct vk_object_base base;
+ struct radeon_winsys_bo *bo;
+ uint8_t *host_bo;
+ uint8_t *mapped_ptr;
+ uint64_t current_offset;
+ uint64_t size;
- uint8_t *host_memory_base;
- uint8_t *host_memory_ptr;
- uint8_t *host_memory_end;
+ uint8_t *host_memory_base;
+ uint8_t *host_memory_ptr;
+ uint8_t *host_memory_end;
- uint32_t entry_count;
- uint32_t max_entry_count;
- struct radv_descriptor_pool_entry entries[0];
+ uint32_t entry_count;
+ uint32_t max_entry_count;
+ struct radv_descriptor_pool_entry entries[0];
};
struct radv_descriptor_update_template_entry {
- VkDescriptorType descriptor_type;
+ VkDescriptorType descriptor_type;
- /* The number of descriptors to update */
- uint32_t descriptor_count;
+ /* The number of descriptors to update */
+ uint32_t descriptor_count;
- /* Into mapped_ptr or dynamic_descriptors, in units of the respective array */
- uint32_t dst_offset;
+ /* Into mapped_ptr or dynamic_descriptors, in units of the respective array */
+ uint32_t dst_offset;
- /* In dwords. Not valid/used for dynamic descriptors */
- uint32_t dst_stride;
+ /* In dwords. Not valid/used for dynamic descriptors */
+ uint32_t dst_stride;
- uint32_t buffer_offset;
+ uint32_t buffer_offset;
- /* Only valid for combined image samplers and samplers */
- uint8_t has_sampler;
- uint8_t sampler_offset;
+ /* Only valid for combined image samplers and samplers */
+ uint8_t has_sampler;
+ uint8_t sampler_offset;
- /* In bytes */
- size_t src_offset;
- size_t src_stride;
+ /* In bytes */
+ size_t src_offset;
+ size_t src_stride;
- /* For push descriptors */
- const uint32_t *immutable_samplers;
+ /* For push descriptors */
+ const uint32_t *immutable_samplers;
};
struct radv_descriptor_update_template {
- struct vk_object_base base;
- uint32_t entry_count;
- VkPipelineBindPoint bind_point;
- struct radv_descriptor_update_template_entry entry[0];
+ struct vk_object_base base;
+ uint32_t entry_count;
+ VkPipelineBindPoint bind_point;
+ struct radv_descriptor_update_template_entry entry[0];
};
struct radv_buffer {
- struct vk_object_base base;
- VkDeviceSize size;
+ struct vk_object_base base;
+ VkDeviceSize size;
- VkBufferUsageFlags usage;
- VkBufferCreateFlags flags;
+ VkBufferUsageFlags usage;
+ VkBufferCreateFlags flags;
- /* Set when bound */
- struct radeon_winsys_bo * bo;
- VkDeviceSize offset;
+ /* Set when bound */
+ struct radeon_winsys_bo *bo;
+ VkDeviceSize offset;
- bool shareable;
+ bool shareable;
};
enum radv_dynamic_state_bits {
- RADV_DYNAMIC_VIEWPORT = 1ull << 0,
- RADV_DYNAMIC_SCISSOR = 1ull << 1,
- RADV_DYNAMIC_LINE_WIDTH = 1ull << 2,
- RADV_DYNAMIC_DEPTH_BIAS = 1ull << 3,
- RADV_DYNAMIC_BLEND_CONSTANTS = 1ull << 4,
- RADV_DYNAMIC_DEPTH_BOUNDS = 1ull << 5,
- RADV_DYNAMIC_STENCIL_COMPARE_MASK = 1ull << 6,
- RADV_DYNAMIC_STENCIL_WRITE_MASK = 1ull << 7,
- RADV_DYNAMIC_STENCIL_REFERENCE = 1ull << 8,
- RADV_DYNAMIC_DISCARD_RECTANGLE = 1ull << 9,
- RADV_DYNAMIC_SAMPLE_LOCATIONS = 1ull << 10,
- RADV_DYNAMIC_LINE_STIPPLE = 1ull << 11,
- RADV_DYNAMIC_CULL_MODE = 1ull << 12,
- RADV_DYNAMIC_FRONT_FACE = 1ull << 13,
- RADV_DYNAMIC_PRIMITIVE_TOPOLOGY = 1ull << 14,
- RADV_DYNAMIC_DEPTH_TEST_ENABLE = 1ull << 15,
- RADV_DYNAMIC_DEPTH_WRITE_ENABLE = 1ull << 16,
- RADV_DYNAMIC_DEPTH_COMPARE_OP = 1ull << 17,
- RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE = 1ull << 18,
- RADV_DYNAMIC_STENCIL_TEST_ENABLE = 1ull << 19,
- RADV_DYNAMIC_STENCIL_OP = 1ull << 20,
- RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE = 1ull << 21,
- RADV_DYNAMIC_FRAGMENT_SHADING_RATE = 1ull << 22,
- RADV_DYNAMIC_ALL = (1ull << 23) - 1,
+ RADV_DYNAMIC_VIEWPORT = 1ull << 0,
+ RADV_DYNAMIC_SCISSOR = 1ull << 1,
+ RADV_DYNAMIC_LINE_WIDTH = 1ull << 2,
+ RADV_DYNAMIC_DEPTH_BIAS = 1ull << 3,
+ RADV_DYNAMIC_BLEND_CONSTANTS = 1ull << 4,
+ RADV_DYNAMIC_DEPTH_BOUNDS = 1ull << 5,
+ RADV_DYNAMIC_STENCIL_COMPARE_MASK = 1ull << 6,
+ RADV_DYNAMIC_STENCIL_WRITE_MASK = 1ull << 7,
+ RADV_DYNAMIC_STENCIL_REFERENCE = 1ull << 8,
+ RADV_DYNAMIC_DISCARD_RECTANGLE = 1ull << 9,
+ RADV_DYNAMIC_SAMPLE_LOCATIONS = 1ull << 10,
+ RADV_DYNAMIC_LINE_STIPPLE = 1ull << 11,
+ RADV_DYNAMIC_CULL_MODE = 1ull << 12,
+ RADV_DYNAMIC_FRONT_FACE = 1ull << 13,
+ RADV_DYNAMIC_PRIMITIVE_TOPOLOGY = 1ull << 14,
+ RADV_DYNAMIC_DEPTH_TEST_ENABLE = 1ull << 15,
+ RADV_DYNAMIC_DEPTH_WRITE_ENABLE = 1ull << 16,
+ RADV_DYNAMIC_DEPTH_COMPARE_OP = 1ull << 17,
+ RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE = 1ull << 18,
+ RADV_DYNAMIC_STENCIL_TEST_ENABLE = 1ull << 19,
+ RADV_DYNAMIC_STENCIL_OP = 1ull << 20,
+ RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE = 1ull << 21,
+ RADV_DYNAMIC_FRAGMENT_SHADING_RATE = 1ull << 22,
+ RADV_DYNAMIC_ALL = (1ull << 23) - 1,
};
enum radv_cmd_dirty_bits {
- /* Keep the dynamic state dirty bits in sync with
- * enum radv_dynamic_state_bits */
- RADV_CMD_DIRTY_DYNAMIC_VIEWPORT = 1ull << 0,
- RADV_CMD_DIRTY_DYNAMIC_SCISSOR = 1ull << 1,
- RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH = 1ull << 2,
- RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS = 1ull << 3,
- RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS = 1ull << 4,
- RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS = 1ull << 5,
- RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK = 1ull << 6,
- RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK = 1ull << 7,
- RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE = 1ull << 8,
- RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE = 1ull << 9,
- RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS = 1ull << 10,
- RADV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE = 1ull << 11,
- RADV_CMD_DIRTY_DYNAMIC_CULL_MODE = 1ull << 12,
- RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE = 1ull << 13,
- RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY = 1ull << 14,
- RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE = 1ull << 15,
- RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE = 1ull << 16,
- RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP = 1ull << 17,
- RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE = 1ull << 18,
- RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE = 1ull << 19,
- RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP = 1ull << 20,
- RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE = 1ull << 21,
- RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE = 1ull << 22,
- RADV_CMD_DIRTY_DYNAMIC_ALL = (1ull << 23) - 1,
- RADV_CMD_DIRTY_PIPELINE = 1ull << 23,
- RADV_CMD_DIRTY_INDEX_BUFFER = 1ull << 24,
- RADV_CMD_DIRTY_FRAMEBUFFER = 1ull << 25,
- RADV_CMD_DIRTY_VERTEX_BUFFER = 1ull << 26,
- RADV_CMD_DIRTY_STREAMOUT_BUFFER = 1ull << 27,
+ /* Keep the dynamic state dirty bits in sync with
+ * enum radv_dynamic_state_bits */
+ RADV_CMD_DIRTY_DYNAMIC_VIEWPORT = 1ull << 0,
+ RADV_CMD_DIRTY_DYNAMIC_SCISSOR = 1ull << 1,
+ RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH = 1ull << 2,
+ RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS = 1ull << 3,
+ RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS = 1ull << 4,
+ RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS = 1ull << 5,
+ RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK = 1ull << 6,
+ RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK = 1ull << 7,
+ RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE = 1ull << 8,
+ RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE = 1ull << 9,
+ RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS = 1ull << 10,
+ RADV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE = 1ull << 11,
+ RADV_CMD_DIRTY_DYNAMIC_CULL_MODE = 1ull << 12,
+ RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE = 1ull << 13,
+ RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY = 1ull << 14,
+ RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE = 1ull << 15,
+ RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE = 1ull << 16,
+ RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP = 1ull << 17,
+ RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE = 1ull << 18,
+ RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE = 1ull << 19,
+ RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP = 1ull << 20,
+ RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE = 1ull << 21,
+ RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE = 1ull << 22,
+ RADV_CMD_DIRTY_DYNAMIC_ALL = (1ull << 23) - 1,
+ RADV_CMD_DIRTY_PIPELINE = 1ull << 23,
+ RADV_CMD_DIRTY_INDEX_BUFFER = 1ull << 24,
+ RADV_CMD_DIRTY_FRAMEBUFFER = 1ull << 25,
+ RADV_CMD_DIRTY_VERTEX_BUFFER = 1ull << 26,
+ RADV_CMD_DIRTY_STREAMOUT_BUFFER = 1ull << 27,
};
enum radv_cmd_flush_bits {
- /* Instruction cache. */
- RADV_CMD_FLAG_INV_ICACHE = 1 << 0,
- /* Scalar L1 cache. */
- RADV_CMD_FLAG_INV_SCACHE = 1 << 1,
- /* Vector L1 cache. */
- RADV_CMD_FLAG_INV_VCACHE = 1 << 2,
- /* L2 cache + L2 metadata cache writeback & invalidate.
- * GFX6-8: Used by shaders only. GFX9-10: Used by everything. */
- RADV_CMD_FLAG_INV_L2 = 1 << 3,
- /* L2 writeback (write dirty L2 lines to memory for non-L2 clients).
- * Only used for coherency with non-L2 clients like CB, DB, CP on GFX6-8.
- * GFX6-7 will do complete invalidation, because the writeback is unsupported. */
- RADV_CMD_FLAG_WB_L2 = 1 << 4,
- /* Invalidate the metadata cache. To be used when the DCC/HTILE metadata
- * changed and we want to read an image from shaders. */
- RADV_CMD_FLAG_INV_L2_METADATA = 1 << 5,
- /* Framebuffer caches */
- RADV_CMD_FLAG_FLUSH_AND_INV_CB_META = 1 << 6,
- RADV_CMD_FLAG_FLUSH_AND_INV_DB_META = 1 << 7,
- RADV_CMD_FLAG_FLUSH_AND_INV_DB = 1 << 8,
- RADV_CMD_FLAG_FLUSH_AND_INV_CB = 1 << 9,
- /* Engine synchronization. */
- RADV_CMD_FLAG_VS_PARTIAL_FLUSH = 1 << 10,
- RADV_CMD_FLAG_PS_PARTIAL_FLUSH = 1 << 11,
- RADV_CMD_FLAG_CS_PARTIAL_FLUSH = 1 << 12,
- RADV_CMD_FLAG_VGT_FLUSH = 1 << 13,
- /* Pipeline query controls. */
- RADV_CMD_FLAG_START_PIPELINE_STATS = 1 << 14,
- RADV_CMD_FLAG_STOP_PIPELINE_STATS = 1 << 15,
- RADV_CMD_FLAG_VGT_STREAMOUT_SYNC = 1 << 16,
-
- RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER = (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
- RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
- RADV_CMD_FLAG_FLUSH_AND_INV_DB |
- RADV_CMD_FLAG_FLUSH_AND_INV_DB_META)
+ /* Instruction cache. */
+ RADV_CMD_FLAG_INV_ICACHE = 1 << 0,
+ /* Scalar L1 cache. */
+ RADV_CMD_FLAG_INV_SCACHE = 1 << 1,
+ /* Vector L1 cache. */
+ RADV_CMD_FLAG_INV_VCACHE = 1 << 2,
+ /* L2 cache + L2 metadata cache writeback & invalidate.
+ * GFX6-8: Used by shaders only. GFX9-10: Used by everything. */
+ RADV_CMD_FLAG_INV_L2 = 1 << 3,
+ /* L2 writeback (write dirty L2 lines to memory for non-L2 clients).
+ * Only used for coherency with non-L2 clients like CB, DB, CP on GFX6-8.
+ * GFX6-7 will do complete invalidation, because the writeback is unsupported. */
+ RADV_CMD_FLAG_WB_L2 = 1 << 4,
+ /* Invalidate the metadata cache. To be used when the DCC/HTILE metadata
+ * changed and we want to read an image from shaders. */
+ RADV_CMD_FLAG_INV_L2_METADATA = 1 << 5,
+ /* Framebuffer caches */
+ RADV_CMD_FLAG_FLUSH_AND_INV_CB_META = 1 << 6,
+ RADV_CMD_FLAG_FLUSH_AND_INV_DB_META = 1 << 7,
+ RADV_CMD_FLAG_FLUSH_AND_INV_DB = 1 << 8,
+ RADV_CMD_FLAG_FLUSH_AND_INV_CB = 1 << 9,
+ /* Engine synchronization. */
+ RADV_CMD_FLAG_VS_PARTIAL_FLUSH = 1 << 10,
+ RADV_CMD_FLAG_PS_PARTIAL_FLUSH = 1 << 11,
+ RADV_CMD_FLAG_CS_PARTIAL_FLUSH = 1 << 12,
+ RADV_CMD_FLAG_VGT_FLUSH = 1 << 13,
+ /* Pipeline query controls. */
+ RADV_CMD_FLAG_START_PIPELINE_STATS = 1 << 14,
+ RADV_CMD_FLAG_STOP_PIPELINE_STATS = 1 << 15,
+ RADV_CMD_FLAG_VGT_STREAMOUT_SYNC = 1 << 16,
+
+ RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER =
+ (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
+ RADV_CMD_FLAG_FLUSH_AND_INV_DB | RADV_CMD_FLAG_FLUSH_AND_INV_DB_META)
};
struct radv_vertex_binding {
- struct radv_buffer * buffer;
- VkDeviceSize offset;
- VkDeviceSize size;
- VkDeviceSize stride;
+ struct radv_buffer *buffer;
+ VkDeviceSize offset;
+ VkDeviceSize size;
+ VkDeviceSize stride;
};
struct radv_streamout_binding {
- struct radv_buffer *buffer;
- VkDeviceSize offset;
- VkDeviceSize size;
+ struct radv_buffer *buffer;
+ VkDeviceSize offset;
+ VkDeviceSize size;
};
struct radv_streamout_state {
- /* Mask of bound streamout buffers. */
- uint8_t enabled_mask;
+ /* Mask of bound streamout buffers. */
+ uint8_t enabled_mask;
- /* External state that comes from the last vertex stage, it must be
-    * set explicitly when binding a new graphics pipeline.
- */
- uint16_t stride_in_dw[MAX_SO_BUFFERS];
- uint32_t enabled_stream_buffers_mask; /* stream0 buffers0-3 in 4 LSB */
+ /* External state that comes from the last vertex stage, it must be
+    * set explicitly when binding a new graphics pipeline.
+ */
+ uint16_t stride_in_dw[MAX_SO_BUFFERS];
+ uint32_t enabled_stream_buffers_mask; /* stream0 buffers0-3 in 4 LSB */
- /* State of VGT_STRMOUT_BUFFER_(CONFIG|END) */
- uint32_t hw_enabled_mask;
+ /* State of VGT_STRMOUT_BUFFER_(CONFIG|END) */
+ uint32_t hw_enabled_mask;
- /* State of VGT_STRMOUT_(CONFIG|EN) */
- bool streamout_enabled;
+ /* State of VGT_STRMOUT_(CONFIG|EN) */
+ bool streamout_enabled;
};
struct radv_viewport_state {
- uint32_t count;
- VkViewport viewports[MAX_VIEWPORTS];
+ uint32_t count;
+ VkViewport viewports[MAX_VIEWPORTS];
};
struct radv_scissor_state {
- uint32_t count;
- VkRect2D scissors[MAX_SCISSORS];
+ uint32_t count;
+ VkRect2D scissors[MAX_SCISSORS];
};
struct radv_discard_rectangle_state {
- uint32_t count;
- VkRect2D rectangles[MAX_DISCARD_RECTANGLES];
+ uint32_t count;
+ VkRect2D rectangles[MAX_DISCARD_RECTANGLES];
};
struct radv_sample_locations_state {
- VkSampleCountFlagBits per_pixel;
- VkExtent2D grid_size;
- uint32_t count;
- VkSampleLocationEXT locations[MAX_SAMPLE_LOCATIONS];
+ VkSampleCountFlagBits per_pixel;
+ VkExtent2D grid_size;
+ uint32_t count;
+ VkSampleLocationEXT locations[MAX_SAMPLE_LOCATIONS];
};
struct radv_dynamic_state {
- /**
- * Bitmask of (1ull << VK_DYNAMIC_STATE_*).
- * Defines the set of saved dynamic state.
- */
- uint64_t mask;
-
- struct radv_viewport_state viewport;
-
- struct radv_scissor_state scissor;
-
- float line_width;
-
- struct {
- float bias;
- float clamp;
- float slope;
- } depth_bias;
-
- float blend_constants[4];
-
- struct {
- float min;
- float max;
- } depth_bounds;
-
- struct {
- uint32_t front;
- uint32_t back;
- } stencil_compare_mask;
-
- struct {
- uint32_t front;
- uint32_t back;
- } stencil_write_mask;
-
- struct {
- struct {
- VkStencilOp fail_op;
- VkStencilOp pass_op;
- VkStencilOp depth_fail_op;
- VkCompareOp compare_op;
- } front;
-
- struct {
- VkStencilOp fail_op;
- VkStencilOp pass_op;
- VkStencilOp depth_fail_op;
- VkCompareOp compare_op;
- } back;
- } stencil_op;
-
- struct {
- uint32_t front;
- uint32_t back;
- } stencil_reference;
-
- struct radv_discard_rectangle_state discard_rectangle;
-
- struct radv_sample_locations_state sample_location;
-
- struct {
- uint32_t factor;
- uint16_t pattern;
- } line_stipple;
-
- VkCullModeFlags cull_mode;
- VkFrontFace front_face;
- unsigned primitive_topology;
-
- bool depth_test_enable;
- bool depth_write_enable;
- VkCompareOp depth_compare_op;
- bool depth_bounds_test_enable;
- bool stencil_test_enable;
-
- struct {
- VkExtent2D size;
- VkFragmentShadingRateCombinerOpKHR combiner_ops[2];
- } fragment_shading_rate;
+ /**
+ * Bitmask of (1ull << VK_DYNAMIC_STATE_*).
+ * Defines the set of saved dynamic state.
+ */
+ uint64_t mask;
+
+ struct radv_viewport_state viewport;
+
+ struct radv_scissor_state scissor;
+
+ float line_width;
+
+ struct {
+ float bias;
+ float clamp;
+ float slope;
+ } depth_bias;
+
+ float blend_constants[4];
+
+ struct {
+ float min;
+ float max;
+ } depth_bounds;
+
+ struct {
+ uint32_t front;
+ uint32_t back;
+ } stencil_compare_mask;
+
+ struct {
+ uint32_t front;
+ uint32_t back;
+ } stencil_write_mask;
+
+ struct {
+ struct {
+ VkStencilOp fail_op;
+ VkStencilOp pass_op;
+ VkStencilOp depth_fail_op;
+ VkCompareOp compare_op;
+ } front;
+
+ struct {
+ VkStencilOp fail_op;
+ VkStencilOp pass_op;
+ VkStencilOp depth_fail_op;
+ VkCompareOp compare_op;
+ } back;
+ } stencil_op;
+
+ struct {
+ uint32_t front;
+ uint32_t back;
+ } stencil_reference;
+
+ struct radv_discard_rectangle_state discard_rectangle;
+
+ struct radv_sample_locations_state sample_location;
+
+ struct {
+ uint32_t factor;
+ uint16_t pattern;
+ } line_stipple;
+
+ VkCullModeFlags cull_mode;
+ VkFrontFace front_face;
+ unsigned primitive_topology;
+
+ bool depth_test_enable;
+ bool depth_write_enable;
+ VkCompareOp depth_compare_op;
+ bool depth_bounds_test_enable;
+ bool stencil_test_enable;
+
+ struct {
+ VkExtent2D size;
+ VkFragmentShadingRateCombinerOpKHR combiner_ops[2];
+ } fragment_shading_rate;
};
extern const struct radv_dynamic_state default_dynamic_state;
-const char *
-radv_get_debug_option_name(int id);
+const char *radv_get_debug_option_name(int id);
-const char *
-radv_get_perftest_option_name(int id);
+const char *radv_get_perftest_option_name(int id);
-int
-radv_get_int_debug_option(const char *name, int default_value);
+int radv_get_int_debug_option(const char *name, int default_value);
struct radv_color_buffer_info {
- uint64_t cb_color_base;
- uint64_t cb_color_cmask;
- uint64_t cb_color_fmask;
- uint64_t cb_dcc_base;
- uint32_t cb_color_slice;
- uint32_t cb_color_view;
- uint32_t cb_color_info;
- uint32_t cb_color_attrib;
- uint32_t cb_color_attrib2; /* GFX9 and later */
- uint32_t cb_color_attrib3; /* GFX10 and later */
- uint32_t cb_dcc_control;
- uint32_t cb_color_cmask_slice;
- uint32_t cb_color_fmask_slice;
- union {
- uint32_t cb_color_pitch; // GFX6-GFX8
- uint32_t cb_mrt_epitch; // GFX9+
- };
+ uint64_t cb_color_base;
+ uint64_t cb_color_cmask;
+ uint64_t cb_color_fmask;
+ uint64_t cb_dcc_base;
+ uint32_t cb_color_slice;
+ uint32_t cb_color_view;
+ uint32_t cb_color_info;
+ uint32_t cb_color_attrib;
+ uint32_t cb_color_attrib2; /* GFX9 and later */
+ uint32_t cb_color_attrib3; /* GFX10 and later */
+ uint32_t cb_dcc_control;
+ uint32_t cb_color_cmask_slice;
+ uint32_t cb_color_fmask_slice;
+ union {
+ uint32_t cb_color_pitch; // GFX6-GFX8
+ uint32_t cb_mrt_epitch; // GFX9+
+ };
};
struct radv_ds_buffer_info {
- uint64_t db_z_read_base;
- uint64_t db_stencil_read_base;
- uint64_t db_z_write_base;
- uint64_t db_stencil_write_base;
- uint64_t db_htile_data_base;
- uint32_t db_depth_info;
- uint32_t db_z_info;
- uint32_t db_stencil_info;
- uint32_t db_depth_view;
- uint32_t db_depth_size;
- uint32_t db_depth_slice;
- uint32_t db_htile_surface;
- uint32_t pa_su_poly_offset_db_fmt_cntl;
- uint32_t db_z_info2; /* GFX9 only */
- uint32_t db_stencil_info2; /* GFX9 only */
-};
-
-void
-radv_initialise_color_surface(struct radv_device *device,
- struct radv_color_buffer_info *cb,
- struct radv_image_view *iview);
-void
-radv_initialise_ds_surface(struct radv_device *device,
- struct radv_ds_buffer_info *ds,
- struct radv_image_view *iview);
+ uint64_t db_z_read_base;
+ uint64_t db_stencil_read_base;
+ uint64_t db_z_write_base;
+ uint64_t db_stencil_write_base;
+ uint64_t db_htile_data_base;
+ uint32_t db_depth_info;
+ uint32_t db_z_info;
+ uint32_t db_stencil_info;
+ uint32_t db_depth_view;
+ uint32_t db_depth_size;
+ uint32_t db_depth_slice;
+ uint32_t db_htile_surface;
+ uint32_t pa_su_poly_offset_db_fmt_cntl;
+ uint32_t db_z_info2; /* GFX9 only */
+ uint32_t db_stencil_info2; /* GFX9 only */
+};
+
+void radv_initialise_color_surface(struct radv_device *device, struct radv_color_buffer_info *cb,
+ struct radv_image_view *iview);
+void radv_initialise_ds_surface(struct radv_device *device, struct radv_ds_buffer_info *ds,
+ struct radv_image_view *iview);
/**
* Attachment state when recording a renderpass instance.
@@ -1263,205 +1248,205 @@ radv_initialise_ds_surface(struct radv_device *device,
* The clear value is valid only if there exists a pending clear.
*/
struct radv_attachment_state {
- VkImageAspectFlags pending_clear_aspects;
- uint32_t cleared_views;
- VkClearValue clear_value;
- VkImageLayout current_layout;
- VkImageLayout current_stencil_layout;
- bool current_in_render_loop;
- bool disable_dcc;
- struct radv_sample_locations_state sample_location;
-
- union {
- struct radv_color_buffer_info cb;
- struct radv_ds_buffer_info ds;
- };
- struct radv_image_view *iview;
+ VkImageAspectFlags pending_clear_aspects;
+ uint32_t cleared_views;
+ VkClearValue clear_value;
+ VkImageLayout current_layout;
+ VkImageLayout current_stencil_layout;
+ bool current_in_render_loop;
+ bool disable_dcc;
+ struct radv_sample_locations_state sample_location;
+
+ union {
+ struct radv_color_buffer_info cb;
+ struct radv_ds_buffer_info ds;
+ };
+ struct radv_image_view *iview;
};
struct radv_descriptor_state {
- struct radv_descriptor_set *sets[MAX_SETS];
- uint32_t dirty;
- uint32_t valid;
- struct radv_push_descriptor_set push_set;
- bool push_dirty;
- uint32_t dynamic_buffers[4 * MAX_DYNAMIC_BUFFERS];
+ struct radv_descriptor_set *sets[MAX_SETS];
+ uint32_t dirty;
+ uint32_t valid;
+ struct radv_push_descriptor_set push_set;
+ bool push_dirty;
+ uint32_t dynamic_buffers[4 * MAX_DYNAMIC_BUFFERS];
};
struct radv_subpass_sample_locs_state {
- uint32_t subpass_idx;
- struct radv_sample_locations_state sample_location;
+ uint32_t subpass_idx;
+ struct radv_sample_locations_state sample_location;
};
enum rgp_flush_bits {
- RGP_FLUSH_WAIT_ON_EOP_TS = 0x1,
- RGP_FLUSH_VS_PARTIAL_FLUSH = 0x2,
- RGP_FLUSH_PS_PARTIAL_FLUSH = 0x4,
- RGP_FLUSH_CS_PARTIAL_FLUSH = 0x8,
- RGP_FLUSH_PFP_SYNC_ME = 0x10,
- RGP_FLUSH_SYNC_CP_DMA = 0x20,
- RGP_FLUSH_INVAL_VMEM_L0 = 0x40,
- RGP_FLUSH_INVAL_ICACHE = 0x80,
- RGP_FLUSH_INVAL_SMEM_L0 = 0x100,
- RGP_FLUSH_FLUSH_L2 = 0x200,
- RGP_FLUSH_INVAL_L2 = 0x400,
- RGP_FLUSH_FLUSH_CB = 0x800,
- RGP_FLUSH_INVAL_CB = 0x1000,
- RGP_FLUSH_FLUSH_DB = 0x2000,
- RGP_FLUSH_INVAL_DB = 0x4000,
- RGP_FLUSH_INVAL_L1 = 0x8000,
+ RGP_FLUSH_WAIT_ON_EOP_TS = 0x1,
+ RGP_FLUSH_VS_PARTIAL_FLUSH = 0x2,
+ RGP_FLUSH_PS_PARTIAL_FLUSH = 0x4,
+ RGP_FLUSH_CS_PARTIAL_FLUSH = 0x8,
+ RGP_FLUSH_PFP_SYNC_ME = 0x10,
+ RGP_FLUSH_SYNC_CP_DMA = 0x20,
+ RGP_FLUSH_INVAL_VMEM_L0 = 0x40,
+ RGP_FLUSH_INVAL_ICACHE = 0x80,
+ RGP_FLUSH_INVAL_SMEM_L0 = 0x100,
+ RGP_FLUSH_FLUSH_L2 = 0x200,
+ RGP_FLUSH_INVAL_L2 = 0x400,
+ RGP_FLUSH_FLUSH_CB = 0x800,
+ RGP_FLUSH_INVAL_CB = 0x1000,
+ RGP_FLUSH_FLUSH_DB = 0x2000,
+ RGP_FLUSH_INVAL_DB = 0x4000,
+ RGP_FLUSH_INVAL_L1 = 0x8000,
};
struct radv_cmd_state {
- /* Vertex descriptors */
- uint64_t vb_va;
- unsigned vb_size;
-
- bool predicating;
- uint64_t dirty;
-
- uint32_t prefetch_L2_mask;
-
- struct radv_pipeline * pipeline;
- struct radv_pipeline * emitted_pipeline;
- struct radv_pipeline * compute_pipeline;
- struct radv_pipeline * emitted_compute_pipeline;
- struct radv_framebuffer * framebuffer;
- struct radv_render_pass * pass;
- const struct radv_subpass * subpass;
- struct radv_dynamic_state dynamic;
- struct radv_attachment_state * attachments;
- struct radv_streamout_state streamout;
- VkRect2D render_area;
-
- uint32_t num_subpass_sample_locs;
- struct radv_subpass_sample_locs_state * subpass_sample_locs;
-
- /* Index buffer */
- struct radv_buffer *index_buffer;
- uint64_t index_offset;
- uint32_t index_type;
- uint32_t max_index_count;
- uint64_t index_va;
- int32_t last_index_type;
-
- int32_t last_primitive_reset_en;
- uint32_t last_primitive_reset_index;
- enum radv_cmd_flush_bits flush_bits;
- unsigned active_occlusion_queries;
- bool perfect_occlusion_queries_enabled;
- unsigned active_pipeline_queries;
- unsigned active_pipeline_gds_queries;
- uint32_t trace_id;
- uint32_t last_ia_multi_vgt_param;
-
- uint32_t last_num_instances;
- uint32_t last_first_instance;
- uint32_t last_vertex_offset;
- uint32_t last_drawid;
-
- uint32_t last_sx_ps_downconvert;
- uint32_t last_sx_blend_opt_epsilon;
- uint32_t last_sx_blend_opt_control;
-
- /* Whether CP DMA is busy/idle. */
- bool dma_is_busy;
-
- /* Conditional rendering info. */
- uint8_t predication_op; /* 32-bit or 64-bit predicate value */
- int predication_type; /* -1: disabled, 0: normal, 1: inverted */
- uint64_t predication_va;
-
- /* Inheritance info. */
- VkQueryPipelineStatisticFlags inherited_pipeline_statistics;
-
- bool context_roll_without_scissor_emitted;
-
- /* SQTT related state. */
- uint32_t current_event_type;
- uint32_t num_events;
- uint32_t num_layout_transitions;
- bool pending_sqtt_barrier_end;
- enum rgp_flush_bits sqtt_flush_bits;
-
- uint8_t cb_mip[MAX_RTS];
+ /* Vertex descriptors */
+ uint64_t vb_va;
+ unsigned vb_size;
+
+ bool predicating;
+ uint64_t dirty;
+
+ uint32_t prefetch_L2_mask;
+
+ struct radv_pipeline *pipeline;
+ struct radv_pipeline *emitted_pipeline;
+ struct radv_pipeline *compute_pipeline;
+ struct radv_pipeline *emitted_compute_pipeline;
+ struct radv_framebuffer *framebuffer;
+ struct radv_render_pass *pass;
+ const struct radv_subpass *subpass;
+ struct radv_dynamic_state dynamic;
+ struct radv_attachment_state *attachments;
+ struct radv_streamout_state streamout;
+ VkRect2D render_area;
+
+ uint32_t num_subpass_sample_locs;
+ struct radv_subpass_sample_locs_state *subpass_sample_locs;
+
+ /* Index buffer */
+ struct radv_buffer *index_buffer;
+ uint64_t index_offset;
+ uint32_t index_type;
+ uint32_t max_index_count;
+ uint64_t index_va;
+ int32_t last_index_type;
+
+ int32_t last_primitive_reset_en;
+ uint32_t last_primitive_reset_index;
+ enum radv_cmd_flush_bits flush_bits;
+ unsigned active_occlusion_queries;
+ bool perfect_occlusion_queries_enabled;
+ unsigned active_pipeline_queries;
+ unsigned active_pipeline_gds_queries;
+ uint32_t trace_id;
+ uint32_t last_ia_multi_vgt_param;
+
+ uint32_t last_num_instances;
+ uint32_t last_first_instance;
+ uint32_t last_vertex_offset;
+ uint32_t last_drawid;
+
+ uint32_t last_sx_ps_downconvert;
+ uint32_t last_sx_blend_opt_epsilon;
+ uint32_t last_sx_blend_opt_control;
+
+ /* Whether CP DMA is busy/idle. */
+ bool dma_is_busy;
+
+ /* Conditional rendering info. */
+ uint8_t predication_op; /* 32-bit or 64-bit predicate value */
+ int predication_type; /* -1: disabled, 0: normal, 1: inverted */
+ uint64_t predication_va;
+
+ /* Inheritance info. */
+ VkQueryPipelineStatisticFlags inherited_pipeline_statistics;
+
+ bool context_roll_without_scissor_emitted;
+
+ /* SQTT related state. */
+ uint32_t current_event_type;
+ uint32_t num_events;
+ uint32_t num_layout_transitions;
+ bool pending_sqtt_barrier_end;
+ enum rgp_flush_bits sqtt_flush_bits;
+
+ uint8_t cb_mip[MAX_RTS];
};
struct radv_cmd_pool {
- struct vk_object_base base;
- VkAllocationCallbacks alloc;
- struct list_head cmd_buffers;
- struct list_head free_cmd_buffers;
- uint32_t queue_family_index;
+ struct vk_object_base base;
+ VkAllocationCallbacks alloc;
+ struct list_head cmd_buffers;
+ struct list_head free_cmd_buffers;
+ uint32_t queue_family_index;
};
struct radv_cmd_buffer_upload {
- uint8_t *map;
- unsigned offset;
- uint64_t size;
- struct radeon_winsys_bo *upload_bo;
- struct list_head list;
+ uint8_t *map;
+ unsigned offset;
+ uint64_t size;
+ struct radeon_winsys_bo *upload_bo;
+ struct list_head list;
};
enum radv_cmd_buffer_status {
- RADV_CMD_BUFFER_STATUS_INVALID,
- RADV_CMD_BUFFER_STATUS_INITIAL,
- RADV_CMD_BUFFER_STATUS_RECORDING,
- RADV_CMD_BUFFER_STATUS_EXECUTABLE,
- RADV_CMD_BUFFER_STATUS_PENDING,
+ RADV_CMD_BUFFER_STATUS_INVALID,
+ RADV_CMD_BUFFER_STATUS_INITIAL,
+ RADV_CMD_BUFFER_STATUS_RECORDING,
+ RADV_CMD_BUFFER_STATUS_EXECUTABLE,
+ RADV_CMD_BUFFER_STATUS_PENDING,
};
struct radv_cmd_buffer {
- struct vk_object_base base;
+ struct vk_object_base base;
- struct radv_device * device;
+ struct radv_device *device;
- struct radv_cmd_pool * pool;
- struct list_head pool_link;
+ struct radv_cmd_pool *pool;
+ struct list_head pool_link;
- VkCommandBufferUsageFlags usage_flags;
- VkCommandBufferLevel level;
- enum radv_cmd_buffer_status status;
- struct radeon_cmdbuf *cs;
- struct radv_cmd_state state;
- struct radv_vertex_binding vertex_bindings[MAX_VBS];
- struct radv_streamout_binding streamout_bindings[MAX_SO_BUFFERS];
- uint32_t queue_family_index;
+ VkCommandBufferUsageFlags usage_flags;
+ VkCommandBufferLevel level;
+ enum radv_cmd_buffer_status status;
+ struct radeon_cmdbuf *cs;
+ struct radv_cmd_state state;
+ struct radv_vertex_binding vertex_bindings[MAX_VBS];
+ struct radv_streamout_binding streamout_bindings[MAX_SO_BUFFERS];
+ uint32_t queue_family_index;
- uint8_t push_constants[MAX_PUSH_CONSTANTS_SIZE];
- VkShaderStageFlags push_constant_stages;
- struct radv_descriptor_set_header meta_push_descriptors;
+ uint8_t push_constants[MAX_PUSH_CONSTANTS_SIZE];
+ VkShaderStageFlags push_constant_stages;
+ struct radv_descriptor_set_header meta_push_descriptors;
- struct radv_descriptor_state descriptors[MAX_BIND_POINTS];
+ struct radv_descriptor_state descriptors[MAX_BIND_POINTS];
- struct radv_cmd_buffer_upload upload;
+ struct radv_cmd_buffer_upload upload;
- uint32_t scratch_size_per_wave_needed;
- uint32_t scratch_waves_wanted;
- uint32_t compute_scratch_size_per_wave_needed;
- uint32_t compute_scratch_waves_wanted;
- uint32_t esgs_ring_size_needed;
- uint32_t gsvs_ring_size_needed;
- bool tess_rings_needed;
- bool gds_needed; /* for GFX10 streamout and NGG GS queries */
- bool gds_oa_needed; /* for GFX10 streamout */
- bool sample_positions_needed;
+ uint32_t scratch_size_per_wave_needed;
+ uint32_t scratch_waves_wanted;
+ uint32_t compute_scratch_size_per_wave_needed;
+ uint32_t compute_scratch_waves_wanted;
+ uint32_t esgs_ring_size_needed;
+ uint32_t gsvs_ring_size_needed;
+ bool tess_rings_needed;
+ bool gds_needed; /* for GFX10 streamout and NGG GS queries */
+ bool gds_oa_needed; /* for GFX10 streamout */
+ bool sample_positions_needed;
- VkResult record_result;
+ VkResult record_result;
- uint64_t gfx9_fence_va;
- uint32_t gfx9_fence_idx;
- uint64_t gfx9_eop_bug_va;
+ uint64_t gfx9_fence_va;
+ uint32_t gfx9_fence_idx;
+ uint64_t gfx9_eop_bug_va;
- /**
- * Whether a query pool has been resetted and we have to flush caches.
- */
- bool pending_reset_query;
+ /**
+    * Whether a query pool has been reset and we have to flush caches.
+ */
+ bool pending_reset_query;
- /**
- * Bitmask of pending active query flushes.
- */
- enum radv_cmd_flush_bits active_query_flush_bits;
+ /**
+ * Bitmask of pending active query flushes.
+ */
+ enum radv_cmd_flush_bits active_query_flush_bits;
};
struct radv_image;
@@ -1469,162 +1454,130 @@ struct radv_image_view;
bool radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer);
-void si_emit_graphics(struct radv_device *device,
- struct radeon_cmdbuf *cs);
-void si_emit_compute(struct radv_device *device,
- struct radeon_cmdbuf *cs);
+void si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs);
+void si_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs);
void cik_create_gfx_config(struct radv_device *device);
-void si_write_viewport(struct radeon_cmdbuf *cs, int first_vp,
- int count, const VkViewport *viewports);
-void si_write_scissors(struct radeon_cmdbuf *cs, int first,
- int count, const VkRect2D *scissors,
- const VkViewport *viewports, bool can_use_guardband);
-uint32_t si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer,
- bool instanced_draw, bool indirect_draw,
- bool count_from_stream_output,
- uint32_t draw_vertex_count,
- unsigned topology);
-void si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs,
- enum chip_class chip_class,
- bool is_mec,
- unsigned event, unsigned event_flags,
- unsigned dst_sel, unsigned data_sel,
- uint64_t va,
- uint32_t new_fence,
- uint64_t gfx9_eop_bug_va);
-
-void radv_cp_wait_mem(struct radeon_cmdbuf *cs, uint32_t op, uint64_t va,
- uint32_t ref, uint32_t mask);
-void si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
- enum chip_class chip_class,
- uint32_t *fence_ptr, uint64_t va,
- bool is_mec,
- enum radv_cmd_flush_bits flush_bits,
- enum rgp_flush_bits *sqtt_flush_bits,
- uint64_t gfx9_eop_bug_va);
+void si_write_viewport(struct radeon_cmdbuf *cs, int first_vp, int count,
+ const VkViewport *viewports);
+void si_write_scissors(struct radeon_cmdbuf *cs, int first, int count, const VkRect2D *scissors,
+ const VkViewport *viewports, bool can_use_guardband);
+uint32_t si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_draw,
+ bool indirect_draw, bool count_from_stream_output,
+ uint32_t draw_vertex_count, unsigned topology);
+void si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum chip_class chip_class, bool is_mec,
+ unsigned event, unsigned event_flags, unsigned dst_sel,
+ unsigned data_sel, uint64_t va, uint32_t new_fence,
+ uint64_t gfx9_eop_bug_va);
+
+void radv_cp_wait_mem(struct radeon_cmdbuf *cs, uint32_t op, uint64_t va, uint32_t ref,
+ uint32_t mask);
+void si_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum chip_class chip_class,
+ uint32_t *fence_ptr, uint64_t va, bool is_mec,
+ enum radv_cmd_flush_bits flush_bits,
+ enum rgp_flush_bits *sqtt_flush_bits, uint64_t gfx9_eop_bug_va);
void si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer);
-void si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer,
- bool draw_visible, unsigned pred_op,
- uint64_t va);
-void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer,
- uint64_t src_va, uint64_t dest_va,
- uint64_t size);
-void si_cp_dma_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
- unsigned size);
-void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
- uint64_t size, unsigned value);
+void si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, bool draw_visible,
+ unsigned pred_op, uint64_t va);
+void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uint64_t dest_va,
+ uint64_t size);
+void si_cp_dma_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va, unsigned size);
+void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t size,
+ unsigned value);
void si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer);
void radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer);
-bool
-radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer,
- unsigned size, unsigned *out_offset, void **ptr);
-void
-radv_cmd_buffer_set_subpass(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_subpass *subpass);
-bool
-radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer,
- unsigned size, const void *data, unsigned *out_offset);
+bool radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size,
+ unsigned *out_offset, void **ptr);
+void radv_cmd_buffer_set_subpass(struct radv_cmd_buffer *cmd_buffer,
+ const struct radv_subpass *subpass);
+bool radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer, unsigned size,
+ const void *data, unsigned *out_offset);
void radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer);
void radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer);
void radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer);
void radv_depth_stencil_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer,
- VkImageAspectFlags aspects,
- VkResolveModeFlagBits resolve_mode);
+ VkImageAspectFlags aspects,
+ VkResolveModeFlagBits resolve_mode);
void radv_cmd_buffer_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer);
void radv_depth_stencil_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer,
- VkImageAspectFlags aspects,
- VkResolveModeFlagBits resolve_mode);
+ VkImageAspectFlags aspects,
+ VkResolveModeFlagBits resolve_mode);
void radv_emit_default_sample_locations(struct radeon_cmdbuf *cs, int nr_samples);
unsigned radv_get_default_max_sample_dist(int log_samples);
void radv_device_init_msaa(struct radv_device *device);
void radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_image_view *iview,
- VkClearDepthStencilValue ds_clear_value,
- VkImageAspectFlags aspects);
+ const struct radv_image_view *iview,
+ VkClearDepthStencilValue ds_clear_value,
+ VkImageAspectFlags aspects);
void radv_update_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_image_view *iview,
- int cb_idx,
- uint32_t color_values[2]);
+ const struct radv_image_view *iview, int cb_idx,
+ uint32_t color_values[2]);
bool radv_image_use_dcc_image_stores(const struct radv_device *device,
- const struct radv_image *image);
+ const struct radv_image *image);
bool radv_image_use_dcc_predication(const struct radv_device *device,
- const struct radv_image *image);
-void radv_update_fce_metadata(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *range, bool value);
-
-void radv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *range, bool value);
-enum radv_cmd_flush_bits
-radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer,
- VkAccessFlags src_flags,
- const struct radv_image *image);
-enum radv_cmd_flush_bits
-radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer,
- VkAccessFlags dst_flags,
- const struct radv_image *image);
-uint32_t radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_image *image,
- struct radeon_winsys_bo *bo,
- uint64_t offset, uint64_t size, uint32_t value);
+ const struct radv_image *image);
+void radv_update_fce_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *range, bool value);
+
+void radv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *range, bool value);
+enum radv_cmd_flush_bits radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer,
+ VkAccessFlags src_flags,
+ const struct radv_image *image);
+enum radv_cmd_flush_bits radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer,
+ VkAccessFlags dst_flags,
+ const struct radv_image *image);
+uint32_t radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image,
+ struct radeon_winsys_bo *bo, uint64_t offset, uint64_t size,
+ uint32_t value);
void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer);
-bool radv_get_memory_fd(struct radv_device *device,
- struct radv_device_memory *memory,
- int *pFD);
-void radv_free_memory(struct radv_device *device,
- const VkAllocationCallbacks* pAllocator,
- struct radv_device_memory *mem);
+bool radv_get_memory_fd(struct radv_device *device, struct radv_device_memory *memory, int *pFD);
+void radv_free_memory(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
+ struct radv_device_memory *mem);
static inline void
-radv_emit_shader_pointer_head(struct radeon_cmdbuf *cs,
- unsigned sh_offset, unsigned pointer_count,
- bool use_32bit_pointers)
+radv_emit_shader_pointer_head(struct radeon_cmdbuf *cs, unsigned sh_offset, unsigned pointer_count,
+ bool use_32bit_pointers)
{
- radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count * (use_32bit_pointers ? 1 : 2), 0));
- radeon_emit(cs, (sh_offset - SI_SH_REG_OFFSET) >> 2);
+ radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count * (use_32bit_pointers ? 1 : 2), 0));
+ radeon_emit(cs, (sh_offset - SI_SH_REG_OFFSET) >> 2);
}
static inline void
-radv_emit_shader_pointer_body(struct radv_device *device,
- struct radeon_cmdbuf *cs,
- uint64_t va, bool use_32bit_pointers)
+radv_emit_shader_pointer_body(struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va,
+ bool use_32bit_pointers)
{
- radeon_emit(cs, va);
-
- if (use_32bit_pointers) {
- assert(va == 0 ||
- (va >> 32) == device->physical_device->rad_info.address32_hi);
- } else {
- radeon_emit(cs, va >> 32);
- }
+ radeon_emit(cs, va);
+
+ if (use_32bit_pointers) {
+ assert(va == 0 || (va >> 32) == device->physical_device->rad_info.address32_hi);
+ } else {
+ radeon_emit(cs, va >> 32);
+ }
}
static inline void
-radv_emit_shader_pointer(struct radv_device *device,
- struct radeon_cmdbuf *cs,
- uint32_t sh_offset, uint64_t va, bool global)
+radv_emit_shader_pointer(struct radv_device *device, struct radeon_cmdbuf *cs, uint32_t sh_offset,
+ uint64_t va, bool global)
{
- bool use_32bit_pointers = !global;
+ bool use_32bit_pointers = !global;
- radv_emit_shader_pointer_head(cs, sh_offset, 1, use_32bit_pointers);
- radv_emit_shader_pointer_body(device, cs, va, use_32bit_pointers);
+ radv_emit_shader_pointer_head(cs, sh_offset, 1, use_32bit_pointers);
+ radv_emit_shader_pointer_body(device, cs, va, use_32bit_pointers);
}
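For readers skimming the reformatted helpers above: radv_emit_shader_pointer() wraps the head/body pair to load one descriptor address into user SGPRs. A minimal usage sketch follows; the SGPR offset and address are illustrative assumptions, not taken from this patch.

/* Illustrative sketch only: emit a 32-bit descriptor-set pointer for the
 * vertex stage. R_00B130_SPI_SHADER_USER_DATA_VS_0 comes from sid.h, and
 * set_va is an assumed GPU address whose upper 32 bits match
 * rad_info.address32_hi, as the assert in the body helper requires. */
static inline void
emit_vs_set_pointer_example(struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t set_va)
{
   radv_emit_shader_pointer(device, cs, R_00B130_SPI_SHADER_USER_DATA_VS_0, set_va,
                            false /* !global => 32-bit pointer */);
}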
static inline struct radv_descriptor_state *
-radv_get_descriptors_state(struct radv_cmd_buffer *cmd_buffer,
- VkPipelineBindPoint bind_point)
+radv_get_descriptors_state(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point)
{
- assert(bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS ||
- bind_point == VK_PIPELINE_BIND_POINT_COMPUTE);
- return &cmd_buffer->descriptors[bind_point];
+ assert(bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS ||
+ bind_point == VK_PIPELINE_BIND_POINT_COMPUTE);
+ return &cmd_buffer->descriptors[bind_point];
}
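radv_get_descriptors_state() is the accessor the descriptor-binding paths funnel through; a hedged sketch of its use (the dirty-bit convention shown is an assumption made for this example):

/* Illustrative sketch only: fetch the graphics-bind-point bookkeeping and
 * flag descriptor set 0 for re-emission. Treating `dirty` as one bit per
 * set index is an assumption for illustration. */
static inline void
mark_graphics_set0_dirty_example(struct radv_cmd_buffer *cmd_buffer)
{
   struct radv_descriptor_state *state =
      radv_get_descriptors_state(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS);
   state->dirty |= 1u << 0;
}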
/*
@@ -1633,148 +1586,142 @@ radv_get_descriptors_state(struct radv_cmd_buffer *cmd_buffer,
* Limitations: Can't call normal dispatch functions without binding or rebinding
* the compute pipeline.
*/
-void radv_unaligned_dispatch(
- struct radv_cmd_buffer *cmd_buffer,
- uint32_t x,
- uint32_t y,
- uint32_t z);
+void radv_unaligned_dispatch(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t y,
+ uint32_t z);
struct radv_event {
- struct vk_object_base base;
- struct radeon_winsys_bo *bo;
- uint64_t *map;
-};
-
-#define RADV_HASH_SHADER_NO_NGG (1 << 0)
-#define RADV_HASH_SHADER_CS_WAVE32 (1 << 1)
-#define RADV_HASH_SHADER_PS_WAVE32 (1 << 2)
-#define RADV_HASH_SHADER_GE_WAVE32 (1 << 3)
-#define RADV_HASH_SHADER_LLVM (1 << 4)
-#define RADV_HASH_SHADER_DISCARD_TO_DEMOTE (1 << 5)
-#define RADV_HASH_SHADER_MRT_NAN_FIXUP (1 << 6)
-#define RADV_HASH_SHADER_INVARIANT_GEOM (1 << 7)
-#define RADV_HASH_SHADER_KEEP_STATISTICS (1 << 8)
-#define RADV_HASH_SHADER_FORCE_VRS_2x2 (1 << 9)
-#define RADV_HASH_SHADER_FORCE_VRS_2x1 (1 << 10)
-#define RADV_HASH_SHADER_FORCE_VRS_1x2 (1 << 11)
-
-void
-radv_hash_shaders(unsigned char *hash,
- const VkPipelineShaderStageCreateInfo **stages,
- const struct radv_pipeline_layout *layout,
- const struct radv_pipeline_key *key,
- uint32_t flags);
+ struct vk_object_base base;
+ struct radeon_winsys_bo *bo;
+ uint64_t *map;
+};
+
+#define RADV_HASH_SHADER_NO_NGG (1 << 0)
+#define RADV_HASH_SHADER_CS_WAVE32 (1 << 1)
+#define RADV_HASH_SHADER_PS_WAVE32 (1 << 2)
+#define RADV_HASH_SHADER_GE_WAVE32 (1 << 3)
+#define RADV_HASH_SHADER_LLVM (1 << 4)
+#define RADV_HASH_SHADER_DISCARD_TO_DEMOTE (1 << 5)
+#define RADV_HASH_SHADER_MRT_NAN_FIXUP (1 << 6)
+#define RADV_HASH_SHADER_INVARIANT_GEOM (1 << 7)
+#define RADV_HASH_SHADER_KEEP_STATISTICS (1 << 8)
+#define RADV_HASH_SHADER_FORCE_VRS_2x2 (1 << 9)
+#define RADV_HASH_SHADER_FORCE_VRS_2x1 (1 << 10)
+#define RADV_HASH_SHADER_FORCE_VRS_1x2 (1 << 11)
+
+void radv_hash_shaders(unsigned char *hash, const VkPipelineShaderStageCreateInfo **stages,
+ const struct radv_pipeline_layout *layout,
+ const struct radv_pipeline_key *key, uint32_t flags);
#define RADV_STAGE_MASK ((1 << MESA_SHADER_STAGES) - 1)
-#define radv_foreach_stage(stage, stage_bits) \
- for (gl_shader_stage stage, \
- __tmp = (gl_shader_stage)((stage_bits) & RADV_STAGE_MASK); \
- stage = ffs(__tmp) - 1, __tmp; \
- __tmp &= ~(1 << (stage)))
+#define radv_foreach_stage(stage, stage_bits) \
+ for (gl_shader_stage stage, __tmp = (gl_shader_stage)((stage_bits)&RADV_STAGE_MASK); \
+ stage = ffs(__tmp) - 1, __tmp; __tmp &= ~(1 << (stage)))
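radv_foreach_stage iterates a MESA_SHADER_* bitmask from the lowest set bit upward, clearing each bit as it goes. A small, purely illustrative fragment (process_stage is hypothetical):

/* Illustrative fragment only, assumed to sit inside a function: visits
 * MESA_SHADER_VERTEX, then MESA_SHADER_FRAGMENT. */
unsigned stage_mask = (1u << MESA_SHADER_VERTEX) | (1u << MESA_SHADER_FRAGMENT);
radv_foreach_stage(stage, stage_mask)
{
   process_stage(stage); /* hypothetical per-stage callback */
}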
extern const VkFormat radv_fs_key_format_exemplars[NUM_META_FS_KEYS];
unsigned radv_format_meta_fs_key(struct radv_device *device, VkFormat format);
struct radv_multisample_state {
- uint32_t db_eqaa;
- uint32_t pa_sc_mode_cntl_0;
- uint32_t pa_sc_mode_cntl_1;
- uint32_t pa_sc_aa_config;
- uint32_t pa_sc_aa_mask[2];
- unsigned num_samples;
+ uint32_t db_eqaa;
+ uint32_t pa_sc_mode_cntl_0;
+ uint32_t pa_sc_mode_cntl_1;
+ uint32_t pa_sc_aa_config;
+ uint32_t pa_sc_aa_mask[2];
+ unsigned num_samples;
};
struct radv_vrs_state {
- uint32_t pa_cl_vrs_cntl;
+ uint32_t pa_cl_vrs_cntl;
};
struct radv_prim_vertex_count {
- uint8_t min;
- uint8_t incr;
+ uint8_t min;
+ uint8_t incr;
};
struct radv_ia_multi_vgt_param_helpers {
- uint32_t base;
- bool partial_es_wave;
- uint8_t primgroup_size;
- bool ia_switch_on_eoi;
- bool partial_vs_wave;
+ uint32_t base;
+ bool partial_es_wave;
+ uint8_t primgroup_size;
+ bool ia_switch_on_eoi;
+ bool partial_vs_wave;
};
struct radv_binning_state {
- uint32_t pa_sc_binner_cntl_0;
- uint32_t db_dfsm_control;
+ uint32_t pa_sc_binner_cntl_0;
+ uint32_t db_dfsm_control;
};
#define SI_GS_PER_ES 128
struct radv_pipeline {
- struct vk_object_base base;
- struct radv_device * device;
- struct radv_dynamic_state dynamic_state;
-
- struct radv_pipeline_layout * layout;
-
- bool need_indirect_descriptor_sets;
- struct radv_shader_variant * shaders[MESA_SHADER_STAGES];
- struct radv_shader_variant *gs_copy_shader;
- VkShaderStageFlags active_stages;
-
- struct radeon_cmdbuf cs;
- uint32_t ctx_cs_hash;
- struct radeon_cmdbuf ctx_cs;
-
- uint32_t binding_stride[MAX_VBS];
- uint8_t num_vertex_bindings;
-
- uint32_t user_data_0[MESA_SHADER_STAGES];
- union {
- struct {
- struct radv_multisample_state ms;
- struct radv_binning_state binning;
- struct radv_vrs_state vrs;
- uint32_t spi_baryc_cntl;
- bool prim_restart_enable;
- unsigned esgs_ring_size;
- unsigned gsvs_ring_size;
- uint32_t vtx_base_sgpr;
- struct radv_ia_multi_vgt_param_helpers ia_multi_vgt_param;
- uint8_t vtx_emit_num;
- bool uses_drawid;
- bool uses_baseinstance;
- bool can_use_guardband;
- uint32_t needed_dynamic_state;
- bool disable_out_of_order_rast_for_occlusion;
- unsigned tess_patch_control_points;
- unsigned pa_su_sc_mode_cntl;
- unsigned db_depth_control;
- bool uses_dynamic_stride;
-
- /* Used for rbplus */
- uint32_t col_format;
- uint32_t cb_target_mask;
- } graphics;
- };
-
- unsigned max_waves;
- unsigned scratch_bytes_per_wave;
-
- /* Not NULL if graphics pipeline uses streamout. */
- struct radv_shader_variant *streamout_shader;
-
- /* Unique pipeline hash identifier. */
- uint64_t pipeline_hash;
-};
-
-static inline bool radv_pipeline_has_gs(const struct radv_pipeline *pipeline)
+ struct vk_object_base base;
+ struct radv_device *device;
+ struct radv_dynamic_state dynamic_state;
+
+ struct radv_pipeline_layout *layout;
+
+ bool need_indirect_descriptor_sets;
+ struct radv_shader_variant *shaders[MESA_SHADER_STAGES];
+ struct radv_shader_variant *gs_copy_shader;
+ VkShaderStageFlags active_stages;
+
+ struct radeon_cmdbuf cs;
+ uint32_t ctx_cs_hash;
+ struct radeon_cmdbuf ctx_cs;
+
+ uint32_t binding_stride[MAX_VBS];
+ uint8_t num_vertex_bindings;
+
+ uint32_t user_data_0[MESA_SHADER_STAGES];
+ union {
+ struct {
+ struct radv_multisample_state ms;
+ struct radv_binning_state binning;
+ struct radv_vrs_state vrs;
+ uint32_t spi_baryc_cntl;
+ bool prim_restart_enable;
+ unsigned esgs_ring_size;
+ unsigned gsvs_ring_size;
+ uint32_t vtx_base_sgpr;
+ struct radv_ia_multi_vgt_param_helpers ia_multi_vgt_param;
+ uint8_t vtx_emit_num;
+ bool uses_drawid;
+ bool uses_baseinstance;
+ bool can_use_guardband;
+ uint32_t needed_dynamic_state;
+ bool disable_out_of_order_rast_for_occlusion;
+ unsigned tess_patch_control_points;
+ unsigned pa_su_sc_mode_cntl;
+ unsigned db_depth_control;
+ bool uses_dynamic_stride;
+
+ /* Used for rbplus */
+ uint32_t col_format;
+ uint32_t cb_target_mask;
+ } graphics;
+ };
+
+ unsigned max_waves;
+ unsigned scratch_bytes_per_wave;
+
+ /* Not NULL if graphics pipeline uses streamout. */
+ struct radv_shader_variant *streamout_shader;
+
+ /* Unique pipeline hash identifier. */
+ uint64_t pipeline_hash;
+};
+
+static inline bool
+radv_pipeline_has_gs(const struct radv_pipeline *pipeline)
{
- return pipeline->shaders[MESA_SHADER_GEOMETRY] ? true : false;
+ return pipeline->shaders[MESA_SHADER_GEOMETRY] ? true : false;
}
-static inline bool radv_pipeline_has_tess(const struct radv_pipeline *pipeline)
+static inline bool
+radv_pipeline_has_tess(const struct radv_pipeline *pipeline)
{
- return pipeline->shaders[MESA_SHADER_TESS_CTRL] ? true : false;
+ return pipeline->shaders[MESA_SHADER_TESS_CTRL] ? true : false;
}
bool radv_pipeline_has_ngg(const struct radv_pipeline *pipeline);
@@ -1784,125 +1731,115 @@ bool radv_pipeline_has_ngg_passthrough(const struct radv_pipeline *pipeline);
bool radv_pipeline_has_gs_copy_shader(const struct radv_pipeline *pipeline);
struct radv_userdata_info *radv_lookup_user_sgpr(struct radv_pipeline *pipeline,
- gl_shader_stage stage,
- int idx);
+ gl_shader_stage stage, int idx);
struct radv_shader_variant *radv_get_shader(const struct radv_pipeline *pipeline,
- gl_shader_stage stage);
+ gl_shader_stage stage);
struct radv_graphics_pipeline_create_info {
- bool use_rectlist;
- bool db_depth_clear;
- bool db_stencil_clear;
- bool db_depth_disable_expclear;
- bool db_stencil_disable_expclear;
- bool depth_compress_disable;
- bool stencil_compress_disable;
- bool resummarize_enable;
- uint32_t custom_blend_mode;
-};
-
-VkResult
-radv_graphics_pipeline_create(VkDevice device,
- VkPipelineCache cache,
- const VkGraphicsPipelineCreateInfo *pCreateInfo,
- const struct radv_graphics_pipeline_create_info *extra,
- const VkAllocationCallbacks *alloc,
- VkPipeline *pPipeline);
+ bool use_rectlist;
+ bool db_depth_clear;
+ bool db_stencil_clear;
+ bool db_depth_disable_expclear;
+ bool db_stencil_disable_expclear;
+ bool depth_compress_disable;
+ bool stencil_compress_disable;
+ bool resummarize_enable;
+ uint32_t custom_blend_mode;
+};
+
+VkResult radv_graphics_pipeline_create(VkDevice device, VkPipelineCache cache,
+ const VkGraphicsPipelineCreateInfo *pCreateInfo,
+ const struct radv_graphics_pipeline_create_info *extra,
+ const VkAllocationCallbacks *alloc, VkPipeline *pPipeline);
struct radv_binning_settings {
- unsigned context_states_per_bin; /* allowed range: [1, 6] */
- unsigned persistent_states_per_bin; /* allowed range: [1, 32] */
- unsigned fpovs_per_batch; /* allowed range: [0, 255], 0 = unlimited */
+ unsigned context_states_per_bin; /* allowed range: [1, 6] */
+ unsigned persistent_states_per_bin; /* allowed range: [1, 32] */
+ unsigned fpovs_per_batch; /* allowed range: [0, 255], 0 = unlimited */
};
-struct radv_binning_settings
-radv_get_binning_settings(const struct radv_physical_device *pdev);
+struct radv_binning_settings radv_get_binning_settings(const struct radv_physical_device *pdev);
struct vk_format_description;
uint32_t radv_translate_buffer_dataformat(const struct util_format_description *desc,
- int first_non_void);
+ int first_non_void);
uint32_t radv_translate_buffer_numformat(const struct util_format_description *desc,
- int first_non_void);
+ int first_non_void);
bool radv_is_buffer_format_supported(VkFormat format, bool *scaled);
uint32_t radv_translate_colorformat(VkFormat format);
-uint32_t radv_translate_color_numformat(VkFormat format,
- const struct util_format_description *desc,
- int first_non_void);
+uint32_t radv_translate_color_numformat(VkFormat format, const struct util_format_description *desc,
+ int first_non_void);
uint32_t radv_colorformat_endian_swap(uint32_t colorformat);
unsigned radv_translate_colorswap(VkFormat format, bool do_endian_swap);
uint32_t radv_translate_dbformat(VkFormat format);
-uint32_t radv_translate_tex_dataformat(VkFormat format,
- const struct util_format_description *desc,
- int first_non_void);
-uint32_t radv_translate_tex_numformat(VkFormat format,
- const struct util_format_description *desc,
- int first_non_void);
-bool radv_format_pack_clear_color(VkFormat format,
- uint32_t clear_vals[2],
- VkClearColorValue *value);
+uint32_t radv_translate_tex_dataformat(VkFormat format, const struct util_format_description *desc,
+ int first_non_void);
+uint32_t radv_translate_tex_numformat(VkFormat format, const struct util_format_description *desc,
+ int first_non_void);
+bool radv_format_pack_clear_color(VkFormat format, uint32_t clear_vals[2],
+ VkClearColorValue *value);
bool radv_is_colorbuffer_format_supported(const struct radv_physical_device *pdevice,
VkFormat format, bool *blendable);
-bool radv_dcc_formats_compatible(VkFormat format1,
- VkFormat format2);
+bool radv_dcc_formats_compatible(VkFormat format1, VkFormat format2);
bool radv_is_atomic_format_supported(VkFormat format);
bool radv_device_supports_etc(struct radv_physical_device *physical_device);
struct radv_image_plane {
- VkFormat format;
- struct radeon_surf surface;
+ VkFormat format;
+ struct radeon_surf surface;
};
struct radv_image {
- struct vk_object_base base;
- VkImageType type;
- /* The original VkFormat provided by the client. This may not match any
- * of the actual surface formats.
- */
- VkFormat vk_format;
- VkImageUsageFlags usage; /**< Superset of VkImageCreateInfo::usage. */
- struct ac_surf_info info;
- VkImageTiling tiling; /** VkImageCreateInfo::tiling */
- VkImageCreateFlags flags; /** VkImageCreateInfo::flags */
-
- VkDeviceSize size;
- uint32_t alignment;
-
- unsigned queue_family_mask;
- bool exclusive;
- bool shareable;
-
- /* Set when bound */
- struct radeon_winsys_bo *bo;
- VkDeviceSize offset;
- bool tc_compatible_cmask;
-
- uint64_t clear_value_offset;
- uint64_t fce_pred_offset;
- uint64_t dcc_pred_offset;
-
- /* On some GPUs DCC needs different tiling of the metadata for
- * rendering and for display, so we're stuck with having the metadata
- * two times and then occasionally copying one into the other.
- *
- * The retile map is an array of (src index, dst index) pairs to
- * determine how it should be copied between the two.
- */
- struct radeon_winsys_bo *retile_map;
-
- /*
- * Metadata for the TC-compat zrange workaround. If the 32-bit value
- * stored at this offset is UINT_MAX, the driver will emit
- * DB_Z_INFO.ZRANGE_PRECISION=0, otherwise it will skip the
- * SET_CONTEXT_REG packet.
- */
- uint64_t tc_compat_zrange_offset;
-
- /* For VK_ANDROID_native_buffer, the WSI image owns the memory, */
- VkDeviceMemory owned_memory;
-
- unsigned plane_count;
- struct radv_image_plane planes[0];
+ struct vk_object_base base;
+ VkImageType type;
+ /* The original VkFormat provided by the client. This may not match any
+ * of the actual surface formats.
+ */
+ VkFormat vk_format;
+ VkImageUsageFlags usage; /**< Superset of VkImageCreateInfo::usage. */
+ struct ac_surf_info info;
+ VkImageTiling tiling; /** VkImageCreateInfo::tiling */
+ VkImageCreateFlags flags; /** VkImageCreateInfo::flags */
+
+ VkDeviceSize size;
+ uint32_t alignment;
+
+ unsigned queue_family_mask;
+ bool exclusive;
+ bool shareable;
+
+ /* Set when bound */
+ struct radeon_winsys_bo *bo;
+ VkDeviceSize offset;
+ bool tc_compatible_cmask;
+
+ uint64_t clear_value_offset;
+ uint64_t fce_pred_offset;
+ uint64_t dcc_pred_offset;
+
+ /* On some GPUs DCC needs different tiling of the metadata for
+ * rendering and for display, so we're stuck with having the metadata
+ * two times and then occasionally copying one into the other.
+ *
+ * The retile map is an array of (src index, dst index) pairs to
+ * determine how it should be copied between the two.
+ */
+ struct radeon_winsys_bo *retile_map;
+
+ /*
+ * Metadata for the TC-compat zrange workaround. If the 32-bit value
+ * stored at this offset is UINT_MAX, the driver will emit
+ * DB_Z_INFO.ZRANGE_PRECISION=0, otherwise it will skip the
+ * SET_CONTEXT_REG packet.
+ */
+ uint64_t tc_compat_zrange_offset;
+
+   /* For VK_ANDROID_native_buffer, the WSI image owns the memory. */
+ VkDeviceMemory owned_memory;
+
+ unsigned plane_count;
+ struct radv_image_plane planes[0];
};
/* Whether the image has a htile that is known consistent with the contents of
@@ -1912,27 +1849,17 @@ struct radv_image {
* correct results.
*/
bool radv_layout_is_htile_compressed(const struct radv_device *device,
- const struct radv_image *image,
- VkImageLayout layout,
- bool in_render_loop,
- unsigned queue_mask);
-
-bool radv_layout_can_fast_clear(const struct radv_device *device,
- const struct radv_image *image,
- VkImageLayout layout,
- bool in_render_loop,
- unsigned queue_mask);
-
-bool radv_layout_dcc_compressed(const struct radv_device *device,
- const struct radv_image *image,
- VkImageLayout layout,
- bool in_render_loop,
- unsigned queue_mask);
-
-bool radv_layout_fmask_compressed(const struct radv_device *device,
- const struct radv_image *image,
- VkImageLayout layout,
- unsigned queue_mask);
+ const struct radv_image *image, VkImageLayout layout,
+ bool in_render_loop, unsigned queue_mask);
+
+bool radv_layout_can_fast_clear(const struct radv_device *device, const struct radv_image *image,
+ VkImageLayout layout, bool in_render_loop, unsigned queue_mask);
+
+bool radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_image *image,
+ VkImageLayout layout, bool in_render_loop, unsigned queue_mask);
+
+bool radv_layout_fmask_compressed(const struct radv_device *device, const struct radv_image *image,
+ VkImageLayout layout, unsigned queue_mask);
/**
* Return whether the image has CMASK metadata for color surfaces.
@@ -1940,7 +1867,7 @@ bool radv_layout_fmask_compressed(const struct radv_device *device,
static inline bool
radv_image_has_cmask(const struct radv_image *image)
{
- return image->planes[0].surface.cmask_offset;
+ return image->planes[0].surface.cmask_offset;
}
/**
@@ -1949,7 +1876,7 @@ radv_image_has_cmask(const struct radv_image *image)
static inline bool
radv_image_has_fmask(const struct radv_image *image)
{
- return image->planes[0].surface.fmask_offset;
+ return image->planes[0].surface.fmask_offset;
}
/**
@@ -1958,7 +1885,7 @@ radv_image_has_fmask(const struct radv_image *image)
static inline bool
radv_image_has_dcc(const struct radv_image *image)
{
- return image->planes[0].surface.dcc_offset;
+ return image->planes[0].surface.dcc_offset;
}
/**
@@ -1967,7 +1894,7 @@ radv_image_has_dcc(const struct radv_image *image)
static inline bool
radv_image_is_tc_compat_cmask(const struct radv_image *image)
{
- return radv_image_has_fmask(image) && image->tc_compatible_cmask;
+ return radv_image_has_fmask(image) && image->tc_compatible_cmask;
}
/**
@@ -1976,8 +1903,7 @@ radv_image_is_tc_compat_cmask(const struct radv_image *image)
static inline bool
radv_dcc_enabled(const struct radv_image *image, unsigned level)
{
- return radv_image_has_dcc(image) &&
- level < image->planes[0].surface.num_dcc_levels;
+ return radv_image_has_dcc(image) && level < image->planes[0].surface.num_dcc_levels;
}
/**
@@ -1986,9 +1912,7 @@ radv_dcc_enabled(const struct radv_image *image, unsigned level)
static inline bool
radv_image_has_CB_metadata(const struct radv_image *image)
{
- return radv_image_has_cmask(image) ||
- radv_image_has_fmask(image) ||
- radv_image_has_dcc(image);
+ return radv_image_has_cmask(image) || radv_image_has_fmask(image) || radv_image_has_dcc(image);
}
/**
@@ -1997,7 +1921,7 @@ radv_image_has_CB_metadata(const struct radv_image *image)
static inline bool
radv_image_has_htile(const struct radv_image *image)
{
- return image->planes[0].surface.htile_size;
+ return image->planes[0].surface.htile_size;
}
/**
@@ -2006,8 +1930,7 @@ radv_image_has_htile(const struct radv_image *image)
static inline bool
radv_htile_enabled(const struct radv_image *image, unsigned level)
{
- return radv_image_has_htile(image) &&
- level < image->planes[0].surface.num_htile_levels;
+ return radv_image_has_htile(image) && level < image->planes[0].surface.num_htile_levels;
}
/**
@@ -2016,8 +1939,8 @@ radv_htile_enabled(const struct radv_image *image, unsigned level)
static inline bool
radv_image_is_tc_compat_htile(const struct radv_image *image)
{
- return radv_image_has_htile(image) &&
- (image->planes[0].surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE);
+ return radv_image_has_htile(image) &&
+ (image->planes[0].surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE);
}
/**
@@ -2025,559 +1948,509 @@ radv_image_is_tc_compat_htile(const struct radv_image *image)
* improve HiZ Z-Range precision.
*/
static inline bool
-radv_image_tile_stencil_disabled(const struct radv_device *device,
- const struct radv_image *image)
+radv_image_tile_stencil_disabled(const struct radv_device *device, const struct radv_image *image)
{
- if (device->physical_device->rad_info.chip_class >= GFX9) {
- return !vk_format_has_stencil(image->vk_format);
- } else {
- /* Due to a hw bug, TILE_STENCIL_DISABLE must be set to 0 for
- * the TC-compat ZRANGE issue even if no stencil is used.
- */
- return !vk_format_has_stencil(image->vk_format) &&
- !radv_image_is_tc_compat_htile(image);
- }
+ if (device->physical_device->rad_info.chip_class >= GFX9) {
+ return !vk_format_has_stencil(image->vk_format);
+ } else {
+ /* Due to a hw bug, TILE_STENCIL_DISABLE must be set to 0 for
+ * the TC-compat ZRANGE issue even if no stencil is used.
+ */
+ return !vk_format_has_stencil(image->vk_format) && !radv_image_is_tc_compat_htile(image);
+ }
}
static inline bool
radv_image_has_clear_value(const struct radv_image *image)
{
- return image->clear_value_offset != 0;
+ return image->clear_value_offset != 0;
}
static inline uint64_t
-radv_image_get_fast_clear_va(const struct radv_image *image,
- uint32_t base_level)
+radv_image_get_fast_clear_va(const struct radv_image *image, uint32_t base_level)
{
- assert(radv_image_has_clear_value(image));
+ assert(radv_image_has_clear_value(image));
- uint64_t va = radv_buffer_get_va(image->bo);
- va += image->offset + image->clear_value_offset + base_level * 8;
- return va;
+ uint64_t va = radv_buffer_get_va(image->bo);
+ va += image->offset + image->clear_value_offset + base_level * 8;
+ return va;
}
static inline uint64_t
-radv_image_get_fce_pred_va(const struct radv_image *image,
- uint32_t base_level)
+radv_image_get_fce_pred_va(const struct radv_image *image, uint32_t base_level)
{
- assert(image->fce_pred_offset != 0);
+ assert(image->fce_pred_offset != 0);
- uint64_t va = radv_buffer_get_va(image->bo);
- va += image->offset + image->fce_pred_offset + base_level * 8;
- return va;
+ uint64_t va = radv_buffer_get_va(image->bo);
+ va += image->offset + image->fce_pred_offset + base_level * 8;
+ return va;
}
static inline uint64_t
-radv_image_get_dcc_pred_va(const struct radv_image *image,
- uint32_t base_level)
+radv_image_get_dcc_pred_va(const struct radv_image *image, uint32_t base_level)
{
- assert(image->dcc_pred_offset != 0);
+ assert(image->dcc_pred_offset != 0);
- uint64_t va = radv_buffer_get_va(image->bo);
- va += image->offset + image->dcc_pred_offset + base_level * 8;
- return va;
+ uint64_t va = radv_buffer_get_va(image->bo);
+ va += image->offset + image->dcc_pred_offset + base_level * 8;
+ return va;
}
static inline uint64_t
-radv_get_tc_compat_zrange_va(const struct radv_image *image,
- uint32_t base_level)
+radv_get_tc_compat_zrange_va(const struct radv_image *image, uint32_t base_level)
{
- assert(image->tc_compat_zrange_offset != 0);
+ assert(image->tc_compat_zrange_offset != 0);
- uint64_t va = radv_buffer_get_va(image->bo);
- va += image->offset + image->tc_compat_zrange_offset + base_level * 4;
- return va;
+ uint64_t va = radv_buffer_get_va(image->bo);
+ va += image->offset + image->tc_compat_zrange_offset + base_level * 4;
+ return va;
}
static inline uint64_t
-radv_get_ds_clear_value_va(const struct radv_image *image,
- uint32_t base_level)
+radv_get_ds_clear_value_va(const struct radv_image *image, uint32_t base_level)
{
- assert(radv_image_has_clear_value(image));
+ assert(radv_image_has_clear_value(image));
- uint64_t va = radv_buffer_get_va(image->bo);
- va += image->offset + image->clear_value_offset + base_level * 8;
- return va;
+ uint64_t va = radv_buffer_get_va(image->bo);
+ va += image->offset + image->clear_value_offset + base_level * 8;
+ return va;
}
static inline uint32_t
-radv_get_htile_initial_value(const struct radv_device *device,
- const struct radv_image *image)
+radv_get_htile_initial_value(const struct radv_device *device, const struct radv_image *image)
{
- uint32_t initial_value;
-
- if (radv_image_tile_stencil_disabled(device, image)) {
- /* Z only (no stencil):
- *
- * |31 18|17 4|3 0|
- * +---------+---------+-------+
- * | Max Z | Min Z | ZMask |
- */
- initial_value = 0xfffc000f;
- } else {
- /* Z and stencil:
- *
- * |31 12|11 10|9 8|7 6|5 4|3 0|
- * +-----------+-----+------+-----+-----+-------+
- * | Z Range | | SMem | SR1 | SR0 | ZMask |
- *
- * SR0/SR1 contains the stencil test results. Initializing
- * SR0/SR1 to 0x3 means the stencil test result is unknown.
- */
- initial_value = 0xfffff3ff;
- }
-
- return initial_value;
+ uint32_t initial_value;
+
+ if (radv_image_tile_stencil_disabled(device, image)) {
+ /* Z only (no stencil):
+ *
+ * |31 18|17 4|3 0|
+ * +---------+---------+-------+
+ * | Max Z | Min Z | ZMask |
+ */
+ initial_value = 0xfffc000f;
+ } else {
+ /* Z and stencil:
+ *
+ * |31 12|11 10|9 8|7 6|5 4|3 0|
+ * +-----------+-----+------+-----+-----+-------+
+ * | Z Range | | SMem | SR1 | SR0 | ZMask |
+ *
+       * SR0/SR1 contain the stencil test results. Initializing
+ * SR0/SR1 to 0x3 means the stencil test result is unknown.
+ */
+ initial_value = 0xfffff3ff;
+ }
+
+ return initial_value;
}
-unsigned radv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family);
+unsigned radv_image_queue_family_mask(const struct radv_image *image, uint32_t family,
+ uint32_t queue_family);
static inline uint32_t
-radv_get_layerCount(const struct radv_image *image,
- const VkImageSubresourceRange *range)
+radv_get_layerCount(const struct radv_image *image, const VkImageSubresourceRange *range)
{
- return range->layerCount == VK_REMAINING_ARRAY_LAYERS ?
- image->info.array_size - range->baseArrayLayer : range->layerCount;
+ return range->layerCount == VK_REMAINING_ARRAY_LAYERS
+ ? image->info.array_size - range->baseArrayLayer
+ : range->layerCount;
}
static inline uint32_t
-radv_get_levelCount(const struct radv_image *image,
- const VkImageSubresourceRange *range)
+radv_get_levelCount(const struct radv_image *image, const VkImageSubresourceRange *range)
{
- return range->levelCount == VK_REMAINING_MIP_LEVELS ?
- image->info.levels - range->baseMipLevel : range->levelCount;
+ return range->levelCount == VK_REMAINING_MIP_LEVELS ? image->info.levels - range->baseMipLevel
+ : range->levelCount;
}
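The two helpers above resolve the VK_REMAINING_ARRAY_LAYERS / VK_REMAINING_MIP_LEVELS sentinels against the image's actual dimensions. A hedged sketch of the intended usage pattern:

/* Illustrative sketch only: expand a subresource range into concrete
 * mip/layer loop bounds using the helpers above. */
static inline void
walk_subresource_range_example(const struct radv_image *image,
                               const VkImageSubresourceRange *range)
{
   uint32_t level_count = radv_get_levelCount(image, range);
   uint32_t layer_count = radv_get_layerCount(image, range);

   for (uint32_t l = 0; l < level_count; l++) {
      for (uint32_t s = 0; s < layer_count; s++) {
         uint32_t level = range->baseMipLevel + l;   /* absolute mip level */
         uint32_t layer = range->baseArrayLayer + s; /* absolute array layer */
         (void)level;
         (void)layer;
      }
   }
}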
-bool
-radv_image_is_renderable(struct radv_device *device, struct radv_image *image);
+bool radv_image_is_renderable(struct radv_device *device, struct radv_image *image);
struct radeon_bo_metadata;
-void
-radv_init_metadata(struct radv_device *device,
- struct radv_image *image,
- struct radeon_bo_metadata *metadata);
+void radv_init_metadata(struct radv_device *device, struct radv_image *image,
+ struct radeon_bo_metadata *metadata);
-void
-radv_image_override_offset_stride(struct radv_device *device,
- struct radv_image *image,
- uint64_t offset, uint32_t stride);
+void radv_image_override_offset_stride(struct radv_device *device, struct radv_image *image,
+ uint64_t offset, uint32_t stride);
union radv_descriptor {
- struct {
- uint32_t plane0_descriptor[8];
- uint32_t fmask_descriptor[8];
- };
- struct {
- uint32_t plane_descriptors[3][8];
- };
+ struct {
+ uint32_t plane0_descriptor[8];
+ uint32_t fmask_descriptor[8];
+ };
+ struct {
+ uint32_t plane_descriptors[3][8];
+ };
};
struct radv_image_view {
- struct vk_object_base base;
- struct radv_image *image; /**< VkImageViewCreateInfo::image */
- struct radeon_winsys_bo *bo;
+ struct vk_object_base base;
+ struct radv_image *image; /**< VkImageViewCreateInfo::image */
+ struct radeon_winsys_bo *bo;
- VkImageViewType type;
- VkImageAspectFlags aspect_mask;
- VkFormat vk_format;
- unsigned plane_id;
- bool multiple_planes;
- uint32_t base_layer;
- uint32_t layer_count;
- uint32_t base_mip;
- uint32_t level_count;
- VkExtent3D extent; /**< Extent of VkImageViewCreateInfo::baseMipLevel. */
+ VkImageViewType type;
+ VkImageAspectFlags aspect_mask;
+ VkFormat vk_format;
+ unsigned plane_id;
+ bool multiple_planes;
+ uint32_t base_layer;
+ uint32_t layer_count;
+ uint32_t base_mip;
+ uint32_t level_count;
+ VkExtent3D extent; /**< Extent of VkImageViewCreateInfo::baseMipLevel. */
- /* Whether the image iview supports fast clear. */
- bool support_fast_clear;
+ /* Whether the image iview supports fast clear. */
+ bool support_fast_clear;
- union radv_descriptor descriptor;
+ union radv_descriptor descriptor;
- /* Descriptor for use as a storage image as opposed to a sampled image.
- * This has a few differences for cube maps (e.g. type).
- */
- union radv_descriptor storage_descriptor;
+ /* Descriptor for use as a storage image as opposed to a sampled image.
+ * This has a few differences for cube maps (e.g. type).
+ */
+ union radv_descriptor storage_descriptor;
};
struct radv_image_create_info {
- const VkImageCreateInfo *vk_info;
- bool scanout;
- bool no_metadata_planes;
- const struct radeon_bo_metadata *bo_metadata;
+ const VkImageCreateInfo *vk_info;
+ bool scanout;
+ bool no_metadata_planes;
+ const struct radeon_bo_metadata *bo_metadata;
};
VkResult
-radv_image_create_layout(struct radv_device *device,
- struct radv_image_create_info create_info,
+radv_image_create_layout(struct radv_device *device, struct radv_image_create_info create_info,
const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *mod_info,
struct radv_image *image);
-VkResult radv_image_create(VkDevice _device,
- const struct radv_image_create_info *info,
- const VkAllocationCallbacks* alloc,
- VkImage *pImage);
+VkResult radv_image_create(VkDevice _device, const struct radv_image_create_info *info,
+ const VkAllocationCallbacks *alloc, VkImage *pImage);
-bool
-radv_are_formats_dcc_compatible(const struct radv_physical_device *pdev,
- const void *pNext, VkFormat format,
- VkImageCreateFlags flags);
+bool radv_are_formats_dcc_compatible(const struct radv_physical_device *pdev, const void *pNext,
+ VkFormat format, VkImageCreateFlags flags);
bool vi_alpha_is_on_msb(struct radv_device *device, VkFormat format);
-VkResult
-radv_image_from_gralloc(VkDevice device_h,
- const VkImageCreateInfo *base_info,
- const VkNativeBufferANDROID *gralloc_info,
- const VkAllocationCallbacks *alloc,
- VkImage *out_image_h);
-uint64_t
-radv_ahb_usage_from_vk_usage(const VkImageCreateFlags vk_create,
- const VkImageUsageFlags vk_usage);
-VkResult
-radv_import_ahb_memory(struct radv_device *device,
- struct radv_device_memory *mem,
- unsigned priority,
- const VkImportAndroidHardwareBufferInfoANDROID *info);
-VkResult
-radv_create_ahb_memory(struct radv_device *device,
- struct radv_device_memory *mem,
- unsigned priority,
- const VkMemoryAllocateInfo *pAllocateInfo);
+VkResult radv_image_from_gralloc(VkDevice device_h, const VkImageCreateInfo *base_info,
+ const VkNativeBufferANDROID *gralloc_info,
+ const VkAllocationCallbacks *alloc, VkImage *out_image_h);
+uint64_t radv_ahb_usage_from_vk_usage(const VkImageCreateFlags vk_create,
+ const VkImageUsageFlags vk_usage);
+VkResult radv_import_ahb_memory(struct radv_device *device, struct radv_device_memory *mem,
+ unsigned priority,
+ const VkImportAndroidHardwareBufferInfoANDROID *info);
+VkResult radv_create_ahb_memory(struct radv_device *device, struct radv_device_memory *mem,
+ unsigned priority, const VkMemoryAllocateInfo *pAllocateInfo);
-VkFormat
-radv_select_android_external_format(const void *next, VkFormat default_format);
+VkFormat radv_select_android_external_format(const void *next, VkFormat default_format);
bool radv_android_gralloc_supports_format(VkFormat format, VkImageUsageFlagBits usage);
struct radv_image_view_extra_create_info {
- bool disable_compression;
+ bool disable_compression;
};
-void radv_image_view_init(struct radv_image_view *view,
- struct radv_device *device,
- const VkImageViewCreateInfo *pCreateInfo,
- const struct radv_image_view_extra_create_info* extra_create_info);
+void radv_image_view_init(struct radv_image_view *view, struct radv_device *device,
+ const VkImageViewCreateInfo *pCreateInfo,
+ const struct radv_image_view_extra_create_info *extra_create_info);
VkFormat radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask);
struct radv_sampler_ycbcr_conversion {
- struct vk_object_base base;
- VkFormat format;
- VkSamplerYcbcrModelConversion ycbcr_model;
- VkSamplerYcbcrRange ycbcr_range;
- VkComponentMapping components;
- VkChromaLocation chroma_offsets[2];
- VkFilter chroma_filter;
+ struct vk_object_base base;
+ VkFormat format;
+ VkSamplerYcbcrModelConversion ycbcr_model;
+ VkSamplerYcbcrRange ycbcr_range;
+ VkComponentMapping components;
+ VkChromaLocation chroma_offsets[2];
+ VkFilter chroma_filter;
};
struct radv_buffer_view {
- struct vk_object_base base;
- struct radeon_winsys_bo *bo;
- VkFormat vk_format;
- uint64_t range; /**< VkBufferViewCreateInfo::range */
- uint32_t state[4];
+ struct vk_object_base base;
+ struct radeon_winsys_bo *bo;
+ VkFormat vk_format;
+ uint64_t range; /**< VkBufferViewCreateInfo::range */
+ uint32_t state[4];
};
-void radv_buffer_view_init(struct radv_buffer_view *view,
- struct radv_device *device,
- const VkBufferViewCreateInfo* pCreateInfo);
+void radv_buffer_view_init(struct radv_buffer_view *view, struct radv_device *device,
+ const VkBufferViewCreateInfo *pCreateInfo);
static inline struct VkExtent3D
-radv_sanitize_image_extent(const VkImageType imageType,
- const struct VkExtent3D imageExtent)
+radv_sanitize_image_extent(const VkImageType imageType, const struct VkExtent3D imageExtent)
{
- switch (imageType) {
- case VK_IMAGE_TYPE_1D:
- return (VkExtent3D) { imageExtent.width, 1, 1 };
- case VK_IMAGE_TYPE_2D:
- return (VkExtent3D) { imageExtent.width, imageExtent.height, 1 };
- case VK_IMAGE_TYPE_3D:
- return imageExtent;
- default:
- unreachable("invalid image type");
- }
+ switch (imageType) {
+ case VK_IMAGE_TYPE_1D:
+ return (VkExtent3D){imageExtent.width, 1, 1};
+ case VK_IMAGE_TYPE_2D:
+ return (VkExtent3D){imageExtent.width, imageExtent.height, 1};
+ case VK_IMAGE_TYPE_3D:
+ return imageExtent;
+ default:
+ unreachable("invalid image type");
+ }
}
static inline struct VkOffset3D
-radv_sanitize_image_offset(const VkImageType imageType,
- const struct VkOffset3D imageOffset)
+radv_sanitize_image_offset(const VkImageType imageType, const struct VkOffset3D imageOffset)
{
- switch (imageType) {
- case VK_IMAGE_TYPE_1D:
- return (VkOffset3D) { imageOffset.x, 0, 0 };
- case VK_IMAGE_TYPE_2D:
- return (VkOffset3D) { imageOffset.x, imageOffset.y, 0 };
- case VK_IMAGE_TYPE_3D:
- return imageOffset;
- default:
- unreachable("invalid image type");
- }
+ switch (imageType) {
+ case VK_IMAGE_TYPE_1D:
+ return (VkOffset3D){imageOffset.x, 0, 0};
+ case VK_IMAGE_TYPE_2D:
+ return (VkOffset3D){imageOffset.x, imageOffset.y, 0};
+ case VK_IMAGE_TYPE_3D:
+ return imageOffset;
+ default:
+ unreachable("invalid image type");
+ }
}
static inline bool
-radv_image_extent_compare(const struct radv_image *image,
- const VkExtent3D *extent)
+radv_image_extent_compare(const struct radv_image *image, const VkExtent3D *extent)
{
- if (extent->width != image->info.width ||
- extent->height != image->info.height ||
- extent->depth != image->info.depth)
- return false;
- return true;
+ if (extent->width != image->info.width || extent->height != image->info.height ||
+ extent->depth != image->info.depth)
+ return false;
+ return true;
}
struct radv_sampler {
- struct vk_object_base base;
- uint32_t state[4];
- struct radv_sampler_ycbcr_conversion *ycbcr_sampler;
- uint32_t border_color_slot;
+ struct vk_object_base base;
+ uint32_t state[4];
+ struct radv_sampler_ycbcr_conversion *ycbcr_sampler;
+ uint32_t border_color_slot;
};
struct radv_framebuffer {
- struct vk_object_base base;
- uint32_t width;
- uint32_t height;
- uint32_t layers;
+ struct vk_object_base base;
+ uint32_t width;
+ uint32_t height;
+ uint32_t layers;
- bool imageless;
+ bool imageless;
- uint32_t attachment_count;
- struct radv_image_view *attachments[0];
+ uint32_t attachment_count;
+ struct radv_image_view *attachments[0];
};
struct radv_subpass_barrier {
- VkPipelineStageFlags src_stage_mask;
- VkAccessFlags src_access_mask;
- VkAccessFlags dst_access_mask;
+ VkPipelineStageFlags src_stage_mask;
+ VkAccessFlags src_access_mask;
+ VkAccessFlags dst_access_mask;
};
void radv_subpass_barrier(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_subpass_barrier *barrier);
+ const struct radv_subpass_barrier *barrier);
struct radv_subpass_attachment {
- uint32_t attachment;
- VkImageLayout layout;
- VkImageLayout stencil_layout;
- bool in_render_loop;
+ uint32_t attachment;
+ VkImageLayout layout;
+ VkImageLayout stencil_layout;
+ bool in_render_loop;
};
struct radv_subpass {
- uint32_t attachment_count;
- struct radv_subpass_attachment * attachments;
+ uint32_t attachment_count;
+ struct radv_subpass_attachment *attachments;
- uint32_t input_count;
- uint32_t color_count;
- struct radv_subpass_attachment * input_attachments;
- struct radv_subpass_attachment * color_attachments;
- struct radv_subpass_attachment * resolve_attachments;
- struct radv_subpass_attachment * depth_stencil_attachment;
- struct radv_subpass_attachment * ds_resolve_attachment;
- VkResolveModeFlagBits depth_resolve_mode;
- VkResolveModeFlagBits stencil_resolve_mode;
+ uint32_t input_count;
+ uint32_t color_count;
+ struct radv_subpass_attachment *input_attachments;
+ struct radv_subpass_attachment *color_attachments;
+ struct radv_subpass_attachment *resolve_attachments;
+ struct radv_subpass_attachment *depth_stencil_attachment;
+ struct radv_subpass_attachment *ds_resolve_attachment;
+ VkResolveModeFlagBits depth_resolve_mode;
+ VkResolveModeFlagBits stencil_resolve_mode;
- /** Subpass has at least one color resolve attachment */
- bool has_color_resolve;
+ /** Subpass has at least one color resolve attachment */
+ bool has_color_resolve;
- /** Subpass has at least one color attachment */
- bool has_color_att;
+ /** Subpass has at least one color attachment */
+ bool has_color_att;
- struct radv_subpass_barrier start_barrier;
+ struct radv_subpass_barrier start_barrier;
- uint32_t view_mask;
+ uint32_t view_mask;
- VkSampleCountFlagBits color_sample_count;
- VkSampleCountFlagBits depth_sample_count;
- VkSampleCountFlagBits max_sample_count;
+ VkSampleCountFlagBits color_sample_count;
+ VkSampleCountFlagBits depth_sample_count;
+ VkSampleCountFlagBits max_sample_count;
- /* Whether the subpass has ingoing/outgoing external dependencies. */
- bool has_ingoing_dep;
- bool has_outgoing_dep;
+ /* Whether the subpass has ingoing/outgoing external dependencies. */
+ bool has_ingoing_dep;
+ bool has_outgoing_dep;
};
-uint32_t
-radv_get_subpass_id(struct radv_cmd_buffer *cmd_buffer);
+uint32_t radv_get_subpass_id(struct radv_cmd_buffer *cmd_buffer);
struct radv_render_pass_attachment {
- VkFormat format;
- uint32_t samples;
- VkAttachmentLoadOp load_op;
- VkAttachmentLoadOp stencil_load_op;
- VkImageLayout initial_layout;
- VkImageLayout final_layout;
- VkImageLayout stencil_initial_layout;
- VkImageLayout stencil_final_layout;
+ VkFormat format;
+ uint32_t samples;
+ VkAttachmentLoadOp load_op;
+ VkAttachmentLoadOp stencil_load_op;
+ VkImageLayout initial_layout;
+ VkImageLayout final_layout;
+ VkImageLayout stencil_initial_layout;
+ VkImageLayout stencil_final_layout;
- /* The subpass id in which the attachment will be used first/last. */
- uint32_t first_subpass_idx;
- uint32_t last_subpass_idx;
+ /* The subpass id in which the attachment will be used first/last. */
+ uint32_t first_subpass_idx;
+ uint32_t last_subpass_idx;
};
struct radv_render_pass {
- struct vk_object_base base;
- uint32_t attachment_count;
- uint32_t subpass_count;
- struct radv_subpass_attachment * subpass_attachments;
- struct radv_render_pass_attachment * attachments;
- struct radv_subpass_barrier end_barrier;
- struct radv_subpass subpasses[0];
+ struct vk_object_base base;
+ uint32_t attachment_count;
+ uint32_t subpass_count;
+ struct radv_subpass_attachment *subpass_attachments;
+ struct radv_render_pass_attachment *attachments;
+ struct radv_subpass_barrier end_barrier;
+ struct radv_subpass subpasses[0];
};
VkResult radv_device_init_meta(struct radv_device *device);
void radv_device_finish_meta(struct radv_device *device);
struct radv_query_pool {
- struct vk_object_base base;
- struct radeon_winsys_bo *bo;
- uint32_t stride;
- uint32_t availability_offset;
- uint64_t size;
- char *ptr;
- VkQueryType type;
- uint32_t pipeline_stats_mask;
+ struct vk_object_base base;
+ struct radeon_winsys_bo *bo;
+ uint32_t stride;
+ uint32_t availability_offset;
+ uint64_t size;
+ char *ptr;
+ VkQueryType type;
+ uint32_t pipeline_stats_mask;
};
typedef enum {
- RADV_SEMAPHORE_NONE,
- RADV_SEMAPHORE_SYNCOBJ,
- RADV_SEMAPHORE_TIMELINE_SYNCOBJ,
- RADV_SEMAPHORE_TIMELINE,
+ RADV_SEMAPHORE_NONE,
+ RADV_SEMAPHORE_SYNCOBJ,
+ RADV_SEMAPHORE_TIMELINE_SYNCOBJ,
+ RADV_SEMAPHORE_TIMELINE,
} radv_semaphore_kind;
struct radv_deferred_queue_submission;
struct radv_timeline_waiter {
- struct list_head list;
- struct radv_deferred_queue_submission *submission;
- uint64_t value;
+ struct list_head list;
+ struct radv_deferred_queue_submission *submission;
+ uint64_t value;
};
struct radv_timeline_point {
- struct list_head list;
+ struct list_head list;
- uint64_t value;
- uint32_t syncobj;
+ uint64_t value;
+ uint32_t syncobj;
- /* Separate from the list to accommodate CPU wait being async, as well
- * as prevent point deletion during submission. */
- unsigned wait_count;
+ /* Separate from the list to accommodate CPU wait being async, as well
+ * as prevent point deletion during submission. */
+ unsigned wait_count;
};
struct radv_timeline {
- mtx_t mutex;
+ mtx_t mutex;
- uint64_t highest_signaled;
- uint64_t highest_submitted;
+ uint64_t highest_signaled;
+ uint64_t highest_submitted;
- struct list_head points;
+ struct list_head points;
- /* Keep free points on hand so we do not have to recreate syncobjs all
- * the time. */
- struct list_head free_points;
+ /* Keep free points on hand so we do not have to recreate syncobjs all
+ * the time. */
+ struct list_head free_points;
- /* Submissions that are deferred waiting for a specific value to be
- * submitted. */
- struct list_head waiters;
+ /* Submissions that are deferred waiting for a specific value to be
+ * submitted. */
+ struct list_head waiters;
};
struct radv_timeline_syncobj {
- /* Keep syncobj first, so common-code can just handle this as
- * non-timeline syncobj. */
- uint32_t syncobj;
- uint64_t max_point; /* max submitted point. */
+ /* Keep syncobj first, so common-code can just handle this as
+ * non-timeline syncobj. */
+ uint32_t syncobj;
+ uint64_t max_point; /* max submitted point. */
};
struct radv_semaphore_part {
- radv_semaphore_kind kind;
- union {
- uint32_t syncobj;
- struct radv_timeline timeline;
- struct radv_timeline_syncobj timeline_syncobj;
- };
+ radv_semaphore_kind kind;
+ union {
+ uint32_t syncobj;
+ struct radv_timeline timeline;
+ struct radv_timeline_syncobj timeline_syncobj;
+ };
};
struct radv_semaphore {
- struct vk_object_base base;
- struct radv_semaphore_part permanent;
- struct radv_semaphore_part temporary;
-};
-
-bool radv_queue_internal_submit(struct radv_queue *queue,
- struct radeon_cmdbuf *cs);
-
-void radv_set_descriptor_set(struct radv_cmd_buffer *cmd_buffer,
- VkPipelineBindPoint bind_point,
- struct radv_descriptor_set *set,
- unsigned idx);
-
-void
-radv_update_descriptor_sets(struct radv_device *device,
- struct radv_cmd_buffer *cmd_buffer,
- VkDescriptorSet overrideSet,
- uint32_t descriptorWriteCount,
- const VkWriteDescriptorSet *pDescriptorWrites,
- uint32_t descriptorCopyCount,
- const VkCopyDescriptorSet *pDescriptorCopies);
-
-void
-radv_update_descriptor_set_with_template(struct radv_device *device,
- struct radv_cmd_buffer *cmd_buffer,
- struct radv_descriptor_set *set,
- VkDescriptorUpdateTemplate descriptorUpdateTemplate,
- const void *pData);
+ struct vk_object_base base;
+ struct radv_semaphore_part permanent;
+ struct radv_semaphore_part temporary;
+};
+
+bool radv_queue_internal_submit(struct radv_queue *queue, struct radeon_cmdbuf *cs);
+
+void radv_set_descriptor_set(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point,
+ struct radv_descriptor_set *set, unsigned idx);
+
+void radv_update_descriptor_sets(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer,
+ VkDescriptorSet overrideSet, uint32_t descriptorWriteCount,
+ const VkWriteDescriptorSet *pDescriptorWrites,
+ uint32_t descriptorCopyCount,
+ const VkCopyDescriptorSet *pDescriptorCopies);
+
+void radv_update_descriptor_set_with_template(struct radv_device *device,
+ struct radv_cmd_buffer *cmd_buffer,
+ struct radv_descriptor_set *set,
+ VkDescriptorUpdateTemplate descriptorUpdateTemplate,
+ const void *pData);
void radv_meta_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer,
- VkPipelineBindPoint pipelineBindPoint,
- VkPipelineLayout _layout,
- uint32_t set,
- uint32_t descriptorWriteCount,
+ VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout _layout,
+ uint32_t set, uint32_t descriptorWriteCount,
const VkWriteDescriptorSet *pDescriptorWrites);
-uint32_t radv_init_dcc(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *range,
- uint32_t value);
+uint32_t radv_init_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *range, uint32_t value);
-uint32_t radv_init_fmask(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *range);
+uint32_t radv_init_fmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *range);
typedef enum {
- RADV_FENCE_NONE,
- RADV_FENCE_SYNCOBJ,
+ RADV_FENCE_NONE,
+ RADV_FENCE_SYNCOBJ,
} radv_fence_kind;
struct radv_fence_part {
- radv_fence_kind kind;
+ radv_fence_kind kind;
- /* DRM syncobj handle for syncobj-based fences. */
- uint32_t syncobj;
+ /* DRM syncobj handle for syncobj-based fences. */
+ uint32_t syncobj;
};
struct radv_fence {
- struct vk_object_base base;
- struct radv_fence_part permanent;
- struct radv_fence_part temporary;
+ struct vk_object_base base;
+ struct radv_fence_part permanent;
+ struct radv_fence_part temporary;
};
/* radv_nir_to_llvm.c */
struct radv_shader_args;
-void llvm_compile_shader(struct radv_device *device,
- unsigned shader_count,
- struct nir_shader *const *shaders,
- struct radv_shader_binary **binary,
- struct radv_shader_args *args);
+void llvm_compile_shader(struct radv_device *device, unsigned shader_count,
+ struct nir_shader *const *shaders, struct radv_shader_binary **binary,
+ struct radv_shader_args *args);
-unsigned radv_nir_get_max_workgroup_size(enum chip_class chip_class,
- gl_shader_stage stage,
- const struct nir_shader *nir);
+unsigned radv_nir_get_max_workgroup_size(enum chip_class chip_class, gl_shader_stage stage,
+ const struct nir_shader *nir);
/* radv_shader_info.h */
struct radv_shader_info;
struct radv_shader_variant_key;
void radv_nir_shader_info_pass(const struct nir_shader *nir,
- const struct radv_pipeline_layout *layout,
- const struct radv_shader_variant_key *key,
- struct radv_shader_info *info);
+ const struct radv_pipeline_layout *layout,
+ const struct radv_shader_variant_key *key,
+ struct radv_shader_info *info);
void radv_nir_shader_info_init(struct radv_shader_info *info);
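An aside for readers skimming the reformatted header: radv_sanitize_image_extent and radv_sanitize_image_offset above only force the unused components to 1 (extents) or 0 (offsets) for 1D/2D images and pass 3D values through unchanged. A minimal standalone sketch of the same behaviour, using just the public VkExtent3D type; the main()/printf scaffolding is illustrative and not RADV code:

   #include <stdio.h>
   #include <vulkan/vulkan.h>

   int main(void)
   {
      VkExtent3D in = {.width = 256, .height = 128, .depth = 8};

      /* Same normalization as the switch in radv_sanitize_image_extent. */
      VkExtent3D e1d = {in.width, 1, 1};         /* VK_IMAGE_TYPE_1D */
      VkExtent3D e2d = {in.width, in.height, 1}; /* VK_IMAGE_TYPE_2D */
      VkExtent3D e3d = in;                       /* VK_IMAGE_TYPE_3D */

      printf("1D: %ux%ux%u\n", e1d.width, e1d.height, e1d.depth); /* 256x1x1 */
      printf("2D: %ux%ux%u\n", e2d.width, e2d.height, e2d.depth); /* 256x128x1 */
      printf("3D: %ux%ux%u\n", e3d.width, e3d.height, e3d.depth); /* 256x128x8 */
      return 0;
   }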
@@ -2585,27 +2458,25 @@ bool radv_thread_trace_init(struct radv_device *device);
void radv_thread_trace_finish(struct radv_device *device);
bool radv_begin_thread_trace(struct radv_queue *queue);
bool radv_end_thread_trace(struct radv_queue *queue);
-bool radv_get_thread_trace(struct radv_queue *queue,
- struct ac_thread_trace *thread_trace);
-void radv_emit_thread_trace_userdata(const struct radv_device *device,
- struct radeon_cmdbuf *cs,
- const void *data, uint32_t num_dwords);
+bool radv_get_thread_trace(struct radv_queue *queue, struct ac_thread_trace *thread_trace);
+void radv_emit_thread_trace_userdata(const struct radv_device *device, struct radeon_cmdbuf *cs,
+ const void *data, uint32_t num_dwords);
/* radv_sqtt_layer_.c */
struct radv_barrier_data {
- union {
- struct {
- uint16_t depth_stencil_expand : 1;
- uint16_t htile_hiz_range_expand : 1;
- uint16_t depth_stencil_resummarize : 1;
- uint16_t dcc_decompress : 1;
- uint16_t fmask_decompress : 1;
- uint16_t fast_clear_eliminate : 1;
- uint16_t fmask_color_expand : 1;
- uint16_t init_mask_ram : 1;
- uint16_t reserved : 8;
- };
- uint16_t all;
- } layout_transitions;
+ union {
+ struct {
+ uint16_t depth_stencil_expand : 1;
+ uint16_t htile_hiz_range_expand : 1;
+ uint16_t depth_stencil_resummarize : 1;
+ uint16_t dcc_decompress : 1;
+ uint16_t fmask_decompress : 1;
+ uint16_t fast_clear_eliminate : 1;
+ uint16_t fmask_color_expand : 1;
+ uint16_t init_mask_ram : 1;
+ uint16_t reserved : 8;
+ };
+ uint16_t all;
+ } layout_transitions;
};
/**
@@ -2613,23 +2484,23 @@ struct radv_barrier_data {
* the Vulkan client (does not include PAL-defined values). (Table 15)
*/
enum rgp_barrier_reason {
- RGP_BARRIER_UNKNOWN_REASON = 0xFFFFFFFF,
+ RGP_BARRIER_UNKNOWN_REASON = 0xFFFFFFFF,
- /* External app-generated barrier reasons, i.e. API synchronization
- * commands. Range of valid values: [0x00000001 ... 0x7FFFFFFF].
- */
- RGP_BARRIER_EXTERNAL_CMD_PIPELINE_BARRIER = 0x00000001,
- RGP_BARRIER_EXTERNAL_RENDER_PASS_SYNC = 0x00000002,
- RGP_BARRIER_EXTERNAL_CMD_WAIT_EVENTS = 0x00000003,
+ /* External app-generated barrier reasons, i.e. API synchronization
+ * commands. Range of valid values: [0x00000001 ... 0x7FFFFFFF].
+ */
+ RGP_BARRIER_EXTERNAL_CMD_PIPELINE_BARRIER = 0x00000001,
+ RGP_BARRIER_EXTERNAL_RENDER_PASS_SYNC = 0x00000002,
+ RGP_BARRIER_EXTERNAL_CMD_WAIT_EVENTS = 0x00000003,
- /* Internal barrier reasons, i.e. implicit synchronization inserted by
- * the Vulkan driver. Range of valid values: [0xC0000000 ... 0xFFFFFFFE].
- */
- RGP_BARRIER_INTERNAL_BASE = 0xC0000000,
- RGP_BARRIER_INTERNAL_PRE_RESET_QUERY_POOL_SYNC = RGP_BARRIER_INTERNAL_BASE + 0,
- RGP_BARRIER_INTERNAL_POST_RESET_QUERY_POOL_SYNC = RGP_BARRIER_INTERNAL_BASE + 1,
- RGP_BARRIER_INTERNAL_GPU_EVENT_RECYCLE_STALL = RGP_BARRIER_INTERNAL_BASE + 2,
- RGP_BARRIER_INTERNAL_PRE_COPY_QUERY_POOL_RESULTS_SYNC = RGP_BARRIER_INTERNAL_BASE + 3
+ /* Internal barrier reasons, i.e. implicit synchronization inserted by
+ * the Vulkan driver. Range of valid values: [0xC0000000 ... 0xFFFFFFFE].
+ */
+ RGP_BARRIER_INTERNAL_BASE = 0xC0000000,
+ RGP_BARRIER_INTERNAL_PRE_RESET_QUERY_POOL_SYNC = RGP_BARRIER_INTERNAL_BASE + 0,
+ RGP_BARRIER_INTERNAL_POST_RESET_QUERY_POOL_SYNC = RGP_BARRIER_INTERNAL_BASE + 1,
+ RGP_BARRIER_INTERNAL_GPU_EVENT_RECYCLE_STALL = RGP_BARRIER_INTERNAL_BASE + 2,
+ RGP_BARRIER_INTERNAL_PRE_COPY_QUERY_POOL_RESULTS_SYNC = RGP_BARRIER_INTERNAL_BASE + 3
};
void radv_describe_begin_cmd_buffer(struct radv_cmd_buffer *cmd_buffer);
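One note on the RGP barrier-reason ranges documented in the enum above: app-generated reasons occupy [0x00000001, 0x7FFFFFFF], driver-internal ones occupy [0xC0000000, 0xFFFFFFFE], and 0xFFFFFFFF means unknown, so the two classes can be told apart with a simple range check. A hypothetical helper (not a RADV function) sketching that check:

   #include <stdbool.h>
   #include <stdint.h>

   /* Classify a reason value using the ranges from the enum comments above. */
   static bool
   rgp_barrier_reason_is_internal(uint32_t reason)
   {
      /* 0xFFFFFFFF (RGP_BARRIER_UNKNOWN_REASON) is deliberately excluded. */
      return reason >= 0xC0000000u && reason <= 0xFFFFFFFEu;
   }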
@@ -2637,106 +2508,107 @@ void radv_describe_end_cmd_buffer(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_draw(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_dispatch(struct radv_cmd_buffer *cmd_buffer, int x, int y, int z);
void radv_describe_begin_render_pass_clear(struct radv_cmd_buffer *cmd_buffer,
- VkImageAspectFlagBits aspects);
+ VkImageAspectFlagBits aspects);
void radv_describe_end_render_pass_clear(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_begin_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_end_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_barrier_start(struct radv_cmd_buffer *cmd_buffer,
- enum rgp_barrier_reason reason);
+ enum rgp_barrier_reason reason);
void radv_describe_barrier_end(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_barrier_end_delayed(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_layout_transition(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_barrier_data *barrier);
+ const struct radv_barrier_data *barrier);
uint64_t radv_get_current_time(void);
static inline uint32_t
si_conv_gl_prim_to_vertices(unsigned gl_prim)
{
- switch (gl_prim) {
- case 0: /* GL_POINTS */
- return 1;
- case 1: /* GL_LINES */
- case 3: /* GL_LINE_STRIP */
- return 2;
- case 4: /* GL_TRIANGLES */
- case 5: /* GL_TRIANGLE_STRIP */
- return 3;
- case 0xA: /* GL_LINE_STRIP_ADJACENCY_ARB */
- return 4;
- case 0xc: /* GL_TRIANGLES_ADJACENCY_ARB */
- return 6;
- case 7: /* GL_QUADS */
- return V_028A6C_TRISTRIP;
- default:
- assert(0);
- return 0;
- }
+ switch (gl_prim) {
+ case 0: /* GL_POINTS */
+ return 1;
+ case 1: /* GL_LINES */
+ case 3: /* GL_LINE_STRIP */
+ return 2;
+ case 4: /* GL_TRIANGLES */
+ case 5: /* GL_TRIANGLE_STRIP */
+ return 3;
+ case 0xA: /* GL_LINE_STRIP_ADJACENCY_ARB */
+ return 4;
+ case 0xc: /* GL_TRIANGLES_ADJACENCY_ARB */
+ return 6;
+ case 7: /* GL_QUADS */
+ return V_028A6C_TRISTRIP;
+ default:
+ assert(0);
+ return 0;
+ }
}
-
struct radv_extra_render_pass_begin_info {
- bool disable_dcc;
+ bool disable_dcc;
};
void radv_cmd_buffer_begin_render_pass(struct radv_cmd_buffer *cmd_buffer,
- const VkRenderPassBeginInfo *pRenderPassBegin,
- const struct radv_extra_render_pass_begin_info *extra_info);
+ const VkRenderPassBeginInfo *pRenderPassBegin,
+ const struct radv_extra_render_pass_begin_info *extra_info);
void radv_cmd_buffer_end_render_pass(struct radv_cmd_buffer *cmd_buffer);
-static inline uint32_t si_translate_prim(unsigned topology)
+static inline uint32_t
+si_translate_prim(unsigned topology)
{
- switch (topology) {
- case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
- return V_008958_DI_PT_POINTLIST;
- case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
- return V_008958_DI_PT_LINELIST;
- case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
- return V_008958_DI_PT_LINESTRIP;
- case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
- return V_008958_DI_PT_TRILIST;
- case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
- return V_008958_DI_PT_TRISTRIP;
- case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
- return V_008958_DI_PT_TRIFAN;
- case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
- return V_008958_DI_PT_LINELIST_ADJ;
- case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
- return V_008958_DI_PT_LINESTRIP_ADJ;
- case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
- return V_008958_DI_PT_TRILIST_ADJ;
- case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
- return V_008958_DI_PT_TRISTRIP_ADJ;
- case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
- return V_008958_DI_PT_PATCH;
- default:
- assert(0);
- return 0;
- }
+ switch (topology) {
+ case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
+ return V_008958_DI_PT_POINTLIST;
+ case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
+ return V_008958_DI_PT_LINELIST;
+ case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
+ return V_008958_DI_PT_LINESTRIP;
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
+ return V_008958_DI_PT_TRILIST;
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
+ return V_008958_DI_PT_TRISTRIP;
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
+ return V_008958_DI_PT_TRIFAN;
+ case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
+ return V_008958_DI_PT_LINELIST_ADJ;
+ case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
+ return V_008958_DI_PT_LINESTRIP_ADJ;
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
+ return V_008958_DI_PT_TRILIST_ADJ;
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
+ return V_008958_DI_PT_TRISTRIP_ADJ;
+ case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
+ return V_008958_DI_PT_PATCH;
+ default:
+ assert(0);
+ return 0;
+ }
}
-static inline uint32_t si_translate_stencil_op(enum VkStencilOp op)
+static inline uint32_t
+si_translate_stencil_op(enum VkStencilOp op)
{
- switch (op) {
- case VK_STENCIL_OP_KEEP:
- return V_02842C_STENCIL_KEEP;
- case VK_STENCIL_OP_ZERO:
- return V_02842C_STENCIL_ZERO;
- case VK_STENCIL_OP_REPLACE:
- return V_02842C_STENCIL_REPLACE_TEST;
- case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
- return V_02842C_STENCIL_ADD_CLAMP;
- case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
- return V_02842C_STENCIL_SUB_CLAMP;
- case VK_STENCIL_OP_INVERT:
- return V_02842C_STENCIL_INVERT;
- case VK_STENCIL_OP_INCREMENT_AND_WRAP:
- return V_02842C_STENCIL_ADD_WRAP;
- case VK_STENCIL_OP_DECREMENT_AND_WRAP:
- return V_02842C_STENCIL_SUB_WRAP;
- default:
- return 0;
- }
+ switch (op) {
+ case VK_STENCIL_OP_KEEP:
+ return V_02842C_STENCIL_KEEP;
+ case VK_STENCIL_OP_ZERO:
+ return V_02842C_STENCIL_ZERO;
+ case VK_STENCIL_OP_REPLACE:
+ return V_02842C_STENCIL_REPLACE_TEST;
+ case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
+ return V_02842C_STENCIL_ADD_CLAMP;
+ case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
+ return V_02842C_STENCIL_SUB_CLAMP;
+ case VK_STENCIL_OP_INVERT:
+ return V_02842C_STENCIL_INVERT;
+ case VK_STENCIL_OP_INCREMENT_AND_WRAP:
+ return V_02842C_STENCIL_ADD_WRAP;
+ case VK_STENCIL_OP_DECREMENT_AND_WRAP:
+ return V_02842C_STENCIL_SUB_WRAP;
+ default:
+ return 0;
+ }
}
/**
@@ -2746,39 +2618,35 @@ static inline uint32_t si_translate_stencil_op(enum VkStencilOp op)
static inline bool
radv_use_llvm_for_stage(struct radv_device *device, UNUSED gl_shader_stage stage)
{
- return device->physical_device->use_llvm;
+ return device->physical_device->use_llvm;
}
-#define RADV_DEFINE_HANDLE_CASTS(__radv_type, __VkType) \
- \
- static inline struct __radv_type * \
- __radv_type ## _from_handle(__VkType _handle) \
- { \
- return (struct __radv_type *) _handle; \
- } \
- \
- static inline __VkType \
- __radv_type ## _to_handle(struct __radv_type *_obj) \
- { \
- return (__VkType) _obj; \
- }
-
-#define RADV_DEFINE_NONDISP_HANDLE_CASTS(__radv_type, __VkType) \
- \
- static inline struct __radv_type * \
- __radv_type ## _from_handle(__VkType _handle) \
- { \
- return (struct __radv_type *)(uintptr_t) _handle; \
- } \
- \
- static inline __VkType \
- __radv_type ## _to_handle(struct __radv_type *_obj) \
- { \
- return (__VkType)(uintptr_t) _obj; \
- }
-
-#define RADV_FROM_HANDLE(__radv_type, __name, __handle) \
- struct __radv_type *__name = __radv_type ## _from_handle(__handle)
+#define RADV_DEFINE_HANDLE_CASTS(__radv_type, __VkType) \
+ \
+ static inline struct __radv_type *__radv_type##_from_handle(__VkType _handle) \
+ { \
+ return (struct __radv_type *)_handle; \
+ } \
+ \
+ static inline __VkType __radv_type##_to_handle(struct __radv_type *_obj) \
+ { \
+ return (__VkType)_obj; \
+ }
+
+#define RADV_DEFINE_NONDISP_HANDLE_CASTS(__radv_type, __VkType) \
+ \
+ static inline struct __radv_type *__radv_type##_from_handle(__VkType _handle) \
+ { \
+ return (struct __radv_type *)(uintptr_t)_handle; \
+ } \
+ \
+ static inline __VkType __radv_type##_to_handle(struct __radv_type *_obj) \
+ { \
+ return (__VkType)(uintptr_t)_obj; \
+ }
+
+#define RADV_FROM_HANDLE(__radv_type, __name, __handle) \
+ struct __radv_type *__name = __radv_type##_from_handle(__handle)
RADV_DEFINE_HANDLE_CASTS(radv_cmd_buffer, VkCommandBuffer)
RADV_DEFINE_HANDLE_CASTS(radv_device, VkDevice)
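The handle-cast macros are easier to read once expanded. RADV_DEFINE_HANDLE_CASTS(radv_device, VkDevice), instantiated on the line above, boils down to the two inline casts below, and RADV_FROM_HANDLE is the one-liner used at the top of entry points. This is a hand expansion for illustration only, not generated output:

   #include <vulkan/vulkan.h>

   struct radv_device; /* an incomplete type is enough for the pointer casts */

   static inline struct radv_device *
   radv_device_from_handle(VkDevice _handle)
   {
      return (struct radv_device *)_handle;
   }

   static inline VkDevice
   radv_device_to_handle(struct radv_device *_obj)
   {
      return (VkDevice)_obj;
   }

   /* RADV_FROM_HANDLE(radv_device, device, _device) then expands to:
    *
    *    struct radv_device *device = radv_device_from_handle(_device);
    */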
diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c
index b4814d24f45..3038cfde6a4 100644
--- a/src/amd/vulkan/radv_query.c
+++ b/src/amd/vulkan/radv_query.c
@@ -24,16 +24,16 @@
*/
#include <assert.h>
+#include <fcntl.h>
#include <stdbool.h>
#include <string.h>
-#include <fcntl.h>
#include "nir/nir_builder.h"
+#include "util/u_atomic.h"
+#include "radv_cs.h"
#include "radv_meta.h"
#include "radv_private.h"
-#include "radv_cs.h"
#include "sid.h"
-#include "util/u_atomic.h"
#define TIMESTAMP_NOT_READY UINT64_MAX
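TIMESTAMP_NOT_READY is the sentinel radv uses for not-yet-written timestamp slots, so availability amounts to "the slot no longer holds the sentinel" (the timestamp query shader further down checks exactly this). A trivial host-side sketch of that test; timestamp_available is a hypothetical name, not a RADV symbol:

   #include <stdbool.h>
   #include <stdint.h>

   #define TIMESTAMP_NOT_READY UINT64_MAX

   /* A slot still holding the sentinel has not been written by the GPU yet. */
   static bool
   timestamp_available(uint64_t slot)
   {
      return slot != TIMESTAMP_NOT_READY;
   }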
@@ -43,1763 +43,1631 @@ static const unsigned pipeline_statistics_indices[] = {7, 6, 3, 4, 5, 2, 1, 0, 8
static unsigned
radv_get_pipeline_statistics_index(const VkQueryPipelineStatisticFlagBits flag)
{
- int offset = ffs(flag) - 1;
- assert(offset < ARRAY_SIZE(pipeline_statistics_indices));
- return pipeline_statistics_indices[offset];
+ int offset = ffs(flag) - 1;
+ assert(offset < ARRAY_SIZE(pipeline_statistics_indices));
+ return pipeline_statistics_indices[offset];
}
-static nir_ssa_def *nir_test_flag(nir_builder *b, nir_ssa_def *flags, uint32_t flag)
+static nir_ssa_def *
+nir_test_flag(nir_builder *b, nir_ssa_def *flags, uint32_t flag)
{
- return nir_i2b(b, nir_iand(b, flags, nir_imm_int(b, flag)));
+ return nir_i2b(b, nir_iand(b, flags, nir_imm_int(b, flag)));
}
-static void radv_break_on_count(nir_builder *b, nir_variable *var, nir_ssa_def *count)
+static void
+radv_break_on_count(nir_builder *b, nir_variable *var, nir_ssa_def *count)
{
- nir_ssa_def *counter = nir_load_var(b, var);
+ nir_ssa_def *counter = nir_load_var(b, var);
- nir_push_if(b, nir_uge(b, counter, count));
- nir_jump(b, nir_jump_break);
- nir_pop_if(b, NULL);
+ nir_push_if(b, nir_uge(b, counter, count));
+ nir_jump(b, nir_jump_break);
+ nir_pop_if(b, NULL);
- counter = nir_iadd(b, counter, nir_imm_int(b, 1));
- nir_store_var(b, var, counter, 0x1);
+ counter = nir_iadd(b, counter, nir_imm_int(b, 1));
+ nir_store_var(b, var, counter, 0x1);
}
static void
radv_store_availability(nir_builder *b, nir_ssa_def *flags, nir_ssa_def *dst_buf,
nir_ssa_def *offset, nir_ssa_def *value32)
{
- nir_push_if(b, nir_test_flag(b, flags, VK_QUERY_RESULT_WITH_AVAILABILITY_BIT));
+ nir_push_if(b, nir_test_flag(b, flags, VK_QUERY_RESULT_WITH_AVAILABILITY_BIT));
- nir_push_if(b, nir_test_flag(b, flags, VK_QUERY_RESULT_64_BIT));
+ nir_push_if(b, nir_test_flag(b, flags, VK_QUERY_RESULT_64_BIT));
- nir_store_ssbo(b, nir_vec2(b, value32, nir_imm_int(b, 0)),
- dst_buf, offset, .write_mask=0x3, .align_mul=8);
+ nir_store_ssbo(b, nir_vec2(b, value32, nir_imm_int(b, 0)), dst_buf, offset, .write_mask = 0x3,
+ .align_mul = 8);
- nir_push_else(b, NULL);
+ nir_push_else(b, NULL);
- nir_store_ssbo(b, value32, dst_buf, offset, .write_mask=0x1, .align_mul=4);
+ nir_store_ssbo(b, value32, dst_buf, offset, .write_mask = 0x1, .align_mul = 4);
- nir_pop_if(b, NULL);
+ nir_pop_if(b, NULL);
- nir_pop_if(b, NULL);
+ nir_pop_if(b, NULL);
}
static nir_shader *
-build_occlusion_query_shader(struct radv_device *device) {
- /* the shader this builds is roughly
- *
- * push constants {
- * uint32_t flags;
- * uint32_t dst_stride;
- * };
- *
- * uint32_t src_stride = 16 * db_count;
- *
- * location(binding = 0) buffer dst_buf;
- * location(binding = 1) buffer src_buf;
- *
- * void main() {
- * uint64_t result = 0;
- * uint64_t src_offset = src_stride * global_id.x;
- * uint64_t dst_offset = dst_stride * global_id.x;
- * bool available = true;
- * for (int i = 0; i < db_count; ++i) {
- * if (enabled_rb_mask & (1 << i)) {
- * uint64_t start = src_buf[src_offset + 16 * i];
- * uint64_t end = src_buf[src_offset + 16 * i + 8];
- * if ((start & (1ull << 63)) && (end & (1ull << 63)))
- * result += end - start;
- * else
- * available = false;
- * }
- * }
- * uint32_t elem_size = flags & VK_QUERY_RESULT_64_BIT ? 8 : 4;
- * if ((flags & VK_QUERY_RESULT_PARTIAL_BIT) || available) {
- * if (flags & VK_QUERY_RESULT_64_BIT)
- * dst_buf[dst_offset] = result;
- * else
- * dst_buf[dst_offset] = (uint32_t)result.
- * }
- * if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
- * dst_buf[dst_offset + elem_size] = available;
- * }
- * }
- */
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "occlusion_query");
- b.shader->info.cs.local_size[0] = 64;
- b.shader->info.cs.local_size[1] = 1;
- b.shader->info.cs.local_size[2] = 1;
-
- nir_variable *result = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "result");
- nir_variable *outer_counter = nir_local_variable_create(b.impl, glsl_int_type(), "outer_counter");
- nir_variable *start = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "start");
- nir_variable *end = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "end");
- nir_variable *available = nir_local_variable_create(b.impl, glsl_bool_type(), "available");
- unsigned enabled_rb_mask = device->physical_device->rad_info.enabled_rb_mask;
- unsigned db_count = device->physical_device->rad_info.max_render_backends;
-
- nir_ssa_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range=16);
-
- nir_ssa_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
- nir_ssa_def *src_buf = radv_meta_load_descriptor(&b, 0, 1);
-
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
- nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
- nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info.cs.local_size[0],
- b.shader->info.cs.local_size[1],
- b.shader->info.cs.local_size[2], 0);
- nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
- global_id = nir_channel(&b, global_id, 0); // We only care about x here.
-
- nir_ssa_def *input_stride = nir_imm_int(&b, db_count * 16);
- nir_ssa_def *input_base = nir_imul(&b, input_stride, global_id);
- nir_ssa_def *output_stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 4), .range=16);
- nir_ssa_def *output_base = nir_imul(&b, output_stride, global_id);
-
-
- nir_store_var(&b, result, nir_imm_int64(&b, 0), 0x1);
- nir_store_var(&b, outer_counter, nir_imm_int(&b, 0), 0x1);
- nir_store_var(&b, available, nir_imm_true(&b), 0x1);
-
- nir_push_loop(&b);
-
- nir_ssa_def *current_outer_count = nir_load_var(&b, outer_counter);
- radv_break_on_count(&b, outer_counter, nir_imm_int(&b, db_count));
-
- nir_ssa_def *enabled_cond =
- nir_iand(&b, nir_imm_int(&b, enabled_rb_mask),
- nir_ishl(&b, nir_imm_int(&b, 1), current_outer_count));
-
- nir_push_if(&b, nir_i2b(&b, enabled_cond));
-
- nir_ssa_def *load_offset = nir_imul(&b, current_outer_count, nir_imm_int(&b, 16));
- load_offset = nir_iadd(&b, input_base, load_offset);
-
- nir_ssa_def *load = nir_load_ssbo(&b, 2, 64, src_buf, load_offset, .align_mul=16);
-
- nir_store_var(&b, start, nir_channel(&b, load, 0), 0x1);
- nir_store_var(&b, end, nir_channel(&b, load, 1), 0x1);
-
- nir_ssa_def *start_done = nir_ilt(&b, nir_load_var(&b, start), nir_imm_int64(&b, 0));
- nir_ssa_def *end_done = nir_ilt(&b, nir_load_var(&b, end), nir_imm_int64(&b, 0));
-
- nir_push_if(&b, nir_iand(&b, start_done, end_done));
-
- nir_store_var(&b, result,
- nir_iadd(&b, nir_load_var(&b, result),
- nir_isub(&b, nir_load_var(&b, end),
- nir_load_var(&b, start))), 0x1);
-
- nir_push_else(&b, NULL);
-
- nir_store_var(&b, available, nir_imm_false(&b), 0x1);
-
- nir_pop_if(&b, NULL);
- nir_pop_if(&b, NULL);
- nir_pop_loop(&b, NULL);
-
- /* Store the result if complete or if partial results have been requested. */
-
- nir_ssa_def *result_is_64bit = nir_test_flag(&b, flags, VK_QUERY_RESULT_64_BIT);
- nir_ssa_def *result_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8), nir_imm_int(&b, 4));
- nir_push_if(&b,
- nir_ior(&b,
- nir_test_flag(&b, flags, VK_QUERY_RESULT_PARTIAL_BIT),
- nir_load_var(&b, available)));
-
- nir_push_if(&b, result_is_64bit);
-
- nir_store_ssbo(&b, nir_load_var(&b, result), dst_buf, output_base,
- .write_mask=0x1, .align_mul=8);
+build_occlusion_query_shader(struct radv_device *device)
+{
+ /* the shader this builds is roughly
+ *
+ * push constants {
+ * uint32_t flags;
+ * uint32_t dst_stride;
+ * };
+ *
+ * uint32_t src_stride = 16 * db_count;
+ *
+ * location(binding = 0) buffer dst_buf;
+ * location(binding = 1) buffer src_buf;
+ *
+ * void main() {
+ * uint64_t result = 0;
+ * uint64_t src_offset = src_stride * global_id.x;
+ * uint64_t dst_offset = dst_stride * global_id.x;
+ * bool available = true;
+ * for (int i = 0; i < db_count; ++i) {
+ * if (enabled_rb_mask & (1 << i)) {
+ * uint64_t start = src_buf[src_offset + 16 * i];
+ * uint64_t end = src_buf[src_offset + 16 * i + 8];
+ * if ((start & (1ull << 63)) && (end & (1ull << 63)))
+ * result += end - start;
+ * else
+ * available = false;
+ * }
+ * }
+ * uint32_t elem_size = flags & VK_QUERY_RESULT_64_BIT ? 8 : 4;
+ * if ((flags & VK_QUERY_RESULT_PARTIAL_BIT) || available) {
+ * if (flags & VK_QUERY_RESULT_64_BIT)
+ * dst_buf[dst_offset] = result;
+ * else
+ * dst_buf[dst_offset] = (uint32_t)result.
+ * }
+ * if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
+ * dst_buf[dst_offset + elem_size] = available;
+ * }
+ * }
+ */
+ nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "occlusion_query");
+ b.shader->info.cs.local_size[0] = 64;
+ b.shader->info.cs.local_size[1] = 1;
+ b.shader->info.cs.local_size[2] = 1;
+
+ nir_variable *result = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "result");
+ nir_variable *outer_counter =
+ nir_local_variable_create(b.impl, glsl_int_type(), "outer_counter");
+ nir_variable *start = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "start");
+ nir_variable *end = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "end");
+ nir_variable *available = nir_local_variable_create(b.impl, glsl_bool_type(), "available");
+ unsigned enabled_rb_mask = device->physical_device->rad_info.enabled_rb_mask;
+ unsigned db_count = device->physical_device->rad_info.max_render_backends;
+
+ nir_ssa_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range = 16);
+
+ nir_ssa_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
+ nir_ssa_def *src_buf = radv_meta_load_descriptor(&b, 0, 1);
+
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
+ nir_ssa_def *block_size =
+ nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1],
+ b.shader->info.cs.local_size[2], 0);
+ nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+ global_id = nir_channel(&b, global_id, 0); // We only care about x here.
+
+ nir_ssa_def *input_stride = nir_imm_int(&b, db_count * 16);
+ nir_ssa_def *input_base = nir_imul(&b, input_stride, global_id);
+ nir_ssa_def *output_stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 4), .range = 16);
+ nir_ssa_def *output_base = nir_imul(&b, output_stride, global_id);
+
+ nir_store_var(&b, result, nir_imm_int64(&b, 0), 0x1);
+ nir_store_var(&b, outer_counter, nir_imm_int(&b, 0), 0x1);
+ nir_store_var(&b, available, nir_imm_true(&b), 0x1);
+
+ nir_push_loop(&b);
+
+ nir_ssa_def *current_outer_count = nir_load_var(&b, outer_counter);
+ radv_break_on_count(&b, outer_counter, nir_imm_int(&b, db_count));
+
+ nir_ssa_def *enabled_cond = nir_iand(&b, nir_imm_int(&b, enabled_rb_mask),
+ nir_ishl(&b, nir_imm_int(&b, 1), current_outer_count));
+
+ nir_push_if(&b, nir_i2b(&b, enabled_cond));
+
+ nir_ssa_def *load_offset = nir_imul(&b, current_outer_count, nir_imm_int(&b, 16));
+ load_offset = nir_iadd(&b, input_base, load_offset);
+
+ nir_ssa_def *load = nir_load_ssbo(&b, 2, 64, src_buf, load_offset, .align_mul = 16);
+
+ nir_store_var(&b, start, nir_channel(&b, load, 0), 0x1);
+ nir_store_var(&b, end, nir_channel(&b, load, 1), 0x1);
+
+ nir_ssa_def *start_done = nir_ilt(&b, nir_load_var(&b, start), nir_imm_int64(&b, 0));
+ nir_ssa_def *end_done = nir_ilt(&b, nir_load_var(&b, end), nir_imm_int64(&b, 0));
+
+ nir_push_if(&b, nir_iand(&b, start_done, end_done));
+
+ nir_store_var(&b, result,
+ nir_iadd(&b, nir_load_var(&b, result),
+ nir_isub(&b, nir_load_var(&b, end), nir_load_var(&b, start))),
+ 0x1);
+
+ nir_push_else(&b, NULL);
+
+ nir_store_var(&b, available, nir_imm_false(&b), 0x1);
+
+ nir_pop_if(&b, NULL);
+ nir_pop_if(&b, NULL);
+ nir_pop_loop(&b, NULL);
+
+ /* Store the result if complete or if partial results have been requested. */
+
+ nir_ssa_def *result_is_64bit = nir_test_flag(&b, flags, VK_QUERY_RESULT_64_BIT);
+ nir_ssa_def *result_size =
+ nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8), nir_imm_int(&b, 4));
+ nir_push_if(&b, nir_ior(&b, nir_test_flag(&b, flags, VK_QUERY_RESULT_PARTIAL_BIT),
+ nir_load_var(&b, available)));
+
+ nir_push_if(&b, result_is_64bit);
+
+ nir_store_ssbo(&b, nir_load_var(&b, result), dst_buf, output_base, .write_mask = 0x1,
+ .align_mul = 8);
- nir_push_else(&b, NULL);
+ nir_push_else(&b, NULL);
- nir_store_ssbo(&b, nir_u2u32(&b, nir_load_var(&b, result)), dst_buf,
- output_base, .write_mask=0x1, .align_mul=8);
+ nir_store_ssbo(&b, nir_u2u32(&b, nir_load_var(&b, result)), dst_buf, output_base,
+ .write_mask = 0x1, .align_mul = 8);
- nir_pop_if(&b, NULL);
- nir_pop_if(&b, NULL);
+ nir_pop_if(&b, NULL);
+ nir_pop_if(&b, NULL);
- radv_store_availability(&b, flags, dst_buf,
- nir_iadd(&b, result_size, output_base),
- nir_b2i32(&b, nir_load_var(&b, available)));
+ radv_store_availability(&b, flags, dst_buf, nir_iadd(&b, result_size, output_base),
+ nir_b2i32(&b, nir_load_var(&b, available)));
- return b.shader;
+ return b.shader;
}
static nir_shader *
-build_pipeline_statistics_query_shader(struct radv_device *device) {
- /* the shader this builds is roughly
- *
- * push constants {
- * uint32_t flags;
- * uint32_t dst_stride;
- * uint32_t stats_mask;
- * uint32_t avail_offset;
- * };
- *
- * uint32_t src_stride = pipelinestat_block_size * 2;
- *
- * location(binding = 0) buffer dst_buf;
- * location(binding = 1) buffer src_buf;
- *
- * void main() {
- * uint64_t src_offset = src_stride * global_id.x;
- * uint64_t dst_base = dst_stride * global_id.x;
- * uint64_t dst_offset = dst_base;
- * uint32_t elem_size = flags & VK_QUERY_RESULT_64_BIT ? 8 : 4;
- * uint32_t elem_count = stats_mask >> 16;
- * uint32_t available32 = src_buf[avail_offset + 4 * global_id.x];
- * if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
- * dst_buf[dst_offset + elem_count * elem_size] = available32;
- * }
- * if ((bool)available32) {
- * // repeat 11 times:
- * if (stats_mask & (1 << 0)) {
- * uint64_t start = src_buf[src_offset + 8 * indices[0]];
- * uint64_t end = src_buf[src_offset + 8 * indices[0] + pipelinestat_block_size];
- * uint64_t result = end - start;
- * if (flags & VK_QUERY_RESULT_64_BIT)
- * dst_buf[dst_offset] = result;
- * else
- * dst_buf[dst_offset] = (uint32_t)result.
- * dst_offset += elem_size;
- * }
- * } else if (flags & VK_QUERY_RESULT_PARTIAL_BIT) {
- * // Set everything to 0 as we don't know what is valid.
- * for (int i = 0; i < elem_count; ++i)
- * dst_buf[dst_base + elem_size * i] = 0;
- * }
- * }
- */
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "pipeline_statistics_query");
- b.shader->info.cs.local_size[0] = 64;
- b.shader->info.cs.local_size[1] = 1;
- b.shader->info.cs.local_size[2] = 1;
-
- nir_variable *output_offset = nir_local_variable_create(b.impl, glsl_int_type(), "output_offset");
-
- nir_ssa_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range=16);
- nir_ssa_def *stats_mask = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 8), .range=16);
- nir_ssa_def *avail_offset = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range=16);
-
- nir_ssa_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
- nir_ssa_def *src_buf = radv_meta_load_descriptor(&b, 0, 1);
-
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
- nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
- nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info.cs.local_size[0],
- b.shader->info.cs.local_size[1],
- b.shader->info.cs.local_size[2], 0);
- nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
- global_id = nir_channel(&b, global_id, 0); // We only care about x here.
-
- nir_ssa_def *input_stride = nir_imm_int(&b, pipelinestat_block_size * 2);
- nir_ssa_def *input_base = nir_imul(&b, input_stride, global_id);
- nir_ssa_def *output_stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 4), .range=16);
- nir_ssa_def *output_base = nir_imul(&b, output_stride, global_id);
-
-
- avail_offset = nir_iadd(&b, avail_offset,
- nir_imul(&b, global_id, nir_imm_int(&b, 4)));
-
- nir_ssa_def *available32 = nir_load_ssbo(&b, 1, 32, src_buf, avail_offset, .align_mul=4);
-
- nir_ssa_def *result_is_64bit = nir_test_flag(&b, flags, VK_QUERY_RESULT_64_BIT);
- nir_ssa_def *elem_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8), nir_imm_int(&b, 4));
- nir_ssa_def *elem_count = nir_ushr(&b, stats_mask, nir_imm_int(&b, 16));
-
- radv_store_availability(&b, flags, dst_buf,
- nir_iadd(&b, output_base, nir_imul(&b, elem_count, elem_size)),
- available32);
-
- nir_push_if(&b, nir_i2b(&b, available32));
-
- nir_store_var(&b, output_offset, output_base, 0x1);
- for (int i = 0; i < ARRAY_SIZE(pipeline_statistics_indices); ++i) {
- nir_push_if(&b, nir_test_flag(&b, stats_mask, 1u << i));
-
- nir_ssa_def *start_offset = nir_iadd(&b, input_base,
- nir_imm_int(&b, pipeline_statistics_indices[i] * 8));
- nir_ssa_def *start = nir_load_ssbo(&b, 1, 64, src_buf, start_offset, .align_mul=8);
-
- nir_ssa_def *end_offset = nir_iadd(&b, input_base,
- nir_imm_int(&b, pipeline_statistics_indices[i] * 8 + pipelinestat_block_size));
- nir_ssa_def *end = nir_load_ssbo(&b, 1, 64, src_buf, end_offset, .align_mul=8);
-
- nir_ssa_def *result = nir_isub(&b, end, start);
-
- /* Store result */
- nir_push_if(&b, result_is_64bit);
-
- nir_store_ssbo(&b, result, dst_buf, nir_load_var(&b, output_offset),
- .write_mask=0x1, .align_mul=8);
-
- nir_push_else(&b, NULL);
-
- nir_store_ssbo(&b, nir_u2u32(&b, result), dst_buf, nir_load_var(&b, output_offset),
- .write_mask=0x1, .align_mul=4);
-
- nir_pop_if(&b, NULL);
-
- nir_store_var(&b, output_offset,
- nir_iadd(&b, nir_load_var(&b, output_offset),
- elem_size), 0x1);
+build_pipeline_statistics_query_shader(struct radv_device *device)
+{
+ /* the shader this builds is roughly
+ *
+ * push constants {
+ * uint32_t flags;
+ * uint32_t dst_stride;
+ * uint32_t stats_mask;
+ * uint32_t avail_offset;
+ * };
+ *
+ * uint32_t src_stride = pipelinestat_block_size * 2;
+ *
+ * location(binding = 0) buffer dst_buf;
+ * location(binding = 1) buffer src_buf;
+ *
+ * void main() {
+ * uint64_t src_offset = src_stride * global_id.x;
+ * uint64_t dst_base = dst_stride * global_id.x;
+ * uint64_t dst_offset = dst_base;
+ * uint32_t elem_size = flags & VK_QUERY_RESULT_64_BIT ? 8 : 4;
+ * uint32_t elem_count = stats_mask >> 16;
+ * uint32_t available32 = src_buf[avail_offset + 4 * global_id.x];
+ * if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
+ * dst_buf[dst_offset + elem_count * elem_size] = available32;
+ * }
+ * if ((bool)available32) {
+ * // repeat 11 times:
+ * if (stats_mask & (1 << 0)) {
+ * uint64_t start = src_buf[src_offset + 8 * indices[0]];
+ *        uint64_t end = src_buf[src_offset + 8 * indices[0] + pipelinestat_block_size];
+ *        uint64_t result = end - start;
+ *        if (flags & VK_QUERY_RESULT_64_BIT)
+ * dst_buf[dst_offset] = result;
+ * else
+ * dst_buf[dst_offset] = (uint32_t)result.
+ * dst_offset += elem_size;
+ * }
+ * } else if (flags & VK_QUERY_RESULT_PARTIAL_BIT) {
+ * // Set everything to 0 as we don't know what is valid.
+ * for (int i = 0; i < elem_count; ++i)
+ * dst_buf[dst_base + elem_size * i] = 0;
+ * }
+ * }
+ */
+ nir_builder b =
+ nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "pipeline_statistics_query");
+ b.shader->info.cs.local_size[0] = 64;
+ b.shader->info.cs.local_size[1] = 1;
+ b.shader->info.cs.local_size[2] = 1;
+
+ nir_variable *output_offset =
+ nir_local_variable_create(b.impl, glsl_int_type(), "output_offset");
+
+ nir_ssa_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range = 16);
+ nir_ssa_def *stats_mask = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 8), .range = 16);
+ nir_ssa_def *avail_offset = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);
+
+ nir_ssa_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
+ nir_ssa_def *src_buf = radv_meta_load_descriptor(&b, 0, 1);
+
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
+ nir_ssa_def *block_size =
+ nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1],
+ b.shader->info.cs.local_size[2], 0);
+ nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+ global_id = nir_channel(&b, global_id, 0); // We only care about x here.
+
+ nir_ssa_def *input_stride = nir_imm_int(&b, pipelinestat_block_size * 2);
+ nir_ssa_def *input_base = nir_imul(&b, input_stride, global_id);
+ nir_ssa_def *output_stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 4), .range = 16);
+ nir_ssa_def *output_base = nir_imul(&b, output_stride, global_id);
+
+ avail_offset = nir_iadd(&b, avail_offset, nir_imul(&b, global_id, nir_imm_int(&b, 4)));
+
+ nir_ssa_def *available32 = nir_load_ssbo(&b, 1, 32, src_buf, avail_offset, .align_mul = 4);
+
+ nir_ssa_def *result_is_64bit = nir_test_flag(&b, flags, VK_QUERY_RESULT_64_BIT);
+ nir_ssa_def *elem_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8), nir_imm_int(&b, 4));
+ nir_ssa_def *elem_count = nir_ushr(&b, stats_mask, nir_imm_int(&b, 16));
+
+ radv_store_availability(&b, flags, dst_buf,
+ nir_iadd(&b, output_base, nir_imul(&b, elem_count, elem_size)),
+ available32);
+
+ nir_push_if(&b, nir_i2b(&b, available32));
+
+ nir_store_var(&b, output_offset, output_base, 0x1);
+ for (int i = 0; i < ARRAY_SIZE(pipeline_statistics_indices); ++i) {
+ nir_push_if(&b, nir_test_flag(&b, stats_mask, 1u << i));
+
+ nir_ssa_def *start_offset =
+ nir_iadd(&b, input_base, nir_imm_int(&b, pipeline_statistics_indices[i] * 8));
+ nir_ssa_def *start = nir_load_ssbo(&b, 1, 64, src_buf, start_offset, .align_mul = 8);
+
+ nir_ssa_def *end_offset =
+ nir_iadd(&b, input_base,
+ nir_imm_int(&b, pipeline_statistics_indices[i] * 8 + pipelinestat_block_size));
+ nir_ssa_def *end = nir_load_ssbo(&b, 1, 64, src_buf, end_offset, .align_mul = 8);
+
+ nir_ssa_def *result = nir_isub(&b, end, start);
+
+ /* Store result */
+ nir_push_if(&b, result_is_64bit);
+
+ nir_store_ssbo(&b, result, dst_buf, nir_load_var(&b, output_offset), .write_mask = 0x1,
+ .align_mul = 8);
+
+ nir_push_else(&b, NULL);
+
+ nir_store_ssbo(&b, nir_u2u32(&b, result), dst_buf, nir_load_var(&b, output_offset),
+ .write_mask = 0x1, .align_mul = 4);
+
+ nir_pop_if(&b, NULL);
+
+ nir_store_var(&b, output_offset, nir_iadd(&b, nir_load_var(&b, output_offset), elem_size),
+ 0x1);
- nir_pop_if(&b, NULL);
- }
+ nir_pop_if(&b, NULL);
+ }
- nir_push_else(&b, NULL); /* nir_i2b(&b, available32) */
+ nir_push_else(&b, NULL); /* nir_i2b(&b, available32) */
- nir_push_if(&b, nir_test_flag(&b, flags, VK_QUERY_RESULT_PARTIAL_BIT));
+ nir_push_if(&b, nir_test_flag(&b, flags, VK_QUERY_RESULT_PARTIAL_BIT));
- /* Stores zeros in all outputs. */
+ /* Stores zeros in all outputs. */
- nir_variable *counter = nir_local_variable_create(b.impl, glsl_int_type(), "counter");
- nir_store_var(&b, counter, nir_imm_int(&b, 0), 0x1);
+ nir_variable *counter = nir_local_variable_create(b.impl, glsl_int_type(), "counter");
+ nir_store_var(&b, counter, nir_imm_int(&b, 0), 0x1);
- nir_loop *loop = nir_push_loop(&b);
+ nir_loop *loop = nir_push_loop(&b);
- nir_ssa_def *current_counter = nir_load_var(&b, counter);
- radv_break_on_count(&b, counter, elem_count);
+ nir_ssa_def *current_counter = nir_load_var(&b, counter);
+ radv_break_on_count(&b, counter, elem_count);
- nir_ssa_def *output_elem = nir_iadd(&b, output_base,
- nir_imul(&b, elem_size, current_counter));
- nir_push_if(&b, result_is_64bit);
+ nir_ssa_def *output_elem = nir_iadd(&b, output_base, nir_imul(&b, elem_size, current_counter));
+ nir_push_if(&b, result_is_64bit);
- nir_store_ssbo(&b, nir_imm_int64(&b, 0), dst_buf, output_elem,
- .write_mask=0x1, .align_mul=8);
+ nir_store_ssbo(&b, nir_imm_int64(&b, 0), dst_buf, output_elem, .write_mask = 0x1,
+ .align_mul = 8);
- nir_push_else(&b, NULL);
+ nir_push_else(&b, NULL);
- nir_store_ssbo(&b, nir_imm_int(&b, 0), dst_buf, output_elem,
- .write_mask=0x1, .align_mul=4);
+ nir_store_ssbo(&b, nir_imm_int(&b, 0), dst_buf, output_elem, .write_mask = 0x1, .align_mul = 4);
- nir_pop_if(&b, NULL);
+ nir_pop_if(&b, NULL);
- nir_pop_loop(&b, loop);
- nir_pop_if(&b, NULL); /* VK_QUERY_RESULT_PARTIAL_BIT */
- nir_pop_if(&b, NULL); /* nir_i2b(&b, available32) */
- return b.shader;
+ nir_pop_loop(&b, loop);
+ nir_pop_if(&b, NULL); /* VK_QUERY_RESULT_PARTIAL_BIT */
+ nir_pop_if(&b, NULL); /* nir_i2b(&b, available32) */
+ return b.shader;
}
static nir_shader *
build_tfb_query_shader(struct radv_device *device)
{
- /* the shader this builds is roughly
- *
- * uint32_t src_stride = 32;
- *
- * location(binding = 0) buffer dst_buf;
- * location(binding = 1) buffer src_buf;
- *
- * void main() {
- * uint64_t result[2] = {};
- * bool available = false;
- * uint64_t src_offset = src_stride * global_id.x;
- * uint64_t dst_offset = dst_stride * global_id.x;
- * uint64_t *src_data = src_buf[src_offset];
- * uint32_t avail = (src_data[0] >> 32) &
- * (src_data[1] >> 32) &
- * (src_data[2] >> 32) &
- * (src_data[3] >> 32);
- * if (avail & 0x80000000) {
- * result[0] = src_data[3] - src_data[1];
- * result[1] = src_data[2] - src_data[0];
- * available = true;
- * }
- * uint32_t result_size = flags & VK_QUERY_RESULT_64_BIT ? 16 : 8;
- * if ((flags & VK_QUERY_RESULT_PARTIAL_BIT) || available) {
- * if (flags & VK_QUERY_RESULT_64_BIT) {
- * dst_buf[dst_offset] = result;
- * } else {
- * dst_buf[dst_offset] = (uint32_t)result;
- * }
- * }
- * if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
- * dst_buf[dst_offset + result_size] = available;
- * }
- * }
- */
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "tfb_query");
- b.shader->info.cs.local_size[0] = 64;
- b.shader->info.cs.local_size[1] = 1;
- b.shader->info.cs.local_size[2] = 1;
-
- /* Create and initialize local variables. */
- nir_variable *result =
- nir_local_variable_create(b.impl,
- glsl_vector_type(GLSL_TYPE_UINT64, 2),
- "result");
- nir_variable *available =
- nir_local_variable_create(b.impl, glsl_bool_type(), "available");
-
- nir_store_var(&b, result,
- nir_vec2(&b, nir_imm_int64(&b, 0),
- nir_imm_int64(&b, 0)), 0x3);
- nir_store_var(&b, available, nir_imm_false(&b), 0x1);
-
- nir_ssa_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range=16);
-
- /* Load resources. */
- nir_ssa_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
- nir_ssa_def *src_buf = radv_meta_load_descriptor(&b, 0, 1);
-
- /* Compute global ID. */
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
- nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
- nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info.cs.local_size[0],
- b.shader->info.cs.local_size[1],
- b.shader->info.cs.local_size[2], 0);
- nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
- global_id = nir_channel(&b, global_id, 0); // We only care about x here.
-
- /* Compute src/dst strides. */
- nir_ssa_def *input_stride = nir_imm_int(&b, 32);
- nir_ssa_def *input_base = nir_imul(&b, input_stride, global_id);
- nir_ssa_def *output_stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 4), .range=16);
- nir_ssa_def *output_base = nir_imul(&b, output_stride, global_id);
-
- /* Load data from the query pool. */
- nir_ssa_def *load1 = nir_load_ssbo(&b, 4, 32, src_buf, input_base, .align_mul=32);
- nir_ssa_def *load2 = nir_load_ssbo(&b, 4, 32, src_buf,
- nir_iadd(&b, input_base, nir_imm_int(&b, 16)),
- .align_mul=16);
-
- /* Check if result is available. */
- nir_ssa_def *avails[2];
- avails[0] = nir_iand(&b, nir_channel(&b, load1, 1),
- nir_channel(&b, load1, 3));
- avails[1] = nir_iand(&b, nir_channel(&b, load2, 1),
- nir_channel(&b, load2, 3));
- nir_ssa_def *result_is_available =
- nir_i2b(&b, nir_iand(&b, nir_iand(&b, avails[0], avails[1]),
- nir_imm_int(&b, 0x80000000)));
-
- /* Only compute result if available. */
- nir_push_if(&b, result_is_available);
-
- /* Pack values. */
- nir_ssa_def *packed64[4];
- packed64[0] = nir_pack_64_2x32(&b, nir_vec2(&b,
- nir_channel(&b, load1, 0),
- nir_channel(&b, load1, 1)));
- packed64[1] = nir_pack_64_2x32(&b, nir_vec2(&b,
- nir_channel(&b, load1, 2),
- nir_channel(&b, load1, 3)));
- packed64[2] = nir_pack_64_2x32(&b, nir_vec2(&b,
- nir_channel(&b, load2, 0),
- nir_channel(&b, load2, 1)));
- packed64[3] = nir_pack_64_2x32(&b, nir_vec2(&b,
- nir_channel(&b, load2, 2),
- nir_channel(&b, load2, 3)));
-
- /* Compute result. */
- nir_ssa_def *num_primitive_written =
- nir_isub(&b, packed64[3], packed64[1]);
- nir_ssa_def *primitive_storage_needed =
- nir_isub(&b, packed64[2], packed64[0]);
-
- nir_store_var(&b, result,
- nir_vec2(&b, num_primitive_written,
- primitive_storage_needed), 0x3);
- nir_store_var(&b, available, nir_imm_true(&b), 0x1);
-
- nir_pop_if(&b, NULL);
-
- /* Determine if result is 64 or 32 bit. */
- nir_ssa_def *result_is_64bit =
- nir_test_flag(&b, flags, VK_QUERY_RESULT_64_BIT);
- nir_ssa_def *result_size =
- nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 16),
- nir_imm_int(&b, 8));
-
- /* Store the result if complete or partial results have been requested. */
- nir_push_if(&b,
- nir_ior(&b,
- nir_test_flag(&b, flags, VK_QUERY_RESULT_PARTIAL_BIT),
- nir_load_var(&b, available)));
-
- /* Store result. */
- nir_push_if(&b, result_is_64bit);
-
- nir_store_ssbo(&b, nir_load_var(&b, result), dst_buf, output_base,
- .write_mask=0x3, .align_mul=8);
-
- nir_push_else(&b, NULL);
-
- nir_store_ssbo(&b, nir_u2u32(&b, nir_load_var(&b, result)), dst_buf,
- output_base, .write_mask=0x3, .align_mul=4);
-
- nir_pop_if(&b, NULL);
- nir_pop_if(&b, NULL);
-
- radv_store_availability(&b, flags, dst_buf,
- nir_iadd(&b, result_size, output_base),
- nir_b2i32(&b, nir_load_var(&b, available)));
-
- return b.shader;
+ /* the shader this builds is roughly
+ *
+ * uint32_t src_stride = 32;
+ *
+ * location(binding = 0) buffer dst_buf;
+ * location(binding = 1) buffer src_buf;
+ *
+ * void main() {
+ * uint64_t result[2] = {};
+ * bool available = false;
+ * uint64_t src_offset = src_stride * global_id.x;
+ * uint64_t dst_offset = dst_stride * global_id.x;
+ * uint64_t *src_data = src_buf[src_offset];
+ * uint32_t avail = (src_data[0] >> 32) &
+ * (src_data[1] >> 32) &
+ * (src_data[2] >> 32) &
+ * (src_data[3] >> 32);
+ * if (avail & 0x80000000) {
+ * result[0] = src_data[3] - src_data[1];
+ * result[1] = src_data[2] - src_data[0];
+ * available = true;
+ * }
+ * uint32_t result_size = flags & VK_QUERY_RESULT_64_BIT ? 16 : 8;
+ * if ((flags & VK_QUERY_RESULT_PARTIAL_BIT) || available) {
+ * if (flags & VK_QUERY_RESULT_64_BIT) {
+ * dst_buf[dst_offset] = result;
+ * } else {
+ * dst_buf[dst_offset] = (uint32_t)result;
+ * }
+ * }
+ * if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
+ * dst_buf[dst_offset + result_size] = available;
+ * }
+ * }
+ */
+ nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "tfb_query");
+ b.shader->info.cs.local_size[0] = 64;
+ b.shader->info.cs.local_size[1] = 1;
+ b.shader->info.cs.local_size[2] = 1;
+
+ /* Create and initialize local variables. */
+ nir_variable *result =
+ nir_local_variable_create(b.impl, glsl_vector_type(GLSL_TYPE_UINT64, 2), "result");
+ nir_variable *available = nir_local_variable_create(b.impl, glsl_bool_type(), "available");
+
+ nir_store_var(&b, result, nir_vec2(&b, nir_imm_int64(&b, 0), nir_imm_int64(&b, 0)), 0x3);
+ nir_store_var(&b, available, nir_imm_false(&b), 0x1);
+
+ nir_ssa_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range = 16);
+
+ /* Load resources. */
+ nir_ssa_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
+ nir_ssa_def *src_buf = radv_meta_load_descriptor(&b, 0, 1);
+
+ /* Compute global ID. */
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
+ nir_ssa_def *block_size =
+ nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1],
+ b.shader->info.cs.local_size[2], 0);
+ nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+ global_id = nir_channel(&b, global_id, 0); // We only care about x here.
+
+ /* Compute src/dst strides. */
+ nir_ssa_def *input_stride = nir_imm_int(&b, 32);
+ nir_ssa_def *input_base = nir_imul(&b, input_stride, global_id);
+ nir_ssa_def *output_stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 4), .range = 16);
+ nir_ssa_def *output_base = nir_imul(&b, output_stride, global_id);
+
+ /* Load data from the query pool. */
+ nir_ssa_def *load1 = nir_load_ssbo(&b, 4, 32, src_buf, input_base, .align_mul = 32);
+ nir_ssa_def *load2 = nir_load_ssbo(
+ &b, 4, 32, src_buf, nir_iadd(&b, input_base, nir_imm_int(&b, 16)), .align_mul = 16);
+
+ /* Check if result is available. */
+ nir_ssa_def *avails[2];
+ avails[0] = nir_iand(&b, nir_channel(&b, load1, 1), nir_channel(&b, load1, 3));
+ avails[1] = nir_iand(&b, nir_channel(&b, load2, 1), nir_channel(&b, load2, 3));
+ nir_ssa_def *result_is_available =
+ nir_i2b(&b, nir_iand(&b, nir_iand(&b, avails[0], avails[1]), nir_imm_int(&b, 0x80000000)));
+
+ /* Only compute result if available. */
+ nir_push_if(&b, result_is_available);
+
+ /* Pack values. */
+ nir_ssa_def *packed64[4];
+ packed64[0] =
+ nir_pack_64_2x32(&b, nir_vec2(&b, nir_channel(&b, load1, 0), nir_channel(&b, load1, 1)));
+ packed64[1] =
+ nir_pack_64_2x32(&b, nir_vec2(&b, nir_channel(&b, load1, 2), nir_channel(&b, load1, 3)));
+ packed64[2] =
+ nir_pack_64_2x32(&b, nir_vec2(&b, nir_channel(&b, load2, 0), nir_channel(&b, load2, 1)));
+ packed64[3] =
+ nir_pack_64_2x32(&b, nir_vec2(&b, nir_channel(&b, load2, 2), nir_channel(&b, load2, 3)));
+
+ /* Compute result. */
+ nir_ssa_def *num_primitive_written = nir_isub(&b, packed64[3], packed64[1]);
+ nir_ssa_def *primitive_storage_needed = nir_isub(&b, packed64[2], packed64[0]);
+
+ nir_store_var(&b, result, nir_vec2(&b, num_primitive_written, primitive_storage_needed), 0x3);
+ nir_store_var(&b, available, nir_imm_true(&b), 0x1);
+
+ nir_pop_if(&b, NULL);
+
+ /* Determine if result is 64 or 32 bit. */
+ nir_ssa_def *result_is_64bit = nir_test_flag(&b, flags, VK_QUERY_RESULT_64_BIT);
+ nir_ssa_def *result_size =
+ nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 16), nir_imm_int(&b, 8));
+
+ /* Store the result if complete or partial results have been requested. */
+ nir_push_if(&b, nir_ior(&b, nir_test_flag(&b, flags, VK_QUERY_RESULT_PARTIAL_BIT),
+ nir_load_var(&b, available)));
+
+ /* Store result. */
+ nir_push_if(&b, result_is_64bit);
+
+ nir_store_ssbo(&b, nir_load_var(&b, result), dst_buf, output_base, .write_mask = 0x3,
+ .align_mul = 8);
+
+ nir_push_else(&b, NULL);
+
+ nir_store_ssbo(&b, nir_u2u32(&b, nir_load_var(&b, result)), dst_buf, output_base,
+ .write_mask = 0x3, .align_mul = 4);
+
+ nir_pop_if(&b, NULL);
+ nir_pop_if(&b, NULL);
+
+ radv_store_availability(&b, flags, dst_buf, nir_iadd(&b, result_size, output_base),
+ nir_b2i32(&b, nir_load_var(&b, available)));
+
+ return b.shader;
}
static nir_shader *
build_timestamp_query_shader(struct radv_device *device)
{
- /* the shader this builds is roughly
- *
- * uint32_t src_stride = 8;
- *
- * location(binding = 0) buffer dst_buf;
- * location(binding = 1) buffer src_buf;
- *
- * void main() {
- * uint64_t result = 0;
- * bool available = false;
- * uint64_t src_offset = src_stride * global_id.x;
- * uint64_t dst_offset = dst_stride * global_id.x;
- * uint64_t timestamp = src_buf[src_offset];
- * if (timestamp != TIMESTAMP_NOT_READY) {
- * result = timestamp;
- * available = true;
- * }
- * uint32_t result_size = flags & VK_QUERY_RESULT_64_BIT ? 8 : 4;
- * if ((flags & VK_QUERY_RESULT_PARTIAL_BIT) || available) {
- * if (flags & VK_QUERY_RESULT_64_BIT) {
- * dst_buf[dst_offset] = result;
- * } else {
- * dst_buf[dst_offset] = (uint32_t)result;
- * }
- * }
- * if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
- * dst_buf[dst_offset + result_size] = available;
- * }
- * }
- */
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "timestamp_query");
- b.shader->info.cs.local_size[0] = 64;
- b.shader->info.cs.local_size[1] = 1;
- b.shader->info.cs.local_size[2] = 1;
-
- /* Create and initialize local variables. */
- nir_variable *result =
- nir_local_variable_create(b.impl, glsl_uint64_t_type(), "result");
- nir_variable *available =
- nir_local_variable_create(b.impl, glsl_bool_type(), "available");
-
- nir_store_var(&b, result, nir_imm_int64(&b, 0), 0x1);
- nir_store_var(&b, available, nir_imm_false(&b), 0x1);
-
- nir_ssa_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range=16);
-
- /* Load resources. */
- nir_ssa_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
- nir_ssa_def *src_buf = radv_meta_load_descriptor(&b, 0, 1);
-
- /* Compute global ID. */
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
- nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
- nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info.cs.local_size[0],
- b.shader->info.cs.local_size[1],
- b.shader->info.cs.local_size[2], 0);
- nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
- global_id = nir_channel(&b, global_id, 0); // We only care about x here.
-
- /* Compute src/dst strides. */
- nir_ssa_def *input_stride = nir_imm_int(&b, 8);
- nir_ssa_def *input_base = nir_imul(&b, input_stride, global_id);
- nir_ssa_def *output_stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 4), .range=16);
- nir_ssa_def *output_base = nir_imul(&b, output_stride, global_id);
-
- /* Load data from the query pool. */
- nir_ssa_def *load = nir_load_ssbo(&b, 2, 32, src_buf, input_base, .align_mul=8);
-
- /* Pack the timestamp. */
- nir_ssa_def *timestamp;
- timestamp = nir_pack_64_2x32(&b, nir_vec2(&b,
- nir_channel(&b, load, 0),
- nir_channel(&b, load, 1)));
-
- /* Check if result is available. */
- nir_ssa_def *result_is_available =
- nir_i2b(&b, nir_ine(&b, timestamp,
- nir_imm_int64(&b, TIMESTAMP_NOT_READY)));
-
- /* Only store result if available. */
- nir_push_if(&b, result_is_available);
-
- nir_store_var(&b, result, timestamp, 0x1);
- nir_store_var(&b, available, nir_imm_true(&b), 0x1);
-
- nir_pop_if(&b, NULL);
-
- /* Determine if result is 64 or 32 bit. */
- nir_ssa_def *result_is_64bit =
- nir_test_flag(&b, flags, VK_QUERY_RESULT_64_BIT);
- nir_ssa_def *result_size =
- nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8),
- nir_imm_int(&b, 4));
-
- /* Store the result if complete or partial results have been requested. */
- nir_push_if(&b, nir_ior(&b, nir_test_flag(&b, flags, VK_QUERY_RESULT_PARTIAL_BIT),
- nir_load_var(&b, available)));
-
- /* Store result. */
- nir_push_if(&b, result_is_64bit);
-
- nir_store_ssbo(&b, nir_load_var(&b, result), dst_buf, output_base, .write_mask=0x1, .align_mul=8);
-
- nir_push_else(&b, NULL);
-
- nir_store_ssbo(&b, nir_u2u32(&b, nir_load_var(&b, result)), dst_buf,
- output_base, .write_mask=0x1, .align_mul=4);
-
- nir_pop_if(&b, NULL);
-
- nir_pop_if(&b, NULL);
-
- radv_store_availability(&b, flags, dst_buf,
- nir_iadd(&b, result_size, output_base),
- nir_b2i32(&b, nir_load_var(&b, available)));
-
- return b.shader;
+ /* the shader this builds is roughly
+ *
+ * uint32_t src_stride = 8;
+ *
+ * location(binding = 0) buffer dst_buf;
+ * location(binding = 1) buffer src_buf;
+ *
+ * void main() {
+ * uint64_t result = 0;
+ * bool available = false;
+ * uint64_t src_offset = src_stride * global_id.x;
+ * uint64_t dst_offset = dst_stride * global_id.x;
+ * uint64_t timestamp = src_buf[src_offset];
+ * if (timestamp != TIMESTAMP_NOT_READY) {
+ * result = timestamp;
+ * available = true;
+ * }
+ * uint32_t result_size = flags & VK_QUERY_RESULT_64_BIT ? 8 : 4;
+ * if ((flags & VK_QUERY_RESULT_PARTIAL_BIT) || available) {
+ * if (flags & VK_QUERY_RESULT_64_BIT) {
+ * dst_buf[dst_offset] = result;
+ * } else {
+ * dst_buf[dst_offset] = (uint32_t)result;
+ * }
+ * }
+ * if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
+ * dst_buf[dst_offset + result_size] = available;
+ * }
+ * }
+ */
+ nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "timestamp_query");
+ b.shader->info.cs.local_size[0] = 64;
+ b.shader->info.cs.local_size[1] = 1;
+ b.shader->info.cs.local_size[2] = 1;
+
+ /* Create and initialize local variables. */
+ nir_variable *result = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "result");
+ nir_variable *available = nir_local_variable_create(b.impl, glsl_bool_type(), "available");
+
+ nir_store_var(&b, result, nir_imm_int64(&b, 0), 0x1);
+ nir_store_var(&b, available, nir_imm_false(&b), 0x1);
+
+ nir_ssa_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range = 16);
+
+ /* Load resources. */
+ nir_ssa_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
+ nir_ssa_def *src_buf = radv_meta_load_descriptor(&b, 0, 1);
+
+ /* Compute global ID. */
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
+ nir_ssa_def *block_size =
+ nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1],
+ b.shader->info.cs.local_size[2], 0);
+ nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+ global_id = nir_channel(&b, global_id, 0); // We only care about x here.
+
+ /* Compute src/dst strides. */
+ nir_ssa_def *input_stride = nir_imm_int(&b, 8);
+ nir_ssa_def *input_base = nir_imul(&b, input_stride, global_id);
+ nir_ssa_def *output_stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 4), .range = 16);
+ nir_ssa_def *output_base = nir_imul(&b, output_stride, global_id);
+
+ /* Load data from the query pool. */
+ nir_ssa_def *load = nir_load_ssbo(&b, 2, 32, src_buf, input_base, .align_mul = 8);
+
+ /* Pack the timestamp. */
+ nir_ssa_def *timestamp;
+ timestamp =
+ nir_pack_64_2x32(&b, nir_vec2(&b, nir_channel(&b, load, 0), nir_channel(&b, load, 1)));
+
+ /* Check if result is available. */
+ nir_ssa_def *result_is_available =
+ nir_i2b(&b, nir_ine(&b, timestamp, nir_imm_int64(&b, TIMESTAMP_NOT_READY)));
+
+ /* Only store result if available. */
+ nir_push_if(&b, result_is_available);
+
+ nir_store_var(&b, result, timestamp, 0x1);
+ nir_store_var(&b, available, nir_imm_true(&b), 0x1);
+
+ nir_pop_if(&b, NULL);
+
+ /* Determine if result is 64 or 32 bit. */
+ nir_ssa_def *result_is_64bit = nir_test_flag(&b, flags, VK_QUERY_RESULT_64_BIT);
+ nir_ssa_def *result_size =
+ nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8), nir_imm_int(&b, 4));
+
+ /* Store the result if complete or partial results have been requested. */
+ nir_push_if(&b, nir_ior(&b, nir_test_flag(&b, flags, VK_QUERY_RESULT_PARTIAL_BIT),
+ nir_load_var(&b, available)));
+
+ /* Store result. */
+ nir_push_if(&b, result_is_64bit);
+
+ nir_store_ssbo(&b, nir_load_var(&b, result), dst_buf, output_base, .write_mask = 0x1,
+ .align_mul = 8);
+
+ nir_push_else(&b, NULL);
+
+ nir_store_ssbo(&b, nir_u2u32(&b, nir_load_var(&b, result)), dst_buf, output_base,
+ .write_mask = 0x1, .align_mul = 4);
+
+ nir_pop_if(&b, NULL);
+
+ nir_pop_if(&b, NULL);
+
+ radv_store_availability(&b, flags, dst_buf, nir_iadd(&b, result_size, output_base),
+ nir_b2i32(&b, nir_load_var(&b, available)));
+
+ return b.shader;
}
-static VkResult radv_device_init_meta_query_state_internal(struct radv_device *device)
+static VkResult
+radv_device_init_meta_query_state_internal(struct radv_device *device)
{
- VkResult result;
- nir_shader *occlusion_cs = NULL;
- nir_shader *pipeline_statistics_cs = NULL;
- nir_shader *tfb_cs = NULL;
- nir_shader *timestamp_cs = NULL;
-
- mtx_lock(&device->meta_state.mtx);
- if (device->meta_state.query.pipeline_statistics_query_pipeline) {
- mtx_unlock(&device->meta_state.mtx);
- return VK_SUCCESS;
- }
- occlusion_cs = build_occlusion_query_shader(device);
- pipeline_statistics_cs = build_pipeline_statistics_query_shader(device);
- tfb_cs = build_tfb_query_shader(device);
- timestamp_cs = build_timestamp_query_shader(device);
-
- VkDescriptorSetLayoutCreateInfo occlusion_ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 2,
- .pBindings = (VkDescriptorSetLayoutBinding[]) {
- {
- .binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- {
- .binding = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- }
- };
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
- &occlusion_ds_create_info,
- &device->meta_state.alloc,
- &device->meta_state.query.ds_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
- VkPipelineLayoutCreateInfo occlusion_pl_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 1,
- .pSetLayouts = &device->meta_state.query.ds_layout,
- .pushConstantRangeCount = 1,
- .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
- };
-
- result = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &occlusion_pl_create_info,
- &device->meta_state.alloc,
- &device->meta_state.query.p_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
- VkPipelineShaderStageCreateInfo occlusion_pipeline_shader_stage = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = vk_shader_module_handle_from_nir(occlusion_cs),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo occlusion_vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = occlusion_pipeline_shader_stage,
- .flags = 0,
- .layout = device->meta_state.query.p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- 1, &occlusion_vk_pipeline_info, NULL,
- &device->meta_state.query.occlusion_query_pipeline);
- if (result != VK_SUCCESS)
- goto fail;
-
- VkPipelineShaderStageCreateInfo pipeline_statistics_pipeline_shader_stage = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = vk_shader_module_handle_from_nir(pipeline_statistics_cs),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo pipeline_statistics_vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = pipeline_statistics_pipeline_shader_stage,
- .flags = 0,
- .layout = device->meta_state.query.p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- 1, &pipeline_statistics_vk_pipeline_info, NULL,
- &device->meta_state.query.pipeline_statistics_query_pipeline);
- if (result != VK_SUCCESS)
- goto fail;
-
- VkPipelineShaderStageCreateInfo tfb_pipeline_shader_stage = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = vk_shader_module_handle_from_nir(tfb_cs),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo tfb_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = tfb_pipeline_shader_stage,
- .flags = 0,
- .layout = device->meta_state.query.p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- 1, &tfb_pipeline_info, NULL,
- &device->meta_state.query.tfb_query_pipeline);
- if (result != VK_SUCCESS)
- goto fail;
-
- VkPipelineShaderStageCreateInfo timestamp_pipeline_shader_stage = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = vk_shader_module_handle_from_nir(timestamp_cs),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo timestamp_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = timestamp_pipeline_shader_stage,
- .flags = 0,
- .layout = device->meta_state.query.p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- 1, &timestamp_pipeline_info, NULL,
- &device->meta_state.query.timestamp_query_pipeline);
+ VkResult result;
+ nir_shader *occlusion_cs = NULL;
+ nir_shader *pipeline_statistics_cs = NULL;
+ nir_shader *tfb_cs = NULL;
+ nir_shader *timestamp_cs = NULL;
+
+ mtx_lock(&device->meta_state.mtx);
+ if (device->meta_state.query.pipeline_statistics_query_pipeline) {
+ mtx_unlock(&device->meta_state.mtx);
+ return VK_SUCCESS;
+ }
+ occlusion_cs = build_occlusion_query_shader(device);
+ pipeline_statistics_cs = build_pipeline_statistics_query_shader(device);
+ tfb_cs = build_tfb_query_shader(device);
+ timestamp_cs = build_timestamp_query_shader(device);
+
+ VkDescriptorSetLayoutCreateInfo occlusion_ds_create_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 2,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ {.binding = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &occlusion_ds_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.query.ds_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineLayoutCreateInfo occlusion_pl_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &device->meta_state.query.ds_layout,
+ .pushConstantRangeCount = 1,
+ .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
+ };
+
+ result =
+ radv_CreatePipelineLayout(radv_device_to_handle(device), &occlusion_pl_create_info,
+ &device->meta_state.alloc, &device->meta_state.query.p_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineShaderStageCreateInfo occlusion_pipeline_shader_stage = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(occlusion_cs),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo occlusion_vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = occlusion_pipeline_shader_stage,
+ .flags = 0,
+ .layout = device->meta_state.query.p_layout,
+ };
+
+ result = radv_CreateComputePipelines(
+ radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
+ &occlusion_vk_pipeline_info, NULL, &device->meta_state.query.occlusion_query_pipeline);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineShaderStageCreateInfo pipeline_statistics_pipeline_shader_stage = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(pipeline_statistics_cs),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo pipeline_statistics_vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = pipeline_statistics_pipeline_shader_stage,
+ .flags = 0,
+ .layout = device->meta_state.query.p_layout,
+ };
+
+ result = radv_CreateComputePipelines(
+ radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
+ &pipeline_statistics_vk_pipeline_info, NULL,
+ &device->meta_state.query.pipeline_statistics_query_pipeline);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineShaderStageCreateInfo tfb_pipeline_shader_stage = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(tfb_cs),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo tfb_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = tfb_pipeline_shader_stage,
+ .flags = 0,
+ .layout = device->meta_state.query.p_layout,
+ };
+
+ result = radv_CreateComputePipelines(
+ radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
+ &tfb_pipeline_info, NULL, &device->meta_state.query.tfb_query_pipeline);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineShaderStageCreateInfo timestamp_pipeline_shader_stage = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(timestamp_cs),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo timestamp_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = timestamp_pipeline_shader_stage,
+ .flags = 0,
+ .layout = device->meta_state.query.p_layout,
+ };
+
+ result = radv_CreateComputePipelines(
+ radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
+ &timestamp_pipeline_info, NULL, &device->meta_state.query.timestamp_query_pipeline);
fail:
- if (result != VK_SUCCESS)
- radv_device_finish_meta_query_state(device);
- ralloc_free(occlusion_cs);
- ralloc_free(pipeline_statistics_cs);
- ralloc_free(tfb_cs);
- ralloc_free(timestamp_cs);
- mtx_unlock(&device->meta_state.mtx);
- return result;
+ if (result != VK_SUCCESS)
+ radv_device_finish_meta_query_state(device);
+ ralloc_free(occlusion_cs);
+ ralloc_free(pipeline_statistics_cs);
+ ralloc_free(tfb_cs);
+ ralloc_free(timestamp_cs);
+ mtx_unlock(&device->meta_state.mtx);
+ return result;
}
-VkResult radv_device_init_meta_query_state(struct radv_device *device, bool on_demand)
+VkResult
+radv_device_init_meta_query_state(struct radv_device *device, bool on_demand)
{
- if (on_demand)
- return VK_SUCCESS;
+ if (on_demand)
+ return VK_SUCCESS;
- return radv_device_init_meta_query_state_internal(device);
+ return radv_device_init_meta_query_state_internal(device);
}
-void radv_device_finish_meta_query_state(struct radv_device *device)
+void
+radv_device_finish_meta_query_state(struct radv_device *device)
{
- if (device->meta_state.query.tfb_query_pipeline)
- radv_DestroyPipeline(radv_device_to_handle(device),
- device->meta_state.query.tfb_query_pipeline,
- &device->meta_state.alloc);
-
- if (device->meta_state.query.pipeline_statistics_query_pipeline)
- radv_DestroyPipeline(radv_device_to_handle(device),
- device->meta_state.query.pipeline_statistics_query_pipeline,
- &device->meta_state.alloc);
-
- if (device->meta_state.query.occlusion_query_pipeline)
- radv_DestroyPipeline(radv_device_to_handle(device),
- device->meta_state.query.occlusion_query_pipeline,
- &device->meta_state.alloc);
-
- if (device->meta_state.query.timestamp_query_pipeline)
- radv_DestroyPipeline(radv_device_to_handle(device),
- device->meta_state.query.timestamp_query_pipeline,
- &device->meta_state.alloc);
-
- if (device->meta_state.query.p_layout)
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- device->meta_state.query.p_layout,
- &device->meta_state.alloc);
-
- if (device->meta_state.query.ds_layout)
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- device->meta_state.query.ds_layout,
- &device->meta_state.alloc);
+ if (device->meta_state.query.tfb_query_pipeline)
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ device->meta_state.query.tfb_query_pipeline, &device->meta_state.alloc);
+
+ if (device->meta_state.query.pipeline_statistics_query_pipeline)
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ device->meta_state.query.pipeline_statistics_query_pipeline,
+ &device->meta_state.alloc);
+
+ if (device->meta_state.query.occlusion_query_pipeline)
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ device->meta_state.query.occlusion_query_pipeline,
+ &device->meta_state.alloc);
+
+ if (device->meta_state.query.timestamp_query_pipeline)
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ device->meta_state.query.timestamp_query_pipeline,
+ &device->meta_state.alloc);
+
+ if (device->meta_state.query.p_layout)
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), device->meta_state.query.p_layout,
+ &device->meta_state.alloc);
+
+ if (device->meta_state.query.ds_layout)
+ radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
+ device->meta_state.query.ds_layout,
+ &device->meta_state.alloc);
}
-static void radv_query_shader(struct radv_cmd_buffer *cmd_buffer,
- VkPipeline *pipeline,
- struct radeon_winsys_bo *src_bo,
- struct radeon_winsys_bo *dst_bo,
- uint64_t src_offset, uint64_t dst_offset,
- uint32_t src_stride, uint32_t dst_stride,
- uint32_t count, uint32_t flags,
- uint32_t pipeline_stats_mask, uint32_t avail_offset)
+static void
+radv_query_shader(struct radv_cmd_buffer *cmd_buffer, VkPipeline *pipeline,
+ struct radeon_winsys_bo *src_bo, struct radeon_winsys_bo *dst_bo,
+ uint64_t src_offset, uint64_t dst_offset, uint32_t src_stride,
+ uint32_t dst_stride, uint32_t count, uint32_t flags, uint32_t pipeline_stats_mask,
+ uint32_t avail_offset)
{
- struct radv_device *device = cmd_buffer->device;
- struct radv_meta_saved_state saved_state;
- bool old_predicating;
-
- if (!*pipeline) {
- VkResult ret = radv_device_init_meta_query_state_internal(device);
- if (ret != VK_SUCCESS) {
- cmd_buffer->record_result = ret;
- return;
- }
- }
-
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_COMPUTE_PIPELINE |
- RADV_META_SAVE_CONSTANTS |
- RADV_META_SAVE_DESCRIPTORS);
-
- /* VK_EXT_conditional_rendering says that copy commands should not be
- * affected by conditional rendering.
- */
- old_predicating = cmd_buffer->state.predicating;
- cmd_buffer->state.predicating = false;
-
- struct radv_buffer dst_buffer = {
- .bo = dst_bo,
- .offset = dst_offset,
- .size = dst_stride * count
- };
-
- struct radv_buffer src_buffer = {
- .bo = src_bo,
- .offset = src_offset,
- .size = MAX2(src_stride * count, avail_offset + 4 * count - src_offset)
- };
-
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
-
- radv_meta_push_descriptor_set(cmd_buffer,
- VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.query.p_layout,
- 0, /* set */
- 2, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .pBufferInfo = &(VkDescriptorBufferInfo) {
- .buffer = radv_buffer_to_handle(&dst_buffer),
- .offset = 0,
- .range = VK_WHOLE_SIZE
- }
- },
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 1,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .pBufferInfo = &(VkDescriptorBufferInfo) {
- .buffer = radv_buffer_to_handle(&src_buffer),
- .offset = 0,
- .range = VK_WHOLE_SIZE
- }
- }
- });
-
- /* Encode the number of elements for easy access by the shader. */
- pipeline_stats_mask &= 0x7ff;
- pipeline_stats_mask |= util_bitcount(pipeline_stats_mask) << 16;
-
- avail_offset -= src_offset;
-
- struct {
- uint32_t flags;
- uint32_t dst_stride;
- uint32_t pipeline_stats_mask;
- uint32_t avail_offset;
- } push_constants = {
- flags,
- dst_stride,
- pipeline_stats_mask,
- avail_offset
- };
-
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.query.p_layout,
- VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants),
- &push_constants);
-
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_L2 |
- RADV_CMD_FLAG_INV_VCACHE;
-
- if (flags & VK_QUERY_RESULT_WAIT_BIT)
- cmd_buffer->state.flush_bits |= RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER;
-
- radv_unaligned_dispatch(cmd_buffer, count, 1, 1);
-
- /* Restore conditional rendering. */
- cmd_buffer->state.predicating = old_predicating;
-
- radv_meta_restore(&saved_state, cmd_buffer);
+ struct radv_device *device = cmd_buffer->device;
+ struct radv_meta_saved_state saved_state;
+ bool old_predicating;
+
+ if (!*pipeline) {
+ VkResult ret = radv_device_init_meta_query_state_internal(device);
+ if (ret != VK_SUCCESS) {
+ cmd_buffer->record_result = ret;
+ return;
+ }
+ }
+
+ radv_meta_save(
+ &saved_state, cmd_buffer,
+ RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
+
+ /* VK_EXT_conditional_rendering says that copy commands should not be
+ * affected by conditional rendering.
+ */
+ old_predicating = cmd_buffer->state.predicating;
+ cmd_buffer->state.predicating = false;
+
+ struct radv_buffer dst_buffer = {.bo = dst_bo, .offset = dst_offset, .size = dst_stride * count};
+
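+   /* The source range must also cover the per-query availability dwords
+    * that the pipeline statistics path reads via avail_offset. */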
+ struct radv_buffer src_buffer = {
+ .bo = src_bo,
+ .offset = src_offset,
+ .size = MAX2(src_stride * count, avail_offset + 4 * count - src_offset)};
+
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
+ *pipeline);
+
+ radv_meta_push_descriptor_set(
+ cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.query.p_layout, 0, /* set */
+ 2, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){
+ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .pBufferInfo = &(VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(&dst_buffer),
+ .offset = 0,
+ .range = VK_WHOLE_SIZE}},
+ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 1,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .pBufferInfo = &(VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(&src_buffer),
+ .offset = 0,
+ .range = VK_WHOLE_SIZE}}});
+
+ /* Encode the number of elements for easy access by the shader. */
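+   /* Bits 0-10 select the individual statistics (0x7ff covers the eleven
+    * Vulkan pipeline statistics); bits 16+ hold how many are enabled. */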
+ pipeline_stats_mask &= 0x7ff;
+ pipeline_stats_mask |= util_bitcount(pipeline_stats_mask) << 16;
+
+ avail_offset -= src_offset;
+
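+   /* This layout must match the 16-byte push constant range declared in
+    * the pipeline layout; the query shaders above load flags at offset 0
+    * and dst_stride at offset 4. */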
+ struct {
+ uint32_t flags;
+ uint32_t dst_stride;
+ uint32_t pipeline_stats_mask;
+ uint32_t avail_offset;
+ } push_constants = {flags, dst_stride, pipeline_stats_mask, avail_offset};
+
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.query.p_layout,
+ VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), &push_constants);
+
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_INV_VCACHE;
+
+ if (flags & VK_QUERY_RESULT_WAIT_BIT)
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER;
+
+ radv_unaligned_dispatch(cmd_buffer, count, 1, 1);
+
+ /* Restore conditional rendering. */
+ cmd_buffer->state.predicating = old_predicating;
+
+ radv_meta_restore(&saved_state, cmd_buffer);
}
static bool
-radv_query_pool_needs_gds(struct radv_device *device,
- struct radv_query_pool *pool)
+radv_query_pool_needs_gds(struct radv_device *device, struct radv_query_pool *pool)
{
- /* The number of primitives generated by geometry shader invocations is
- * only counted by the hardware if GS uses the legacy path. When NGG GS
- * is used, the hardware can't know the number of generated primitives
- * and we have to it manually inside the shader. To achieve that, the
- * driver does a plain GDS atomic to accumulate that value.
- * TODO: fix use of NGG GS and non-NGG GS inside the same begin/end
- * query.
- */
- return device->physical_device->use_ngg &&
- (pool->pipeline_stats_mask & VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT);
+ /* The number of primitives generated by geometry shader invocations is
+ * only counted by the hardware if GS uses the legacy path. When NGG GS
+ * is used, the hardware can't know the number of generated primitives
+    * and we have to do it manually inside the shader. To achieve that, the
+ * driver does a plain GDS atomic to accumulate that value.
+ * TODO: fix use of NGG GS and non-NGG GS inside the same begin/end
+ * query.
+ */
+ return device->physical_device->use_ngg &&
+ (pool->pipeline_stats_mask & VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT);
}
static void
-radv_destroy_query_pool(struct radv_device *device,
- const VkAllocationCallbacks *pAllocator,
- struct radv_query_pool *pool)
+radv_destroy_query_pool(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
+ struct radv_query_pool *pool)
{
- if (pool->bo)
- device->ws->buffer_destroy(device->ws, pool->bo);
- vk_object_base_finish(&pool->base);
- vk_free2(&device->vk.alloc, pAllocator, pool);
+ if (pool->bo)
+ device->ws->buffer_destroy(device->ws, pool->bo);
+ vk_object_base_finish(&pool->base);
+ vk_free2(&device->vk.alloc, pAllocator, pool);
}
-VkResult radv_CreateQueryPool(
- VkDevice _device,
- const VkQueryPoolCreateInfo* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkQueryPool* pQueryPool)
+VkResult
+radv_CreateQueryPool(VkDevice _device, const VkQueryPoolCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkQueryPool *pQueryPool)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- struct radv_query_pool *pool = vk_alloc2(&device->vk.alloc, pAllocator,
- sizeof(*pool), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
-
- if (!pool)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
-
- vk_object_base_init(&device->vk, &pool->base,
- VK_OBJECT_TYPE_QUERY_POOL);
-
- switch(pCreateInfo->queryType) {
- case VK_QUERY_TYPE_OCCLUSION:
- pool->stride = 16 * device->physical_device->rad_info.max_render_backends;
- break;
- case VK_QUERY_TYPE_PIPELINE_STATISTICS:
- pool->stride = pipelinestat_block_size * 2;
- break;
- case VK_QUERY_TYPE_TIMESTAMP:
- pool->stride = 8;
- break;
- case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
- pool->stride = 32;
- break;
- default:
- unreachable("creating unhandled query type");
- }
-
- pool->type = pCreateInfo->queryType;
- pool->pipeline_stats_mask = pCreateInfo->pipelineStatistics;
- pool->availability_offset = pool->stride * pCreateInfo->queryCount;
- pool->size = pool->availability_offset;
- if (pCreateInfo->queryType == VK_QUERY_TYPE_PIPELINE_STATISTICS)
- pool->size += 4 * pCreateInfo->queryCount;
-
- pool->bo = device->ws->buffer_create(device->ws, pool->size,
- 64, RADEON_DOMAIN_GTT, RADEON_FLAG_NO_INTERPROCESS_SHARING,
- RADV_BO_PRIORITY_QUERY_POOL);
- if (!pool->bo) {
- radv_destroy_query_pool(device, pAllocator, pool);
- return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
- }
-
- pool->ptr = device->ws->buffer_map(pool->bo);
- if (!pool->ptr) {
- radv_destroy_query_pool(device, pAllocator, pool);
- return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
- }
-
- *pQueryPool = radv_query_pool_to_handle(pool);
- return VK_SUCCESS;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ struct radv_query_pool *pool =
+ vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*pool), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+
+ if (!pool)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ vk_object_base_init(&device->vk, &pool->base, VK_OBJECT_TYPE_QUERY_POOL);
+
+ switch (pCreateInfo->queryType) {
+ case VK_QUERY_TYPE_OCCLUSION:
+ pool->stride = 16 * device->physical_device->rad_info.max_render_backends;
+ break;
+ case VK_QUERY_TYPE_PIPELINE_STATISTICS:
+ pool->stride = pipelinestat_block_size * 2;
+ break;
+ case VK_QUERY_TYPE_TIMESTAMP:
+ pool->stride = 8;
+ break;
+ case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
+ pool->stride = 32;
+ break;
+ default:
+ unreachable("creating unhandled query type");
+ }
+
+ pool->type = pCreateInfo->queryType;
+ pool->pipeline_stats_mask = pCreateInfo->pipelineStatistics;
+ pool->availability_offset = pool->stride * pCreateInfo->queryCount;
+ pool->size = pool->availability_offset;
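+   /* Pipeline statistics queries additionally keep one 32-bit availability
+    * word per query after the per-query result data. */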
+ if (pCreateInfo->queryType == VK_QUERY_TYPE_PIPELINE_STATISTICS)
+ pool->size += 4 * pCreateInfo->queryCount;
+
+ pool->bo =
+ device->ws->buffer_create(device->ws, pool->size, 64, RADEON_DOMAIN_GTT,
+ RADEON_FLAG_NO_INTERPROCESS_SHARING, RADV_BO_PRIORITY_QUERY_POOL);
+ if (!pool->bo) {
+ radv_destroy_query_pool(device, pAllocator, pool);
+ return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ }
+
+ pool->ptr = device->ws->buffer_map(pool->bo);
+ if (!pool->ptr) {
+ radv_destroy_query_pool(device, pAllocator, pool);
+ return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ }
+
+ *pQueryPool = radv_query_pool_to_handle(pool);
+ return VK_SUCCESS;
}
-void radv_DestroyQueryPool(
- VkDevice _device,
- VkQueryPool _pool,
- const VkAllocationCallbacks* pAllocator)
+void
+radv_DestroyQueryPool(VkDevice _device, VkQueryPool _pool, const VkAllocationCallbacks *pAllocator)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_query_pool, pool, _pool);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_query_pool, pool, _pool);
- if (!pool)
- return;
+ if (!pool)
+ return;
- radv_destroy_query_pool(device, pAllocator, pool);
+ radv_destroy_query_pool(device, pAllocator, pool);
}
-VkResult radv_GetQueryPoolResults(
- VkDevice _device,
- VkQueryPool queryPool,
- uint32_t firstQuery,
- uint32_t queryCount,
- size_t dataSize,
- void* pData,
- VkDeviceSize stride,
- VkQueryResultFlags flags)
+VkResult
+radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t firstQuery,
+ uint32_t queryCount, size_t dataSize, void *pData, VkDeviceSize stride,
+ VkQueryResultFlags flags)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
- char *data = pData;
- VkResult result = VK_SUCCESS;
-
- if (radv_device_is_lost(device))
- return VK_ERROR_DEVICE_LOST;
-
- for(unsigned query_idx = 0; query_idx < queryCount; ++query_idx, data += stride) {
- char *dest = data;
- unsigned query = firstQuery + query_idx;
- char *src = pool->ptr + query * pool->stride;
- uint32_t available;
-
- switch (pool->type) {
- case VK_QUERY_TYPE_TIMESTAMP: {
- uint64_t const *src64 = (uint64_t const *)src;
- uint64_t value;
-
- do {
- value = p_atomic_read(src64);
- } while (value == TIMESTAMP_NOT_READY &&
- (flags & VK_QUERY_RESULT_WAIT_BIT));
-
- available = value != TIMESTAMP_NOT_READY;
-
- if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT))
- result = VK_NOT_READY;
-
- if (flags & VK_QUERY_RESULT_64_BIT) {
- if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
- *(uint64_t*)dest = value;
- dest += 8;
- } else {
- if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
- *(uint32_t*)dest = (uint32_t)value;
- dest += 4;
- }
- break;
- }
- case VK_QUERY_TYPE_OCCLUSION: {
- uint64_t const *src64 = (uint64_t const *)src;
- uint32_t db_count = device->physical_device->rad_info.max_render_backends;
- uint32_t enabled_rb_mask = device->physical_device->rad_info.enabled_rb_mask;
- uint64_t sample_count = 0;
- available = 1;
-
- for (int i = 0; i < db_count; ++i) {
- uint64_t start, end;
-
- if (!(enabled_rb_mask & (1 << i)))
- continue;
-
- do {
- start = p_atomic_read(src64 + 2 * i);
- end = p_atomic_read(src64 + 2 * i + 1);
- } while ((!(start & (1ull << 63)) || !(end & (1ull << 63))) && (flags & VK_QUERY_RESULT_WAIT_BIT));
-
- if (!(start & (1ull << 63)) || !(end & (1ull << 63)))
- available = 0;
- else {
- sample_count += end - start;
- }
- }
-
- if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT))
- result = VK_NOT_READY;
-
- if (flags & VK_QUERY_RESULT_64_BIT) {
- if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
- *(uint64_t*)dest = sample_count;
- dest += 8;
- } else {
- if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
- *(uint32_t*)dest = sample_count;
- dest += 4;
- }
- break;
- }
- case VK_QUERY_TYPE_PIPELINE_STATISTICS: {
- const uint32_t *avail_ptr = (const uint32_t*)(pool->ptr + pool->availability_offset + 4 * query);
-
- do {
- available = p_atomic_read(avail_ptr);
- } while (!available && (flags & VK_QUERY_RESULT_WAIT_BIT));
-
- if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT))
- result = VK_NOT_READY;
-
- const uint64_t *start = (uint64_t*)src;
- const uint64_t *stop = (uint64_t*)(src + pipelinestat_block_size);
- if (flags & VK_QUERY_RESULT_64_BIT) {
- uint64_t *dst = (uint64_t*)dest;
- dest += util_bitcount(pool->pipeline_stats_mask) * 8;
- for(int i = 0; i < ARRAY_SIZE(pipeline_statistics_indices); ++i) {
- if(pool->pipeline_stats_mask & (1u << i)) {
- if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
- *dst = stop[pipeline_statistics_indices[i]] -
- start[pipeline_statistics_indices[i]];
- dst++;
- }
- }
-
- } else {
- uint32_t *dst = (uint32_t*)dest;
- dest += util_bitcount(pool->pipeline_stats_mask) * 4;
- for(int i = 0; i < ARRAY_SIZE(pipeline_statistics_indices); ++i) {
- if(pool->pipeline_stats_mask & (1u << i)) {
- if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
- *dst = stop[pipeline_statistics_indices[i]] -
- start[pipeline_statistics_indices[i]];
- dst++;
- }
- }
- }
- break;
- }
- case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: {
- uint64_t const *src64 = (uint64_t const *)src;
- uint64_t num_primitives_written;
- uint64_t primitive_storage_needed;
-
- /* SAMPLE_STREAMOUTSTATS stores this structure:
- * {
- * u64 NumPrimitivesWritten;
- * u64 PrimitiveStorageNeeded;
- * }
- */
- available = 1;
- for (int j = 0; j < 4; j++) {
- if (!(p_atomic_read(src64 + j) & 0x8000000000000000UL))
- available = 0;
- }
-
- if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT))
- result = VK_NOT_READY;
-
- num_primitives_written = src64[3] - src64[1];
- primitive_storage_needed = src64[2] - src64[0];
-
- if (flags & VK_QUERY_RESULT_64_BIT) {
- if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
- *(uint64_t *)dest = num_primitives_written;
- dest += 8;
- if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
- *(uint64_t *)dest = primitive_storage_needed;
- dest += 8;
- } else {
- if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
- *(uint32_t *)dest = num_primitives_written;
- dest += 4;
- if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
- *(uint32_t *)dest = primitive_storage_needed;
- dest += 4;
- }
- break;
- }
- default:
- unreachable("trying to get results of unhandled query type");
- }
-
- if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
- if (flags & VK_QUERY_RESULT_64_BIT) {
- *(uint64_t*)dest = available;
- } else {
- *(uint32_t*)dest = available;
- }
- }
- }
-
- return result;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
+ char *data = pData;
+ VkResult result = VK_SUCCESS;
+
+ if (radv_device_is_lost(device))
+ return VK_ERROR_DEVICE_LOST;
+
+ for (unsigned query_idx = 0; query_idx < queryCount; ++query_idx, data += stride) {
+ char *dest = data;
+ unsigned query = firstQuery + query_idx;
+ char *src = pool->ptr + query * pool->stride;
+ uint32_t available;
+
+ switch (pool->type) {
+ case VK_QUERY_TYPE_TIMESTAMP: {
+ uint64_t const *src64 = (uint64_t const *)src;
+ uint64_t value;
+
+ do {
+ value = p_atomic_read(src64);
+ } while (value == TIMESTAMP_NOT_READY && (flags & VK_QUERY_RESULT_WAIT_BIT));
+
+ available = value != TIMESTAMP_NOT_READY;
+
+ if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT))
+ result = VK_NOT_READY;
+
+ if (flags & VK_QUERY_RESULT_64_BIT) {
+ if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
+ *(uint64_t *)dest = value;
+ dest += 8;
+ } else {
+ if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
+ *(uint32_t *)dest = (uint32_t)value;
+ dest += 4;
+ }
+ break;
+ }
+ case VK_QUERY_TYPE_OCCLUSION: {
+ uint64_t const *src64 = (uint64_t const *)src;
+ uint32_t db_count = device->physical_device->rad_info.max_render_backends;
+ uint32_t enabled_rb_mask = device->physical_device->rad_info.enabled_rb_mask;
+ uint64_t sample_count = 0;
+ available = 1;
+
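+      /* Bit 63 of each start/end counter acts as the written flag; the
+       * query is only available once every enabled RB has set it on both. */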
+ for (int i = 0; i < db_count; ++i) {
+ uint64_t start, end;
+
+ if (!(enabled_rb_mask & (1 << i)))
+ continue;
+
+ do {
+ start = p_atomic_read(src64 + 2 * i);
+ end = p_atomic_read(src64 + 2 * i + 1);
+ } while ((!(start & (1ull << 63)) || !(end & (1ull << 63))) &&
+ (flags & VK_QUERY_RESULT_WAIT_BIT));
+
+ if (!(start & (1ull << 63)) || !(end & (1ull << 63)))
+ available = 0;
+ else {
+ sample_count += end - start;
+ }
+ }
+
+ if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT))
+ result = VK_NOT_READY;
+
+ if (flags & VK_QUERY_RESULT_64_BIT) {
+ if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
+ *(uint64_t *)dest = sample_count;
+ dest += 8;
+ } else {
+ if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
+ *(uint32_t *)dest = sample_count;
+ dest += 4;
+ }
+ break;
+ }
+ case VK_QUERY_TYPE_PIPELINE_STATISTICS: {
+ const uint32_t *avail_ptr =
+ (const uint32_t *)(pool->ptr + pool->availability_offset + 4 * query);
+
+ do {
+ available = p_atomic_read(avail_ptr);
+ } while (!available && (flags & VK_QUERY_RESULT_WAIT_BIT));
+
+ if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT))
+ result = VK_NOT_READY;
+
+ const uint64_t *start = (uint64_t *)src;
+ const uint64_t *stop = (uint64_t *)(src + pipelinestat_block_size);
+ if (flags & VK_QUERY_RESULT_64_BIT) {
+ uint64_t *dst = (uint64_t *)dest;
+ dest += util_bitcount(pool->pipeline_stats_mask) * 8;
+ for (int i = 0; i < ARRAY_SIZE(pipeline_statistics_indices); ++i) {
+ if (pool->pipeline_stats_mask & (1u << i)) {
+ if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
+ *dst = stop[pipeline_statistics_indices[i]] -
+ start[pipeline_statistics_indices[i]];
+ dst++;
+ }
+ }
+
+ } else {
+ uint32_t *dst = (uint32_t *)dest;
+ dest += util_bitcount(pool->pipeline_stats_mask) * 4;
+ for (int i = 0; i < ARRAY_SIZE(pipeline_statistics_indices); ++i) {
+ if (pool->pipeline_stats_mask & (1u << i)) {
+ if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
+ *dst = stop[pipeline_statistics_indices[i]] -
+ start[pipeline_statistics_indices[i]];
+ dst++;
+ }
+ }
+ }
+ break;
+ }
+ case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: {
+ uint64_t const *src64 = (uint64_t const *)src;
+ uint64_t num_primitives_written;
+ uint64_t primitive_storage_needed;
+
+ /* SAMPLE_STREAMOUTSTATS stores this structure:
+ * {
+ * u64 NumPrimitivesWritten;
+ * u64 PrimitiveStorageNeeded;
+ * }
+ */
+ available = 1;
+ for (int j = 0; j < 4; j++) {
+ if (!(p_atomic_read(src64 + j) & 0x8000000000000000UL))
+ available = 0;
+ }
+
+ if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT))
+ result = VK_NOT_READY;
+
+ num_primitives_written = src64[3] - src64[1];
+ primitive_storage_needed = src64[2] - src64[0];
+
+ if (flags & VK_QUERY_RESULT_64_BIT) {
+ if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
+ *(uint64_t *)dest = num_primitives_written;
+ dest += 8;
+ if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
+ *(uint64_t *)dest = primitive_storage_needed;
+ dest += 8;
+ } else {
+ if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
+ *(uint32_t *)dest = num_primitives_written;
+ dest += 4;
+ if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
+ *(uint32_t *)dest = primitive_storage_needed;
+ dest += 4;
+ }
+ break;
+ }
+ default:
+ unreachable("trying to get results of unhandled query type");
+ }
+
+ if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
+ if (flags & VK_QUERY_RESULT_64_BIT) {
+ *(uint64_t *)dest = available;
+ } else {
+ *(uint32_t *)dest = available;
+ }
+ }
+ }
+
+ return result;
}
-static void emit_query_flush(struct radv_cmd_buffer *cmd_buffer,
- struct radv_query_pool *pool)
+static void
+emit_query_flush(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool)
{
- if (cmd_buffer->pending_reset_query) {
- if (pool->size >= RADV_BUFFER_OPS_CS_THRESHOLD) {
- /* Only need to flush caches if the query pool size is
- * large enough to be resetted using the compute shader
- * path. Small pools don't need any cache flushes
- * because we use a CP dma clear.
- */
- si_emit_cache_flush(cmd_buffer);
- }
- }
+ if (cmd_buffer->pending_reset_query) {
+ if (pool->size >= RADV_BUFFER_OPS_CS_THRESHOLD) {
+ /* Only need to flush caches if the query pool size is
+       * large enough to be reset using the compute shader
+ * path. Small pools don't need any cache flushes
+ * because we use a CP dma clear.
+ */
+ si_emit_cache_flush(cmd_buffer);
+ }
+ }
}
-void radv_CmdCopyQueryPoolResults(
- VkCommandBuffer commandBuffer,
- VkQueryPool queryPool,
- uint32_t firstQuery,
- uint32_t queryCount,
- VkBuffer dstBuffer,
- VkDeviceSize dstOffset,
- VkDeviceSize stride,
- VkQueryResultFlags flags)
+void
+radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPool,
+ uint32_t firstQuery, uint32_t queryCount, VkBuffer dstBuffer,
+ VkDeviceSize dstOffset, VkDeviceSize stride, VkQueryResultFlags flags)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
- RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- uint64_t va = radv_buffer_get_va(pool->bo);
- uint64_t dest_va = radv_buffer_get_va(dst_buffer->bo);
- dest_va += dst_buffer->offset + dstOffset;
-
- radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, pool->bo);
- radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dst_buffer->bo);
-
- /* From the Vulkan spec 1.1.108:
- *
- * "vkCmdCopyQueryPoolResults is guaranteed to see the effect of
- * previous uses of vkCmdResetQueryPool in the same queue, without any
- * additional synchronization."
- *
- * So, we have to flush the caches if the compute shader path was used.
- */
- emit_query_flush(cmd_buffer, pool);
-
- switch (pool->type) {
- case VK_QUERY_TYPE_OCCLUSION:
- if (flags & VK_QUERY_RESULT_WAIT_BIT) {
- unsigned enabled_rb_mask = cmd_buffer->device->physical_device->rad_info.enabled_rb_mask;
- uint32_t rb_avail_offset = 16 * util_last_bit(enabled_rb_mask) - 4;
- for(unsigned i = 0; i < queryCount; ++i, dest_va += stride) {
- unsigned query = firstQuery + i;
- uint64_t src_va = va + query * pool->stride + rb_avail_offset;
-
- radeon_check_space(cmd_buffer->device->ws, cs, 7);
-
- /* Waits on the upper word of the last DB entry */
- radv_cp_wait_mem(cs, WAIT_REG_MEM_GREATER_OR_EQUAL,
- src_va, 0x80000000, 0xffffffff);
- }
- }
- radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.occlusion_query_pipeline,
- pool->bo, dst_buffer->bo, firstQuery * pool->stride,
- dst_buffer->offset + dstOffset,
- pool->stride, stride,
- queryCount, flags, 0, 0);
- break;
- case VK_QUERY_TYPE_PIPELINE_STATISTICS:
- if (flags & VK_QUERY_RESULT_WAIT_BIT) {
- for(unsigned i = 0; i < queryCount; ++i, dest_va += stride) {
- unsigned query = firstQuery + i;
-
- radeon_check_space(cmd_buffer->device->ws, cs, 7);
-
- uint64_t avail_va = va + pool->availability_offset + 4 * query;
-
- /* This waits on the ME. All copies below are done on the ME */
- radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL,
- avail_va, 1, 0xffffffff);
- }
- }
- radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.pipeline_statistics_query_pipeline,
- pool->bo, dst_buffer->bo, firstQuery * pool->stride,
- dst_buffer->offset + dstOffset,
- pool->stride, stride, queryCount, flags,
- pool->pipeline_stats_mask,
- pool->availability_offset + 4 * firstQuery);
- break;
- case VK_QUERY_TYPE_TIMESTAMP:
- if (flags & VK_QUERY_RESULT_WAIT_BIT) {
- for(unsigned i = 0; i < queryCount; ++i, dest_va += stride) {
- unsigned query = firstQuery + i;
- uint64_t local_src_va = va + query * pool->stride;
-
- radeon_check_space(cmd_buffer->device->ws, cs, 7);
-
- /* Wait on the high 32 bits of the timestamp in
- * case the low part is 0xffffffff.
- */
- radv_cp_wait_mem(cs, WAIT_REG_MEM_NOT_EQUAL,
- local_src_va + 4,
- TIMESTAMP_NOT_READY >> 32,
- 0xffffffff);
- }
- }
-
- radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.timestamp_query_pipeline,
- pool->bo, dst_buffer->bo,
- firstQuery * pool->stride,
- dst_buffer->offset + dstOffset,
- pool->stride, stride,
- queryCount, flags, 0, 0);
- break;
- case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
- if (flags & VK_QUERY_RESULT_WAIT_BIT) {
- for(unsigned i = 0; i < queryCount; i++) {
- unsigned query = firstQuery + i;
- uint64_t src_va = va + query * pool->stride;
-
- radeon_check_space(cmd_buffer->device->ws, cs, 7 * 4);
-
- /* Wait on the upper word of all results. */
- for (unsigned j = 0; j < 4; j++, src_va += 8) {
- radv_cp_wait_mem(cs, WAIT_REG_MEM_GREATER_OR_EQUAL,
- src_va + 4, 0x80000000,
- 0xffffffff);
- }
- }
- }
-
- radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.tfb_query_pipeline,
- pool->bo, dst_buffer->bo,
- firstQuery * pool->stride,
- dst_buffer->offset + dstOffset,
- pool->stride, stride,
- queryCount, flags, 0, 0);
- break;
- default:
- unreachable("trying to get results of unhandled query type");
- }
-
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
+ RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ uint64_t va = radv_buffer_get_va(pool->bo);
+ uint64_t dest_va = radv_buffer_get_va(dst_buffer->bo);
+ dest_va += dst_buffer->offset + dstOffset;
+
+ radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, pool->bo);
+ radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dst_buffer->bo);
+
+ /* From the Vulkan spec 1.1.108:
+ *
+ * "vkCmdCopyQueryPoolResults is guaranteed to see the effect of
+ * previous uses of vkCmdResetQueryPool in the same queue, without any
+ * additional synchronization."
+ *
+ * So, we have to flush the caches if the compute shader path was used.
+ */
+ emit_query_flush(cmd_buffer, pool);
+
+ switch (pool->type) {
+ case VK_QUERY_TYPE_OCCLUSION:
+ if (flags & VK_QUERY_RESULT_WAIT_BIT) {
+ unsigned enabled_rb_mask = cmd_buffer->device->physical_device->rad_info.enabled_rb_mask;
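+         /* Each DB stores a 16-byte start/end counter pair; point at the
+          * upper dword of the end counter of the last enabled DB. */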
+ uint32_t rb_avail_offset = 16 * util_last_bit(enabled_rb_mask) - 4;
+ for (unsigned i = 0; i < queryCount; ++i, dest_va += stride) {
+ unsigned query = firstQuery + i;
+ uint64_t src_va = va + query * pool->stride + rb_avail_offset;
+
+ radeon_check_space(cmd_buffer->device->ws, cs, 7);
+
+ /* Waits on the upper word of the last DB entry */
+ radv_cp_wait_mem(cs, WAIT_REG_MEM_GREATER_OR_EQUAL, src_va, 0x80000000, 0xffffffff);
+ }
+ }
+ radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.occlusion_query_pipeline,
+ pool->bo, dst_buffer->bo, firstQuery * pool->stride,
+ dst_buffer->offset + dstOffset, pool->stride, stride, queryCount, flags, 0,
+ 0);
+ break;
+ case VK_QUERY_TYPE_PIPELINE_STATISTICS:
+ if (flags & VK_QUERY_RESULT_WAIT_BIT) {
+ for (unsigned i = 0; i < queryCount; ++i, dest_va += stride) {
+ unsigned query = firstQuery + i;
+
+ radeon_check_space(cmd_buffer->device->ws, cs, 7);
+
+ uint64_t avail_va = va + pool->availability_offset + 4 * query;
+
+ /* This waits on the ME. All copies below are done on the ME */
+ radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, avail_va, 1, 0xffffffff);
+ }
+ }
+ radv_query_shader(cmd_buffer,
+ &cmd_buffer->device->meta_state.query.pipeline_statistics_query_pipeline,
+ pool->bo, dst_buffer->bo, firstQuery * pool->stride,
+ dst_buffer->offset + dstOffset, pool->stride, stride, queryCount, flags,
+ pool->pipeline_stats_mask, pool->availability_offset + 4 * firstQuery);
+ break;
+ case VK_QUERY_TYPE_TIMESTAMP:
+ if (flags & VK_QUERY_RESULT_WAIT_BIT) {
+ for (unsigned i = 0; i < queryCount; ++i, dest_va += stride) {
+ unsigned query = firstQuery + i;
+ uint64_t local_src_va = va + query * pool->stride;
+
+ radeon_check_space(cmd_buffer->device->ws, cs, 7);
+
+ /* Wait on the high 32 bits of the timestamp in
+ * case the low part is 0xffffffff.
+ */
+ radv_cp_wait_mem(cs, WAIT_REG_MEM_NOT_EQUAL, local_src_va + 4,
+ TIMESTAMP_NOT_READY >> 32, 0xffffffff);
+ }
+ }
+
+ radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.timestamp_query_pipeline,
+ pool->bo, dst_buffer->bo, firstQuery * pool->stride,
+ dst_buffer->offset + dstOffset, pool->stride, stride, queryCount, flags, 0,
+ 0);
+ break;
+ case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
+ if (flags & VK_QUERY_RESULT_WAIT_BIT) {
+ for (unsigned i = 0; i < queryCount; i++) {
+ unsigned query = firstQuery + i;
+ uint64_t src_va = va + query * pool->stride;
+
+ radeon_check_space(cmd_buffer->device->ws, cs, 7 * 4);
+
+ /* Wait on the upper word of all results. */
+ for (unsigned j = 0; j < 4; j++, src_va += 8) {
+ radv_cp_wait_mem(cs, WAIT_REG_MEM_GREATER_OR_EQUAL, src_va + 4, 0x80000000,
+ 0xffffffff);
+ }
+ }
+ }
+
+ radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.tfb_query_pipeline,
+ pool->bo, dst_buffer->bo, firstQuery * pool->stride,
+ dst_buffer->offset + dstOffset, pool->stride, stride, queryCount, flags, 0,
+ 0);
+ break;
+ default:
+ unreachable("trying to get results of unhandled query type");
+ }
}
-void radv_CmdResetQueryPool(
- VkCommandBuffer commandBuffer,
- VkQueryPool queryPool,
- uint32_t firstQuery,
- uint32_t queryCount)
+void
+radv_CmdResetQueryPool(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery,
+ uint32_t queryCount)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
- uint32_t value = pool->type == VK_QUERY_TYPE_TIMESTAMP
- ? (uint32_t)TIMESTAMP_NOT_READY : 0;
- uint32_t flush_bits = 0;
-
- /* Make sure to sync all previous work if the given command buffer has
- * pending active queries. Otherwise the GPU might write queries data
- * after the reset operation.
- */
- cmd_buffer->state.flush_bits |= cmd_buffer->active_query_flush_bits;
-
- flush_bits |= radv_fill_buffer(cmd_buffer, NULL, pool->bo,
- firstQuery * pool->stride,
- queryCount * pool->stride, value);
-
- if (pool->type == VK_QUERY_TYPE_PIPELINE_STATISTICS) {
- flush_bits |= radv_fill_buffer(cmd_buffer, NULL, pool->bo,
- pool->availability_offset + firstQuery * 4,
- queryCount * 4, 0);
- }
-
- if (flush_bits) {
- /* Only need to flush caches for the compute shader path. */
- cmd_buffer->pending_reset_query = true;
- cmd_buffer->state.flush_bits |= flush_bits;
- }
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
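+   /* Timestamp slots are filled with the TIMESTAMP_NOT_READY sentinel so that
+    * availability can be inferred from the value itself; other pool types are
+    * simply zeroed.
+    */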
+ uint32_t value = pool->type == VK_QUERY_TYPE_TIMESTAMP ? (uint32_t)TIMESTAMP_NOT_READY : 0;
+ uint32_t flush_bits = 0;
+
+ /* Make sure to sync all previous work if the given command buffer has
+    * pending active queries. Otherwise the GPU might write query data
+ * after the reset operation.
+ */
+ cmd_buffer->state.flush_bits |= cmd_buffer->active_query_flush_bits;
+
+ flush_bits |= radv_fill_buffer(cmd_buffer, NULL, pool->bo, firstQuery * pool->stride,
+ queryCount * pool->stride, value);
+
+ if (pool->type == VK_QUERY_TYPE_PIPELINE_STATISTICS) {
+ flush_bits |= radv_fill_buffer(cmd_buffer, NULL, pool->bo,
+ pool->availability_offset + firstQuery * 4, queryCount * 4, 0);
+ }
+
+ if (flush_bits) {
+ /* Only need to flush caches for the compute shader path. */
+ cmd_buffer->pending_reset_query = true;
+ cmd_buffer->state.flush_bits |= flush_bits;
+ }
}
-void radv_ResetQueryPool(
- VkDevice _device,
- VkQueryPool queryPool,
- uint32_t firstQuery,
- uint32_t queryCount)
+void
+radv_ResetQueryPool(VkDevice _device, VkQueryPool queryPool, uint32_t firstQuery,
+ uint32_t queryCount)
{
- RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
+ RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
- uint32_t value = pool->type == VK_QUERY_TYPE_TIMESTAMP
- ? (uint32_t)TIMESTAMP_NOT_READY : 0;
- uint32_t *data = (uint32_t*)(pool->ptr + firstQuery * pool->stride);
- uint32_t *data_end = (uint32_t*)(pool->ptr + (firstQuery + queryCount) * pool->stride);
+ uint32_t value = pool->type == VK_QUERY_TYPE_TIMESTAMP ? (uint32_t)TIMESTAMP_NOT_READY : 0;
+ uint32_t *data = (uint32_t *)(pool->ptr + firstQuery * pool->stride);
+ uint32_t *data_end = (uint32_t *)(pool->ptr + (firstQuery + queryCount) * pool->stride);
- for(uint32_t *p = data; p != data_end; ++p)
- *p = value;
+ for (uint32_t *p = data; p != data_end; ++p)
+ *p = value;
- if (pool->type == VK_QUERY_TYPE_PIPELINE_STATISTICS) {
- memset(pool->ptr + pool->availability_offset + firstQuery * 4,
- 0, queryCount * 4);
- }
+ if (pool->type == VK_QUERY_TYPE_PIPELINE_STATISTICS) {
+ memset(pool->ptr + pool->availability_offset + firstQuery * 4, 0, queryCount * 4);
+ }
}
-static unsigned event_type_for_stream(unsigned stream)
+static unsigned
+event_type_for_stream(unsigned stream)
{
- switch (stream) {
- default:
- case 0: return V_028A90_SAMPLE_STREAMOUTSTATS;
- case 1: return V_028A90_SAMPLE_STREAMOUTSTATS1;
- case 2: return V_028A90_SAMPLE_STREAMOUTSTATS2;
- case 3: return V_028A90_SAMPLE_STREAMOUTSTATS3;
- }
+ switch (stream) {
+ default:
+ case 0:
+ return V_028A90_SAMPLE_STREAMOUTSTATS;
+ case 1:
+ return V_028A90_SAMPLE_STREAMOUTSTATS1;
+ case 2:
+ return V_028A90_SAMPLE_STREAMOUTSTATS2;
+ case 3:
+ return V_028A90_SAMPLE_STREAMOUTSTATS3;
+ }
}
-static void emit_begin_query(struct radv_cmd_buffer *cmd_buffer,
- struct radv_query_pool *pool,
- uint64_t va,
- VkQueryType query_type,
- VkQueryControlFlags flags,
- uint32_t index)
+static void
+emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool, uint64_t va,
+ VkQueryType query_type, VkQueryControlFlags flags, uint32_t index)
{
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- switch (query_type) {
- case VK_QUERY_TYPE_OCCLUSION:
- radeon_check_space(cmd_buffer->device->ws, cs, 7);
-
- ++cmd_buffer->state.active_occlusion_queries;
- if (cmd_buffer->state.active_occlusion_queries == 1) {
- if (flags & VK_QUERY_CONTROL_PRECISE_BIT) {
- /* This is the first occlusion query, enable
- * the hint if the precision bit is set.
- */
- cmd_buffer->state.perfect_occlusion_queries_enabled = true;
- }
-
- radv_set_db_count_control(cmd_buffer);
- } else {
- if ((flags & VK_QUERY_CONTROL_PRECISE_BIT) &&
- !cmd_buffer->state.perfect_occlusion_queries_enabled) {
- /* This is not the first query, but this one
- * needs to enable precision, DB_COUNT_CONTROL
- * has to be updated accordingly.
- */
- cmd_buffer->state.perfect_occlusion_queries_enabled = true;
-
- radv_set_db_count_control(cmd_buffer);
- }
- }
-
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1));
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- break;
- case VK_QUERY_TYPE_PIPELINE_STATISTICS:
- radeon_check_space(cmd_buffer->device->ws, cs, 4);
-
- ++cmd_buffer->state.active_pipeline_queries;
- if (cmd_buffer->state.active_pipeline_queries == 1) {
- cmd_buffer->state.flush_bits &= ~RADV_CMD_FLAG_STOP_PIPELINE_STATS;
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_START_PIPELINE_STATS;
- }
-
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
-
- if (radv_query_pool_needs_gds(cmd_buffer->device, pool)) {
- int idx = radv_get_pipeline_statistics_index(VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT);
-
- /* Make sure GDS is idle before copying the value. */
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_INV_L2;
- si_emit_cache_flush(cmd_buffer);
-
- va += 8 * idx;
-
- radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_GDS) |
- COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
- COPY_DATA_WR_CONFIRM);
- radeon_emit(cs, 0);
- radeon_emit(cs, 0);
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
-
- /* Record that the command buffer needs GDS. */
- cmd_buffer->gds_needed = true;
-
- cmd_buffer->state.active_pipeline_gds_queries++;
- }
- break;
- case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
- radeon_check_space(cmd_buffer->device->ws, cs, 4);
-
- assert(index < MAX_SO_STREAMS);
-
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
- radeon_emit(cs, EVENT_TYPE(event_type_for_stream(index)) | EVENT_INDEX(3));
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- break;
- default:
- unreachable("beginning unhandled query type");
- }
-
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ switch (query_type) {
+ case VK_QUERY_TYPE_OCCLUSION:
+ radeon_check_space(cmd_buffer->device->ws, cs, 7);
+
+ ++cmd_buffer->state.active_occlusion_queries;
+ if (cmd_buffer->state.active_occlusion_queries == 1) {
+ if (flags & VK_QUERY_CONTROL_PRECISE_BIT) {
+ /* This is the first occlusion query, enable
+ * the hint if the precision bit is set.
+ */
+ cmd_buffer->state.perfect_occlusion_queries_enabled = true;
+ }
+
+ radv_set_db_count_control(cmd_buffer);
+ } else {
+ if ((flags & VK_QUERY_CONTROL_PRECISE_BIT) &&
+ !cmd_buffer->state.perfect_occlusion_queries_enabled) {
+ /* This is not the first query, but this one
+          * needs to enable precision, so DB_COUNT_CONTROL
+ * has to be updated accordingly.
+ */
+ cmd_buffer->state.perfect_occlusion_queries_enabled = true;
+
+ radv_set_db_count_control(cmd_buffer);
+ }
+ }
+
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ break;
+ case VK_QUERY_TYPE_PIPELINE_STATISTICS:
+ radeon_check_space(cmd_buffer->device->ws, cs, 4);
+
+ ++cmd_buffer->state.active_pipeline_queries;
+ if (cmd_buffer->state.active_pipeline_queries == 1) {
+ cmd_buffer->state.flush_bits &= ~RADV_CMD_FLAG_STOP_PIPELINE_STATS;
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_START_PIPELINE_STATS;
+ }
+
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+
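+      /* With NGG, the GS primitive count is accumulated in GDS rather than by
+       * the pipeline-statistics counters, so snapshot it into the
+       * GS_PRIMITIVES slot of this query.
+       */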
+ if (radv_query_pool_needs_gds(cmd_buffer->device, pool)) {
+ int idx = radv_get_pipeline_statistics_index(
+ VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT);
+
+ /* Make sure GDS is idle before copying the value. */
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_L2;
+ si_emit_cache_flush(cmd_buffer);
+
+ va += 8 * idx;
+
+ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_GDS) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
+ COPY_DATA_WR_CONFIRM);
+ radeon_emit(cs, 0);
+ radeon_emit(cs, 0);
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+
+ /* Record that the command buffer needs GDS. */
+ cmd_buffer->gds_needed = true;
+
+ cmd_buffer->state.active_pipeline_gds_queries++;
+ }
+ break;
+ case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
+ radeon_check_space(cmd_buffer->device->ws, cs, 4);
+
+ assert(index < MAX_SO_STREAMS);
+
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
+ radeon_emit(cs, EVENT_TYPE(event_type_for_stream(index)) | EVENT_INDEX(3));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ break;
+ default:
+ unreachable("beginning unhandled query type");
+ }
}
-static void emit_end_query(struct radv_cmd_buffer *cmd_buffer,
- struct radv_query_pool *pool,
- uint64_t va, uint64_t avail_va,
- VkQueryType query_type, uint32_t index)
+static void
+emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool, uint64_t va,
+ uint64_t avail_va, VkQueryType query_type, uint32_t index)
{
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- switch (query_type) {
- case VK_QUERY_TYPE_OCCLUSION:
- radeon_check_space(cmd_buffer->device->ws, cs, 14);
-
- cmd_buffer->state.active_occlusion_queries--;
- if (cmd_buffer->state.active_occlusion_queries == 0) {
- radv_set_db_count_control(cmd_buffer);
-
- /* Reset the perfect occlusion queries hint now that no
- * queries are active.
- */
- cmd_buffer->state.perfect_occlusion_queries_enabled = false;
- }
-
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1));
- radeon_emit(cs, va + 8);
- radeon_emit(cs, (va + 8) >> 32);
-
- break;
- case VK_QUERY_TYPE_PIPELINE_STATISTICS:
- radeon_check_space(cmd_buffer->device->ws, cs, 16);
-
- cmd_buffer->state.active_pipeline_queries--;
- if (cmd_buffer->state.active_pipeline_queries == 0) {
- cmd_buffer->state.flush_bits &= ~RADV_CMD_FLAG_START_PIPELINE_STATS;
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_STOP_PIPELINE_STATS;
- }
- va += pipelinestat_block_size;
-
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
-
- si_cs_emit_write_event_eop(cs,
- cmd_buffer->device->physical_device->rad_info.chip_class,
- radv_cmd_buffer_uses_mec(cmd_buffer),
- V_028A90_BOTTOM_OF_PIPE_TS, 0,
- EOP_DST_SEL_MEM,
- EOP_DATA_SEL_VALUE_32BIT,
- avail_va, 1,
- cmd_buffer->gfx9_eop_bug_va);
-
- if (radv_query_pool_needs_gds(cmd_buffer->device, pool)) {
- int idx = radv_get_pipeline_statistics_index(VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT);
-
- /* Make sure GDS is idle before copying the value. */
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_INV_L2;
- si_emit_cache_flush(cmd_buffer);
-
- va += 8 * idx;
-
- radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_GDS) |
- COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
- COPY_DATA_WR_CONFIRM);
- radeon_emit(cs, 0);
- radeon_emit(cs, 0);
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
-
- cmd_buffer->state.active_pipeline_gds_queries--;
- }
- break;
- case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
- radeon_check_space(cmd_buffer->device->ws, cs, 4);
-
- assert(index < MAX_SO_STREAMS);
-
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
- radeon_emit(cs, EVENT_TYPE(event_type_for_stream(index)) | EVENT_INDEX(3));
- radeon_emit(cs, (va + 16));
- radeon_emit(cs, (va + 16) >> 32);
- break;
- default:
- unreachable("ending unhandled query type");
- }
-
- cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_INV_L2 |
- RADV_CMD_FLAG_INV_VCACHE;
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
- cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
- RADV_CMD_FLAG_FLUSH_AND_INV_DB;
- }
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ switch (query_type) {
+ case VK_QUERY_TYPE_OCCLUSION:
+ radeon_check_space(cmd_buffer->device->ws, cs, 14);
+
+ cmd_buffer->state.active_occlusion_queries--;
+ if (cmd_buffer->state.active_occlusion_queries == 0) {
+ radv_set_db_count_control(cmd_buffer);
+
+ /* Reset the perfect occlusion queries hint now that no
+ * queries are active.
+ */
+ cmd_buffer->state.perfect_occlusion_queries_enabled = false;
+ }
+
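+      /* Occlusion results are stored as begin/end pairs per RB; this
+       * ZPASS_DONE writes the end counters, 8 bytes into each pair.
+       */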
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1));
+ radeon_emit(cs, va + 8);
+ radeon_emit(cs, (va + 8) >> 32);
+
+ break;
+ case VK_QUERY_TYPE_PIPELINE_STATISTICS:
+ radeon_check_space(cmd_buffer->device->ws, cs, 16);
+
+ cmd_buffer->state.active_pipeline_queries--;
+ if (cmd_buffer->state.active_pipeline_queries == 0) {
+ cmd_buffer->state.flush_bits &= ~RADV_CMD_FLAG_START_PIPELINE_STATS;
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_STOP_PIPELINE_STATS;
+ }
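+      /* The end snapshot of the statistics is written right after the begin
+       * block.
+       */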
+ va += pipelinestat_block_size;
+
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+
+ si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.chip_class,
+ radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_BOTTOM_OF_PIPE_TS,
+ 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, avail_va, 1,
+ cmd_buffer->gfx9_eop_bug_va);
+
+ if (radv_query_pool_needs_gds(cmd_buffer->device, pool)) {
+ int idx = radv_get_pipeline_statistics_index(
+ VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT);
+
+ /* Make sure GDS is idle before copying the value. */
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_L2;
+ si_emit_cache_flush(cmd_buffer);
+
+ va += 8 * idx;
+
+ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_GDS) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
+ COPY_DATA_WR_CONFIRM);
+ radeon_emit(cs, 0);
+ radeon_emit(cs, 0);
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+
+ cmd_buffer->state.active_pipeline_gds_queries--;
+ }
+ break;
+ case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
+ radeon_check_space(cmd_buffer->device->ws, cs, 4);
+
+ assert(index < MAX_SO_STREAMS);
+
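+      /* The end streamout statistics follow the two 64-bit begin counters,
+       * hence the 16-byte offset.
+       */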
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
+ radeon_emit(cs, EVENT_TYPE(event_type_for_stream(index)) | EVENT_INDEX(3));
+ radeon_emit(cs, (va + 16));
+ radeon_emit(cs, (va + 16) >> 32);
+ break;
+ default:
+ unreachable("ending unhandled query type");
+ }
+
+ cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
+ RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_L2 |
+ RADV_CMD_FLAG_INV_VCACHE;
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
+ cmd_buffer->active_query_flush_bits |=
+ RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB;
+ }
}
-void radv_CmdBeginQueryIndexedEXT(
- VkCommandBuffer commandBuffer,
- VkQueryPool queryPool,
- uint32_t query,
- VkQueryControlFlags flags,
- uint32_t index)
+void
+radv_CmdBeginQueryIndexedEXT(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query,
+ VkQueryControlFlags flags, uint32_t index)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- uint64_t va = radv_buffer_get_va(pool->bo);
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ uint64_t va = radv_buffer_get_va(pool->bo);
- radv_cs_add_buffer(cmd_buffer->device->ws, cs, pool->bo);
+ radv_cs_add_buffer(cmd_buffer->device->ws, cs, pool->bo);
- emit_query_flush(cmd_buffer, pool);
+ emit_query_flush(cmd_buffer, pool);
- va += pool->stride * query;
+ va += pool->stride * query;
- emit_begin_query(cmd_buffer, pool, va, pool->type, flags, index);
+ emit_begin_query(cmd_buffer, pool, va, pool->type, flags, index);
}
-void radv_CmdBeginQuery(
- VkCommandBuffer commandBuffer,
- VkQueryPool queryPool,
- uint32_t query,
- VkQueryControlFlags flags)
+void
+radv_CmdBeginQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query,
+ VkQueryControlFlags flags)
{
- radv_CmdBeginQueryIndexedEXT(commandBuffer, queryPool, query, flags, 0);
+ radv_CmdBeginQueryIndexedEXT(commandBuffer, queryPool, query, flags, 0);
}
-void radv_CmdEndQueryIndexedEXT(
- VkCommandBuffer commandBuffer,
- VkQueryPool queryPool,
- uint32_t query,
- uint32_t index)
+void
+radv_CmdEndQueryIndexedEXT(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query,
+ uint32_t index)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
- uint64_t va = radv_buffer_get_va(pool->bo);
- uint64_t avail_va = va + pool->availability_offset + 4 * query;
- va += pool->stride * query;
-
- /* Do not need to add the pool BO to the list because the query must
- * currently be active, which means the BO is already in the list.
- */
- emit_end_query(cmd_buffer, pool, va, avail_va, pool->type, index);
-
- /*
- * For multiview we have to emit a query for each bit in the mask,
- * however the first query we emit will get the totals for all the
- * operations, so we don't want to get a real value in the other
- * queries. This emits a fake begin/end sequence so the waiting
- * code gets a completed query value and doesn't hang, but the
- * query returns 0.
- */
- if (cmd_buffer->state.subpass && cmd_buffer->state.subpass->view_mask) {
- for (unsigned i = 1; i < util_bitcount(cmd_buffer->state.subpass->view_mask); i++) {
- va += pool->stride;
- avail_va += 4;
- emit_begin_query(cmd_buffer, pool, va, pool->type, 0, 0);
- emit_end_query(cmd_buffer, pool, va, avail_va, pool->type, 0);
- }
- }
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
+ uint64_t va = radv_buffer_get_va(pool->bo);
+ uint64_t avail_va = va + pool->availability_offset + 4 * query;
+ va += pool->stride * query;
+
+   /* We do not need to add the pool BO to the list because the query must
+ * currently be active, which means the BO is already in the list.
+ */
+ emit_end_query(cmd_buffer, pool, va, avail_va, pool->type, index);
+
+ /*
+    * For multiview we have to emit a query for each bit in the mask;
+    * however, the first query we emit will get the totals for all the
+ * operations, so we don't want to get a real value in the other
+ * queries. This emits a fake begin/end sequence so the waiting
+ * code gets a completed query value and doesn't hang, but the
+ * query returns 0.
+ */
+ if (cmd_buffer->state.subpass && cmd_buffer->state.subpass->view_mask) {
+ for (unsigned i = 1; i < util_bitcount(cmd_buffer->state.subpass->view_mask); i++) {
+ va += pool->stride;
+ avail_va += 4;
+ emit_begin_query(cmd_buffer, pool, va, pool->type, 0, 0);
+ emit_end_query(cmd_buffer, pool, va, avail_va, pool->type, 0);
+ }
+ }
}
-void radv_CmdEndQuery(
- VkCommandBuffer commandBuffer,
- VkQueryPool queryPool,
- uint32_t query)
+void
+radv_CmdEndQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query)
{
- radv_CmdEndQueryIndexedEXT(commandBuffer, queryPool, query, 0);
+ radv_CmdEndQueryIndexedEXT(commandBuffer, queryPool, query, 0);
}
-void radv_CmdWriteTimestamp(
- VkCommandBuffer commandBuffer,
- VkPipelineStageFlagBits pipelineStage,
- VkQueryPool queryPool,
- uint32_t query)
+void
+radv_CmdWriteTimestamp(VkCommandBuffer commandBuffer, VkPipelineStageFlagBits pipelineStage,
+ VkQueryPool queryPool, uint32_t query)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
- bool mec = radv_cmd_buffer_uses_mec(cmd_buffer);
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- uint64_t va = radv_buffer_get_va(pool->bo);
- uint64_t query_va = va + pool->stride * query;
-
- radv_cs_add_buffer(cmd_buffer->device->ws, cs, pool->bo);
-
- emit_query_flush(cmd_buffer, pool);
-
- int num_queries = 1;
- if (cmd_buffer->state.subpass && cmd_buffer->state.subpass->view_mask)
- num_queries = util_bitcount(cmd_buffer->state.subpass->view_mask);
-
- ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 28 * num_queries);
-
- for (unsigned i = 0; i < num_queries; i++) {
- switch(pipelineStage) {
- case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT:
- radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_COUNT_SEL | COPY_DATA_WR_CONFIRM |
- COPY_DATA_SRC_SEL(COPY_DATA_TIMESTAMP) |
- COPY_DATA_DST_SEL(V_370_MEM));
- radeon_emit(cs, 0);
- radeon_emit(cs, 0);
- radeon_emit(cs, query_va);
- radeon_emit(cs, query_va >> 32);
- break;
- default:
- si_cs_emit_write_event_eop(cs,
- cmd_buffer->device->physical_device->rad_info.chip_class,
- mec,
- V_028A90_BOTTOM_OF_PIPE_TS, 0,
- EOP_DST_SEL_MEM,
- EOP_DATA_SEL_TIMESTAMP,
- query_va, 0,
- cmd_buffer->gfx9_eop_bug_va);
- break;
- }
- query_va += pool->stride;
- }
-
- cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_INV_L2 |
- RADV_CMD_FLAG_INV_VCACHE;
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
- cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
- RADV_CMD_FLAG_FLUSH_AND_INV_DB;
- }
-
- assert(cmd_buffer->cs->cdw <= cdw_max);
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
+ bool mec = radv_cmd_buffer_uses_mec(cmd_buffer);
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ uint64_t va = radv_buffer_get_va(pool->bo);
+ uint64_t query_va = va + pool->stride * query;
+
+ radv_cs_add_buffer(cmd_buffer->device->ws, cs, pool->bo);
+
+ emit_query_flush(cmd_buffer, pool);
+
+ int num_queries = 1;
+ if (cmd_buffer->state.subpass && cmd_buffer->state.subpass->view_mask)
+ num_queries = util_bitcount(cmd_buffer->state.subpass->view_mask);
+
+ ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 28 * num_queries);
+
+ for (unsigned i = 0; i < num_queries; i++) {
+ switch (pipelineStage) {
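+      /* TOP_OF_PIPE copies the current GPU clock to memory immediately on the
+       * ME; all other stages use a bottom-of-pipe EOP event so the timestamp
+       * is only written after prior work has finished.
+       */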
+ case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT:
+ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+ radeon_emit(cs, COPY_DATA_COUNT_SEL | COPY_DATA_WR_CONFIRM |
+ COPY_DATA_SRC_SEL(COPY_DATA_TIMESTAMP) | COPY_DATA_DST_SEL(V_370_MEM));
+ radeon_emit(cs, 0);
+ radeon_emit(cs, 0);
+ radeon_emit(cs, query_va);
+ radeon_emit(cs, query_va >> 32);
+ break;
+ default:
+ si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.chip_class,
+ mec, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM,
+ EOP_DATA_SEL_TIMESTAMP, query_va, 0,
+ cmd_buffer->gfx9_eop_bug_va);
+ break;
+ }
+ query_va += pool->stride;
+ }
+
+ cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
+ RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_L2 |
+ RADV_CMD_FLAG_INV_VCACHE;
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
+ cmd_buffer->active_query_flush_bits |=
+ RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB;
+ }
+
+ assert(cmd_buffer->cs->cdw <= cdw_max);
}
diff --git a/src/amd/vulkan/radv_radeon_winsys.h b/src/amd/vulkan/radv_radeon_winsys.h
index 4189e3b3dba..5a05c3ee440 100644
--- a/src/amd/vulkan/radv_radeon_winsys.h
+++ b/src/amd/vulkan/radv_radeon_winsys.h
@@ -29,337 +29,313 @@
#ifndef RADV_RADEON_WINSYS_H
#define RADV_RADEON_WINSYS_H
-#include <stdio.h>
-#include <stdint.h>
#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include "util/u_math.h"
+#include "util/u_memory.h"
#include <vulkan/vulkan.h>
#include "amd_family.h"
-#include "util/u_memory.h"
-#include "util/u_math.h"
struct radeon_info;
struct ac_surf_info;
struct radeon_surf;
enum radeon_bo_domain { /* bitfield */
- RADEON_DOMAIN_GTT = 2,
- RADEON_DOMAIN_VRAM = 4,
- RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT,
- RADEON_DOMAIN_GDS = 8,
- RADEON_DOMAIN_OA = 16,
+ RADEON_DOMAIN_GTT = 2,
+ RADEON_DOMAIN_VRAM = 4,
+ RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT,
+ RADEON_DOMAIN_GDS = 8,
+ RADEON_DOMAIN_OA = 16,
};
enum radeon_bo_flag { /* bitfield */
- RADEON_FLAG_GTT_WC = (1 << 0),
- RADEON_FLAG_CPU_ACCESS = (1 << 1),
- RADEON_FLAG_NO_CPU_ACCESS = (1 << 2),
- RADEON_FLAG_VIRTUAL = (1 << 3),
- RADEON_FLAG_VA_UNCACHED = (1 << 4),
- RADEON_FLAG_IMPLICIT_SYNC = (1 << 5),
- RADEON_FLAG_NO_INTERPROCESS_SHARING = (1 << 6),
- RADEON_FLAG_READ_ONLY = (1 << 7),
- RADEON_FLAG_32BIT = (1 << 8),
- RADEON_FLAG_PREFER_LOCAL_BO = (1 << 9),
- RADEON_FLAG_ZERO_VRAM = (1 << 10),
+ RADEON_FLAG_GTT_WC = (1 << 0),
+ RADEON_FLAG_CPU_ACCESS = (1 << 1),
+ RADEON_FLAG_NO_CPU_ACCESS = (1 << 2),
+ RADEON_FLAG_VIRTUAL = (1 << 3),
+ RADEON_FLAG_VA_UNCACHED = (1 << 4),
+ RADEON_FLAG_IMPLICIT_SYNC = (1 << 5),
+ RADEON_FLAG_NO_INTERPROCESS_SHARING = (1 << 6),
+ RADEON_FLAG_READ_ONLY = (1 << 7),
+ RADEON_FLAG_32BIT = (1 << 8),
+ RADEON_FLAG_PREFER_LOCAL_BO = (1 << 9),
+ RADEON_FLAG_ZERO_VRAM = (1 << 10),
};
enum radeon_ctx_priority {
- RADEON_CTX_PRIORITY_INVALID = -1,
- RADEON_CTX_PRIORITY_LOW = 0,
- RADEON_CTX_PRIORITY_MEDIUM,
- RADEON_CTX_PRIORITY_HIGH,
- RADEON_CTX_PRIORITY_REALTIME,
+ RADEON_CTX_PRIORITY_INVALID = -1,
+ RADEON_CTX_PRIORITY_LOW = 0,
+ RADEON_CTX_PRIORITY_MEDIUM,
+ RADEON_CTX_PRIORITY_HIGH,
+ RADEON_CTX_PRIORITY_REALTIME,
};
enum radeon_value_id {
- RADEON_ALLOCATED_VRAM,
- RADEON_ALLOCATED_VRAM_VIS,
- RADEON_ALLOCATED_GTT,
- RADEON_TIMESTAMP,
- RADEON_NUM_BYTES_MOVED,
- RADEON_NUM_EVICTIONS,
- RADEON_NUM_VRAM_CPU_PAGE_FAULTS,
- RADEON_VRAM_USAGE,
- RADEON_VRAM_VIS_USAGE,
- RADEON_GTT_USAGE,
- RADEON_GPU_TEMPERATURE,
- RADEON_CURRENT_SCLK,
- RADEON_CURRENT_MCLK,
+ RADEON_ALLOCATED_VRAM,
+ RADEON_ALLOCATED_VRAM_VIS,
+ RADEON_ALLOCATED_GTT,
+ RADEON_TIMESTAMP,
+ RADEON_NUM_BYTES_MOVED,
+ RADEON_NUM_EVICTIONS,
+ RADEON_NUM_VRAM_CPU_PAGE_FAULTS,
+ RADEON_VRAM_USAGE,
+ RADEON_VRAM_VIS_USAGE,
+ RADEON_GTT_USAGE,
+ RADEON_GPU_TEMPERATURE,
+ RADEON_CURRENT_SCLK,
+ RADEON_CURRENT_MCLK,
};
struct radeon_cmdbuf {
- unsigned cdw; /* Number of used dwords. */
- unsigned max_dw; /* Maximum number of dwords. */
- uint32_t *buf; /* The base pointer of the chunk. */
+ unsigned cdw; /* Number of used dwords. */
+ unsigned max_dw; /* Maximum number of dwords. */
+ uint32_t *buf; /* The base pointer of the chunk. */
};
-#define RADEON_SURF_TYPE_MASK 0xFF
-#define RADEON_SURF_TYPE_SHIFT 0
-#define RADEON_SURF_TYPE_1D 0
-#define RADEON_SURF_TYPE_2D 1
-#define RADEON_SURF_TYPE_3D 2
-#define RADEON_SURF_TYPE_CUBEMAP 3
-#define RADEON_SURF_TYPE_1D_ARRAY 4
-#define RADEON_SURF_TYPE_2D_ARRAY 5
-#define RADEON_SURF_MODE_MASK 0xFF
-#define RADEON_SURF_MODE_SHIFT 8
-
-#define RADEON_SURF_GET(v, field) (((v) >> RADEON_SURF_ ## field ## _SHIFT) & RADEON_SURF_ ## field ## _MASK)
-#define RADEON_SURF_SET(v, field) (((v) & RADEON_SURF_ ## field ## _MASK) << RADEON_SURF_ ## field ## _SHIFT)
-#define RADEON_SURF_CLR(v, field) ((v) & ~(RADEON_SURF_ ## field ## _MASK << RADEON_SURF_ ## field ## _SHIFT))
+#define RADEON_SURF_TYPE_MASK 0xFF
+#define RADEON_SURF_TYPE_SHIFT 0
+#define RADEON_SURF_TYPE_1D 0
+#define RADEON_SURF_TYPE_2D 1
+#define RADEON_SURF_TYPE_3D 2
+#define RADEON_SURF_TYPE_CUBEMAP 3
+#define RADEON_SURF_TYPE_1D_ARRAY 4
+#define RADEON_SURF_TYPE_2D_ARRAY 5
+#define RADEON_SURF_MODE_MASK 0xFF
+#define RADEON_SURF_MODE_SHIFT 8
+
+#define RADEON_SURF_GET(v, field) \
+ (((v) >> RADEON_SURF_##field##_SHIFT) & RADEON_SURF_##field##_MASK)
+#define RADEON_SURF_SET(v, field) (((v)&RADEON_SURF_##field##_MASK) << RADEON_SURF_##field##_SHIFT)
+#define RADEON_SURF_CLR(v, field) \
+ ((v) & ~(RADEON_SURF_##field##_MASK << RADEON_SURF_##field##_SHIFT))
enum radeon_bo_layout {
- RADEON_LAYOUT_LINEAR = 0,
- RADEON_LAYOUT_TILED,
- RADEON_LAYOUT_SQUARETILED,
+ RADEON_LAYOUT_LINEAR = 0,
+ RADEON_LAYOUT_TILED,
+ RADEON_LAYOUT_SQUARETILED,
- RADEON_LAYOUT_UNKNOWN
+ RADEON_LAYOUT_UNKNOWN
};
/* Tiling info for display code, DRI sharing, and other data. */
struct radeon_bo_metadata {
- /* Tiling flags describing the texture layout for display code
- * and DRI sharing.
- */
- union {
- struct {
- enum radeon_bo_layout microtile;
- enum radeon_bo_layout macrotile;
- unsigned pipe_config;
- unsigned bankw;
- unsigned bankh;
- unsigned tile_split;
- unsigned mtilea;
- unsigned num_banks;
- unsigned stride;
- bool scanout;
- } legacy;
-
- struct {
- /* surface flags */
- unsigned swizzle_mode:5;
- bool scanout;
- uint32_t dcc_offset_256b;
- uint32_t dcc_pitch_max;
- bool dcc_independent_64b_blocks;
- bool dcc_independent_128b_blocks;
- unsigned dcc_max_compressed_block_size;
- } gfx9;
- } u;
-
- /* Additional metadata associated with the buffer, in bytes.
- * The maximum size is 64 * 4. This is opaque for the winsys & kernel.
- * Supported by amdgpu only.
- */
- uint32_t size_metadata;
- uint32_t metadata[64];
+ /* Tiling flags describing the texture layout for display code
+ * and DRI sharing.
+ */
+ union {
+ struct {
+ enum radeon_bo_layout microtile;
+ enum radeon_bo_layout macrotile;
+ unsigned pipe_config;
+ unsigned bankw;
+ unsigned bankh;
+ unsigned tile_split;
+ unsigned mtilea;
+ unsigned num_banks;
+ unsigned stride;
+ bool scanout;
+ } legacy;
+
+ struct {
+ /* surface flags */
+ unsigned swizzle_mode : 5;
+ bool scanout;
+ uint32_t dcc_offset_256b;
+ uint32_t dcc_pitch_max;
+ bool dcc_independent_64b_blocks;
+ bool dcc_independent_128b_blocks;
+ unsigned dcc_max_compressed_block_size;
+ } gfx9;
+ } u;
+
+ /* Additional metadata associated with the buffer, in bytes.
+ * The maximum size is 64 * 4. This is opaque for the winsys & kernel.
+ * Supported by amdgpu only.
+ */
+ uint32_t size_metadata;
+ uint32_t metadata[64];
};
struct radeon_winsys_ctx;
struct radeon_winsys_bo {
- uint64_t va;
- bool is_local;
- bool vram_no_cpu_access;
- bool use_global_list;
- enum radeon_bo_domain initial_domain;
+ uint64_t va;
+ bool is_local;
+ bool vram_no_cpu_access;
+ bool use_global_list;
+ enum radeon_bo_domain initial_domain;
};
struct radv_winsys_sem_counts {
- uint32_t syncobj_count;
- uint32_t syncobj_reset_count; /* for wait only, whether to reset the syncobj */
- uint32_t timeline_syncobj_count;
- uint32_t *syncobj;
- uint64_t *points;
+ uint32_t syncobj_count;
+ uint32_t syncobj_reset_count; /* for wait only, whether to reset the syncobj */
+ uint32_t timeline_syncobj_count;
+ uint32_t *syncobj;
+ uint64_t *points;
};
struct radv_winsys_sem_info {
- bool cs_emit_signal;
- bool cs_emit_wait;
- struct radv_winsys_sem_counts wait;
- struct radv_winsys_sem_counts signal;
+ bool cs_emit_signal;
+ bool cs_emit_wait;
+ struct radv_winsys_sem_counts wait;
+ struct radv_winsys_sem_counts signal;
};
struct radv_winsys_bo_list {
- struct radeon_winsys_bo **bos;
- unsigned count;
+ struct radeon_winsys_bo **bos;
+ unsigned count;
};
/* Kernel effectively allows 0-31. This sets some priorities for fixed
* functionality buffers */
enum {
- RADV_BO_PRIORITY_APPLICATION_MAX = 28,
-
- /* virtual buffers have 0 priority since the priority is not used. */
- RADV_BO_PRIORITY_VIRTUAL = 0,
-
- RADV_BO_PRIORITY_METADATA = 10,
- /* This should be considerably lower than most of the stuff below,
- * but how much lower is hard to say since we don't know application
- * assignments. Put it pretty high since it is GTT anyway. */
- RADV_BO_PRIORITY_QUERY_POOL = 29,
-
- RADV_BO_PRIORITY_DESCRIPTOR = 30,
- RADV_BO_PRIORITY_UPLOAD_BUFFER = 30,
- RADV_BO_PRIORITY_FENCE = 30,
- RADV_BO_PRIORITY_SHADER = 31,
- RADV_BO_PRIORITY_SCRATCH = 31,
- RADV_BO_PRIORITY_CS = 31,
+ RADV_BO_PRIORITY_APPLICATION_MAX = 28,
+
+ /* virtual buffers have 0 priority since the priority is not used. */
+ RADV_BO_PRIORITY_VIRTUAL = 0,
+
+ RADV_BO_PRIORITY_METADATA = 10,
+ /* This should be considerably lower than most of the stuff below,
+ * but how much lower is hard to say since we don't know application
+ * assignments. Put it pretty high since it is GTT anyway. */
+ RADV_BO_PRIORITY_QUERY_POOL = 29,
+
+ RADV_BO_PRIORITY_DESCRIPTOR = 30,
+ RADV_BO_PRIORITY_UPLOAD_BUFFER = 30,
+ RADV_BO_PRIORITY_FENCE = 30,
+ RADV_BO_PRIORITY_SHADER = 31,
+ RADV_BO_PRIORITY_SCRATCH = 31,
+ RADV_BO_PRIORITY_CS = 31,
};
struct radeon_winsys {
- void (*destroy)(struct radeon_winsys *ws);
-
- void (*query_info)(struct radeon_winsys *ws,
- struct radeon_info *info);
+ void (*destroy)(struct radeon_winsys *ws);
- uint64_t (*query_value)(struct radeon_winsys *ws,
- enum radeon_value_id value);
+ void (*query_info)(struct radeon_winsys *ws, struct radeon_info *info);
- bool (*read_registers)(struct radeon_winsys *ws, unsigned reg_offset,
- unsigned num_registers, uint32_t *out);
+ uint64_t (*query_value)(struct radeon_winsys *ws, enum radeon_value_id value);
- const char *(*get_chip_name)(struct radeon_winsys *ws);
+ bool (*read_registers)(struct radeon_winsys *ws, unsigned reg_offset, unsigned num_registers,
+ uint32_t *out);
- struct radeon_winsys_bo *(*buffer_create)(struct radeon_winsys *ws,
- uint64_t size,
- unsigned alignment,
- enum radeon_bo_domain domain,
- enum radeon_bo_flag flags,
- unsigned priority);
+ const char *(*get_chip_name)(struct radeon_winsys *ws);
- void (*buffer_destroy)(struct radeon_winsys *ws,
- struct radeon_winsys_bo *bo);
- void *(*buffer_map)(struct radeon_winsys_bo *bo);
+ struct radeon_winsys_bo *(*buffer_create)(struct radeon_winsys *ws, uint64_t size,
+ unsigned alignment, enum radeon_bo_domain domain,
+ enum radeon_bo_flag flags, unsigned priority);
- struct radeon_winsys_bo *(*buffer_from_ptr)(struct radeon_winsys *ws,
- void *pointer,
- uint64_t size,
- unsigned priority);
+ void (*buffer_destroy)(struct radeon_winsys *ws, struct radeon_winsys_bo *bo);
+ void *(*buffer_map)(struct radeon_winsys_bo *bo);
- struct radeon_winsys_bo *(*buffer_from_fd)(struct radeon_winsys *ws,
- int fd,
- unsigned priority,
- uint64_t *alloc_size);
+ struct radeon_winsys_bo *(*buffer_from_ptr)(struct radeon_winsys *ws, void *pointer,
+ uint64_t size, unsigned priority);
- bool (*buffer_get_fd)(struct radeon_winsys *ws,
- struct radeon_winsys_bo *bo,
- int *fd);
+ struct radeon_winsys_bo *(*buffer_from_fd)(struct radeon_winsys *ws, int fd, unsigned priority,
+ uint64_t *alloc_size);
- bool (*buffer_get_flags_from_fd)(struct radeon_winsys *ws, int fd,
- enum radeon_bo_domain *domains,
- enum radeon_bo_flag *flags);
+ bool (*buffer_get_fd)(struct radeon_winsys *ws, struct radeon_winsys_bo *bo, int *fd);
- void (*buffer_unmap)(struct radeon_winsys_bo *bo);
+ bool (*buffer_get_flags_from_fd)(struct radeon_winsys *ws, int fd,
+ enum radeon_bo_domain *domains, enum radeon_bo_flag *flags);
- void (*buffer_set_metadata)(struct radeon_winsys *ws,
- struct radeon_winsys_bo *bo,
- struct radeon_bo_metadata *md);
- void (*buffer_get_metadata)(struct radeon_winsys *ws,
- struct radeon_winsys_bo *bo,
- struct radeon_bo_metadata *md);
+ void (*buffer_unmap)(struct radeon_winsys_bo *bo);
- VkResult (*buffer_virtual_bind)(struct radeon_winsys *ws,
- struct radeon_winsys_bo *parent,
- uint64_t offset, uint64_t size,
- struct radeon_winsys_bo *bo, uint64_t bo_offset);
+ void (*buffer_set_metadata)(struct radeon_winsys *ws, struct radeon_winsys_bo *bo,
+ struct radeon_bo_metadata *md);
+ void (*buffer_get_metadata)(struct radeon_winsys *ws, struct radeon_winsys_bo *bo,
+ struct radeon_bo_metadata *md);
- VkResult (*buffer_make_resident)(struct radeon_winsys *ws,
- struct radeon_winsys_bo *bo,
- bool resident);
+ VkResult (*buffer_virtual_bind)(struct radeon_winsys *ws, struct radeon_winsys_bo *parent,
+ uint64_t offset, uint64_t size, struct radeon_winsys_bo *bo,
+ uint64_t bo_offset);
- VkResult (*ctx_create)(struct radeon_winsys *ws,
- enum radeon_ctx_priority priority,
- struct radeon_winsys_ctx **ctx);
- void (*ctx_destroy)(struct radeon_winsys_ctx *ctx);
+ VkResult (*buffer_make_resident)(struct radeon_winsys *ws, struct radeon_winsys_bo *bo,
+ bool resident);
- bool (*ctx_wait_idle)(struct radeon_winsys_ctx *ctx,
- enum ring_type ring_type, int ring_index);
+ VkResult (*ctx_create)(struct radeon_winsys *ws, enum radeon_ctx_priority priority,
+ struct radeon_winsys_ctx **ctx);
+ void (*ctx_destroy)(struct radeon_winsys_ctx *ctx);
- struct radeon_cmdbuf *(*cs_create)(struct radeon_winsys *ws,
- enum ring_type ring_type);
+ bool (*ctx_wait_idle)(struct radeon_winsys_ctx *ctx, enum ring_type ring_type, int ring_index);
- void (*cs_destroy)(struct radeon_cmdbuf *cs);
+ struct radeon_cmdbuf *(*cs_create)(struct radeon_winsys *ws, enum ring_type ring_type);
- void (*cs_reset)(struct radeon_cmdbuf *cs);
+ void (*cs_destroy)(struct radeon_cmdbuf *cs);
- VkResult (*cs_finalize)(struct radeon_cmdbuf *cs);
+ void (*cs_reset)(struct radeon_cmdbuf *cs);
- void (*cs_grow)(struct radeon_cmdbuf * cs, size_t min_size);
+ VkResult (*cs_finalize)(struct radeon_cmdbuf *cs);
- VkResult (*cs_submit)(struct radeon_winsys_ctx *ctx,
- int queue_index,
- struct radeon_cmdbuf **cs_array,
- unsigned cs_count,
- struct radeon_cmdbuf *initial_preamble_cs,
- struct radeon_cmdbuf *continue_preamble_cs,
- struct radv_winsys_sem_info *sem_info,
- bool can_patch);
+ void (*cs_grow)(struct radeon_cmdbuf *cs, size_t min_size);
- void (*cs_add_buffer)(struct radeon_cmdbuf *cs,
- struct radeon_winsys_bo *bo);
+ VkResult (*cs_submit)(struct radeon_winsys_ctx *ctx, int queue_index,
+ struct radeon_cmdbuf **cs_array, unsigned cs_count,
+ struct radeon_cmdbuf *initial_preamble_cs,
+ struct radeon_cmdbuf *continue_preamble_cs,
+ struct radv_winsys_sem_info *sem_info, bool can_patch);
- void (*cs_execute_secondary)(struct radeon_cmdbuf *parent,
- struct radeon_cmdbuf *child);
+ void (*cs_add_buffer)(struct radeon_cmdbuf *cs, struct radeon_winsys_bo *bo);
- void (*cs_dump)(struct radeon_cmdbuf *cs, FILE* file, const int *trace_ids, int trace_id_count);
+ void (*cs_execute_secondary)(struct radeon_cmdbuf *parent, struct radeon_cmdbuf *child);
- void (*dump_bo_ranges)(struct radeon_winsys *ws, FILE *file);
+ void (*cs_dump)(struct radeon_cmdbuf *cs, FILE *file, const int *trace_ids, int trace_id_count);
- void (*dump_bo_log)(struct radeon_winsys *ws, FILE *file);
+ void (*dump_bo_ranges)(struct radeon_winsys *ws, FILE *file);
- int (*surface_init)(struct radeon_winsys *ws,
- const struct ac_surf_info *surf_info,
- struct radeon_surf *surf);
+ void (*dump_bo_log)(struct radeon_winsys *ws, FILE *file);
- int (*create_syncobj)(struct radeon_winsys *ws, bool create_signaled,
- uint32_t *handle);
- void (*destroy_syncobj)(struct radeon_winsys *ws, uint32_t handle);
+ int (*surface_init)(struct radeon_winsys *ws, const struct ac_surf_info *surf_info,
+ struct radeon_surf *surf);
- void (*reset_syncobj)(struct radeon_winsys *ws, uint32_t handle);
- void (*signal_syncobj)(struct radeon_winsys *ws, uint32_t handle, uint64_t point);
- VkResult (*query_syncobj)(struct radeon_winsys *ws, uint32_t handle, uint64_t *point);
- bool (*wait_syncobj)(struct radeon_winsys *ws, const uint32_t *handles, uint32_t handle_count,
- bool wait_all, uint64_t timeout);
- bool (*wait_timeline_syncobj)(struct radeon_winsys *ws, const uint32_t *handles, const uint64_t *points,
- uint32_t handle_count, bool wait_all, bool available, uint64_t timeout);
+ int (*create_syncobj)(struct radeon_winsys *ws, bool create_signaled, uint32_t *handle);
+ void (*destroy_syncobj)(struct radeon_winsys *ws, uint32_t handle);
- int (*export_syncobj)(struct radeon_winsys *ws, uint32_t syncobj, int *fd);
- int (*import_syncobj)(struct radeon_winsys *ws, int fd, uint32_t *syncobj);
+ void (*reset_syncobj)(struct radeon_winsys *ws, uint32_t handle);
+ void (*signal_syncobj)(struct radeon_winsys *ws, uint32_t handle, uint64_t point);
+ VkResult (*query_syncobj)(struct radeon_winsys *ws, uint32_t handle, uint64_t *point);
+ bool (*wait_syncobj)(struct radeon_winsys *ws, const uint32_t *handles, uint32_t handle_count,
+ bool wait_all, uint64_t timeout);
+ bool (*wait_timeline_syncobj)(struct radeon_winsys *ws, const uint32_t *handles,
+ const uint64_t *points, uint32_t handle_count, bool wait_all,
+ bool available, uint64_t timeout);
- int (*export_syncobj_to_sync_file)(struct radeon_winsys *ws, uint32_t syncobj, int *fd);
+ int (*export_syncobj)(struct radeon_winsys *ws, uint32_t syncobj, int *fd);
+ int (*import_syncobj)(struct radeon_winsys *ws, int fd, uint32_t *syncobj);
- /* Note that this, unlike the normal import, uses an existing syncobj. */
- int (*import_syncobj_from_sync_file)(struct radeon_winsys *ws, uint32_t syncobj, int fd);
+ int (*export_syncobj_to_sync_file)(struct radeon_winsys *ws, uint32_t syncobj, int *fd);
+ /* Note that this, unlike the normal import, uses an existing syncobj. */
+ int (*import_syncobj_from_sync_file)(struct radeon_winsys *ws, uint32_t syncobj, int fd);
};
-static inline void radeon_emit(struct radeon_cmdbuf *cs, uint32_t value)
+static inline void
+radeon_emit(struct radeon_cmdbuf *cs, uint32_t value)
{
- cs->buf[cs->cdw++] = value;
+ cs->buf[cs->cdw++] = value;
}
-static inline void radeon_emit_array(struct radeon_cmdbuf *cs,
- const uint32_t *values, unsigned count)
+static inline void
+radeon_emit_array(struct radeon_cmdbuf *cs, const uint32_t *values, unsigned count)
{
- memcpy(cs->buf + cs->cdw, values, count * 4);
- cs->cdw += count;
+ memcpy(cs->buf + cs->cdw, values, count * 4);
+ cs->cdw += count;
}
-static inline uint64_t radv_buffer_get_va(struct radeon_winsys_bo *bo)
+static inline uint64_t
+radv_buffer_get_va(struct radeon_winsys_bo *bo)
{
- return bo->va;
+ return bo->va;
}
-static inline void radv_cs_add_buffer(struct radeon_winsys *ws,
- struct radeon_cmdbuf *cs,
- struct radeon_winsys_bo *bo)
+static inline void
+radv_cs_add_buffer(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, struct radeon_winsys_bo *bo)
{
- if (bo->use_global_list)
- return;
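+   /* Buffers on the global BO list are resident for every submission, so they
+    * do not need to be tracked in the per-CS buffer list.
+    */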
+ if (bo->use_global_list)
+ return;
- ws->cs_add_buffer(cs, bo);
+ ws->cs_add_buffer(cs, bo);
}
enum radeon_bo_domain radv_cmdbuffer_domain(const struct radeon_info *info, uint32_t perftest);
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index e23bf5f6ad9..eb29a794099 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -25,689 +25,652 @@
* IN THE SOFTWARE.
*/
+#include "radv_shader.h"
+#include "nir/nir.h"
+#include "nir/nir_builder.h"
+#include "spirv/nir_spirv.h"
#include "util/memstream.h"
#include "util/mesa-sha1.h"
#include "util/u_atomic.h"
#include "radv_debug.h"
#include "radv_private.h"
-#include "radv_shader.h"
-#include "radv_shader_helper.h"
#include "radv_shader_args.h"
-#include "nir/nir.h"
-#include "nir/nir_builder.h"
-#include "spirv/nir_spirv.h"
+#include "radv_shader_helper.h"
-#include "sid.h"
+#include "util/debug.h"
#include "ac_binary.h"
+#include "ac_exp_param.h"
#include "ac_llvm_util.h"
#include "ac_nir.h"
#include "ac_nir_to_llvm.h"
#include "ac_rtld.h"
+#include "aco_interface.h"
+#include "sid.h"
#include "vk_format.h"
-#include "util/debug.h"
-#include "ac_exp_param.h"
static const struct nir_shader_compiler_options nir_options = {
- .vertex_id_zero_based = true,
- .lower_scmp = true,
- .lower_flrp16 = true,
- .lower_flrp32 = true,
- .lower_flrp64 = true,
- .lower_device_index_to_zero = true,
- .lower_fdiv = true,
- .lower_fmod = true,
- .lower_ineg = true,
- .lower_bitfield_insert_to_bitfield_select = true,
- .lower_bitfield_extract = true,
- .lower_pack_snorm_2x16 = true,
- .lower_pack_snorm_4x8 = true,
- .lower_pack_unorm_2x16 = true,
- .lower_pack_unorm_4x8 = true,
- .lower_pack_half_2x16 = true,
- .lower_pack_64_2x32 = true,
- .lower_pack_64_4x16 = true,
- .lower_pack_32_2x16 = true,
- .lower_unpack_snorm_2x16 = true,
- .lower_unpack_snorm_4x8 = true,
- .lower_unpack_unorm_2x16 = true,
- .lower_unpack_unorm_4x8 = true,
- .lower_unpack_half_2x16 = true,
- .lower_extract_byte = true,
- .lower_extract_word = true,
- .lower_ffma16 = true,
- .lower_ffma32 = true,
- .lower_ffma64 = true,
- .lower_fpow = true,
- .lower_mul_2x32_64 = true,
- .lower_rotate = true,
- .has_fsub = true,
- .has_isub = true,
- .use_scoped_barrier = true,
- .max_unroll_iterations = 32,
- .max_unroll_iterations_aggressive = 128,
- .use_interpolated_input_intrinsics = true,
- .vectorize_vec2_16bit = true,
- /* nir_lower_int64() isn't actually called for the LLVM backend, but
- * this helps the loop unrolling heuristics. */
- .lower_int64_options = nir_lower_imul64 |
- nir_lower_imul_high64 |
- nir_lower_imul_2x32_64 |
- nir_lower_divmod64 |
- nir_lower_minmax64 |
- nir_lower_iabs64,
- .lower_doubles_options = nir_lower_drcp |
- nir_lower_dsqrt |
- nir_lower_drsq |
- nir_lower_ddiv,
+ .vertex_id_zero_based = true,
+ .lower_scmp = true,
+ .lower_flrp16 = true,
+ .lower_flrp32 = true,
+ .lower_flrp64 = true,
+ .lower_device_index_to_zero = true,
+ .lower_fdiv = true,
+ .lower_fmod = true,
+ .lower_ineg = true,
+ .lower_bitfield_insert_to_bitfield_select = true,
+ .lower_bitfield_extract = true,
+ .lower_pack_snorm_2x16 = true,
+ .lower_pack_snorm_4x8 = true,
+ .lower_pack_unorm_2x16 = true,
+ .lower_pack_unorm_4x8 = true,
+ .lower_pack_half_2x16 = true,
+ .lower_pack_64_2x32 = true,
+ .lower_pack_64_4x16 = true,
+ .lower_pack_32_2x16 = true,
+ .lower_unpack_snorm_2x16 = true,
+ .lower_unpack_snorm_4x8 = true,
+ .lower_unpack_unorm_2x16 = true,
+ .lower_unpack_unorm_4x8 = true,
+ .lower_unpack_half_2x16 = true,
+ .lower_extract_byte = true,
+ .lower_extract_word = true,
+ .lower_ffma16 = true,
+ .lower_ffma32 = true,
+ .lower_ffma64 = true,
+ .lower_fpow = true,
+ .lower_mul_2x32_64 = true,
+ .lower_rotate = true,
+ .has_fsub = true,
+ .has_isub = true,
+ .use_scoped_barrier = true,
+ .max_unroll_iterations = 32,
+ .max_unroll_iterations_aggressive = 128,
+ .use_interpolated_input_intrinsics = true,
+ .vectorize_vec2_16bit = true,
+ /* nir_lower_int64() isn't actually called for the LLVM backend, but
+ * this helps the loop unrolling heuristics. */
+ .lower_int64_options = nir_lower_imul64 | nir_lower_imul_high64 | nir_lower_imul_2x32_64 |
+ nir_lower_divmod64 | nir_lower_minmax64 | nir_lower_iabs64,
+ .lower_doubles_options = nir_lower_drcp | nir_lower_dsqrt | nir_lower_drsq | nir_lower_ddiv,
.divergence_analysis_options = nir_divergence_view_index_uniform,
};
bool
-radv_can_dump_shader(struct radv_device *device,
- struct vk_shader_module *module,
- bool is_gs_copy_shader)
+radv_can_dump_shader(struct radv_device *device, struct vk_shader_module *module,
+ bool is_gs_copy_shader)
{
- if (!(device->instance->debug_flags & RADV_DEBUG_DUMP_SHADERS))
- return false;
- if (module)
- return !module->nir ||
- (device->instance->debug_flags & RADV_DEBUG_DUMP_META_SHADERS);
+ if (!(device->instance->debug_flags & RADV_DEBUG_DUMP_SHADERS))
+ return false;
+ if (module)
+ return !module->nir || (device->instance->debug_flags & RADV_DEBUG_DUMP_META_SHADERS);
- return is_gs_copy_shader;
+ return is_gs_copy_shader;
}
bool
-radv_can_dump_shader_stats(struct radv_device *device,
- struct vk_shader_module *module)
+radv_can_dump_shader_stats(struct radv_device *device, struct vk_shader_module *module)
{
- /* Only dump non-meta shader stats. */
- return device->instance->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS &&
- module && !module->nir;
+ /* Only dump non-meta shader stats. */
+ return device->instance->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS && module && !module->nir;
}
void
radv_optimize_nir(const struct radv_device *device, struct nir_shader *shader,
- bool optimize_conservatively, bool allow_copies)
+ bool optimize_conservatively, bool allow_copies)
{
- bool progress;
- unsigned lower_flrp =
- (shader->options->lower_flrp16 ? 16 : 0) |
- (shader->options->lower_flrp32 ? 32 : 0) |
- (shader->options->lower_flrp64 ? 64 : 0);
-
- do {
- progress = false;
-
- NIR_PASS(progress, shader, nir_split_array_vars, nir_var_function_temp);
- NIR_PASS(progress, shader, nir_shrink_vec_array_vars, nir_var_function_temp);
-
- NIR_PASS_V(shader, nir_lower_vars_to_ssa);
-
- if (allow_copies) {
- /* Only run this pass in the first call to
- * radv_optimize_nir. Later calls assume that we've
- * lowered away any copy_deref instructions and we
- * don't want to introduce any more.
- */
- NIR_PASS(progress, shader, nir_opt_find_array_copies);
- }
-
- NIR_PASS(progress, shader, nir_opt_copy_prop_vars);
- NIR_PASS(progress, shader, nir_opt_dead_write_vars);
- NIR_PASS(progress, shader, nir_remove_dead_variables,
- nir_var_function_temp | nir_var_shader_in | nir_var_shader_out,
- NULL);
-
- NIR_PASS_V(shader, nir_lower_alu_to_scalar, NULL, NULL);
- NIR_PASS_V(shader, nir_lower_phis_to_scalar);
-
- NIR_PASS(progress, shader, nir_copy_prop);
- NIR_PASS(progress, shader, nir_opt_remove_phis);
- NIR_PASS(progress, shader, nir_opt_dce);
- if (nir_opt_trivial_continues(shader)) {
- progress = true;
- NIR_PASS(progress, shader, nir_copy_prop);
- NIR_PASS(progress, shader, nir_opt_remove_phis);
- NIR_PASS(progress, shader, nir_opt_dce);
- }
- NIR_PASS(progress, shader, nir_opt_if, true);
- NIR_PASS(progress, shader, nir_opt_dead_cf);
- NIR_PASS(progress, shader, nir_opt_cse);
- NIR_PASS(progress, shader, nir_opt_peephole_select, 8, true, true);
- NIR_PASS(progress, shader, nir_opt_constant_folding);
- NIR_PASS(progress, shader, nir_opt_algebraic);
-
- if (lower_flrp != 0) {
- bool lower_flrp_progress = false;
- NIR_PASS(lower_flrp_progress,
- shader,
- nir_lower_flrp,
- lower_flrp,
- false /* always_precise */);
- if (lower_flrp_progress) {
- NIR_PASS(progress, shader,
- nir_opt_constant_folding);
- progress = true;
- }
-
- /* Nothing should rematerialize any flrps, so we only
- * need to do this lowering once.
- */
- lower_flrp = 0;
- }
-
- NIR_PASS(progress, shader, nir_opt_undef);
- NIR_PASS(progress, shader, nir_opt_shrink_vectors,
- !device->instance->disable_shrink_image_store);
- if (shader->options->max_unroll_iterations) {
- NIR_PASS(progress, shader, nir_opt_loop_unroll, 0);
- }
- } while (progress && !optimize_conservatively);
-
- NIR_PASS(progress, shader, nir_opt_conditional_discard);
- NIR_PASS(progress, shader, nir_opt_move, nir_move_load_ubo);
+ bool progress;
+ unsigned lower_flrp = (shader->options->lower_flrp16 ? 16 : 0) |
+ (shader->options->lower_flrp32 ? 32 : 0) |
+ (shader->options->lower_flrp64 ? 64 : 0);
+
+ do {
+ progress = false;
+
+ NIR_PASS(progress, shader, nir_split_array_vars, nir_var_function_temp);
+ NIR_PASS(progress, shader, nir_shrink_vec_array_vars, nir_var_function_temp);
+
+ NIR_PASS_V(shader, nir_lower_vars_to_ssa);
+
+ if (allow_copies) {
+ /* Only run this pass in the first call to
+ * radv_optimize_nir. Later calls assume that we've
+ * lowered away any copy_deref instructions and we
+ * don't want to introduce any more.
+ */
+ NIR_PASS(progress, shader, nir_opt_find_array_copies);
+ }
+
+ NIR_PASS(progress, shader, nir_opt_copy_prop_vars);
+ NIR_PASS(progress, shader, nir_opt_dead_write_vars);
+ NIR_PASS(progress, shader, nir_remove_dead_variables,
+ nir_var_function_temp | nir_var_shader_in | nir_var_shader_out, NULL);
+
+ NIR_PASS_V(shader, nir_lower_alu_to_scalar, NULL, NULL);
+ NIR_PASS_V(shader, nir_lower_phis_to_scalar);
+
+ NIR_PASS(progress, shader, nir_copy_prop);
+ NIR_PASS(progress, shader, nir_opt_remove_phis);
+ NIR_PASS(progress, shader, nir_opt_dce);
+ if (nir_opt_trivial_continues(shader)) {
+ progress = true;
+ NIR_PASS(progress, shader, nir_copy_prop);
+ NIR_PASS(progress, shader, nir_opt_remove_phis);
+ NIR_PASS(progress, shader, nir_opt_dce);
+ }
+ NIR_PASS(progress, shader, nir_opt_if, true);
+ NIR_PASS(progress, shader, nir_opt_dead_cf);
+ NIR_PASS(progress, shader, nir_opt_cse);
+ NIR_PASS(progress, shader, nir_opt_peephole_select, 8, true, true);
+ NIR_PASS(progress, shader, nir_opt_constant_folding);
+ NIR_PASS(progress, shader, nir_opt_algebraic);
+
+ if (lower_flrp != 0) {
+ bool lower_flrp_progress = false;
+ NIR_PASS(lower_flrp_progress, shader, nir_lower_flrp, lower_flrp,
+ false /* always_precise */);
+ if (lower_flrp_progress) {
+ NIR_PASS(progress, shader, nir_opt_constant_folding);
+ progress = true;
+ }
+
+ /* Nothing should rematerialize any flrps, so we only
+ * need to do this lowering once.
+ */
+ lower_flrp = 0;
+ }
+
+ NIR_PASS(progress, shader, nir_opt_undef);
+ NIR_PASS(progress, shader, nir_opt_shrink_vectors,
+ !device->instance->disable_shrink_image_store);
+ if (shader->options->max_unroll_iterations) {
+ NIR_PASS(progress, shader, nir_opt_loop_unroll, 0);
+ }
+ } while (progress && !optimize_conservatively);
+
+ NIR_PASS(progress, shader, nir_opt_conditional_discard);
+ NIR_PASS(progress, shader, nir_opt_move, nir_move_load_ubo);
}
static void
shared_var_info(const struct glsl_type *type, unsigned *size, unsigned *align)
{
- assert(glsl_type_is_vector_or_scalar(type));
+ assert(glsl_type_is_vector_or_scalar(type));
- uint32_t comp_size = glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
- unsigned length = glsl_get_vector_elements(type);
- *size = comp_size * length,
- *align = comp_size;
+ uint32_t comp_size = glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
+ unsigned length = glsl_get_vector_elements(type);
+ *size = comp_size * length, *align = comp_size;
}
struct radv_shader_debug_data {
- struct radv_device *device;
- const struct vk_shader_module *module;
+ struct radv_device *device;
+ const struct vk_shader_module *module;
};
-static void radv_spirv_nir_debug(void *private_data,
- enum nir_spirv_debug_level level,
- size_t spirv_offset,
- const char *message)
+static void
+radv_spirv_nir_debug(void *private_data, enum nir_spirv_debug_level level, size_t spirv_offset,
+ const char *message)
{
- struct radv_shader_debug_data *debug_data = private_data;
- struct radv_instance *instance = debug_data->device->instance;
+ struct radv_shader_debug_data *debug_data = private_data;
+ struct radv_instance *instance = debug_data->device->instance;
- static const VkDebugReportFlagsEXT vk_flags[] = {
- [NIR_SPIRV_DEBUG_LEVEL_INFO] = VK_DEBUG_REPORT_INFORMATION_BIT_EXT,
- [NIR_SPIRV_DEBUG_LEVEL_WARNING] = VK_DEBUG_REPORT_WARNING_BIT_EXT,
- [NIR_SPIRV_DEBUG_LEVEL_ERROR] = VK_DEBUG_REPORT_ERROR_BIT_EXT,
- };
- char buffer[256];
+ static const VkDebugReportFlagsEXT vk_flags[] = {
+ [NIR_SPIRV_DEBUG_LEVEL_INFO] = VK_DEBUG_REPORT_INFORMATION_BIT_EXT,
+ [NIR_SPIRV_DEBUG_LEVEL_WARNING] = VK_DEBUG_REPORT_WARNING_BIT_EXT,
+ [NIR_SPIRV_DEBUG_LEVEL_ERROR] = VK_DEBUG_REPORT_ERROR_BIT_EXT,
+ };
+ char buffer[256];
- snprintf(buffer, sizeof(buffer), "SPIR-V offset %lu: %s",
- (unsigned long)spirv_offset, message);
+ snprintf(buffer, sizeof(buffer), "SPIR-V offset %lu: %s", (unsigned long)spirv_offset, message);
- vk_debug_report(&instance->vk, vk_flags[level],
- &debug_data->module->base, 0, 0, "radv", buffer);
+ vk_debug_report(&instance->vk, vk_flags[level], &debug_data->module->base, 0, 0, "radv", buffer);
}
-static void radv_compiler_debug(void *private_data,
- enum radv_compiler_debug_level level,
- const char *message)
+static void
+radv_compiler_debug(void *private_data, enum radv_compiler_debug_level level, const char *message)
{
- struct radv_shader_debug_data *debug_data = private_data;
- struct radv_instance *instance = debug_data->device->instance;
-
- static const VkDebugReportFlagsEXT vk_flags[] = {
- [RADV_COMPILER_DEBUG_LEVEL_PERFWARN] = VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT,
- [RADV_COMPILER_DEBUG_LEVEL_ERROR] = VK_DEBUG_REPORT_ERROR_BIT_EXT,
- };
-
- /* VK_DEBUG_REPORT_DEBUG_BIT_EXT specifies diagnostic information
- * from the implementation and layers.
- */
- vk_debug_report(&instance->vk,
- vk_flags[level] | VK_DEBUG_REPORT_DEBUG_BIT_EXT,
- &debug_data->module->base, 0, 0, "radv", message);
+ struct radv_shader_debug_data *debug_data = private_data;
+ struct radv_instance *instance = debug_data->device->instance;
+
+ static const VkDebugReportFlagsEXT vk_flags[] = {
+ [RADV_COMPILER_DEBUG_LEVEL_PERFWARN] = VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT,
+ [RADV_COMPILER_DEBUG_LEVEL_ERROR] = VK_DEBUG_REPORT_ERROR_BIT_EXT,
+ };
+
+ /* VK_DEBUG_REPORT_DEBUG_BIT_EXT specifies diagnostic information
+ * from the implementation and layers.
+ */
+ vk_debug_report(&instance->vk, vk_flags[level] | VK_DEBUG_REPORT_DEBUG_BIT_EXT,
+ &debug_data->module->base, 0, 0, "radv", message);
}
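+/* Mark position/clip/cull/tess-level outputs invariant so that matching
+ * computations in different shaders produce identical results.
+ */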
static void
mark_geom_invariant(nir_shader *nir)
{
- nir_foreach_shader_out_variable(var, nir) {
- switch (var->data.location) {
- case VARYING_SLOT_POS:
- case VARYING_SLOT_PSIZ:
- case VARYING_SLOT_CLIP_DIST0:
- case VARYING_SLOT_CLIP_DIST1:
- case VARYING_SLOT_CULL_DIST0:
- case VARYING_SLOT_CULL_DIST1:
- case VARYING_SLOT_TESS_LEVEL_OUTER:
- case VARYING_SLOT_TESS_LEVEL_INNER:
- var->data.invariant = true;
- break;
- default:
- break;
- }
- }
+ nir_foreach_shader_out_variable(var, nir)
+ {
+ switch (var->data.location) {
+ case VARYING_SLOT_POS:
+ case VARYING_SLOT_PSIZ:
+ case VARYING_SLOT_CLIP_DIST0:
+ case VARYING_SLOT_CLIP_DIST1:
+ case VARYING_SLOT_CULL_DIST0:
+ case VARYING_SLOT_CULL_DIST1:
+ case VARYING_SLOT_TESS_LEVEL_OUTER:
+ case VARYING_SLOT_TESS_LEVEL_INNER:
+ var->data.invariant = true;
+ break;
+ default:
+ break;
+ }
+ }
}
static bool
lower_intrinsics(nir_shader *nir, const struct radv_pipeline_key *key)
{
- nir_function_impl *entry = nir_shader_get_entrypoint(nir);
- bool progress = false;
- nir_builder b;
-
- nir_builder_init(&b, entry);
-
- nir_foreach_block(block, entry) {
- nir_foreach_instr_safe(instr, block) {
- if (instr->type != nir_instr_type_intrinsic)
- continue;
-
- nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
- b.cursor = nir_before_instr(&intrin->instr);
-
- nir_ssa_def *def = NULL;
- if (intrin->intrinsic == nir_intrinsic_load_vulkan_descriptor) {
- def = nir_vec2(&b, nir_channel(&b, intrin->src[0].ssa, 0),
- nir_imm_int(&b, 0));
- } else if (intrin->intrinsic == nir_intrinsic_is_sparse_texels_resident) {
- def = nir_ieq_imm(&b, intrin->src[0].ssa, 0);
- } else if (intrin->intrinsic == nir_intrinsic_sparse_residency_code_and) {
- def = nir_ior(&b, intrin->src[0].ssa, intrin->src[1].ssa);
- } else if (intrin->intrinsic == nir_intrinsic_load_view_index &&
- !key->has_multiview_view_index) {
- def = nir_imm_zero(&b, 1, 32);
- } else {
- continue;
- }
-
- nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
- def);
-
- nir_instr_remove(instr);
- progress = true;
- }
- }
-
- return progress;
+ nir_function_impl *entry = nir_shader_get_entrypoint(nir);
+ bool progress = false;
+ nir_builder b;
+
+ nir_builder_init(&b, entry);
+
+ nir_foreach_block (block, entry) {
+ nir_foreach_instr_safe (instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+ b.cursor = nir_before_instr(&intrin->instr);
+
+ nir_ssa_def *def = NULL;
+ if (intrin->intrinsic == nir_intrinsic_load_vulkan_descriptor) {
+ def = nir_vec2(&b, nir_channel(&b, intrin->src[0].ssa, 0), nir_imm_int(&b, 0));
+ } else if (intrin->intrinsic == nir_intrinsic_is_sparse_texels_resident) {
+ def = nir_ieq_imm(&b, intrin->src[0].ssa, 0);
+ } else if (intrin->intrinsic == nir_intrinsic_sparse_residency_code_and) {
+ def = nir_ior(&b, intrin->src[0].ssa, intrin->src[1].ssa);
+ } else if (intrin->intrinsic == nir_intrinsic_load_view_index &&
+ !key->has_multiview_view_index) {
+ def = nir_imm_zero(&b, 1, 32);
+ } else {
+ continue;
+ }
+
+ nir_ssa_def_rewrite_uses(&intrin->dest.ssa, def);
+
+ nir_instr_remove(instr);
+ progress = true;
+ }
+ }
+
+ return progress;
}
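/* A minimal sketch of how a small rewrite pass like lower_intrinsics() above
 * is typically driven, assuming the usual NIR progress/cleanup idiom; the
 * helper name is illustrative and not part of this driver.
 */
static void
run_lower_intrinsics_sketch(nir_shader *nir, const struct radv_pipeline_key *key)
{
   if (lower_intrinsics(nir, key)) {
      /* The rewrites can leave dead instructions behind (e.g. an unused
       * descriptor channel), so run dead-code elimination afterwards.
       */
      nir_opt_dce(nir);
   }
}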
nir_shader *
-radv_shader_compile_to_nir(struct radv_device *device,
- struct vk_shader_module *module,
- const char *entrypoint_name,
- gl_shader_stage stage,
- const VkSpecializationInfo *spec_info,
- const VkPipelineCreateFlags flags,
- const struct radv_pipeline_layout *layout,
- const struct radv_pipeline_key *key)
+radv_shader_compile_to_nir(struct radv_device *device, struct vk_shader_module *module,
+ const char *entrypoint_name, gl_shader_stage stage,
+ const VkSpecializationInfo *spec_info, const VkPipelineCreateFlags flags,
+ const struct radv_pipeline_layout *layout,
+ const struct radv_pipeline_key *key)
{
- unsigned subgroup_size = 64, ballot_bit_size = 64;
- if (key->compute_subgroup_size) {
- /* Only compute shaders currently support requiring a
- * specific subgroup size.
- */
- assert(stage == MESA_SHADER_COMPUTE);
- subgroup_size = key->compute_subgroup_size;
- ballot_bit_size = key->compute_subgroup_size;
- }
-
- nir_shader *nir;
-
- if (module->nir) {
- /* Some things such as our meta clear/blit code will give us a NIR
-    * shader directly. In that case, we ignore the SPIR-V entirely
-    * and just use the NIR shader. */
- nir = module->nir;
- nir->options = &nir_options;
- nir_validate_shader(nir, "in internal shader");
-
- assert(exec_list_length(&nir->functions) == 1);
- } else {
- uint32_t *spirv = (uint32_t *) module->data;
- assert(module->size % 4 == 0);
-
- if (device->instance->debug_flags & RADV_DEBUG_DUMP_SPIRV)
- radv_print_spirv(module->data, module->size, stderr);
-
- uint32_t num_spec_entries = 0;
- struct nir_spirv_specialization *spec_entries = NULL;
- if (spec_info && spec_info->mapEntryCount > 0) {
- num_spec_entries = spec_info->mapEntryCount;
- spec_entries = calloc(num_spec_entries, sizeof(*spec_entries));
- for (uint32_t i = 0; i < num_spec_entries; i++) {
- VkSpecializationMapEntry entry = spec_info->pMapEntries[i];
- const void *data = (uint8_t *)spec_info->pData + entry.offset;
- assert((uint8_t *)data + entry.size <= (uint8_t *)spec_info->pData + spec_info->dataSize);
-
- spec_entries[i].id = spec_info->pMapEntries[i].constantID;
- switch (entry.size) {
- case 8:
- memcpy(&spec_entries[i].value.u64, data, sizeof(uint64_t));
- break;
- case 4:
- memcpy(&spec_entries[i].value.u32, data, sizeof(uint32_t));
- break;
- case 2:
- memcpy(&spec_entries[i].value.u16, data, sizeof(uint16_t));
- break;
- case 1:
- memcpy(&spec_entries[i].value.u8, data, sizeof(uint8_t));
- break;
- default:
- assert(!"Invalid spec constant size");
- break;
- }
- }
- }
-
- struct radv_shader_debug_data spirv_debug_data = {
- .device = device,
- .module = module,
- };
- const struct spirv_to_nir_options spirv_options = {
- .caps = {
- .amd_fragment_mask = true,
- .amd_gcn_shader = true,
- .amd_image_gather_bias_lod = true,
- .amd_image_read_write_lod = true,
- .amd_shader_ballot = true,
- .amd_shader_explicit_vertex_parameter = true,
- .amd_trinary_minmax = true,
- .demote_to_helper_invocation = true,
- .derivative_group = true,
- .descriptor_array_dynamic_indexing = true,
- .descriptor_array_non_uniform_indexing = true,
- .descriptor_indexing = true,
- .device_group = true,
- .draw_parameters = true,
- .float_controls = true,
- .float16 = device->physical_device->rad_info.has_packed_math_16bit,
- .float32_atomic_add = true,
- .float64 = true,
- .geometry_streams = true,
- .image_atomic_int64 = true,
- .image_ms_array = true,
- .image_read_without_format = true,
- .image_write_without_format = true,
- .int8 = true,
- .int16 = true,
- .int64 = true,
- .int64_atomics = true,
- .min_lod = true,
- .multiview = true,
- .physical_storage_buffer_address = true,
- .post_depth_coverage = true,
- .runtime_descriptor_array = true,
- .shader_clock = true,
- .shader_viewport_index_layer = true,
- .sparse_residency = true,
- .stencil_export = true,
- .storage_8bit = true,
- .storage_16bit = true,
- .storage_image_ms = true,
- .subgroup_arithmetic = true,
- .subgroup_ballot = true,
- .subgroup_basic = true,
- .subgroup_quad = true,
- .subgroup_shuffle = true,
- .subgroup_vote = true,
- .tessellation = true,
- .transform_feedback = true,
- .variable_pointers = true,
- .vk_memory_model = true,
- .vk_memory_model_device_scope = true,
- .fragment_shading_rate = device->physical_device->rad_info.chip_class >= GFX10_3,
- .workgroup_memory_explicit_layout = true,
- },
- .ubo_addr_format = nir_address_format_32bit_index_offset,
- .ssbo_addr_format = nir_address_format_32bit_index_offset,
- .phys_ssbo_addr_format = nir_address_format_64bit_global,
- .push_const_addr_format = nir_address_format_logical,
- .shared_addr_format = nir_address_format_32bit_offset,
- .frag_coord_is_sysval = true,
- .use_deref_buffer_array_length = true,
- .debug = {
- .func = radv_spirv_nir_debug,
- .private_data = &spirv_debug_data,
- },
- };
- nir = spirv_to_nir(spirv, module->size / 4,
- spec_entries, num_spec_entries,
- stage, entrypoint_name,
- &spirv_options, &nir_options);
- assert(nir->info.stage == stage);
- nir_validate_shader(nir, "after spirv_to_nir");
-
- free(spec_entries);
-
- /* We have to lower away local constant initializers right before we
- * inline functions. That way they get properly initialized at the top
- * of the function and not at the top of its caller.
- */
- NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
- NIR_PASS_V(nir, nir_lower_returns);
- NIR_PASS_V(nir, nir_inline_functions);
- NIR_PASS_V(nir, nir_copy_prop);
- NIR_PASS_V(nir, nir_opt_deref);
-
- /* Pick off the single entrypoint that we want */
- foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
- if (func->is_entrypoint)
- func->name = ralloc_strdup(func, "main");
- else
- exec_node_remove(&func->node);
- }
- assert(exec_list_length(&nir->functions) == 1);
-
- /* Make sure we lower constant initializers on output variables so that
- * nir_remove_dead_variables below sees the corresponding stores
- */
- NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_shader_out);
-
- /* Now that we've deleted all but the main function, we can go ahead and
- * lower the rest of the constant initializers.
- */
- NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);
-
- /* Split member structs. We do this before lower_io_to_temporaries so that
- * it doesn't lower system values to temporaries by accident.
- */
- NIR_PASS_V(nir, nir_split_var_copies);
- NIR_PASS_V(nir, nir_split_per_member_structs);
-
- if (nir->info.stage == MESA_SHADER_FRAGMENT)
- NIR_PASS_V(nir, nir_lower_io_to_vector, nir_var_shader_out);
- if (nir->info.stage == MESA_SHADER_FRAGMENT)
- NIR_PASS_V(nir, nir_lower_input_attachments,
- &(nir_input_attachment_options) {
- .use_fragcoord_sysval = true,
- .use_layer_id_sysval = false,
- });
-
- NIR_PASS_V(nir, nir_remove_dead_variables,
- nir_var_shader_in | nir_var_shader_out | nir_var_system_value | nir_var_mem_shared,
- NULL);
-
- /* Variables can make nir_propagate_invariant more conservative
- * than it needs to be.
- */
- NIR_PASS_V(nir, nir_lower_global_vars_to_local);
- NIR_PASS_V(nir, nir_lower_vars_to_ssa);
-
- if (device->instance->debug_flags & RADV_DEBUG_INVARIANT_GEOM &&
- stage != MESA_SHADER_FRAGMENT) {
- mark_geom_invariant(nir);
- }
-
- NIR_PASS_V(nir, nir_propagate_invariant);
-
- NIR_PASS_V(nir, nir_lower_system_values);
- NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);
-
- NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
-
- NIR_PASS_V(nir, nir_lower_discard_or_demote,
- device->instance->debug_flags & RADV_DEBUG_DISCARD_TO_DEMOTE);
-
- nir_lower_doubles_options lower_doubles =
- nir->options->lower_doubles_options;
-
- if (device->physical_device->rad_info.chip_class == GFX6) {
- /* GFX6 doesn't support v_floor_f64 and the precision
- * of v_fract_f64 which is used to implement 64-bit
- * floor is less than what Vulkan requires.
- */
- lower_doubles |= nir_lower_dfloor;
- }
-
- NIR_PASS_V(nir, nir_lower_doubles, NULL, lower_doubles);
- }
-
- /* Vulkan uses the separate-shader linking model */
- nir->info.separate_shader = true;
-
- nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
-
- if (nir->info.stage == MESA_SHADER_GEOMETRY) {
- unsigned nir_gs_flags = nir_lower_gs_intrinsics_per_stream;
-
- if (device->physical_device->use_ngg && !radv_use_llvm_for_stage(device, stage)) {
-         /* ACO needs NIR to do some of the heavy lifting */
- nir_gs_flags |= nir_lower_gs_intrinsics_count_primitives |
- nir_lower_gs_intrinsics_count_vertices_per_primitive |
- nir_lower_gs_intrinsics_overwrite_incomplete;
- }
-
- nir_lower_gs_intrinsics(nir, nir_gs_flags);
- }
-
- static const nir_lower_tex_options tex_options = {
- .lower_txp = ~0,
- .lower_tg4_offsets = true,
- };
-
- nir_lower_tex(nir, &tex_options);
-
- nir_lower_vars_to_ssa(nir);
-
- if (nir->info.stage == MESA_SHADER_VERTEX ||
- nir->info.stage == MESA_SHADER_GEOMETRY ||
- nir->info.stage == MESA_SHADER_FRAGMENT) {
- NIR_PASS_V(nir, nir_lower_io_to_temporaries,
- nir_shader_get_entrypoint(nir), true, true);
- } else if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
- NIR_PASS_V(nir, nir_lower_io_to_temporaries,
- nir_shader_get_entrypoint(nir), true, false);
- }
-
- nir_split_var_copies(nir);
-
- nir_lower_global_vars_to_local(nir);
- nir_remove_dead_variables(nir, nir_var_function_temp, NULL);
- bool gfx7minus = device->physical_device->rad_info.chip_class <= GFX7;
- nir_lower_subgroups(nir, &(struct nir_lower_subgroups_options) {
- .subgroup_size = subgroup_size,
- .ballot_bit_size = ballot_bit_size,
- .lower_to_scalar = 1,
- .lower_subgroup_masks = 1,
- .lower_shuffle = 1,
- .lower_shuffle_to_32bit = 1,
- .lower_vote_eq_to_ballot = 1,
- .lower_quad_broadcast_dynamic = 1,
- .lower_quad_broadcast_dynamic_to_const = gfx7minus,
- .lower_shuffle_to_swizzle_amd = 1,
- .lower_elect = radv_use_llvm_for_stage(device, stage),
- });
-
- nir_lower_load_const_to_scalar(nir);
-
- if (!(flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT))
- radv_optimize_nir(device, nir, false, true);
-
-   /* Call radv_nir_lower_ycbcr_textures() late, as there might still be
-    * tex instructions with an undefined texture/sampler before the first optimization. */
- NIR_PASS_V(nir, radv_nir_lower_ycbcr_textures, layout);
-
- /* We call nir_lower_var_copies() after the first radv_optimize_nir()
- * to remove any copies introduced by nir_opt_find_array_copies().
- */
- nir_lower_var_copies(nir);
-
- const nir_opt_access_options opt_access_options = {
- .is_vulkan = true,
- .infer_non_readable = true,
- };
- NIR_PASS_V(nir, nir_opt_access, &opt_access_options);
-
- NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_push_const,
- nir_address_format_32bit_offset);
-
- NIR_PASS_V(nir, nir_lower_explicit_io,
- nir_var_mem_ubo | nir_var_mem_ssbo,
- nir_address_format_32bit_index_offset);
-
- NIR_PASS_V(nir, lower_intrinsics, key);
-
- /* Lower deref operations for compute shared memory. */
- if (nir->info.stage == MESA_SHADER_COMPUTE) {
- if (!nir->info.cs.shared_memory_explicit_layout) {
- NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
- nir_var_mem_shared, shared_var_info);
- }
- NIR_PASS_V(nir, nir_lower_explicit_io,
- nir_var_mem_shared, nir_address_format_32bit_offset);
-
- if (nir->info.cs.zero_initialize_shared_memory &&
- nir->info.shared_size > 0) {
- const unsigned chunk_size = 16; /* max single store size */
- const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size);
- NIR_PASS_V(nir, nir_zero_initialize_shared_memory,
- shared_size, chunk_size);
- }
- }
-
- nir_lower_explicit_io(nir, nir_var_mem_global,
- nir_address_format_64bit_global);
-
- /* Lower large variables that are always constant with load_constant
- * intrinsics, which get turned into PC-relative loads from a data
- * section next to the shader.
- */
- NIR_PASS_V(nir, nir_opt_large_constants,
- glsl_get_natural_size_align_bytes, 16);
-
- /* Indirect lowering must be called after the radv_optimize_nir() loop
- * has been called at least once. Otherwise indirect lowering can
- * bloat the instruction count of the loop and cause it to be
- * considered too large for unrolling.
- */
- if (ac_lower_indirect_derefs(nir, device->physical_device->rad_info.chip_class) &&
- !(flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT) &&
- nir->info.stage != MESA_SHADER_COMPUTE) {
- /* Optimize the lowered code before the linking optimizations. */
- radv_optimize_nir(device, nir, false, false);
- }
-
- return nir;
+ unsigned subgroup_size = 64, ballot_bit_size = 64;
+ if (key->compute_subgroup_size) {
+ /* Only compute shaders currently support requiring a
+ * specific subgroup size.
+ */
+ assert(stage == MESA_SHADER_COMPUTE);
+ subgroup_size = key->compute_subgroup_size;
+ ballot_bit_size = key->compute_subgroup_size;
+ }
+
+ nir_shader *nir;
+
+ if (module->nir) {
+ /* Some things such as our meta clear/blit code will give us a NIR
+       * shader directly. In that case, we ignore the SPIR-V entirely
+       * and just use the NIR shader. */
+ nir = module->nir;
+ nir->options = &nir_options;
+ nir_validate_shader(nir, "in internal shader");
+
+ assert(exec_list_length(&nir->functions) == 1);
+ } else {
+ uint32_t *spirv = (uint32_t *)module->data;
+ assert(module->size % 4 == 0);
+
+ if (device->instance->debug_flags & RADV_DEBUG_DUMP_SPIRV)
+ radv_print_spirv(module->data, module->size, stderr);
+
+ uint32_t num_spec_entries = 0;
+ struct nir_spirv_specialization *spec_entries = NULL;
+ if (spec_info && spec_info->mapEntryCount > 0) {
+ num_spec_entries = spec_info->mapEntryCount;
+ spec_entries = calloc(num_spec_entries, sizeof(*spec_entries));
+ for (uint32_t i = 0; i < num_spec_entries; i++) {
+ VkSpecializationMapEntry entry = spec_info->pMapEntries[i];
+ const void *data = (uint8_t *)spec_info->pData + entry.offset;
+ assert((uint8_t *)data + entry.size <=
+ (uint8_t *)spec_info->pData + spec_info->dataSize);
+
+ spec_entries[i].id = spec_info->pMapEntries[i].constantID;
+ switch (entry.size) {
+ case 8:
+ memcpy(&spec_entries[i].value.u64, data, sizeof(uint64_t));
+ break;
+ case 4:
+ memcpy(&spec_entries[i].value.u32, data, sizeof(uint32_t));
+ break;
+ case 2:
+ memcpy(&spec_entries[i].value.u16, data, sizeof(uint16_t));
+ break;
+ case 1:
+ memcpy(&spec_entries[i].value.u8, data, sizeof(uint8_t));
+ break;
+ default:
+ assert(!"Invalid spec constant size");
+ break;
+ }
+ }
+ }
+
+ struct radv_shader_debug_data spirv_debug_data = {
+ .device = device,
+ .module = module,
+ };
+ const struct spirv_to_nir_options spirv_options = {
+ .caps =
+ {
+ .amd_fragment_mask = true,
+ .amd_gcn_shader = true,
+ .amd_image_gather_bias_lod = true,
+ .amd_image_read_write_lod = true,
+ .amd_shader_ballot = true,
+ .amd_shader_explicit_vertex_parameter = true,
+ .amd_trinary_minmax = true,
+ .demote_to_helper_invocation = true,
+ .derivative_group = true,
+ .descriptor_array_dynamic_indexing = true,
+ .descriptor_array_non_uniform_indexing = true,
+ .descriptor_indexing = true,
+ .device_group = true,
+ .draw_parameters = true,
+ .float_controls = true,
+ .float16 = device->physical_device->rad_info.has_packed_math_16bit,
+ .float32_atomic_add = true,
+ .float64 = true,
+ .geometry_streams = true,
+ .image_atomic_int64 = true,
+ .image_ms_array = true,
+ .image_read_without_format = true,
+ .image_write_without_format = true,
+ .int8 = true,
+ .int16 = true,
+ .int64 = true,
+ .int64_atomics = true,
+ .min_lod = true,
+ .multiview = true,
+ .physical_storage_buffer_address = true,
+ .post_depth_coverage = true,
+ .runtime_descriptor_array = true,
+ .shader_clock = true,
+ .shader_viewport_index_layer = true,
+ .sparse_residency = true,
+ .stencil_export = true,
+ .storage_8bit = true,
+ .storage_16bit = true,
+ .storage_image_ms = true,
+ .subgroup_arithmetic = true,
+ .subgroup_ballot = true,
+ .subgroup_basic = true,
+ .subgroup_quad = true,
+ .subgroup_shuffle = true,
+ .subgroup_vote = true,
+ .tessellation = true,
+ .transform_feedback = true,
+ .variable_pointers = true,
+ .vk_memory_model = true,
+ .vk_memory_model_device_scope = true,
+ .fragment_shading_rate = device->physical_device->rad_info.chip_class >= GFX10_3,
+ .workgroup_memory_explicit_layout = true,
+ },
+ .ubo_addr_format = nir_address_format_32bit_index_offset,
+ .ssbo_addr_format = nir_address_format_32bit_index_offset,
+ .phys_ssbo_addr_format = nir_address_format_64bit_global,
+ .push_const_addr_format = nir_address_format_logical,
+ .shared_addr_format = nir_address_format_32bit_offset,
+ .frag_coord_is_sysval = true,
+ .use_deref_buffer_array_length = true,
+ .debug =
+ {
+ .func = radv_spirv_nir_debug,
+ .private_data = &spirv_debug_data,
+ },
+ };
+ nir = spirv_to_nir(spirv, module->size / 4, spec_entries, num_spec_entries, stage,
+ entrypoint_name, &spirv_options, &nir_options);
+ assert(nir->info.stage == stage);
+ nir_validate_shader(nir, "after spirv_to_nir");
+
+ free(spec_entries);
+
+ /* We have to lower away local constant initializers right before we
+ * inline functions. That way they get properly initialized at the top
+ * of the function and not at the top of its caller.
+ */
+ NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
+ NIR_PASS_V(nir, nir_lower_returns);
+ NIR_PASS_V(nir, nir_inline_functions);
+ NIR_PASS_V(nir, nir_copy_prop);
+ NIR_PASS_V(nir, nir_opt_deref);
+
+ /* Pick off the single entrypoint that we want */
+ foreach_list_typed_safe(nir_function, func, node, &nir->functions)
+ {
+ if (func->is_entrypoint)
+ func->name = ralloc_strdup(func, "main");
+ else
+ exec_node_remove(&func->node);
+ }
+ assert(exec_list_length(&nir->functions) == 1);
+
+ /* Make sure we lower constant initializers on output variables so that
+ * nir_remove_dead_variables below sees the corresponding stores
+ */
+ NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_shader_out);
+
+ /* Now that we've deleted all but the main function, we can go ahead and
+ * lower the rest of the constant initializers.
+ */
+ NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);
+
+ /* Split member structs. We do this before lower_io_to_temporaries so that
+ * it doesn't lower system values to temporaries by accident.
+ */
+ NIR_PASS_V(nir, nir_split_var_copies);
+ NIR_PASS_V(nir, nir_split_per_member_structs);
+
+ if (nir->info.stage == MESA_SHADER_FRAGMENT)
+ NIR_PASS_V(nir, nir_lower_io_to_vector, nir_var_shader_out);
+ if (nir->info.stage == MESA_SHADER_FRAGMENT)
+ NIR_PASS_V(nir, nir_lower_input_attachments,
+ &(nir_input_attachment_options){
+ .use_fragcoord_sysval = true,
+ .use_layer_id_sysval = false,
+ });
+
+ NIR_PASS_V(nir, nir_remove_dead_variables,
+ nir_var_shader_in | nir_var_shader_out | nir_var_system_value | nir_var_mem_shared,
+ NULL);
+
+ /* Variables can make nir_propagate_invariant more conservative
+ * than it needs to be.
+ */
+ NIR_PASS_V(nir, nir_lower_global_vars_to_local);
+ NIR_PASS_V(nir, nir_lower_vars_to_ssa);
+
+ if (device->instance->debug_flags & RADV_DEBUG_INVARIANT_GEOM &&
+ stage != MESA_SHADER_FRAGMENT) {
+ mark_geom_invariant(nir);
+ }
+
+ NIR_PASS_V(nir, nir_propagate_invariant);
+
+ NIR_PASS_V(nir, nir_lower_system_values);
+ NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);
+
+ NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
+
+ NIR_PASS_V(nir, nir_lower_discard_or_demote,
+ device->instance->debug_flags & RADV_DEBUG_DISCARD_TO_DEMOTE);
+
+ nir_lower_doubles_options lower_doubles = nir->options->lower_doubles_options;
+
+ if (device->physical_device->rad_info.chip_class == GFX6) {
+ /* GFX6 doesn't support v_floor_f64 and the precision
+ * of v_fract_f64 which is used to implement 64-bit
+ * floor is less than what Vulkan requires.
+ */
+ lower_doubles |= nir_lower_dfloor;
+ }
+
+ NIR_PASS_V(nir, nir_lower_doubles, NULL, lower_doubles);
+ }
+
+ /* Vulkan uses the separate-shader linking model */
+ nir->info.separate_shader = true;
+
+ nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
+
+ if (nir->info.stage == MESA_SHADER_GEOMETRY) {
+ unsigned nir_gs_flags = nir_lower_gs_intrinsics_per_stream;
+
+ if (device->physical_device->use_ngg && !radv_use_llvm_for_stage(device, stage)) {
+         /* ACO needs NIR to do some of the heavy lifting */
+ nir_gs_flags |= nir_lower_gs_intrinsics_count_primitives |
+ nir_lower_gs_intrinsics_count_vertices_per_primitive |
+ nir_lower_gs_intrinsics_overwrite_incomplete;
+ }
+
+ nir_lower_gs_intrinsics(nir, nir_gs_flags);
+ }
+
+ static const nir_lower_tex_options tex_options = {
+ .lower_txp = ~0,
+ .lower_tg4_offsets = true,
+ };
+
+ nir_lower_tex(nir, &tex_options);
+
+ nir_lower_vars_to_ssa(nir);
+
+ if (nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_GEOMETRY ||
+ nir->info.stage == MESA_SHADER_FRAGMENT) {
+ NIR_PASS_V(nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(nir), true, true);
+ } else if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
+ NIR_PASS_V(nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(nir), true, false);
+ }
+
+ nir_split_var_copies(nir);
+
+ nir_lower_global_vars_to_local(nir);
+ nir_remove_dead_variables(nir, nir_var_function_temp, NULL);
+ bool gfx7minus = device->physical_device->rad_info.chip_class <= GFX7;
+ nir_lower_subgroups(nir, &(struct nir_lower_subgroups_options){
+ .subgroup_size = subgroup_size,
+ .ballot_bit_size = ballot_bit_size,
+ .lower_to_scalar = 1,
+ .lower_subgroup_masks = 1,
+ .lower_shuffle = 1,
+ .lower_shuffle_to_32bit = 1,
+ .lower_vote_eq_to_ballot = 1,
+ .lower_quad_broadcast_dynamic = 1,
+ .lower_quad_broadcast_dynamic_to_const = gfx7minus,
+ .lower_shuffle_to_swizzle_amd = 1,
+ .lower_elect = radv_use_llvm_for_stage(device, stage),
+ });
+
+ nir_lower_load_const_to_scalar(nir);
+
+ if (!(flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT))
+ radv_optimize_nir(device, nir, false, true);
+
+   /* Call radv_nir_lower_ycbcr_textures() late, as there might still be
+    * tex instructions with an undefined texture/sampler before the first optimization. */
+ NIR_PASS_V(nir, radv_nir_lower_ycbcr_textures, layout);
+
+ /* We call nir_lower_var_copies() after the first radv_optimize_nir()
+ * to remove any copies introduced by nir_opt_find_array_copies().
+ */
+ nir_lower_var_copies(nir);
+
+ const nir_opt_access_options opt_access_options = {
+ .is_vulkan = true,
+ .infer_non_readable = true,
+ };
+ NIR_PASS_V(nir, nir_opt_access, &opt_access_options);
+
+ NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_push_const, nir_address_format_32bit_offset);
+
+ NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo | nir_var_mem_ssbo,
+ nir_address_format_32bit_index_offset);
+
+ NIR_PASS_V(nir, lower_intrinsics, key);
+
+ /* Lower deref operations for compute shared memory. */
+ if (nir->info.stage == MESA_SHADER_COMPUTE) {
+ if (!nir->info.cs.shared_memory_explicit_layout) {
+ NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_mem_shared, shared_var_info);
+ }
+ NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_shared, nir_address_format_32bit_offset);
+
+ if (nir->info.cs.zero_initialize_shared_memory && nir->info.shared_size > 0) {
+ const unsigned chunk_size = 16; /* max single store size */
+ const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size);
+ NIR_PASS_V(nir, nir_zero_initialize_shared_memory, shared_size, chunk_size);
+ }
+ }
+
+ nir_lower_explicit_io(nir, nir_var_mem_global, nir_address_format_64bit_global);
+
+ /* Lower large variables that are always constant with load_constant
+ * intrinsics, which get turned into PC-relative loads from a data
+ * section next to the shader.
+ */
+ NIR_PASS_V(nir, nir_opt_large_constants, glsl_get_natural_size_align_bytes, 16);
+
+ /* Indirect lowering must be called after the radv_optimize_nir() loop
+ * has been called at least once. Otherwise indirect lowering can
+ * bloat the instruction count of the loop and cause it to be
+ * considered too large for unrolling.
+ */
+ if (ac_lower_indirect_derefs(nir, device->physical_device->rad_info.chip_class) &&
+ !(flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT) &&
+ nir->info.stage != MESA_SHADER_COMPUTE) {
+ /* Optimize the lowered code before the linking optimizations. */
+ radv_optimize_nir(device, nir, false, false);
+ }
+
+ return nir;
}
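/* A minimal sketch of the caller-side VkSpecializationInfo that the
 * spec-constant loop in radv_shader_compile_to_nir() above consumes, assuming
 * a single 32-bit constant with constantID = 0; the helper name and values
 * are illustrative only.
 */
static VkSpecializationInfo
make_spec_info_sketch(const uint32_t *value)
{
   static const VkSpecializationMapEntry entry = {
      .constantID = 0,          /* layout(constant_id = 0) in the shader */
      .offset = 0,              /* byte offset into pData */
      .size = sizeof(uint32_t), /* copied into spec_entries[i].value.u32 */
   };
   return (VkSpecializationInfo){
      .mapEntryCount = 1,
      .pMapEntries = &entry,
      .dataSize = sizeof(uint32_t),
      .pData = value,
   };
}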
static int
type_size_vec4(const struct glsl_type *type, bool bindless)
{
- return glsl_count_attribute_slots(type, false);
+ return glsl_count_attribute_slots(type, false);
}
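/* Worked example for type_size_vec4() above, assuming GLSL attribute-slot
 * counting where one location holds at most a vec4:
 *
 *    float     -> 1 slot        vec4      -> 1 slot
 *    mat4      -> 4 slots       vec4 v[3] -> 3 slots
 *
 * radv_lower_io() below passes this callback to nir_lower_io() so that
 * driver locations are assigned in units of vec4 slots.
 */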
static nir_variable *
find_layer_in_var(nir_shader *nir)
{
- nir_variable *var =
- nir_find_variable_with_location(nir, nir_var_shader_in, VARYING_SLOT_LAYER);
- if (var != NULL)
- return var;
-
- var = nir_variable_create(nir, nir_var_shader_in, glsl_int_type(), "layer id");
- var->data.location = VARYING_SLOT_LAYER;
- var->data.interpolation = INTERP_MODE_FLAT;
- return var;
+ nir_variable *var = nir_find_variable_with_location(nir, nir_var_shader_in, VARYING_SLOT_LAYER);
+ if (var != NULL)
+ return var;
+
+ var = nir_variable_create(nir, nir_var_shader_in, glsl_int_type(), "layer id");
+ var->data.location = VARYING_SLOT_LAYER;
+ var->data.interpolation = INTERP_MODE_FLAT;
+ return var;
}
/* We use layered rendering to implement multiview, which means we need to map
@@ -722,1174 +685,1108 @@ find_layer_in_var(nir_shader *nir)
static bool
lower_view_index(nir_shader *nir)
{
- bool progress = false;
- nir_function_impl *entry = nir_shader_get_entrypoint(nir);
- nir_builder b;
- nir_builder_init(&b, entry);
-
- nir_variable *layer = NULL;
- nir_foreach_block(block, entry) {
- nir_foreach_instr_safe(instr, block) {
- if (instr->type != nir_instr_type_intrinsic)
- continue;
-
- nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);
- if (load->intrinsic != nir_intrinsic_load_view_index)
- continue;
-
- if (!layer)
- layer = find_layer_in_var(nir);
-
- b.cursor = nir_before_instr(instr);
- nir_ssa_def *def = nir_load_var(&b, layer);
- nir_ssa_def_rewrite_uses(&load->dest.ssa,
- def);
-
- nir_instr_remove(instr);
- progress = true;
- }
- }
-
- return progress;
+ bool progress = false;
+ nir_function_impl *entry = nir_shader_get_entrypoint(nir);
+ nir_builder b;
+ nir_builder_init(&b, entry);
+
+ nir_variable *layer = NULL;
+ nir_foreach_block (block, entry) {
+ nir_foreach_instr_safe (instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);
+ if (load->intrinsic != nir_intrinsic_load_view_index)
+ continue;
+
+ if (!layer)
+ layer = find_layer_in_var(nir);
+
+ b.cursor = nir_before_instr(instr);
+ nir_ssa_def *def = nir_load_var(&b, layer);
+ nir_ssa_def_rewrite_uses(&load->dest.ssa, def);
+
+ nir_instr_remove(instr);
+ progress = true;
+ }
+ }
+
+ return progress;
}
void
radv_lower_io(struct radv_device *device, nir_shader *nir)
{
- if (nir->info.stage == MESA_SHADER_COMPUTE)
- return;
+ if (nir->info.stage == MESA_SHADER_COMPUTE)
+ return;
- if (nir->info.stage == MESA_SHADER_FRAGMENT) {
- NIR_PASS_V(nir, lower_view_index);
- nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
- MESA_SHADER_FRAGMENT);
- }
+ if (nir->info.stage == MESA_SHADER_FRAGMENT) {
+ NIR_PASS_V(nir, lower_view_index);
+ nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, MESA_SHADER_FRAGMENT);
+ }
- /* The RADV/LLVM backend expects 64-bit IO to be lowered. */
- nir_lower_io_options options =
- radv_use_llvm_for_stage(device, nir->info.stage) ? nir_lower_io_lower_64bit_to_32 : 0;
+ /* The RADV/LLVM backend expects 64-bit IO to be lowered. */
+ nir_lower_io_options options =
+ radv_use_llvm_for_stage(device, nir->info.stage) ? nir_lower_io_lower_64bit_to_32 : 0;
- NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
- type_size_vec4, options);
+ NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out, type_size_vec4, options);
- /* This pass needs actual constants */
- nir_opt_constant_folding(nir);
+ /* This pass needs actual constants */
+ nir_opt_constant_folding(nir);
- NIR_PASS_V(nir, nir_io_add_const_offset_to_base,
- nir_var_shader_in | nir_var_shader_out);
+ NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in | nir_var_shader_out);
}
bool
radv_lower_io_to_mem(struct radv_device *device, struct nir_shader *nir,
struct radv_shader_info *info, const struct radv_pipeline_key *pl_key)
{
- if (nir->info.stage == MESA_SHADER_VERTEX) {
- if (info->vs.as_ls) {
- ac_nir_lower_ls_outputs_to_mem(
- nir,
- info->vs.tcs_in_out_eq,
- info->vs.tcs_temp_only_input_mask,
- info->vs.num_linked_outputs);
- return true;
- } else if (info->vs.as_es) {
- ac_nir_lower_es_outputs_to_mem(
- nir,
- device->physical_device->rad_info.chip_class,
- info->vs.num_linked_outputs);
- return true;
- }
- } else if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
- ac_nir_lower_hs_inputs_to_mem(
- nir,
- info->vs.tcs_in_out_eq,
- info->tcs.num_linked_inputs);
- ac_nir_lower_hs_outputs_to_mem(
- nir, device->physical_device->rad_info.chip_class,
- info->tcs.tes_reads_tess_factors,
- info->tcs.tes_inputs_read,
- info->tcs.tes_patch_inputs_read,
- info->tcs.num_linked_inputs,
- info->tcs.num_linked_outputs,
- info->tcs.num_linked_patch_outputs,
- true);
- ac_nir_lower_tess_to_const(
- nir,
- pl_key->tess_input_vertices,
- info->num_tess_patches,
- ac_nir_lower_patch_vtx_in | ac_nir_lower_num_patches);
-
- return true;
- } else if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
- ac_nir_lower_tes_inputs_to_mem(
- nir,
- info->tes.num_linked_inputs,
- info->tes.num_linked_patch_inputs);
- ac_nir_lower_tess_to_const(
- nir,
- nir->info.tess.tcs_vertices_out,
- info->num_tess_patches,
- ac_nir_lower_patch_vtx_in | ac_nir_lower_num_patches);
-
- if (info->tes.as_es) {
- ac_nir_lower_es_outputs_to_mem(
- nir,
- device->physical_device->rad_info.chip_class,
- info->tes.num_linked_outputs);
- }
-
- return true;
- } else if (nir->info.stage == MESA_SHADER_GEOMETRY) {
- ac_nir_lower_gs_inputs_to_mem(
- nir,
- device->physical_device->rad_info.chip_class,
- info->gs.num_linked_inputs);
- return true;
- }
-
- return false;
+ if (nir->info.stage == MESA_SHADER_VERTEX) {
+ if (info->vs.as_ls) {
+ ac_nir_lower_ls_outputs_to_mem(nir, info->vs.tcs_in_out_eq,
+ info->vs.tcs_temp_only_input_mask,
+ info->vs.num_linked_outputs);
+ return true;
+ } else if (info->vs.as_es) {
+ ac_nir_lower_es_outputs_to_mem(nir, device->physical_device->rad_info.chip_class,
+ info->vs.num_linked_outputs);
+ return true;
+ }
+ } else if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
+ ac_nir_lower_hs_inputs_to_mem(nir, info->vs.tcs_in_out_eq, info->tcs.num_linked_inputs);
+ ac_nir_lower_hs_outputs_to_mem(
+ nir, device->physical_device->rad_info.chip_class, info->tcs.tes_reads_tess_factors,
+ info->tcs.tes_inputs_read, info->tcs.tes_patch_inputs_read, info->tcs.num_linked_inputs,
+ info->tcs.num_linked_outputs, info->tcs.num_linked_patch_outputs, true);
+ ac_nir_lower_tess_to_const(nir, pl_key->tess_input_vertices, info->num_tess_patches,
+ ac_nir_lower_patch_vtx_in | ac_nir_lower_num_patches);
+
+ return true;
+ } else if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
+ ac_nir_lower_tes_inputs_to_mem(nir, info->tes.num_linked_inputs,
+ info->tes.num_linked_patch_inputs);
+ ac_nir_lower_tess_to_const(nir, nir->info.tess.tcs_vertices_out, info->num_tess_patches,
+ ac_nir_lower_patch_vtx_in | ac_nir_lower_num_patches);
+
+ if (info->tes.as_es) {
+ ac_nir_lower_es_outputs_to_mem(nir, device->physical_device->rad_info.chip_class,
+ info->tes.num_linked_outputs);
+ }
+
+ return true;
+ } else if (nir->info.stage == MESA_SHADER_GEOMETRY) {
+ ac_nir_lower_gs_inputs_to_mem(nir, device->physical_device->rad_info.chip_class,
+ info->gs.num_linked_inputs);
+ return true;
+ }
+
+ return false;
}
static void *
-radv_alloc_shader_memory(struct radv_device *device,
- struct radv_shader_variant *shader)
+radv_alloc_shader_memory(struct radv_device *device, struct radv_shader_variant *shader)
{
- mtx_lock(&device->shader_slab_mutex);
- list_for_each_entry(struct radv_shader_slab, slab, &device->shader_slabs, slabs) {
- uint64_t offset = 0;
+ mtx_lock(&device->shader_slab_mutex);
+ list_for_each_entry(struct radv_shader_slab, slab, &device->shader_slabs, slabs)
+ {
+ uint64_t offset = 0;
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
#endif
- list_for_each_entry(struct radv_shader_variant, s, &slab->shaders, slab_list) {
+ list_for_each_entry(struct radv_shader_variant, s, &slab->shaders, slab_list)
+ {
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
- if (s->bo_offset - offset >= shader->code_size) {
- shader->bo = slab->bo;
- shader->bo_offset = offset;
- list_addtail(&shader->slab_list, &s->slab_list);
- mtx_unlock(&device->shader_slab_mutex);
- return slab->ptr + offset;
- }
- offset = align_u64(s->bo_offset + s->code_size, 256);
- }
- if (offset <= slab->size && slab->size - offset >= shader->code_size) {
- shader->bo = slab->bo;
- shader->bo_offset = offset;
- list_addtail(&shader->slab_list, &slab->shaders);
- mtx_unlock(&device->shader_slab_mutex);
- return slab->ptr + offset;
- }
- }
-
- mtx_unlock(&device->shader_slab_mutex);
- struct radv_shader_slab *slab = calloc(1, sizeof(struct radv_shader_slab));
-
- slab->size = MAX2(256 * 1024, shader->code_size);
- slab->bo = device->ws->buffer_create(device->ws, slab->size, 256,
- RADEON_DOMAIN_VRAM,
- RADEON_FLAG_NO_INTERPROCESS_SHARING |
- (device->physical_device->rad_info.cpdma_prefetch_writes_memory ?
- 0 : RADEON_FLAG_READ_ONLY),
- RADV_BO_PRIORITY_SHADER);
- if (!slab->bo) {
- free(slab);
- return NULL;
- }
-
- slab->ptr = (char*)device->ws->buffer_map(slab->bo);
- if (!slab->ptr) {
- device->ws->buffer_destroy(device->ws, slab->bo);
- free(slab);
- return NULL;
- }
-
- list_inithead(&slab->shaders);
-
- mtx_lock(&device->shader_slab_mutex);
- list_add(&slab->slabs, &device->shader_slabs);
-
- shader->bo = slab->bo;
- shader->bo_offset = 0;
- list_add(&shader->slab_list, &slab->shaders);
- mtx_unlock(&device->shader_slab_mutex);
- return slab->ptr;
+ if (s->bo_offset - offset >= shader->code_size) {
+ shader->bo = slab->bo;
+ shader->bo_offset = offset;
+ list_addtail(&shader->slab_list, &s->slab_list);
+ mtx_unlock(&device->shader_slab_mutex);
+ return slab->ptr + offset;
+ }
+ offset = align_u64(s->bo_offset + s->code_size, 256);
+ }
+ if (offset <= slab->size && slab->size - offset >= shader->code_size) {
+ shader->bo = slab->bo;
+ shader->bo_offset = offset;
+ list_addtail(&shader->slab_list, &slab->shaders);
+ mtx_unlock(&device->shader_slab_mutex);
+ return slab->ptr + offset;
+ }
+ }
+
+ mtx_unlock(&device->shader_slab_mutex);
+ struct radv_shader_slab *slab = calloc(1, sizeof(struct radv_shader_slab));
+
+ slab->size = MAX2(256 * 1024, shader->code_size);
+ slab->bo = device->ws->buffer_create(
+ device->ws, slab->size, 256, RADEON_DOMAIN_VRAM,
+ RADEON_FLAG_NO_INTERPROCESS_SHARING |
+ (device->physical_device->rad_info.cpdma_prefetch_writes_memory ? 0
+ : RADEON_FLAG_READ_ONLY),
+ RADV_BO_PRIORITY_SHADER);
+ if (!slab->bo) {
+ free(slab);
+ return NULL;
+ }
+
+ slab->ptr = (char *)device->ws->buffer_map(slab->bo);
+ if (!slab->ptr) {
+ device->ws->buffer_destroy(device->ws, slab->bo);
+ free(slab);
+ return NULL;
+ }
+
+ list_inithead(&slab->shaders);
+
+ mtx_lock(&device->shader_slab_mutex);
+ list_add(&slab->slabs, &device->shader_slabs);
+
+ shader->bo = slab->bo;
+ shader->bo_offset = 0;
+ list_add(&shader->slab_list, &slab->shaders);
+ mtx_unlock(&device->shader_slab_mutex);
+ return slab->ptr;
}
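/* The slab allocator above does first-fit within 256 KiB (or larger) slabs and
 * keeps every shader 256-byte aligned via align_u64(). A minimal sketch of that
 * rounding helper, assuming the usual power-of-two round-up definition; the
 * name carries a _sketch suffix because the real helper lives elsewhere in the
 * tree.
 */
static inline uint64_t
align_u64_sketch(uint64_t value, uint64_t alignment)
{
   /* e.g. align_u64_sketch(1000, 256) == 1024, align_u64_sketch(1024, 256) == 1024 */
   return (value + alignment - 1) & ~(alignment - 1);
}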
void
radv_destroy_shader_slabs(struct radv_device *device)
{
- list_for_each_entry_safe(struct radv_shader_slab, slab, &device->shader_slabs, slabs) {
- device->ws->buffer_destroy(device->ws, slab->bo);
- free(slab);
- }
- mtx_destroy(&device->shader_slab_mutex);
+ list_for_each_entry_safe(struct radv_shader_slab, slab, &device->shader_slabs, slabs)
+ {
+ device->ws->buffer_destroy(device->ws, slab->bo);
+ free(slab);
+ }
+ mtx_destroy(&device->shader_slab_mutex);
}
/* For the UMR disassembler. */
-#define DEBUGGER_END_OF_CODE_MARKER 0xbf9f0000 /* invalid instruction */
-#define DEBUGGER_NUM_MARKERS 5
+#define DEBUGGER_END_OF_CODE_MARKER 0xbf9f0000 /* invalid instruction */
+#define DEBUGGER_NUM_MARKERS 5
static unsigned
radv_get_shader_binary_size(size_t code_size)
{
- return code_size + DEBUGGER_NUM_MARKERS * 4;
+ return code_size + DEBUGGER_NUM_MARKERS * 4;
}
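/* Sketch of what those DEBUGGER_NUM_MARKERS * 4 extra bytes are used for,
 * assuming the upload path pads the code with end-of-code markers so UMR's
 * disassembler can tell where the shader stops; the helper name is
 * illustrative.
 */
static void
append_debugger_markers_sketch(uint32_t *dest, size_t code_size)
{
   uint32_t *end = dest + code_size / 4;
   for (unsigned i = 0; i < DEBUGGER_NUM_MARKERS; i++)
      end[i] = DEBUGGER_END_OF_CODE_MARKER; /* 0xbf9f0000, an invalid instruction */
}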
-static bool radv_should_use_wgp_mode(const struct radv_device *device, gl_shader_stage stage,
- const struct radv_shader_info *info)
+static bool
+radv_should_use_wgp_mode(const struct radv_device *device, gl_shader_stage stage,
+ const struct radv_shader_info *info)
{
- enum chip_class chip = device->physical_device->rad_info.chip_class;
- switch (stage) {
- case MESA_SHADER_COMPUTE:
- case MESA_SHADER_TESS_CTRL:
- return chip >= GFX10;
- case MESA_SHADER_GEOMETRY:
- return chip == GFX10 || (chip >= GFX10_3 && !info->is_ngg);
- case MESA_SHADER_VERTEX:
- case MESA_SHADER_TESS_EVAL:
- return chip == GFX10 && info->is_ngg;
- default:
- return false;
- }
+ enum chip_class chip = device->physical_device->rad_info.chip_class;
+ switch (stage) {
+ case MESA_SHADER_COMPUTE:
+ case MESA_SHADER_TESS_CTRL:
+ return chip >= GFX10;
+ case MESA_SHADER_GEOMETRY:
+ return chip == GFX10 || (chip >= GFX10_3 && !info->is_ngg);
+ case MESA_SHADER_VERTEX:
+ case MESA_SHADER_TESS_EVAL:
+ return chip == GFX10 && info->is_ngg;
+ default:
+ return false;
+ }
}
-static void radv_postprocess_config(const struct radv_device *device,
- const struct ac_shader_config *config_in,
- const struct radv_shader_info *info,
- gl_shader_stage stage,
- struct ac_shader_config *config_out)
+static void
+radv_postprocess_config(const struct radv_device *device, const struct ac_shader_config *config_in,
+ const struct radv_shader_info *info, gl_shader_stage stage,
+ struct ac_shader_config *config_out)
{
- const struct radv_physical_device *pdevice = device->physical_device;
- bool scratch_enabled = config_in->scratch_bytes_per_wave > 0;
- bool trap_enabled = !!device->trap_handler_shader;
- unsigned vgpr_comp_cnt = 0;
- unsigned num_input_vgprs = info->num_input_vgprs;
-
- if (stage == MESA_SHADER_FRAGMENT) {
- num_input_vgprs = ac_get_fs_input_vgpr_cnt(config_in, NULL, NULL);
- }
-
- unsigned num_vgprs = MAX2(config_in->num_vgprs, num_input_vgprs);
- /* +3 for scratch wave offset and VCC */
- unsigned num_sgprs = MAX2(config_in->num_sgprs, info->num_input_sgprs + 3);
- unsigned num_shared_vgprs = config_in->num_shared_vgprs;
- /* shared VGPRs are introduced in Navi and are allocated in blocks of 8 (RDNA ref 3.6.5) */
- assert((pdevice->rad_info.chip_class >= GFX10 && num_shared_vgprs % 8 == 0)
- || (pdevice->rad_info.chip_class < GFX10 && num_shared_vgprs == 0));
- unsigned num_shared_vgpr_blocks = num_shared_vgprs / 8;
- unsigned excp_en = 0;
-
- *config_out = *config_in;
- config_out->num_vgprs = num_vgprs;
- config_out->num_sgprs = num_sgprs;
- config_out->num_shared_vgprs = num_shared_vgprs;
-
- config_out->rsrc2 = S_00B12C_USER_SGPR(info->num_user_sgprs) |
- S_00B12C_SCRATCH_EN(scratch_enabled) |
- S_00B12C_TRAP_PRESENT(trap_enabled);
-
- if (trap_enabled) {
- /* Configure the shader exceptions like memory violation, etc.
- * TODO: Enable (and validate) more exceptions.
- */
- excp_en = 1 << 8; /* mem_viol */
- }
-
- if (!pdevice->use_ngg_streamout) {
- config_out->rsrc2 |= S_00B12C_SO_BASE0_EN(!!info->so.strides[0]) |
- S_00B12C_SO_BASE1_EN(!!info->so.strides[1]) |
- S_00B12C_SO_BASE2_EN(!!info->so.strides[2]) |
- S_00B12C_SO_BASE3_EN(!!info->so.strides[3]) |
- S_00B12C_SO_EN(!!info->so.num_outputs);
- }
-
- config_out->rsrc1 = S_00B848_VGPRS((num_vgprs - 1) /
- (info->wave_size == 32 ? 8 : 4)) |
- S_00B848_DX10_CLAMP(1) |
- S_00B848_FLOAT_MODE(config_out->float_mode);
-
- if (pdevice->rad_info.chip_class >= GFX10) {
- config_out->rsrc2 |= S_00B22C_USER_SGPR_MSB_GFX10(info->num_user_sgprs >> 5);
- } else {
- config_out->rsrc1 |= S_00B228_SGPRS((num_sgprs - 1) / 8);
- config_out->rsrc2 |= S_00B22C_USER_SGPR_MSB_GFX9(info->num_user_sgprs >> 5);
- }
-
- bool wgp_mode = radv_should_use_wgp_mode(device, stage, info);
-
- switch (stage) {
- case MESA_SHADER_TESS_EVAL:
- if (info->is_ngg) {
- config_out->rsrc1 |= S_00B228_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10);
- config_out->rsrc2 |= S_00B22C_OC_LDS_EN(1) |
- S_00B22C_EXCP_EN(excp_en);
- } else if (info->tes.as_es) {
- assert(pdevice->rad_info.chip_class <= GFX8);
- vgpr_comp_cnt = info->uses_prim_id ? 3 : 2;
-
- config_out->rsrc2 |= S_00B12C_OC_LDS_EN(1) |
- S_00B12C_EXCP_EN(excp_en);
- } else {
- bool enable_prim_id = info->tes.export_prim_id || info->uses_prim_id;
- vgpr_comp_cnt = enable_prim_id ? 3 : 2;
-
- config_out->rsrc1 |= S_00B128_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10);
- config_out->rsrc2 |= S_00B12C_OC_LDS_EN(1) |
- S_00B12C_EXCP_EN(excp_en);
- }
- config_out->rsrc2 |= S_00B22C_SHARED_VGPR_CNT(num_shared_vgpr_blocks);
- break;
- case MESA_SHADER_TESS_CTRL:
- if (pdevice->rad_info.chip_class >= GFX9) {
- /* We need at least 2 components for LS.
- * VGPR0-3: (VertexID, RelAutoindex, InstanceID / StepRate0, InstanceID).
-          * StepRate0 is set to 1 so that VGPR3 doesn't have to be loaded.
- */
- if (pdevice->rad_info.chip_class >= GFX10) {
- vgpr_comp_cnt = info->vs.needs_instance_id ? 3 : 1;
- config_out->rsrc2 |= S_00B42C_LDS_SIZE_GFX10(info->tcs.num_lds_blocks) |
- S_00B42C_EXCP_EN_GFX6(excp_en);
- } else {
- vgpr_comp_cnt = info->vs.needs_instance_id ? 2 : 1;
- config_out->rsrc2 |= S_00B42C_LDS_SIZE_GFX9(info->tcs.num_lds_blocks) |
- S_00B42C_EXCP_EN_GFX9(excp_en);
- }
- } else {
- config_out->rsrc2 |= S_00B12C_OC_LDS_EN(1) |
- S_00B12C_EXCP_EN(excp_en);
- }
- config_out->rsrc1 |= S_00B428_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10) |
- S_00B428_WGP_MODE(wgp_mode);
- config_out->rsrc2 |= S_00B42C_SHARED_VGPR_CNT(num_shared_vgpr_blocks);
- break;
- case MESA_SHADER_VERTEX:
- if (info->is_ngg) {
- config_out->rsrc1 |= S_00B228_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10);
- } else if (info->vs.as_ls) {
- assert(pdevice->rad_info.chip_class <= GFX8);
- /* We need at least 2 components for LS.
- * VGPR0-3: (VertexID, RelAutoindex, InstanceID / StepRate0, InstanceID).
-          * StepRate0 is set to 1 so that VGPR3 doesn't have to be loaded.
- */
- vgpr_comp_cnt = info->vs.needs_instance_id ? 2 : 1;
- } else if (info->vs.as_es) {
- assert(pdevice->rad_info.chip_class <= GFX8);
- /* VGPR0-3: (VertexID, InstanceID / StepRate0, ...) */
- vgpr_comp_cnt = info->vs.needs_instance_id ? 1 : 0;
- } else {
- /* VGPR0-3: (VertexID, InstanceID / StepRate0, PrimID, InstanceID)
-          * If PrimID is disabled, InstanceID / StepRate1 is loaded instead.
-          * StepRate0 is set to 1 so that VGPR3 doesn't have to be loaded.
- */
- if (info->vs.needs_instance_id && pdevice->rad_info.chip_class >= GFX10) {
- vgpr_comp_cnt = 3;
- } else if (info->vs.export_prim_id) {
- vgpr_comp_cnt = 2;
- } else if (info->vs.needs_instance_id) {
- vgpr_comp_cnt = 1;
- } else {
- vgpr_comp_cnt = 0;
- }
-
- config_out->rsrc1 |= S_00B128_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10);
- }
- config_out->rsrc2 |= S_00B12C_SHARED_VGPR_CNT(num_shared_vgpr_blocks) |
- S_00B12C_EXCP_EN(excp_en);
- break;
- case MESA_SHADER_FRAGMENT:
- config_out->rsrc1 |= S_00B028_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10);
- config_out->rsrc2 |= S_00B02C_SHARED_VGPR_CNT(num_shared_vgpr_blocks) |
- S_00B02C_TRAP_PRESENT(1) |
- S_00B02C_EXCP_EN(excp_en);
- break;
- case MESA_SHADER_GEOMETRY:
- config_out->rsrc1 |= S_00B228_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10);
- config_out->rsrc2 |= S_00B22C_SHARED_VGPR_CNT(num_shared_vgpr_blocks) |
- S_00B22C_EXCP_EN(excp_en);
- break;
- case MESA_SHADER_COMPUTE:
- config_out->rsrc1 |= S_00B848_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10) |
- S_00B848_WGP_MODE(wgp_mode);
- config_out->rsrc2 |=
- S_00B84C_TGID_X_EN(info->cs.uses_block_id[0]) |
- S_00B84C_TGID_Y_EN(info->cs.uses_block_id[1]) |
- S_00B84C_TGID_Z_EN(info->cs.uses_block_id[2]) |
- S_00B84C_TIDIG_COMP_CNT(info->cs.uses_thread_id[2] ? 2 :
- info->cs.uses_thread_id[1] ? 1 : 0) |
- S_00B84C_TG_SIZE_EN(info->cs.uses_local_invocation_idx) |
- S_00B84C_LDS_SIZE(config_in->lds_size) |
- S_00B84C_EXCP_EN(excp_en);
- config_out->rsrc3 |= S_00B8A0_SHARED_VGPR_CNT(num_shared_vgpr_blocks);
-
- break;
- default:
- unreachable("unsupported shader type");
- break;
- }
-
- if (pdevice->rad_info.chip_class >= GFX10 && info->is_ngg &&
- (stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL || stage == MESA_SHADER_GEOMETRY)) {
- unsigned gs_vgpr_comp_cnt, es_vgpr_comp_cnt;
- gl_shader_stage es_stage = stage;
- if (stage == MESA_SHADER_GEOMETRY)
- es_stage = info->gs.es_type;
-
- /* VGPR5-8: (VertexID, UserVGPR0, UserVGPR1, UserVGPR2 / InstanceID) */
- if (es_stage == MESA_SHADER_VERTEX) {
- es_vgpr_comp_cnt = info->vs.needs_instance_id ? 3 : 0;
- } else if (es_stage == MESA_SHADER_TESS_EVAL) {
- bool enable_prim_id = info->tes.export_prim_id || info->uses_prim_id;
- es_vgpr_comp_cnt = enable_prim_id ? 3 : 2;
- } else
- unreachable("Unexpected ES shader stage");
-
- bool tes_triangles = stage == MESA_SHADER_TESS_EVAL &&
- info->tes.primitive_mode >= 4; /* GL_TRIANGLES */
- if (info->uses_invocation_id || stage == MESA_SHADER_VERTEX) {
- gs_vgpr_comp_cnt = 3; /* VGPR3 contains InvocationID. */
- } else if (info->uses_prim_id) {
- gs_vgpr_comp_cnt = 2; /* VGPR2 contains PrimitiveID. */
- } else if (info->gs.vertices_in >= 3 || tes_triangles) {
- gs_vgpr_comp_cnt = 1; /* VGPR1 contains offsets 2, 3 */
- } else {
- gs_vgpr_comp_cnt = 0; /* VGPR0 contains offsets 0, 1 */
- }
-
-       /* Disable the WGP mode on gfx10.3 because it can hang (it
-        * happened on VanGogh). Let's disable it on all chips that
-        * disable exactly 1 CU per SA for GS.
- */
- config_out->rsrc1 |= S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt) |
- S_00B228_WGP_MODE(wgp_mode);
- config_out->rsrc2 |= S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) |
- S_00B22C_LDS_SIZE(config_in->lds_size) |
- S_00B22C_OC_LDS_EN(es_stage == MESA_SHADER_TESS_EVAL);
- } else if (pdevice->rad_info.chip_class >= GFX9 &&
- stage == MESA_SHADER_GEOMETRY) {
- unsigned es_type = info->gs.es_type;
- unsigned gs_vgpr_comp_cnt, es_vgpr_comp_cnt;
-
- if (es_type == MESA_SHADER_VERTEX) {
- /* VGPR0-3: (VertexID, InstanceID / StepRate0, ...) */
- if (info->vs.needs_instance_id) {
- es_vgpr_comp_cnt = pdevice->rad_info.chip_class >= GFX10 ? 3 : 1;
- } else {
- es_vgpr_comp_cnt = 0;
- }
- } else if (es_type == MESA_SHADER_TESS_EVAL) {
- es_vgpr_comp_cnt = info->uses_prim_id ? 3 : 2;
- } else {
- unreachable("invalid shader ES type");
- }
-
- /* If offsets 4, 5 are used, GS_VGPR_COMP_CNT is ignored and
- * VGPR[0:4] are always loaded.
- */
- if (info->uses_invocation_id) {
- gs_vgpr_comp_cnt = 3; /* VGPR3 contains InvocationID. */
- } else if (info->uses_prim_id) {
- gs_vgpr_comp_cnt = 2; /* VGPR2 contains PrimitiveID. */
- } else if (info->gs.vertices_in >= 3) {
- gs_vgpr_comp_cnt = 1; /* VGPR1 contains offsets 2, 3 */
- } else {
- gs_vgpr_comp_cnt = 0; /* VGPR0 contains offsets 0, 1 */
- }
-
- config_out->rsrc1 |= S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt) |
- S_00B228_WGP_MODE(wgp_mode);
- config_out->rsrc2 |= S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) |
- S_00B22C_OC_LDS_EN(es_type == MESA_SHADER_TESS_EVAL);
- } else if (pdevice->rad_info.chip_class >= GFX9 &&
- stage == MESA_SHADER_TESS_CTRL) {
- config_out->rsrc1 |= S_00B428_LS_VGPR_COMP_CNT(vgpr_comp_cnt);
- } else {
- config_out->rsrc1 |= S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt);
- }
+ const struct radv_physical_device *pdevice = device->physical_device;
+ bool scratch_enabled = config_in->scratch_bytes_per_wave > 0;
+ bool trap_enabled = !!device->trap_handler_shader;
+ unsigned vgpr_comp_cnt = 0;
+ unsigned num_input_vgprs = info->num_input_vgprs;
+
+ if (stage == MESA_SHADER_FRAGMENT) {
+ num_input_vgprs = ac_get_fs_input_vgpr_cnt(config_in, NULL, NULL);
+ }
+
+ unsigned num_vgprs = MAX2(config_in->num_vgprs, num_input_vgprs);
+ /* +3 for scratch wave offset and VCC */
+ unsigned num_sgprs = MAX2(config_in->num_sgprs, info->num_input_sgprs + 3);
+ unsigned num_shared_vgprs = config_in->num_shared_vgprs;
+ /* shared VGPRs are introduced in Navi and are allocated in blocks of 8 (RDNA ref 3.6.5) */
+ assert((pdevice->rad_info.chip_class >= GFX10 && num_shared_vgprs % 8 == 0) ||
+ (pdevice->rad_info.chip_class < GFX10 && num_shared_vgprs == 0));
+ unsigned num_shared_vgpr_blocks = num_shared_vgprs / 8;
+ unsigned excp_en = 0;
+
+ *config_out = *config_in;
+ config_out->num_vgprs = num_vgprs;
+ config_out->num_sgprs = num_sgprs;
+ config_out->num_shared_vgprs = num_shared_vgprs;
+
+ config_out->rsrc2 = S_00B12C_USER_SGPR(info->num_user_sgprs) |
+ S_00B12C_SCRATCH_EN(scratch_enabled) | S_00B12C_TRAP_PRESENT(trap_enabled);
+
+ if (trap_enabled) {
+ /* Configure the shader exceptions like memory violation, etc.
+ * TODO: Enable (and validate) more exceptions.
+ */
+ excp_en = 1 << 8; /* mem_viol */
+ }
+
+ if (!pdevice->use_ngg_streamout) {
+ config_out->rsrc2 |=
+ S_00B12C_SO_BASE0_EN(!!info->so.strides[0]) | S_00B12C_SO_BASE1_EN(!!info->so.strides[1]) |
+ S_00B12C_SO_BASE2_EN(!!info->so.strides[2]) | S_00B12C_SO_BASE3_EN(!!info->so.strides[3]) |
+ S_00B12C_SO_EN(!!info->so.num_outputs);
+ }
+
+ config_out->rsrc1 = S_00B848_VGPRS((num_vgprs - 1) / (info->wave_size == 32 ? 8 : 4)) |
+ S_00B848_DX10_CLAMP(1) | S_00B848_FLOAT_MODE(config_out->float_mode);
+
+ if (pdevice->rad_info.chip_class >= GFX10) {
+ config_out->rsrc2 |= S_00B22C_USER_SGPR_MSB_GFX10(info->num_user_sgprs >> 5);
+ } else {
+ config_out->rsrc1 |= S_00B228_SGPRS((num_sgprs - 1) / 8);
+ config_out->rsrc2 |= S_00B22C_USER_SGPR_MSB_GFX9(info->num_user_sgprs >> 5);
+ }
+
+ bool wgp_mode = radv_should_use_wgp_mode(device, stage, info);
+
+ switch (stage) {
+ case MESA_SHADER_TESS_EVAL:
+ if (info->is_ngg) {
+ config_out->rsrc1 |= S_00B228_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10);
+ config_out->rsrc2 |= S_00B22C_OC_LDS_EN(1) | S_00B22C_EXCP_EN(excp_en);
+ } else if (info->tes.as_es) {
+ assert(pdevice->rad_info.chip_class <= GFX8);
+ vgpr_comp_cnt = info->uses_prim_id ? 3 : 2;
+
+ config_out->rsrc2 |= S_00B12C_OC_LDS_EN(1) | S_00B12C_EXCP_EN(excp_en);
+ } else {
+ bool enable_prim_id = info->tes.export_prim_id || info->uses_prim_id;
+ vgpr_comp_cnt = enable_prim_id ? 3 : 2;
+
+ config_out->rsrc1 |= S_00B128_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10);
+ config_out->rsrc2 |= S_00B12C_OC_LDS_EN(1) | S_00B12C_EXCP_EN(excp_en);
+ }
+ config_out->rsrc2 |= S_00B22C_SHARED_VGPR_CNT(num_shared_vgpr_blocks);
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ if (pdevice->rad_info.chip_class >= GFX9) {
+ /* We need at least 2 components for LS.
+ * VGPR0-3: (VertexID, RelAutoindex, InstanceID / StepRate0, InstanceID).
+          * StepRate0 is set to 1 so that VGPR3 doesn't have to be loaded.
+ */
+ if (pdevice->rad_info.chip_class >= GFX10) {
+ vgpr_comp_cnt = info->vs.needs_instance_id ? 3 : 1;
+ config_out->rsrc2 |=
+ S_00B42C_LDS_SIZE_GFX10(info->tcs.num_lds_blocks) | S_00B42C_EXCP_EN_GFX6(excp_en);
+ } else {
+ vgpr_comp_cnt = info->vs.needs_instance_id ? 2 : 1;
+ config_out->rsrc2 |=
+ S_00B42C_LDS_SIZE_GFX9(info->tcs.num_lds_blocks) | S_00B42C_EXCP_EN_GFX9(excp_en);
+ }
+ } else {
+ config_out->rsrc2 |= S_00B12C_OC_LDS_EN(1) | S_00B12C_EXCP_EN(excp_en);
+ }
+ config_out->rsrc1 |=
+ S_00B428_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10) | S_00B428_WGP_MODE(wgp_mode);
+ config_out->rsrc2 |= S_00B42C_SHARED_VGPR_CNT(num_shared_vgpr_blocks);
+ break;
+ case MESA_SHADER_VERTEX:
+ if (info->is_ngg) {
+ config_out->rsrc1 |= S_00B228_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10);
+ } else if (info->vs.as_ls) {
+ assert(pdevice->rad_info.chip_class <= GFX8);
+ /* We need at least 2 components for LS.
+ * VGPR0-3: (VertexID, RelAutoindex, InstanceID / StepRate0, InstanceID).
+          * StepRate0 is set to 1 so that VGPR3 doesn't have to be loaded.
+ */
+ vgpr_comp_cnt = info->vs.needs_instance_id ? 2 : 1;
+ } else if (info->vs.as_es) {
+ assert(pdevice->rad_info.chip_class <= GFX8);
+ /* VGPR0-3: (VertexID, InstanceID / StepRate0, ...) */
+ vgpr_comp_cnt = info->vs.needs_instance_id ? 1 : 0;
+ } else {
+ /* VGPR0-3: (VertexID, InstanceID / StepRate0, PrimID, InstanceID)
+          * If PrimID is disabled, InstanceID / StepRate1 is loaded instead.
+          * StepRate0 is set to 1 so that VGPR3 doesn't have to be loaded.
+ */
+ if (info->vs.needs_instance_id && pdevice->rad_info.chip_class >= GFX10) {
+ vgpr_comp_cnt = 3;
+ } else if (info->vs.export_prim_id) {
+ vgpr_comp_cnt = 2;
+ } else if (info->vs.needs_instance_id) {
+ vgpr_comp_cnt = 1;
+ } else {
+ vgpr_comp_cnt = 0;
+ }
+
+ config_out->rsrc1 |= S_00B128_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10);
+ }
+ config_out->rsrc2 |=
+ S_00B12C_SHARED_VGPR_CNT(num_shared_vgpr_blocks) | S_00B12C_EXCP_EN(excp_en);
+ break;
+ case MESA_SHADER_FRAGMENT:
+ config_out->rsrc1 |= S_00B028_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10);
+ config_out->rsrc2 |= S_00B02C_SHARED_VGPR_CNT(num_shared_vgpr_blocks) |
+ S_00B02C_TRAP_PRESENT(1) | S_00B02C_EXCP_EN(excp_en);
+ break;
+ case MESA_SHADER_GEOMETRY:
+ config_out->rsrc1 |= S_00B228_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10);
+ config_out->rsrc2 |=
+ S_00B22C_SHARED_VGPR_CNT(num_shared_vgpr_blocks) | S_00B22C_EXCP_EN(excp_en);
+ break;
+ case MESA_SHADER_COMPUTE:
+ config_out->rsrc1 |=
+ S_00B848_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10) | S_00B848_WGP_MODE(wgp_mode);
+ config_out->rsrc2 |= S_00B84C_TGID_X_EN(info->cs.uses_block_id[0]) |
+ S_00B84C_TGID_Y_EN(info->cs.uses_block_id[1]) |
+ S_00B84C_TGID_Z_EN(info->cs.uses_block_id[2]) |
+ S_00B84C_TIDIG_COMP_CNT(info->cs.uses_thread_id[2] ? 2
+ : info->cs.uses_thread_id[1] ? 1
+ : 0) |
+ S_00B84C_TG_SIZE_EN(info->cs.uses_local_invocation_idx) |
+ S_00B84C_LDS_SIZE(config_in->lds_size) | S_00B84C_EXCP_EN(excp_en);
+ config_out->rsrc3 |= S_00B8A0_SHARED_VGPR_CNT(num_shared_vgpr_blocks);
+
+ break;
+ default:
+ unreachable("unsupported shader type");
+ break;
+ }
+
+ if (pdevice->rad_info.chip_class >= GFX10 && info->is_ngg &&
+ (stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL ||
+ stage == MESA_SHADER_GEOMETRY)) {
+ unsigned gs_vgpr_comp_cnt, es_vgpr_comp_cnt;
+ gl_shader_stage es_stage = stage;
+ if (stage == MESA_SHADER_GEOMETRY)
+ es_stage = info->gs.es_type;
+
+ /* VGPR5-8: (VertexID, UserVGPR0, UserVGPR1, UserVGPR2 / InstanceID) */
+ if (es_stage == MESA_SHADER_VERTEX) {
+ es_vgpr_comp_cnt = info->vs.needs_instance_id ? 3 : 0;
+ } else if (es_stage == MESA_SHADER_TESS_EVAL) {
+ bool enable_prim_id = info->tes.export_prim_id || info->uses_prim_id;
+ es_vgpr_comp_cnt = enable_prim_id ? 3 : 2;
+ } else
+ unreachable("Unexpected ES shader stage");
+
+ bool tes_triangles =
+ stage == MESA_SHADER_TESS_EVAL && info->tes.primitive_mode >= 4; /* GL_TRIANGLES */
+ if (info->uses_invocation_id || stage == MESA_SHADER_VERTEX) {
+ gs_vgpr_comp_cnt = 3; /* VGPR3 contains InvocationID. */
+ } else if (info->uses_prim_id) {
+ gs_vgpr_comp_cnt = 2; /* VGPR2 contains PrimitiveID. */
+ } else if (info->gs.vertices_in >= 3 || tes_triangles) {
+ gs_vgpr_comp_cnt = 1; /* VGPR1 contains offsets 2, 3 */
+ } else {
+ gs_vgpr_comp_cnt = 0; /* VGPR0 contains offsets 0, 1 */
+ }
+
+      /* Disable the WGP mode on gfx10.3 because it can hang (this
+       * happened on VanGogh). Let's disable it on all chips that
+       * disable exactly 1 CU per SA for GS.
+ */
+ config_out->rsrc1 |=
+ S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt) | S_00B228_WGP_MODE(wgp_mode);
+ config_out->rsrc2 |= S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) |
+ S_00B22C_LDS_SIZE(config_in->lds_size) |
+ S_00B22C_OC_LDS_EN(es_stage == MESA_SHADER_TESS_EVAL);
+ } else if (pdevice->rad_info.chip_class >= GFX9 && stage == MESA_SHADER_GEOMETRY) {
+ unsigned es_type = info->gs.es_type;
+ unsigned gs_vgpr_comp_cnt, es_vgpr_comp_cnt;
+
+ if (es_type == MESA_SHADER_VERTEX) {
+ /* VGPR0-3: (VertexID, InstanceID / StepRate0, ...) */
+ if (info->vs.needs_instance_id) {
+ es_vgpr_comp_cnt = pdevice->rad_info.chip_class >= GFX10 ? 3 : 1;
+ } else {
+ es_vgpr_comp_cnt = 0;
+ }
+ } else if (es_type == MESA_SHADER_TESS_EVAL) {
+ es_vgpr_comp_cnt = info->uses_prim_id ? 3 : 2;
+ } else {
+ unreachable("invalid shader ES type");
+ }
+
+ /* If offsets 4, 5 are used, GS_VGPR_COMP_CNT is ignored and
+ * VGPR[0:4] are always loaded.
+ */
+ if (info->uses_invocation_id) {
+ gs_vgpr_comp_cnt = 3; /* VGPR3 contains InvocationID. */
+ } else if (info->uses_prim_id) {
+ gs_vgpr_comp_cnt = 2; /* VGPR2 contains PrimitiveID. */
+ } else if (info->gs.vertices_in >= 3) {
+ gs_vgpr_comp_cnt = 1; /* VGPR1 contains offsets 2, 3 */
+ } else {
+ gs_vgpr_comp_cnt = 0; /* VGPR0 contains offsets 0, 1 */
+ }
+
+ config_out->rsrc1 |=
+ S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt) | S_00B228_WGP_MODE(wgp_mode);
+ config_out->rsrc2 |= S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) |
+ S_00B22C_OC_LDS_EN(es_type == MESA_SHADER_TESS_EVAL);
+ } else if (pdevice->rad_info.chip_class >= GFX9 && stage == MESA_SHADER_TESS_CTRL) {
+ config_out->rsrc1 |= S_00B428_LS_VGPR_COMP_CNT(vgpr_comp_cnt);
+ } else {
+ config_out->rsrc1 |= S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt);
+ }
}
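
The rsrc1/rsrc2/rsrc3 words assembled above are plain bitfields: each S_00B*_* helper shifts a value into its register field, and the VGPR_COMP_CNT fields select how many extra system VGPRs (InstanceID, PrimitiveID, ...) the hardware preloads for the stage. A minimal sketch of that packing pattern, assuming made-up field offsets rather than the real SPI_SHADER_PGM_RSRC layout from the generated register headers:

   /* Sketch of the register-field packing used above; the shift/width values
    * are placeholders, not the real SPI_SHADER_PGM_RSRC1 layout. */
   #include <stdbool.h>
   #include <stdint.h>
   #include <stdio.h>

   #define FIELD(val, shift, width) (((uint32_t)(val) & ((1u << (width)) - 1)) << (shift))

   /* Hypothetical field positions, for illustration only. */
   #define S_EXAMPLE_VGPR_COMP_CNT(x) FIELD(x, 24, 2)
   #define S_EXAMPLE_MEM_ORDERED(x)   FIELD(x, 26, 1)

   int
   main(void)
   {
      bool needs_instance_id = true, export_prim_id = false, gfx10_plus = true;

      /* Same priority order as the plain-VS case above: InstanceID in VGPR3 on
       * GFX10+, then PrimID in VGPR2, then InstanceID in VGPR1, else nothing. */
      unsigned vgpr_comp_cnt;
      if (needs_instance_id && gfx10_plus)
         vgpr_comp_cnt = 3;
      else if (export_prim_id)
         vgpr_comp_cnt = 2;
      else if (needs_instance_id)
         vgpr_comp_cnt = 1;
      else
         vgpr_comp_cnt = 0;

      uint32_t rsrc1 = S_EXAMPLE_VGPR_COMP_CNT(vgpr_comp_cnt) | S_EXAMPLE_MEM_ORDERED(gfx10_plus);
      printf("vgpr_comp_cnt=%u rsrc1=0x%08x\n", vgpr_comp_cnt, (unsigned)rsrc1);
      return 0;
   }
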
struct radv_shader_variant *
-radv_shader_variant_create(struct radv_device *device,
- const struct radv_shader_binary *binary,
- bool keep_shader_info)
+radv_shader_variant_create(struct radv_device *device, const struct radv_shader_binary *binary,
+ bool keep_shader_info)
{
- struct ac_shader_config config = {0};
- struct ac_rtld_binary rtld_binary = {0};
- struct radv_shader_variant *variant = calloc(1, sizeof(struct radv_shader_variant));
- if (!variant)
- return NULL;
-
- variant->ref_count = 1;
-
- if (binary->type == RADV_BINARY_TYPE_RTLD) {
- struct ac_rtld_symbol lds_symbols[2];
- unsigned num_lds_symbols = 0;
- const char *elf_data = (const char *)((struct radv_shader_binary_rtld *)binary)->data;
- size_t elf_size = ((struct radv_shader_binary_rtld *)binary)->elf_size;
-
- if (device->physical_device->rad_info.chip_class >= GFX9 &&
- (binary->stage == MESA_SHADER_GEOMETRY || binary->info.is_ngg) &&
- !binary->is_gs_copy_shader) {
- /* We add this symbol even on LLVM <= 8 to ensure that
- * shader->config.lds_size is set correctly below.
- */
- struct ac_rtld_symbol *sym = &lds_symbols[num_lds_symbols++];
- sym->name = "esgs_ring";
- sym->size = binary->info.ngg_info.esgs_ring_size;
- sym->align = 64 * 1024;
- }
-
- if (binary->info.is_ngg &&
- binary->stage == MESA_SHADER_GEOMETRY) {
- struct ac_rtld_symbol *sym = &lds_symbols[num_lds_symbols++];
- sym->name = "ngg_emit";
- sym->size = binary->info.ngg_info.ngg_emit_size * 4;
- sym->align = 4;
- }
-
- struct ac_rtld_open_info open_info = {
- .info = &device->physical_device->rad_info,
- .shader_type = binary->stage,
- .wave_size = binary->info.wave_size,
- .num_parts = 1,
- .elf_ptrs = &elf_data,
- .elf_sizes = &elf_size,
- .num_shared_lds_symbols = num_lds_symbols,
- .shared_lds_symbols = lds_symbols,
- };
-
- if (!ac_rtld_open(&rtld_binary, open_info)) {
- free(variant);
- return NULL;
- }
-
- if (!ac_rtld_read_config(&device->physical_device->rad_info,
- &rtld_binary, &config)) {
- ac_rtld_close(&rtld_binary);
- free(variant);
- return NULL;
- }
-
- if (rtld_binary.lds_size > 0) {
- unsigned encode_granularity = device->physical_device->rad_info.lds_encode_granularity;
- config.lds_size = align(rtld_binary.lds_size, encode_granularity) / encode_granularity;
- }
- if (!config.lds_size && binary->stage == MESA_SHADER_TESS_CTRL) {
- /* This is used for reporting LDS statistics */
- config.lds_size = binary->info.tcs.num_lds_blocks;
- }
-
- variant->code_size = rtld_binary.rx_size;
- variant->exec_size = rtld_binary.exec_size;
- } else {
- assert(binary->type == RADV_BINARY_TYPE_LEGACY);
- config = ((struct radv_shader_binary_legacy *)binary)->config;
- variant->code_size = radv_get_shader_binary_size(((struct radv_shader_binary_legacy *)binary)->code_size);
- variant->exec_size = ((struct radv_shader_binary_legacy *)binary)->exec_size;
- }
-
- variant->info = binary->info;
- radv_postprocess_config(device, &config, &binary->info,
- binary->stage, &variant->config);
-
- void *dest_ptr = radv_alloc_shader_memory(device, variant);
- if (!dest_ptr) {
- if (binary->type == RADV_BINARY_TYPE_RTLD)
- ac_rtld_close(&rtld_binary);
- free(variant);
- return NULL;
- }
-
- if (binary->type == RADV_BINARY_TYPE_RTLD) {
- struct radv_shader_binary_rtld* bin = (struct radv_shader_binary_rtld *)binary;
- struct ac_rtld_upload_info info = {
- .binary = &rtld_binary,
- .rx_va = radv_buffer_get_va(variant->bo) + variant->bo_offset,
- .rx_ptr = dest_ptr,
- };
-
- if (!ac_rtld_upload(&info)) {
- radv_shader_variant_destroy(device, variant);
- ac_rtld_close(&rtld_binary);
- return NULL;
- }
-
- if (keep_shader_info ||
- (device->instance->debug_flags & RADV_DEBUG_DUMP_SHADERS)) {
- const char *disasm_data;
- size_t disasm_size;
- if (!ac_rtld_get_section_by_name(&rtld_binary, ".AMDGPU.disasm", &disasm_data, &disasm_size)) {
- radv_shader_variant_destroy(device, variant);
- ac_rtld_close(&rtld_binary);
- return NULL;
- }
-
- variant->ir_string = bin->llvm_ir_size ? strdup((const char*)(bin->data + bin->elf_size)) : NULL;
- variant->disasm_string = malloc(disasm_size + 1);
- memcpy(variant->disasm_string, disasm_data, disasm_size);
- variant->disasm_string[disasm_size] = 0;
- }
-
- variant->code_ptr = dest_ptr;
- ac_rtld_close(&rtld_binary);
- } else {
- struct radv_shader_binary_legacy* bin = (struct radv_shader_binary_legacy *)binary;
- memcpy(dest_ptr, bin->data + bin->stats_size, bin->code_size);
-
- /* Add end-of-code markers for the UMR disassembler. */
- uint32_t *ptr32 = (uint32_t *)dest_ptr + bin->code_size / 4;
- for (unsigned i = 0; i < DEBUGGER_NUM_MARKERS; i++)
- ptr32[i] = DEBUGGER_END_OF_CODE_MARKER;
-
- variant->code_ptr = dest_ptr;
- variant->ir_string = bin->ir_size ? strdup((const char*)(bin->data + bin->stats_size + bin->code_size)) : NULL;
- variant->disasm_string = bin->disasm_size ? strdup((const char*)(bin->data + bin->stats_size + bin->code_size + bin->ir_size)) : NULL;
-
- if (bin->stats_size) {
- variant->statistics = calloc(bin->stats_size, 1);
- memcpy(variant->statistics, bin->data, bin->stats_size);
- }
- }
- return variant;
+ struct ac_shader_config config = {0};
+ struct ac_rtld_binary rtld_binary = {0};
+ struct radv_shader_variant *variant = calloc(1, sizeof(struct radv_shader_variant));
+ if (!variant)
+ return NULL;
+
+ variant->ref_count = 1;
+
+ if (binary->type == RADV_BINARY_TYPE_RTLD) {
+ struct ac_rtld_symbol lds_symbols[2];
+ unsigned num_lds_symbols = 0;
+ const char *elf_data = (const char *)((struct radv_shader_binary_rtld *)binary)->data;
+ size_t elf_size = ((struct radv_shader_binary_rtld *)binary)->elf_size;
+
+ if (device->physical_device->rad_info.chip_class >= GFX9 &&
+ (binary->stage == MESA_SHADER_GEOMETRY || binary->info.is_ngg) &&
+ !binary->is_gs_copy_shader) {
+ /* We add this symbol even on LLVM <= 8 to ensure that
+ * shader->config.lds_size is set correctly below.
+ */
+ struct ac_rtld_symbol *sym = &lds_symbols[num_lds_symbols++];
+ sym->name = "esgs_ring";
+ sym->size = binary->info.ngg_info.esgs_ring_size;
+ sym->align = 64 * 1024;
+ }
+
+ if (binary->info.is_ngg && binary->stage == MESA_SHADER_GEOMETRY) {
+ struct ac_rtld_symbol *sym = &lds_symbols[num_lds_symbols++];
+ sym->name = "ngg_emit";
+ sym->size = binary->info.ngg_info.ngg_emit_size * 4;
+ sym->align = 4;
+ }
+
+ struct ac_rtld_open_info open_info = {
+ .info = &device->physical_device->rad_info,
+ .shader_type = binary->stage,
+ .wave_size = binary->info.wave_size,
+ .num_parts = 1,
+ .elf_ptrs = &elf_data,
+ .elf_sizes = &elf_size,
+ .num_shared_lds_symbols = num_lds_symbols,
+ .shared_lds_symbols = lds_symbols,
+ };
+
+ if (!ac_rtld_open(&rtld_binary, open_info)) {
+ free(variant);
+ return NULL;
+ }
+
+ if (!ac_rtld_read_config(&device->physical_device->rad_info, &rtld_binary, &config)) {
+ ac_rtld_close(&rtld_binary);
+ free(variant);
+ return NULL;
+ }
+
+ if (rtld_binary.lds_size > 0) {
+ unsigned encode_granularity = device->physical_device->rad_info.lds_encode_granularity;
+ config.lds_size = align(rtld_binary.lds_size, encode_granularity) / encode_granularity;
+ }
+ if (!config.lds_size && binary->stage == MESA_SHADER_TESS_CTRL) {
+ /* This is used for reporting LDS statistics */
+ config.lds_size = binary->info.tcs.num_lds_blocks;
+ }
+
+ variant->code_size = rtld_binary.rx_size;
+ variant->exec_size = rtld_binary.exec_size;
+ } else {
+ assert(binary->type == RADV_BINARY_TYPE_LEGACY);
+ config = ((struct radv_shader_binary_legacy *)binary)->config;
+ variant->code_size =
+ radv_get_shader_binary_size(((struct radv_shader_binary_legacy *)binary)->code_size);
+ variant->exec_size = ((struct radv_shader_binary_legacy *)binary)->exec_size;
+ }
+
+ variant->info = binary->info;
+ radv_postprocess_config(device, &config, &binary->info, binary->stage, &variant->config);
+
+ void *dest_ptr = radv_alloc_shader_memory(device, variant);
+ if (!dest_ptr) {
+ if (binary->type == RADV_BINARY_TYPE_RTLD)
+ ac_rtld_close(&rtld_binary);
+ free(variant);
+ return NULL;
+ }
+
+ if (binary->type == RADV_BINARY_TYPE_RTLD) {
+ struct radv_shader_binary_rtld *bin = (struct radv_shader_binary_rtld *)binary;
+ struct ac_rtld_upload_info info = {
+ .binary = &rtld_binary,
+ .rx_va = radv_buffer_get_va(variant->bo) + variant->bo_offset,
+ .rx_ptr = dest_ptr,
+ };
+
+ if (!ac_rtld_upload(&info)) {
+ radv_shader_variant_destroy(device, variant);
+ ac_rtld_close(&rtld_binary);
+ return NULL;
+ }
+
+ if (keep_shader_info || (device->instance->debug_flags & RADV_DEBUG_DUMP_SHADERS)) {
+ const char *disasm_data;
+ size_t disasm_size;
+ if (!ac_rtld_get_section_by_name(&rtld_binary, ".AMDGPU.disasm", &disasm_data,
+ &disasm_size)) {
+ radv_shader_variant_destroy(device, variant);
+ ac_rtld_close(&rtld_binary);
+ return NULL;
+ }
+
+ variant->ir_string =
+ bin->llvm_ir_size ? strdup((const char *)(bin->data + bin->elf_size)) : NULL;
+ variant->disasm_string = malloc(disasm_size + 1);
+ memcpy(variant->disasm_string, disasm_data, disasm_size);
+ variant->disasm_string[disasm_size] = 0;
+ }
+
+ variant->code_ptr = dest_ptr;
+ ac_rtld_close(&rtld_binary);
+ } else {
+ struct radv_shader_binary_legacy *bin = (struct radv_shader_binary_legacy *)binary;
+ memcpy(dest_ptr, bin->data + bin->stats_size, bin->code_size);
+
+ /* Add end-of-code markers for the UMR disassembler. */
+ uint32_t *ptr32 = (uint32_t *)dest_ptr + bin->code_size / 4;
+ for (unsigned i = 0; i < DEBUGGER_NUM_MARKERS; i++)
+ ptr32[i] = DEBUGGER_END_OF_CODE_MARKER;
+
+ variant->code_ptr = dest_ptr;
+ variant->ir_string =
+ bin->ir_size ? strdup((const char *)(bin->data + bin->stats_size + bin->code_size)) : NULL;
+ variant->disasm_string =
+ bin->disasm_size
+ ? strdup((const char *)(bin->data + bin->stats_size + bin->code_size + bin->ir_size))
+ : NULL;
+
+ if (bin->stats_size) {
+ variant->statistics = calloc(bin->stats_size, 1);
+ memcpy(variant->statistics, bin->data, bin->stats_size);
+ }
+ }
+ return variant;
}
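
One detail of the RTLD path above worth spelling out: rtld_binary.lds_size is in bytes, while the config.lds_size field counts hardware allocation blocks, so the driver aligns up to the encode granularity and then divides. A sketch of just that conversion, with an illustrative 512-byte granularity standing in for rad_info.lds_encode_granularity:

   /* Sketch of the lds_size conversion above: bytes -> hardware encode blocks.
    * The 512-byte granularity is an example value, not a real chip's. */
   #include <stdio.h>

   static unsigned
   align_u(unsigned v, unsigned a)
   {
      return (v + a - 1) / a * a; /* round v up to a multiple of 'a' */
   }

   int
   main(void)
   {
      unsigned lds_bytes = 9000;  /* example LDS usage reported by the linker */
      unsigned granularity = 512; /* example encode granularity */

      unsigned lds_blocks = align_u(lds_bytes, granularity) / granularity;
      printf("%u bytes of LDS -> lds_size field = %u blocks\n", lds_bytes, lds_blocks);
      return 0;
   }
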
static char *
-radv_dump_nir_shaders(struct nir_shader * const *shaders,
- int shader_count)
+radv_dump_nir_shaders(struct nir_shader *const *shaders, int shader_count)
{
- char *data = NULL;
- char *ret = NULL;
- size_t size = 0;
- struct u_memstream mem;
- if (u_memstream_open(&mem, &data, &size)) {
- FILE *const memf = u_memstream_get(&mem);
- for (int i = 0; i < shader_count; ++i)
- nir_print_shader(shaders[i], memf);
- u_memstream_close(&mem);
- }
-
- ret = malloc(size + 1);
- if (ret) {
- memcpy(ret, data, size);
- ret[size] = 0;
- }
- free(data);
- return ret;
+ char *data = NULL;
+ char *ret = NULL;
+ size_t size = 0;
+ struct u_memstream mem;
+ if (u_memstream_open(&mem, &data, &size)) {
+ FILE *const memf = u_memstream_get(&mem);
+ for (int i = 0; i < shader_count; ++i)
+ nir_print_shader(shaders[i], memf);
+ u_memstream_close(&mem);
+ }
+
+ ret = malloc(size + 1);
+ if (ret) {
+ memcpy(ret, data, size);
+ ret[size] = 0;
+ }
+ free(data);
+ return ret;
}
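
radv_dump_nir_shaders() prints every NIR shader into an in-memory FILE and then copies the result into a heap string. A sketch of the same capture-to-string pattern using POSIX open_memstream() directly; u_memstream is Mesa's portable wrapper around this idea, so treat the snippet as an approximation of the idiom rather than the wrapper's exact behaviour:

   /* Capture-to-string via POSIX open_memstream(); not available on all
    * platforms, which is why Mesa wraps it in u_memstream. */
   #define _POSIX_C_SOURCE 200809L
   #include <stdio.h>
   #include <stdlib.h>

   int
   main(void)
   {
      char *buf = NULL;
      size_t size = 0;
      FILE *memf = open_memstream(&buf, &size);
      if (!memf)
         return 1;

      /* Anything printed to memf accumulates in buf instead of a real file. */
      fprintf(memf, "shader %d\n", 0);
      fprintf(memf, "shader %d\n", 1);
      fclose(memf); /* finalizes buf and size */

      printf("captured %zu bytes:\n%s", size, buf);
      free(buf);
      return 0;
   }
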
static struct radv_shader_variant *
-shader_variant_compile(struct radv_device *device,
- struct vk_shader_module *module,
- struct nir_shader * const *shaders,
- int shader_count,
- gl_shader_stage stage,
- struct radv_shader_info *info,
- struct radv_nir_compiler_options *options,
- bool gs_copy_shader,
- bool trap_handler_shader,
- bool keep_shader_info,
- bool keep_statistic_info,
- struct radv_shader_binary **binary_out)
+shader_variant_compile(struct radv_device *device, struct vk_shader_module *module,
+ struct nir_shader *const *shaders, int shader_count, gl_shader_stage stage,
+ struct radv_shader_info *info, struct radv_nir_compiler_options *options,
+ bool gs_copy_shader, bool trap_handler_shader, bool keep_shader_info,
+ bool keep_statistic_info, struct radv_shader_binary **binary_out)
{
- enum radeon_family chip_family = device->physical_device->rad_info.family;
- struct radv_shader_binary *binary = NULL;
-
- struct radv_shader_debug_data debug_data = {
- .device = device,
- .module = module,
- };
-
- options->family = chip_family;
- options->chip_class = device->physical_device->rad_info.chip_class;
- options->info = &device->physical_device->rad_info;
- options->dump_shader = radv_can_dump_shader(device, module, gs_copy_shader);
- options->dump_preoptir = options->dump_shader &&
- device->instance->debug_flags & RADV_DEBUG_PREOPTIR;
- options->record_ir = keep_shader_info;
- options->record_stats = keep_statistic_info;
- options->check_ir = device->instance->debug_flags & RADV_DEBUG_CHECKIR;
- options->tess_offchip_block_dw_size = device->tess_offchip_block_dw_size;
- options->address32_hi = device->physical_device->rad_info.address32_hi;
- options->has_ls_vgpr_init_bug = device->physical_device->rad_info.has_ls_vgpr_init_bug;
- options->use_ngg_streamout = device->physical_device->use_ngg_streamout;
- options->enable_mrt_output_nan_fixup = module && !module->nir &&
- device->instance->enable_mrt_output_nan_fixup;
- options->adjust_frag_coord_z = device->adjust_frag_coord_z;
- options->has_image_load_dcc_bug = device->physical_device->rad_info.has_image_load_dcc_bug;
- options->debug.func = radv_compiler_debug;
- options->debug.private_data = &debug_data;
-
- switch (device->force_vrs) {
- case RADV_FORCE_VRS_2x2:
- options->force_vrs_rates = (1u << 2) | (1u << 4);
- break;
- case RADV_FORCE_VRS_2x1:
- options->force_vrs_rates = (0u << 2) | (1u << 4);
- break;
- case RADV_FORCE_VRS_1x2:
- options->force_vrs_rates = (1u << 2) | (0u << 4);
- break;
- default:
- break;
- }
-
- struct radv_shader_args args = {0};
- args.options = options;
- args.shader_info = info;
- args.is_gs_copy_shader = gs_copy_shader;
- args.is_trap_handler_shader = trap_handler_shader;
-
- radv_declare_shader_args(&args,
- gs_copy_shader ? MESA_SHADER_VERTEX
- : shaders[shader_count - 1]->info.stage,
- shader_count >= 2,
- shader_count >= 2 ? shaders[shader_count - 2]->info.stage
- : MESA_SHADER_VERTEX);
-
- if (radv_use_llvm_for_stage(device, stage) ||
- options->dump_shader || options->record_ir)
- ac_init_llvm_once();
-
- if (radv_use_llvm_for_stage(device, stage)) {
- llvm_compile_shader(device, shader_count, shaders, &binary, &args);
- } else {
- aco_compile_shader(shader_count, shaders, &binary, &args);
- }
-
- binary->info = *info;
-
- struct radv_shader_variant *variant = radv_shader_variant_create(device, binary,
- keep_shader_info);
- if (!variant) {
- free(binary);
- return NULL;
- }
-
- if (options->dump_shader) {
- fprintf(stderr, "%s", radv_get_shader_name(info, shaders[0]->info.stage));
- for (int i = 1; i < shader_count; ++i)
- fprintf(stderr, " + %s", radv_get_shader_name(info, shaders[i]->info.stage));
-
- fprintf(stderr, "\ndisasm:\n%s\n", variant->disasm_string);
- }
-
-
- if (keep_shader_info) {
- variant->nir_string = radv_dump_nir_shaders(shaders, shader_count);
- if (!gs_copy_shader && !trap_handler_shader && !module->nir) {
- variant->spirv = malloc(module->size);
- if (!variant->spirv) {
- free(variant);
- free(binary);
- return NULL;
- }
-
- memcpy(variant->spirv, module->data, module->size);
- variant->spirv_size = module->size;
- }
- }
-
- if (binary_out)
- *binary_out = binary;
- else
- free(binary);
-
- return variant;
+ enum radeon_family chip_family = device->physical_device->rad_info.family;
+ struct radv_shader_binary *binary = NULL;
+
+ struct radv_shader_debug_data debug_data = {
+ .device = device,
+ .module = module,
+ };
+
+ options->family = chip_family;
+ options->chip_class = device->physical_device->rad_info.chip_class;
+ options->info = &device->physical_device->rad_info;
+ options->dump_shader = radv_can_dump_shader(device, module, gs_copy_shader);
+ options->dump_preoptir =
+ options->dump_shader && device->instance->debug_flags & RADV_DEBUG_PREOPTIR;
+ options->record_ir = keep_shader_info;
+ options->record_stats = keep_statistic_info;
+ options->check_ir = device->instance->debug_flags & RADV_DEBUG_CHECKIR;
+ options->tess_offchip_block_dw_size = device->tess_offchip_block_dw_size;
+ options->address32_hi = device->physical_device->rad_info.address32_hi;
+ options->has_ls_vgpr_init_bug = device->physical_device->rad_info.has_ls_vgpr_init_bug;
+ options->use_ngg_streamout = device->physical_device->use_ngg_streamout;
+ options->enable_mrt_output_nan_fixup =
+ module && !module->nir && device->instance->enable_mrt_output_nan_fixup;
+ options->adjust_frag_coord_z = device->adjust_frag_coord_z;
+ options->has_image_load_dcc_bug = device->physical_device->rad_info.has_image_load_dcc_bug;
+ options->debug.func = radv_compiler_debug;
+ options->debug.private_data = &debug_data;
+
+ switch (device->force_vrs) {
+ case RADV_FORCE_VRS_2x2:
+ options->force_vrs_rates = (1u << 2) | (1u << 4);
+ break;
+ case RADV_FORCE_VRS_2x1:
+ options->force_vrs_rates = (0u << 2) | (1u << 4);
+ break;
+ case RADV_FORCE_VRS_1x2:
+ options->force_vrs_rates = (1u << 2) | (0u << 4);
+ break;
+ default:
+ break;
+ }
+
+ struct radv_shader_args args = {0};
+ args.options = options;
+ args.shader_info = info;
+ args.is_gs_copy_shader = gs_copy_shader;
+ args.is_trap_handler_shader = trap_handler_shader;
+
+ radv_declare_shader_args(
+ &args, gs_copy_shader ? MESA_SHADER_VERTEX : shaders[shader_count - 1]->info.stage,
+ shader_count >= 2,
+ shader_count >= 2 ? shaders[shader_count - 2]->info.stage : MESA_SHADER_VERTEX);
+
+ if (radv_use_llvm_for_stage(device, stage) || options->dump_shader || options->record_ir)
+ ac_init_llvm_once();
+
+ if (radv_use_llvm_for_stage(device, stage)) {
+ llvm_compile_shader(device, shader_count, shaders, &binary, &args);
+ } else {
+ aco_compile_shader(shader_count, shaders, &binary, &args);
+ }
+
+ binary->info = *info;
+
+ struct radv_shader_variant *variant =
+ radv_shader_variant_create(device, binary, keep_shader_info);
+ if (!variant) {
+ free(binary);
+ return NULL;
+ }
+
+ if (options->dump_shader) {
+ fprintf(stderr, "%s", radv_get_shader_name(info, shaders[0]->info.stage));
+ for (int i = 1; i < shader_count; ++i)
+ fprintf(stderr, " + %s", radv_get_shader_name(info, shaders[i]->info.stage));
+
+ fprintf(stderr, "\ndisasm:\n%s\n", variant->disasm_string);
+ }
+
+ if (keep_shader_info) {
+ variant->nir_string = radv_dump_nir_shaders(shaders, shader_count);
+ if (!gs_copy_shader && !trap_handler_shader && !module->nir) {
+ variant->spirv = malloc(module->size);
+ if (!variant->spirv) {
+ free(variant);
+ free(binary);
+ return NULL;
+ }
+
+ memcpy(variant->spirv, module->data, module->size);
+ variant->spirv_size = module->size;
+ }
+ }
+
+ if (binary_out)
+ *binary_out = binary;
+ else
+ free(binary);
+
+ return variant;
}
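
The force_vrs switch above encodes the forced shading rate as two 2-bit log2 factors inside a single byte: reading the constants, 2x1 sets bit 4 and 1x2 sets bit 2, so the X (width) factor appears to live at bits 4..5 and the Y (height) factor at bits 2..3. A small sketch that reproduces those constants; the helper name is invented for the example:

   /* Reproduces the force_vrs_rates constants above from log2 rate factors. */
   #include <stdint.h>
   #include <stdio.h>

   static uint8_t
   encode_force_vrs(unsigned log2_rate_x, unsigned log2_rate_y)
   {
      /* Matches the switch above: 2x1 -> (0u << 2) | (1u << 4), 1x2 -> (1u << 2). */
      return (uint8_t)((log2_rate_y << 2) | (log2_rate_x << 4));
   }

   int
   main(void)
   {
      uint8_t r2x2 = encode_force_vrs(1, 1); /* == (1u << 2) | (1u << 4) == 0x14 */
      uint8_t r2x1 = encode_force_vrs(1, 0); /* == 0x10 */
      uint8_t r1x2 = encode_force_vrs(0, 1); /* == 0x04 */

      printf("2x2=0x%02x 2x1=0x%02x 1x2=0x%02x\n", (unsigned)r2x2, (unsigned)r2x1, (unsigned)r1x2);
      printf("decoded 2x2 constant: %ux%u\n", 1u << ((r2x2 >> 4) & 0x3), 1u << ((r2x2 >> 2) & 0x3));
      return 0;
   }
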
struct radv_shader_variant *
-radv_shader_variant_compile(struct radv_device *device,
- struct vk_shader_module *module,
- struct nir_shader *const *shaders,
- int shader_count,
- struct radv_pipeline_layout *layout,
- const struct radv_shader_variant_key *key,
- struct radv_shader_info *info,
- bool keep_shader_info, bool keep_statistic_info,
- bool disable_optimizations,
- struct radv_shader_binary **binary_out)
+radv_shader_variant_compile(struct radv_device *device, struct vk_shader_module *module,
+ struct nir_shader *const *shaders, int shader_count,
+ struct radv_pipeline_layout *layout,
+ const struct radv_shader_variant_key *key,
+ struct radv_shader_info *info, bool keep_shader_info,
+ bool keep_statistic_info, bool disable_optimizations,
+ struct radv_shader_binary **binary_out)
{
- gl_shader_stage stage = shaders[shader_count - 1]->info.stage;
- struct radv_nir_compiler_options options = {0};
-
- options.layout = layout;
- if (key)
- options.key = *key;
-
- options.explicit_scratch_args = !radv_use_llvm_for_stage(device, stage);
- options.robust_buffer_access = device->robust_buffer_access;
- options.robust_buffer_access2 = device->robust_buffer_access2;
- options.disable_optimizations = disable_optimizations;
- options.wgp_mode = radv_should_use_wgp_mode(device, stage, info);
-
- return shader_variant_compile(device, module, shaders, shader_count, stage, info,
- &options, false, false,
- keep_shader_info, keep_statistic_info, binary_out);
+ gl_shader_stage stage = shaders[shader_count - 1]->info.stage;
+ struct radv_nir_compiler_options options = {0};
+
+ options.layout = layout;
+ if (key)
+ options.key = *key;
+
+ options.explicit_scratch_args = !radv_use_llvm_for_stage(device, stage);
+ options.robust_buffer_access = device->robust_buffer_access;
+ options.robust_buffer_access2 = device->robust_buffer_access2;
+ options.disable_optimizations = disable_optimizations;
+ options.wgp_mode = radv_should_use_wgp_mode(device, stage, info);
+
+ return shader_variant_compile(device, module, shaders, shader_count, stage, info, &options,
+ false, false, keep_shader_info, keep_statistic_info, binary_out);
}
struct radv_shader_variant *
-radv_create_gs_copy_shader(struct radv_device *device,
- struct nir_shader *shader,
- struct radv_shader_info *info,
- struct radv_shader_binary **binary_out,
- bool keep_shader_info, bool keep_statistic_info,
- bool multiview, bool disable_optimizations)
+radv_create_gs_copy_shader(struct radv_device *device, struct nir_shader *shader,
+ struct radv_shader_info *info, struct radv_shader_binary **binary_out,
+ bool keep_shader_info, bool keep_statistic_info, bool multiview,
+ bool disable_optimizations)
{
- struct radv_nir_compiler_options options = {0};
- gl_shader_stage stage = MESA_SHADER_VERTEX;
+ struct radv_nir_compiler_options options = {0};
+ gl_shader_stage stage = MESA_SHADER_VERTEX;
- options.explicit_scratch_args = !radv_use_llvm_for_stage(device, stage);
- options.key.has_multiview_view_index = multiview;
- options.disable_optimizations = disable_optimizations;
+ options.explicit_scratch_args = !radv_use_llvm_for_stage(device, stage);
+ options.key.has_multiview_view_index = multiview;
+ options.disable_optimizations = disable_optimizations;
- return shader_variant_compile(device, NULL, &shader, 1, stage,
- info, &options, true, false,
- keep_shader_info, keep_statistic_info, binary_out);
+ return shader_variant_compile(device, NULL, &shader, 1, stage, info, &options, true, false,
+ keep_shader_info, keep_statistic_info, binary_out);
}
struct radv_shader_variant *
radv_create_trap_handler_shader(struct radv_device *device)
{
- struct radv_nir_compiler_options options = {0};
- struct radv_shader_variant *shader = NULL;
- struct radv_shader_binary *binary = NULL;
- struct radv_shader_info info = {0};
+ struct radv_nir_compiler_options options = {0};
+ struct radv_shader_variant *shader = NULL;
+ struct radv_shader_binary *binary = NULL;
+ struct radv_shader_info info = {0};
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_trap_handler");
+ nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_trap_handler");
- options.explicit_scratch_args = true;
- options.wgp_mode = radv_should_use_wgp_mode(device, MESA_SHADER_COMPUTE, &info);
- info.wave_size = 64;
+ options.explicit_scratch_args = true;
+ options.wgp_mode = radv_should_use_wgp_mode(device, MESA_SHADER_COMPUTE, &info);
+ info.wave_size = 64;
- shader = shader_variant_compile(device, NULL, &b.shader, 1,
- MESA_SHADER_COMPUTE, &info, &options,
- false, true, true, false, &binary);
+ shader = shader_variant_compile(device, NULL, &b.shader, 1, MESA_SHADER_COMPUTE, &info, &options,
+ false, true, true, false, &binary);
- ralloc_free(b.shader);
- free(binary);
+ ralloc_free(b.shader);
+ free(binary);
- return shader;
+ return shader;
}
void
-radv_shader_variant_destroy(struct radv_device *device,
- struct radv_shader_variant *variant)
+radv_shader_variant_destroy(struct radv_device *device, struct radv_shader_variant *variant)
{
- if (!p_atomic_dec_zero(&variant->ref_count))
- return;
-
- mtx_lock(&device->shader_slab_mutex);
- list_del(&variant->slab_list);
- mtx_unlock(&device->shader_slab_mutex);
-
- free(variant->spirv);
- free(variant->nir_string);
- free(variant->disasm_string);
- free(variant->ir_string);
- free(variant->statistics);
- free(variant);
+ if (!p_atomic_dec_zero(&variant->ref_count))
+ return;
+
+ mtx_lock(&device->shader_slab_mutex);
+ list_del(&variant->slab_list);
+ mtx_unlock(&device->shader_slab_mutex);
+
+ free(variant->spirv);
+ free(variant->nir_string);
+ free(variant->disasm_string);
+ free(variant->ir_string);
+ free(variant->statistics);
+ free(variant);
}
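
radv_shader_variant_destroy() only tears the variant down when the last reference goes away; every other caller just decrements the count and returns. A sketch of that p_atomic_dec_zero() idiom with C11 atomics, using an invented object type:

   /* The decrement-and-test-for-zero release pattern, C11 atomics version. */
   #include <stdatomic.h>
   #include <stdbool.h>
   #include <stdio.h>
   #include <stdlib.h>

   struct object {
      atomic_int ref_count;
   };

   static bool
   dec_zero(atomic_int *count)
   {
      /* True only for the caller that takes the count from 1 to 0. */
      return atomic_fetch_sub(count, 1) == 1;
   }

   static void
   object_destroy(struct object *obj)
   {
      if (!dec_zero(&obj->ref_count))
         return; /* someone else still holds a reference */
      free(obj);
      printf("freed\n");
   }

   int
   main(void)
   {
      struct object *obj = calloc(1, sizeof(*obj));
      atomic_init(&obj->ref_count, 2); /* two owners */
      object_destroy(obj);             /* first release: object stays alive */
      object_destroy(obj);             /* second release: actually frees it */
      return 0;
   }
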
const char *
-radv_get_shader_name(struct radv_shader_info *info,
- gl_shader_stage stage)
+radv_get_shader_name(struct radv_shader_info *info, gl_shader_stage stage)
{
- switch (stage) {
- case MESA_SHADER_VERTEX:
- if (info->vs.as_ls)
- return "Vertex Shader as LS";
- else if (info->vs.as_es)
- return "Vertex Shader as ES";
- else if (info->is_ngg)
- return "Vertex Shader as ESGS";
- else
- return "Vertex Shader as VS";
- case MESA_SHADER_TESS_CTRL:
- return "Tessellation Control Shader";
- case MESA_SHADER_TESS_EVAL:
- if (info->tes.as_es)
- return "Tessellation Evaluation Shader as ES";
- else if (info->is_ngg)
- return "Tessellation Evaluation Shader as ESGS";
- else
- return "Tessellation Evaluation Shader as VS";
- case MESA_SHADER_GEOMETRY:
- return "Geometry Shader";
- case MESA_SHADER_FRAGMENT:
- return "Pixel Shader";
- case MESA_SHADER_COMPUTE:
- return "Compute Shader";
- default:
- return "Unknown shader";
- };
+ switch (stage) {
+ case MESA_SHADER_VERTEX:
+ if (info->vs.as_ls)
+ return "Vertex Shader as LS";
+ else if (info->vs.as_es)
+ return "Vertex Shader as ES";
+ else if (info->is_ngg)
+ return "Vertex Shader as ESGS";
+ else
+ return "Vertex Shader as VS";
+ case MESA_SHADER_TESS_CTRL:
+ return "Tessellation Control Shader";
+ case MESA_SHADER_TESS_EVAL:
+ if (info->tes.as_es)
+ return "Tessellation Evaluation Shader as ES";
+ else if (info->is_ngg)
+ return "Tessellation Evaluation Shader as ESGS";
+ else
+ return "Tessellation Evaluation Shader as VS";
+ case MESA_SHADER_GEOMETRY:
+ return "Geometry Shader";
+ case MESA_SHADER_FRAGMENT:
+ return "Pixel Shader";
+ case MESA_SHADER_COMPUTE:
+ return "Compute Shader";
+ default:
+ return "Unknown shader";
+ };
}
unsigned
-radv_get_max_workgroup_size(enum chip_class chip_class,
- gl_shader_stage stage,
+radv_get_max_workgroup_size(enum chip_class chip_class, gl_shader_stage stage,
const unsigned *sizes)
{
- switch (stage) {
- case MESA_SHADER_TESS_CTRL:
- return chip_class >= GFX7 ? 128 : 64;
- case MESA_SHADER_GEOMETRY:
- return chip_class >= GFX9 ? 128 : 64;
- case MESA_SHADER_COMPUTE:
- break;
- default:
- return 0;
- }
-
- unsigned max_workgroup_size = sizes[0] * sizes[1] * sizes[2];
- return max_workgroup_size;
+ switch (stage) {
+ case MESA_SHADER_TESS_CTRL:
+ return chip_class >= GFX7 ? 128 : 64;
+ case MESA_SHADER_GEOMETRY:
+ return chip_class >= GFX9 ? 128 : 64;
+ case MESA_SHADER_COMPUTE:
+ break;
+ default:
+ return 0;
+ }
+
+ unsigned max_workgroup_size = sizes[0] * sizes[1] * sizes[2];
+ return max_workgroup_size;
}
unsigned
-radv_get_max_waves(struct radv_device *device,
- struct radv_shader_variant *variant,
+radv_get_max_waves(struct radv_device *device, struct radv_shader_variant *variant,
gl_shader_stage stage)
{
- struct radeon_info *info = &device->physical_device->rad_info;
- enum chip_class chip_class = info->chip_class;
- uint8_t wave_size = variant->info.wave_size;
- struct ac_shader_config *conf = &variant->config;
- unsigned max_simd_waves;
- unsigned lds_per_wave = 0;
-
- max_simd_waves = info->max_wave64_per_simd * (64 / wave_size);
-
- if (stage == MESA_SHADER_FRAGMENT) {
- lds_per_wave = conf->lds_size * info->lds_encode_granularity +
- variant->info.ps.num_interp * 48;
- lds_per_wave = align(lds_per_wave, info->lds_alloc_granularity);
- } else if (stage == MESA_SHADER_COMPUTE) {
- unsigned max_workgroup_size =
- radv_get_max_workgroup_size(chip_class, stage, variant->info.cs.block_size);
- lds_per_wave = align(conf->lds_size * info->lds_encode_granularity,
- info->lds_alloc_granularity);
- lds_per_wave /= DIV_ROUND_UP(max_workgroup_size, wave_size);
- }
-
- if (conf->num_sgprs && chip_class < GFX10) {
- unsigned sgprs = align(conf->num_sgprs, chip_class >= GFX8 ? 16 : 8);
- max_simd_waves = MIN2(max_simd_waves, info->num_physical_sgprs_per_simd / sgprs);
- }
-
- if (conf->num_vgprs) {
- unsigned physical_vgprs = info->num_physical_wave64_vgprs_per_simd * (64 / wave_size);
- unsigned vgprs = align(conf->num_vgprs, wave_size == 32 ? 8 : 4);
- if (chip_class >= GFX10_3)
- vgprs = align(vgprs, wave_size == 32 ? 16 : 8);
- max_simd_waves = MIN2(max_simd_waves, physical_vgprs / vgprs);
- }
-
- unsigned simd_per_workgroup = info->num_simd_per_compute_unit;
- if (chip_class >= GFX10)
- simd_per_workgroup *= 2; /* like lds_size_per_workgroup, assume WGP on GFX10+ */
-
- unsigned max_lds_per_simd = info->lds_size_per_workgroup / simd_per_workgroup;
- if (lds_per_wave)
- max_simd_waves = MIN2(max_simd_waves, DIV_ROUND_UP(max_lds_per_simd, lds_per_wave));
-
- return chip_class >= GFX10 ? max_simd_waves * (wave_size / 32) : max_simd_waves;
+ struct radeon_info *info = &device->physical_device->rad_info;
+ enum chip_class chip_class = info->chip_class;
+ uint8_t wave_size = variant->info.wave_size;
+ struct ac_shader_config *conf = &variant->config;
+ unsigned max_simd_waves;
+ unsigned lds_per_wave = 0;
+
+ max_simd_waves = info->max_wave64_per_simd * (64 / wave_size);
+
+ if (stage == MESA_SHADER_FRAGMENT) {
+ lds_per_wave =
+ conf->lds_size * info->lds_encode_granularity + variant->info.ps.num_interp * 48;
+ lds_per_wave = align(lds_per_wave, info->lds_alloc_granularity);
+ } else if (stage == MESA_SHADER_COMPUTE) {
+ unsigned max_workgroup_size =
+ radv_get_max_workgroup_size(chip_class, stage, variant->info.cs.block_size);
+ lds_per_wave =
+ align(conf->lds_size * info->lds_encode_granularity, info->lds_alloc_granularity);
+ lds_per_wave /= DIV_ROUND_UP(max_workgroup_size, wave_size);
+ }
+
+ if (conf->num_sgprs && chip_class < GFX10) {
+ unsigned sgprs = align(conf->num_sgprs, chip_class >= GFX8 ? 16 : 8);
+ max_simd_waves = MIN2(max_simd_waves, info->num_physical_sgprs_per_simd / sgprs);
+ }
+
+ if (conf->num_vgprs) {
+ unsigned physical_vgprs = info->num_physical_wave64_vgprs_per_simd * (64 / wave_size);
+ unsigned vgprs = align(conf->num_vgprs, wave_size == 32 ? 8 : 4);
+ if (chip_class >= GFX10_3)
+ vgprs = align(vgprs, wave_size == 32 ? 16 : 8);
+ max_simd_waves = MIN2(max_simd_waves, physical_vgprs / vgprs);
+ }
+
+ unsigned simd_per_workgroup = info->num_simd_per_compute_unit;
+ if (chip_class >= GFX10)
+ simd_per_workgroup *= 2; /* like lds_size_per_workgroup, assume WGP on GFX10+ */
+
+ unsigned max_lds_per_simd = info->lds_size_per_workgroup / simd_per_workgroup;
+ if (lds_per_wave)
+ max_simd_waves = MIN2(max_simd_waves, DIV_ROUND_UP(max_lds_per_simd, lds_per_wave));
+
+ return chip_class >= GFX10 ? max_simd_waves * (wave_size / 32) : max_simd_waves;
}
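
radv_get_max_waves() clamps the theoretical per-SIMD wave count by each resource the shader consumes: SGPRs (pre-GFX10 only), VGPRs, and LDS. A worked example of just the VGPR clamp, with illustrative numbers rather than values queried from a real chip:

   /* VGPR half of the occupancy computation above; 16 wave slots, a
    * 1024-register wave64 budget and a granule of 4 are example values. */
   #include <stdio.h>

   #define MIN2(a, b) ((a) < (b) ? (a) : (b))

   static unsigned
   align_u(unsigned v, unsigned a)
   {
      return (v + a - 1) / a * a;
   }

   int
   main(void)
   {
      unsigned max_waves = 16;        /* example per-SIMD wave slots */
      unsigned physical_vgprs = 1024; /* example per-SIMD VGPR budget (wave64) */
      unsigned num_vgprs = 70;        /* example shader VGPR usage */
      unsigned granule = 4;           /* example wave64 allocation granule */

      unsigned vgprs = align_u(num_vgprs, granule);        /* 70 -> 72 */
      max_waves = MIN2(max_waves, physical_vgprs / vgprs); /* 1024 / 72 = 14 */

      printf("VGPR-limited occupancy: %u waves per SIMD\n", max_waves);
      return 0;
   }
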
VkResult
-radv_GetShaderInfoAMD(VkDevice _device,
- VkPipeline _pipeline,
- VkShaderStageFlagBits shaderStage,
- VkShaderInfoTypeAMD infoType,
- size_t* pInfoSize,
- void* pInfo)
+radv_GetShaderInfoAMD(VkDevice _device, VkPipeline _pipeline, VkShaderStageFlagBits shaderStage,
+ VkShaderInfoTypeAMD infoType, size_t *pInfoSize, void *pInfo)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
- gl_shader_stage stage = vk_to_mesa_shader_stage(shaderStage);
- struct radv_shader_variant *variant = pipeline->shaders[stage];
- VkResult result = VK_SUCCESS;
-
- /* Spec doesn't indicate what to do if the stage is invalid, so just
- * return no info for this. */
- if (!variant)
- return vk_error(device->instance, VK_ERROR_FEATURE_NOT_PRESENT);
-
- switch (infoType) {
- case VK_SHADER_INFO_TYPE_STATISTICS_AMD:
- if (!pInfo) {
- *pInfoSize = sizeof(VkShaderStatisticsInfoAMD);
- } else {
- unsigned lds_multiplier = device->physical_device->rad_info.lds_encode_granularity;
- struct ac_shader_config *conf = &variant->config;
-
- VkShaderStatisticsInfoAMD statistics = {0};
- statistics.shaderStageMask = shaderStage;
- statistics.numPhysicalVgprs = device->physical_device->rad_info.num_physical_wave64_vgprs_per_simd;
- statistics.numPhysicalSgprs = device->physical_device->rad_info.num_physical_sgprs_per_simd;
- statistics.numAvailableSgprs = statistics.numPhysicalSgprs;
-
- if (stage == MESA_SHADER_COMPUTE) {
- unsigned *local_size = variant->info.cs.block_size;
- unsigned workgroup_size = local_size[0] * local_size[1] * local_size[2];
-
- statistics.numAvailableVgprs = statistics.numPhysicalVgprs /
- ceil((double)workgroup_size / statistics.numPhysicalVgprs);
-
- statistics.computeWorkGroupSize[0] = local_size[0];
- statistics.computeWorkGroupSize[1] = local_size[1];
- statistics.computeWorkGroupSize[2] = local_size[2];
- } else {
- statistics.numAvailableVgprs = statistics.numPhysicalVgprs;
- }
-
- statistics.resourceUsage.numUsedVgprs = conf->num_vgprs;
- statistics.resourceUsage.numUsedSgprs = conf->num_sgprs;
- statistics.resourceUsage.ldsSizePerLocalWorkGroup = 32768;
- statistics.resourceUsage.ldsUsageSizeInBytes = conf->lds_size * lds_multiplier;
- statistics.resourceUsage.scratchMemUsageInBytes = conf->scratch_bytes_per_wave;
-
- size_t size = *pInfoSize;
- *pInfoSize = sizeof(statistics);
-
- memcpy(pInfo, &statistics, MIN2(size, *pInfoSize));
-
- if (size < *pInfoSize)
- result = VK_INCOMPLETE;
- }
-
- break;
- case VK_SHADER_INFO_TYPE_DISASSEMBLY_AMD: {
- char *out;
- size_t outsize;
- struct u_memstream mem;
- u_memstream_open(&mem, &out, &outsize);
- FILE *const memf = u_memstream_get(&mem);
-
- fprintf(memf, "%s:\n", radv_get_shader_name(&variant->info, stage));
- fprintf(memf, "%s\n\n", variant->ir_string);
- fprintf(memf, "%s\n\n", variant->disasm_string);
- radv_dump_shader_stats(device, pipeline, stage, memf);
- u_memstream_close(&mem);
-
- /* Need to include the null terminator. */
- size_t length = outsize + 1;
-
- if (!pInfo) {
- *pInfoSize = length;
- } else {
- size_t size = *pInfoSize;
- *pInfoSize = length;
-
- memcpy(pInfo, out, MIN2(size, length));
-
- if (size < length)
- result = VK_INCOMPLETE;
- }
-
- free(out);
- break;
- }
- default:
- /* VK_SHADER_INFO_TYPE_BINARY_AMD unimplemented for now. */
- result = VK_ERROR_FEATURE_NOT_PRESENT;
- break;
- }
-
- return result;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
+ gl_shader_stage stage = vk_to_mesa_shader_stage(shaderStage);
+ struct radv_shader_variant *variant = pipeline->shaders[stage];
+ VkResult result = VK_SUCCESS;
+
+ /* Spec doesn't indicate what to do if the stage is invalid, so just
+ * return no info for this. */
+ if (!variant)
+ return vk_error(device->instance, VK_ERROR_FEATURE_NOT_PRESENT);
+
+ switch (infoType) {
+ case VK_SHADER_INFO_TYPE_STATISTICS_AMD:
+ if (!pInfo) {
+ *pInfoSize = sizeof(VkShaderStatisticsInfoAMD);
+ } else {
+ unsigned lds_multiplier = device->physical_device->rad_info.lds_encode_granularity;
+ struct ac_shader_config *conf = &variant->config;
+
+ VkShaderStatisticsInfoAMD statistics = {0};
+ statistics.shaderStageMask = shaderStage;
+ statistics.numPhysicalVgprs =
+ device->physical_device->rad_info.num_physical_wave64_vgprs_per_simd;
+ statistics.numPhysicalSgprs =
+ device->physical_device->rad_info.num_physical_sgprs_per_simd;
+ statistics.numAvailableSgprs = statistics.numPhysicalSgprs;
+
+ if (stage == MESA_SHADER_COMPUTE) {
+ unsigned *local_size = variant->info.cs.block_size;
+ unsigned workgroup_size = local_size[0] * local_size[1] * local_size[2];
+
+ statistics.numAvailableVgprs =
+ statistics.numPhysicalVgprs /
+ ceil((double)workgroup_size / statistics.numPhysicalVgprs);
+
+ statistics.computeWorkGroupSize[0] = local_size[0];
+ statistics.computeWorkGroupSize[1] = local_size[1];
+ statistics.computeWorkGroupSize[2] = local_size[2];
+ } else {
+ statistics.numAvailableVgprs = statistics.numPhysicalVgprs;
+ }
+
+ statistics.resourceUsage.numUsedVgprs = conf->num_vgprs;
+ statistics.resourceUsage.numUsedSgprs = conf->num_sgprs;
+ statistics.resourceUsage.ldsSizePerLocalWorkGroup = 32768;
+ statistics.resourceUsage.ldsUsageSizeInBytes = conf->lds_size * lds_multiplier;
+ statistics.resourceUsage.scratchMemUsageInBytes = conf->scratch_bytes_per_wave;
+
+ size_t size = *pInfoSize;
+ *pInfoSize = sizeof(statistics);
+
+ memcpy(pInfo, &statistics, MIN2(size, *pInfoSize));
+
+ if (size < *pInfoSize)
+ result = VK_INCOMPLETE;
+ }
+
+ break;
+ case VK_SHADER_INFO_TYPE_DISASSEMBLY_AMD: {
+ char *out;
+ size_t outsize;
+ struct u_memstream mem;
+ u_memstream_open(&mem, &out, &outsize);
+ FILE *const memf = u_memstream_get(&mem);
+
+ fprintf(memf, "%s:\n", radv_get_shader_name(&variant->info, stage));
+ fprintf(memf, "%s\n\n", variant->ir_string);
+ fprintf(memf, "%s\n\n", variant->disasm_string);
+ radv_dump_shader_stats(device, pipeline, stage, memf);
+ u_memstream_close(&mem);
+
+ /* Need to include the null terminator. */
+ size_t length = outsize + 1;
+
+ if (!pInfo) {
+ *pInfoSize = length;
+ } else {
+ size_t size = *pInfoSize;
+ *pInfoSize = length;
+
+ memcpy(pInfo, out, MIN2(size, length));
+
+ if (size < length)
+ result = VK_INCOMPLETE;
+ }
+
+ free(out);
+ break;
+ }
+ default:
+ /* VK_SHADER_INFO_TYPE_BINARY_AMD unimplemented for now. */
+ result = VK_ERROR_FEATURE_NOT_PRESENT;
+ break;
+ }
+
+ return result;
}
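
From the application side, radv_GetShaderInfoAMD() follows the usual Vulkan size-then-data contract: call once with pInfo == NULL to learn the size, then again with a buffer. A sketch of querying the disassembly through VK_AMD_shader_info; it assumes 'device' and 'pipeline' were created elsewhere with the extension enabled:

   /* Two-call query of the fragment-shader disassembly via VK_AMD_shader_info. */
   #include <stdio.h>
   #include <stdlib.h>
   #include <vulkan/vulkan.h>

   static char *
   get_fs_disassembly(VkDevice device, VkPipeline pipeline)
   {
      PFN_vkGetShaderInfoAMD get_info =
         (PFN_vkGetShaderInfoAMD)vkGetDeviceProcAddr(device, "vkGetShaderInfoAMD");
      if (!get_info)
         return NULL;

      /* First call: pInfo == NULL, only the required size is written. */
      size_t size = 0;
      if (get_info(device, pipeline, VK_SHADER_STAGE_FRAGMENT_BIT,
                   VK_SHADER_INFO_TYPE_DISASSEMBLY_AMD, &size, NULL) != VK_SUCCESS)
         return NULL;

      /* Second call: pass the buffer of the reported size. */
      char *text = malloc(size);
      if (!text)
         return NULL;
      if (get_info(device, pipeline, VK_SHADER_STAGE_FRAGMENT_BIT,
                   VK_SHADER_INFO_TYPE_DISASSEMBLY_AMD, &size, text) != VK_SUCCESS) {
         free(text);
         return NULL;
      }
      return text; /* already NUL-terminated, as the length above includes it */
   }

Passing a buffer that is too small still copies what fits and yields VK_INCOMPLETE, matching the MIN2() copies in the implementation above.
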
VkResult
-radv_dump_shader_stats(struct radv_device *device,
- struct radv_pipeline *pipeline,
- gl_shader_stage stage, FILE *output)
+radv_dump_shader_stats(struct radv_device *device, struct radv_pipeline *pipeline,
+ gl_shader_stage stage, FILE *output)
{
- struct radv_shader_variant *shader = pipeline->shaders[stage];
- VkPipelineExecutablePropertiesKHR *props = NULL;
- uint32_t prop_count = 0;
- VkResult result;
-
- VkPipelineInfoKHR pipeline_info = {0};
- pipeline_info.sType = VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR;
- pipeline_info.pipeline = radv_pipeline_to_handle(pipeline);
-
- result = radv_GetPipelineExecutablePropertiesKHR(radv_device_to_handle(device),
- &pipeline_info,
- &prop_count, NULL);
- if (result != VK_SUCCESS)
- return result;
-
- props = calloc(prop_count, sizeof(*props));
- if (!props)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
-
- result = radv_GetPipelineExecutablePropertiesKHR(radv_device_to_handle(device),
- &pipeline_info,
- &prop_count, props);
- if (result != VK_SUCCESS)
- goto fail;
-
- for (unsigned exec_idx = 0; exec_idx < prop_count; exec_idx++) {
- if (!(props[exec_idx].stages & mesa_to_vk_shader_stage(stage)))
- continue;
-
- VkPipelineExecutableStatisticKHR *stats = NULL;
- uint32_t stat_count = 0;
-
- VkPipelineExecutableInfoKHR exec_info = {0};
- exec_info.pipeline = radv_pipeline_to_handle(pipeline);
- exec_info.executableIndex = exec_idx;
-
- result = radv_GetPipelineExecutableStatisticsKHR(radv_device_to_handle(device),
- &exec_info,
- &stat_count, NULL);
- if (result != VK_SUCCESS)
- goto fail;
-
- stats = calloc(stat_count, sizeof(*stats));
- if (!stats) {
- result = VK_ERROR_OUT_OF_HOST_MEMORY;
- goto fail;
- }
-
- result = radv_GetPipelineExecutableStatisticsKHR(radv_device_to_handle(device),
- &exec_info,
- &stat_count, stats);
- if (result != VK_SUCCESS) {
- free(stats);
- goto fail;
- }
-
- fprintf(output, "\n%s:\n",
- radv_get_shader_name(&shader->info, stage));
- fprintf(output, "*** SHADER STATS ***\n");
-
- for (unsigned i = 0; i < stat_count; i++) {
- fprintf(output, "%s: ", stats[i].name);
- switch (stats[i].format) {
- case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_BOOL32_KHR:
- fprintf(output, "%s", stats[i].value.b32 == VK_TRUE ? "true" : "false");
- break;
- case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_INT64_KHR:
- fprintf(output, "%"PRIi64, stats[i].value.i64);
- break;
- case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR:
- fprintf(output, "%"PRIu64, stats[i].value.u64);
- break;
- case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_FLOAT64_KHR:
- fprintf(output, "%f", stats[i].value.f64);
- break;
- default:
- unreachable("Invalid pipeline statistic format");
- }
- fprintf(output, "\n");
- }
-
- fprintf(output, "********************\n\n\n");
-
- free(stats);
- }
+ struct radv_shader_variant *shader = pipeline->shaders[stage];
+ VkPipelineExecutablePropertiesKHR *props = NULL;
+ uint32_t prop_count = 0;
+ VkResult result;
+
+ VkPipelineInfoKHR pipeline_info = {0};
+ pipeline_info.sType = VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR;
+ pipeline_info.pipeline = radv_pipeline_to_handle(pipeline);
+
+ result = radv_GetPipelineExecutablePropertiesKHR(radv_device_to_handle(device), &pipeline_info,
+ &prop_count, NULL);
+ if (result != VK_SUCCESS)
+ return result;
+
+ props = calloc(prop_count, sizeof(*props));
+ if (!props)
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+ result = radv_GetPipelineExecutablePropertiesKHR(radv_device_to_handle(device), &pipeline_info,
+ &prop_count, props);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ for (unsigned exec_idx = 0; exec_idx < prop_count; exec_idx++) {
+ if (!(props[exec_idx].stages & mesa_to_vk_shader_stage(stage)))
+ continue;
+
+ VkPipelineExecutableStatisticKHR *stats = NULL;
+ uint32_t stat_count = 0;
+
+ VkPipelineExecutableInfoKHR exec_info = {0};
+ exec_info.pipeline = radv_pipeline_to_handle(pipeline);
+ exec_info.executableIndex = exec_idx;
+
+ result = radv_GetPipelineExecutableStatisticsKHR(radv_device_to_handle(device), &exec_info,
+ &stat_count, NULL);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ stats = calloc(stat_count, sizeof(*stats));
+ if (!stats) {
+ result = VK_ERROR_OUT_OF_HOST_MEMORY;
+ goto fail;
+ }
+
+ result = radv_GetPipelineExecutableStatisticsKHR(radv_device_to_handle(device), &exec_info,
+ &stat_count, stats);
+ if (result != VK_SUCCESS) {
+ free(stats);
+ goto fail;
+ }
+
+ fprintf(output, "\n%s:\n", radv_get_shader_name(&shader->info, stage));
+ fprintf(output, "*** SHADER STATS ***\n");
+
+ for (unsigned i = 0; i < stat_count; i++) {
+ fprintf(output, "%s: ", stats[i].name);
+ switch (stats[i].format) {
+ case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_BOOL32_KHR:
+ fprintf(output, "%s", stats[i].value.b32 == VK_TRUE ? "true" : "false");
+ break;
+ case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_INT64_KHR:
+ fprintf(output, "%" PRIi64, stats[i].value.i64);
+ break;
+ case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR:
+ fprintf(output, "%" PRIu64, stats[i].value.u64);
+ break;
+ case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_FLOAT64_KHR:
+ fprintf(output, "%f", stats[i].value.f64);
+ break;
+ default:
+ unreachable("Invalid pipeline statistic format");
+ }
+ fprintf(output, "\n");
+ }
+
+ fprintf(output, "********************\n\n\n");
+
+ free(stats);
+ }
fail:
- free(props);
- return result;
+ free(props);
+ return result;
}
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index d4878cc14a1..7bfcf854dbe 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -35,570 +35,522 @@
#include "radv_constants.h"
#include "nir/nir.h"
-#include "vulkan/vulkan.h"
#include "vulkan/util/vk_object.h"
#include "vulkan/util/vk_shader_module.h"
-
-#include "aco_interface.h"
+#include "vulkan/vulkan.h"
#define RADV_VERT_ATTRIB_MAX MAX2(VERT_ATTRIB_MAX, VERT_ATTRIB_GENERIC0 + MAX_VERTEX_ATTRIBS)
struct radv_device;
+struct radv_pipeline;
+struct radv_pipeline_cache;
+struct radv_pipeline_key;
struct radv_vs_out_key {
- uint32_t as_es:1;
- uint32_t as_ls:1;
- uint32_t as_ngg:1;
- uint32_t as_ngg_passthrough:1;
- uint32_t export_prim_id:1;
- uint32_t export_layer_id:1;
- uint32_t export_clip_dists:1;
- uint32_t export_viewport_index:1;
+ uint32_t as_es : 1;
+ uint32_t as_ls : 1;
+ uint32_t as_ngg : 1;
+ uint32_t as_ngg_passthrough : 1;
+ uint32_t export_prim_id : 1;
+ uint32_t export_layer_id : 1;
+ uint32_t export_clip_dists : 1;
+ uint32_t export_viewport_index : 1;
};
struct radv_vs_variant_key {
- struct radv_vs_out_key out;
+ struct radv_vs_out_key out;
- uint32_t instance_rate_inputs;
- uint32_t instance_rate_divisors[MAX_VERTEX_ATTRIBS];
- uint8_t vertex_attribute_formats[MAX_VERTEX_ATTRIBS];
- uint32_t vertex_attribute_bindings[MAX_VERTEX_ATTRIBS];
- uint32_t vertex_attribute_offsets[MAX_VERTEX_ATTRIBS];
- uint32_t vertex_attribute_strides[MAX_VERTEX_ATTRIBS];
+ uint32_t instance_rate_inputs;
+ uint32_t instance_rate_divisors[MAX_VERTEX_ATTRIBS];
+ uint8_t vertex_attribute_formats[MAX_VERTEX_ATTRIBS];
+ uint32_t vertex_attribute_bindings[MAX_VERTEX_ATTRIBS];
+ uint32_t vertex_attribute_offsets[MAX_VERTEX_ATTRIBS];
+ uint32_t vertex_attribute_strides[MAX_VERTEX_ATTRIBS];
- /* For 2_10_10_10 formats the alpha is handled as unsigned by pre-vega HW.
- * so we may need to fix it up. */
- enum ac_fetch_format alpha_adjust[MAX_VERTEX_ATTRIBS];
+   /* For 2_10_10_10 formats the alpha is handled as unsigned by pre-Vega HW,
+    * so we may need to fix it up. */
+ enum ac_fetch_format alpha_adjust[MAX_VERTEX_ATTRIBS];
- /* For some formats the channels have to be shuffled. */
- uint32_t post_shuffle;
+ /* For some formats the channels have to be shuffled. */
+ uint32_t post_shuffle;
- /* Output primitive type. */
- uint8_t outprim;
+ /* Output primitive type. */
+ uint8_t outprim;
};
struct radv_tes_variant_key {
- struct radv_vs_out_key out;
+ struct radv_vs_out_key out;
};
struct radv_tcs_variant_key {
- struct radv_vs_variant_key vs_key;
- unsigned primitive_mode;
- unsigned input_vertices;
+ struct radv_vs_variant_key vs_key;
+ unsigned primitive_mode;
+ unsigned input_vertices;
};
struct radv_fs_variant_key {
- uint32_t col_format;
- uint8_t log2_ps_iter_samples;
- uint8_t num_samples;
- uint32_t is_int8;
- uint32_t is_int10;
+ uint32_t col_format;
+ uint8_t log2_ps_iter_samples;
+ uint8_t num_samples;
+ uint32_t is_int8;
+ uint32_t is_int10;
};
struct radv_cs_variant_key {
- uint8_t subgroup_size;
+ uint8_t subgroup_size;
};
struct radv_shader_variant_key {
- union {
- struct radv_vs_variant_key vs;
- struct radv_fs_variant_key fs;
- struct radv_tes_variant_key tes;
- struct radv_tcs_variant_key tcs;
- struct radv_cs_variant_key cs;
-
- /* A common prefix of the vs and tes keys. */
- struct radv_vs_out_key vs_common_out;
- };
- bool has_multiview_view_index;
+ union {
+ struct radv_vs_variant_key vs;
+ struct radv_fs_variant_key fs;
+ struct radv_tes_variant_key tes;
+ struct radv_tcs_variant_key tcs;
+ struct radv_cs_variant_key cs;
+
+ /* A common prefix of the vs and tes keys. */
+ struct radv_vs_out_key vs_common_out;
+ };
+ bool has_multiview_view_index;
};
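
The anonymous union above relies on every per-stage key starting with the same struct: vs_common_out aliases the shared leading radv_vs_out_key, so code that only cares about those bits can read them without knowing which stage wrote the key. A toy version of that common-prefix layout, with invented names:

   /* Common-prefix union: both stage keys begin with the same struct, so the
    * 'common' member gives a stage-independent view of the shared bits. */
   #include <stdint.h>
   #include <stdio.h>

   struct out_key {
      uint32_t as_es : 1;
      uint32_t export_prim_id : 1;
   };

   struct vs_key {
      struct out_key out;
      uint32_t instance_rate_inputs;
   };

   struct tes_key {
      struct out_key out;
   };

   union stage_key {
      struct vs_key vs;
      struct tes_key tes;
      struct out_key common; /* valid prefix of both members above */
   };

   int
   main(void)
   {
      union stage_key key = {0};
      key.vs.out.export_prim_id = 1; /* written through the VS view... */
      printf("export_prim_id = %u\n", (unsigned)key.common.export_prim_id); /* ...read via the prefix */
      return 0;
   }
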
enum radv_compiler_debug_level {
- RADV_COMPILER_DEBUG_LEVEL_PERFWARN,
- RADV_COMPILER_DEBUG_LEVEL_ERROR,
+ RADV_COMPILER_DEBUG_LEVEL_PERFWARN,
+ RADV_COMPILER_DEBUG_LEVEL_ERROR,
};
struct radv_nir_compiler_options {
- struct radv_pipeline_layout *layout;
- struct radv_shader_variant_key key;
- bool explicit_scratch_args;
- bool clamp_shadow_reference;
- bool robust_buffer_access;
- bool robust_buffer_access2;
- bool adjust_frag_coord_z;
- bool dump_shader;
- bool dump_preoptir;
- bool record_ir;
- bool record_stats;
- bool check_ir;
- bool has_ls_vgpr_init_bug;
- bool has_image_load_dcc_bug;
- bool use_ngg_streamout;
- bool enable_mrt_output_nan_fixup;
- bool disable_optimizations; /* only used by ACO */
- bool wgp_mode;
- enum radeon_family family;
- enum chip_class chip_class;
- const struct radeon_info *info;
- uint32_t tess_offchip_block_dw_size;
- uint32_t address32_hi;
- uint8_t force_vrs_rates;
-
- struct {
- void (*func)(void *private_data,
- enum radv_compiler_debug_level level,
- const char *message);
- void *private_data;
- } debug;
+ struct radv_pipeline_layout *layout;
+ struct radv_shader_variant_key key;
+ bool explicit_scratch_args;
+ bool clamp_shadow_reference;
+ bool robust_buffer_access;
+ bool robust_buffer_access2;
+ bool adjust_frag_coord_z;
+ bool dump_shader;
+ bool dump_preoptir;
+ bool record_ir;
+ bool record_stats;
+ bool check_ir;
+ bool has_ls_vgpr_init_bug;
+ bool has_image_load_dcc_bug;
+ bool use_ngg_streamout;
+ bool enable_mrt_output_nan_fixup;
+ bool disable_optimizations; /* only used by ACO */
+ bool wgp_mode;
+ enum radeon_family family;
+ enum chip_class chip_class;
+ const struct radeon_info *info;
+ uint32_t tess_offchip_block_dw_size;
+ uint32_t address32_hi;
+ uint8_t force_vrs_rates;
+
+ struct {
+ void (*func)(void *private_data, enum radv_compiler_debug_level level, const char *message);
+ void *private_data;
+ } debug;
};
enum radv_ud_index {
- AC_UD_SCRATCH_RING_OFFSETS = 0,
- AC_UD_PUSH_CONSTANTS = 1,
- AC_UD_INLINE_PUSH_CONSTANTS = 2,
- AC_UD_INDIRECT_DESCRIPTOR_SETS = 3,
- AC_UD_VIEW_INDEX = 4,
- AC_UD_STREAMOUT_BUFFERS = 5,
- AC_UD_NGG_GS_STATE = 6,
- AC_UD_SHADER_START = 7,
- AC_UD_VS_VERTEX_BUFFERS = AC_UD_SHADER_START,
- AC_UD_VS_BASE_VERTEX_START_INSTANCE,
- AC_UD_VS_MAX_UD,
- AC_UD_PS_MAX_UD,
- AC_UD_CS_GRID_SIZE = AC_UD_SHADER_START,
- AC_UD_CS_MAX_UD,
- AC_UD_GS_MAX_UD,
- AC_UD_TCS_MAX_UD,
- AC_UD_TES_MAX_UD,
- AC_UD_MAX_UD = AC_UD_TCS_MAX_UD,
+ AC_UD_SCRATCH_RING_OFFSETS = 0,
+ AC_UD_PUSH_CONSTANTS = 1,
+ AC_UD_INLINE_PUSH_CONSTANTS = 2,
+ AC_UD_INDIRECT_DESCRIPTOR_SETS = 3,
+ AC_UD_VIEW_INDEX = 4,
+ AC_UD_STREAMOUT_BUFFERS = 5,
+ AC_UD_NGG_GS_STATE = 6,
+ AC_UD_SHADER_START = 7,
+ AC_UD_VS_VERTEX_BUFFERS = AC_UD_SHADER_START,
+ AC_UD_VS_BASE_VERTEX_START_INSTANCE,
+ AC_UD_VS_MAX_UD,
+ AC_UD_PS_MAX_UD,
+ AC_UD_CS_GRID_SIZE = AC_UD_SHADER_START,
+ AC_UD_CS_MAX_UD,
+ AC_UD_GS_MAX_UD,
+ AC_UD_TCS_MAX_UD,
+ AC_UD_TES_MAX_UD,
+ AC_UD_MAX_UD = AC_UD_TCS_MAX_UD,
};
struct radv_stream_output {
- uint8_t location;
- uint8_t buffer;
- uint16_t offset;
- uint8_t component_mask;
- uint8_t stream;
+ uint8_t location;
+ uint8_t buffer;
+ uint16_t offset;
+ uint8_t component_mask;
+ uint8_t stream;
};
struct radv_streamout_info {
- uint16_t num_outputs;
- struct radv_stream_output outputs[MAX_SO_OUTPUTS];
- uint16_t strides[MAX_SO_BUFFERS];
- uint32_t enabled_stream_buffers_mask;
+ uint16_t num_outputs;
+ struct radv_stream_output outputs[MAX_SO_OUTPUTS];
+ uint16_t strides[MAX_SO_BUFFERS];
+ uint32_t enabled_stream_buffers_mask;
};
struct radv_userdata_info {
- int8_t sgpr_idx;
- uint8_t num_sgprs;
+ int8_t sgpr_idx;
+ uint8_t num_sgprs;
};
struct radv_userdata_locations {
- struct radv_userdata_info descriptor_sets[MAX_SETS];
- struct radv_userdata_info shader_data[AC_UD_MAX_UD];
- uint32_t descriptor_sets_enabled;
+ struct radv_userdata_info descriptor_sets[MAX_SETS];
+ struct radv_userdata_info shader_data[AC_UD_MAX_UD];
+ uint32_t descriptor_sets_enabled;
};
struct radv_vs_output_info {
- uint8_t vs_output_param_offset[VARYING_SLOT_MAX];
- uint8_t clip_dist_mask;
- uint8_t cull_dist_mask;
- uint8_t param_exports;
- bool writes_pointsize;
- bool writes_layer;
- bool writes_viewport_index;
- bool writes_primitive_shading_rate;
- bool export_prim_id;
- unsigned pos_exports;
+ uint8_t vs_output_param_offset[VARYING_SLOT_MAX];
+ uint8_t clip_dist_mask;
+ uint8_t cull_dist_mask;
+ uint8_t param_exports;
+ bool writes_pointsize;
+ bool writes_layer;
+ bool writes_viewport_index;
+ bool writes_primitive_shading_rate;
+ bool export_prim_id;
+ unsigned pos_exports;
};
struct radv_es_output_info {
- uint32_t esgs_itemsize;
+ uint32_t esgs_itemsize;
};
struct gfx9_gs_info {
- uint32_t vgt_gs_onchip_cntl;
- uint32_t vgt_gs_max_prims_per_subgroup;
- uint32_t vgt_esgs_ring_itemsize;
- uint32_t lds_size;
+ uint32_t vgt_gs_onchip_cntl;
+ uint32_t vgt_gs_max_prims_per_subgroup;
+ uint32_t vgt_esgs_ring_itemsize;
+ uint32_t lds_size;
};
struct gfx10_ngg_info {
- uint16_t ngg_emit_size; /* in dwords */
- uint32_t hw_max_esverts;
- uint32_t max_gsprims;
- uint32_t max_out_verts;
- uint32_t prim_amp_factor;
- uint32_t vgt_esgs_ring_itemsize;
- uint32_t esgs_ring_size;
- bool max_vert_out_per_gs_instance;
+ uint16_t ngg_emit_size; /* in dwords */
+ uint32_t hw_max_esverts;
+ uint32_t max_gsprims;
+ uint32_t max_out_verts;
+ uint32_t prim_amp_factor;
+ uint32_t vgt_esgs_ring_itemsize;
+ uint32_t esgs_ring_size;
+ bool max_vert_out_per_gs_instance;
};
struct radv_shader_info {
- bool loads_push_constants;
- bool loads_dynamic_offsets;
- uint8_t min_push_constant_used;
- uint8_t max_push_constant_used;
- bool has_only_32bit_push_constants;
- bool has_indirect_push_constants;
- uint8_t num_inline_push_consts;
- uint8_t base_inline_push_consts;
- uint32_t desc_set_used_mask;
- bool needs_multiview_view_index;
- bool uses_invocation_id;
- bool uses_prim_id;
- uint8_t wave_size;
- uint8_t ballot_bit_size;
- struct radv_userdata_locations user_sgprs_locs;
- unsigned num_user_sgprs;
- unsigned num_input_sgprs;
- unsigned num_input_vgprs;
- unsigned private_mem_vgprs;
- bool need_indirect_descriptor_sets;
- bool is_ngg;
- bool is_ngg_passthrough;
- uint32_t num_tess_patches;
- struct {
- uint8_t input_usage_mask[RADV_VERT_ATTRIB_MAX];
- uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1];
- bool has_vertex_buffers; /* needs vertex buffers and base/start */
- bool needs_draw_id;
- bool needs_instance_id;
- struct radv_vs_output_info outinfo;
- struct radv_es_output_info es_info;
- bool as_es;
- bool as_ls;
- bool export_prim_id;
- bool tcs_in_out_eq;
- uint64_t tcs_temp_only_input_mask;
- uint8_t num_linked_outputs;
- bool needs_base_instance;
- } vs;
- struct {
- uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1];
- uint8_t num_stream_output_components[4];
- uint8_t output_streams[VARYING_SLOT_VAR31 + 1];
- uint8_t max_stream;
- unsigned gsvs_vertex_size;
- unsigned max_gsvs_emit_size;
- unsigned vertices_in;
- unsigned vertices_out;
- unsigned output_prim;
- unsigned invocations;
- unsigned es_type; /* GFX9: VS or TES */
- uint8_t num_linked_inputs;
- } gs;
- struct {
- uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1];
- struct radv_vs_output_info outinfo;
- struct radv_es_output_info es_info;
- bool as_es;
- unsigned primitive_mode;
- enum gl_tess_spacing spacing;
- bool ccw;
- bool point_mode;
- bool export_prim_id;
- uint8_t num_linked_inputs;
- uint8_t num_linked_patch_inputs;
- uint8_t num_linked_outputs;
- } tes;
- struct {
- bool uses_sample_shading;
- bool needs_sample_positions;
- bool writes_memory;
- bool writes_z;
- bool writes_stencil;
- bool writes_sample_mask;
- bool has_pcoord;
- bool prim_id_input;
- bool layer_input;
- bool viewport_index_input;
- uint8_t num_input_clips_culls;
- uint32_t input_mask;
- uint32_t flat_shaded_mask;
- uint32_t explicit_shaded_mask;
- uint32_t float16_shaded_mask;
- uint32_t num_interp;
- bool can_discard;
- bool early_fragment_test;
- bool post_depth_coverage;
- bool reads_sample_mask_in;
- uint8_t depth_layout;
- bool uses_persp_or_linear_interp;
- bool allow_flat_shading;
- } ps;
- struct {
- bool uses_grid_size;
- bool uses_block_id[3];
- bool uses_thread_id[3];
- bool uses_local_invocation_idx;
- unsigned block_size[3];
- } cs;
- struct {
- uint64_t tes_inputs_read;
- uint64_t tes_patch_inputs_read;
- unsigned tcs_vertices_out;
- uint32_t num_lds_blocks;
- uint8_t num_linked_inputs;
- uint8_t num_linked_outputs;
- uint8_t num_linked_patch_outputs;
- bool tes_reads_tess_factors:1;
- } tcs;
-
- struct radv_streamout_info so;
-
- struct gfx9_gs_info gs_ring_info;
- struct gfx10_ngg_info ngg_info;
-
- unsigned float_controls_mode;
+ bool loads_push_constants;
+ bool loads_dynamic_offsets;
+ uint8_t min_push_constant_used;
+ uint8_t max_push_constant_used;
+ bool has_only_32bit_push_constants;
+ bool has_indirect_push_constants;
+ uint8_t num_inline_push_consts;
+ uint8_t base_inline_push_consts;
+ uint32_t desc_set_used_mask;
+ bool needs_multiview_view_index;
+ bool uses_invocation_id;
+ bool uses_prim_id;
+ uint8_t wave_size;
+ uint8_t ballot_bit_size;
+ struct radv_userdata_locations user_sgprs_locs;
+ unsigned num_user_sgprs;
+ unsigned num_input_sgprs;
+ unsigned num_input_vgprs;
+ unsigned private_mem_vgprs;
+ bool need_indirect_descriptor_sets;
+ bool is_ngg;
+ bool is_ngg_passthrough;
+ uint32_t num_tess_patches;
+ struct {
+ uint8_t input_usage_mask[RADV_VERT_ATTRIB_MAX];
+ uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1];
+ bool has_vertex_buffers; /* needs vertex buffers and base/start */
+ bool needs_draw_id;
+ bool needs_instance_id;
+ struct radv_vs_output_info outinfo;
+ struct radv_es_output_info es_info;
+ bool as_es;
+ bool as_ls;
+ bool export_prim_id;
+ bool tcs_in_out_eq;
+ uint64_t tcs_temp_only_input_mask;
+ uint8_t num_linked_outputs;
+ bool needs_base_instance;
+ } vs;
+ struct {
+ uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1];
+ uint8_t num_stream_output_components[4];
+ uint8_t output_streams[VARYING_SLOT_VAR31 + 1];
+ uint8_t max_stream;
+ unsigned gsvs_vertex_size;
+ unsigned max_gsvs_emit_size;
+ unsigned vertices_in;
+ unsigned vertices_out;
+ unsigned output_prim;
+ unsigned invocations;
+ unsigned es_type; /* GFX9: VS or TES */
+ uint8_t num_linked_inputs;
+ } gs;
+ struct {
+ uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1];
+ struct radv_vs_output_info outinfo;
+ struct radv_es_output_info es_info;
+ bool as_es;
+ unsigned primitive_mode;
+ enum gl_tess_spacing spacing;
+ bool ccw;
+ bool point_mode;
+ bool export_prim_id;
+ uint8_t num_linked_inputs;
+ uint8_t num_linked_patch_inputs;
+ uint8_t num_linked_outputs;
+ } tes;
+ struct {
+ bool uses_sample_shading;
+ bool needs_sample_positions;
+ bool writes_memory;
+ bool writes_z;
+ bool writes_stencil;
+ bool writes_sample_mask;
+ bool has_pcoord;
+ bool prim_id_input;
+ bool layer_input;
+ bool viewport_index_input;
+ uint8_t num_input_clips_culls;
+ uint32_t input_mask;
+ uint32_t flat_shaded_mask;
+ uint32_t explicit_shaded_mask;
+ uint32_t float16_shaded_mask;
+ uint32_t num_interp;
+ bool can_discard;
+ bool early_fragment_test;
+ bool post_depth_coverage;
+ bool reads_sample_mask_in;
+ uint8_t depth_layout;
+ bool uses_persp_or_linear_interp;
+ bool allow_flat_shading;
+ } ps;
+ struct {
+ bool uses_grid_size;
+ bool uses_block_id[3];
+ bool uses_thread_id[3];
+ bool uses_local_invocation_idx;
+ unsigned block_size[3];
+ } cs;
+ struct {
+ uint64_t tes_inputs_read;
+ uint64_t tes_patch_inputs_read;
+ unsigned tcs_vertices_out;
+ uint32_t num_lds_blocks;
+ uint8_t num_linked_inputs;
+ uint8_t num_linked_outputs;
+ uint8_t num_linked_patch_outputs;
+ bool tes_reads_tess_factors : 1;
+ } tcs;
+
+ struct radv_streamout_info so;
+
+ struct gfx9_gs_info gs_ring_info;
+ struct gfx10_ngg_info ngg_info;
+
+ unsigned float_controls_mode;
};
-enum radv_shader_binary_type {
- RADV_BINARY_TYPE_LEGACY,
- RADV_BINARY_TYPE_RTLD
-};
+enum radv_shader_binary_type { RADV_BINARY_TYPE_LEGACY, RADV_BINARY_TYPE_RTLD };
struct radv_shader_binary {
- enum radv_shader_binary_type type;
- gl_shader_stage stage;
- bool is_gs_copy_shader;
+ enum radv_shader_binary_type type;
+ gl_shader_stage stage;
+ bool is_gs_copy_shader;
- struct radv_shader_info info;
+ struct radv_shader_info info;
- /* Self-referential size so we avoid consistency issues. */
- uint32_t total_size;
+ /* Self-referential size so we avoid consistency issues. */
+ uint32_t total_size;
};
struct radv_shader_binary_legacy {
- struct radv_shader_binary base;
- struct ac_shader_config config;
- unsigned code_size;
- unsigned exec_size;
- unsigned ir_size;
- unsigned disasm_size;
- unsigned stats_size;
-
- /* data has size of stats_size + code_size + ir_size + disasm_size + 2,
- * where the +2 is for 0 of the ir strings. */
- uint8_t data[0];
+ struct radv_shader_binary base;
+ struct ac_shader_config config;
+ unsigned code_size;
+ unsigned exec_size;
+ unsigned ir_size;
+ unsigned disasm_size;
+ unsigned stats_size;
+
+ /* data has size of stats_size + code_size + ir_size + disasm_size + 2,
+ * where the +2 is for 0 of the ir strings. */
+ uint8_t data[0];
};
struct radv_shader_binary_rtld {
- struct radv_shader_binary base;
- unsigned elf_size;
- unsigned llvm_ir_size;
- uint8_t data[0];
+ struct radv_shader_binary base;
+ unsigned elf_size;
+ unsigned llvm_ir_size;
+ uint8_t data[0];
};
struct radv_shader_variant {
- uint32_t ref_count;
-
- struct radeon_winsys_bo *bo;
- uint64_t bo_offset;
- struct ac_shader_config config;
- uint8_t *code_ptr;
- uint32_t code_size;
- uint32_t exec_size;
- struct radv_shader_info info;
-
- /* debug only */
- char *spirv;
- uint32_t spirv_size;
- char *nir_string;
- char *disasm_string;
- char *ir_string;
- uint32_t *statistics;
-
- struct list_head slab_list;
+ uint32_t ref_count;
+
+ struct radeon_winsys_bo *bo;
+ uint64_t bo_offset;
+ struct ac_shader_config config;
+ uint8_t *code_ptr;
+ uint32_t code_size;
+ uint32_t exec_size;
+ struct radv_shader_info info;
+
+ /* debug only */
+ char *spirv;
+ uint32_t spirv_size;
+ char *nir_string;
+ char *disasm_string;
+ char *ir_string;
+ uint32_t *statistics;
+
+ struct list_head slab_list;
};
struct radv_shader_slab {
- struct list_head slabs;
- struct list_head shaders;
- struct radeon_winsys_bo *bo;
- uint64_t size;
- char *ptr;
+ struct list_head slabs;
+ struct list_head shaders;
+ struct radeon_winsys_bo *bo;
+ uint64_t size;
+ char *ptr;
};
-void
-radv_optimize_nir(const struct radv_device *device, struct nir_shader *shader,
- bool optimize_conservatively, bool allow_copies);
-bool
-radv_nir_lower_ycbcr_textures(nir_shader *shader,
- const struct radv_pipeline_layout *layout);
-
-nir_shader *
-radv_shader_compile_to_nir(struct radv_device *device,
- struct vk_shader_module *module,
- const char *entrypoint_name,
- gl_shader_stage stage,
- const VkSpecializationInfo *spec_info,
- const VkPipelineCreateFlags flags,
- const struct radv_pipeline_layout *layout,
- const struct radv_pipeline_key *key);
-
-void
-radv_destroy_shader_slabs(struct radv_device *device);
-
-VkResult
-radv_create_shaders(struct radv_pipeline *pipeline,
- struct radv_device *device,
- struct radv_pipeline_cache *cache,
- const struct radv_pipeline_key *key,
- const VkPipelineShaderStageCreateInfo **pStages,
- const VkPipelineCreateFlags flags,
- VkPipelineCreationFeedbackEXT *pipeline_feedback,
- VkPipelineCreationFeedbackEXT **stage_feedbacks);
-
-struct radv_shader_variant *
-radv_shader_variant_create(struct radv_device *device,
- const struct radv_shader_binary *binary,
- bool keep_shader_info);
-struct radv_shader_variant *
-radv_shader_variant_compile(struct radv_device *device,
- struct vk_shader_module *module,
- struct nir_shader *const *shaders,
- int shader_count,
- struct radv_pipeline_layout *layout,
- const struct radv_shader_variant_key *key,
- struct radv_shader_info *info,
- bool keep_shader_info, bool keep_statistic_info,
- bool disable_optimizations,
- struct radv_shader_binary **binary_out);
+void radv_optimize_nir(const struct radv_device *device, struct nir_shader *shader,
+ bool optimize_conservatively, bool allow_copies);
+bool radv_nir_lower_ycbcr_textures(nir_shader *shader, const struct radv_pipeline_layout *layout);
+
+nir_shader *radv_shader_compile_to_nir(struct radv_device *device, struct vk_shader_module *module,
+ const char *entrypoint_name, gl_shader_stage stage,
+ const VkSpecializationInfo *spec_info,
+ const VkPipelineCreateFlags flags,
+ const struct radv_pipeline_layout *layout,
+ const struct radv_pipeline_key *key);
+
+void radv_destroy_shader_slabs(struct radv_device *device);
+
+VkResult radv_create_shaders(struct radv_pipeline *pipeline, struct radv_device *device,
+ struct radv_pipeline_cache *cache, const struct radv_pipeline_key *key,
+ const VkPipelineShaderStageCreateInfo **pStages,
+ const VkPipelineCreateFlags flags,
+ VkPipelineCreationFeedbackEXT *pipeline_feedback,
+ VkPipelineCreationFeedbackEXT **stage_feedbacks);
+
+struct radv_shader_variant *radv_shader_variant_create(struct radv_device *device,
+ const struct radv_shader_binary *binary,
+ bool keep_shader_info);
+struct radv_shader_variant *radv_shader_variant_compile(
+ struct radv_device *device, struct vk_shader_module *module, struct nir_shader *const *shaders,
+ int shader_count, struct radv_pipeline_layout *layout, const struct radv_shader_variant_key *key,
+ struct radv_shader_info *info, bool keep_shader_info, bool keep_statistic_info,
+ bool disable_optimizations, struct radv_shader_binary **binary_out);
struct radv_shader_variant *
radv_create_gs_copy_shader(struct radv_device *device, struct nir_shader *nir,
- struct radv_shader_info *info,
- struct radv_shader_binary **binary_out,
- bool multiview, bool keep_shader_info,
- bool keep_statistic_info,
- bool disable_optimizations);
-
-struct radv_shader_variant *
-radv_create_trap_handler_shader(struct radv_device *device);
+ struct radv_shader_info *info, struct radv_shader_binary **binary_out,
+ bool multiview, bool keep_shader_info, bool keep_statistic_info,
+ bool disable_optimizations);
-void
-radv_shader_variant_destroy(struct radv_device *device,
- struct radv_shader_variant *variant);
+struct radv_shader_variant *radv_create_trap_handler_shader(struct radv_device *device);
+void radv_shader_variant_destroy(struct radv_device *device, struct radv_shader_variant *variant);
-unsigned
-radv_get_max_waves(struct radv_device *device,
- struct radv_shader_variant *variant,
- gl_shader_stage stage);
+unsigned radv_get_max_waves(struct radv_device *device, struct radv_shader_variant *variant,
+ gl_shader_stage stage);
-unsigned
-radv_get_max_workgroup_size(enum chip_class chip_class,
- gl_shader_stage stage,
- const unsigned *sizes);
+unsigned radv_get_max_workgroup_size(enum chip_class chip_class, gl_shader_stage stage,
+ const unsigned *sizes);
-const char *
-radv_get_shader_name(struct radv_shader_info *info,
- gl_shader_stage stage);
+const char *radv_get_shader_name(struct radv_shader_info *info, gl_shader_stage stage);
-bool
-radv_can_dump_shader(struct radv_device *device,
- struct vk_shader_module *module,
- bool is_gs_copy_shader);
+bool radv_can_dump_shader(struct radv_device *device, struct vk_shader_module *module,
+ bool is_gs_copy_shader);
-bool
-radv_can_dump_shader_stats(struct radv_device *device,
- struct vk_shader_module *module);
+bool radv_can_dump_shader_stats(struct radv_device *device, struct vk_shader_module *module);
-VkResult
-radv_dump_shader_stats(struct radv_device *device,
- struct radv_pipeline *pipeline,
- gl_shader_stage stage, FILE *output);
+VkResult radv_dump_shader_stats(struct radv_device *device, struct radv_pipeline *pipeline,
+ gl_shader_stage stage, FILE *output);
static inline unsigned
-calculate_tess_lds_size(enum chip_class chip_class,
- unsigned tcs_num_input_vertices,
- unsigned tcs_num_output_vertices,
- unsigned tcs_num_inputs,
- unsigned tcs_num_patches,
- unsigned tcs_num_outputs,
- unsigned tcs_num_patch_outputs)
+calculate_tess_lds_size(enum chip_class chip_class, unsigned tcs_num_input_vertices,
+ unsigned tcs_num_output_vertices, unsigned tcs_num_inputs,
+ unsigned tcs_num_patches, unsigned tcs_num_outputs,
+ unsigned tcs_num_patch_outputs)
{
- unsigned input_vertex_size = tcs_num_inputs * 16;
- unsigned output_vertex_size = tcs_num_outputs * 16;
+ unsigned input_vertex_size = tcs_num_inputs * 16;
+ unsigned output_vertex_size = tcs_num_outputs * 16;
- unsigned input_patch_size = tcs_num_input_vertices * input_vertex_size;
+ unsigned input_patch_size = tcs_num_input_vertices * input_vertex_size;
- unsigned pervertex_output_patch_size = tcs_num_output_vertices * output_vertex_size;
- unsigned output_patch_size = pervertex_output_patch_size + tcs_num_patch_outputs * 16;
+ unsigned pervertex_output_patch_size = tcs_num_output_vertices * output_vertex_size;
+ unsigned output_patch_size = pervertex_output_patch_size + tcs_num_patch_outputs * 16;
- unsigned output_patch0_offset = input_patch_size * tcs_num_patches;
+ unsigned output_patch0_offset = input_patch_size * tcs_num_patches;
- unsigned lds_size = output_patch0_offset + output_patch_size * tcs_num_patches;
+ unsigned lds_size = output_patch0_offset + output_patch_size * tcs_num_patches;
- if (chip_class >= GFX7) {
- assert(lds_size <= 65536);
- lds_size = align(lds_size, 512) / 512;
- } else {
- assert(lds_size <= 32768);
- lds_size = align(lds_size, 256) / 256;
- }
+ if (chip_class >= GFX7) {
+ assert(lds_size <= 65536);
+ lds_size = align(lds_size, 512) / 512;
+ } else {
+ assert(lds_size <= 32768);
+ lds_size = align(lds_size, 256) / 256;
+ }
- return lds_size;
+ return lds_size;
}
static inline unsigned
-get_tcs_num_patches(unsigned tcs_num_input_vertices,
- unsigned tcs_num_output_vertices,
- unsigned tcs_num_inputs,
- unsigned tcs_num_outputs,
- unsigned tcs_num_patch_outputs,
- unsigned tess_offchip_block_dw_size,
- enum chip_class chip_class,
- enum radeon_family family)
+get_tcs_num_patches(unsigned tcs_num_input_vertices, unsigned tcs_num_output_vertices,
+ unsigned tcs_num_inputs, unsigned tcs_num_outputs,
+ unsigned tcs_num_patch_outputs, unsigned tess_offchip_block_dw_size,
+ enum chip_class chip_class, enum radeon_family family)
{
- uint32_t input_vertex_size = tcs_num_inputs * 16;
- uint32_t input_patch_size = tcs_num_input_vertices * input_vertex_size;
- uint32_t output_vertex_size = tcs_num_outputs * 16;
- uint32_t pervertex_output_patch_size = tcs_num_output_vertices * output_vertex_size;
- uint32_t output_patch_size = pervertex_output_patch_size + tcs_num_patch_outputs * 16;
-
- /* Ensure that we only need one wave per SIMD so we don't need to check
- * resource usage. Also ensures that the number of tcs in and out
- * vertices per threadgroup are at most 256.
- */
- unsigned num_patches = 64 / MAX2(tcs_num_input_vertices, tcs_num_output_vertices) * 4;
- /* Make sure that the data fits in LDS. This assumes the shaders only
- * use LDS for the inputs and outputs.
- */
- unsigned hardware_lds_size = 32768;
-
- /* Looks like STONEY hangs if we use more than 32 KiB LDS in a single
- * threadgroup, even though there is more than 32 KiB LDS.
- *
- * Test: dEQP-VK.tessellation.shader_input_output.barrier
- */
- if (chip_class >= GFX7 && family != CHIP_STONEY)
- hardware_lds_size = 65536;
-
- if (input_patch_size + output_patch_size)
- num_patches = MIN2(num_patches, hardware_lds_size / (input_patch_size + output_patch_size));
- /* Make sure the output data fits in the offchip buffer */
- if (output_patch_size)
- num_patches = MIN2(num_patches, (tess_offchip_block_dw_size * 4) / output_patch_size);
- /* Not necessary for correctness, but improves performance. The
- * specific value is taken from the proprietary driver.
- */
- num_patches = MIN2(num_patches, 40);
-
- /* GFX6 bug workaround - limit LS-HS threadgroups to only one wave. */
- if (chip_class == GFX6) {
- unsigned one_wave = 64 / MAX2(tcs_num_input_vertices, tcs_num_output_vertices);
- num_patches = MIN2(num_patches, one_wave);
- }
- return num_patches;
+ uint32_t input_vertex_size = tcs_num_inputs * 16;
+ uint32_t input_patch_size = tcs_num_input_vertices * input_vertex_size;
+ uint32_t output_vertex_size = tcs_num_outputs * 16;
+ uint32_t pervertex_output_patch_size = tcs_num_output_vertices * output_vertex_size;
+ uint32_t output_patch_size = pervertex_output_patch_size + tcs_num_patch_outputs * 16;
+
+ /* Ensure that we only need one wave per SIMD so we don't need to check
+ * resource usage. Also ensures that the number of tcs in and out
+ * vertices per threadgroup are at most 256.
+ */
+ unsigned num_patches = 64 / MAX2(tcs_num_input_vertices, tcs_num_output_vertices) * 4;
+ /* Make sure that the data fits in LDS. This assumes the shaders only
+ * use LDS for the inputs and outputs.
+ */
+ unsigned hardware_lds_size = 32768;
+
+ /* Looks like STONEY hangs if we use more than 32 KiB LDS in a single
+ * threadgroup, even though there is more than 32 KiB LDS.
+ *
+ * Test: dEQP-VK.tessellation.shader_input_output.barrier
+ */
+ if (chip_class >= GFX7 && family != CHIP_STONEY)
+ hardware_lds_size = 65536;
+
+ if (input_patch_size + output_patch_size)
+ num_patches = MIN2(num_patches, hardware_lds_size / (input_patch_size + output_patch_size));
+ /* Make sure the output data fits in the offchip buffer */
+ if (output_patch_size)
+ num_patches = MIN2(num_patches, (tess_offchip_block_dw_size * 4) / output_patch_size);
+ /* Not necessary for correctness, but improves performance. The
+ * specific value is taken from the proprietary driver.
+ */
+ num_patches = MIN2(num_patches, 40);
+
+ /* GFX6 bug workaround - limit LS-HS threadgroups to only one wave. */
+ if (chip_class == GFX6) {
+ unsigned one_wave = 64 / MAX2(tcs_num_input_vertices, tcs_num_output_vertices);
+ num_patches = MIN2(num_patches, one_wave);
+ }
+ return num_patches;
}
-void
-radv_lower_io(struct radv_device *device, nir_shader *nir);
+void radv_lower_io(struct radv_device *device, nir_shader *nir);
-bool
-radv_lower_io_to_mem(struct radv_device *device, struct nir_shader *nir,
- struct radv_shader_info *info, const struct radv_pipeline_key *pl_key);
+bool radv_lower_io_to_mem(struct radv_device *device, struct nir_shader *nir,
+ struct radv_shader_info *info, const struct radv_pipeline_key *pl_key);
#endif
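A hedged aside, not part of the patch: the sketch below replays the sizing math from the get_tcs_num_patches() and calculate_tess_lds_size() helpers above with illustrative values (triangle patches, 8 vec4 inputs/outputs per vertex, 2 per-patch outputs, an assumed 8192-dword off-chip block, a GFX9-class chip). The chosen constants and the standalone MAX2/MIN2/ALIGN helpers are assumptions for the example only, not values taken from the driver.

/* Standalone illustration of the tess sizing helpers above; values are made up. */
#include <stdio.h>

#define MAX2(a, b) ((a) > (b) ? (a) : (b))
#define MIN2(a, b) ((a) < (b) ? (a) : (b))
#define ALIGN(x, a) (((x) + (a)-1) / (a) * (a))

int
main(void)
{
   unsigned in_verts = 3, out_verts = 3;     /* triangle domain */
   unsigned num_inputs = 8, num_outputs = 8; /* vec4 slots, 16 bytes each */
   unsigned num_patch_outputs = 2;           /* includes tess factors */
   unsigned offchip_block_dw = 8192;         /* assumed off-chip block size */

   unsigned input_patch = in_verts * num_inputs * 16;   /* 384 bytes */
   unsigned output_patch = out_verts * num_outputs * 16 /* 384 bytes */
                           + num_patch_outputs * 16;    /* + 32 = 416 bytes */

   /* One wave per SIMD, then clamp by LDS, by the off-chip buffer, and by 40. */
   unsigned num_patches = 64 / MAX2(in_verts, out_verts) * 4;              /* 84 */
   num_patches = MIN2(num_patches, 65536 / (input_patch + output_patch));  /* 81 */
   num_patches = MIN2(num_patches, (offchip_block_dw * 4) / output_patch); /* 78 */
   num_patches = MIN2(num_patches, 40);                                    /* 40 */

   /* LDS layout: all input patches first, then all output patches. */
   unsigned lds_bytes = (input_patch + output_patch) * num_patches; /* 32000 */
   unsigned lds_granules = ALIGN(lds_bytes, 512) / 512;             /* 63 on GFX7+ */

   printf("num_patches=%u, LDS=%u bytes (%u x 512-byte granules)\n", num_patches, lds_bytes,
          lds_granules);
   return 0;
}

With these inputs the clamp chain ends at the 40-patch performance cap and the LS-HS threadgroup uses 32000 bytes of LDS, i.e. 63 granules of 512 bytes on GFX7 and newer.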
diff --git a/src/amd/vulkan/radv_shader_args.c b/src/amd/vulkan/radv_shader_args.c
index c3a8167571b..75f70d6c475 100644
--- a/src/amd/vulkan/radv_shader_args.c
+++ b/src/amd/vulkan/radv_shader_args.c
@@ -26,744 +26,663 @@
* IN THE SOFTWARE.
*/
+#include "radv_shader_args.h"
#include "radv_private.h"
#include "radv_shader.h"
-#include "radv_shader_args.h"
static void
-set_loc(struct radv_userdata_info *ud_info, uint8_t *sgpr_idx,
- uint8_t num_sgprs)
+set_loc(struct radv_userdata_info *ud_info, uint8_t *sgpr_idx, uint8_t num_sgprs)
{
- ud_info->sgpr_idx = *sgpr_idx;
- ud_info->num_sgprs = num_sgprs;
- *sgpr_idx += num_sgprs;
+ ud_info->sgpr_idx = *sgpr_idx;
+ ud_info->num_sgprs = num_sgprs;
+ *sgpr_idx += num_sgprs;
}
static void
-set_loc_shader(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx,
- uint8_t num_sgprs)
+set_loc_shader(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx, uint8_t num_sgprs)
{
- struct radv_userdata_info *ud_info =
- &args->shader_info->user_sgprs_locs.shader_data[idx];
- assert(ud_info);
+ struct radv_userdata_info *ud_info = &args->shader_info->user_sgprs_locs.shader_data[idx];
+ assert(ud_info);
- set_loc(ud_info, sgpr_idx, num_sgprs);
+ set_loc(ud_info, sgpr_idx, num_sgprs);
}
static void
set_loc_shader_ptr(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx)
{
- bool use_32bit_pointers = idx != AC_UD_SCRATCH_RING_OFFSETS;
+ bool use_32bit_pointers = idx != AC_UD_SCRATCH_RING_OFFSETS;
- set_loc_shader(args, idx, sgpr_idx, use_32bit_pointers ? 1 : 2);
+ set_loc_shader(args, idx, sgpr_idx, use_32bit_pointers ? 1 : 2);
}
static void
set_loc_desc(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx)
{
- struct radv_userdata_locations *locs =
- &args->shader_info->user_sgprs_locs;
- struct radv_userdata_info *ud_info = &locs->descriptor_sets[idx];
- assert(ud_info);
+ struct radv_userdata_locations *locs = &args->shader_info->user_sgprs_locs;
+ struct radv_userdata_info *ud_info = &locs->descriptor_sets[idx];
+ assert(ud_info);
- set_loc(ud_info, sgpr_idx, 1);
+ set_loc(ud_info, sgpr_idx, 1);
- locs->descriptor_sets_enabled |= 1u << idx;
+ locs->descriptor_sets_enabled |= 1u << idx;
}
struct user_sgpr_info {
- bool indirect_all_descriptor_sets;
- uint8_t remaining_sgprs;
+ bool indirect_all_descriptor_sets;
+ uint8_t remaining_sgprs;
};
-static bool needs_view_index_sgpr(struct radv_shader_args *args,
- gl_shader_stage stage)
+static bool
+needs_view_index_sgpr(struct radv_shader_args *args, gl_shader_stage stage)
{
- switch (stage) {
- case MESA_SHADER_VERTEX:
- if (args->shader_info->needs_multiview_view_index ||
- (!args->options->key.vs_common_out.as_es && !args->options->key.vs_common_out.as_ls && args->options->key.has_multiview_view_index))
- return true;
- break;
- case MESA_SHADER_TESS_EVAL:
- if (args->shader_info->needs_multiview_view_index || (!args->options->key.vs_common_out.as_es && args->options->key.has_multiview_view_index))
- return true;
- break;
- case MESA_SHADER_TESS_CTRL:
- if (args->shader_info->needs_multiview_view_index)
- return true;
- break;
- case MESA_SHADER_GEOMETRY:
- if (args->shader_info->needs_multiview_view_index ||
- (args->options->key.vs_common_out.as_ngg &&
- args->options->key.has_multiview_view_index))
- return true;
- break;
- default:
- break;
- }
- return false;
+ switch (stage) {
+ case MESA_SHADER_VERTEX:
+ if (args->shader_info->needs_multiview_view_index ||
+ (!args->options->key.vs_common_out.as_es && !args->options->key.vs_common_out.as_ls &&
+ args->options->key.has_multiview_view_index))
+ return true;
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ if (args->shader_info->needs_multiview_view_index ||
+ (!args->options->key.vs_common_out.as_es && args->options->key.has_multiview_view_index))
+ return true;
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ if (args->shader_info->needs_multiview_view_index)
+ return true;
+ break;
+ case MESA_SHADER_GEOMETRY:
+ if (args->shader_info->needs_multiview_view_index ||
+ (args->options->key.vs_common_out.as_ngg && args->options->key.has_multiview_view_index))
+ return true;
+ break;
+ default:
+ break;
+ }
+ return false;
}
static uint8_t
count_vs_user_sgprs(struct radv_shader_args *args)
{
- uint8_t count = 1; /* vertex offset */
+ uint8_t count = 1; /* vertex offset */
- if (args->shader_info->vs.has_vertex_buffers)
- count++;
- if (args->shader_info->vs.needs_draw_id)
- count++;
- if (args->shader_info->vs.needs_base_instance)
- count++;
+ if (args->shader_info->vs.has_vertex_buffers)
+ count++;
+ if (args->shader_info->vs.needs_draw_id)
+ count++;
+ if (args->shader_info->vs.needs_base_instance)
+ count++;
- return count;
+ return count;
}
-static void allocate_inline_push_consts(struct radv_shader_args *args,
- struct user_sgpr_info *user_sgpr_info)
+static void
+allocate_inline_push_consts(struct radv_shader_args *args, struct user_sgpr_info *user_sgpr_info)
{
- uint8_t remaining_sgprs = user_sgpr_info->remaining_sgprs;
-
- /* Only supported if shaders use push constants. */
- if (args->shader_info->min_push_constant_used == UINT8_MAX)
- return;
-
- /* Only supported if shaders don't have indirect push constants. */
- if (args->shader_info->has_indirect_push_constants)
- return;
-
- /* Only supported for 32-bit push constants. */
- if (!args->shader_info->has_only_32bit_push_constants)
- return;
-
- uint8_t num_push_consts =
- (args->shader_info->max_push_constant_used -
- args->shader_info->min_push_constant_used) / 4;
-
- /* Check if the number of user SGPRs is large enough. */
- if (num_push_consts < remaining_sgprs) {
- args->shader_info->num_inline_push_consts = num_push_consts;
- } else {
- args->shader_info->num_inline_push_consts = remaining_sgprs;
- }
-
- /* Clamp to the maximum number of allowed inlined push constants. */
- if (args->shader_info->num_inline_push_consts > AC_MAX_INLINE_PUSH_CONSTS)
- args->shader_info->num_inline_push_consts = AC_MAX_INLINE_PUSH_CONSTS;
-
- if (args->shader_info->num_inline_push_consts == num_push_consts &&
- !args->shader_info->loads_dynamic_offsets) {
- /* Disable the default push constants path if all constants are
- * inlined and if shaders don't use dynamic descriptors.
- */
- args->shader_info->loads_push_constants = false;
- }
-
- args->shader_info->base_inline_push_consts =
- args->shader_info->min_push_constant_used / 4;
+ uint8_t remaining_sgprs = user_sgpr_info->remaining_sgprs;
+
+ /* Only supported if shaders use push constants. */
+ if (args->shader_info->min_push_constant_used == UINT8_MAX)
+ return;
+
+ /* Only supported if shaders don't have indirect push constants. */
+ if (args->shader_info->has_indirect_push_constants)
+ return;
+
+ /* Only supported for 32-bit push constants. */
+ if (!args->shader_info->has_only_32bit_push_constants)
+ return;
+
+ uint8_t num_push_consts =
+ (args->shader_info->max_push_constant_used - args->shader_info->min_push_constant_used) / 4;
+
+ /* Check if the number of user SGPRs is large enough. */
+ if (num_push_consts < remaining_sgprs) {
+ args->shader_info->num_inline_push_consts = num_push_consts;
+ } else {
+ args->shader_info->num_inline_push_consts = remaining_sgprs;
+ }
+
+ /* Clamp to the maximum number of allowed inlined push constants. */
+ if (args->shader_info->num_inline_push_consts > AC_MAX_INLINE_PUSH_CONSTS)
+ args->shader_info->num_inline_push_consts = AC_MAX_INLINE_PUSH_CONSTS;
+
+ if (args->shader_info->num_inline_push_consts == num_push_consts &&
+ !args->shader_info->loads_dynamic_offsets) {
+ /* Disable the default push constants path if all constants are
+ * inlined and if shaders don't use dynamic descriptors.
+ */
+ args->shader_info->loads_push_constants = false;
+ }
+
+ args->shader_info->base_inline_push_consts = args->shader_info->min_push_constant_used / 4;
}
-static void allocate_user_sgprs(struct radv_shader_args *args,
- gl_shader_stage stage,
- bool has_previous_stage,
- gl_shader_stage previous_stage,
- bool needs_view_index,
- struct user_sgpr_info *user_sgpr_info)
+static void
+allocate_user_sgprs(struct radv_shader_args *args, gl_shader_stage stage, bool has_previous_stage,
+ gl_shader_stage previous_stage, bool needs_view_index,
+ struct user_sgpr_info *user_sgpr_info)
{
- uint8_t user_sgpr_count = 0;
-
- memset(user_sgpr_info, 0, sizeof(struct user_sgpr_info));
-
- /* 2 user sgprs will always be allocated for scratch/rings */
- user_sgpr_count += 2;
-
- switch (stage) {
- case MESA_SHADER_COMPUTE:
- if (args->shader_info->cs.uses_grid_size)
- user_sgpr_count += 3;
- break;
- case MESA_SHADER_FRAGMENT:
- user_sgpr_count += args->shader_info->ps.needs_sample_positions;
- break;
- case MESA_SHADER_VERTEX:
- if (!args->is_gs_copy_shader)
- user_sgpr_count += count_vs_user_sgprs(args);
- break;
- case MESA_SHADER_TESS_CTRL:
- if (has_previous_stage) {
- if (previous_stage == MESA_SHADER_VERTEX)
- user_sgpr_count += count_vs_user_sgprs(args);
- }
- break;
- case MESA_SHADER_TESS_EVAL:
- break;
- case MESA_SHADER_GEOMETRY:
- if (has_previous_stage) {
- if (previous_stage == MESA_SHADER_VERTEX) {
- user_sgpr_count += count_vs_user_sgprs(args);
- }
- }
- break;
- default:
- break;
- }
-
- if (needs_view_index)
- user_sgpr_count++;
-
- if (args->shader_info->loads_push_constants)
- user_sgpr_count++;
-
- if (args->shader_info->so.num_outputs)
- user_sgpr_count++;
-
- uint32_t available_sgprs = args->options->chip_class >= GFX9 && stage != MESA_SHADER_COMPUTE ? 32 : 16;
- uint32_t remaining_sgprs = available_sgprs - user_sgpr_count;
- uint32_t num_desc_set =
- util_bitcount(args->shader_info->desc_set_used_mask);
-
- if (remaining_sgprs < num_desc_set) {
- user_sgpr_info->indirect_all_descriptor_sets = true;
- user_sgpr_info->remaining_sgprs = remaining_sgprs - 1;
- } else {
- user_sgpr_info->remaining_sgprs = remaining_sgprs - num_desc_set;
- }
-
- allocate_inline_push_consts(args, user_sgpr_info);
+ uint8_t user_sgpr_count = 0;
+
+ memset(user_sgpr_info, 0, sizeof(struct user_sgpr_info));
+
+ /* 2 user sgprs will always be allocated for scratch/rings */
+ user_sgpr_count += 2;
+
+ switch (stage) {
+ case MESA_SHADER_COMPUTE:
+ if (args->shader_info->cs.uses_grid_size)
+ user_sgpr_count += 3;
+ break;
+ case MESA_SHADER_FRAGMENT:
+ user_sgpr_count += args->shader_info->ps.needs_sample_positions;
+ break;
+ case MESA_SHADER_VERTEX:
+ if (!args->is_gs_copy_shader)
+ user_sgpr_count += count_vs_user_sgprs(args);
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ if (has_previous_stage) {
+ if (previous_stage == MESA_SHADER_VERTEX)
+ user_sgpr_count += count_vs_user_sgprs(args);
+ }
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ break;
+ case MESA_SHADER_GEOMETRY:
+ if (has_previous_stage) {
+ if (previous_stage == MESA_SHADER_VERTEX) {
+ user_sgpr_count += count_vs_user_sgprs(args);
+ }
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (needs_view_index)
+ user_sgpr_count++;
+
+ if (args->shader_info->loads_push_constants)
+ user_sgpr_count++;
+
+ if (args->shader_info->so.num_outputs)
+ user_sgpr_count++;
+
+ uint32_t available_sgprs =
+ args->options->chip_class >= GFX9 && stage != MESA_SHADER_COMPUTE ? 32 : 16;
+ uint32_t remaining_sgprs = available_sgprs - user_sgpr_count;
+ uint32_t num_desc_set = util_bitcount(args->shader_info->desc_set_used_mask);
+
+ if (remaining_sgprs < num_desc_set) {
+ user_sgpr_info->indirect_all_descriptor_sets = true;
+ user_sgpr_info->remaining_sgprs = remaining_sgprs - 1;
+ } else {
+ user_sgpr_info->remaining_sgprs = remaining_sgprs - num_desc_set;
+ }
+
+ allocate_inline_push_consts(args, user_sgpr_info);
}
static void
declare_global_input_sgprs(struct radv_shader_args *args,
- const struct user_sgpr_info *user_sgpr_info)
+ const struct user_sgpr_info *user_sgpr_info)
{
- /* 1 for each descriptor set */
- if (!user_sgpr_info->indirect_all_descriptor_sets) {
- uint32_t mask = args->shader_info->desc_set_used_mask;
-
- while (mask) {
- int i = u_bit_scan(&mask);
-
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR,
- &args->descriptor_sets[i]);
- }
- } else {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR_PTR,
- &args->descriptor_sets[0]);
- }
-
- if (args->shader_info->loads_push_constants) {
- /* 1 for push constants and dynamic descriptors */
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR,
- &args->ac.push_constants);
- }
-
- for (unsigned i = 0; i < args->shader_info->num_inline_push_consts; i++) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ac.inline_push_consts[i]);
- }
- args->ac.num_inline_push_consts = args->shader_info->num_inline_push_consts;
- args->ac.base_inline_push_consts = args->shader_info->base_inline_push_consts;
-
- if (args->shader_info->so.num_outputs) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR,
- &args->streamout_buffers);
- }
+ /* 1 for each descriptor set */
+ if (!user_sgpr_info->indirect_all_descriptor_sets) {
+ uint32_t mask = args->shader_info->desc_set_used_mask;
+
+ while (mask) {
+ int i = u_bit_scan(&mask);
+
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR, &args->descriptor_sets[i]);
+ }
+ } else {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR_PTR, &args->descriptor_sets[0]);
+ }
+
+ if (args->shader_info->loads_push_constants) {
+ /* 1 for push constants and dynamic descriptors */
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR, &args->ac.push_constants);
+ }
+
+ for (unsigned i = 0; i < args->shader_info->num_inline_push_consts; i++) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.inline_push_consts[i]);
+ }
+ args->ac.num_inline_push_consts = args->shader_info->num_inline_push_consts;
+ args->ac.base_inline_push_consts = args->shader_info->base_inline_push_consts;
+
+ if (args->shader_info->so.num_outputs) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &args->streamout_buffers);
+ }
}
static void
-declare_vs_specific_input_sgprs(struct radv_shader_args *args,
- gl_shader_stage stage,
- bool has_previous_stage,
- gl_shader_stage previous_stage)
+declare_vs_specific_input_sgprs(struct radv_shader_args *args, gl_shader_stage stage,
+ bool has_previous_stage, gl_shader_stage previous_stage)
{
- if (!args->is_gs_copy_shader &&
- (stage == MESA_SHADER_VERTEX ||
- (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
- if (args->shader_info->vs.has_vertex_buffers) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR,
- &args->ac.vertex_buffers);
- }
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.base_vertex);
- if (args->shader_info->vs.needs_draw_id) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.draw_id);
- }
- if (args->shader_info->vs.needs_base_instance) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.start_instance);
- }
- }
+ if (!args->is_gs_copy_shader && (stage == MESA_SHADER_VERTEX ||
+ (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
+ if (args->shader_info->vs.has_vertex_buffers) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &args->ac.vertex_buffers);
+ }
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.base_vertex);
+ if (args->shader_info->vs.needs_draw_id) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.draw_id);
+ }
+ if (args->shader_info->vs.needs_base_instance) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.start_instance);
+ }
+ }
}
static void
declare_vs_input_vgprs(struct radv_shader_args *args)
{
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id);
- if (!args->is_gs_copy_shader) {
- if (args->options->key.vs_common_out.as_ls) {
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_rel_patch_id);
- if (args->options->chip_class >= GFX10) {
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
- } else {
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
- }
- } else {
- if (args->options->chip_class >= GFX10) {
- if (args->options->key.vs_common_out.as_ngg) {
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
- } else {
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_prim_id);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
- }
- } else {
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_prim_id);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
- }
- }
- }
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id);
+ if (!args->is_gs_copy_shader) {
+ if (args->options->key.vs_common_out.as_ls) {
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_rel_patch_id);
+ if (args->options->chip_class >= GFX10) {
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
+ } else {
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
+ }
+ } else {
+ if (args->options->chip_class >= GFX10) {
+ if (args->options->key.vs_common_out.as_ngg) {
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
+ } else {
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_prim_id);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
+ }
+ } else {
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_prim_id);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
+ }
+ }
+ }
}
static void
declare_streamout_sgprs(struct radv_shader_args *args, gl_shader_stage stage)
{
- int i;
-
- if (args->options->use_ngg_streamout) {
- if (stage == MESA_SHADER_TESS_EVAL)
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
- return;
- }
-
- /* Streamout SGPRs. */
- if (args->shader_info->so.num_outputs) {
- assert(stage == MESA_SHADER_VERTEX ||
- stage == MESA_SHADER_TESS_EVAL);
-
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_config);
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_write_index);
- } else if (stage == MESA_SHADER_TESS_EVAL) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
- }
-
- /* A streamout buffer offset is loaded if the stride is non-zero. */
- for (i = 0; i < 4; i++) {
- if (!args->shader_info->so.strides[i])
- continue;
-
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_offset[i]);
- }
+ int i;
+
+ if (args->options->use_ngg_streamout) {
+ if (stage == MESA_SHADER_TESS_EVAL)
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
+ return;
+ }
+
+ /* Streamout SGPRs. */
+ if (args->shader_info->so.num_outputs) {
+ assert(stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL);
+
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_config);
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_write_index);
+ } else if (stage == MESA_SHADER_TESS_EVAL) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
+ }
+
+ /* A streamout buffer offset is loaded if the stride is non-zero. */
+ for (i = 0; i < 4; i++) {
+ if (!args->shader_info->so.strides[i])
+ continue;
+
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_offset[i]);
+ }
}
static void
declare_tes_input_vgprs(struct radv_shader_args *args)
{
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.tes_u);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.tes_v);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_rel_patch_id);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_patch_id);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.tes_u);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.tes_v);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_rel_patch_id);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_patch_id);
}
static void
-set_global_input_locs(struct radv_shader_args *args,
- const struct user_sgpr_info *user_sgpr_info,
- uint8_t *user_sgpr_idx)
+set_global_input_locs(struct radv_shader_args *args, const struct user_sgpr_info *user_sgpr_info,
+ uint8_t *user_sgpr_idx)
{
- uint32_t mask = args->shader_info->desc_set_used_mask;
-
- if (!user_sgpr_info->indirect_all_descriptor_sets) {
- while (mask) {
- int i = u_bit_scan(&mask);
-
- set_loc_desc(args, i, user_sgpr_idx);
- }
- } else {
- set_loc_shader_ptr(args, AC_UD_INDIRECT_DESCRIPTOR_SETS,
- user_sgpr_idx);
-
- args->shader_info->need_indirect_descriptor_sets = true;
- }
-
- if (args->shader_info->loads_push_constants) {
- set_loc_shader_ptr(args, AC_UD_PUSH_CONSTANTS, user_sgpr_idx);
- }
-
- if (args->shader_info->num_inline_push_consts) {
- set_loc_shader(args, AC_UD_INLINE_PUSH_CONSTANTS, user_sgpr_idx,
- args->shader_info->num_inline_push_consts);
- }
-
- if (args->streamout_buffers.used) {
- set_loc_shader_ptr(args, AC_UD_STREAMOUT_BUFFERS,
- user_sgpr_idx);
- }
+ uint32_t mask = args->shader_info->desc_set_used_mask;
+
+ if (!user_sgpr_info->indirect_all_descriptor_sets) {
+ while (mask) {
+ int i = u_bit_scan(&mask);
+
+ set_loc_desc(args, i, user_sgpr_idx);
+ }
+ } else {
+ set_loc_shader_ptr(args, AC_UD_INDIRECT_DESCRIPTOR_SETS, user_sgpr_idx);
+
+ args->shader_info->need_indirect_descriptor_sets = true;
+ }
+
+ if (args->shader_info->loads_push_constants) {
+ set_loc_shader_ptr(args, AC_UD_PUSH_CONSTANTS, user_sgpr_idx);
+ }
+
+ if (args->shader_info->num_inline_push_consts) {
+ set_loc_shader(args, AC_UD_INLINE_PUSH_CONSTANTS, user_sgpr_idx,
+ args->shader_info->num_inline_push_consts);
+ }
+
+ if (args->streamout_buffers.used) {
+ set_loc_shader_ptr(args, AC_UD_STREAMOUT_BUFFERS, user_sgpr_idx);
+ }
}
static void
-set_vs_specific_input_locs(struct radv_shader_args *args,
- gl_shader_stage stage, bool has_previous_stage,
- gl_shader_stage previous_stage,
- uint8_t *user_sgpr_idx)
+set_vs_specific_input_locs(struct radv_shader_args *args, gl_shader_stage stage,
+ bool has_previous_stage, gl_shader_stage previous_stage,
+ uint8_t *user_sgpr_idx)
{
- if (!args->is_gs_copy_shader &&
- (stage == MESA_SHADER_VERTEX ||
- (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
- if (args->shader_info->vs.has_vertex_buffers) {
- set_loc_shader_ptr(args, AC_UD_VS_VERTEX_BUFFERS,
- user_sgpr_idx);
- }
-
- unsigned vs_num = count_vs_user_sgprs(args) - args->shader_info->vs.has_vertex_buffers;
- set_loc_shader(args, AC_UD_VS_BASE_VERTEX_START_INSTANCE,
- user_sgpr_idx, vs_num);
- }
+ if (!args->is_gs_copy_shader && (stage == MESA_SHADER_VERTEX ||
+ (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
+ if (args->shader_info->vs.has_vertex_buffers) {
+ set_loc_shader_ptr(args, AC_UD_VS_VERTEX_BUFFERS, user_sgpr_idx);
+ }
+
+ unsigned vs_num = count_vs_user_sgprs(args) - args->shader_info->vs.has_vertex_buffers;
+ set_loc_shader(args, AC_UD_VS_BASE_VERTEX_START_INSTANCE, user_sgpr_idx, vs_num);
+ }
}
/* Returns whether the stage is a stage that can be directly before the GS */
-static bool is_pre_gs_stage(gl_shader_stage stage)
+static bool
+is_pre_gs_stage(gl_shader_stage stage)
{
- return stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL;
+ return stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL;
}
void
-radv_declare_shader_args(struct radv_shader_args *args,
- gl_shader_stage stage,
- bool has_previous_stage,
- gl_shader_stage previous_stage)
+radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage,
+ bool has_previous_stage, gl_shader_stage previous_stage)
{
- struct user_sgpr_info user_sgpr_info;
- bool needs_view_index = needs_view_index_sgpr(args, stage);
-
- if (args->options->chip_class >= GFX10) {
- if (is_pre_gs_stage(stage) && args->options->key.vs_common_out.as_ngg) {
- /* On GFX10, VS is merged into GS for NGG. */
- previous_stage = stage;
- stage = MESA_SHADER_GEOMETRY;
- has_previous_stage = true;
- }
- }
-
- for (int i = 0; i < MAX_SETS; i++)
- args->shader_info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1;
- for (int i = 0; i < AC_UD_MAX_UD; i++)
- args->shader_info->user_sgprs_locs.shader_data[i].sgpr_idx = -1;
-
-
- allocate_user_sgprs(args, stage, has_previous_stage,
- previous_stage, needs_view_index, &user_sgpr_info);
-
- if (args->options->explicit_scratch_args) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR,
- &args->ring_offsets);
- }
-
- switch (stage) {
- case MESA_SHADER_COMPUTE:
- declare_global_input_sgprs(args, &user_sgpr_info);
-
- if (args->shader_info->cs.uses_grid_size) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_INT,
- &args->ac.num_work_groups);
- }
-
- for (int i = 0; i < 3; i++) {
- if (args->shader_info->cs.uses_block_id[i]) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ac.workgroup_ids[i]);
- }
- }
-
- if (args->shader_info->cs.uses_local_invocation_idx) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ac.tg_size);
- }
-
- if (args->options->explicit_scratch_args) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ac.scratch_offset);
- }
-
- ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT,
- &args->ac.local_invocation_ids);
- break;
- case MESA_SHADER_VERTEX:
- declare_global_input_sgprs(args, &user_sgpr_info);
-
- declare_vs_specific_input_sgprs(args, stage, has_previous_stage,
- previous_stage);
-
- if (needs_view_index) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ac.view_index);
- }
-
- if (args->options->key.vs_common_out.as_es) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ac.es2gs_offset);
- } else if (args->options->key.vs_common_out.as_ls) {
- /* no extra parameters */
- } else {
- declare_streamout_sgprs(args, stage);
- }
-
- if (args->options->explicit_scratch_args) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ac.scratch_offset);
- }
-
- declare_vs_input_vgprs(args);
- break;
- case MESA_SHADER_TESS_CTRL:
- if (has_previous_stage) {
- // First 6 system regs
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ac.merged_wave_info);
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ac.tcs_factor_offset);
-
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
-
- declare_global_input_sgprs(args, &user_sgpr_info);
-
- declare_vs_specific_input_sgprs(args, stage,
- has_previous_stage,
- previous_stage);
-
- if (needs_view_index) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ac.view_index);
- }
-
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
- &args->ac.tcs_patch_id);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
- &args->ac.tcs_rel_ids);
-
- declare_vs_input_vgprs(args);
- } else {
- declare_global_input_sgprs(args, &user_sgpr_info);
-
- if (needs_view_index) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ac.view_index);
- }
-
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ac.tcs_factor_offset);
- if (args->options->explicit_scratch_args) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ac.scratch_offset);
- }
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
- &args->ac.tcs_patch_id);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
- &args->ac.tcs_rel_ids);
- }
- break;
- case MESA_SHADER_TESS_EVAL:
- declare_global_input_sgprs(args, &user_sgpr_info);
-
- if (needs_view_index)
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ac.view_index);
-
- if (args->options->key.vs_common_out.as_es) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ac.es2gs_offset);
- } else {
- declare_streamout_sgprs(args, stage);
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
- }
- if (args->options->explicit_scratch_args) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ac.scratch_offset);
- }
- declare_tes_input_vgprs(args);
- break;
- case MESA_SHADER_GEOMETRY:
- if (has_previous_stage) {
- // First 6 system regs
- if (args->options->key.vs_common_out.as_ngg) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ac.gs_tg_info);
- } else {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ac.gs2vs_offset);
- }
-
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ac.merged_wave_info);
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
-
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
-
- declare_global_input_sgprs(args, &user_sgpr_info);
-
- if (previous_stage != MESA_SHADER_TESS_EVAL) {
- declare_vs_specific_input_sgprs(args, stage,
- has_previous_stage,
- previous_stage);
- }
-
- if (needs_view_index) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ac.view_index);
- }
-
- if (args->options->key.vs_common_out.as_ngg) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ngg_gs_state);
- }
-
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
- &args->ac.gs_vtx_offset[0]);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
- &args->ac.gs_vtx_offset[2]);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
- &args->ac.gs_prim_id);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
- &args->ac.gs_invocation_id);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
- &args->ac.gs_vtx_offset[4]);
-
- if (previous_stage == MESA_SHADER_VERTEX) {
- declare_vs_input_vgprs(args);
- } else {
- declare_tes_input_vgprs(args);
- }
- } else {
- declare_global_input_sgprs(args, &user_sgpr_info);
-
- if (needs_view_index) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ac.view_index);
- }
-
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs2vs_offset);
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs_wave_id);
- if (args->options->explicit_scratch_args) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ac.scratch_offset);
- }
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
- &args->ac.gs_vtx_offset[0]);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
- &args->ac.gs_vtx_offset[1]);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
- &args->ac.gs_prim_id);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
- &args->ac.gs_vtx_offset[2]);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
- &args->ac.gs_vtx_offset[3]);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
- &args->ac.gs_vtx_offset[4]);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
- &args->ac.gs_vtx_offset[5]);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
- &args->ac.gs_invocation_id);
- }
- break;
- case MESA_SHADER_FRAGMENT:
- declare_global_input_sgprs(args, &user_sgpr_info);
-
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.prim_mask);
- if (args->options->explicit_scratch_args) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ac.scratch_offset);
- }
- ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_sample);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_center);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_centroid);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, &args->ac.pull_model);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_sample);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_center);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_centroid);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL); /* line stipple tex */
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[0]);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[1]);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[2]);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[3]);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.front_face);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.ancillary);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.sample_coverage);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* fixed pt */
- break;
- default:
- unreachable("Shader stage not implemented");
- }
-
- args->shader_info->num_input_vgprs = 0;
- args->shader_info->num_input_sgprs = 2;
- args->shader_info->num_input_sgprs += args->ac.num_sgprs_used;
- args->shader_info->num_input_vgprs = args->ac.num_vgprs_used;
-
- uint8_t user_sgpr_idx = 0;
-
- set_loc_shader_ptr(args, AC_UD_SCRATCH_RING_OFFSETS,
- &user_sgpr_idx);
-
- /* For merged shaders the user SGPRs start at 8, with 8 system SGPRs in front (including
- * the rw_buffers at s0/s1. With user SGPR0 = s8, lets restart the count from 0 */
- if (has_previous_stage)
- user_sgpr_idx = 0;
-
- set_global_input_locs(args, &user_sgpr_info, &user_sgpr_idx);
-
- switch (stage) {
- case MESA_SHADER_COMPUTE:
- if (args->shader_info->cs.uses_grid_size) {
- set_loc_shader(args, AC_UD_CS_GRID_SIZE,
- &user_sgpr_idx, 3);
- }
- break;
- case MESA_SHADER_VERTEX:
- set_vs_specific_input_locs(args, stage, has_previous_stage,
- previous_stage, &user_sgpr_idx);
- if (args->ac.view_index.used)
- set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
- break;
- case MESA_SHADER_TESS_CTRL:
- set_vs_specific_input_locs(args, stage, has_previous_stage,
- previous_stage, &user_sgpr_idx);
- if (args->ac.view_index.used)
- set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
- break;
- case MESA_SHADER_TESS_EVAL:
- if (args->ac.view_index.used)
- set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
- break;
- case MESA_SHADER_GEOMETRY:
- if (has_previous_stage) {
- if (previous_stage == MESA_SHADER_VERTEX)
- set_vs_specific_input_locs(args, stage,
- has_previous_stage,
- previous_stage,
- &user_sgpr_idx);
- }
- if (args->ac.view_index.used)
- set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
-
- if (args->ngg_gs_state.used)
- set_loc_shader(args, AC_UD_NGG_GS_STATE, &user_sgpr_idx, 1);
- break;
- case MESA_SHADER_FRAGMENT:
- break;
- default:
- unreachable("Shader stage not implemented");
- }
-
- args->shader_info->num_user_sgprs = user_sgpr_idx;
+ struct user_sgpr_info user_sgpr_info;
+ bool needs_view_index = needs_view_index_sgpr(args, stage);
+
+ if (args->options->chip_class >= GFX10) {
+ if (is_pre_gs_stage(stage) && args->options->key.vs_common_out.as_ngg) {
+ /* On GFX10, VS is merged into GS for NGG. */
+ previous_stage = stage;
+ stage = MESA_SHADER_GEOMETRY;
+ has_previous_stage = true;
+ }
+ }
+
+ for (int i = 0; i < MAX_SETS; i++)
+ args->shader_info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1;
+ for (int i = 0; i < AC_UD_MAX_UD; i++)
+ args->shader_info->user_sgprs_locs.shader_data[i].sgpr_idx = -1;
+
+ allocate_user_sgprs(args, stage, has_previous_stage, previous_stage, needs_view_index,
+ &user_sgpr_info);
+
+ if (args->options->explicit_scratch_args) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->ring_offsets);
+ }
+
+ switch (stage) {
+ case MESA_SHADER_COMPUTE:
+ declare_global_input_sgprs(args, &user_sgpr_info);
+
+ if (args->shader_info->cs.uses_grid_size) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_INT, &args->ac.num_work_groups);
+ }
+
+ for (int i = 0; i < 3; i++) {
+ if (args->shader_info->cs.uses_block_id[i]) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.workgroup_ids[i]);
+ }
+ }
+
+ if (args->shader_info->cs.uses_local_invocation_idx) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tg_size);
+ }
+
+ if (args->options->explicit_scratch_args) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
+ }
+
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, &args->ac.local_invocation_ids);
+ break;
+ case MESA_SHADER_VERTEX:
+ declare_global_input_sgprs(args, &user_sgpr_info);
+
+ declare_vs_specific_input_sgprs(args, stage, has_previous_stage, previous_stage);
+
+ if (needs_view_index) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
+ }
+
+ if (args->options->key.vs_common_out.as_es) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.es2gs_offset);
+ } else if (args->options->key.vs_common_out.as_ls) {
+ /* no extra parameters */
+ } else {
+ declare_streamout_sgprs(args, stage);
+ }
+
+ if (args->options->explicit_scratch_args) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
+ }
+
+ declare_vs_input_vgprs(args);
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ if (has_previous_stage) {
+ // First 6 system regs
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.merged_wave_info);
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_factor_offset);
+
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
+
+ declare_global_input_sgprs(args, &user_sgpr_info);
+
+ declare_vs_specific_input_sgprs(args, stage, has_previous_stage, previous_stage);
+
+ if (needs_view_index) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
+ }
+
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_patch_id);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_rel_ids);
+
+ declare_vs_input_vgprs(args);
+ } else {
+ declare_global_input_sgprs(args, &user_sgpr_info);
+
+ if (needs_view_index) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
+ }
+
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_factor_offset);
+ if (args->options->explicit_scratch_args) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
+ }
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_patch_id);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_rel_ids);
+ }
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ declare_global_input_sgprs(args, &user_sgpr_info);
+
+ if (needs_view_index)
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
+
+ if (args->options->key.vs_common_out.as_es) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.es2gs_offset);
+ } else {
+ declare_streamout_sgprs(args, stage);
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
+ }
+ if (args->options->explicit_scratch_args) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
+ }
+ declare_tes_input_vgprs(args);
+ break;
+ case MESA_SHADER_GEOMETRY:
+ if (has_previous_stage) {
+ // First 6 system regs
+ if (args->options->key.vs_common_out.as_ngg) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs_tg_info);
+ } else {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs2vs_offset);
+ }
+
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.merged_wave_info);
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
+
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
+
+ declare_global_input_sgprs(args, &user_sgpr_info);
+
+ if (previous_stage != MESA_SHADER_TESS_EVAL) {
+ declare_vs_specific_input_sgprs(args, stage, has_previous_stage, previous_stage);
+ }
+
+ if (needs_view_index) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
+ }
+
+ if (args->options->key.vs_common_out.as_ngg) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_gs_state);
+ }
+
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[2]);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_prim_id);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_invocation_id);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[4]);
+
+ if (previous_stage == MESA_SHADER_VERTEX) {
+ declare_vs_input_vgprs(args);
+ } else {
+ declare_tes_input_vgprs(args);
+ }
+ } else {
+ declare_global_input_sgprs(args, &user_sgpr_info);
+
+ if (needs_view_index) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
+ }
+
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs2vs_offset);
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs_wave_id);
+ if (args->options->explicit_scratch_args) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
+ }
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[1]);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_prim_id);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[2]);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[3]);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[4]);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[5]);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_invocation_id);
+ }
+ break;
+ case MESA_SHADER_FRAGMENT:
+ declare_global_input_sgprs(args, &user_sgpr_info);
+
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.prim_mask);
+ if (args->options->explicit_scratch_args) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
+ }
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_sample);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_center);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_centroid);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, &args->ac.pull_model);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_sample);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_center);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_centroid);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL); /* line stipple tex */
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[0]);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[1]);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[2]);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[3]);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.front_face);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.ancillary);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.sample_coverage);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* fixed pt */
+ break;
+ default:
+ unreachable("Shader stage not implemented");
+ }
+
+ args->shader_info->num_input_vgprs = 0;
+ args->shader_info->num_input_sgprs = 2;
+ args->shader_info->num_input_sgprs += args->ac.num_sgprs_used;
+ args->shader_info->num_input_vgprs = args->ac.num_vgprs_used;
+
+ uint8_t user_sgpr_idx = 0;
+
+ set_loc_shader_ptr(args, AC_UD_SCRATCH_RING_OFFSETS, &user_sgpr_idx);
+
+ /* For merged shaders the user SGPRs start at 8, with 8 system SGPRs in front (including
+ * the rw_buffers at s0/s1). With user SGPR0 = s8, let's restart the count from 0. */
+ if (has_previous_stage)
+ user_sgpr_idx = 0;
+
+ set_global_input_locs(args, &user_sgpr_info, &user_sgpr_idx);
+
+ switch (stage) {
+ case MESA_SHADER_COMPUTE:
+ if (args->shader_info->cs.uses_grid_size) {
+ set_loc_shader(args, AC_UD_CS_GRID_SIZE, &user_sgpr_idx, 3);
+ }
+ break;
+ case MESA_SHADER_VERTEX:
+ set_vs_specific_input_locs(args, stage, has_previous_stage, previous_stage, &user_sgpr_idx);
+ if (args->ac.view_index.used)
+ set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ set_vs_specific_input_locs(args, stage, has_previous_stage, previous_stage, &user_sgpr_idx);
+ if (args->ac.view_index.used)
+ set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ if (args->ac.view_index.used)
+ set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
+ break;
+ case MESA_SHADER_GEOMETRY:
+ if (has_previous_stage) {
+ if (previous_stage == MESA_SHADER_VERTEX)
+ set_vs_specific_input_locs(args, stage, has_previous_stage, previous_stage,
+ &user_sgpr_idx);
+ }
+ if (args->ac.view_index.used)
+ set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
+
+ if (args->ngg_gs_state.used)
+ set_loc_shader(args, AC_UD_NGG_GS_STATE, &user_sgpr_idx, 1);
+ break;
+ case MESA_SHADER_FRAGMENT:
+ break;
+ default:
+ unreachable("Shader stage not implemented");
+ }
+
+ args->shader_info->num_user_sgprs = user_sgpr_idx;
}
-
diff --git a/src/amd/vulkan/radv_shader_args.h b/src/amd/vulkan/radv_shader_args.h
index c4c57f38d44..a6828cdf309 100644
--- a/src/amd/vulkan/radv_shader_args.h
+++ b/src/amd/vulkan/radv_shader_args.h
@@ -21,38 +21,36 @@
* IN THE SOFTWARE.
*/
-#include "ac_shader_args.h"
-#include "radv_constants.h"
-#include "util/list.h"
#include "compiler/shader_enums.h"
+#include "util/list.h"
+#include "util/macros.h"
+#include "ac_shader_args.h"
#include "amd_family.h"
+#include "radv_constants.h"
struct radv_shader_args {
- struct ac_shader_args ac;
- struct radv_shader_info *shader_info;
- const struct radv_nir_compiler_options *options;
+ struct ac_shader_args ac;
+ struct radv_shader_info *shader_info;
+ const struct radv_nir_compiler_options *options;
- struct ac_arg descriptor_sets[MAX_SETS];
- struct ac_arg ring_offsets;
+ struct ac_arg descriptor_sets[MAX_SETS];
+ struct ac_arg ring_offsets;
- /* Streamout */
- struct ac_arg streamout_buffers;
+ /* Streamout */
+ struct ac_arg streamout_buffers;
- /* NGG GS */
- struct ac_arg ngg_gs_state;
+ /* NGG GS */
+ struct ac_arg ngg_gs_state;
- bool is_gs_copy_shader;
- bool is_trap_handler_shader;
+ bool is_gs_copy_shader;
+ bool is_trap_handler_shader;
};
static inline struct radv_shader_args *
radv_shader_args_from_ac(struct ac_shader_args *args)
{
- return container_of(args, struct radv_shader_args, ac);
+ return container_of(args, struct radv_shader_args, ac);
}
-void radv_declare_shader_args(struct radv_shader_args *args,
- gl_shader_stage stage,
- bool has_previous_stage,
- gl_shader_stage previous_stage);
-
+void radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage,
+ bool has_previous_stage, gl_shader_stage previous_stage);
diff --git a/src/amd/vulkan/radv_shader_helper.h b/src/amd/vulkan/radv_shader_helper.h
index c64d2df676b..adf1f27dff9 100644
--- a/src/amd/vulkan/radv_shader_helper.h
+++ b/src/amd/vulkan/radv_shader_helper.h
@@ -26,17 +26,13 @@
extern "C" {
#endif
-bool radv_init_llvm_compiler(struct ac_llvm_compiler *info,
- bool thread_compiler,
- enum radeon_family family,
- enum ac_target_machine_options tm_options,
- unsigned wave_size);
-void radv_destroy_llvm_compiler(struct ac_llvm_compiler *info,
- bool thread_compiler);
+bool radv_init_llvm_compiler(struct ac_llvm_compiler *info, bool thread_compiler,
+ enum radeon_family family, enum ac_target_machine_options tm_options,
+ unsigned wave_size);
+void radv_destroy_llvm_compiler(struct ac_llvm_compiler *info, bool thread_compiler);
-bool radv_compile_to_elf(struct ac_llvm_compiler *info,
- LLVMModuleRef module,
- char **pelf_buffer, size_t *pelf_size);
+bool radv_compile_to_elf(struct ac_llvm_compiler *info, LLVMModuleRef module, char **pelf_buffer,
+ size_t *pelf_size);
#ifdef __cplusplus
}
diff --git a/src/amd/vulkan/radv_shader_info.c b/src/amd/vulkan/radv_shader_info.c
index 75e9fd12339..84978192502 100644
--- a/src/amd/vulkan/radv_shader_info.c
+++ b/src/amd/vulkan/radv_shader_info.c
@@ -20,718 +20,691 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
-#include "radv_private.h"
-#include "radv_shader.h"
#include "nir/nir.h"
#include "nir/nir_xfb_info.h"
+#include "radv_private.h"
+#include "radv_shader.h"
-static void mark_sampler_desc(const nir_variable *var,
- struct radv_shader_info *info)
+static void
+mark_sampler_desc(const nir_variable *var, struct radv_shader_info *info)
{
- info->desc_set_used_mask |= (1u << var->data.descriptor_set);
+ info->desc_set_used_mask |= (1u << var->data.descriptor_set);
}
static void
-gather_intrinsic_load_input_info(const nir_shader *nir,
- const nir_intrinsic_instr *instr,
- struct radv_shader_info *info)
+gather_intrinsic_load_input_info(const nir_shader *nir, const nir_intrinsic_instr *instr,
+ struct radv_shader_info *info)
{
- switch (nir->info.stage) {
- case MESA_SHADER_VERTEX: {
- unsigned idx = nir_intrinsic_io_semantics(instr).location;
- unsigned component = nir_intrinsic_component(instr);
- unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
-
- info->vs.input_usage_mask[idx] |= mask << component;
- break;
- }
- default:
- break;
- }
+ switch (nir->info.stage) {
+ case MESA_SHADER_VERTEX: {
+ unsigned idx = nir_intrinsic_io_semantics(instr).location;
+ unsigned component = nir_intrinsic_component(instr);
+ unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
+
+ info->vs.input_usage_mask[idx] |= mask << component;
+ break;
+ }
+ default:
+ break;
+ }
}
static uint32_t
widen_writemask(uint32_t wrmask)
{
- uint32_t new_wrmask = 0;
- for(unsigned i = 0; i < 4; i++)
- new_wrmask |= (wrmask & (1 << i) ? 0x3 : 0x0) << (i * 2);
- return new_wrmask;
+ uint32_t new_wrmask = 0;
+ for (unsigned i = 0; i < 4; i++)
+ new_wrmask |= (wrmask & (1 << i) ? 0x3 : 0x0) << (i * 2);
+ return new_wrmask;
}
static void
set_writes_memory(const nir_shader *nir, struct radv_shader_info *info)
{
- if (nir->info.stage == MESA_SHADER_FRAGMENT)
- info->ps.writes_memory = true;
+ if (nir->info.stage == MESA_SHADER_FRAGMENT)
+ info->ps.writes_memory = true;
}
static void
-gather_intrinsic_store_output_info(const nir_shader *nir,
- const nir_intrinsic_instr *instr,
- struct radv_shader_info *info)
+gather_intrinsic_store_output_info(const nir_shader *nir, const nir_intrinsic_instr *instr,
+ struct radv_shader_info *info)
{
- unsigned idx = nir_intrinsic_base(instr);
- unsigned num_slots = nir_intrinsic_io_semantics(instr).num_slots;
- unsigned component = nir_intrinsic_component(instr);
- unsigned write_mask = nir_intrinsic_write_mask(instr);
- uint8_t *output_usage_mask = NULL;
-
- if (instr->src[0].ssa->bit_size == 64)
- write_mask = widen_writemask(write_mask);
-
- switch (nir->info.stage) {
- case MESA_SHADER_VERTEX:
- output_usage_mask = info->vs.output_usage_mask;
- break;
- case MESA_SHADER_TESS_EVAL:
- output_usage_mask = info->tes.output_usage_mask;
- break;
- case MESA_SHADER_GEOMETRY:
- output_usage_mask = info->gs.output_usage_mask;
- break;
- default:
- break;
- }
-
- if (output_usage_mask) {
- for (unsigned i = 0; i < num_slots; i++) {
- output_usage_mask[idx + i] |=
- ((write_mask >> (i * 4)) & 0xf) << component;
- }
- }
+ unsigned idx = nir_intrinsic_base(instr);
+ unsigned num_slots = nir_intrinsic_io_semantics(instr).num_slots;
+ unsigned component = nir_intrinsic_component(instr);
+ unsigned write_mask = nir_intrinsic_write_mask(instr);
+ uint8_t *output_usage_mask = NULL;
+
+ if (instr->src[0].ssa->bit_size == 64)
+ write_mask = widen_writemask(write_mask);
+
+ switch (nir->info.stage) {
+ case MESA_SHADER_VERTEX:
+ output_usage_mask = info->vs.output_usage_mask;
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ output_usage_mask = info->tes.output_usage_mask;
+ break;
+ case MESA_SHADER_GEOMETRY:
+ output_usage_mask = info->gs.output_usage_mask;
+ break;
+ default:
+ break;
+ }
+
+ if (output_usage_mask) {
+ for (unsigned i = 0; i < num_slots; i++) {
+ output_usage_mask[idx + i] |= ((write_mask >> (i * 4)) & 0xf) << component;
+ }
+ }
}
static void
-gather_push_constant_info(const nir_shader *nir,
- const nir_intrinsic_instr *instr,
- struct radv_shader_info *info)
+gather_push_constant_info(const nir_shader *nir, const nir_intrinsic_instr *instr,
+ struct radv_shader_info *info)
{
- int base = nir_intrinsic_base(instr);
+ int base = nir_intrinsic_base(instr);
- if (!nir_src_is_const(instr->src[0])) {
- info->has_indirect_push_constants = true;
- } else {
- uint32_t min = base + nir_src_as_uint(instr->src[0]);
- uint32_t max = min + instr->num_components * 4;
+ if (!nir_src_is_const(instr->src[0])) {
+ info->has_indirect_push_constants = true;
+ } else {
+ uint32_t min = base + nir_src_as_uint(instr->src[0]);
+ uint32_t max = min + instr->num_components * 4;
- info->max_push_constant_used =
- MAX2(max, info->max_push_constant_used);
- info->min_push_constant_used =
- MIN2(min, info->min_push_constant_used);
- }
+ info->max_push_constant_used = MAX2(max, info->max_push_constant_used);
+ info->min_push_constant_used = MIN2(min, info->min_push_constant_used);
+ }
- if (instr->dest.ssa.bit_size != 32)
- info->has_only_32bit_push_constants = false;
+ if (instr->dest.ssa.bit_size != 32)
+ info->has_only_32bit_push_constants = false;
- info->loads_push_constants = true;
+ info->loads_push_constants = true;
}
static void
gather_intrinsic_info(const nir_shader *nir, const nir_intrinsic_instr *instr,
- struct radv_shader_info *info)
+ struct radv_shader_info *info)
{
- switch (instr->intrinsic) {
- case nir_intrinsic_load_barycentric_sample:
- case nir_intrinsic_load_barycentric_pixel:
- case nir_intrinsic_load_barycentric_centroid: {
- enum glsl_interp_mode mode = nir_intrinsic_interp_mode(instr);
- switch (mode) {
- case INTERP_MODE_NONE:
- case INTERP_MODE_SMOOTH:
- case INTERP_MODE_NOPERSPECTIVE:
- info->ps.uses_persp_or_linear_interp = true;
- break;
- default:
- break;
- }
- break;
- }
- case nir_intrinsic_load_barycentric_at_offset:
- case nir_intrinsic_load_barycentric_at_sample:
- if (nir_intrinsic_interp_mode(instr) != INTERP_MODE_FLAT)
- info->ps.uses_persp_or_linear_interp = true;
-
- if (instr->intrinsic == nir_intrinsic_load_barycentric_at_sample)
- info->ps.needs_sample_positions = true;
- break;
- case nir_intrinsic_load_draw_id:
- info->vs.needs_draw_id = true;
- break;
- case nir_intrinsic_load_base_instance:
- info->vs.needs_base_instance = true;
- break;
- case nir_intrinsic_load_instance_id:
- info->vs.needs_instance_id = true;
- break;
- case nir_intrinsic_load_num_work_groups:
- info->cs.uses_grid_size = true;
- break;
- case nir_intrinsic_load_local_invocation_id:
- case nir_intrinsic_load_work_group_id: {
- unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
- while (mask) {
- unsigned i = u_bit_scan(&mask);
-
- if (instr->intrinsic == nir_intrinsic_load_work_group_id)
- info->cs.uses_block_id[i] = true;
- else
- info->cs.uses_thread_id[i] = true;
- }
- break;
- }
- case nir_intrinsic_load_local_invocation_index:
- case nir_intrinsic_load_subgroup_id:
- case nir_intrinsic_load_num_subgroups:
- info->cs.uses_local_invocation_idx = true;
- break;
- case nir_intrinsic_load_sample_mask_in:
- info->ps.reads_sample_mask_in = true;
- break;
- case nir_intrinsic_load_view_index:
- info->needs_multiview_view_index = true;
- if (nir->info.stage == MESA_SHADER_FRAGMENT)
- info->ps.layer_input = true;
- break;
- case nir_intrinsic_load_layer_id:
- if (nir->info.stage == MESA_SHADER_FRAGMENT)
- info->ps.layer_input = true;
- break;
- case nir_intrinsic_load_invocation_id:
- info->uses_invocation_id = true;
- break;
- case nir_intrinsic_load_primitive_id:
- info->uses_prim_id = true;
- break;
- case nir_intrinsic_load_push_constant:
- gather_push_constant_info(nir, instr, info);
- break;
- case nir_intrinsic_vulkan_resource_index:
- info->desc_set_used_mask |= (1u << nir_intrinsic_desc_set(instr));
- break;
- case nir_intrinsic_image_deref_load:
- case nir_intrinsic_image_deref_sparse_load:
- case nir_intrinsic_image_deref_store:
- case nir_intrinsic_image_deref_atomic_add:
- case nir_intrinsic_image_deref_atomic_imin:
- case nir_intrinsic_image_deref_atomic_umin:
- case nir_intrinsic_image_deref_atomic_imax:
- case nir_intrinsic_image_deref_atomic_umax:
- case nir_intrinsic_image_deref_atomic_and:
- case nir_intrinsic_image_deref_atomic_or:
- case nir_intrinsic_image_deref_atomic_xor:
- case nir_intrinsic_image_deref_atomic_exchange:
- case nir_intrinsic_image_deref_atomic_comp_swap:
- case nir_intrinsic_image_deref_size: {
- nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr));
- mark_sampler_desc(var, info);
-
- if (instr->intrinsic == nir_intrinsic_image_deref_store ||
- instr->intrinsic == nir_intrinsic_image_deref_atomic_add ||
- instr->intrinsic == nir_intrinsic_image_deref_atomic_imin ||
- instr->intrinsic == nir_intrinsic_image_deref_atomic_umin ||
- instr->intrinsic == nir_intrinsic_image_deref_atomic_imax ||
- instr->intrinsic == nir_intrinsic_image_deref_atomic_umax ||
- instr->intrinsic == nir_intrinsic_image_deref_atomic_and ||
- instr->intrinsic == nir_intrinsic_image_deref_atomic_or ||
- instr->intrinsic == nir_intrinsic_image_deref_atomic_xor ||
- instr->intrinsic == nir_intrinsic_image_deref_atomic_exchange ||
- instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap) {
- set_writes_memory(nir, info);
- }
- break;
- }
- case nir_intrinsic_store_ssbo:
- case nir_intrinsic_ssbo_atomic_add:
- case nir_intrinsic_ssbo_atomic_imin:
- case nir_intrinsic_ssbo_atomic_umin:
- case nir_intrinsic_ssbo_atomic_imax:
- case nir_intrinsic_ssbo_atomic_umax:
- case nir_intrinsic_ssbo_atomic_and:
- case nir_intrinsic_ssbo_atomic_or:
- case nir_intrinsic_ssbo_atomic_xor:
- case nir_intrinsic_ssbo_atomic_exchange:
- case nir_intrinsic_ssbo_atomic_comp_swap:
- case nir_intrinsic_store_global:
- case nir_intrinsic_global_atomic_add:
- case nir_intrinsic_global_atomic_imin:
- case nir_intrinsic_global_atomic_umin:
- case nir_intrinsic_global_atomic_imax:
- case nir_intrinsic_global_atomic_umax:
- case nir_intrinsic_global_atomic_and:
- case nir_intrinsic_global_atomic_or:
- case nir_intrinsic_global_atomic_xor:
- case nir_intrinsic_global_atomic_exchange:
- case nir_intrinsic_global_atomic_comp_swap:
- set_writes_memory(nir, info);
- break;
- case nir_intrinsic_load_input:
- gather_intrinsic_load_input_info(nir, instr, info);
- break;
- case nir_intrinsic_store_output:
- gather_intrinsic_store_output_info(nir, instr, info);
- break;
- default:
- break;
- }
+ switch (instr->intrinsic) {
+ case nir_intrinsic_load_barycentric_sample:
+ case nir_intrinsic_load_barycentric_pixel:
+ case nir_intrinsic_load_barycentric_centroid: {
+ enum glsl_interp_mode mode = nir_intrinsic_interp_mode(instr);
+ switch (mode) {
+ case INTERP_MODE_NONE:
+ case INTERP_MODE_SMOOTH:
+ case INTERP_MODE_NOPERSPECTIVE:
+ info->ps.uses_persp_or_linear_interp = true;
+ break;
+ default:
+ break;
+ }
+ break;
+ }
+ case nir_intrinsic_load_barycentric_at_offset:
+ case nir_intrinsic_load_barycentric_at_sample:
+ if (nir_intrinsic_interp_mode(instr) != INTERP_MODE_FLAT)
+ info->ps.uses_persp_or_linear_interp = true;
+
+ if (instr->intrinsic == nir_intrinsic_load_barycentric_at_sample)
+ info->ps.needs_sample_positions = true;
+ break;
+ case nir_intrinsic_load_draw_id:
+ info->vs.needs_draw_id = true;
+ break;
+ case nir_intrinsic_load_base_instance:
+ info->vs.needs_base_instance = true;
+ break;
+ case nir_intrinsic_load_instance_id:
+ info->vs.needs_instance_id = true;
+ break;
+ case nir_intrinsic_load_num_work_groups:
+ info->cs.uses_grid_size = true;
+ break;
+ case nir_intrinsic_load_local_invocation_id:
+ case nir_intrinsic_load_work_group_id: {
+ unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
+ while (mask) {
+ unsigned i = u_bit_scan(&mask);
+
+ if (instr->intrinsic == nir_intrinsic_load_work_group_id)
+ info->cs.uses_block_id[i] = true;
+ else
+ info->cs.uses_thread_id[i] = true;
+ }
+ break;
+ }
+ case nir_intrinsic_load_local_invocation_index:
+ case nir_intrinsic_load_subgroup_id:
+ case nir_intrinsic_load_num_subgroups:
+ info->cs.uses_local_invocation_idx = true;
+ break;
+ case nir_intrinsic_load_sample_mask_in:
+ info->ps.reads_sample_mask_in = true;
+ break;
+ case nir_intrinsic_load_view_index:
+ info->needs_multiview_view_index = true;
+ if (nir->info.stage == MESA_SHADER_FRAGMENT)
+ info->ps.layer_input = true;
+ break;
+ case nir_intrinsic_load_layer_id:
+ if (nir->info.stage == MESA_SHADER_FRAGMENT)
+ info->ps.layer_input = true;
+ break;
+ case nir_intrinsic_load_invocation_id:
+ info->uses_invocation_id = true;
+ break;
+ case nir_intrinsic_load_primitive_id:
+ info->uses_prim_id = true;
+ break;
+ case nir_intrinsic_load_push_constant:
+ gather_push_constant_info(nir, instr, info);
+ break;
+ case nir_intrinsic_vulkan_resource_index:
+ info->desc_set_used_mask |= (1u << nir_intrinsic_desc_set(instr));
+ break;
+ case nir_intrinsic_image_deref_load:
+ case nir_intrinsic_image_deref_sparse_load:
+ case nir_intrinsic_image_deref_store:
+ case nir_intrinsic_image_deref_atomic_add:
+ case nir_intrinsic_image_deref_atomic_imin:
+ case nir_intrinsic_image_deref_atomic_umin:
+ case nir_intrinsic_image_deref_atomic_imax:
+ case nir_intrinsic_image_deref_atomic_umax:
+ case nir_intrinsic_image_deref_atomic_and:
+ case nir_intrinsic_image_deref_atomic_or:
+ case nir_intrinsic_image_deref_atomic_xor:
+ case nir_intrinsic_image_deref_atomic_exchange:
+ case nir_intrinsic_image_deref_atomic_comp_swap:
+ case nir_intrinsic_image_deref_size: {
+ nir_variable *var =
+ nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr));
+ mark_sampler_desc(var, info);
+
+ if (instr->intrinsic == nir_intrinsic_image_deref_store ||
+ instr->intrinsic == nir_intrinsic_image_deref_atomic_add ||
+ instr->intrinsic == nir_intrinsic_image_deref_atomic_imin ||
+ instr->intrinsic == nir_intrinsic_image_deref_atomic_umin ||
+ instr->intrinsic == nir_intrinsic_image_deref_atomic_imax ||
+ instr->intrinsic == nir_intrinsic_image_deref_atomic_umax ||
+ instr->intrinsic == nir_intrinsic_image_deref_atomic_and ||
+ instr->intrinsic == nir_intrinsic_image_deref_atomic_or ||
+ instr->intrinsic == nir_intrinsic_image_deref_atomic_xor ||
+ instr->intrinsic == nir_intrinsic_image_deref_atomic_exchange ||
+ instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap) {
+ set_writes_memory(nir, info);
+ }
+ break;
+ }
+ case nir_intrinsic_store_ssbo:
+ case nir_intrinsic_ssbo_atomic_add:
+ case nir_intrinsic_ssbo_atomic_imin:
+ case nir_intrinsic_ssbo_atomic_umin:
+ case nir_intrinsic_ssbo_atomic_imax:
+ case nir_intrinsic_ssbo_atomic_umax:
+ case nir_intrinsic_ssbo_atomic_and:
+ case nir_intrinsic_ssbo_atomic_or:
+ case nir_intrinsic_ssbo_atomic_xor:
+ case nir_intrinsic_ssbo_atomic_exchange:
+ case nir_intrinsic_ssbo_atomic_comp_swap:
+ case nir_intrinsic_store_global:
+ case nir_intrinsic_global_atomic_add:
+ case nir_intrinsic_global_atomic_imin:
+ case nir_intrinsic_global_atomic_umin:
+ case nir_intrinsic_global_atomic_imax:
+ case nir_intrinsic_global_atomic_umax:
+ case nir_intrinsic_global_atomic_and:
+ case nir_intrinsic_global_atomic_or:
+ case nir_intrinsic_global_atomic_xor:
+ case nir_intrinsic_global_atomic_exchange:
+ case nir_intrinsic_global_atomic_comp_swap:
+ set_writes_memory(nir, info);
+ break;
+ case nir_intrinsic_load_input:
+ gather_intrinsic_load_input_info(nir, instr, info);
+ break;
+ case nir_intrinsic_store_output:
+ gather_intrinsic_store_output_info(nir, instr, info);
+ break;
+ default:
+ break;
+ }
}
static void
-gather_tex_info(const nir_shader *nir, const nir_tex_instr *instr,
- struct radv_shader_info *info)
+gather_tex_info(const nir_shader *nir, const nir_tex_instr *instr, struct radv_shader_info *info)
{
- for (unsigned i = 0; i < instr->num_srcs; i++) {
- switch (instr->src[i].src_type) {
- case nir_tex_src_texture_deref:
- mark_sampler_desc(nir_deref_instr_get_variable(nir_src_as_deref(instr->src[i].src)), info);
- break;
- case nir_tex_src_sampler_deref:
- mark_sampler_desc(nir_deref_instr_get_variable(nir_src_as_deref(instr->src[i].src)), info);
- break;
- default:
- break;
- }
- }
+ for (unsigned i = 0; i < instr->num_srcs; i++) {
+ switch (instr->src[i].src_type) {
+ case nir_tex_src_texture_deref:
+ mark_sampler_desc(nir_deref_instr_get_variable(nir_src_as_deref(instr->src[i].src)), info);
+ break;
+ case nir_tex_src_sampler_deref:
+ mark_sampler_desc(nir_deref_instr_get_variable(nir_src_as_deref(instr->src[i].src)), info);
+ break;
+ default:
+ break;
+ }
+ }
}
static void
-gather_info_block(const nir_shader *nir, const nir_block *block,
- struct radv_shader_info *info)
+gather_info_block(const nir_shader *nir, const nir_block *block, struct radv_shader_info *info)
{
- nir_foreach_instr(instr, block) {
- switch (instr->type) {
- case nir_instr_type_intrinsic:
- gather_intrinsic_info(nir, nir_instr_as_intrinsic(instr), info);
- break;
- case nir_instr_type_tex:
- gather_tex_info(nir, nir_instr_as_tex(instr), info);
- break;
- default:
- break;
- }
- }
+ nir_foreach_instr (instr, block) {
+ switch (instr->type) {
+ case nir_instr_type_intrinsic:
+ gather_intrinsic_info(nir, nir_instr_as_intrinsic(instr), info);
+ break;
+ case nir_instr_type_tex:
+ gather_tex_info(nir, nir_instr_as_tex(instr), info);
+ break;
+ default:
+ break;
+ }
+ }
}
static void
gather_info_input_decl_vs(const nir_shader *nir, const nir_variable *var,
- struct radv_shader_info *info,
- const struct radv_shader_variant_key *key)
+ struct radv_shader_info *info, const struct radv_shader_variant_key *key)
{
- unsigned attrib_count = glsl_count_attribute_slots(var->type, true);
- int idx = var->data.location;
+ unsigned attrib_count = glsl_count_attribute_slots(var->type, true);
+ int idx = var->data.location;
- if (idx >= VERT_ATTRIB_GENERIC0 && idx < VERT_ATTRIB_GENERIC0 + MAX_VERTEX_ATTRIBS)
- info->vs.has_vertex_buffers = true;
+ if (idx >= VERT_ATTRIB_GENERIC0 && idx < VERT_ATTRIB_GENERIC0 + MAX_VERTEX_ATTRIBS)
+ info->vs.has_vertex_buffers = true;
- for (unsigned i = 0; i < attrib_count; ++i) {
- unsigned attrib_index = var->data.location + i - VERT_ATTRIB_GENERIC0;
+ for (unsigned i = 0; i < attrib_count; ++i) {
+ unsigned attrib_index = var->data.location + i - VERT_ATTRIB_GENERIC0;
- if (key->vs.instance_rate_inputs & (1u << attrib_index)) {
- info->vs.needs_instance_id = true;
- info->vs.needs_base_instance = true;
- }
- }
+ if (key->vs.instance_rate_inputs & (1u << attrib_index)) {
+ info->vs.needs_instance_id = true;
+ info->vs.needs_base_instance = true;
+ }
+ }
}
static void
-mark_16bit_ps_input(struct radv_shader_info *info, const struct glsl_type *type,
- int location)
+mark_16bit_ps_input(struct radv_shader_info *info, const struct glsl_type *type, int location)
{
- if (glsl_type_is_scalar(type) || glsl_type_is_vector(type) || glsl_type_is_matrix(type)) {
- unsigned attrib_count = glsl_count_attribute_slots(type, false);
- if (glsl_type_is_16bit(type)) {
- info->ps.float16_shaded_mask |= ((1ull << attrib_count) - 1) << location;
- }
- } else if (glsl_type_is_array(type)) {
- unsigned stride = glsl_count_attribute_slots(glsl_get_array_element(type), false);
- for (unsigned i = 0; i < glsl_get_length(type); ++i) {
- mark_16bit_ps_input(info, glsl_get_array_element(type), location + i * stride);
- }
- } else {
- assert(glsl_type_is_struct_or_ifc(type));
- for (unsigned i = 0; i < glsl_get_length(type); i++) {
- mark_16bit_ps_input(info, glsl_get_struct_field(type, i), location);
- location += glsl_count_attribute_slots(glsl_get_struct_field(type, i), false);
- }
- }
+ if (glsl_type_is_scalar(type) || glsl_type_is_vector(type) || glsl_type_is_matrix(type)) {
+ unsigned attrib_count = glsl_count_attribute_slots(type, false);
+ if (glsl_type_is_16bit(type)) {
+ info->ps.float16_shaded_mask |= ((1ull << attrib_count) - 1) << location;
+ }
+ } else if (glsl_type_is_array(type)) {
+ unsigned stride = glsl_count_attribute_slots(glsl_get_array_element(type), false);
+ for (unsigned i = 0; i < glsl_get_length(type); ++i) {
+ mark_16bit_ps_input(info, glsl_get_array_element(type), location + i * stride);
+ }
+ } else {
+ assert(glsl_type_is_struct_or_ifc(type));
+ for (unsigned i = 0; i < glsl_get_length(type); i++) {
+ mark_16bit_ps_input(info, glsl_get_struct_field(type, i), location);
+ location += glsl_count_attribute_slots(glsl_get_struct_field(type, i), false);
+ }
+ }
}
static void
gather_info_input_decl_ps(const nir_shader *nir, const nir_variable *var,
- struct radv_shader_info *info)
+ struct radv_shader_info *info)
{
- unsigned attrib_count = glsl_count_attribute_slots(var->type, false);
- int idx = var->data.location;
-
- switch (idx) {
- case VARYING_SLOT_PNTC:
- info->ps.has_pcoord = true;
- break;
- case VARYING_SLOT_PRIMITIVE_ID:
- info->ps.prim_id_input = true;
- break;
- case VARYING_SLOT_LAYER:
- info->ps.layer_input = true;
- break;
- case VARYING_SLOT_CLIP_DIST0:
- case VARYING_SLOT_CLIP_DIST1:
- info->ps.num_input_clips_culls += attrib_count;
- break;
- case VARYING_SLOT_VIEWPORT:
- info->ps.viewport_index_input = true;
- break;
- default:
- break;
- }
-
- if (var->data.compact) {
- unsigned component_count = var->data.location_frac +
- glsl_get_length(var->type);
- attrib_count = (component_count + 3) / 4;
- } else {
- mark_16bit_ps_input(info, var->type, var->data.driver_location);
- }
-
- uint64_t mask = ((1ull << attrib_count) - 1);
-
- if (var->data.interpolation == INTERP_MODE_FLAT)
- info->ps.flat_shaded_mask |= mask << var->data.driver_location;
- if (var->data.interpolation == INTERP_MODE_EXPLICIT)
- info->ps.explicit_shaded_mask |= mask << var->data.driver_location;
-
- if (var->data.location >= VARYING_SLOT_VAR0)
- info->ps.input_mask |= mask << (var->data.location - VARYING_SLOT_VAR0);
+ unsigned attrib_count = glsl_count_attribute_slots(var->type, false);
+ int idx = var->data.location;
+
+ switch (idx) {
+ case VARYING_SLOT_PNTC:
+ info->ps.has_pcoord = true;
+ break;
+ case VARYING_SLOT_PRIMITIVE_ID:
+ info->ps.prim_id_input = true;
+ break;
+ case VARYING_SLOT_LAYER:
+ info->ps.layer_input = true;
+ break;
+ case VARYING_SLOT_CLIP_DIST0:
+ case VARYING_SLOT_CLIP_DIST1:
+ info->ps.num_input_clips_culls += attrib_count;
+ break;
+ case VARYING_SLOT_VIEWPORT:
+ info->ps.viewport_index_input = true;
+ break;
+ default:
+ break;
+ }
+
+ if (var->data.compact) {
+ unsigned component_count = var->data.location_frac + glsl_get_length(var->type);
+ attrib_count = (component_count + 3) / 4;
+ } else {
+ mark_16bit_ps_input(info, var->type, var->data.driver_location);
+ }
+
+ uint64_t mask = ((1ull << attrib_count) - 1);
+
+ if (var->data.interpolation == INTERP_MODE_FLAT)
+ info->ps.flat_shaded_mask |= mask << var->data.driver_location;
+ if (var->data.interpolation == INTERP_MODE_EXPLICIT)
+ info->ps.explicit_shaded_mask |= mask << var->data.driver_location;
+
+ if (var->data.location >= VARYING_SLOT_VAR0)
+ info->ps.input_mask |= mask << (var->data.location - VARYING_SLOT_VAR0);
}
static void
gather_info_input_decl(const nir_shader *nir, const nir_variable *var,
- struct radv_shader_info *info,
- const struct radv_shader_variant_key *key)
+ struct radv_shader_info *info, const struct radv_shader_variant_key *key)
{
- switch (nir->info.stage) {
- case MESA_SHADER_VERTEX:
- gather_info_input_decl_vs(nir, var, info, key);
- break;
- case MESA_SHADER_FRAGMENT:
- gather_info_input_decl_ps(nir, var, info);
- break;
- default:
- break;
- }
+ switch (nir->info.stage) {
+ case MESA_SHADER_VERTEX:
+ gather_info_input_decl_vs(nir, var, info, key);
+ break;
+ case MESA_SHADER_FRAGMENT:
+ gather_info_input_decl_ps(nir, var, info);
+ break;
+ default:
+ break;
+ }
}
static void
gather_info_output_decl_ps(const nir_shader *nir, const nir_variable *var,
- struct radv_shader_info *info)
+ struct radv_shader_info *info)
{
- int idx = var->data.location;
-
- switch (idx) {
- case FRAG_RESULT_DEPTH:
- info->ps.writes_z = true;
- break;
- case FRAG_RESULT_STENCIL:
- info->ps.writes_stencil = true;
- break;
- case FRAG_RESULT_SAMPLE_MASK:
- info->ps.writes_sample_mask = true;
- break;
- default:
- break;
- }
+ int idx = var->data.location;
+
+ switch (idx) {
+ case FRAG_RESULT_DEPTH:
+ info->ps.writes_z = true;
+ break;
+ case FRAG_RESULT_STENCIL:
+ info->ps.writes_stencil = true;
+ break;
+ case FRAG_RESULT_SAMPLE_MASK:
+ info->ps.writes_sample_mask = true;
+ break;
+ default:
+ break;
+ }
}
static void
gather_info_output_decl_gs(const nir_shader *nir, const nir_variable *var,
- struct radv_shader_info *info)
+ struct radv_shader_info *info)
{
- unsigned num_components = glsl_get_component_slots(var->type);
- unsigned stream = var->data.stream;
- unsigned idx = var->data.location;
+ unsigned num_components = glsl_get_component_slots(var->type);
+ unsigned stream = var->data.stream;
+ unsigned idx = var->data.location;
- assert(stream < 4);
+ assert(stream < 4);
- info->gs.max_stream = MAX2(info->gs.max_stream, stream);
- info->gs.num_stream_output_components[stream] += num_components;
- info->gs.output_streams[idx] = stream;
+ info->gs.max_stream = MAX2(info->gs.max_stream, stream);
+ info->gs.num_stream_output_components[stream] += num_components;
+ info->gs.output_streams[idx] = stream;
}
static void
gather_info_output_decl(const nir_shader *nir, const nir_variable *var,
- struct radv_shader_info *info,
- const struct radv_shader_variant_key *key)
+ struct radv_shader_info *info, const struct radv_shader_variant_key *key)
{
- struct radv_vs_output_info *vs_info = NULL;
-
- switch (nir->info.stage) {
- case MESA_SHADER_FRAGMENT:
- gather_info_output_decl_ps(nir, var, info);
- break;
- case MESA_SHADER_VERTEX:
- if (!key->vs_common_out.as_ls &&
- !key->vs_common_out.as_es)
- vs_info = &info->vs.outinfo;
-
- /* TODO: Adjust as_ls/as_nng. */
- if (!key->vs_common_out.as_ls && key->vs_common_out.as_ngg)
- gather_info_output_decl_gs(nir, var, info);
- break;
- case MESA_SHADER_GEOMETRY:
- vs_info = &info->vs.outinfo;
- gather_info_output_decl_gs(nir, var, info);
- break;
- case MESA_SHADER_TESS_EVAL:
- if (!key->vs_common_out.as_es)
- vs_info = &info->tes.outinfo;
- break;
- default:
- break;
- }
-
- if (vs_info) {
- switch (var->data.location) {
- case VARYING_SLOT_CLIP_DIST0:
- vs_info->clip_dist_mask =
- (1 << nir->info.clip_distance_array_size) - 1;
- vs_info->cull_dist_mask =
- (1 << nir->info.cull_distance_array_size) - 1;
- vs_info->cull_dist_mask <<= nir->info.clip_distance_array_size;
- break;
- case VARYING_SLOT_PSIZ:
- vs_info->writes_pointsize = true;
- break;
- case VARYING_SLOT_VIEWPORT:
- vs_info->writes_viewport_index = true;
- break;
- case VARYING_SLOT_LAYER:
- vs_info->writes_layer = true;
- break;
- case VARYING_SLOT_PRIMITIVE_SHADING_RATE:
- vs_info->writes_primitive_shading_rate = true;
- break;
- default:
- break;
- }
- }
+ struct radv_vs_output_info *vs_info = NULL;
+
+ switch (nir->info.stage) {
+ case MESA_SHADER_FRAGMENT:
+ gather_info_output_decl_ps(nir, var, info);
+ break;
+ case MESA_SHADER_VERTEX:
+ if (!key->vs_common_out.as_ls && !key->vs_common_out.as_es)
+ vs_info = &info->vs.outinfo;
+
+ /* TODO: Adjust as_ls/as_ngg. */
+ if (!key->vs_common_out.as_ls && key->vs_common_out.as_ngg)
+ gather_info_output_decl_gs(nir, var, info);
+ break;
+ case MESA_SHADER_GEOMETRY:
+ vs_info = &info->vs.outinfo;
+ gather_info_output_decl_gs(nir, var, info);
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ if (!key->vs_common_out.as_es)
+ vs_info = &info->tes.outinfo;
+ break;
+ default:
+ break;
+ }
+
+ if (vs_info) {
+ switch (var->data.location) {
+ case VARYING_SLOT_CLIP_DIST0:
+ vs_info->clip_dist_mask = (1 << nir->info.clip_distance_array_size) - 1;
+ vs_info->cull_dist_mask = (1 << nir->info.cull_distance_array_size) - 1;
+ vs_info->cull_dist_mask <<= nir->info.clip_distance_array_size;
+ break;
+ case VARYING_SLOT_PSIZ:
+ vs_info->writes_pointsize = true;
+ break;
+ case VARYING_SLOT_VIEWPORT:
+ vs_info->writes_viewport_index = true;
+ break;
+ case VARYING_SLOT_LAYER:
+ vs_info->writes_layer = true;
+ break;
+ case VARYING_SLOT_PRIMITIVE_SHADING_RATE:
+ vs_info->writes_primitive_shading_rate = true;
+ break;
+ default:
+ break;
+ }
+ }
}
static void
gather_xfb_info(const nir_shader *nir, struct radv_shader_info *info)
{
- nir_xfb_info *xfb = nir_gather_xfb_info(nir, NULL);
- struct radv_streamout_info *so = &info->so;
-
- if (!xfb)
- return;
+ nir_xfb_info *xfb = nir_gather_xfb_info(nir, NULL);
+ struct radv_streamout_info *so = &info->so;
- assert(xfb->output_count < MAX_SO_OUTPUTS);
- so->num_outputs = xfb->output_count;
+ if (!xfb)
+ return;
- for (unsigned i = 0; i < xfb->output_count; i++) {
- struct radv_stream_output *output = &so->outputs[i];
+ assert(xfb->output_count < MAX_SO_OUTPUTS);
+ so->num_outputs = xfb->output_count;
- output->buffer = xfb->outputs[i].buffer;
- output->stream = xfb->buffer_to_stream[xfb->outputs[i].buffer];
- output->offset = xfb->outputs[i].offset;
- output->location = xfb->outputs[i].location;
- output->component_mask = xfb->outputs[i].component_mask;
+ for (unsigned i = 0; i < xfb->output_count; i++) {
+ struct radv_stream_output *output = &so->outputs[i];
- so->enabled_stream_buffers_mask |=
- (1 << output->buffer) << (output->stream * 4);
+ output->buffer = xfb->outputs[i].buffer;
+ output->stream = xfb->buffer_to_stream[xfb->outputs[i].buffer];
+ output->offset = xfb->outputs[i].offset;
+ output->location = xfb->outputs[i].location;
+ output->component_mask = xfb->outputs[i].component_mask;
- }
+ so->enabled_stream_buffers_mask |= (1 << output->buffer) << (output->stream * 4);
+ }
- for (unsigned i = 0; i < NIR_MAX_XFB_BUFFERS; i++) {
- so->strides[i] = xfb->buffers[i].stride / 4;
- }
+ for (unsigned i = 0; i < NIR_MAX_XFB_BUFFERS; i++) {
+ so->strides[i] = xfb->buffers[i].stride / 4;
+ }
- ralloc_free(xfb);
+ ralloc_free(xfb);
}
void
radv_nir_shader_info_init(struct radv_shader_info *info)
{
- /* Assume that shaders only have 32-bit push constants by default. */
- info->min_push_constant_used = UINT8_MAX;
- info->has_only_32bit_push_constants = true;
+ /* Assume that shaders only have 32-bit push constants by default. */
+ info->min_push_constant_used = UINT8_MAX;
+ info->has_only_32bit_push_constants = true;
}
void
-radv_nir_shader_info_pass(const struct nir_shader *nir,
- const struct radv_pipeline_layout *layout,
- const struct radv_shader_variant_key *key,
- struct radv_shader_info *info)
+radv_nir_shader_info_pass(const struct nir_shader *nir, const struct radv_pipeline_layout *layout,
+ const struct radv_shader_variant_key *key, struct radv_shader_info *info)
{
- struct nir_function *func =
- (struct nir_function *)exec_list_get_head_const(&nir->functions);
-
- if (layout && layout->dynamic_offset_count &&
- (layout->dynamic_shader_stages & mesa_to_vk_shader_stage(nir->info.stage))) {
- info->loads_push_constants = true;
- info->loads_dynamic_offsets = true;
- }
-
- nir_foreach_shader_in_variable(variable, nir)
- gather_info_input_decl(nir, variable, info, key);
-
- nir_foreach_block(block, func->impl) {
- gather_info_block(nir, block, info);
- }
-
- nir_foreach_shader_out_variable(variable, nir)
- gather_info_output_decl(nir, variable, info, key);
-
- if (nir->info.stage == MESA_SHADER_VERTEX ||
- nir->info.stage == MESA_SHADER_TESS_EVAL ||
- nir->info.stage == MESA_SHADER_GEOMETRY)
- gather_xfb_info(nir, info);
-
- /* Make sure to export the LayerID if the fragment shader needs it. */
- if (key->vs_common_out.export_layer_id) {
- switch (nir->info.stage) {
- case MESA_SHADER_VERTEX:
- info->vs.output_usage_mask[VARYING_SLOT_LAYER] |= 0x1;
- break;
- case MESA_SHADER_TESS_EVAL:
- info->tes.output_usage_mask[VARYING_SLOT_LAYER] |= 0x1;
- break;
- case MESA_SHADER_GEOMETRY:
- info->gs.output_usage_mask[VARYING_SLOT_LAYER] |= 0x1;
- break;
- default:
- break;
- }
- }
-
- /* Make sure to export the LayerID if the subpass has multiviews. */
- if (key->has_multiview_view_index) {
- switch (nir->info.stage) {
- case MESA_SHADER_VERTEX:
- info->vs.outinfo.writes_layer = true;
- break;
- case MESA_SHADER_TESS_EVAL:
- info->tes.outinfo.writes_layer = true;
- break;
- case MESA_SHADER_GEOMETRY:
- info->vs.outinfo.writes_layer = true;
- break;
- default:
- break;
- }
- }
-
- /* Make sure to export the PrimitiveID if the fragment shader needs it. */
- if (key->vs_common_out.export_prim_id) {
- switch (nir->info.stage) {
- case MESA_SHADER_VERTEX:
- info->vs.outinfo.export_prim_id = true;
- break;
- case MESA_SHADER_TESS_EVAL:
- info->tes.outinfo.export_prim_id = true;
- break;
- case MESA_SHADER_GEOMETRY:
- info->vs.outinfo.export_prim_id = true;
- break;
- default:
- break;
- }
- }
-
- /* Make sure to export the ViewportIndex if the fragment shader needs it. */
- if (key->vs_common_out.export_viewport_index) {
- switch (nir->info.stage) {
- case MESA_SHADER_VERTEX:
- info->vs.output_usage_mask[VARYING_SLOT_VIEWPORT] |= 0x1;
- break;
- case MESA_SHADER_TESS_EVAL:
- info->tes.output_usage_mask[VARYING_SLOT_VIEWPORT] |= 0x1;
- break;
- case MESA_SHADER_GEOMETRY:
- info->gs.output_usage_mask[VARYING_SLOT_VIEWPORT] |= 0x1;
- break;
- default:
- break;
- }
- }
-
- if (nir->info.stage == MESA_SHADER_FRAGMENT)
- info->ps.num_interp = nir->num_inputs;
-
- switch (nir->info.stage) {
- case MESA_SHADER_COMPUTE:
- for (int i = 0; i < 3; ++i)
- info->cs.block_size[i] = nir->info.cs.local_size[i];
- break;
- case MESA_SHADER_FRAGMENT:
- info->ps.can_discard = nir->info.fs.uses_discard;
- info->ps.early_fragment_test = nir->info.fs.early_fragment_tests;
- info->ps.post_depth_coverage = nir->info.fs.post_depth_coverage;
- info->ps.depth_layout = nir->info.fs.depth_layout;
- info->ps.uses_sample_shading = nir->info.fs.uses_sample_shading;
- break;
- case MESA_SHADER_GEOMETRY:
- info->gs.vertices_in = nir->info.gs.vertices_in;
- info->gs.vertices_out = nir->info.gs.vertices_out;
- info->gs.output_prim = nir->info.gs.output_primitive;
- info->gs.invocations = nir->info.gs.invocations;
- break;
- case MESA_SHADER_TESS_EVAL:
- info->tes.primitive_mode = nir->info.tess.primitive_mode;
- info->tes.spacing = nir->info.tess.spacing;
- info->tes.ccw = nir->info.tess.ccw;
- info->tes.point_mode = nir->info.tess.point_mode;
- info->tes.as_es = key->vs_common_out.as_es;
- info->tes.export_prim_id = key->vs_common_out.export_prim_id;
- info->is_ngg = key->vs_common_out.as_ngg;
- info->is_ngg_passthrough = key->vs_common_out.as_ngg_passthrough;
- break;
- case MESA_SHADER_TESS_CTRL:
- info->tcs.tcs_vertices_out = nir->info.tess.tcs_vertices_out;
- break;
- case MESA_SHADER_VERTEX:
- info->vs.as_es = key->vs_common_out.as_es;
- info->vs.as_ls = key->vs_common_out.as_ls;
- info->vs.export_prim_id = key->vs_common_out.export_prim_id;
- info->is_ngg = key->vs_common_out.as_ngg;
- info->is_ngg_passthrough = key->vs_common_out.as_ngg_passthrough;
- break;
- default:
- break;
- }
-
- if (nir->info.stage == MESA_SHADER_GEOMETRY) {
- unsigned add_clip = nir->info.clip_distance_array_size +
- nir->info.cull_distance_array_size > 4;
- info->gs.gsvs_vertex_size =
- (util_bitcount64(nir->info.outputs_written) + add_clip) * 16;
- info->gs.max_gsvs_emit_size =
- info->gs.gsvs_vertex_size * nir->info.gs.vertices_out;
- }
-
- /* Compute the ESGS item size for VS or TES as ES. */
- if ((nir->info.stage == MESA_SHADER_VERTEX ||
- nir->info.stage == MESA_SHADER_TESS_EVAL) &&
- key->vs_common_out.as_es) {
- struct radv_es_output_info *es_info =
- nir->info.stage == MESA_SHADER_VERTEX ? &info->vs.es_info : &info->tes.es_info;
- uint32_t num_outputs_written = nir->info.stage == MESA_SHADER_VERTEX
- ? info->vs.num_linked_outputs
- : info->tes.num_linked_outputs;
- es_info->esgs_itemsize = num_outputs_written * 16;
- }
-
- info->float_controls_mode = nir->info.float_controls_execution_mode;
-
- if (nir->info.stage == MESA_SHADER_FRAGMENT) {
- info->ps.allow_flat_shading =
- !(info->ps.uses_persp_or_linear_interp ||
- info->ps.needs_sample_positions ||
- info->ps.writes_memory ||
- nir->info.fs.needs_quad_helper_invocations ||
- BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) ||
- BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_POINT_COORD) ||
- BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID) ||
- BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_POS) ||
- BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN) ||
- BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_HELPER_INVOCATION));
- }
+ struct nir_function *func = (struct nir_function *)exec_list_get_head_const(&nir->functions);
+
+ if (layout && layout->dynamic_offset_count &&
+ (layout->dynamic_shader_stages & mesa_to_vk_shader_stage(nir->info.stage))) {
+ info->loads_push_constants = true;
+ info->loads_dynamic_offsets = true;
+ }
+
+ nir_foreach_shader_in_variable (variable, nir)
+ gather_info_input_decl(nir, variable, info, key);
+
+ nir_foreach_block (block, func->impl) {
+ gather_info_block(nir, block, info);
+ }
+
+ nir_foreach_shader_out_variable(variable, nir) gather_info_output_decl(nir, variable, info, key);
+
+ if (nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_EVAL ||
+ nir->info.stage == MESA_SHADER_GEOMETRY)
+ gather_xfb_info(nir, info);
+
+ /* Make sure to export the LayerID if the fragment shader needs it. */
+ if (key->vs_common_out.export_layer_id) {
+ switch (nir->info.stage) {
+ case MESA_SHADER_VERTEX:
+ info->vs.output_usage_mask[VARYING_SLOT_LAYER] |= 0x1;
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ info->tes.output_usage_mask[VARYING_SLOT_LAYER] |= 0x1;
+ break;
+ case MESA_SHADER_GEOMETRY:
+ info->gs.output_usage_mask[VARYING_SLOT_LAYER] |= 0x1;
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* Make sure to export the LayerID if the subpass has multiviews. */
+ if (key->has_multiview_view_index) {
+ switch (nir->info.stage) {
+ case MESA_SHADER_VERTEX:
+ info->vs.outinfo.writes_layer = true;
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ info->tes.outinfo.writes_layer = true;
+ break;
+ case MESA_SHADER_GEOMETRY:
+ info->vs.outinfo.writes_layer = true;
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* Make sure to export the PrimitiveID if the fragment shader needs it. */
+ if (key->vs_common_out.export_prim_id) {
+ switch (nir->info.stage) {
+ case MESA_SHADER_VERTEX:
+ info->vs.outinfo.export_prim_id = true;
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ info->tes.outinfo.export_prim_id = true;
+ break;
+ case MESA_SHADER_GEOMETRY:
+ info->vs.outinfo.export_prim_id = true;
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* Make sure to export the ViewportIndex if the fragment shader needs it. */
+ if (key->vs_common_out.export_viewport_index) {
+ switch (nir->info.stage) {
+ case MESA_SHADER_VERTEX:
+ info->vs.output_usage_mask[VARYING_SLOT_VIEWPORT] |= 0x1;
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ info->tes.output_usage_mask[VARYING_SLOT_VIEWPORT] |= 0x1;
+ break;
+ case MESA_SHADER_GEOMETRY:
+ info->gs.output_usage_mask[VARYING_SLOT_VIEWPORT] |= 0x1;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (nir->info.stage == MESA_SHADER_FRAGMENT)
+ info->ps.num_interp = nir->num_inputs;
+
+ switch (nir->info.stage) {
+ case MESA_SHADER_COMPUTE:
+ for (int i = 0; i < 3; ++i)
+ info->cs.block_size[i] = nir->info.cs.local_size[i];
+ break;
+ case MESA_SHADER_FRAGMENT:
+ info->ps.can_discard = nir->info.fs.uses_discard;
+ info->ps.early_fragment_test = nir->info.fs.early_fragment_tests;
+ info->ps.post_depth_coverage = nir->info.fs.post_depth_coverage;
+ info->ps.depth_layout = nir->info.fs.depth_layout;
+ info->ps.uses_sample_shading = nir->info.fs.uses_sample_shading;
+ break;
+ case MESA_SHADER_GEOMETRY:
+ info->gs.vertices_in = nir->info.gs.vertices_in;
+ info->gs.vertices_out = nir->info.gs.vertices_out;
+ info->gs.output_prim = nir->info.gs.output_primitive;
+ info->gs.invocations = nir->info.gs.invocations;
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ info->tes.primitive_mode = nir->info.tess.primitive_mode;
+ info->tes.spacing = nir->info.tess.spacing;
+ info->tes.ccw = nir->info.tess.ccw;
+ info->tes.point_mode = nir->info.tess.point_mode;
+ info->tes.as_es = key->vs_common_out.as_es;
+ info->tes.export_prim_id = key->vs_common_out.export_prim_id;
+ info->is_ngg = key->vs_common_out.as_ngg;
+ info->is_ngg_passthrough = key->vs_common_out.as_ngg_passthrough;
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ info->tcs.tcs_vertices_out = nir->info.tess.tcs_vertices_out;
+ break;
+ case MESA_SHADER_VERTEX:
+ info->vs.as_es = key->vs_common_out.as_es;
+ info->vs.as_ls = key->vs_common_out.as_ls;
+ info->vs.export_prim_id = key->vs_common_out.export_prim_id;
+ info->is_ngg = key->vs_common_out.as_ngg;
+ info->is_ngg_passthrough = key->vs_common_out.as_ngg_passthrough;
+ break;
+ default:
+ break;
+ }
+
+ if (nir->info.stage == MESA_SHADER_GEOMETRY) {
+ unsigned add_clip =
+ nir->info.clip_distance_array_size + nir->info.cull_distance_array_size > 4;
+ info->gs.gsvs_vertex_size = (util_bitcount64(nir->info.outputs_written) + add_clip) * 16;
+ info->gs.max_gsvs_emit_size = info->gs.gsvs_vertex_size * nir->info.gs.vertices_out;
+ }
+
+ /* Compute the ESGS item size for VS or TES as ES. */
+ if ((nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_EVAL) &&
+ key->vs_common_out.as_es) {
+ struct radv_es_output_info *es_info =
+ nir->info.stage == MESA_SHADER_VERTEX ? &info->vs.es_info : &info->tes.es_info;
+ uint32_t num_outputs_written = nir->info.stage == MESA_SHADER_VERTEX
+ ? info->vs.num_linked_outputs
+ : info->tes.num_linked_outputs;
+ es_info->esgs_itemsize = num_outputs_written * 16;
+ }
+
+ info->float_controls_mode = nir->info.float_controls_execution_mode;
+
+ if (nir->info.stage == MESA_SHADER_FRAGMENT) {
+ info->ps.allow_flat_shading =
+ !(info->ps.uses_persp_or_linear_interp || info->ps.needs_sample_positions ||
+ info->ps.writes_memory || nir->info.fs.needs_quad_helper_invocations ||
+ BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) ||
+ BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_POINT_COORD) ||
+ BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID) ||
+ BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_POS) ||
+ BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN) ||
+ BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_HELPER_INVOCATION));
+ }
}
diff --git a/src/amd/vulkan/radv_sqtt.c b/src/amd/vulkan/radv_sqtt.c
index bde7924c40e..ed3a6e8168f 100644
--- a/src/amd/vulkan/radv_sqtt.c
+++ b/src/amd/vulkan/radv_sqtt.c
@@ -23,8 +23,8 @@
#include <inttypes.h>
-#include "radv_private.h"
#include "radv_cs.h"
+#include "radv_private.h"
#include "sid.h"
#define SQTT_BUFFER_ALIGN_SHIFT 12
@@ -32,650 +32,604 @@
static bool
radv_se_is_disabled(struct radv_device *device, unsigned se)
{
- /* No active CU on the SE means it is disabled. */
- return device->physical_device->rad_info.cu_mask[se][0] == 0;
+ /* No active CU on the SE means it is disabled. */
+ return device->physical_device->rad_info.cu_mask[se][0] == 0;
}
static void
-radv_emit_thread_trace_start(struct radv_device *device,
- struct radeon_cmdbuf *cs,
- uint32_t queue_family_index)
+radv_emit_thread_trace_start(struct radv_device *device, struct radeon_cmdbuf *cs,
+ uint32_t queue_family_index)
{
- uint32_t shifted_size = device->thread_trace.buffer_size >> SQTT_BUFFER_ALIGN_SHIFT;
- struct radeon_info *rad_info = &device->physical_device->rad_info;
- unsigned max_se = rad_info->max_se;
-
- assert(device->physical_device->rad_info.chip_class >= GFX8);
-
- for (unsigned se = 0; se < max_se; se++) {
- uint64_t va = radv_buffer_get_va(device->thread_trace.bo);
- uint64_t data_va = ac_thread_trace_get_data_va(rad_info, &device->thread_trace, va, se);
- uint64_t shifted_va = data_va >> SQTT_BUFFER_ALIGN_SHIFT;
- int first_active_cu = ffs(device->physical_device->rad_info.cu_mask[se][0]);
-
- if (radv_se_is_disabled(device, se))
- continue;
-
- /* Target SEx and SH0. */
- radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX,
- S_030800_SE_INDEX(se) |
- S_030800_SH_INDEX(0) |
- S_030800_INSTANCE_BROADCAST_WRITES(1));
-
- if (device->physical_device->rad_info.chip_class >= GFX10) {
- /* Order seems important for the following 2 registers. */
- radeon_set_privileged_config_reg(cs, R_008D04_SQ_THREAD_TRACE_BUF0_SIZE,
- S_008D04_SIZE(shifted_size) |
- S_008D04_BASE_HI(shifted_va >> 32));
-
- radeon_set_privileged_config_reg(cs, R_008D00_SQ_THREAD_TRACE_BUF0_BASE,
- S_008D00_BASE_LO(shifted_va));
-
- radeon_set_privileged_config_reg(cs, R_008D14_SQ_THREAD_TRACE_MASK,
- S_008D14_WTYPE_INCLUDE(0x7f) | /* all shader stages */
- S_008D14_SA_SEL(0) |
- S_008D14_WGP_SEL(first_active_cu / 2) |
- S_008D14_SIMD_SEL(0));
-
- uint32_t thread_trace_token_mask =
- S_008D18_REG_INCLUDE(V_008D18_REG_INCLUDE_SQDEC |
- V_008D18_REG_INCLUDE_SHDEC |
- V_008D18_REG_INCLUDE_GFXUDEC |
- V_008D18_REG_INCLUDE_COMP |
- V_008D18_REG_INCLUDE_CONTEXT |
- V_008D18_REG_INCLUDE_CONFIG);
-
- /* Performance counters with SQTT are considered
- * deprecated.
- */
- thread_trace_token_mask |= S_008D18_TOKEN_EXCLUDE(V_008D18_TOKEN_EXCLUDE_PERF);
-
- radeon_set_privileged_config_reg(cs, R_008D18_SQ_THREAD_TRACE_TOKEN_MASK,
- thread_trace_token_mask);
-
- uint32_t thread_trace_ctrl = S_008D1C_MODE(1) |
- S_008D1C_HIWATER(5) |
- S_008D1C_UTIL_TIMER(1) |
- S_008D1C_RT_FREQ(2) | /* 4096 clk */
- S_008D1C_DRAW_EVENT_EN(1) |
- S_008D1C_REG_STALL_EN(1) |
- S_008D1C_SPI_STALL_EN(1) |
- S_008D1C_SQ_STALL_EN(1) |
- S_008D1C_REG_DROP_ON_STALL(0);
-
- if (device->physical_device->rad_info.chip_class == GFX10_3)
- thread_trace_ctrl |= S_008D1C_LOWATER_OFFSET(4);
-
- /* Should be emitted last (it enables thread traces). */
- radeon_set_privileged_config_reg(cs, R_008D1C_SQ_THREAD_TRACE_CTRL,
- thread_trace_ctrl);
- } else {
- /* Order seems important for the following 4 registers. */
- radeon_set_uconfig_reg(cs, R_030CDC_SQ_THREAD_TRACE_BASE2,
- S_030CDC_ADDR_HI(shifted_va >> 32));
-
- radeon_set_uconfig_reg(cs, R_030CC0_SQ_THREAD_TRACE_BASE,
- S_030CC0_ADDR(shifted_va));
-
- radeon_set_uconfig_reg(cs, R_030CC4_SQ_THREAD_TRACE_SIZE,
- S_030CC4_SIZE(shifted_size));
-
- radeon_set_uconfig_reg(cs, R_030CD4_SQ_THREAD_TRACE_CTRL,
- S_030CD4_RESET_BUFFER(1));
-
- uint32_t thread_trace_mask = S_030CC8_CU_SEL(first_active_cu) |
- S_030CC8_SH_SEL(0) |
- S_030CC8_SIMD_EN(0xf) |
- S_030CC8_VM_ID_MASK(0) |
- S_030CC8_REG_STALL_EN(1) |
- S_030CC8_SPI_STALL_EN(1) |
- S_030CC8_SQ_STALL_EN(1);
-
- if (device->physical_device->rad_info.chip_class < GFX9) {
- thread_trace_mask |= S_030CC8_RANDOM_SEED(0xffff);
- }
-
- radeon_set_uconfig_reg(cs, R_030CC8_SQ_THREAD_TRACE_MASK,
- thread_trace_mask);
-
- /* Trace all tokens and registers. */
- radeon_set_uconfig_reg(cs, R_030CCC_SQ_THREAD_TRACE_TOKEN_MASK,
- S_030CCC_TOKEN_MASK(0xbfff) |
- S_030CCC_REG_MASK(0xff) |
- S_030CCC_REG_DROP_ON_STALL(0));
-
- /* Enable SQTT perf counters for all CUs. */
- radeon_set_uconfig_reg(cs, R_030CD0_SQ_THREAD_TRACE_PERF_MASK,
- S_030CD0_SH0_MASK(0xffff) |
- S_030CD0_SH1_MASK(0xffff));
-
- radeon_set_uconfig_reg(cs, R_030CE0_SQ_THREAD_TRACE_TOKEN_MASK2,
- S_030CE0_INST_MASK(0xffffffff));
-
- radeon_set_uconfig_reg(cs, R_030CEC_SQ_THREAD_TRACE_HIWATER,
- S_030CEC_HIWATER(4));
-
- if (device->physical_device->rad_info.chip_class == GFX9) {
- /* Reset thread trace status errors. */
- radeon_set_uconfig_reg(cs, R_030CE8_SQ_THREAD_TRACE_STATUS,
- S_030CE8_UTC_ERROR(0));
- }
-
- /* Enable the thread trace mode. */
- uint32_t thread_trace_mode = S_030CD8_MASK_PS(1) |
- S_030CD8_MASK_VS(1) |
- S_030CD8_MASK_GS(1) |
- S_030CD8_MASK_ES(1) |
- S_030CD8_MASK_HS(1) |
- S_030CD8_MASK_LS(1) |
- S_030CD8_MASK_CS(1) |
- S_030CD8_AUTOFLUSH_EN(1) | /* periodically flush SQTT data to memory */
- S_030CD8_MODE(1);
-
- if (device->physical_device->rad_info.chip_class == GFX9) {
- /* Count SQTT traffic in TCC perf counters. */
- thread_trace_mode |= S_030CD8_TC_PERF_EN(1);
- }
-
- radeon_set_uconfig_reg(cs, R_030CD8_SQ_THREAD_TRACE_MODE,
- thread_trace_mode);
- }
- }
-
- /* Restore global broadcasting. */
- radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX,
- S_030800_SE_BROADCAST_WRITES(1) |
- S_030800_SH_BROADCAST_WRITES(1) |
- S_030800_INSTANCE_BROADCAST_WRITES(1));
-
- /* Start the thread trace with a different event based on the queue. */
- if (queue_family_index == RADV_QUEUE_COMPUTE &&
- device->physical_device->rad_info.chip_class >= GFX7) {
- radeon_set_sh_reg(cs, R_00B878_COMPUTE_THREAD_TRACE_ENABLE,
- S_00B878_THREAD_TRACE_ENABLE(1));
- } else {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_THREAD_TRACE_START) | EVENT_INDEX(0));
- }
+ uint32_t shifted_size = device->thread_trace.buffer_size >> SQTT_BUFFER_ALIGN_SHIFT;
+ struct radeon_info *rad_info = &device->physical_device->rad_info;
+ unsigned max_se = rad_info->max_se;
+
+ assert(device->physical_device->rad_info.chip_class >= GFX8);
+
+ for (unsigned se = 0; se < max_se; se++) {
+ uint64_t va = radv_buffer_get_va(device->thread_trace.bo);
+ uint64_t data_va = ac_thread_trace_get_data_va(rad_info, &device->thread_trace, va, se);
+ uint64_t shifted_va = data_va >> SQTT_BUFFER_ALIGN_SHIFT;
+ int first_active_cu = ffs(device->physical_device->rad_info.cu_mask[se][0]);
+
+ if (radv_se_is_disabled(device, se))
+ continue;
+
+ /* Target SEx and SH0. */
+ radeon_set_uconfig_reg(
+ cs, R_030800_GRBM_GFX_INDEX,
+ S_030800_SE_INDEX(se) | S_030800_SH_INDEX(0) | S_030800_INSTANCE_BROADCAST_WRITES(1));
+
+ if (device->physical_device->rad_info.chip_class >= GFX10) {
+ /* Order seems important for the following 2 registers. */
+ radeon_set_privileged_config_reg(
+ cs, R_008D04_SQ_THREAD_TRACE_BUF0_SIZE,
+ S_008D04_SIZE(shifted_size) | S_008D04_BASE_HI(shifted_va >> 32));
+
+ radeon_set_privileged_config_reg(cs, R_008D00_SQ_THREAD_TRACE_BUF0_BASE,
+ S_008D00_BASE_LO(shifted_va));
+
+ radeon_set_privileged_config_reg(
+ cs, R_008D14_SQ_THREAD_TRACE_MASK,
+ S_008D14_WTYPE_INCLUDE(0x7f) | /* all shader stages */
+ S_008D14_SA_SEL(0) | S_008D14_WGP_SEL(first_active_cu / 2) | S_008D14_SIMD_SEL(0));
+
+ uint32_t thread_trace_token_mask = S_008D18_REG_INCLUDE(
+ V_008D18_REG_INCLUDE_SQDEC | V_008D18_REG_INCLUDE_SHDEC | V_008D18_REG_INCLUDE_GFXUDEC |
+ V_008D18_REG_INCLUDE_COMP | V_008D18_REG_INCLUDE_CONTEXT | V_008D18_REG_INCLUDE_CONFIG);
+
+ /* Performance counters with SQTT are considered
+ * deprecated.
+ */
+ thread_trace_token_mask |= S_008D18_TOKEN_EXCLUDE(V_008D18_TOKEN_EXCLUDE_PERF);
+
+ radeon_set_privileged_config_reg(cs, R_008D18_SQ_THREAD_TRACE_TOKEN_MASK,
+ thread_trace_token_mask);
+
+ uint32_t thread_trace_ctrl = S_008D1C_MODE(1) | S_008D1C_HIWATER(5) |
+ S_008D1C_UTIL_TIMER(1) | S_008D1C_RT_FREQ(2) | /* 4096 clk */
+ S_008D1C_DRAW_EVENT_EN(1) | S_008D1C_REG_STALL_EN(1) |
+ S_008D1C_SPI_STALL_EN(1) | S_008D1C_SQ_STALL_EN(1) |
+ S_008D1C_REG_DROP_ON_STALL(0);
+
+ if (device->physical_device->rad_info.chip_class == GFX10_3)
+ thread_trace_ctrl |= S_008D1C_LOWATER_OFFSET(4);
+
+ /* Should be emitted last (it enables thread traces). */
+ radeon_set_privileged_config_reg(cs, R_008D1C_SQ_THREAD_TRACE_CTRL, thread_trace_ctrl);
+ } else {
+ /* Order seems important for the following 4 registers. */
+ radeon_set_uconfig_reg(cs, R_030CDC_SQ_THREAD_TRACE_BASE2,
+ S_030CDC_ADDR_HI(shifted_va >> 32));
+
+ radeon_set_uconfig_reg(cs, R_030CC0_SQ_THREAD_TRACE_BASE, S_030CC0_ADDR(shifted_va));
+
+ radeon_set_uconfig_reg(cs, R_030CC4_SQ_THREAD_TRACE_SIZE, S_030CC4_SIZE(shifted_size));
+
+ radeon_set_uconfig_reg(cs, R_030CD4_SQ_THREAD_TRACE_CTRL, S_030CD4_RESET_BUFFER(1));
+
+ uint32_t thread_trace_mask = S_030CC8_CU_SEL(first_active_cu) | S_030CC8_SH_SEL(0) |
+ S_030CC8_SIMD_EN(0xf) | S_030CC8_VM_ID_MASK(0) |
+ S_030CC8_REG_STALL_EN(1) | S_030CC8_SPI_STALL_EN(1) |
+ S_030CC8_SQ_STALL_EN(1);
+
+ if (device->physical_device->rad_info.chip_class < GFX9) {
+ thread_trace_mask |= S_030CC8_RANDOM_SEED(0xffff);
+ }
+
+ radeon_set_uconfig_reg(cs, R_030CC8_SQ_THREAD_TRACE_MASK, thread_trace_mask);
+
+ /* Trace all tokens and registers. */
+ radeon_set_uconfig_reg(
+ cs, R_030CCC_SQ_THREAD_TRACE_TOKEN_MASK,
+ S_030CCC_TOKEN_MASK(0xbfff) | S_030CCC_REG_MASK(0xff) | S_030CCC_REG_DROP_ON_STALL(0));
+
+ /* Enable SQTT perf counters for all CUs. */
+ radeon_set_uconfig_reg(cs, R_030CD0_SQ_THREAD_TRACE_PERF_MASK,
+ S_030CD0_SH0_MASK(0xffff) | S_030CD0_SH1_MASK(0xffff));
+
+ radeon_set_uconfig_reg(cs, R_030CE0_SQ_THREAD_TRACE_TOKEN_MASK2,
+ S_030CE0_INST_MASK(0xffffffff));
+
+ radeon_set_uconfig_reg(cs, R_030CEC_SQ_THREAD_TRACE_HIWATER, S_030CEC_HIWATER(4));
+
+ if (device->physical_device->rad_info.chip_class == GFX9) {
+ /* Reset thread trace status errors. */
+ radeon_set_uconfig_reg(cs, R_030CE8_SQ_THREAD_TRACE_STATUS, S_030CE8_UTC_ERROR(0));
+ }
+
+ /* Enable the thread trace mode. */
+ uint32_t thread_trace_mode =
+ S_030CD8_MASK_PS(1) | S_030CD8_MASK_VS(1) | S_030CD8_MASK_GS(1) | S_030CD8_MASK_ES(1) |
+ S_030CD8_MASK_HS(1) | S_030CD8_MASK_LS(1) | S_030CD8_MASK_CS(1) |
+ S_030CD8_AUTOFLUSH_EN(1) | /* periodically flush SQTT data to memory */
+ S_030CD8_MODE(1);
+
+ if (device->physical_device->rad_info.chip_class == GFX9) {
+ /* Count SQTT traffic in TCC perf counters. */
+ thread_trace_mode |= S_030CD8_TC_PERF_EN(1);
+ }
+
+ radeon_set_uconfig_reg(cs, R_030CD8_SQ_THREAD_TRACE_MODE, thread_trace_mode);
+ }
+ }
+
+ /* Restore global broadcasting. */
+ radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX,
+ S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) |
+ S_030800_INSTANCE_BROADCAST_WRITES(1));
+
+ /* Start the thread trace with a different event based on the queue. */
+ if (queue_family_index == RADV_QUEUE_COMPUTE &&
+ device->physical_device->rad_info.chip_class >= GFX7) {
+ radeon_set_sh_reg(cs, R_00B878_COMPUTE_THREAD_TRACE_ENABLE, S_00B878_THREAD_TRACE_ENABLE(1));
+ } else {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_THREAD_TRACE_START) | EVENT_INDEX(0));
+ }
}
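Note on the CU selection above: ffs() returns a 1-based bit index, and on GFX10+ the selection is programmed per WGP (two CUs per WGP), hence the division by two. A standalone sketch of that arithmetic, using a made-up CU mask rather than driver data:

#include <stdio.h>
#include <strings.h> /* ffs() */

int main(void)
{
   /* Hypothetical CU mask for one shader engine: bit N set => CU N active.
    * Here CU0 is fused off and CU1..CU15 are active. */
   unsigned cu_mask = 0xfffe;

   /* A mask of 0 would mean the whole SE is disabled (radv_se_is_disabled). */
   int first_active_cu = ffs(cu_mask); /* 1-based, so 2 for this mask */

   /* GFX10+ groups two CUs into one WGP, so the register takes a WGP index. */
   int wgp_sel = first_active_cu / 2;

   printf("first active CU: %d, WGP_SEL: %d\n", first_active_cu, wgp_sel);
   return 0;
}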
-static const uint32_t gfx8_thread_trace_info_regs[] =
-{
- R_030CE4_SQ_THREAD_TRACE_WPTR,
- R_030CE8_SQ_THREAD_TRACE_STATUS,
- R_008E40_SQ_THREAD_TRACE_CNTR,
+static const uint32_t gfx8_thread_trace_info_regs[] = {
+ R_030CE4_SQ_THREAD_TRACE_WPTR,
+ R_030CE8_SQ_THREAD_TRACE_STATUS,
+ R_008E40_SQ_THREAD_TRACE_CNTR,
};
-static const uint32_t gfx9_thread_trace_info_regs[] =
-{
- R_030CE4_SQ_THREAD_TRACE_WPTR,
- R_030CE8_SQ_THREAD_TRACE_STATUS,
- R_030CF0_SQ_THREAD_TRACE_CNTR,
+static const uint32_t gfx9_thread_trace_info_regs[] = {
+ R_030CE4_SQ_THREAD_TRACE_WPTR,
+ R_030CE8_SQ_THREAD_TRACE_STATUS,
+ R_030CF0_SQ_THREAD_TRACE_CNTR,
};
-static const uint32_t gfx10_thread_trace_info_regs[] =
-{
- R_008D10_SQ_THREAD_TRACE_WPTR,
- R_008D20_SQ_THREAD_TRACE_STATUS,
- R_008D24_SQ_THREAD_TRACE_DROPPED_CNTR,
+static const uint32_t gfx10_thread_trace_info_regs[] = {
+ R_008D10_SQ_THREAD_TRACE_WPTR,
+ R_008D20_SQ_THREAD_TRACE_STATUS,
+ R_008D24_SQ_THREAD_TRACE_DROPPED_CNTR,
};
static void
-radv_copy_thread_trace_info_regs(struct radv_device *device,
- struct radeon_cmdbuf *cs,
- unsigned se_index)
+radv_copy_thread_trace_info_regs(struct radv_device *device, struct radeon_cmdbuf *cs,
+ unsigned se_index)
{
- const uint32_t *thread_trace_info_regs = NULL;
-
- if (device->physical_device->rad_info.chip_class >= GFX10) {
- thread_trace_info_regs = gfx10_thread_trace_info_regs;
- } else if (device->physical_device->rad_info.chip_class == GFX9) {
- thread_trace_info_regs = gfx9_thread_trace_info_regs;
- } else {
- assert(device->physical_device->rad_info.chip_class == GFX8);
- thread_trace_info_regs = gfx8_thread_trace_info_regs;
- }
-
- /* Get the VA where the info struct is stored for this SE. */
- uint64_t va = radv_buffer_get_va(device->thread_trace.bo);
- uint64_t info_va = ac_thread_trace_get_info_va(va, se_index);
-
- /* Copy back the info struct one DWORD at a time. */
- for (unsigned i = 0; i < 3; i++) {
- radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_PERF) |
- COPY_DATA_DST_SEL(COPY_DATA_TC_L2) |
- COPY_DATA_WR_CONFIRM);
- radeon_emit(cs, thread_trace_info_regs[i] >> 2);
- radeon_emit(cs, 0); /* unused */
- radeon_emit(cs, (info_va + i * 4));
- radeon_emit(cs, (info_va + i * 4) >> 32);
- }
+ const uint32_t *thread_trace_info_regs = NULL;
+
+ if (device->physical_device->rad_info.chip_class >= GFX10) {
+ thread_trace_info_regs = gfx10_thread_trace_info_regs;
+ } else if (device->physical_device->rad_info.chip_class == GFX9) {
+ thread_trace_info_regs = gfx9_thread_trace_info_regs;
+ } else {
+ assert(device->physical_device->rad_info.chip_class == GFX8);
+ thread_trace_info_regs = gfx8_thread_trace_info_regs;
+ }
+
+ /* Get the VA where the info struct is stored for this SE. */
+ uint64_t va = radv_buffer_get_va(device->thread_trace.bo);
+ uint64_t info_va = ac_thread_trace_get_info_va(va, se_index);
+
+ /* Copy back the info struct one DWORD at a time. */
+ for (unsigned i = 0; i < 3; i++) {
+ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_PERF) | COPY_DATA_DST_SEL(COPY_DATA_TC_L2) |
+ COPY_DATA_WR_CONFIRM);
+ radeon_emit(cs, thread_trace_info_regs[i] >> 2);
+ radeon_emit(cs, 0); /* unused */
+ radeon_emit(cs, (info_va + i * 4));
+ radeon_emit(cs, (info_va + i * 4) >> 32);
+ }
}
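The ">> 2" above converts a register byte offset into the DWORD index that COPY_DATA expects, and the destination is the per-SE info slot inside the trace BO. A rough illustration of that addressing; the info-struct layout and sizes below are assumptions, not the exact ac_thread_trace_get_info_va() layout:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
   /* Made-up values for illustration only. */
   uint64_t bo_va = 0x100000000ull; /* GPU address of the thread trace BO */
   unsigned info_struct_size = 32;  /* assumed per-SE info struct size */
   unsigned se_index = 2;

   /* Presumed layout: one info struct per SE at the start of the BO. */
   uint64_t info_va = bo_va + (uint64_t)se_index * info_struct_size;

   /* COPY_DATA takes the source register as a DWORD index, hence ">> 2". */
   uint32_t reg_byte_offset = 0x8D20; /* SQ_THREAD_TRACE_STATUS on GFX10 */
   uint32_t reg_dword_index = reg_byte_offset >> 2;

   printf("SE%u info VA: 0x%" PRIx64 ", register DWORD index: %" PRIu32 "\n",
          se_index, info_va, reg_dword_index);
   return 0;
}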
static void
-radv_emit_thread_trace_stop(struct radv_device *device,
- struct radeon_cmdbuf *cs,
- uint32_t queue_family_index)
+radv_emit_thread_trace_stop(struct radv_device *device, struct radeon_cmdbuf *cs,
+ uint32_t queue_family_index)
{
- unsigned max_se = device->physical_device->rad_info.max_se;
-
- assert(device->physical_device->rad_info.chip_class >= GFX8);
-
- /* Stop the thread trace with a different event based on the queue. */
- if (queue_family_index == RADV_QUEUE_COMPUTE &&
- device->physical_device->rad_info.chip_class >= GFX7) {
- radeon_set_sh_reg(cs, R_00B878_COMPUTE_THREAD_TRACE_ENABLE,
- S_00B878_THREAD_TRACE_ENABLE(0));
- } else {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_THREAD_TRACE_STOP) | EVENT_INDEX(0));
- }
-
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_THREAD_TRACE_FINISH) | EVENT_INDEX(0));
-
- for (unsigned se = 0; se < max_se; se++) {
- if (radv_se_is_disabled(device, se))
- continue;
-
- /* Target SEi and SH0. */
- radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX,
- S_030800_SE_INDEX(se) |
- S_030800_SH_INDEX(0) |
- S_030800_INSTANCE_BROADCAST_WRITES(1));
-
- if (device->physical_device->rad_info.chip_class >= GFX10) {
- /* Make sure to wait for the trace buffer. */
- radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
-			radeon_emit(cs, WAIT_REG_MEM_NOT_EQUAL); /* wait until the register is not equal to the reference value */
- radeon_emit(cs, R_008D20_SQ_THREAD_TRACE_STATUS >> 2); /* register */
- radeon_emit(cs, 0);
- radeon_emit(cs, 0); /* reference value */
- radeon_emit(cs, S_008D20_FINISH_DONE(1)); /* mask */
- radeon_emit(cs, 4); /* poll interval */
-
- /* Disable the thread trace mode. */
- radeon_set_privileged_config_reg(cs, R_008D1C_SQ_THREAD_TRACE_CTRL,
- S_008D1C_MODE(0));
-
- /* Wait for thread trace completion. */
- radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
- radeon_emit(cs, WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */
- radeon_emit(cs, R_008D20_SQ_THREAD_TRACE_STATUS >> 2); /* register */
- radeon_emit(cs, 0);
- radeon_emit(cs, 0); /* reference value */
- radeon_emit(cs, S_008D20_BUSY(1)); /* mask */
- radeon_emit(cs, 4); /* poll interval */
- } else {
- /* Disable the thread trace mode. */
- radeon_set_uconfig_reg(cs, R_030CD8_SQ_THREAD_TRACE_MODE,
- S_030CD8_MODE(0));
-
- /* Wait for thread trace completion. */
- radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
- radeon_emit(cs, WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */
- radeon_emit(cs, R_030CE8_SQ_THREAD_TRACE_STATUS >> 2); /* register */
- radeon_emit(cs, 0);
- radeon_emit(cs, 0); /* reference value */
- radeon_emit(cs, S_030CE8_BUSY(1)); /* mask */
- radeon_emit(cs, 4); /* poll interval */
- }
-
- radv_copy_thread_trace_info_regs(device, cs, se);
- }
-
- /* Restore global broadcasting. */
- radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX,
- S_030800_SE_BROADCAST_WRITES(1) |
- S_030800_SH_BROADCAST_WRITES(1) |
- S_030800_INSTANCE_BROADCAST_WRITES(1));
+ unsigned max_se = device->physical_device->rad_info.max_se;
+
+ assert(device->physical_device->rad_info.chip_class >= GFX8);
+
+ /* Stop the thread trace with a different event based on the queue. */
+ if (queue_family_index == RADV_QUEUE_COMPUTE &&
+ device->physical_device->rad_info.chip_class >= GFX7) {
+ radeon_set_sh_reg(cs, R_00B878_COMPUTE_THREAD_TRACE_ENABLE, S_00B878_THREAD_TRACE_ENABLE(0));
+ } else {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_THREAD_TRACE_STOP) | EVENT_INDEX(0));
+ }
+
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_THREAD_TRACE_FINISH) | EVENT_INDEX(0));
+
+ for (unsigned se = 0; se < max_se; se++) {
+ if (radv_se_is_disabled(device, se))
+ continue;
+
+ /* Target SEi and SH0. */
+ radeon_set_uconfig_reg(
+ cs, R_030800_GRBM_GFX_INDEX,
+ S_030800_SE_INDEX(se) | S_030800_SH_INDEX(0) | S_030800_INSTANCE_BROADCAST_WRITES(1));
+
+ if (device->physical_device->rad_info.chip_class >= GFX10) {
+ /* Make sure to wait for the trace buffer. */
+ radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
+ radeon_emit(
+ cs,
+            WAIT_REG_MEM_NOT_EQUAL); /* wait until the register is not equal to the reference value */
+ radeon_emit(cs, R_008D20_SQ_THREAD_TRACE_STATUS >> 2); /* register */
+ radeon_emit(cs, 0);
+ radeon_emit(cs, 0); /* reference value */
+ radeon_emit(cs, S_008D20_FINISH_DONE(1)); /* mask */
+ radeon_emit(cs, 4); /* poll interval */
+
+ /* Disable the thread trace mode. */
+ radeon_set_privileged_config_reg(cs, R_008D1C_SQ_THREAD_TRACE_CTRL, S_008D1C_MODE(0));
+
+ /* Wait for thread trace completion. */
+ radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
+ radeon_emit(
+ cs, WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */
+ radeon_emit(cs, R_008D20_SQ_THREAD_TRACE_STATUS >> 2); /* register */
+ radeon_emit(cs, 0);
+ radeon_emit(cs, 0); /* reference value */
+ radeon_emit(cs, S_008D20_BUSY(1)); /* mask */
+ radeon_emit(cs, 4); /* poll interval */
+ } else {
+ /* Disable the thread trace mode. */
+ radeon_set_uconfig_reg(cs, R_030CD8_SQ_THREAD_TRACE_MODE, S_030CD8_MODE(0));
+
+ /* Wait for thread trace completion. */
+ radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
+ radeon_emit(
+ cs, WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */
+ radeon_emit(cs, R_030CE8_SQ_THREAD_TRACE_STATUS >> 2); /* register */
+ radeon_emit(cs, 0);
+ radeon_emit(cs, 0); /* reference value */
+ radeon_emit(cs, S_030CE8_BUSY(1)); /* mask */
+ radeon_emit(cs, 4); /* poll interval */
+ }
+
+ radv_copy_thread_trace_info_regs(device, cs, se);
+ }
+
+ /* Restore global broadcasting. */
+ radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX,
+ S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) |
+ S_030800_INSTANCE_BROADCAST_WRITES(1));
}
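The WAIT_REG_MEM packets above make the CP poll a status register until a masked comparison against the reference value passes (FINISH_DONE set, then BUSY cleared). A CPU-side model of those semantics, purely for illustration; the bit positions are made up:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum wait_func { WAIT_EQUAL, WAIT_NOT_EQUAL };

/* Poll a register until (value & mask) compares against the reference value,
 * mirroring what the WAIT_REG_MEM packet asks the CP to do. */
static bool wait_reg_mem(enum wait_func func, volatile const uint32_t *reg,
                         uint32_t mask, uint32_t ref, unsigned max_polls)
{
   for (unsigned i = 0; i < max_polls; i++) {
      uint32_t masked = *reg & mask;
      if (func == WAIT_EQUAL ? (masked == ref) : (masked != ref))
         return true;
      /* The real packet waits "poll interval" clocks between reads. */
   }
   return false;
}

int main(void)
{
   uint32_t status = 0x2; /* pretend FINISH_DONE (bit 1 here) set, BUSY (bit 0) clear */

   /* Wait for FINISH_DONE != 0, then for BUSY == 0, mirroring the GFX10 path. */
   bool finished = wait_reg_mem(WAIT_NOT_EQUAL, &status, 0x2, 0, 16);
   bool idle = wait_reg_mem(WAIT_EQUAL, &status, 0x1, 0, 16);

   printf("finish done: %d, idle: %d\n", finished, idle);
   return 0;
}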
void
-radv_emit_thread_trace_userdata(const struct radv_device *device,
- struct radeon_cmdbuf *cs,
- const void *data, uint32_t num_dwords)
+radv_emit_thread_trace_userdata(const struct radv_device *device, struct radeon_cmdbuf *cs,
+ const void *data, uint32_t num_dwords)
{
- const uint32_t *dwords = (uint32_t *)data;
+ const uint32_t *dwords = (uint32_t *)data;
- while (num_dwords > 0) {
- uint32_t count = MIN2(num_dwords, 2);
+ while (num_dwords > 0) {
+ uint32_t count = MIN2(num_dwords, 2);
- radeon_check_space(device->ws, cs, 2 + count);
+ radeon_check_space(device->ws, cs, 2 + count);
- /* Without the perfctr bit the CP might not always pass the
- * write on correctly. */
- if (device->physical_device->rad_info.chip_class >= GFX10)
- radeon_set_uconfig_reg_seq_perfctr(cs, R_030D08_SQ_THREAD_TRACE_USERDATA_2, count);
- else
- radeon_set_uconfig_reg_seq(cs, R_030D08_SQ_THREAD_TRACE_USERDATA_2, count);
- radeon_emit_array(cs, dwords, count);
+ /* Without the perfctr bit the CP might not always pass the
+ * write on correctly. */
+ if (device->physical_device->rad_info.chip_class >= GFX10)
+ radeon_set_uconfig_reg_seq_perfctr(cs, R_030D08_SQ_THREAD_TRACE_USERDATA_2, count);
+ else
+ radeon_set_uconfig_reg_seq(cs, R_030D08_SQ_THREAD_TRACE_USERDATA_2, count);
+ radeon_emit_array(cs, dwords, count);
- dwords += count;
- num_dwords -= count;
- }
+ dwords += count;
+ num_dwords -= count;
+ }
}
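The loop above writes the marker payload in chunks of at most two DWORDs per packet. The chunking itself, separated from the command-stream details, looks like this; the output target here is just stdout, not the driver's API:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define MIN2(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
   uint32_t data[5] = {0x11, 0x22, 0x33, 0x44, 0x55};
   const uint32_t *dwords = data;
   uint32_t num_dwords = 5;

   while (num_dwords > 0) {
      /* At most two DWORDs per packet, matching the small
       * SQ_THREAD_TRACE_USERDATA register window used above. */
      uint32_t count = MIN2(num_dwords, 2);

      printf("emit %" PRIu32 " dword(s):", count);
      for (uint32_t i = 0; i < count; i++)
         printf(" 0x%08" PRIx32, dwords[i]);
      printf("\n");

      dwords += count;
      num_dwords -= count;
   }
   return 0;
}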
static void
-radv_emit_spi_config_cntl(struct radv_device *device,
- struct radeon_cmdbuf *cs, bool enable)
+radv_emit_spi_config_cntl(struct radv_device *device, struct radeon_cmdbuf *cs, bool enable)
{
- if (device->physical_device->rad_info.chip_class >= GFX9) {
- uint32_t spi_config_cntl = S_031100_GPR_WRITE_PRIORITY(0x2c688) |
- S_031100_EXP_PRIORITY_ORDER(3) |
- S_031100_ENABLE_SQG_TOP_EVENTS(enable) |
- S_031100_ENABLE_SQG_BOP_EVENTS(enable);
-
- if (device->physical_device->rad_info.chip_class >= GFX10)
- spi_config_cntl |= S_031100_PS_PKR_PRIORITY_CNTL(3);
-
- radeon_set_uconfig_reg(cs, R_031100_SPI_CONFIG_CNTL, spi_config_cntl);
- } else {
- /* SPI_CONFIG_CNTL is a protected register on GFX6-GFX8. */
- radeon_set_privileged_config_reg(cs, R_009100_SPI_CONFIG_CNTL,
- S_009100_ENABLE_SQG_TOP_EVENTS(enable) |
- S_009100_ENABLE_SQG_BOP_EVENTS(enable));
- }
+ if (device->physical_device->rad_info.chip_class >= GFX9) {
+ uint32_t spi_config_cntl =
+ S_031100_GPR_WRITE_PRIORITY(0x2c688) | S_031100_EXP_PRIORITY_ORDER(3) |
+ S_031100_ENABLE_SQG_TOP_EVENTS(enable) | S_031100_ENABLE_SQG_BOP_EVENTS(enable);
+
+ if (device->physical_device->rad_info.chip_class >= GFX10)
+ spi_config_cntl |= S_031100_PS_PKR_PRIORITY_CNTL(3);
+
+ radeon_set_uconfig_reg(cs, R_031100_SPI_CONFIG_CNTL, spi_config_cntl);
+ } else {
+ /* SPI_CONFIG_CNTL is a protected register on GFX6-GFX8. */
+ radeon_set_privileged_config_reg(
+ cs, R_009100_SPI_CONFIG_CNTL,
+ S_009100_ENABLE_SQG_TOP_EVENTS(enable) | S_009100_ENABLE_SQG_BOP_EVENTS(enable));
+ }
}
static void
-radv_emit_inhibit_clockgating(struct radv_device *device,
- struct radeon_cmdbuf *cs, bool inhibit)
+radv_emit_inhibit_clockgating(struct radv_device *device, struct radeon_cmdbuf *cs, bool inhibit)
{
- if (device->physical_device->rad_info.chip_class >= GFX10) {
- radeon_set_uconfig_reg(cs, R_037390_RLC_PERFMON_CLK_CNTL,
- S_037390_PERFMON_CLOCK_STATE(inhibit));
- } else if (device->physical_device->rad_info.chip_class >= GFX8) {
- radeon_set_uconfig_reg(cs, R_0372FC_RLC_PERFMON_CLK_CNTL,
- S_0372FC_PERFMON_CLOCK_STATE(inhibit));
- }
+ if (device->physical_device->rad_info.chip_class >= GFX10) {
+ radeon_set_uconfig_reg(cs, R_037390_RLC_PERFMON_CLK_CNTL,
+ S_037390_PERFMON_CLOCK_STATE(inhibit));
+ } else if (device->physical_device->rad_info.chip_class >= GFX8) {
+ radeon_set_uconfig_reg(cs, R_0372FC_RLC_PERFMON_CLK_CNTL,
+ S_0372FC_PERFMON_CLOCK_STATE(inhibit));
+ }
}
static void
-radv_emit_wait_for_idle(struct radv_device *device,
- struct radeon_cmdbuf *cs, int family)
+radv_emit_wait_for_idle(struct radv_device *device, struct radeon_cmdbuf *cs, int family)
{
- enum rgp_flush_bits sqtt_flush_bits = 0;
- si_cs_emit_cache_flush(cs, device->physical_device->rad_info.chip_class,
- NULL, 0,
- family == RING_COMPUTE &&
- device->physical_device->rad_info.chip_class >= GFX7,
- (family == RADV_QUEUE_COMPUTE ?
- RADV_CMD_FLAG_CS_PARTIAL_FLUSH :
- (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
- RADV_CMD_FLAG_INV_ICACHE |
- RADV_CMD_FLAG_INV_SCACHE |
- RADV_CMD_FLAG_INV_VCACHE |
- RADV_CMD_FLAG_INV_L2, &sqtt_flush_bits, 0);
+ enum rgp_flush_bits sqtt_flush_bits = 0;
+ si_cs_emit_cache_flush(
+ cs, device->physical_device->rad_info.chip_class, NULL, 0,
+ family == RING_COMPUTE && device->physical_device->rad_info.chip_class >= GFX7,
+ (family == RADV_QUEUE_COMPUTE
+ ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH
+ : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
+ RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE | RADV_CMD_FLAG_INV_VCACHE |
+ RADV_CMD_FLAG_INV_L2,
+ &sqtt_flush_bits, 0);
}
static bool
radv_thread_trace_init_bo(struct radv_device *device)
{
- unsigned max_se = device->physical_device->rad_info.max_se;
- struct radeon_winsys *ws = device->ws;
- uint64_t size;
-
- /* The buffer size and address need to be aligned in HW regs. Align the
- * size as early as possible so that we do all the allocation & addressing
- * correctly. */
- device->thread_trace.buffer_size = align64(device->thread_trace.buffer_size,
- 1u << SQTT_BUFFER_ALIGN_SHIFT);
-
- /* Compute total size of the thread trace BO for all SEs. */
- size = align64(sizeof(struct ac_thread_trace_info) * max_se,
- 1 << SQTT_BUFFER_ALIGN_SHIFT);
- size += device->thread_trace.buffer_size * (uint64_t)max_se;
-
- device->thread_trace.bo = ws->buffer_create(ws, size, 4096,
- RADEON_DOMAIN_VRAM,
- RADEON_FLAG_CPU_ACCESS |
- RADEON_FLAG_NO_INTERPROCESS_SHARING |
- RADEON_FLAG_ZERO_VRAM,
- RADV_BO_PRIORITY_SCRATCH);
- if (!device->thread_trace.bo)
- return false;
-
- device->thread_trace.ptr = ws->buffer_map(device->thread_trace.bo);
- if (!device->thread_trace.ptr)
- return false;
-
- return true;
+ unsigned max_se = device->physical_device->rad_info.max_se;
+ struct radeon_winsys *ws = device->ws;
+ uint64_t size;
+
+ /* The buffer size and address need to be aligned in HW regs. Align the
+ * size as early as possible so that we do all the allocation & addressing
+ * correctly. */
+ device->thread_trace.buffer_size =
+ align64(device->thread_trace.buffer_size, 1u << SQTT_BUFFER_ALIGN_SHIFT);
+
+ /* Compute total size of the thread trace BO for all SEs. */
+ size = align64(sizeof(struct ac_thread_trace_info) * max_se, 1 << SQTT_BUFFER_ALIGN_SHIFT);
+ size += device->thread_trace.buffer_size * (uint64_t)max_se;
+
+ device->thread_trace.bo = ws->buffer_create(
+ ws, size, 4096, RADEON_DOMAIN_VRAM,
+ RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM,
+ RADV_BO_PRIORITY_SCRATCH);
+ if (!device->thread_trace.bo)
+ return false;
+
+ device->thread_trace.ptr = ws->buffer_map(device->thread_trace.bo);
+ if (!device->thread_trace.ptr)
+ return false;
+
+ return true;
}
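A standalone sketch of the sizing math above. The constants below are assumptions for illustration only; the real SQTT_BUFFER_ALIGN_SHIFT and ac_thread_trace_info size come from the common AMD headers:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Assumed values, for illustration only. */
#define SQTT_BUFFER_ALIGN_SHIFT 12 /* 4 KiB alignment */
#define THREAD_TRACE_INFO_SIZE  32 /* per-SE info struct size, made up */

static uint64_t align64(uint64_t v, uint64_t a)
{
   return (v + a - 1) & ~(a - 1);
}

int main(void)
{
   unsigned max_se = 4;
   uint64_t buffer_size = 32ull * 1024 * 1024; /* default: 32 MiB per SE */

   /* Align the per-SE data buffer size as the HW registers require. */
   buffer_size = align64(buffer_size, 1u << SQTT_BUFFER_ALIGN_SHIFT);

   /* Info structs for all SEs first, then one data buffer per SE. */
   uint64_t size = align64((uint64_t)THREAD_TRACE_INFO_SIZE * max_se,
                           1u << SQTT_BUFFER_ALIGN_SHIFT);
   size += buffer_size * (uint64_t)max_se;

   printf("total SQTT BO size: %" PRIu64 " bytes\n", size);
   return 0;
}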
bool
radv_thread_trace_init(struct radv_device *device)
{
- struct ac_thread_trace_data *thread_trace_data = &device->thread_trace;
+ struct ac_thread_trace_data *thread_trace_data = &device->thread_trace;
- /* Default buffer size set to 32MB per SE. */
- device->thread_trace.buffer_size =
- radv_get_int_debug_option("RADV_THREAD_TRACE_BUFFER_SIZE", 32 * 1024 * 1024);
- device->thread_trace.start_frame = radv_get_int_debug_option("RADV_THREAD_TRACE", -1);
+ /* Default buffer size set to 32MB per SE. */
+ device->thread_trace.buffer_size =
+ radv_get_int_debug_option("RADV_THREAD_TRACE_BUFFER_SIZE", 32 * 1024 * 1024);
+ device->thread_trace.start_frame = radv_get_int_debug_option("RADV_THREAD_TRACE", -1);
- const char *trigger_file = getenv("RADV_THREAD_TRACE_TRIGGER");
- if (trigger_file)
- device->thread_trace.trigger_file = strdup(trigger_file);
+ const char *trigger_file = getenv("RADV_THREAD_TRACE_TRIGGER");
+ if (trigger_file)
+ device->thread_trace.trigger_file = strdup(trigger_file);
- if (!radv_thread_trace_init_bo(device))
- return false;
+ if (!radv_thread_trace_init_bo(device))
+ return false;
- list_inithead(&thread_trace_data->rgp_pso_correlation.record);
- simple_mtx_init(&thread_trace_data->rgp_pso_correlation.lock, mtx_plain);
+ list_inithead(&thread_trace_data->rgp_pso_correlation.record);
+ simple_mtx_init(&thread_trace_data->rgp_pso_correlation.lock, mtx_plain);
- list_inithead(&thread_trace_data->rgp_loader_events.record);
- simple_mtx_init(&thread_trace_data->rgp_loader_events.lock, mtx_plain);
+ list_inithead(&thread_trace_data->rgp_loader_events.record);
+ simple_mtx_init(&thread_trace_data->rgp_loader_events.lock, mtx_plain);
- list_inithead(&thread_trace_data->rgp_code_object.record);
- simple_mtx_init(&thread_trace_data->rgp_code_object.lock, mtx_plain);
+ list_inithead(&thread_trace_data->rgp_code_object.record);
+ simple_mtx_init(&thread_trace_data->rgp_code_object.lock, mtx_plain);
- return true;
+ return true;
}
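The environment-driven setup above can be approximated with a small sketch; get_int_option below is a guess at what radv_get_int_debug_option does (env value if set, default otherwise), not the actual helper:

#include <stdio.h>
#include <stdlib.h>

/* Sketch of an integer debug option reader: value of the env var if set,
 * otherwise the given default. */
static int get_int_option(const char *name, int default_value)
{
   const char *str = getenv(name);
   return str ? atoi(str) : default_value;
}

int main(void)
{
   int buffer_size = get_int_option("RADV_THREAD_TRACE_BUFFER_SIZE", 32 * 1024 * 1024);
   int start_frame = get_int_option("RADV_THREAD_TRACE", -1);
   const char *trigger_file = getenv("RADV_THREAD_TRACE_TRIGGER");

   printf("buffer size: %d, start frame: %d, trigger file: %s\n",
          buffer_size, start_frame, trigger_file ? trigger_file : "(none)");
   return 0;
}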
void
radv_thread_trace_finish(struct radv_device *device)
{
- struct ac_thread_trace_data *thread_trace_data = &device->thread_trace;
- struct radeon_winsys *ws = device->ws;
+ struct ac_thread_trace_data *thread_trace_data = &device->thread_trace;
+ struct radeon_winsys *ws = device->ws;
- if (unlikely(device->thread_trace.bo))
- ws->buffer_destroy(ws, device->thread_trace.bo);
+ if (unlikely(device->thread_trace.bo))
+ ws->buffer_destroy(ws, device->thread_trace.bo);
- for (unsigned i = 0; i < 2; i++) {
- if (device->thread_trace.start_cs[i])
- ws->cs_destroy(device->thread_trace.start_cs[i]);
- if (device->thread_trace.stop_cs[i])
- ws->cs_destroy(device->thread_trace.stop_cs[i]);
- }
+ for (unsigned i = 0; i < 2; i++) {
+ if (device->thread_trace.start_cs[i])
+ ws->cs_destroy(device->thread_trace.start_cs[i]);
+ if (device->thread_trace.stop_cs[i])
+ ws->cs_destroy(device->thread_trace.stop_cs[i]);
+ }
- assert(thread_trace_data->rgp_pso_correlation.record_count == 0);
- simple_mtx_destroy(&thread_trace_data->rgp_pso_correlation.lock);
+ assert(thread_trace_data->rgp_pso_correlation.record_count == 0);
+ simple_mtx_destroy(&thread_trace_data->rgp_pso_correlation.lock);
- assert(thread_trace_data->rgp_loader_events.record_count == 0);
- simple_mtx_destroy(&thread_trace_data->rgp_loader_events.lock);
+ assert(thread_trace_data->rgp_loader_events.record_count == 0);
+ simple_mtx_destroy(&thread_trace_data->rgp_loader_events.lock);
- assert(thread_trace_data->rgp_code_object.record_count == 0);
- simple_mtx_destroy(&thread_trace_data->rgp_code_object.lock);
+ assert(thread_trace_data->rgp_code_object.record_count == 0);
+ simple_mtx_destroy(&thread_trace_data->rgp_code_object.lock);
}
static bool
radv_thread_trace_resize_bo(struct radv_device *device)
{
- struct radeon_winsys *ws = device->ws;
+ struct radeon_winsys *ws = device->ws;
- /* Destroy the previous thread trace BO. */
- ws->buffer_destroy(ws, device->thread_trace.bo);
+ /* Destroy the previous thread trace BO. */
+ ws->buffer_destroy(ws, device->thread_trace.bo);
- /* Double the size of the thread trace buffer per SE. */
- device->thread_trace.buffer_size *= 2;
+ /* Double the size of the thread trace buffer per SE. */
+ device->thread_trace.buffer_size *= 2;
- fprintf(stderr, "Failed to get the thread trace because the buffer "
- "was too small, resizing to %d KB\n",
- device->thread_trace.buffer_size / 1024);
+ fprintf(stderr,
+ "Failed to get the thread trace because the buffer "
+ "was too small, resizing to %d KB\n",
+ device->thread_trace.buffer_size / 1024);
- /* Re-create the thread trace BO. */
- return radv_thread_trace_init_bo(device);
+ /* Re-create the thread trace BO. */
+ return radv_thread_trace_init_bo(device);
}
bool
radv_begin_thread_trace(struct radv_queue *queue)
{
- struct radv_device *device = queue->device;
- int family = queue->queue_family_index;
- struct radeon_winsys *ws = device->ws;
- struct radeon_cmdbuf *cs;
- VkResult result;
-
- /* Destroy the previous start CS and create a new one. */
- if (device->thread_trace.start_cs[family]) {
- ws->cs_destroy(device->thread_trace.start_cs[family]);
- device->thread_trace.start_cs[family] = NULL;
- }
-
- cs = ws->cs_create(ws, family);
- if (!cs)
- return false;
-
- switch (family) {
- case RADV_QUEUE_GENERAL:
- radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
- radeon_emit(cs, CC0_UPDATE_LOAD_ENABLES(1));
- radeon_emit(cs, CC1_UPDATE_SHADOW_ENABLES(1));
- break;
- case RADV_QUEUE_COMPUTE:
- radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
- radeon_emit(cs, 0);
- break;
- }
-
- radv_cs_add_buffer(ws, cs, device->thread_trace.bo);
-
- /* Make sure to wait-for-idle before starting SQTT. */
- radv_emit_wait_for_idle(device, cs, family);
-
- /* Disable clock gating before starting SQTT. */
- radv_emit_inhibit_clockgating(device, cs, true);
-
-	/* Enable SQG events that collect thread trace data. */
- radv_emit_spi_config_cntl(device, cs, true);
-
- /* Start SQTT. */
- radv_emit_thread_trace_start(device, cs, family);
-
- result = ws->cs_finalize(cs);
- if (result != VK_SUCCESS) {
- ws->cs_destroy(cs);
- return false;
- }
-
- device->thread_trace.start_cs[family] = cs;
-
- return radv_queue_internal_submit(queue, cs);
+ struct radv_device *device = queue->device;
+ int family = queue->queue_family_index;
+ struct radeon_winsys *ws = device->ws;
+ struct radeon_cmdbuf *cs;
+ VkResult result;
+
+ /* Destroy the previous start CS and create a new one. */
+ if (device->thread_trace.start_cs[family]) {
+ ws->cs_destroy(device->thread_trace.start_cs[family]);
+ device->thread_trace.start_cs[family] = NULL;
+ }
+
+ cs = ws->cs_create(ws, family);
+ if (!cs)
+ return false;
+
+ switch (family) {
+ case RADV_QUEUE_GENERAL:
+ radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
+ radeon_emit(cs, CC0_UPDATE_LOAD_ENABLES(1));
+ radeon_emit(cs, CC1_UPDATE_SHADOW_ENABLES(1));
+ break;
+ case RADV_QUEUE_COMPUTE:
+ radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
+ radeon_emit(cs, 0);
+ break;
+ }
+
+ radv_cs_add_buffer(ws, cs, device->thread_trace.bo);
+
+ /* Make sure to wait-for-idle before starting SQTT. */
+ radv_emit_wait_for_idle(device, cs, family);
+
+ /* Disable clock gating before starting SQTT. */
+ radv_emit_inhibit_clockgating(device, cs, true);
+
+   /* Enable SQG events that collect thread trace data. */
+ radv_emit_spi_config_cntl(device, cs, true);
+
+ /* Start SQTT. */
+ radv_emit_thread_trace_start(device, cs, family);
+
+ result = ws->cs_finalize(cs);
+ if (result != VK_SUCCESS) {
+ ws->cs_destroy(cs);
+ return false;
+ }
+
+ device->thread_trace.start_cs[family] = cs;
+
+ return radv_queue_internal_submit(queue, cs);
}
bool
radv_end_thread_trace(struct radv_queue *queue)
{
- struct radv_device *device = queue->device;
- int family = queue->queue_family_index;
- struct radeon_winsys *ws = device->ws;
- struct radeon_cmdbuf *cs;
- VkResult result;
-
- /* Destroy the previous stop CS and create a new one. */
- if (queue->device->thread_trace.stop_cs[family]) {
- ws->cs_destroy(device->thread_trace.stop_cs[family]);
- device->thread_trace.stop_cs[family] = NULL;
- }
-
- cs = ws->cs_create(ws, family);
- if (!cs)
- return false;
-
- switch (family) {
- case RADV_QUEUE_GENERAL:
- radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
- radeon_emit(cs, CC0_UPDATE_LOAD_ENABLES(1));
- radeon_emit(cs, CC1_UPDATE_SHADOW_ENABLES(1));
- break;
- case RADV_QUEUE_COMPUTE:
- radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
- radeon_emit(cs, 0);
- break;
- }
-
- radv_cs_add_buffer(ws, cs, device->thread_trace.bo);
-
- /* Make sure to wait-for-idle before stopping SQTT. */
- radv_emit_wait_for_idle(device, cs, family);
-
- /* Stop SQTT. */
- radv_emit_thread_trace_stop(device, cs, family);
-
- /* Restore previous state by disabling SQG events. */
- radv_emit_spi_config_cntl(device, cs, false);
-
- /* Restore previous state by re-enabling clock gating. */
- radv_emit_inhibit_clockgating(device, cs, false);
-
- result = ws->cs_finalize(cs);
- if (result != VK_SUCCESS) {
- ws->cs_destroy(cs);
- return false;
- }
-
- device->thread_trace.stop_cs[family] = cs;
-
- return radv_queue_internal_submit(queue, cs);
+ struct radv_device *device = queue->device;
+ int family = queue->queue_family_index;
+ struct radeon_winsys *ws = device->ws;
+ struct radeon_cmdbuf *cs;
+ VkResult result;
+
+ /* Destroy the previous stop CS and create a new one. */
+ if (queue->device->thread_trace.stop_cs[family]) {
+ ws->cs_destroy(device->thread_trace.stop_cs[family]);
+ device->thread_trace.stop_cs[family] = NULL;
+ }
+
+ cs = ws->cs_create(ws, family);
+ if (!cs)
+ return false;
+
+ switch (family) {
+ case RADV_QUEUE_GENERAL:
+ radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
+ radeon_emit(cs, CC0_UPDATE_LOAD_ENABLES(1));
+ radeon_emit(cs, CC1_UPDATE_SHADOW_ENABLES(1));
+ break;
+ case RADV_QUEUE_COMPUTE:
+ radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
+ radeon_emit(cs, 0);
+ break;
+ }
+
+ radv_cs_add_buffer(ws, cs, device->thread_trace.bo);
+
+ /* Make sure to wait-for-idle before stopping SQTT. */
+ radv_emit_wait_for_idle(device, cs, family);
+
+ /* Stop SQTT. */
+ radv_emit_thread_trace_stop(device, cs, family);
+
+ /* Restore previous state by disabling SQG events. */
+ radv_emit_spi_config_cntl(device, cs, false);
+
+ /* Restore previous state by re-enabling clock gating. */
+ radv_emit_inhibit_clockgating(device, cs, false);
+
+ result = ws->cs_finalize(cs);
+ if (result != VK_SUCCESS) {
+ ws->cs_destroy(cs);
+ return false;
+ }
+
+ device->thread_trace.stop_cs[family] = cs;
+
+ return radv_queue_internal_submit(queue, cs);
}
bool
-radv_get_thread_trace(struct radv_queue *queue,
- struct ac_thread_trace *thread_trace)
+radv_get_thread_trace(struct radv_queue *queue, struct ac_thread_trace *thread_trace)
{
- struct radv_device *device = queue->device;
- struct radeon_info *rad_info = &device->physical_device->rad_info;
- unsigned max_se = rad_info->max_se;
- void *thread_trace_ptr = device->thread_trace.ptr;
-
- memset(thread_trace, 0, sizeof(*thread_trace));
-
- for (unsigned se = 0; se < max_se; se++) {
- uint64_t info_offset = ac_thread_trace_get_info_offset(se);
- uint64_t data_offset = ac_thread_trace_get_data_offset(rad_info, &device->thread_trace, se);
- void *info_ptr = (uint8_t *)thread_trace_ptr + info_offset;
- void *data_ptr = (uint8_t *)thread_trace_ptr + data_offset;
- struct ac_thread_trace_info *info =
- (struct ac_thread_trace_info *)info_ptr;
- struct ac_thread_trace_se thread_trace_se = {0};
- int first_active_cu = ffs(device->physical_device->rad_info.cu_mask[se][0]);
-
- if (radv_se_is_disabled(device, se))
- continue;
-
- if (!ac_is_thread_trace_complete(&device->physical_device->rad_info, &device->thread_trace, info)) {
- if (!radv_thread_trace_resize_bo(device)) {
- fprintf(stderr, "Failed to resize the thread "
- "trace buffer.\n");
- abort();
- }
- return false;
- }
-
- thread_trace_se.data_ptr = data_ptr;
- thread_trace_se.info = *info;
- thread_trace_se.shader_engine = se;
-
- /* RGP seems to expect units of WGP on GFX10+. */
- thread_trace_se.compute_unit =
- device->physical_device->rad_info.chip_class >= GFX10 ? (first_active_cu / 2) : first_active_cu;
-
- thread_trace_se.compute_unit = 0;
-
- thread_trace->traces[thread_trace->num_traces] = thread_trace_se;
- thread_trace->num_traces++;
- }
-
- return true;
+ struct radv_device *device = queue->device;
+ struct radeon_info *rad_info = &device->physical_device->rad_info;
+ unsigned max_se = rad_info->max_se;
+ void *thread_trace_ptr = device->thread_trace.ptr;
+
+ memset(thread_trace, 0, sizeof(*thread_trace));
+
+ for (unsigned se = 0; se < max_se; se++) {
+ uint64_t info_offset = ac_thread_trace_get_info_offset(se);
+ uint64_t data_offset = ac_thread_trace_get_data_offset(rad_info, &device->thread_trace, se);
+ void *info_ptr = (uint8_t *)thread_trace_ptr + info_offset;
+ void *data_ptr = (uint8_t *)thread_trace_ptr + data_offset;
+ struct ac_thread_trace_info *info = (struct ac_thread_trace_info *)info_ptr;
+ struct ac_thread_trace_se thread_trace_se = {0};
+ int first_active_cu = ffs(device->physical_device->rad_info.cu_mask[se][0]);
+
+ if (radv_se_is_disabled(device, se))
+ continue;
+
+ if (!ac_is_thread_trace_complete(&device->physical_device->rad_info, &device->thread_trace,
+ info)) {
+ if (!radv_thread_trace_resize_bo(device)) {
+ fprintf(stderr, "Failed to resize the thread "
+ "trace buffer.\n");
+ abort();
+ }
+ return false;
+ }
+
+ thread_trace_se.data_ptr = data_ptr;
+ thread_trace_se.info = *info;
+ thread_trace_se.shader_engine = se;
+
+ /* RGP seems to expect units of WGP on GFX10+. */
+ thread_trace_se.compute_unit = device->physical_device->rad_info.chip_class >= GFX10
+ ? (first_active_cu / 2)
+ : first_active_cu;
+
+ thread_trace_se.compute_unit = 0;
+
+ thread_trace->traces[thread_trace->num_traces] = thread_trace_se;
+ thread_trace->num_traces++;
+ }
+
+ return true;
}
diff --git a/src/amd/vulkan/radv_util.c b/src/amd/vulkan/radv_util.c
index 16666535192..153f65dd90f 100644
--- a/src/amd/vulkan/radv_util.c
+++ b/src/amd/vulkan/radv_util.c
@@ -21,117 +21,110 @@
* IN THE SOFTWARE.
*/
+#include <assert.h>
+#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <errno.h>
-#include <assert.h>
-#include "radv_private.h"
#include "radv_debug.h"
+#include "radv_private.h"
#include "vk_enum_to_str.h"
#include "util/u_math.h"
/** Log an error message. */
-void radv_printflike(1, 2)
- radv_loge(const char *format, ...)
+void radv_printflike(1, 2) radv_loge(const char *format, ...)
{
- va_list va;
+ va_list va;
- va_start(va, format);
- radv_loge_v(format, va);
- va_end(va);
+ va_start(va, format);
+ radv_loge_v(format, va);
+ va_end(va);
}
/** \see radv_loge() */
void
radv_loge_v(const char *format, va_list va)
{
- fprintf(stderr, "vk: error: ");
- vfprintf(stderr, format, va);
- fprintf(stderr, "\n");
+ fprintf(stderr, "vk: error: ");
+ vfprintf(stderr, format, va);
+ fprintf(stderr, "\n");
}
 /** Log an info message. */
-void radv_printflike(1, 2)
- radv_logi(const char *format, ...)
+void radv_printflike(1, 2) radv_logi(const char *format, ...)
{
- va_list va;
+ va_list va;
- va_start(va, format);
- radv_logi_v(format, va);
- va_end(va);
+ va_start(va, format);
+ radv_logi_v(format, va);
+ va_end(va);
}
/** \see radv_logi() */
void
radv_logi_v(const char *format, va_list va)
{
- fprintf(stderr, "radv: info: ");
- vfprintf(stderr, format, va);
- fprintf(stderr, "\n");
+ fprintf(stderr, "radv: info: ");
+ vfprintf(stderr, format, va);
+ fprintf(stderr, "\n");
}
-void radv_printflike(3, 4)
- __radv_finishme(const char *file, int line, const char *format, ...)
+void radv_printflike(3, 4) __radv_finishme(const char *file, int line, const char *format, ...)
{
- va_list ap;
- char buffer[256];
+ va_list ap;
+ char buffer[256];
- va_start(ap, format);
- vsnprintf(buffer, sizeof(buffer), format, ap);
- va_end(ap);
+ va_start(ap, format);
+ vsnprintf(buffer, sizeof(buffer), format, ap);
+ va_end(ap);
- fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buffer);
+ fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buffer);
}
VkResult
-__vk_errorv(struct radv_instance *instance, const void *object,
- VkDebugReportObjectTypeEXT type, VkResult error, const char *file,
- int line, const char *format, va_list ap)
+__vk_errorv(struct radv_instance *instance, const void *object, VkDebugReportObjectTypeEXT type,
+ VkResult error, const char *file, int line, const char *format, va_list ap)
{
- char buffer[256];
- char report[512];
+ char buffer[256];
+ char report[512];
- const char *error_str = vk_Result_to_str(error);
+ const char *error_str = vk_Result_to_str(error);
#ifndef DEBUG
- if (instance && !(instance->debug_flags & RADV_DEBUG_ERRORS))
- return error;
+ if (instance && !(instance->debug_flags & RADV_DEBUG_ERRORS))
+ return error;
#endif
- if (format) {
- vsnprintf(buffer, sizeof(buffer), format, ap);
+ if (format) {
+ vsnprintf(buffer, sizeof(buffer), format, ap);
- snprintf(report, sizeof(report), "%s:%d: %s (%s)", file, line,
- buffer, error_str);
- } else {
- snprintf(report, sizeof(report), "%s:%d: %s", file, line,
- error_str);
- }
+ snprintf(report, sizeof(report), "%s:%d: %s (%s)", file, line, buffer, error_str);
+ } else {
+ snprintf(report, sizeof(report), "%s:%d: %s", file, line, error_str);
+ }
- if (instance) {
- vk_debug_report(&instance->vk, VK_DEBUG_REPORT_ERROR_BIT_EXT,
- object, line, 0, "radv", report);
- }
+ if (instance) {
+ vk_debug_report(&instance->vk, VK_DEBUG_REPORT_ERROR_BIT_EXT, object, line, 0, "radv",
+ report);
+ }
- fprintf(stderr, "%s\n", report);
+ fprintf(stderr, "%s\n", report);
- return error;
+ return error;
}
VkResult
-__vk_errorf(struct radv_instance *instance, const void *object,
- VkDebugReportObjectTypeEXT type, VkResult error, const char *file,
- int line, const char *format, ...)
+__vk_errorf(struct radv_instance *instance, const void *object, VkDebugReportObjectTypeEXT type,
+ VkResult error, const char *file, int line, const char *format, ...)
{
- va_list ap;
+ va_list ap;
- va_start(ap, format);
- __vk_errorv(instance, object, type, error, file, line, format, ap);
- va_end(ap);
+ va_start(ap, format);
+ __vk_errorv(instance, object, type, error, file, line, format, ap);
+ va_end(ap);
- return error;
+ return error;
}
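The __vk_errorf/__vk_errorv pair follows the usual varargs-plus-va_list split. In isolation, the pattern looks like this (names are generic, not the driver's):

#include <stdarg.h>
#include <stdio.h>

/* The va_list variant does the real work. */
static void report_v(const char *file, int line, const char *format, va_list ap)
{
   char buffer[256];

   vsnprintf(buffer, sizeof(buffer), format, ap);
   fprintf(stderr, "%s:%d: %s\n", file, line, buffer);
}

/* The varargs wrapper only forwards to the va_list variant. */
static void report(const char *file, int line, const char *format, ...)
{
   va_list ap;

   va_start(ap, format);
   report_v(file, line, format, ap);
   va_end(ap);
}

int main(void)
{
   report(__FILE__, __LINE__, "something went wrong (%d)", 42);
   return 0;
}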
diff --git a/src/amd/vulkan/radv_wsi.c b/src/amd/vulkan/radv_wsi.c
index da3114b0334..8e9d70b56c9 100644
--- a/src/amd/vulkan/radv_wsi.c
+++ b/src/amd/vulkan/radv_wsi.c
@@ -23,315 +23,263 @@
* IN THE SOFTWARE.
*/
-#include "radv_private.h"
+#include "util/macros.h"
#include "radv_meta.h"
-#include "wsi_common.h"
+#include "radv_private.h"
#include "vk_util.h"
-#include "util/macros.h"
+#include "wsi_common.h"
static PFN_vkVoidFunction
radv_wsi_proc_addr(VkPhysicalDevice physicalDevice, const char *pName)
{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
- return vk_instance_get_proc_addr_unchecked(&pdevice->instance->vk, pName);
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
+ return vk_instance_get_proc_addr_unchecked(&pdevice->instance->vk, pName);
}
static void
-radv_wsi_set_memory_ownership(VkDevice _device,
- VkDeviceMemory _mem,
- VkBool32 ownership)
+radv_wsi_set_memory_ownership(VkDevice _device, VkDeviceMemory _mem, VkBool32 ownership)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
- if (device->use_global_bo_list) {
- device->ws->buffer_make_resident(device->ws, mem->bo, ownership);
- }
+ if (device->use_global_bo_list) {
+ device->ws->buffer_make_resident(device->ws, mem->bo, ownership);
+ }
}
VkResult
radv_init_wsi(struct radv_physical_device *physical_device)
{
- VkResult result = wsi_device_init(&physical_device->wsi_device,
- radv_physical_device_to_handle(physical_device),
- radv_wsi_proc_addr,
- &physical_device->instance->vk.alloc,
- physical_device->master_fd,
- &physical_device->instance->dri_options,
- false);
- if (result != VK_SUCCESS)
- return result;
-
- physical_device->wsi_device.supports_modifiers = physical_device->rad_info.chip_class >= GFX9;
- physical_device->wsi_device.set_memory_ownership = radv_wsi_set_memory_ownership;
- return VK_SUCCESS;
+ VkResult result =
+ wsi_device_init(&physical_device->wsi_device, radv_physical_device_to_handle(physical_device),
+ radv_wsi_proc_addr, &physical_device->instance->vk.alloc,
+ physical_device->master_fd, &physical_device->instance->dri_options, false);
+ if (result != VK_SUCCESS)
+ return result;
+
+ physical_device->wsi_device.supports_modifiers = physical_device->rad_info.chip_class >= GFX9;
+ physical_device->wsi_device.set_memory_ownership = radv_wsi_set_memory_ownership;
+ return VK_SUCCESS;
}
void
radv_finish_wsi(struct radv_physical_device *physical_device)
{
- wsi_device_finish(&physical_device->wsi_device,
- &physical_device->instance->vk.alloc);
+ wsi_device_finish(&physical_device->wsi_device, &physical_device->instance->vk.alloc);
}
-void radv_DestroySurfaceKHR(
- VkInstance _instance,
- VkSurfaceKHR _surface,
- const VkAllocationCallbacks* pAllocator)
+void
+radv_DestroySurfaceKHR(VkInstance _instance, VkSurfaceKHR _surface,
+ const VkAllocationCallbacks *pAllocator)
{
- RADV_FROM_HANDLE(radv_instance, instance, _instance);
- ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface);
+ RADV_FROM_HANDLE(radv_instance, instance, _instance);
+ ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface);
- vk_free2(&instance->vk.alloc, pAllocator, surface);
+ vk_free2(&instance->vk.alloc, pAllocator, surface);
}
-VkResult radv_GetPhysicalDeviceSurfaceSupportKHR(
- VkPhysicalDevice physicalDevice,
- uint32_t queueFamilyIndex,
- VkSurfaceKHR surface,
- VkBool32* pSupported)
+VkResult
+radv_GetPhysicalDeviceSurfaceSupportKHR(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex,
+ VkSurfaceKHR surface, VkBool32 *pSupported)
{
- RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
+ RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
- return wsi_common_get_surface_support(&device->wsi_device,
- queueFamilyIndex,
- surface,
- pSupported);
+ return wsi_common_get_surface_support(&device->wsi_device, queueFamilyIndex, surface,
+ pSupported);
}
-VkResult radv_GetPhysicalDeviceSurfaceCapabilitiesKHR(
- VkPhysicalDevice physicalDevice,
- VkSurfaceKHR surface,
- VkSurfaceCapabilitiesKHR* pSurfaceCapabilities)
+VkResult
+radv_GetPhysicalDeviceSurfaceCapabilitiesKHR(VkPhysicalDevice physicalDevice, VkSurfaceKHR surface,
+ VkSurfaceCapabilitiesKHR *pSurfaceCapabilities)
{
- RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
+ RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
- return wsi_common_get_surface_capabilities(&device->wsi_device,
- surface,
- pSurfaceCapabilities);
+ return wsi_common_get_surface_capabilities(&device->wsi_device, surface, pSurfaceCapabilities);
}
-VkResult radv_GetPhysicalDeviceSurfaceCapabilities2KHR(
- VkPhysicalDevice physicalDevice,
- const VkPhysicalDeviceSurfaceInfo2KHR* pSurfaceInfo,
- VkSurfaceCapabilities2KHR* pSurfaceCapabilities)
+VkResult
+radv_GetPhysicalDeviceSurfaceCapabilities2KHR(VkPhysicalDevice physicalDevice,
+ const VkPhysicalDeviceSurfaceInfo2KHR *pSurfaceInfo,
+ VkSurfaceCapabilities2KHR *pSurfaceCapabilities)
{
- RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
+ RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
- return wsi_common_get_surface_capabilities2(&device->wsi_device,
- pSurfaceInfo,
- pSurfaceCapabilities);
+ return wsi_common_get_surface_capabilities2(&device->wsi_device, pSurfaceInfo,
+ pSurfaceCapabilities);
}
-VkResult radv_GetPhysicalDeviceSurfaceCapabilities2EXT(
- VkPhysicalDevice physicalDevice,
- VkSurfaceKHR surface,
- VkSurfaceCapabilities2EXT* pSurfaceCapabilities)
+VkResult
+radv_GetPhysicalDeviceSurfaceCapabilities2EXT(VkPhysicalDevice physicalDevice, VkSurfaceKHR surface,
+ VkSurfaceCapabilities2EXT *pSurfaceCapabilities)
{
- RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
+ RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
- return wsi_common_get_surface_capabilities2ext(&device->wsi_device,
- surface,
- pSurfaceCapabilities);
+ return wsi_common_get_surface_capabilities2ext(&device->wsi_device, surface,
+ pSurfaceCapabilities);
}
-VkResult radv_GetPhysicalDeviceSurfaceFormatsKHR(
- VkPhysicalDevice physicalDevice,
- VkSurfaceKHR surface,
- uint32_t* pSurfaceFormatCount,
- VkSurfaceFormatKHR* pSurfaceFormats)
+VkResult
+radv_GetPhysicalDeviceSurfaceFormatsKHR(VkPhysicalDevice physicalDevice, VkSurfaceKHR surface,
+ uint32_t *pSurfaceFormatCount,
+ VkSurfaceFormatKHR *pSurfaceFormats)
{
- RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
+ RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
- return wsi_common_get_surface_formats(&device->wsi_device,
- surface,
- pSurfaceFormatCount,
- pSurfaceFormats);
+ return wsi_common_get_surface_formats(&device->wsi_device, surface, pSurfaceFormatCount,
+ pSurfaceFormats);
}
-VkResult radv_GetPhysicalDeviceSurfaceFormats2KHR(
- VkPhysicalDevice physicalDevice,
- const VkPhysicalDeviceSurfaceInfo2KHR* pSurfaceInfo,
- uint32_t* pSurfaceFormatCount,
- VkSurfaceFormat2KHR* pSurfaceFormats)
+VkResult
+radv_GetPhysicalDeviceSurfaceFormats2KHR(VkPhysicalDevice physicalDevice,
+ const VkPhysicalDeviceSurfaceInfo2KHR *pSurfaceInfo,
+ uint32_t *pSurfaceFormatCount,
+ VkSurfaceFormat2KHR *pSurfaceFormats)
{
- RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
+ RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
- return wsi_common_get_surface_formats2(&device->wsi_device,
- pSurfaceInfo,
- pSurfaceFormatCount,
- pSurfaceFormats);
+ return wsi_common_get_surface_formats2(&device->wsi_device, pSurfaceInfo, pSurfaceFormatCount,
+ pSurfaceFormats);
}
-VkResult radv_GetPhysicalDeviceSurfacePresentModesKHR(
- VkPhysicalDevice physicalDevice,
- VkSurfaceKHR surface,
- uint32_t* pPresentModeCount,
- VkPresentModeKHR* pPresentModes)
+VkResult
+radv_GetPhysicalDeviceSurfacePresentModesKHR(VkPhysicalDevice physicalDevice, VkSurfaceKHR surface,
+ uint32_t *pPresentModeCount,
+ VkPresentModeKHR *pPresentModes)
{
- RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
+ RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
- return wsi_common_get_surface_present_modes(&device->wsi_device,
- surface,
- pPresentModeCount,
- pPresentModes);
+ return wsi_common_get_surface_present_modes(&device->wsi_device, surface, pPresentModeCount,
+ pPresentModes);
}
-VkResult radv_CreateSwapchainKHR(
- VkDevice _device,
- const VkSwapchainCreateInfoKHR* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkSwapchainKHR* pSwapchain)
+VkResult
+radv_CreateSwapchainKHR(VkDevice _device, const VkSwapchainCreateInfoKHR *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkSwapchainKHR *pSwapchain)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- const VkAllocationCallbacks *alloc;
- if (pAllocator)
- alloc = pAllocator;
- else
- alloc = &device->vk.alloc;
-
- return wsi_common_create_swapchain(&device->physical_device->wsi_device,
- radv_device_to_handle(device),
- pCreateInfo,
- alloc,
- pSwapchain);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ const VkAllocationCallbacks *alloc;
+ if (pAllocator)
+ alloc = pAllocator;
+ else
+ alloc = &device->vk.alloc;
+
+ return wsi_common_create_swapchain(&device->physical_device->wsi_device,
+ radv_device_to_handle(device), pCreateInfo, alloc,
+ pSwapchain);
}
-void radv_DestroySwapchainKHR(
- VkDevice _device,
- VkSwapchainKHR swapchain,
- const VkAllocationCallbacks* pAllocator)
+void
+radv_DestroySwapchainKHR(VkDevice _device, VkSwapchainKHR swapchain,
+ const VkAllocationCallbacks *pAllocator)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- const VkAllocationCallbacks *alloc;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ const VkAllocationCallbacks *alloc;
- if (pAllocator)
- alloc = pAllocator;
- else
- alloc = &device->vk.alloc;
+ if (pAllocator)
+ alloc = pAllocator;
+ else
+ alloc = &device->vk.alloc;
- wsi_common_destroy_swapchain(_device, swapchain, alloc);
+ wsi_common_destroy_swapchain(_device, swapchain, alloc);
}
-VkResult radv_GetSwapchainImagesKHR(
- VkDevice device,
- VkSwapchainKHR swapchain,
- uint32_t* pSwapchainImageCount,
- VkImage* pSwapchainImages)
+VkResult
+radv_GetSwapchainImagesKHR(VkDevice device, VkSwapchainKHR swapchain,
+ uint32_t *pSwapchainImageCount, VkImage *pSwapchainImages)
{
- return wsi_common_get_images(swapchain,
- pSwapchainImageCount,
- pSwapchainImages);
+ return wsi_common_get_images(swapchain, pSwapchainImageCount, pSwapchainImages);
}
-VkResult radv_AcquireNextImageKHR(
- VkDevice device,
- VkSwapchainKHR swapchain,
- uint64_t timeout,
- VkSemaphore semaphore,
- VkFence fence,
- uint32_t* pImageIndex)
+VkResult
+radv_AcquireNextImageKHR(VkDevice device, VkSwapchainKHR swapchain, uint64_t timeout,
+ VkSemaphore semaphore, VkFence fence, uint32_t *pImageIndex)
{
- VkAcquireNextImageInfoKHR acquire_info = {
- .sType = VK_STRUCTURE_TYPE_ACQUIRE_NEXT_IMAGE_INFO_KHR,
- .swapchain = swapchain,
- .timeout = timeout,
- .semaphore = semaphore,
- .fence = fence,
- .deviceMask = 0,
- };
-
- return radv_AcquireNextImage2KHR(device, &acquire_info, pImageIndex);
+ VkAcquireNextImageInfoKHR acquire_info = {
+ .sType = VK_STRUCTURE_TYPE_ACQUIRE_NEXT_IMAGE_INFO_KHR,
+ .swapchain = swapchain,
+ .timeout = timeout,
+ .semaphore = semaphore,
+ .fence = fence,
+ .deviceMask = 0,
+ };
+
+ return radv_AcquireNextImage2KHR(device, &acquire_info, pImageIndex);
}
-VkResult radv_AcquireNextImage2KHR(
- VkDevice _device,
- const VkAcquireNextImageInfoKHR* pAcquireInfo,
- uint32_t* pImageIndex)
+VkResult
+radv_AcquireNextImage2KHR(VkDevice _device, const VkAcquireNextImageInfoKHR *pAcquireInfo,
+ uint32_t *pImageIndex)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- struct radv_physical_device *pdevice = device->physical_device;
- RADV_FROM_HANDLE(radv_fence, fence, pAcquireInfo->fence);
- RADV_FROM_HANDLE(radv_semaphore, semaphore, pAcquireInfo->semaphore);
-
- VkResult result = wsi_common_acquire_next_image2(&pdevice->wsi_device,
- _device,
- pAcquireInfo,
- pImageIndex);
-
- if (result == VK_SUCCESS || result == VK_SUBOPTIMAL_KHR) {
- if (fence) {
- struct radv_fence_part *part =
- fence->temporary.kind != RADV_FENCE_NONE ?
- &fence->temporary : &fence->permanent;
-
- device->ws->signal_syncobj(device->ws, part->syncobj, 0);
- }
- if (semaphore) {
- struct radv_semaphore_part *part =
- semaphore->temporary.kind != RADV_SEMAPHORE_NONE ?
- &semaphore->temporary : &semaphore->permanent;
-
- switch (part->kind) {
- case RADV_SEMAPHORE_NONE:
- /* Do not need to do anything. */
- break;
- case RADV_SEMAPHORE_TIMELINE:
- case RADV_SEMAPHORE_TIMELINE_SYNCOBJ:
- unreachable("WSI only allows binary semaphores.");
- case RADV_SEMAPHORE_SYNCOBJ:
- device->ws->signal_syncobj(device->ws, part->syncobj, 0);
- break;
- }
- }
- }
- return result;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ struct radv_physical_device *pdevice = device->physical_device;
+ RADV_FROM_HANDLE(radv_fence, fence, pAcquireInfo->fence);
+ RADV_FROM_HANDLE(radv_semaphore, semaphore, pAcquireInfo->semaphore);
+
+ VkResult result =
+ wsi_common_acquire_next_image2(&pdevice->wsi_device, _device, pAcquireInfo, pImageIndex);
+
+ if (result == VK_SUCCESS || result == VK_SUBOPTIMAL_KHR) {
+ if (fence) {
+ struct radv_fence_part *part =
+ fence->temporary.kind != RADV_FENCE_NONE ? &fence->temporary : &fence->permanent;
+
+ device->ws->signal_syncobj(device->ws, part->syncobj, 0);
+ }
+ if (semaphore) {
+ struct radv_semaphore_part *part = semaphore->temporary.kind != RADV_SEMAPHORE_NONE
+ ? &semaphore->temporary
+ : &semaphore->permanent;
+
+ switch (part->kind) {
+ case RADV_SEMAPHORE_NONE:
+ /* Do not need to do anything. */
+ break;
+ case RADV_SEMAPHORE_TIMELINE:
+ case RADV_SEMAPHORE_TIMELINE_SYNCOBJ:
+ unreachable("WSI only allows binary semaphores.");
+ case RADV_SEMAPHORE_SYNCOBJ:
+ device->ws->signal_syncobj(device->ws, part->syncobj, 0);
+ break;
+ }
+ }
+ }
+ return result;
}
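For context, an application reaches this path through vkAcquireNextImage2KHR. A minimal usage sketch, assuming the device, swapchain and semaphore were created elsewhere:

#include <stdint.h>
#include <vulkan/vulkan.h>

/* Hypothetical helper: acquires the next swapchain image, blocking until one
 * is available and signaling 'acquire_semaphore' when it is ready for use. */
static VkResult acquire_next_image(VkDevice device, VkSwapchainKHR swapchain,
                                   VkSemaphore acquire_semaphore, uint32_t *image_index)
{
   const VkAcquireNextImageInfoKHR info = {
      .sType = VK_STRUCTURE_TYPE_ACQUIRE_NEXT_IMAGE_INFO_KHR,
      .swapchain = swapchain,
      .timeout = UINT64_MAX,          /* block until an image is available */
      .semaphore = acquire_semaphore, /* signaled once the image can be used */
      .fence = VK_NULL_HANDLE,
      .deviceMask = 0x1,              /* single-GPU device group */
   };

   return vkAcquireNextImage2KHR(device, &info, image_index);
}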
-VkResult radv_QueuePresentKHR(
- VkQueue _queue,
- const VkPresentInfoKHR* pPresentInfo)
+VkResult
+radv_QueuePresentKHR(VkQueue _queue, const VkPresentInfoKHR *pPresentInfo)
{
- RADV_FROM_HANDLE(radv_queue, queue, _queue);
- return wsi_common_queue_present(&queue->device->physical_device->wsi_device,
- radv_device_to_handle(queue->device),
- _queue,
- queue->queue_family_index,
- pPresentInfo);
+ RADV_FROM_HANDLE(radv_queue, queue, _queue);
+ return wsi_common_queue_present(&queue->device->physical_device->wsi_device,
+ radv_device_to_handle(queue->device), _queue,
+ queue->queue_family_index, pPresentInfo);
}
-
-VkResult radv_GetDeviceGroupPresentCapabilitiesKHR(
- VkDevice device,
- VkDeviceGroupPresentCapabilitiesKHR* pCapabilities)
+VkResult
+radv_GetDeviceGroupPresentCapabilitiesKHR(VkDevice device,
+ VkDeviceGroupPresentCapabilitiesKHR *pCapabilities)
{
- memset(pCapabilities->presentMask, 0,
- sizeof(pCapabilities->presentMask));
+ memset(pCapabilities->presentMask, 0, sizeof(pCapabilities->presentMask));
pCapabilities->presentMask[0] = 0x1;
pCapabilities->modes = VK_DEVICE_GROUP_PRESENT_MODE_LOCAL_BIT_KHR;
return VK_SUCCESS;
}
-VkResult radv_GetDeviceGroupSurfacePresentModesKHR(
- VkDevice device,
- VkSurfaceKHR surface,
- VkDeviceGroupPresentModeFlagsKHR* pModes)
+VkResult
+radv_GetDeviceGroupSurfacePresentModesKHR(VkDevice device, VkSurfaceKHR surface,
+ VkDeviceGroupPresentModeFlagsKHR *pModes)
{
*pModes = VK_DEVICE_GROUP_PRESENT_MODE_LOCAL_BIT_KHR;
return VK_SUCCESS;
}
-VkResult radv_GetPhysicalDevicePresentRectanglesKHR(
- VkPhysicalDevice physicalDevice,
- VkSurfaceKHR surface,
- uint32_t* pRectCount,
- VkRect2D* pRects)
+VkResult
+radv_GetPhysicalDevicePresentRectanglesKHR(VkPhysicalDevice physicalDevice, VkSurfaceKHR surface,
+ uint32_t *pRectCount, VkRect2D *pRects)
{
- RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
+ RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
- return wsi_common_get_present_rectangles(&device->wsi_device,
- surface,
- pRectCount, pRects);
+ return wsi_common_get_present_rectangles(&device->wsi_device, surface, pRectCount, pRects);
}
diff --git a/src/amd/vulkan/radv_wsi_display.c b/src/amd/vulkan/radv_wsi_display.c
index 64c7acd3734..6548466e5b5 100644
--- a/src/amd/vulkan/radv_wsi_display.c
+++ b/src/amd/vulkan/radv_wsi_display.c
@@ -20,39 +20,36 @@
* OF THIS SOFTWARE.
*/
+#include <amdgpu.h>
+#include <fcntl.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
-#include <fcntl.h>
-#include "radv_private.h"
-#include "radv_cs.h"
-#include "util/disk_cache.h"
-#include "util/strtod.h"
-#include "vk_util.h"
#include <xf86drm.h>
#include <xf86drmMode.h>
-#include <amdgpu.h>
#include "drm-uapi/amdgpu_drm.h"
+#include "util/debug.h"
+#include "util/disk_cache.h"
+#include "util/strtod.h"
#include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
-#include "vk_format.h"
+#include "radv_cs.h"
+#include "radv_private.h"
#include "sid.h"
-#include "util/debug.h"
+#include "vk_format.h"
+#include "vk_util.h"
#include "wsi_common_display.h"
-#define MM_PER_PIXEL (1.0/96.0 * 25.4)
+#define MM_PER_PIXEL (1.0 / 96.0 * 25.4)
VkResult
radv_GetPhysicalDeviceDisplayPropertiesKHR(VkPhysicalDevice physical_device,
uint32_t *property_count,
VkDisplayPropertiesKHR *properties)
{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
- return wsi_display_get_physical_device_display_properties(
- physical_device,
- &pdevice->wsi_device,
- property_count,
- properties);
+ return wsi_display_get_physical_device_display_properties(physical_device, &pdevice->wsi_device,
+ property_count, properties);
}
VkResult
@@ -60,122 +57,84 @@ radv_GetPhysicalDeviceDisplayProperties2KHR(VkPhysicalDevice physical_device,
uint32_t *property_count,
VkDisplayProperties2KHR *properties)
{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
- return wsi_display_get_physical_device_display_properties2(
- physical_device,
- &pdevice->wsi_device,
- property_count,
- properties);
+ return wsi_display_get_physical_device_display_properties2(physical_device, &pdevice->wsi_device,
+ property_count, properties);
}
VkResult
-radv_GetPhysicalDeviceDisplayPlanePropertiesKHR(
- VkPhysicalDevice physical_device,
- uint32_t *property_count,
- VkDisplayPlanePropertiesKHR *properties)
+radv_GetPhysicalDeviceDisplayPlanePropertiesKHR(VkPhysicalDevice physical_device,
+ uint32_t *property_count,
+ VkDisplayPlanePropertiesKHR *properties)
{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
- return wsi_display_get_physical_device_display_plane_properties(
- physical_device,
- &pdevice->wsi_device,
- property_count,
- properties);
+ return wsi_display_get_physical_device_display_plane_properties(
+ physical_device, &pdevice->wsi_device, property_count, properties);
}
VkResult
-radv_GetPhysicalDeviceDisplayPlaneProperties2KHR(
- VkPhysicalDevice physical_device,
- uint32_t *property_count,
- VkDisplayPlaneProperties2KHR *properties)
+radv_GetPhysicalDeviceDisplayPlaneProperties2KHR(VkPhysicalDevice physical_device,
+ uint32_t *property_count,
+ VkDisplayPlaneProperties2KHR *properties)
{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
- return wsi_display_get_physical_device_display_plane_properties2(
- physical_device,
- &pdevice->wsi_device,
- property_count,
- properties);
+ return wsi_display_get_physical_device_display_plane_properties2(
+ physical_device, &pdevice->wsi_device, property_count, properties);
}
VkResult
-radv_GetDisplayPlaneSupportedDisplaysKHR(VkPhysicalDevice physical_device,
- uint32_t plane_index,
- uint32_t *display_count,
- VkDisplayKHR *displays)
+radv_GetDisplayPlaneSupportedDisplaysKHR(VkPhysicalDevice physical_device, uint32_t plane_index,
+ uint32_t *display_count, VkDisplayKHR *displays)
{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
-
- return wsi_display_get_display_plane_supported_displays(
- physical_device,
- &pdevice->wsi_device,
- plane_index,
- display_count,
- displays);
-}
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
+ return wsi_display_get_display_plane_supported_displays(physical_device, &pdevice->wsi_device,
+ plane_index, display_count, displays);
+}
VkResult
-radv_GetDisplayModePropertiesKHR(VkPhysicalDevice physical_device,
- VkDisplayKHR display,
- uint32_t *property_count,
- VkDisplayModePropertiesKHR *properties)
+radv_GetDisplayModePropertiesKHR(VkPhysicalDevice physical_device, VkDisplayKHR display,
+ uint32_t *property_count, VkDisplayModePropertiesKHR *properties)
{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
- return wsi_display_get_display_mode_properties(physical_device,
- &pdevice->wsi_device,
- display,
- property_count,
- properties);
+ return wsi_display_get_display_mode_properties(physical_device, &pdevice->wsi_device, display,
+ property_count, properties);
}
VkResult
-radv_GetDisplayModeProperties2KHR(VkPhysicalDevice physical_device,
- VkDisplayKHR display,
- uint32_t *property_count,
- VkDisplayModeProperties2KHR *properties)
+radv_GetDisplayModeProperties2KHR(VkPhysicalDevice physical_device, VkDisplayKHR display,
+ uint32_t *property_count, VkDisplayModeProperties2KHR *properties)
{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
- return wsi_display_get_display_mode_properties2(physical_device,
- &pdevice->wsi_device,
- display,
- property_count,
- properties);
+ return wsi_display_get_display_mode_properties2(physical_device, &pdevice->wsi_device, display,
+ property_count, properties);
}
VkResult
-radv_CreateDisplayModeKHR(VkPhysicalDevice physical_device,
- VkDisplayKHR display,
+radv_CreateDisplayModeKHR(VkPhysicalDevice physical_device, VkDisplayKHR display,
const VkDisplayModeCreateInfoKHR *create_info,
- const VkAllocationCallbacks *allocator,
- VkDisplayModeKHR *mode)
+ const VkAllocationCallbacks *allocator, VkDisplayModeKHR *mode)
{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
-
- return wsi_display_create_display_mode(physical_device,
- &pdevice->wsi_device,
- display,
- create_info,
- allocator,
- mode);
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
+
+ return wsi_display_create_display_mode(physical_device, &pdevice->wsi_device, display,
+ create_info, allocator, mode);
}
VkResult
-radv_GetDisplayPlaneCapabilitiesKHR(VkPhysicalDevice physical_device,
- VkDisplayModeKHR mode_khr,
+radv_GetDisplayPlaneCapabilitiesKHR(VkPhysicalDevice physical_device, VkDisplayModeKHR mode_khr,
uint32_t plane_index,
VkDisplayPlaneCapabilitiesKHR *capabilities)
{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
- return wsi_get_display_plane_capabilities(physical_device,
- &pdevice->wsi_device,
- mode_khr,
- plane_index,
- capabilities);
+ return wsi_get_display_plane_capabilities(physical_device, &pdevice->wsi_device, mode_khr,
+ plane_index, capabilities);
}
VkResult
@@ -183,187 +142,152 @@ radv_GetDisplayPlaneCapabilities2KHR(VkPhysicalDevice physical_device,
const VkDisplayPlaneInfo2KHR *pDisplayPlaneInfo,
VkDisplayPlaneCapabilities2KHR *capabilities)
{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
- return wsi_get_display_plane_capabilities2(physical_device,
- &pdevice->wsi_device,
- pDisplayPlaneInfo,
- capabilities);
+ return wsi_get_display_plane_capabilities2(physical_device, &pdevice->wsi_device,
+ pDisplayPlaneInfo, capabilities);
}
VkResult
-radv_CreateDisplayPlaneSurfaceKHR(
- VkInstance _instance,
- const VkDisplaySurfaceCreateInfoKHR *create_info,
- const VkAllocationCallbacks *allocator,
- VkSurfaceKHR *surface)
+radv_CreateDisplayPlaneSurfaceKHR(VkInstance _instance,
+ const VkDisplaySurfaceCreateInfoKHR *create_info,
+ const VkAllocationCallbacks *allocator, VkSurfaceKHR *surface)
{
- RADV_FROM_HANDLE(radv_instance, instance, _instance);
- const VkAllocationCallbacks *alloc;
+ RADV_FROM_HANDLE(radv_instance, instance, _instance);
+ const VkAllocationCallbacks *alloc;
- if (allocator)
- alloc = allocator;
- else
- alloc = &instance->vk.alloc;
+ if (allocator)
+ alloc = allocator;
+ else
+ alloc = &instance->vk.alloc;
- return wsi_create_display_surface(_instance, alloc,
- create_info, surface);
+ return wsi_create_display_surface(_instance, alloc, create_info, surface);
}
VkResult
-radv_ReleaseDisplayEXT(VkPhysicalDevice physical_device,
- VkDisplayKHR display)
+radv_ReleaseDisplayEXT(VkPhysicalDevice physical_device, VkDisplayKHR display)
{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
- return wsi_release_display(physical_device,
- &pdevice->wsi_device,
- display);
+ return wsi_release_display(physical_device, &pdevice->wsi_device, display);
}
#ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
VkResult
-radv_AcquireXlibDisplayEXT(VkPhysicalDevice physical_device,
- Display *dpy,
- VkDisplayKHR display)
+radv_AcquireXlibDisplayEXT(VkPhysicalDevice physical_device, Display *dpy, VkDisplayKHR display)
{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
- return wsi_acquire_xlib_display(physical_device,
- &pdevice->wsi_device,
- dpy,
- display);
+ return wsi_acquire_xlib_display(physical_device, &pdevice->wsi_device, dpy, display);
}
VkResult
-radv_GetRandROutputDisplayEXT(VkPhysicalDevice physical_device,
- Display *dpy,
- RROutput output,
- VkDisplayKHR *display)
+radv_GetRandROutputDisplayEXT(VkPhysicalDevice physical_device, Display *dpy, RROutput output,
+ VkDisplayKHR *display)
{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
- return wsi_get_randr_output_display(physical_device,
- &pdevice->wsi_device,
- dpy,
- output,
- display);
+ return wsi_get_randr_output_display(physical_device, &pdevice->wsi_device, dpy, output, display);
}
#endif /* VK_USE_PLATFORM_XLIB_XRANDR_EXT */
/* VK_EXT_display_control */
VkResult
-radv_DisplayPowerControlEXT(VkDevice _device,
- VkDisplayKHR display,
- const VkDisplayPowerInfoEXT *display_power_info)
+radv_DisplayPowerControlEXT(VkDevice _device, VkDisplayKHR display,
+ const VkDisplayPowerInfoEXT *display_power_info)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_device, device, _device);
- return wsi_display_power_control(_device,
- &device->physical_device->wsi_device,
- display,
- display_power_info);
+ return wsi_display_power_control(_device, &device->physical_device->wsi_device, display,
+ display_power_info);
}
VkResult
-radv_RegisterDeviceEventEXT(VkDevice _device,
- const VkDeviceEventInfoEXT *device_event_info,
- const VkAllocationCallbacks *allocator,
- VkFence *_fence)
+radv_RegisterDeviceEventEXT(VkDevice _device, const VkDeviceEventInfoEXT *device_event_info,
+ const VkAllocationCallbacks *allocator, VkFence *_fence)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- VkResult ret;
- int fd;
-
- ret = radv_CreateFence(_device, &(VkFenceCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
- .pNext = &(VkExportFenceCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_EXPORT_FENCE_CREATE_INFO,
- .handleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT,
- },
- }, allocator, _fence);
- if (ret != VK_SUCCESS)
- return ret;
-
- RADV_FROM_HANDLE(radv_fence, fence, *_fence);
-
- assert(fence->permanent.kind == RADV_FENCE_SYNCOBJ);
-
- if (device->ws->export_syncobj(device->ws, fence->permanent.syncobj, &fd)) {
- ret = VK_ERROR_OUT_OF_HOST_MEMORY;
- } else {
- ret = wsi_register_device_event(_device,
- &device->physical_device->wsi_device,
- device_event_info,
- allocator,
- NULL,
- fd);
- close(fd);
- }
-
- if (ret != VK_SUCCESS)
- radv_DestroyFence(_device, *_fence, allocator);
-
- return ret;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ VkResult ret;
+ int fd;
+
+ ret = radv_CreateFence(_device,
+ &(VkFenceCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
+ .pNext =
+ &(VkExportFenceCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_EXPORT_FENCE_CREATE_INFO,
+ .handleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT,
+ },
+ },
+ allocator, _fence);
+ if (ret != VK_SUCCESS)
+ return ret;
+
+ RADV_FROM_HANDLE(radv_fence, fence, *_fence);
+
+ assert(fence->permanent.kind == RADV_FENCE_SYNCOBJ);
+
+ if (device->ws->export_syncobj(device->ws, fence->permanent.syncobj, &fd)) {
+ ret = VK_ERROR_OUT_OF_HOST_MEMORY;
+ } else {
+ ret = wsi_register_device_event(_device, &device->physical_device->wsi_device,
+ device_event_info, allocator, NULL, fd);
+ close(fd);
+ }
+
+ if (ret != VK_SUCCESS)
+ radv_DestroyFence(_device, *_fence, allocator);
+
+ return ret;
}
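radv_RegisterDeviceEventEXT above backs the VK_EXT_display_control device-event fence with an exported syncobj fd. A hedged application-side sketch of the same feature; register_hotplug_fence is a hypothetical helper, and the entry point is resolved through vkGetDeviceProcAddr because EXT commands are not exported by the loader:

#include <stddef.h>
#include <vulkan/vulkan.h>

/* Hypothetical helper: obtain a fence that signals on display hotplug. */
static VkResult
register_hotplug_fence(VkDevice device, VkFence *fence)
{
   PFN_vkRegisterDeviceEventEXT register_event =
      (PFN_vkRegisterDeviceEventEXT)vkGetDeviceProcAddr(device, "vkRegisterDeviceEventEXT");
   if (!register_event)
      return VK_ERROR_EXTENSION_NOT_PRESENT;

   const VkDeviceEventInfoEXT event_info = {
      .sType = VK_STRUCTURE_TYPE_DEVICE_EVENT_INFO_EXT,
      .deviceEvent = VK_DEVICE_EVENT_TYPE_DISPLAY_HOTPLUG_EXT,
   };

   /* The resulting fence can be waited on like any other VkFence; the
    * driver backs it with the exported syncobj shown in the hunk above. */
   return register_event(device, &event_info, NULL, fence);
}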
VkResult
-radv_RegisterDisplayEventEXT(VkDevice _device,
- VkDisplayKHR display,
- const VkDisplayEventInfoEXT *display_event_info,
- const VkAllocationCallbacks *allocator,
- VkFence *_fence)
+radv_RegisterDisplayEventEXT(VkDevice _device, VkDisplayKHR display,
+ const VkDisplayEventInfoEXT *display_event_info,
+ const VkAllocationCallbacks *allocator, VkFence *_fence)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- VkResult ret;
- int fd;
-
- ret = radv_CreateFence(_device, &(VkFenceCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
- .pNext = &(VkExportFenceCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_EXPORT_FENCE_CREATE_INFO,
- .handleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT,
- },
- }, allocator, _fence);
- if (ret != VK_SUCCESS)
- return ret;
-
- RADV_FROM_HANDLE(radv_fence, fence, *_fence);
-
- assert(fence->permanent.kind == RADV_FENCE_SYNCOBJ);
-
- if (device->ws->export_syncobj(device->ws, fence->permanent.syncobj, &fd)) {
- ret = VK_ERROR_OUT_OF_HOST_MEMORY;
- } else {
- ret = wsi_register_display_event(_device,
- &device->physical_device->wsi_device,
- display,
- display_event_info,
- allocator,
- NULL,
- fd);
- close(fd);
- }
-
- if (ret != VK_SUCCESS)
- radv_DestroyFence(_device, *_fence, allocator);
-
- return ret;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ VkResult ret;
+ int fd;
+
+ ret = radv_CreateFence(_device,
+ &(VkFenceCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
+ .pNext =
+ &(VkExportFenceCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_EXPORT_FENCE_CREATE_INFO,
+ .handleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT,
+ },
+ },
+ allocator, _fence);
+ if (ret != VK_SUCCESS)
+ return ret;
+
+ RADV_FROM_HANDLE(radv_fence, fence, *_fence);
+
+ assert(fence->permanent.kind == RADV_FENCE_SYNCOBJ);
+
+ if (device->ws->export_syncobj(device->ws, fence->permanent.syncobj, &fd)) {
+ ret = VK_ERROR_OUT_OF_HOST_MEMORY;
+ } else {
+ ret = wsi_register_display_event(_device, &device->physical_device->wsi_device, display,
+ display_event_info, allocator, NULL, fd);
+ close(fd);
+ }
+
+ if (ret != VK_SUCCESS)
+ radv_DestroyFence(_device, *_fence, allocator);
+
+ return ret;
}
VkResult
-radv_GetSwapchainCounterEXT(VkDevice _device,
- VkSwapchainKHR swapchain,
- VkSurfaceCounterFlagBitsEXT flag_bits,
- uint64_t *value)
+radv_GetSwapchainCounterEXT(VkDevice _device, VkSwapchainKHR swapchain,
+ VkSurfaceCounterFlagBitsEXT flag_bits, uint64_t *value)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_device, device, _device);
- return wsi_get_swapchain_counter(_device,
- &device->physical_device->wsi_device,
- swapchain,
- flag_bits,
- value);
+ return wsi_get_swapchain_counter(_device, &device->physical_device->wsi_device, swapchain,
+ flag_bits, value);
}
-
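radv_GetSwapchainCounterEXT above is a thin forward to the WSI layer. A minimal sketch of the application-side query, assuming the swapchain was created with VK_SURFACE_COUNTER_VBLANK_EXT enabled through VkSwapchainCounterCreateInfoEXT; read_vblank_counter is a hypothetical helper name:

#include <stdint.h>
#include <vulkan/vulkan.h>

/* Hypothetical helper: read the vblank counter of a swapchain whose
 * creation enabled VK_SURFACE_COUNTER_VBLANK_EXT. */
static VkResult
read_vblank_counter(VkDevice device, VkSwapchainKHR swapchain, uint64_t *value)
{
   PFN_vkGetSwapchainCounterEXT get_counter =
      (PFN_vkGetSwapchainCounterEXT)vkGetDeviceProcAddr(device, "vkGetSwapchainCounterEXT");
   if (!get_counter)
      return VK_ERROR_EXTENSION_NOT_PRESENT;

   return get_counter(device, swapchain, VK_SURFACE_COUNTER_VBLANK_EXT, value);
}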
diff --git a/src/amd/vulkan/radv_wsi_wayland.c b/src/amd/vulkan/radv_wsi_wayland.c
index dba5a0610a0..0f5751b3898 100644
--- a/src/amd/vulkan/radv_wsi_wayland.c
+++ b/src/amd/vulkan/radv_wsi_wayland.c
@@ -23,24 +23,22 @@
* IN THE SOFTWARE.
*/
-#include "wsi_common_wayland.h"
#include "radv_private.h"
+#include "wsi_common_wayland.h"
-VkBool32 radv_GetPhysicalDeviceWaylandPresentationSupportKHR(
- VkPhysicalDevice physicalDevice,
- uint32_t queueFamilyIndex,
- struct wl_display* display)
+VkBool32
+radv_GetPhysicalDeviceWaylandPresentationSupportKHR(VkPhysicalDevice physicalDevice,
+ uint32_t queueFamilyIndex,
+ struct wl_display *display)
{
RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
return wsi_wl_get_presentation_support(&physical_device->wsi_device, display);
}
-VkResult radv_CreateWaylandSurfaceKHR(
- VkInstance _instance,
- const VkWaylandSurfaceCreateInfoKHR* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkSurfaceKHR* pSurface)
+VkResult
+radv_CreateWaylandSurfaceKHR(VkInstance _instance, const VkWaylandSurfaceCreateInfoKHR *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkSurfaceKHR *pSurface)
{
RADV_FROM_HANDLE(radv_instance, instance, _instance);
const VkAllocationCallbacks *alloc;
diff --git a/src/amd/vulkan/radv_wsi_x11.c b/src/amd/vulkan/radv_wsi_x11.c
index 8dee70555c1..e74cadf2012 100644
--- a/src/amd/vulkan/radv_wsi_x11.c
+++ b/src/amd/vulkan/radv_wsi_x11.c
@@ -27,64 +27,55 @@
#include <X11/Xlib-xcb.h>
#include <X11/xshmfence.h>
-#include <xcb/xcb.h>
#include <xcb/dri3.h>
#include <xcb/present.h>
+#include <xcb/xcb.h>
-#include "wsi_common_x11.h"
#include "radv_private.h"
+#include "wsi_common_x11.h"
-VkBool32 radv_GetPhysicalDeviceXcbPresentationSupportKHR(
- VkPhysicalDevice physicalDevice,
- uint32_t queueFamilyIndex,
- xcb_connection_t* connection,
- xcb_visualid_t visual_id)
+VkBool32
+radv_GetPhysicalDeviceXcbPresentationSupportKHR(VkPhysicalDevice physicalDevice,
+ uint32_t queueFamilyIndex,
+ xcb_connection_t *connection,
+ xcb_visualid_t visual_id)
{
RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
- return wsi_get_physical_device_xcb_presentation_support(
- &device->wsi_device,
- queueFamilyIndex,
- connection, visual_id);
+ return wsi_get_physical_device_xcb_presentation_support(&device->wsi_device, queueFamilyIndex,
+ connection, visual_id);
}
-VkBool32 radv_GetPhysicalDeviceXlibPresentationSupportKHR(
- VkPhysicalDevice physicalDevice,
- uint32_t queueFamilyIndex,
- Display* dpy,
- VisualID visualID)
+VkBool32
+radv_GetPhysicalDeviceXlibPresentationSupportKHR(VkPhysicalDevice physicalDevice,
+ uint32_t queueFamilyIndex, Display *dpy,
+ VisualID visualID)
{
RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
- return wsi_get_physical_device_xcb_presentation_support(
- &device->wsi_device,
- queueFamilyIndex,
- XGetXCBConnection(dpy), visualID);
+ return wsi_get_physical_device_xcb_presentation_support(&device->wsi_device, queueFamilyIndex,
+ XGetXCBConnection(dpy), visualID);
}
-VkResult radv_CreateXcbSurfaceKHR(
- VkInstance _instance,
- const VkXcbSurfaceCreateInfoKHR* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkSurfaceKHR* pSurface)
+VkResult
+radv_CreateXcbSurfaceKHR(VkInstance _instance, const VkXcbSurfaceCreateInfoKHR *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkSurfaceKHR *pSurface)
{
RADV_FROM_HANDLE(radv_instance, instance, _instance);
const VkAllocationCallbacks *alloc;
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_XCB_SURFACE_CREATE_INFO_KHR);
if (pAllocator)
- alloc = pAllocator;
+ alloc = pAllocator;
else
- alloc = &instance->vk.alloc;
+ alloc = &instance->vk.alloc;
return wsi_create_xcb_surface(alloc, pCreateInfo, pSurface);
}
-VkResult radv_CreateXlibSurfaceKHR(
- VkInstance _instance,
- const VkXlibSurfaceCreateInfoKHR* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkSurfaceKHR* pSurface)
+VkResult
+radv_CreateXlibSurfaceKHR(VkInstance _instance, const VkXlibSurfaceCreateInfoKHR *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkSurfaceKHR *pSurface)
{
RADV_FROM_HANDLE(radv_instance, instance, _instance);
const VkAllocationCallbacks *alloc;
@@ -92,9 +83,9 @@ VkResult radv_CreateXlibSurfaceKHR(
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR);
if (pAllocator)
- alloc = pAllocator;
+ alloc = pAllocator;
else
- alloc = &instance->vk.alloc;
+ alloc = &instance->vk.alloc;
return wsi_create_xlib_surface(alloc, pCreateInfo, pSurface);
}
diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
index 90421222a81..eeccc33b790 100644
--- a/src/amd/vulkan/si_cmd_buffer.c
+++ b/src/amd/vulkan/si_cmd_buffer.c
@@ -27,2036 +27,1883 @@
/* command buffer handling for AMD GCN */
+#include "radv_cs.h"
#include "radv_private.h"
#include "radv_shader.h"
-#include "radv_cs.h"
#include "sid.h"
static void
si_write_harvested_raster_configs(struct radv_physical_device *physical_device,
- struct radeon_cmdbuf *cs,
- unsigned raster_config,
- unsigned raster_config_1)
+ struct radeon_cmdbuf *cs, unsigned raster_config,
+ unsigned raster_config_1)
{
- unsigned num_se = MAX2(physical_device->rad_info.max_se, 1);
- unsigned raster_config_se[4];
- unsigned se;
-
- ac_get_harvested_configs(&physical_device->rad_info,
- raster_config,
- &raster_config_1,
- raster_config_se);
-
- for (se = 0; se < num_se; se++) {
- /* GRBM_GFX_INDEX has a different offset on GFX6 and GFX7+ */
- if (physical_device->rad_info.chip_class < GFX7)
- radeon_set_config_reg(cs, R_00802C_GRBM_GFX_INDEX,
- S_00802C_SE_INDEX(se) |
- S_00802C_SH_BROADCAST_WRITES(1) |
- S_00802C_INSTANCE_BROADCAST_WRITES(1));
- else
- radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX,
- S_030800_SE_INDEX(se) | S_030800_SH_BROADCAST_WRITES(1) |
- S_030800_INSTANCE_BROADCAST_WRITES(1));
- radeon_set_context_reg(cs, R_028350_PA_SC_RASTER_CONFIG, raster_config_se[se]);
- }
-
- /* GRBM_GFX_INDEX has a different offset on GFX6 and GFX7+ */
- if (physical_device->rad_info.chip_class < GFX7)
- radeon_set_config_reg(cs, R_00802C_GRBM_GFX_INDEX,
- S_00802C_SE_BROADCAST_WRITES(1) |
- S_00802C_SH_BROADCAST_WRITES(1) |
- S_00802C_INSTANCE_BROADCAST_WRITES(1));
- else
- radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX,
- S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) |
- S_030800_INSTANCE_BROADCAST_WRITES(1));
-
- if (physical_device->rad_info.chip_class >= GFX7)
- radeon_set_context_reg(cs, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1);
+ unsigned num_se = MAX2(physical_device->rad_info.max_se, 1);
+ unsigned raster_config_se[4];
+ unsigned se;
+
+ ac_get_harvested_configs(&physical_device->rad_info, raster_config, &raster_config_1,
+ raster_config_se);
+
+ for (se = 0; se < num_se; se++) {
+ /* GRBM_GFX_INDEX has a different offset on GFX6 and GFX7+ */
+ if (physical_device->rad_info.chip_class < GFX7)
+ radeon_set_config_reg(cs, R_00802C_GRBM_GFX_INDEX,
+ S_00802C_SE_INDEX(se) | S_00802C_SH_BROADCAST_WRITES(1) |
+ S_00802C_INSTANCE_BROADCAST_WRITES(1));
+ else
+ radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX,
+ S_030800_SE_INDEX(se) | S_030800_SH_BROADCAST_WRITES(1) |
+ S_030800_INSTANCE_BROADCAST_WRITES(1));
+ radeon_set_context_reg(cs, R_028350_PA_SC_RASTER_CONFIG, raster_config_se[se]);
+ }
+
+ /* GRBM_GFX_INDEX has a different offset on GFX6 and GFX7+ */
+ if (physical_device->rad_info.chip_class < GFX7)
+ radeon_set_config_reg(cs, R_00802C_GRBM_GFX_INDEX,
+ S_00802C_SE_BROADCAST_WRITES(1) | S_00802C_SH_BROADCAST_WRITES(1) |
+ S_00802C_INSTANCE_BROADCAST_WRITES(1));
+ else
+ radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX,
+ S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) |
+ S_030800_INSTANCE_BROADCAST_WRITES(1));
+
+ if (physical_device->rad_info.chip_class >= GFX7)
+ radeon_set_context_reg(cs, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1);
}
void
-si_emit_compute(struct radv_device *device,
- struct radeon_cmdbuf *cs)
+si_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs)
{
- radeon_set_sh_reg_seq(cs, R_00B810_COMPUTE_START_X, 3);
- radeon_emit(cs, 0);
- radeon_emit(cs, 0);
- radeon_emit(cs, 0);
-
- radeon_set_sh_reg_seq(cs, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0, 2);
- /* R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0 / SE1,
- * renamed COMPUTE_DESTINATION_EN_SEn on gfx10. */
- radeon_emit(cs, S_00B858_SH0_CU_EN(0xffff) | S_00B858_SH1_CU_EN(0xffff));
- radeon_emit(cs, S_00B858_SH0_CU_EN(0xffff) | S_00B858_SH1_CU_EN(0xffff));
-
- if (device->physical_device->rad_info.chip_class >= GFX7) {
- /* Also set R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE2 / SE3 */
- radeon_set_sh_reg_seq(cs,
- R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2, 2);
- radeon_emit(cs, S_00B858_SH0_CU_EN(0xffff) |
- S_00B858_SH1_CU_EN(0xffff));
- radeon_emit(cs, S_00B858_SH0_CU_EN(0xffff) |
- S_00B858_SH1_CU_EN(0xffff));
-
- if (device->border_color_data.bo) {
- uint64_t bc_va = radv_buffer_get_va(device->border_color_data.bo);
-
- radeon_set_uconfig_reg_seq(cs, R_030E00_TA_CS_BC_BASE_ADDR, 2);
- radeon_emit(cs, bc_va >> 8);
- radeon_emit(cs, S_030E04_ADDRESS(bc_va >> 40));
- }
- }
-
- if (device->physical_device->rad_info.chip_class >= GFX9) {
- radeon_set_uconfig_reg(cs, R_0301EC_CP_COHER_START_DELAY,
- device->physical_device->rad_info.chip_class >= GFX10 ? 0x20 : 0);
- }
-
- if (device->physical_device->rad_info.chip_class >= GFX10) {
- radeon_set_sh_reg(cs, R_00B890_COMPUTE_USER_ACCUM_0, 0);
- radeon_set_sh_reg(cs, R_00B894_COMPUTE_USER_ACCUM_1, 0);
- radeon_set_sh_reg(cs, R_00B898_COMPUTE_USER_ACCUM_2, 0);
- radeon_set_sh_reg(cs, R_00B89C_COMPUTE_USER_ACCUM_3, 0);
- radeon_set_sh_reg(cs, R_00B8A0_COMPUTE_PGM_RSRC3, 0);
- radeon_set_sh_reg(cs, R_00B9F4_COMPUTE_DISPATCH_TUNNEL, 0);
- }
-
- /* This register has been moved to R_00CD20_COMPUTE_MAX_WAVE_ID
- * and is now per pipe, so it should be handled in the
- * kernel if we want to use something other than the default value,
- * which is now 0x22f.
- */
- if (device->physical_device->rad_info.chip_class <= GFX6) {
- /* XXX: This should be:
- * (number of compute units) * 4 * (waves per simd) - 1 */
-
- radeon_set_sh_reg(cs, R_00B82C_COMPUTE_MAX_WAVE_ID,
- 0x190 /* Default value */);
-
- if (device->border_color_data.bo) {
- uint64_t bc_va = radv_buffer_get_va(device->border_color_data.bo);
- radeon_set_config_reg(cs, R_00950C_TA_CS_BC_BASE_ADDR, bc_va >> 8);
- }
- }
-
- if (device->tma_bo) {
- uint64_t tba_va, tma_va;
-
- assert(device->physical_device->rad_info.chip_class == GFX8);
-
- tba_va = radv_buffer_get_va(device->trap_handler_shader->bo) +
- device->trap_handler_shader->bo_offset;
- tma_va = radv_buffer_get_va(device->tma_bo);
-
- radeon_set_sh_reg_seq(cs, R_00B838_COMPUTE_TBA_LO, 4);
- radeon_emit(cs, tba_va >> 8);
- radeon_emit(cs, tba_va >> 40);
- radeon_emit(cs, tma_va >> 8);
- radeon_emit(cs, tma_va >> 40);
- }
+ radeon_set_sh_reg_seq(cs, R_00B810_COMPUTE_START_X, 3);
+ radeon_emit(cs, 0);
+ radeon_emit(cs, 0);
+ radeon_emit(cs, 0);
+
+ radeon_set_sh_reg_seq(cs, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0, 2);
+ /* R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0 / SE1,
+ * renamed COMPUTE_DESTINATION_EN_SEn on gfx10. */
+ radeon_emit(cs, S_00B858_SH0_CU_EN(0xffff) | S_00B858_SH1_CU_EN(0xffff));
+ radeon_emit(cs, S_00B858_SH0_CU_EN(0xffff) | S_00B858_SH1_CU_EN(0xffff));
+
+ if (device->physical_device->rad_info.chip_class >= GFX7) {
+ /* Also set R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE2 / SE3 */
+ radeon_set_sh_reg_seq(cs, R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2, 2);
+ radeon_emit(cs, S_00B858_SH0_CU_EN(0xffff) | S_00B858_SH1_CU_EN(0xffff));
+ radeon_emit(cs, S_00B858_SH0_CU_EN(0xffff) | S_00B858_SH1_CU_EN(0xffff));
+
+ if (device->border_color_data.bo) {
+ uint64_t bc_va = radv_buffer_get_va(device->border_color_data.bo);
+
+ radeon_set_uconfig_reg_seq(cs, R_030E00_TA_CS_BC_BASE_ADDR, 2);
+ radeon_emit(cs, bc_va >> 8);
+ radeon_emit(cs, S_030E04_ADDRESS(bc_va >> 40));
+ }
+ }
+
+ if (device->physical_device->rad_info.chip_class >= GFX9) {
+ radeon_set_uconfig_reg(cs, R_0301EC_CP_COHER_START_DELAY,
+ device->physical_device->rad_info.chip_class >= GFX10 ? 0x20 : 0);
+ }
+
+ if (device->physical_device->rad_info.chip_class >= GFX10) {
+ radeon_set_sh_reg(cs, R_00B890_COMPUTE_USER_ACCUM_0, 0);
+ radeon_set_sh_reg(cs, R_00B894_COMPUTE_USER_ACCUM_1, 0);
+ radeon_set_sh_reg(cs, R_00B898_COMPUTE_USER_ACCUM_2, 0);
+ radeon_set_sh_reg(cs, R_00B89C_COMPUTE_USER_ACCUM_3, 0);
+ radeon_set_sh_reg(cs, R_00B8A0_COMPUTE_PGM_RSRC3, 0);
+ radeon_set_sh_reg(cs, R_00B9F4_COMPUTE_DISPATCH_TUNNEL, 0);
+ }
+
+ /* This register has been moved to R_00CD20_COMPUTE_MAX_WAVE_ID
+ * and is now per pipe, so it should be handled in the
+ * kernel if we want to use something other than the default value,
+ * which is now 0x22f.
+ */
+ if (device->physical_device->rad_info.chip_class <= GFX6) {
+ /* XXX: This should be:
+ * (number of compute units) * 4 * (waves per simd) - 1 */
+
+ radeon_set_sh_reg(cs, R_00B82C_COMPUTE_MAX_WAVE_ID, 0x190 /* Default value */);
+
+ if (device->border_color_data.bo) {
+ uint64_t bc_va = radv_buffer_get_va(device->border_color_data.bo);
+ radeon_set_config_reg(cs, R_00950C_TA_CS_BC_BASE_ADDR, bc_va >> 8);
+ }
+ }
+
+ if (device->tma_bo) {
+ uint64_t tba_va, tma_va;
+
+ assert(device->physical_device->rad_info.chip_class == GFX8);
+
+ tba_va = radv_buffer_get_va(device->trap_handler_shader->bo) +
+ device->trap_handler_shader->bo_offset;
+ tma_va = radv_buffer_get_va(device->tma_bo);
+
+ radeon_set_sh_reg_seq(cs, R_00B838_COMPUTE_TBA_LO, 4);
+ radeon_emit(cs, tba_va >> 8);
+ radeon_emit(cs, tba_va >> 40);
+ radeon_emit(cs, tma_va >> 8);
+ radeon_emit(cs, tma_va >> 40);
+ }
}
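Several writes in si_emit_compute above (TA_CS_BC_BASE_ADDR, COMPUTE_TBA/TMA) split a 64-bit GPU virtual address into a low dword shifted right by 8 and a high dword shifted right by 40. A small sketch of that packing, under the assumption implied by discarding the low 8 bits that the address is 256-byte aligned; split_gpu_va is a hypothetical helper name:

#include <assert.h>
#include <stdint.h>

/* Hypothetical helper mirroring the "va >> 8" / "va >> 40" register pairs
 * emitted above: the low dword carries address bits [39:8], the high dword
 * the remaining upper bits. */
static void
split_gpu_va(uint64_t va, uint32_t *lo, uint32_t *hi)
{
   assert((va & 0xffu) == 0); /* 256-byte aligned; the low 8 bits are dropped */
   *lo = (uint32_t)(va >> 8);
   *hi = (uint32_t)(va >> 40);
}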
/* 12.4 fixed-point */
-static unsigned radv_pack_float_12p4(float x)
+static unsigned
+radv_pack_float_12p4(float x)
{
- return x <= 0 ? 0 :
- x >= 4096 ? 0xffff : x * 16;
+ return x <= 0 ? 0 : x >= 4096 ? 0xffff : x * 16;
}
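radv_pack_float_12p4 above converts a float into the unsigned 12.4 fixed-point format expected by PA_SU_POINT_SIZE and PA_SU_POINT_MINMAX: multiply by 16 and clamp to [0, 0xffff]. A few worked values as a standalone check; pack_12p4 simply restates the helper so the asserts compile on their own, and the 8191.875/2 constant used further down in this file packs exactly to the maximum encodable value:

#include <assert.h>

/* Same logic as radv_pack_float_12p4 above, reproduced so the worked
 * examples below are self-contained. */
static unsigned
pack_12p4(float x)
{
   return x <= 0 ? 0 : x >= 4096 ? 0xffff : x * 16;
}

int
main(void)
{
   assert(pack_12p4(-1.0f) == 0);              /* negative sizes clamp to 0     */
   assert(pack_12p4(1.5f) == 24);              /* 1.5 * 16, 4 fractional bits   */
   assert(pack_12p4(8191.875f / 2) == 0xffff); /* 4095.9375 * 16 = 65535        */
   assert(pack_12p4(5000.0f) == 0xffff);       /* >= 4096 clamps to the maximum */
   return 0;
}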
static void
-si_set_raster_config(struct radv_physical_device *physical_device,
- struct radeon_cmdbuf *cs)
+si_set_raster_config(struct radv_physical_device *physical_device, struct radeon_cmdbuf *cs)
{
- unsigned num_rb = MIN2(physical_device->rad_info.max_render_backends, 16);
- unsigned rb_mask = physical_device->rad_info.enabled_rb_mask;
- unsigned raster_config, raster_config_1;
-
- ac_get_raster_config(&physical_device->rad_info,
- &raster_config,
- &raster_config_1, NULL);
-
- /* Always use the default config when all backends are enabled
- * (or when we failed to determine the enabled backends).
- */
- if (!rb_mask || util_bitcount(rb_mask) >= num_rb) {
- radeon_set_context_reg(cs, R_028350_PA_SC_RASTER_CONFIG,
- raster_config);
- if (physical_device->rad_info.chip_class >= GFX7)
- radeon_set_context_reg(cs, R_028354_PA_SC_RASTER_CONFIG_1,
- raster_config_1);
- } else {
- si_write_harvested_raster_configs(physical_device, cs,
- raster_config,
- raster_config_1);
- }
+ unsigned num_rb = MIN2(physical_device->rad_info.max_render_backends, 16);
+ unsigned rb_mask = physical_device->rad_info.enabled_rb_mask;
+ unsigned raster_config, raster_config_1;
+
+ ac_get_raster_config(&physical_device->rad_info, &raster_config, &raster_config_1, NULL);
+
+ /* Always use the default config when all backends are enabled
+ * (or when we failed to determine the enabled backends).
+ */
+ if (!rb_mask || util_bitcount(rb_mask) >= num_rb) {
+ radeon_set_context_reg(cs, R_028350_PA_SC_RASTER_CONFIG, raster_config);
+ if (physical_device->rad_info.chip_class >= GFX7)
+ radeon_set_context_reg(cs, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1);
+ } else {
+ si_write_harvested_raster_configs(physical_device, cs, raster_config, raster_config_1);
+ }
}
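si_set_raster_config above falls back to the default raster config whenever the enabled-RB mask is unknown or already covers every render backend, and only emits harvested configs otherwise. A one-line restatement of that check, with __builtin_popcount standing in for util_bitcount; use_default_raster_config is a hypothetical helper name:

#include <stdbool.h>

/* Illustrative restatement of the check above: use the default raster
 * config when the enabled-RB mask is unknown (0) or no backends are
 * actually harvested. */
static bool
use_default_raster_config(unsigned rb_mask, unsigned num_rb)
{
   return rb_mask == 0 || (unsigned)__builtin_popcount(rb_mask) >= num_rb;
}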
void
-si_emit_graphics(struct radv_device *device,
- struct radeon_cmdbuf *cs)
+si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
{
- struct radv_physical_device *physical_device = device->physical_device;
-
- bool has_clear_state = physical_device->rad_info.has_clear_state;
- int i;
-
- radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
- radeon_emit(cs, CC0_UPDATE_LOAD_ENABLES(1));
- radeon_emit(cs, CC1_UPDATE_SHADOW_ENABLES(1));
-
- if (has_clear_state) {
- radeon_emit(cs, PKT3(PKT3_CLEAR_STATE, 0, 0));
- radeon_emit(cs, 0);
- }
-
- if (physical_device->rad_info.chip_class <= GFX8)
- si_set_raster_config(physical_device, cs);
-
- radeon_set_context_reg(cs, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
- if (!has_clear_state)
- radeon_set_context_reg(cs, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));
-
- /* FIXME calculate these values somehow ??? */
- if (physical_device->rad_info.chip_class <= GFX8) {
- radeon_set_context_reg(cs, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES);
- radeon_set_context_reg(cs, R_028A58_VGT_ES_PER_GS, 0x40);
- }
-
- if (!has_clear_state) {
- radeon_set_context_reg(cs, R_028A5C_VGT_GS_PER_VS, 0x2);
- radeon_set_context_reg(cs, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);
- radeon_set_context_reg(cs, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
- }
-
- if (physical_device->rad_info.chip_class <= GFX9)
- radeon_set_context_reg(cs, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1);
- if (!has_clear_state)
- radeon_set_context_reg(cs, R_028AB8_VGT_VTX_CNT_EN, 0x0);
- if (physical_device->rad_info.chip_class < GFX7)
- radeon_set_config_reg(cs, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) |
- S_008A14_CLIP_VTX_REORDER_ENA(1));
-
- if (!has_clear_state)
- radeon_set_context_reg(cs, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);
-
- /* CLEAR_STATE doesn't clear these correctly on certain generations.
- * I don't know why. Deduced by trial and error.
- */
- if (physical_device->rad_info.chip_class <= GFX7 || !has_clear_state) {
- radeon_set_context_reg(cs, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
- radeon_set_context_reg(cs, R_028204_PA_SC_WINDOW_SCISSOR_TL,
- S_028204_WINDOW_OFFSET_DISABLE(1));
- radeon_set_context_reg(cs, R_028240_PA_SC_GENERIC_SCISSOR_TL,
- S_028240_WINDOW_OFFSET_DISABLE(1));
- radeon_set_context_reg(cs, R_028244_PA_SC_GENERIC_SCISSOR_BR,
- S_028244_BR_X(16384) | S_028244_BR_Y(16384));
- radeon_set_context_reg(cs, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0);
- radeon_set_context_reg(cs, R_028034_PA_SC_SCREEN_SCISSOR_BR,
- S_028034_BR_X(16384) | S_028034_BR_Y(16384));
- }
-
- if (!has_clear_state) {
- for (i = 0; i < 16; i++) {
- radeon_set_context_reg(cs, R_0282D0_PA_SC_VPORT_ZMIN_0 + i*8, 0);
- radeon_set_context_reg(cs, R_0282D4_PA_SC_VPORT_ZMAX_0 + i*8, fui(1.0));
- }
- }
-
- if (!has_clear_state) {
- radeon_set_context_reg(cs, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
- radeon_set_context_reg(cs, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
- /* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on GFX6 */
- radeon_set_context_reg(cs, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
- radeon_set_context_reg(cs, R_028820_PA_CL_NANINF_CNTL, 0);
- radeon_set_context_reg(cs, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
- radeon_set_context_reg(cs, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
- radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
- }
-
- radeon_set_context_reg(cs, R_02800C_DB_RENDER_OVERRIDE,
- S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
- S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE));
-
- if (physical_device->rad_info.chip_class >= GFX10) {
- radeon_set_context_reg(cs, R_028A98_VGT_DRAW_PAYLOAD_CNTL, 0);
- radeon_set_uconfig_reg(cs, R_030964_GE_MAX_VTX_INDX, ~0);
- radeon_set_uconfig_reg(cs, R_030924_GE_MIN_VTX_INDX, 0);
- radeon_set_uconfig_reg(cs, R_030928_GE_INDX_OFFSET, 0);
- radeon_set_uconfig_reg(cs, R_03097C_GE_STEREO_CNTL, 0);
- radeon_set_uconfig_reg(cs, R_030988_GE_USER_VGPR_EN, 0);
- } else if (physical_device->rad_info.chip_class == GFX9) {
- radeon_set_uconfig_reg(cs, R_030920_VGT_MAX_VTX_INDX, ~0);
- radeon_set_uconfig_reg(cs, R_030924_VGT_MIN_VTX_INDX, 0);
- radeon_set_uconfig_reg(cs, R_030928_VGT_INDX_OFFSET, 0);
- } else {
- /* These registers, when written, also overwrite the
- * CLEAR_STATE context, so we can't rely on CLEAR_STATE setting
- * them. It would be an issue if there was another UMD
- * changing them.
- */
- radeon_set_context_reg(cs, R_028400_VGT_MAX_VTX_INDX, ~0);
- radeon_set_context_reg(cs, R_028404_VGT_MIN_VTX_INDX, 0);
- radeon_set_context_reg(cs, R_028408_VGT_INDX_OFFSET, 0);
- }
-
- unsigned cu_mask_ps = 0xffffffff;
-
- /* It's wasteful to enable all CUs for PS if shader arrays have a
- * different number of CUs. The reason is that the hardware sends the
- * same number of PS waves to each shader array, so the slowest shader
- * array limits the performance. Disable the extra CUs for PS in
- * other shader arrays to save power and thus increase clocks for busy
- * CUs. In the future, we might disable or enable this tweak only for
- * certain apps.
- */
- if (physical_device->rad_info.chip_class >= GFX10_3)
- cu_mask_ps = u_bit_consecutive(0, physical_device->rad_info.min_good_cu_per_sa);
-
- if (physical_device->rad_info.chip_class >= GFX7) {
- if (physical_device->rad_info.chip_class >= GFX10) {
- /* Logical CUs 16 - 31 */
- radeon_set_sh_reg_idx(physical_device, cs, R_00B404_SPI_SHADER_PGM_RSRC4_HS,
- 3, S_00B404_CU_EN(0xffff));
- radeon_set_sh_reg_idx(physical_device, cs, R_00B104_SPI_SHADER_PGM_RSRC4_VS,
- 3, S_00B104_CU_EN(0xffff));
- radeon_set_sh_reg_idx(physical_device, cs, R_00B004_SPI_SHADER_PGM_RSRC4_PS,
- 3, S_00B004_CU_EN(cu_mask_ps >> 16));
- }
-
- if (physical_device->rad_info.chip_class >= GFX9) {
- radeon_set_sh_reg_idx(physical_device, cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
- 3, S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F));
- } else {
- radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS,
- S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F));
- radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
- S_00B41C_WAVE_LIMIT(0x3F));
- radeon_set_sh_reg(cs, R_00B31C_SPI_SHADER_PGM_RSRC3_ES,
- S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F));
- /* If this is 0, Bonaire can hang even if GS isn't being used.
- * Other chips are unaffected. These are suboptimal values,
- * but we don't use on-chip GS.
- */
- radeon_set_context_reg(cs, R_028A44_VGT_GS_ONCHIP_CNTL,
- S_028A44_ES_VERTS_PER_SUBGRP(64) |
- S_028A44_GS_PRIMS_PER_SUBGRP(4));
- }
-
- /* Compute LATE_ALLOC_VS.LIMIT. */
- unsigned num_cu_per_sh = physical_device->rad_info.min_good_cu_per_sa;
- unsigned late_alloc_wave64 = 0; /* The limit is per SA. */
- unsigned late_alloc_wave64_gs = 0;
- unsigned cu_mask_vs = 0xffff;
- unsigned cu_mask_gs = 0xffff;
-
- if (physical_device->rad_info.chip_class >= GFX10) {
- /* For Wave32, the hw will launch twice the number of late
- * alloc waves, so 1 == 2x wave32.
- */
- if (!physical_device->rad_info.use_late_alloc) {
- late_alloc_wave64 = 0;
- } else if (num_cu_per_sh <= 6) {
- late_alloc_wave64 = num_cu_per_sh - 2;
- } else {
- late_alloc_wave64 = (num_cu_per_sh - 2) * 4;
-
- /* Gfx10: CU2 & CU3 must be disabled to
- * prevent a hw deadlock. Others: CU1 must be
- * disabled to prevent a hw deadlock.
- *
- * The deadlock is caused by late alloc, which
- * usually increases performance.
- */
- cu_mask_vs &= physical_device->rad_info.chip_class == GFX10 ?
- ~BITFIELD_RANGE(2, 2) : ~BITFIELD_RANGE(1, 1);
-
- if (physical_device->use_ngg) {
- cu_mask_gs = cu_mask_vs;
- }
- }
-
- late_alloc_wave64_gs = late_alloc_wave64;
-
- /* Don't use late alloc for NGG on Navi14 due to a hw
- * bug. If NGG is never used, enable all CUs.
- */
- if (!physical_device->use_ngg ||
- physical_device->rad_info.family == CHIP_NAVI14) {
- late_alloc_wave64_gs = 0;
- cu_mask_gs = 0xffff;
- }
-
- /* Limit LATE_ALLOC_GS for prevent a hang (hw bug). */
- if (physical_device->rad_info.chip_class == GFX10)
- late_alloc_wave64_gs = MIN2(late_alloc_wave64_gs, 64);
- } else {
- if (!physical_device->rad_info.use_late_alloc) {
- late_alloc_wave64 = 0;
- } else if (num_cu_per_sh <= 4) {
- /* Too few available compute units per SA.
- * Disallowing VS to run on one CU could hurt
- * us more than late VS allocation would help.
- *
- * 2 is the highest safe number that allows us
- * to keep all CUs enabled.
- */
- late_alloc_wave64 = 2;
- } else {
- /* This is a good initial value, allowing 1
- * late_alloc wave per SIMD on num_cu - 2.
- */
- late_alloc_wave64 = (num_cu_per_sh - 2) * 4;
- }
-
- if (late_alloc_wave64 > 2)
- cu_mask_vs = 0xfffe; /* 1 CU disabled */
- }
-
- radeon_set_sh_reg_idx(physical_device, cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS,
- 3, S_00B118_CU_EN(cu_mask_vs) |
- S_00B118_WAVE_LIMIT(0x3F));
- radeon_set_sh_reg(cs, R_00B11C_SPI_SHADER_LATE_ALLOC_VS,
- S_00B11C_LIMIT(late_alloc_wave64));
-
- radeon_set_sh_reg_idx(physical_device, cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
- 3, S_00B21C_CU_EN(cu_mask_gs) | S_00B21C_WAVE_LIMIT(0x3F));
-
- if (physical_device->rad_info.chip_class >= GFX10) {
- radeon_set_sh_reg_idx(physical_device, cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
- 3, S_00B204_CU_EN(0xffff) |
- S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64_gs));
- }
-
- radeon_set_sh_reg_idx(physical_device, cs, R_00B01C_SPI_SHADER_PGM_RSRC3_PS,
- 3, S_00B01C_CU_EN(cu_mask_ps) | S_00B01C_WAVE_LIMIT(0x3F));
- }
-
- if (physical_device->rad_info.chip_class >= GFX10) {
- /* Break up a pixel wave if it contains deallocs for more than
- * half the parameter cache.
- *
- * To avoid a deadlock where pixel waves aren't launched
- * because they're waiting for more pixels while the frontend
- * is stuck waiting for PC space, the maximum allowed value is
- * the size of the PC minus the largest possible allocation for
- * a single primitive shader subgroup.
- */
- radeon_set_context_reg(cs, R_028C50_PA_SC_NGG_MODE_CNTL,
- S_028C50_MAX_DEALLOCS_IN_WAVE(512));
- radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
-
- /* Enable CMASK/FMASK/HTILE/DCC caching in L2 for small chips. */
- unsigned meta_write_policy, meta_read_policy;
-
- /* TODO: investigate whether LRU improves performance on other chips too */
- if (physical_device->rad_info.max_render_backends <= 4) {
- meta_write_policy = V_02807C_CACHE_LRU_WR; /* cache writes */
- meta_read_policy = V_02807C_CACHE_LRU_RD; /* cache reads */
- } else {
- meta_write_policy = V_02807C_CACHE_STREAM; /* write combine */
- meta_read_policy = V_02807C_CACHE_NOA; /* don't cache reads */
- }
-
- radeon_set_context_reg(cs, R_02807C_DB_RMI_L2_CACHE_CONTROL,
- S_02807C_Z_WR_POLICY(V_02807C_CACHE_STREAM) |
- S_02807C_S_WR_POLICY(V_02807C_CACHE_STREAM) |
- S_02807C_HTILE_WR_POLICY(meta_write_policy) |
- S_02807C_ZPCPSD_WR_POLICY(V_02807C_CACHE_STREAM) |
- S_02807C_Z_RD_POLICY(V_02807C_CACHE_NOA) |
- S_02807C_S_RD_POLICY(V_02807C_CACHE_NOA) |
- S_02807C_HTILE_RD_POLICY(meta_read_policy));
-
- radeon_set_context_reg(cs, R_028410_CB_RMI_GL2_CACHE_CONTROL,
- S_028410_CMASK_WR_POLICY(meta_write_policy) |
- S_028410_FMASK_WR_POLICY(meta_write_policy) |
- S_028410_DCC_WR_POLICY(meta_write_policy) |
- S_028410_COLOR_WR_POLICY(V_028410_CACHE_STREAM) |
- S_028410_CMASK_RD_POLICY(meta_read_policy) |
- S_028410_FMASK_RD_POLICY(meta_read_policy) |
- S_028410_DCC_RD_POLICY(meta_read_policy) |
- S_028410_COLOR_RD_POLICY(V_028410_CACHE_NOA));
- radeon_set_context_reg(cs, R_028428_CB_COVERAGE_OUT_CONTROL, 0);
-
- radeon_set_sh_reg(cs, R_00B0C8_SPI_SHADER_USER_ACCUM_PS_0, 0);
- radeon_set_sh_reg(cs, R_00B0CC_SPI_SHADER_USER_ACCUM_PS_1, 0);
- radeon_set_sh_reg(cs, R_00B0D0_SPI_SHADER_USER_ACCUM_PS_2, 0);
- radeon_set_sh_reg(cs, R_00B0D4_SPI_SHADER_USER_ACCUM_PS_3, 0);
- radeon_set_sh_reg(cs, R_00B1C8_SPI_SHADER_USER_ACCUM_VS_0, 0);
- radeon_set_sh_reg(cs, R_00B1CC_SPI_SHADER_USER_ACCUM_VS_1, 0);
- radeon_set_sh_reg(cs, R_00B1D0_SPI_SHADER_USER_ACCUM_VS_2, 0);
- radeon_set_sh_reg(cs, R_00B1D4_SPI_SHADER_USER_ACCUM_VS_3, 0);
- radeon_set_sh_reg(cs, R_00B2C8_SPI_SHADER_USER_ACCUM_ESGS_0, 0);
- radeon_set_sh_reg(cs, R_00B2CC_SPI_SHADER_USER_ACCUM_ESGS_1, 0);
- radeon_set_sh_reg(cs, R_00B2D0_SPI_SHADER_USER_ACCUM_ESGS_2, 0);
- radeon_set_sh_reg(cs, R_00B2D4_SPI_SHADER_USER_ACCUM_ESGS_3, 0);
- radeon_set_sh_reg(cs, R_00B4C8_SPI_SHADER_USER_ACCUM_LSHS_0, 0);
- radeon_set_sh_reg(cs, R_00B4CC_SPI_SHADER_USER_ACCUM_LSHS_1, 0);
- radeon_set_sh_reg(cs, R_00B4D0_SPI_SHADER_USER_ACCUM_LSHS_2, 0);
- radeon_set_sh_reg(cs, R_00B4D4_SPI_SHADER_USER_ACCUM_LSHS_3, 0);
-
- radeon_set_sh_reg(cs, R_00B0C0_SPI_SHADER_REQ_CTRL_PS,
- S_00B0C0_SOFT_GROUPING_EN(1) |
- S_00B0C0_NUMBER_OF_REQUESTS_PER_CU(4 - 1));
- radeon_set_sh_reg(cs, R_00B1C0_SPI_SHADER_REQ_CTRL_VS, 0);
-
- if (physical_device->rad_info.chip_class >= GFX10_3) {
- radeon_set_context_reg(cs, R_028750_SX_PS_DOWNCONVERT_CONTROL, 0xff);
- /* This allows sample shading. */
- radeon_set_context_reg(cs, R_028848_PA_CL_VRS_CNTL,
- S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE));
- }
-
- if (physical_device->rad_info.chip_class == GFX10) {
- /* SQ_NON_EVENT must be emitted before GE_PC_ALLOC is written. */
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_SQ_NON_EVENT) | EVENT_INDEX(0));
- }
-
- /* TODO: For culling, replace 128 with 256. */
- radeon_set_uconfig_reg(cs, R_030980_GE_PC_ALLOC,
- S_030980_OVERSUB_EN(physical_device->rad_info.use_late_alloc) |
- S_030980_NUM_PC_LINES(128 * physical_device->rad_info.max_se - 1));
- }
-
- if (physical_device->rad_info.chip_class >= GFX9) {
- radeon_set_context_reg(cs, R_028B50_VGT_TESS_DISTRIBUTION,
- S_028B50_ACCUM_ISOLINE(40) |
- S_028B50_ACCUM_TRI(30) |
- S_028B50_ACCUM_QUAD(24) |
- S_028B50_DONUT_SPLIT(24) |
- S_028B50_TRAP_SPLIT(6));
- } else if (physical_device->rad_info.chip_class >= GFX8) {
- uint32_t vgt_tess_distribution;
-
- vgt_tess_distribution = S_028B50_ACCUM_ISOLINE(32) |
- S_028B50_ACCUM_TRI(11) |
- S_028B50_ACCUM_QUAD(11) |
- S_028B50_DONUT_SPLIT(16);
-
- if (physical_device->rad_info.family == CHIP_FIJI ||
- physical_device->rad_info.family >= CHIP_POLARIS10)
- vgt_tess_distribution |= S_028B50_TRAP_SPLIT(3);
-
- radeon_set_context_reg(cs, R_028B50_VGT_TESS_DISTRIBUTION,
- vgt_tess_distribution);
- } else if (!has_clear_state) {
- radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
- radeon_set_context_reg(cs, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16);
- }
-
- if (device->border_color_data.bo) {
- uint64_t border_color_va = radv_buffer_get_va(device->border_color_data.bo);
-
- radeon_set_context_reg(cs, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
- if (physical_device->rad_info.chip_class >= GFX7) {
- radeon_set_context_reg(cs, R_028084_TA_BC_BASE_ADDR_HI,
- S_028084_ADDRESS(border_color_va >> 40));
- }
- }
-
- if (physical_device->rad_info.chip_class >= GFX9) {
- radeon_set_context_reg(cs, R_028C48_PA_SC_BINNER_CNTL_1,
- S_028C48_MAX_ALLOC_COUNT(physical_device->rad_info.pbb_max_alloc_count - 1) |
- S_028C48_MAX_PRIM_PER_BATCH(1023));
- radeon_set_context_reg(cs, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL,
- S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1));
- radeon_set_uconfig_reg(cs, R_030968_VGT_INSTANCE_BASE_ID, 0);
- }
-
- unsigned tmp = (unsigned)(1.0 * 8.0);
- radeon_set_context_reg_seq(cs, R_028A00_PA_SU_POINT_SIZE, 1);
- radeon_emit(cs, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp));
- radeon_set_context_reg_seq(cs, R_028A04_PA_SU_POINT_MINMAX, 1);
- radeon_emit(cs, S_028A04_MIN_SIZE(radv_pack_float_12p4(0)) |
- S_028A04_MAX_SIZE(radv_pack_float_12p4(8191.875/2)));
-
- if (!has_clear_state) {
- radeon_set_context_reg(cs, R_028004_DB_COUNT_CONTROL,
- S_028004_ZPASS_INCREMENT_DISABLE(1));
- }
-
- /* Enable the Polaris small primitive filter control.
- * XXX: There is possibly an issue when MSAA is off (see RadeonSI
- * has_msaa_sample_loc_bug). But this doesn't seem to regress anything,
- * and AMDVLK doesn't have a workaround as well.
- */
- if (physical_device->rad_info.family >= CHIP_POLARIS10) {
- unsigned small_prim_filter_cntl =
- S_028830_SMALL_PRIM_FILTER_ENABLE(1) |
- /* Workaround for a hw line bug. */
- S_028830_LINE_FILTER_DISABLE(physical_device->rad_info.family <= CHIP_POLARIS12);
-
- radeon_set_context_reg(cs, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL,
- small_prim_filter_cntl);
- }
-
- radeon_set_context_reg(cs, R_0286D4_SPI_INTERP_CONTROL_0,
- S_0286D4_FLAT_SHADE_ENA(1) |
- S_0286D4_PNT_SPRITE_ENA(1) |
- S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) |
- S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) |
- S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) |
- S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) |
- S_0286D4_PNT_SPRITE_TOP_1(0)); /* vulkan is top to bottom - 1.0 at bottom */
-
- radeon_set_context_reg(cs, R_028BE4_PA_SU_VTX_CNTL,
- S_028BE4_PIX_CENTER(1) |
- S_028BE4_ROUND_MODE(V_028BE4_X_ROUND_TO_EVEN) |
- S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH));
-
- radeon_set_context_reg(cs, R_028818_PA_CL_VTE_CNTL,
- S_028818_VTX_W0_FMT(1) |
- S_028818_VPORT_X_SCALE_ENA(1) | S_028818_VPORT_X_OFFSET_ENA(1) |
- S_028818_VPORT_Y_SCALE_ENA(1) | S_028818_VPORT_Y_OFFSET_ENA(1) |
- S_028818_VPORT_Z_SCALE_ENA(1) | S_028818_VPORT_Z_OFFSET_ENA(1));
-
- if (device->tma_bo) {
- uint64_t tba_va, tma_va;
-
- assert(device->physical_device->rad_info.chip_class == GFX8);
-
- tba_va = radv_buffer_get_va(device->trap_handler_shader->bo) +
- device->trap_handler_shader->bo_offset;
- tma_va = radv_buffer_get_va(device->tma_bo);
-
- uint32_t regs[] = {R_00B000_SPI_SHADER_TBA_LO_PS,
- R_00B100_SPI_SHADER_TBA_LO_VS,
- R_00B200_SPI_SHADER_TBA_LO_GS,
- R_00B300_SPI_SHADER_TBA_LO_ES,
- R_00B400_SPI_SHADER_TBA_LO_HS,
- R_00B500_SPI_SHADER_TBA_LO_LS};
-
- for (i = 0; i < ARRAY_SIZE(regs); ++i) {
- radeon_set_sh_reg_seq(cs, regs[i], 4);
- radeon_emit(cs, tba_va >> 8);
- radeon_emit(cs, tba_va >> 40);
- radeon_emit(cs, tma_va >> 8);
- radeon_emit(cs, tma_va >> 40);
- }
- }
-
- si_emit_compute(device, cs);
+ struct radv_physical_device *physical_device = device->physical_device;
+
+ bool has_clear_state = physical_device->rad_info.has_clear_state;
+ int i;
+
+ radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
+ radeon_emit(cs, CC0_UPDATE_LOAD_ENABLES(1));
+ radeon_emit(cs, CC1_UPDATE_SHADOW_ENABLES(1));
+
+ if (has_clear_state) {
+ radeon_emit(cs, PKT3(PKT3_CLEAR_STATE, 0, 0));
+ radeon_emit(cs, 0);
+ }
+
+ if (physical_device->rad_info.chip_class <= GFX8)
+ si_set_raster_config(physical_device, cs);
+
+ radeon_set_context_reg(cs, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
+ if (!has_clear_state)
+ radeon_set_context_reg(cs, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));
+
+ /* FIXME calculate these values somehow ??? */
+ if (physical_device->rad_info.chip_class <= GFX8) {
+ radeon_set_context_reg(cs, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES);
+ radeon_set_context_reg(cs, R_028A58_VGT_ES_PER_GS, 0x40);
+ }
+
+ if (!has_clear_state) {
+ radeon_set_context_reg(cs, R_028A5C_VGT_GS_PER_VS, 0x2);
+ radeon_set_context_reg(cs, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);
+ radeon_set_context_reg(cs, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
+ }
+
+ if (physical_device->rad_info.chip_class <= GFX9)
+ radeon_set_context_reg(cs, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1);
+ if (!has_clear_state)
+ radeon_set_context_reg(cs, R_028AB8_VGT_VTX_CNT_EN, 0x0);
+ if (physical_device->rad_info.chip_class < GFX7)
+ radeon_set_config_reg(cs, R_008A14_PA_CL_ENHANCE,
+ S_008A14_NUM_CLIP_SEQ(3) | S_008A14_CLIP_VTX_REORDER_ENA(1));
+
+ if (!has_clear_state)
+ radeon_set_context_reg(cs, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);
+
+ /* CLEAR_STATE doesn't clear these correctly on certain generations.
+ * I don't know why. Deduced by trial and error.
+ */
+ if (physical_device->rad_info.chip_class <= GFX7 || !has_clear_state) {
+ radeon_set_context_reg(cs, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
+ radeon_set_context_reg(cs, R_028204_PA_SC_WINDOW_SCISSOR_TL,
+ S_028204_WINDOW_OFFSET_DISABLE(1));
+ radeon_set_context_reg(cs, R_028240_PA_SC_GENERIC_SCISSOR_TL,
+ S_028240_WINDOW_OFFSET_DISABLE(1));
+ radeon_set_context_reg(cs, R_028244_PA_SC_GENERIC_SCISSOR_BR,
+ S_028244_BR_X(16384) | S_028244_BR_Y(16384));
+ radeon_set_context_reg(cs, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0);
+ radeon_set_context_reg(cs, R_028034_PA_SC_SCREEN_SCISSOR_BR,
+ S_028034_BR_X(16384) | S_028034_BR_Y(16384));
+ }
+
+ if (!has_clear_state) {
+ for (i = 0; i < 16; i++) {
+ radeon_set_context_reg(cs, R_0282D0_PA_SC_VPORT_ZMIN_0 + i * 8, 0);
+ radeon_set_context_reg(cs, R_0282D4_PA_SC_VPORT_ZMAX_0 + i * 8, fui(1.0));
+ }
+ }
+
+ if (!has_clear_state) {
+ radeon_set_context_reg(cs, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
+ radeon_set_context_reg(cs, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
+ /* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on GFX6 */
+ radeon_set_context_reg(cs, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
+ radeon_set_context_reg(cs, R_028820_PA_CL_NANINF_CNTL, 0);
+ radeon_set_context_reg(cs, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
+ radeon_set_context_reg(cs, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
+ radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
+ }
+
+ radeon_set_context_reg(cs, R_02800C_DB_RENDER_OVERRIDE,
+ S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
+ S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE));
+
+ if (physical_device->rad_info.chip_class >= GFX10) {
+ radeon_set_context_reg(cs, R_028A98_VGT_DRAW_PAYLOAD_CNTL, 0);
+ radeon_set_uconfig_reg(cs, R_030964_GE_MAX_VTX_INDX, ~0);
+ radeon_set_uconfig_reg(cs, R_030924_GE_MIN_VTX_INDX, 0);
+ radeon_set_uconfig_reg(cs, R_030928_GE_INDX_OFFSET, 0);
+ radeon_set_uconfig_reg(cs, R_03097C_GE_STEREO_CNTL, 0);
+ radeon_set_uconfig_reg(cs, R_030988_GE_USER_VGPR_EN, 0);
+ } else if (physical_device->rad_info.chip_class == GFX9) {
+ radeon_set_uconfig_reg(cs, R_030920_VGT_MAX_VTX_INDX, ~0);
+ radeon_set_uconfig_reg(cs, R_030924_VGT_MIN_VTX_INDX, 0);
+ radeon_set_uconfig_reg(cs, R_030928_VGT_INDX_OFFSET, 0);
+ } else {
+ /* These registers, when written, also overwrite the
+ * CLEAR_STATE context, so we can't rely on CLEAR_STATE setting
+ * them. It would be an issue if there was another UMD
+ * changing them.
+ */
+ radeon_set_context_reg(cs, R_028400_VGT_MAX_VTX_INDX, ~0);
+ radeon_set_context_reg(cs, R_028404_VGT_MIN_VTX_INDX, 0);
+ radeon_set_context_reg(cs, R_028408_VGT_INDX_OFFSET, 0);
+ }
+
+ unsigned cu_mask_ps = 0xffffffff;
+
+ /* It's wasteful to enable all CUs for PS if shader arrays have a
+ * different number of CUs. The reason is that the hardware sends the
+ * same number of PS waves to each shader array, so the slowest shader
+ * array limits the performance. Disable the extra CUs for PS in
+ * other shader arrays to save power and thus increase clocks for busy
+ * CUs. In the future, we might disable or enable this tweak only for
+ * certain apps.
+ */
+ if (physical_device->rad_info.chip_class >= GFX10_3)
+ cu_mask_ps = u_bit_consecutive(0, physical_device->rad_info.min_good_cu_per_sa);
+
+ if (physical_device->rad_info.chip_class >= GFX7) {
+ if (physical_device->rad_info.chip_class >= GFX10) {
+ /* Logical CUs 16 - 31 */
+ radeon_set_sh_reg_idx(physical_device, cs, R_00B404_SPI_SHADER_PGM_RSRC4_HS, 3,
+ S_00B404_CU_EN(0xffff));
+ radeon_set_sh_reg_idx(physical_device, cs, R_00B104_SPI_SHADER_PGM_RSRC4_VS, 3,
+ S_00B104_CU_EN(0xffff));
+ radeon_set_sh_reg_idx(physical_device, cs, R_00B004_SPI_SHADER_PGM_RSRC4_PS, 3,
+ S_00B004_CU_EN(cu_mask_ps >> 16));
+ }
+
+ if (physical_device->rad_info.chip_class >= GFX9) {
+ radeon_set_sh_reg_idx(physical_device, cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 3,
+ S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F));
+ } else {
+ radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS,
+ S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F));
+ radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, S_00B41C_WAVE_LIMIT(0x3F));
+ radeon_set_sh_reg(cs, R_00B31C_SPI_SHADER_PGM_RSRC3_ES,
+ S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F));
+ /* If this is 0, Bonaire can hang even if GS isn't being used.
+ * Other chips are unaffected. These are suboptimal values,
+ * but we don't use on-chip GS.
+ */
+ radeon_set_context_reg(cs, R_028A44_VGT_GS_ONCHIP_CNTL,
+ S_028A44_ES_VERTS_PER_SUBGRP(64) | S_028A44_GS_PRIMS_PER_SUBGRP(4));
+ }
+
+ /* Compute LATE_ALLOC_VS.LIMIT. */
+ unsigned num_cu_per_sh = physical_device->rad_info.min_good_cu_per_sa;
+ unsigned late_alloc_wave64 = 0; /* The limit is per SA. */
+ unsigned late_alloc_wave64_gs = 0;
+ unsigned cu_mask_vs = 0xffff;
+ unsigned cu_mask_gs = 0xffff;
+
+ if (physical_device->rad_info.chip_class >= GFX10) {
+ /* For Wave32, the hw will launch twice the number of late
+ * alloc waves, so 1 == 2x wave32.
+ */
+ if (!physical_device->rad_info.use_late_alloc) {
+ late_alloc_wave64 = 0;
+ } else if (num_cu_per_sh <= 6) {
+ late_alloc_wave64 = num_cu_per_sh - 2;
+ } else {
+ late_alloc_wave64 = (num_cu_per_sh - 2) * 4;
+
+ /* Gfx10: CU2 & CU3 must be disabled to
+ * prevent a hw deadlock. Others: CU1 must be
+ * disabled to prevent a hw deadlock.
+ *
+ * The deadlock is caused by late alloc, which
+ * usually increases performance.
+ */
+ cu_mask_vs &= physical_device->rad_info.chip_class == GFX10 ? ~BITFIELD_RANGE(2, 2)
+ : ~BITFIELD_RANGE(1, 1);
+
+ if (physical_device->use_ngg) {
+ cu_mask_gs = cu_mask_vs;
+ }
+ }
+
+ late_alloc_wave64_gs = late_alloc_wave64;
+
+ /* Don't use late alloc for NGG on Navi14 due to a hw
+ * bug. If NGG is never used, enable all CUs.
+ */
+ if (!physical_device->use_ngg || physical_device->rad_info.family == CHIP_NAVI14) {
+ late_alloc_wave64_gs = 0;
+ cu_mask_gs = 0xffff;
+ }
+
+ /* Limit LATE_ALLOC_GS to prevent a hang (hw bug). */
+ if (physical_device->rad_info.chip_class == GFX10)
+ late_alloc_wave64_gs = MIN2(late_alloc_wave64_gs, 64);
+ } else {
+ if (!physical_device->rad_info.use_late_alloc) {
+ late_alloc_wave64 = 0;
+ } else if (num_cu_per_sh <= 4) {
+ /* Too few available compute units per SA.
+ * Disallowing VS to run on one CU could hurt
+ * us more than late VS allocation would help.
+ *
+ * 2 is the highest safe number that allows us
+ * to keep all CUs enabled.
+ */
+ late_alloc_wave64 = 2;
+ } else {
+ /* This is a good initial value, allowing 1
+ * late_alloc wave per SIMD on num_cu - 2.
+ */
+ late_alloc_wave64 = (num_cu_per_sh - 2) * 4;
+ }
+
+ if (late_alloc_wave64 > 2)
+ cu_mask_vs = 0xfffe; /* 1 CU disabled */
+ }
+
+ radeon_set_sh_reg_idx(physical_device, cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, 3,
+ S_00B118_CU_EN(cu_mask_vs) | S_00B118_WAVE_LIMIT(0x3F));
+ radeon_set_sh_reg(cs, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(late_alloc_wave64));
+
+ radeon_set_sh_reg_idx(physical_device, cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 3,
+ S_00B21C_CU_EN(cu_mask_gs) | S_00B21C_WAVE_LIMIT(0x3F));
+
+ if (physical_device->rad_info.chip_class >= GFX10) {
+ radeon_set_sh_reg_idx(
+ physical_device, cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS, 3,
+ S_00B204_CU_EN(0xffff) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64_gs));
+ }
+
+ radeon_set_sh_reg_idx(physical_device, cs, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, 3,
+ S_00B01C_CU_EN(cu_mask_ps) | S_00B01C_WAVE_LIMIT(0x3F));
+ }
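A minimal illustrative sketch (not from this change) of the late-alloc arithmetic above, assuming a hypothetical GFX10 part with late alloc enabled; only the CU-count branch is reproduced:

   /* What the GFX10 branch above computes for num_cu_per_sh usable CUs per SA. */
   static unsigned
   example_late_alloc_wave64_gfx10(unsigned num_cu_per_sh)
   {
      if (num_cu_per_sh <= 6)
         return num_cu_per_sh - 2;     /* small parts: stay conservative */
      return (num_cu_per_sh - 2) * 4;  /* e.g. 10 CUs/SA -> 32 wave64 slots,
                                        * i.e. 64 wave32 (the hw doubles it) */
   }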
+
+ if (physical_device->rad_info.chip_class >= GFX10) {
+ /* Break up a pixel wave if it contains deallocs for more than
+ * half the parameter cache.
+ *
+ * To avoid a deadlock where pixel waves aren't launched
+ * because they're waiting for more pixels while the frontend
+ * is stuck waiting for PC space, the maximum allowed value is
+ * the size of the PC minus the largest possible allocation for
+ * a single primitive shader subgroup.
+ */
+ radeon_set_context_reg(cs, R_028C50_PA_SC_NGG_MODE_CNTL, S_028C50_MAX_DEALLOCS_IN_WAVE(512));
+ radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
+
+ /* Enable CMASK/FMASK/HTILE/DCC caching in L2 for small chips. */
+ unsigned meta_write_policy, meta_read_policy;
+
+ /* TODO: investigate whether LRU improves performance on other chips too */
+ if (physical_device->rad_info.max_render_backends <= 4) {
+ meta_write_policy = V_02807C_CACHE_LRU_WR; /* cache writes */
+ meta_read_policy = V_02807C_CACHE_LRU_RD; /* cache reads */
+ } else {
+ meta_write_policy = V_02807C_CACHE_STREAM; /* write combine */
+ meta_read_policy = V_02807C_CACHE_NOA; /* don't cache reads */
+ }
+
+ radeon_set_context_reg(
+ cs, R_02807C_DB_RMI_L2_CACHE_CONTROL,
+ S_02807C_Z_WR_POLICY(V_02807C_CACHE_STREAM) | S_02807C_S_WR_POLICY(V_02807C_CACHE_STREAM) |
+ S_02807C_HTILE_WR_POLICY(meta_write_policy) |
+ S_02807C_ZPCPSD_WR_POLICY(V_02807C_CACHE_STREAM) |
+ S_02807C_Z_RD_POLICY(V_02807C_CACHE_NOA) | S_02807C_S_RD_POLICY(V_02807C_CACHE_NOA) |
+ S_02807C_HTILE_RD_POLICY(meta_read_policy));
+
+ radeon_set_context_reg(
+ cs, R_028410_CB_RMI_GL2_CACHE_CONTROL,
+ S_028410_CMASK_WR_POLICY(meta_write_policy) | S_028410_FMASK_WR_POLICY(meta_write_policy) |
+ S_028410_DCC_WR_POLICY(meta_write_policy) |
+ S_028410_COLOR_WR_POLICY(V_028410_CACHE_STREAM) |
+ S_028410_CMASK_RD_POLICY(meta_read_policy) |
+ S_028410_FMASK_RD_POLICY(meta_read_policy) | S_028410_DCC_RD_POLICY(meta_read_policy) |
+ S_028410_COLOR_RD_POLICY(V_028410_CACHE_NOA));
+ radeon_set_context_reg(cs, R_028428_CB_COVERAGE_OUT_CONTROL, 0);
+
+ radeon_set_sh_reg(cs, R_00B0C8_SPI_SHADER_USER_ACCUM_PS_0, 0);
+ radeon_set_sh_reg(cs, R_00B0CC_SPI_SHADER_USER_ACCUM_PS_1, 0);
+ radeon_set_sh_reg(cs, R_00B0D0_SPI_SHADER_USER_ACCUM_PS_2, 0);
+ radeon_set_sh_reg(cs, R_00B0D4_SPI_SHADER_USER_ACCUM_PS_3, 0);
+ radeon_set_sh_reg(cs, R_00B1C8_SPI_SHADER_USER_ACCUM_VS_0, 0);
+ radeon_set_sh_reg(cs, R_00B1CC_SPI_SHADER_USER_ACCUM_VS_1, 0);
+ radeon_set_sh_reg(cs, R_00B1D0_SPI_SHADER_USER_ACCUM_VS_2, 0);
+ radeon_set_sh_reg(cs, R_00B1D4_SPI_SHADER_USER_ACCUM_VS_3, 0);
+ radeon_set_sh_reg(cs, R_00B2C8_SPI_SHADER_USER_ACCUM_ESGS_0, 0);
+ radeon_set_sh_reg(cs, R_00B2CC_SPI_SHADER_USER_ACCUM_ESGS_1, 0);
+ radeon_set_sh_reg(cs, R_00B2D0_SPI_SHADER_USER_ACCUM_ESGS_2, 0);
+ radeon_set_sh_reg(cs, R_00B2D4_SPI_SHADER_USER_ACCUM_ESGS_3, 0);
+ radeon_set_sh_reg(cs, R_00B4C8_SPI_SHADER_USER_ACCUM_LSHS_0, 0);
+ radeon_set_sh_reg(cs, R_00B4CC_SPI_SHADER_USER_ACCUM_LSHS_1, 0);
+ radeon_set_sh_reg(cs, R_00B4D0_SPI_SHADER_USER_ACCUM_LSHS_2, 0);
+ radeon_set_sh_reg(cs, R_00B4D4_SPI_SHADER_USER_ACCUM_LSHS_3, 0);
+
+ radeon_set_sh_reg(cs, R_00B0C0_SPI_SHADER_REQ_CTRL_PS,
+ S_00B0C0_SOFT_GROUPING_EN(1) | S_00B0C0_NUMBER_OF_REQUESTS_PER_CU(4 - 1));
+ radeon_set_sh_reg(cs, R_00B1C0_SPI_SHADER_REQ_CTRL_VS, 0);
+
+ if (physical_device->rad_info.chip_class >= GFX10_3) {
+ radeon_set_context_reg(cs, R_028750_SX_PS_DOWNCONVERT_CONTROL, 0xff);
+ /* This allows sample shading. */
+ radeon_set_context_reg(
+ cs, R_028848_PA_CL_VRS_CNTL,
+ S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE));
+ }
+
+ if (physical_device->rad_info.chip_class == GFX10) {
+ /* SQ_NON_EVENT must be emitted before GE_PC_ALLOC is written. */
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_SQ_NON_EVENT) | EVENT_INDEX(0));
+ }
+
+ /* TODO: For culling, replace 128 with 256. */
+ radeon_set_uconfig_reg(cs, R_030980_GE_PC_ALLOC,
+ S_030980_OVERSUB_EN(physical_device->rad_info.use_late_alloc) |
+ S_030980_NUM_PC_LINES(128 * physical_device->rad_info.max_se - 1));
+ }
+
+ if (physical_device->rad_info.chip_class >= GFX9) {
+ radeon_set_context_reg(cs, R_028B50_VGT_TESS_DISTRIBUTION,
+ S_028B50_ACCUM_ISOLINE(40) | S_028B50_ACCUM_TRI(30) |
+ S_028B50_ACCUM_QUAD(24) | S_028B50_DONUT_SPLIT(24) |
+ S_028B50_TRAP_SPLIT(6));
+ } else if (physical_device->rad_info.chip_class >= GFX8) {
+ uint32_t vgt_tess_distribution;
+
+ vgt_tess_distribution = S_028B50_ACCUM_ISOLINE(32) | S_028B50_ACCUM_TRI(11) |
+ S_028B50_ACCUM_QUAD(11) | S_028B50_DONUT_SPLIT(16);
+
+ if (physical_device->rad_info.family == CHIP_FIJI ||
+ physical_device->rad_info.family >= CHIP_POLARIS10)
+ vgt_tess_distribution |= S_028B50_TRAP_SPLIT(3);
+
+ radeon_set_context_reg(cs, R_028B50_VGT_TESS_DISTRIBUTION, vgt_tess_distribution);
+ } else if (!has_clear_state) {
+ radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
+ radeon_set_context_reg(cs, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16);
+ }
+
+ if (device->border_color_data.bo) {
+ uint64_t border_color_va = radv_buffer_get_va(device->border_color_data.bo);
+
+ radeon_set_context_reg(cs, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
+ if (physical_device->rad_info.chip_class >= GFX7) {
+ radeon_set_context_reg(cs, R_028084_TA_BC_BASE_ADDR_HI,
+ S_028084_ADDRESS(border_color_va >> 40));
+ }
+ }
+
+ if (physical_device->rad_info.chip_class >= GFX9) {
+ radeon_set_context_reg(
+ cs, R_028C48_PA_SC_BINNER_CNTL_1,
+ S_028C48_MAX_ALLOC_COUNT(physical_device->rad_info.pbb_max_alloc_count - 1) |
+ S_028C48_MAX_PRIM_PER_BATCH(1023));
+ radeon_set_context_reg(cs, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL,
+ S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1));
+ radeon_set_uconfig_reg(cs, R_030968_VGT_INSTANCE_BASE_ID, 0);
+ }
+
+ unsigned tmp = (unsigned)(1.0 * 8.0);
+ radeon_set_context_reg_seq(cs, R_028A00_PA_SU_POINT_SIZE, 1);
+ radeon_emit(cs, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp));
+ radeon_set_context_reg_seq(cs, R_028A04_PA_SU_POINT_MINMAX, 1);
+ radeon_emit(cs, S_028A04_MIN_SIZE(radv_pack_float_12p4(0)) |
+ S_028A04_MAX_SIZE(radv_pack_float_12p4(8191.875 / 2)));
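A minimal sketch (not from this change) of the 12.4 fixed-point packing behind the two constants above, assuming radv_pack_float_12p4() clamps x * 16 into 16 bits and that these point-size registers take half-sizes:

   #include <stdint.h>

   /* 12.4 fixed point: 4 fractional bits, so scale by 16 and clamp. */
   static uint16_t
   example_pack_float_12p4(double x)
   {
      double v = x * 16.0;
      return v <= 0.0 ? 0 : v >= 65535.0 ? 0xffff : (uint16_t)v;
   }

   /* (unsigned)(1.0 * 8.0) == example_pack_float_12p4(0.5): a half-size of
    * 0.5 pixel, i.e. the default point diameter of 1.0.
    * example_pack_float_12p4(8191.875 / 2) == 0xffff: the register maximum. */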
+
+ if (!has_clear_state) {
+ radeon_set_context_reg(cs, R_028004_DB_COUNT_CONTROL, S_028004_ZPASS_INCREMENT_DISABLE(1));
+ }
+
+ /* Enable the Polaris small primitive filter control.
+ * XXX: There is possibly an issue when MSAA is off (see RadeonSI
+ * has_msaa_sample_loc_bug). But this doesn't seem to regress anything,
+ * and AMDVLK doesn't have a workaround either.
+ */
+ if (physical_device->rad_info.family >= CHIP_POLARIS10) {
+ unsigned small_prim_filter_cntl =
+ S_028830_SMALL_PRIM_FILTER_ENABLE(1) |
+ /* Workaround for a hw line bug. */
+ S_028830_LINE_FILTER_DISABLE(physical_device->rad_info.family <= CHIP_POLARIS12);
+
+ radeon_set_context_reg(cs, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL, small_prim_filter_cntl);
+ }
+
+ radeon_set_context_reg(
+ cs, R_0286D4_SPI_INTERP_CONTROL_0,
+ S_0286D4_FLAT_SHADE_ENA(1) | S_0286D4_PNT_SPRITE_ENA(1) |
+ S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) |
+ S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) |
+ S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) |
+ S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) |
+ S_0286D4_PNT_SPRITE_TOP_1(0)); /* vulkan is top to bottom - 1.0 at bottom */
+
+ radeon_set_context_reg(cs, R_028BE4_PA_SU_VTX_CNTL,
+ S_028BE4_PIX_CENTER(1) | S_028BE4_ROUND_MODE(V_028BE4_X_ROUND_TO_EVEN) |
+ S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH));
+
+ radeon_set_context_reg(cs, R_028818_PA_CL_VTE_CNTL,
+ S_028818_VTX_W0_FMT(1) | S_028818_VPORT_X_SCALE_ENA(1) |
+ S_028818_VPORT_X_OFFSET_ENA(1) | S_028818_VPORT_Y_SCALE_ENA(1) |
+ S_028818_VPORT_Y_OFFSET_ENA(1) | S_028818_VPORT_Z_SCALE_ENA(1) |
+ S_028818_VPORT_Z_OFFSET_ENA(1));
+
+ if (device->tma_bo) {
+ uint64_t tba_va, tma_va;
+
+ assert(device->physical_device->rad_info.chip_class == GFX8);
+
+ tba_va = radv_buffer_get_va(device->trap_handler_shader->bo) +
+ device->trap_handler_shader->bo_offset;
+ tma_va = radv_buffer_get_va(device->tma_bo);
+
+ uint32_t regs[] = {R_00B000_SPI_SHADER_TBA_LO_PS, R_00B100_SPI_SHADER_TBA_LO_VS,
+ R_00B200_SPI_SHADER_TBA_LO_GS, R_00B300_SPI_SHADER_TBA_LO_ES,
+ R_00B400_SPI_SHADER_TBA_LO_HS, R_00B500_SPI_SHADER_TBA_LO_LS};
+
+ for (i = 0; i < ARRAY_SIZE(regs); ++i) {
+ radeon_set_sh_reg_seq(cs, regs[i], 4);
+ radeon_emit(cs, tba_va >> 8);
+ radeon_emit(cs, tba_va >> 40);
+ radeon_emit(cs, tma_va >> 8);
+ radeon_emit(cs, tma_va >> 40);
+ }
+ }
+
+ si_emit_compute(device, cs);
}
void
cik_create_gfx_config(struct radv_device *device)
{
- struct radeon_cmdbuf *cs = device->ws->cs_create(device->ws, RING_GFX);
- if (!cs)
- return;
-
- si_emit_graphics(device, cs);
-
- while (cs->cdw & 7) {
- if (device->physical_device->rad_info.gfx_ib_pad_with_type2)
- radeon_emit(cs, PKT2_NOP_PAD);
- else
- radeon_emit(cs, PKT3_NOP_PAD);
- }
-
- device->gfx_init = device->ws->buffer_create(device->ws,
- cs->cdw * 4, 4096,
- radv_cmdbuffer_domain(&device->physical_device->rad_info,
- device->instance->perftest_flags),
- RADEON_FLAG_CPU_ACCESS|
- RADEON_FLAG_NO_INTERPROCESS_SHARING |
- RADEON_FLAG_READ_ONLY |
- RADEON_FLAG_GTT_WC,
- RADV_BO_PRIORITY_CS);
- if (!device->gfx_init)
- goto fail;
-
- void *map = device->ws->buffer_map(device->gfx_init);
- if (!map) {
- device->ws->buffer_destroy(device->ws, device->gfx_init);
- device->gfx_init = NULL;
- goto fail;
- }
- memcpy(map, cs->buf, cs->cdw * 4);
-
- device->ws->buffer_unmap(device->gfx_init);
- device->gfx_init_size_dw = cs->cdw;
+ struct radeon_cmdbuf *cs = device->ws->cs_create(device->ws, RING_GFX);
+ if (!cs)
+ return;
+
+ si_emit_graphics(device, cs);
+
+ while (cs->cdw & 7) {
+ if (device->physical_device->rad_info.gfx_ib_pad_with_type2)
+ radeon_emit(cs, PKT2_NOP_PAD);
+ else
+ radeon_emit(cs, PKT3_NOP_PAD);
+ }
+
+ device->gfx_init = device->ws->buffer_create(
+ device->ws, cs->cdw * 4, 4096,
+ radv_cmdbuffer_domain(&device->physical_device->rad_info, device->instance->perftest_flags),
+ RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY |
+ RADEON_FLAG_GTT_WC,
+ RADV_BO_PRIORITY_CS);
+ if (!device->gfx_init)
+ goto fail;
+
+ void *map = device->ws->buffer_map(device->gfx_init);
+ if (!map) {
+ device->ws->buffer_destroy(device->ws, device->gfx_init);
+ device->gfx_init = NULL;
+ goto fail;
+ }
+ memcpy(map, cs->buf, cs->cdw * 4);
+
+ device->ws->buffer_unmap(device->gfx_init);
+ device->gfx_init_size_dw = cs->cdw;
fail:
- device->ws->cs_destroy(cs);
+ device->ws->cs_destroy(cs);
}
static void
-get_viewport_xform(const VkViewport *viewport,
- float scale[3], float translate[3])
+get_viewport_xform(const VkViewport *viewport, float scale[3], float translate[3])
{
- float x = viewport->x;
- float y = viewport->y;
- float half_width = 0.5f * viewport->width;
- float half_height = 0.5f * viewport->height;
- double n = viewport->minDepth;
- double f = viewport->maxDepth;
-
- scale[0] = half_width;
- translate[0] = half_width + x;
- scale[1] = half_height;
- translate[1] = half_height + y;
-
- scale[2] = (f - n);
- translate[2] = n;
+ float x = viewport->x;
+ float y = viewport->y;
+ float half_width = 0.5f * viewport->width;
+ float half_height = 0.5f * viewport->height;
+ double n = viewport->minDepth;
+ double f = viewport->maxDepth;
+
+ scale[0] = half_width;
+ translate[0] = half_width + x;
+ scale[1] = half_height;
+ translate[1] = half_height + y;
+
+ scale[2] = (f - n);
+ translate[2] = n;
}
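A worked example (not from this change) of what get_viewport_xform() produces for a hypothetical 1920x1080 viewport with depth range [0, 1]:

   static void
   example_viewport_xform(void)
   {
      /* 1920x1080 viewport at the origin, depth range [0, 1]. */
      VkViewport vp = {0.0f, 0.0f, 1920.0f, 1080.0f, 0.0f, 1.0f};
      float scale[3], translate[3];

      get_viewport_xform(&vp, scale, translate);
      /* scale     == { 960, 540, 1 }
       * translate == { 960, 540, 0 }
       * NDC x,y in [-1, 1] map to [0, 1920] x [0, 1080]; NDC z maps to
       * minDepth + z * (maxDepth - minDepth). */
   }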
void
-si_write_viewport(struct radeon_cmdbuf *cs, int first_vp,
- int count, const VkViewport *viewports)
+si_write_viewport(struct radeon_cmdbuf *cs, int first_vp, int count, const VkViewport *viewports)
{
- int i;
-
- assert(count);
- radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE +
- first_vp * 4 * 6, count * 6);
-
- for (i = 0; i < count; i++) {
- float scale[3], translate[3];
-
-
- get_viewport_xform(&viewports[i], scale, translate);
- radeon_emit(cs, fui(scale[0]));
- radeon_emit(cs, fui(translate[0]));
- radeon_emit(cs, fui(scale[1]));
- radeon_emit(cs, fui(translate[1]));
- radeon_emit(cs, fui(scale[2]));
- radeon_emit(cs, fui(translate[2]));
- }
-
- radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0 +
- first_vp * 4 * 2, count * 2);
- for (i = 0; i < count; i++) {
- float zmin = MIN2(viewports[i].minDepth, viewports[i].maxDepth);
- float zmax = MAX2(viewports[i].minDepth, viewports[i].maxDepth);
- radeon_emit(cs, fui(zmin));
- radeon_emit(cs, fui(zmax));
- }
+ int i;
+
+ assert(count);
+ radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE + first_vp * 4 * 6, count * 6);
+
+ for (i = 0; i < count; i++) {
+ float scale[3], translate[3];
+
+ get_viewport_xform(&viewports[i], scale, translate);
+ radeon_emit(cs, fui(scale[0]));
+ radeon_emit(cs, fui(translate[0]));
+ radeon_emit(cs, fui(scale[1]));
+ radeon_emit(cs, fui(translate[1]));
+ radeon_emit(cs, fui(scale[2]));
+ radeon_emit(cs, fui(translate[2]));
+ }
+
+ radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0 + first_vp * 4 * 2, count * 2);
+ for (i = 0; i < count; i++) {
+ float zmin = MIN2(viewports[i].minDepth, viewports[i].maxDepth);
+ float zmax = MAX2(viewports[i].minDepth, viewports[i].maxDepth);
+ radeon_emit(cs, fui(zmin));
+ radeon_emit(cs, fui(zmax));
+ }
}
-static VkRect2D si_scissor_from_viewport(const VkViewport *viewport)
+static VkRect2D
+si_scissor_from_viewport(const VkViewport *viewport)
{
- float scale[3], translate[3];
- VkRect2D rect;
+ float scale[3], translate[3];
+ VkRect2D rect;
- get_viewport_xform(viewport, scale, translate);
+ get_viewport_xform(viewport, scale, translate);
- rect.offset.x = translate[0] - fabsf(scale[0]);
- rect.offset.y = translate[1] - fabsf(scale[1]);
- rect.extent.width = ceilf(translate[0] + fabsf(scale[0])) - rect.offset.x;
- rect.extent.height = ceilf(translate[1] + fabsf(scale[1])) - rect.offset.y;
+ rect.offset.x = translate[0] - fabsf(scale[0]);
+ rect.offset.y = translate[1] - fabsf(scale[1]);
+ rect.extent.width = ceilf(translate[0] + fabsf(scale[0])) - rect.offset.x;
+ rect.extent.height = ceilf(translate[1] + fabsf(scale[1])) - rect.offset.y;
- return rect;
+ return rect;
}
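A short sketch (not from this change) showing why the fabsf() calls above matter: a Y-flipped viewport, as Vulkan apps commonly use, yields the same derived scissor as the unflipped one. The viewport values are hypothetical:

   static VkRect2D
   example_scissor_from_flipped_viewport(void)
   {
      /* y = 1080, height = -1080: Y-flipped 1920x1080 viewport. */
      VkViewport vp = {0.0f, 1080.0f, 1920.0f, -1080.0f, 0.0f, 1.0f};
      /* scale = { 960, -540, 1 }, translate = { 960, 540, 0 }, so the result
       * is offset (0, 0) with extent 1920x1080, as for the unflipped case. */
      return si_scissor_from_viewport(&vp);
   }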
-static VkRect2D si_intersect_scissor(const VkRect2D *a, const VkRect2D *b) {
- VkRect2D ret;
- ret.offset.x = MAX2(a->offset.x, b->offset.x);
- ret.offset.y = MAX2(a->offset.y, b->offset.y);
- ret.extent.width = MIN2(a->offset.x + a->extent.width,
- b->offset.x + b->extent.width) - ret.offset.x;
- ret.extent.height = MIN2(a->offset.y + a->extent.height,
- b->offset.y + b->extent.height) - ret.offset.y;
- return ret;
+static VkRect2D
+si_intersect_scissor(const VkRect2D *a, const VkRect2D *b)
+{
+ VkRect2D ret;
+ ret.offset.x = MAX2(a->offset.x, b->offset.x);
+ ret.offset.y = MAX2(a->offset.y, b->offset.y);
+ ret.extent.width =
+ MIN2(a->offset.x + a->extent.width, b->offset.x + b->extent.width) - ret.offset.x;
+ ret.extent.height =
+ MIN2(a->offset.y + a->extent.height, b->offset.y + b->extent.height) - ret.offset.y;
+ return ret;
}
void
-si_write_scissors(struct radeon_cmdbuf *cs, int first,
- int count, const VkRect2D *scissors,
+si_write_scissors(struct radeon_cmdbuf *cs, int first, int count, const VkRect2D *scissors,
const VkViewport *viewports, bool can_use_guardband)
{
- int i;
- float scale[3], translate[3], guardband_x = INFINITY, guardband_y = INFINITY;
- const float max_range = 32767.0f;
- if (!count)
- return;
-
- radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL + first * 4 * 2, count * 2);
- for (i = 0; i < count; i++) {
- VkRect2D viewport_scissor = si_scissor_from_viewport(viewports + i);
- VkRect2D scissor = si_intersect_scissor(&scissors[i], &viewport_scissor);
-
- get_viewport_xform(viewports + i, scale, translate);
- scale[0] = fabsf(scale[0]);
- scale[1] = fabsf(scale[1]);
-
- if (scale[0] < 0.5)
- scale[0] = 0.5;
- if (scale[1] < 0.5)
- scale[1] = 0.5;
-
- guardband_x = MIN2(guardband_x, (max_range - fabsf(translate[0])) / scale[0]);
- guardband_y = MIN2(guardband_y, (max_range - fabsf(translate[1])) / scale[1]);
-
- radeon_emit(cs, S_028250_TL_X(scissor.offset.x) |
- S_028250_TL_Y(scissor.offset.y) |
- S_028250_WINDOW_OFFSET_DISABLE(1));
- radeon_emit(cs, S_028254_BR_X(scissor.offset.x + scissor.extent.width) |
- S_028254_BR_Y(scissor.offset.y + scissor.extent.height));
- }
- if (!can_use_guardband) {
- guardband_x = 1.0;
- guardband_y = 1.0;
- }
-
- radeon_set_context_reg_seq(cs, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 4);
- radeon_emit(cs, fui(guardband_y));
- radeon_emit(cs, fui(1.0));
- radeon_emit(cs, fui(guardband_x));
- radeon_emit(cs, fui(1.0));
+ int i;
+ float scale[3], translate[3], guardband_x = INFINITY, guardband_y = INFINITY;
+ const float max_range = 32767.0f;
+ if (!count)
+ return;
+
+ radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL + first * 4 * 2, count * 2);
+ for (i = 0; i < count; i++) {
+ VkRect2D viewport_scissor = si_scissor_from_viewport(viewports + i);
+ VkRect2D scissor = si_intersect_scissor(&scissors[i], &viewport_scissor);
+
+ get_viewport_xform(viewports + i, scale, translate);
+ scale[0] = fabsf(scale[0]);
+ scale[1] = fabsf(scale[1]);
+
+ if (scale[0] < 0.5)
+ scale[0] = 0.5;
+ if (scale[1] < 0.5)
+ scale[1] = 0.5;
+
+ guardband_x = MIN2(guardband_x, (max_range - fabsf(translate[0])) / scale[0]);
+ guardband_y = MIN2(guardband_y, (max_range - fabsf(translate[1])) / scale[1]);
+
+ radeon_emit(cs, S_028250_TL_X(scissor.offset.x) | S_028250_TL_Y(scissor.offset.y) |
+ S_028250_WINDOW_OFFSET_DISABLE(1));
+ radeon_emit(cs, S_028254_BR_X(scissor.offset.x + scissor.extent.width) |
+ S_028254_BR_Y(scissor.offset.y + scissor.extent.height));
+ }
+ if (!can_use_guardband) {
+ guardband_x = 1.0;
+ guardband_y = 1.0;
+ }
+
+ radeon_set_context_reg_seq(cs, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 4);
+ radeon_emit(cs, fui(guardband_y));
+ radeon_emit(cs, fui(1.0));
+ radeon_emit(cs, fui(guardband_x));
+ radeon_emit(cs, fui(1.0));
}
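A worked guardband value (not from this change), assuming the same hypothetical 1920-wide viewport at x = 0:

   /* scale[0] == 960, translate[0] == 960, max_range == 32767:
    *
    *    guardband_x = (32767 - 960) / 960 ~= 33.1
    *
    * i.e. geometry may extend to roughly 33x the viewport half-width from its
    * center before the clipper has to run; with can_use_guardband false the
    * value collapses to 1.0 (no extra guardband beyond the viewport). */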
static inline unsigned
radv_prims_for_vertices(struct radv_prim_vertex_count *info, unsigned num)
{
- if (num == 0)
- return 0;
+ if (num == 0)
+ return 0;
- if (info->incr == 0)
- return 0;
+ if (info->incr == 0)
+ return 0;
- if (num < info->min)
- return 0;
+ if (num < info->min)
+ return 0;
- return 1 + ((num - info->min) / info->incr);
+ return 1 + ((num - info->min) / info->incr);
}
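Worked values (not from this change) for the formula above, with {min, incr} pairs taken from prim_size_table just below:

   /* Triangle strip {3, 1}: 10 vertices -> 1 + (10 - 3) / 1 == 8 primitives.
    * Triangle list  {3, 3}: 10 vertices -> 1 + (10 - 3) / 3 == 3 primitives
    *                        (the leftover vertex draws nothing).
    * Fewer than 'min' vertices, or incr == 0, yields 0. */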
static const struct radv_prim_vertex_count prim_size_table[] = {
- [V_008958_DI_PT_NONE] = {0, 0},
- [V_008958_DI_PT_POINTLIST] = {1, 1},
- [V_008958_DI_PT_LINELIST] = {2, 2},
- [V_008958_DI_PT_LINESTRIP] = {2, 1},
- [V_008958_DI_PT_TRILIST] = {3, 3},
- [V_008958_DI_PT_TRIFAN] = {3, 1},
- [V_008958_DI_PT_TRISTRIP] = {3, 1},
- [V_008958_DI_PT_LINELIST_ADJ] = {4, 4},
- [V_008958_DI_PT_LINESTRIP_ADJ] = {4, 1},
- [V_008958_DI_PT_TRILIST_ADJ] = {6, 6},
- [V_008958_DI_PT_TRISTRIP_ADJ] = {6, 2},
- [V_008958_DI_PT_RECTLIST] = {3, 3},
- [V_008958_DI_PT_LINELOOP] = {2, 1},
- [V_008958_DI_PT_POLYGON] = {3, 1},
- [V_008958_DI_PT_2D_TRI_STRIP] = {0, 0},
+ [V_008958_DI_PT_NONE] = {0, 0}, [V_008958_DI_PT_POINTLIST] = {1, 1},
+ [V_008958_DI_PT_LINELIST] = {2, 2}, [V_008958_DI_PT_LINESTRIP] = {2, 1},
+ [V_008958_DI_PT_TRILIST] = {3, 3}, [V_008958_DI_PT_TRIFAN] = {3, 1},
+ [V_008958_DI_PT_TRISTRIP] = {3, 1}, [V_008958_DI_PT_LINELIST_ADJ] = {4, 4},
+ [V_008958_DI_PT_LINESTRIP_ADJ] = {4, 1}, [V_008958_DI_PT_TRILIST_ADJ] = {6, 6},
+ [V_008958_DI_PT_TRISTRIP_ADJ] = {6, 2}, [V_008958_DI_PT_RECTLIST] = {3, 3},
+ [V_008958_DI_PT_LINELOOP] = {2, 1}, [V_008958_DI_PT_POLYGON] = {3, 1},
+ [V_008958_DI_PT_2D_TRI_STRIP] = {0, 0},
};
uint32_t
-si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer,
- bool instanced_draw, bool indirect_draw,
- bool count_from_stream_output,
- uint32_t draw_vertex_count,
- unsigned topology)
+si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_draw,
+ bool indirect_draw, bool count_from_stream_output,
+ uint32_t draw_vertex_count, unsigned topology)
{
- enum chip_class chip_class = cmd_buffer->device->physical_device->rad_info.chip_class;
- enum radeon_family family = cmd_buffer->device->physical_device->rad_info.family;
- struct radeon_info *info = &cmd_buffer->device->physical_device->rad_info;
- const unsigned max_primgroup_in_wave = 2;
- /* SWITCH_ON_EOP(0) is always preferable. */
- bool wd_switch_on_eop = false;
- bool ia_switch_on_eop = false;
- bool ia_switch_on_eoi = false;
- bool partial_vs_wave = false;
- bool partial_es_wave = cmd_buffer->state.pipeline->graphics.ia_multi_vgt_param.partial_es_wave;
- bool multi_instances_smaller_than_primgroup;
- struct radv_prim_vertex_count prim_vertex_count = prim_size_table[topology];
-
- if (radv_pipeline_has_tess(cmd_buffer->state.pipeline)) {
- if (topology == V_008958_DI_PT_PATCH) {
- prim_vertex_count.min = cmd_buffer->state.pipeline->graphics.tess_patch_control_points;
- prim_vertex_count.incr = 1;
- }
- }
-
- multi_instances_smaller_than_primgroup = indirect_draw;
- if (!multi_instances_smaller_than_primgroup && instanced_draw) {
- uint32_t num_prims = radv_prims_for_vertices(&prim_vertex_count, draw_vertex_count);
- if (num_prims < cmd_buffer->state.pipeline->graphics.ia_multi_vgt_param.primgroup_size)
- multi_instances_smaller_than_primgroup = true;
- }
-
- ia_switch_on_eoi = cmd_buffer->state.pipeline->graphics.ia_multi_vgt_param.ia_switch_on_eoi;
- partial_vs_wave = cmd_buffer->state.pipeline->graphics.ia_multi_vgt_param.partial_vs_wave;
-
- if (chip_class >= GFX7) {
- /* WD_SWITCH_ON_EOP has no effect on GPUs with less than
- * 4 shader engines. Set 1 to pass the assertion below.
- * The other cases are hardware requirements. */
- if (cmd_buffer->device->physical_device->rad_info.max_se < 4 ||
- topology == V_008958_DI_PT_POLYGON ||
- topology == V_008958_DI_PT_LINELOOP ||
- topology == V_008958_DI_PT_TRIFAN ||
- topology == V_008958_DI_PT_TRISTRIP_ADJ ||
- (cmd_buffer->state.pipeline->graphics.prim_restart_enable &&
- (cmd_buffer->device->physical_device->rad_info.family < CHIP_POLARIS10 ||
- (topology != V_008958_DI_PT_POINTLIST &&
- topology != V_008958_DI_PT_LINESTRIP))))
- wd_switch_on_eop = true;
-
- /* Hawaii hangs if instancing is enabled and WD_SWITCH_ON_EOP is 0.
- * We don't know that for indirect drawing, so treat it as
- * always problematic. */
- if (family == CHIP_HAWAII &&
- (instanced_draw || indirect_draw))
- wd_switch_on_eop = true;
-
- /* Performance recommendation for 4 SE Gfx7-8 parts if
- * instances are smaller than a primgroup.
- * Assume indirect draws always use small instances.
- * This is needed for good VS wave utilization.
- */
- if (chip_class <= GFX8 &&
- info->max_se == 4 &&
- multi_instances_smaller_than_primgroup)
- wd_switch_on_eop = true;
-
- /* Required on GFX7 and later. */
- if (info->max_se > 2 && !wd_switch_on_eop)
- ia_switch_on_eoi = true;
-
- /* Required by Hawaii and, for some special cases, by GFX8. */
- if (ia_switch_on_eoi &&
- (family == CHIP_HAWAII ||
- (chip_class == GFX8 &&
- /* max primgroup in wave is always 2 - leave this for documentation */
- (radv_pipeline_has_gs(cmd_buffer->state.pipeline) || max_primgroup_in_wave != 2))))
- partial_vs_wave = true;
-
- /* Instancing bug on Bonaire. */
- if (family == CHIP_BONAIRE && ia_switch_on_eoi &&
- (instanced_draw || indirect_draw))
- partial_vs_wave = true;
-
- /* Hardware requirement when drawing primitives from a stream
- * output buffer.
- */
- if (count_from_stream_output)
- wd_switch_on_eop = true;
-
- /* If the WD switch is false, the IA switch must be false too. */
- assert(wd_switch_on_eop || !ia_switch_on_eop);
- }
- /* If SWITCH_ON_EOI is set, PARTIAL_ES_WAVE must be set too. */
- if (chip_class <= GFX8 && ia_switch_on_eoi)
- partial_es_wave = true;
-
- if (radv_pipeline_has_gs(cmd_buffer->state.pipeline)) {
- /* GS hw bug with single-primitive instances and SWITCH_ON_EOI.
- * The hw doc says all multi-SE chips are affected, but amdgpu-pro Vulkan
- * only applies it to Hawaii. Do what amdgpu-pro Vulkan does.
- */
- if (family == CHIP_HAWAII && ia_switch_on_eoi) {
- bool set_vgt_flush = indirect_draw;
- if (!set_vgt_flush && instanced_draw) {
- uint32_t num_prims = radv_prims_for_vertices(&prim_vertex_count, draw_vertex_count);
- if (num_prims <= 1)
- set_vgt_flush = true;
- }
- if (set_vgt_flush)
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VGT_FLUSH;
- }
- }
-
- /* Workaround for a VGT hang when strip primitive types are used with
- * primitive restart.
- */
- if (cmd_buffer->state.pipeline->graphics.prim_restart_enable &&
- (topology == V_008958_DI_PT_LINESTRIP ||
- topology == V_008958_DI_PT_TRISTRIP ||
- topology == V_008958_DI_PT_LINESTRIP_ADJ ||
- topology == V_008958_DI_PT_TRISTRIP_ADJ)) {
- partial_vs_wave = true;
- }
-
- return cmd_buffer->state.pipeline->graphics.ia_multi_vgt_param.base |
- S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) |
- S_028AA8_SWITCH_ON_EOI(ia_switch_on_eoi) |
- S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) |
- S_028AA8_PARTIAL_ES_WAVE_ON(partial_es_wave) |
- S_028AA8_WD_SWITCH_ON_EOP(chip_class >= GFX7 ? wd_switch_on_eop : 0);
-
+ enum chip_class chip_class = cmd_buffer->device->physical_device->rad_info.chip_class;
+ enum radeon_family family = cmd_buffer->device->physical_device->rad_info.family;
+ struct radeon_info *info = &cmd_buffer->device->physical_device->rad_info;
+ const unsigned max_primgroup_in_wave = 2;
+ /* SWITCH_ON_EOP(0) is always preferable. */
+ bool wd_switch_on_eop = false;
+ bool ia_switch_on_eop = false;
+ bool ia_switch_on_eoi = false;
+ bool partial_vs_wave = false;
+ bool partial_es_wave = cmd_buffer->state.pipeline->graphics.ia_multi_vgt_param.partial_es_wave;
+ bool multi_instances_smaller_than_primgroup;
+ struct radv_prim_vertex_count prim_vertex_count = prim_size_table[topology];
+
+ if (radv_pipeline_has_tess(cmd_buffer->state.pipeline)) {
+ if (topology == V_008958_DI_PT_PATCH) {
+ prim_vertex_count.min = cmd_buffer->state.pipeline->graphics.tess_patch_control_points;
+ prim_vertex_count.incr = 1;
+ }
+ }
+
+ multi_instances_smaller_than_primgroup = indirect_draw;
+ if (!multi_instances_smaller_than_primgroup && instanced_draw) {
+ uint32_t num_prims = radv_prims_for_vertices(&prim_vertex_count, draw_vertex_count);
+ if (num_prims < cmd_buffer->state.pipeline->graphics.ia_multi_vgt_param.primgroup_size)
+ multi_instances_smaller_than_primgroup = true;
+ }
+
+ ia_switch_on_eoi = cmd_buffer->state.pipeline->graphics.ia_multi_vgt_param.ia_switch_on_eoi;
+ partial_vs_wave = cmd_buffer->state.pipeline->graphics.ia_multi_vgt_param.partial_vs_wave;
+
+ if (chip_class >= GFX7) {
+ /* WD_SWITCH_ON_EOP has no effect on GPUs with less than
+ * 4 shader engines. Set 1 to pass the assertion below.
+ * The other cases are hardware requirements. */
+ if (cmd_buffer->device->physical_device->rad_info.max_se < 4 ||
+ topology == V_008958_DI_PT_POLYGON || topology == V_008958_DI_PT_LINELOOP ||
+ topology == V_008958_DI_PT_TRIFAN || topology == V_008958_DI_PT_TRISTRIP_ADJ ||
+ (cmd_buffer->state.pipeline->graphics.prim_restart_enable &&
+ (cmd_buffer->device->physical_device->rad_info.family < CHIP_POLARIS10 ||
+ (topology != V_008958_DI_PT_POINTLIST && topology != V_008958_DI_PT_LINESTRIP))))
+ wd_switch_on_eop = true;
+
+ /* Hawaii hangs if instancing is enabled and WD_SWITCH_ON_EOP is 0.
+ * We don't know that for indirect drawing, so treat it as
+ * always problematic. */
+ if (family == CHIP_HAWAII && (instanced_draw || indirect_draw))
+ wd_switch_on_eop = true;
+
+ /* Performance recommendation for 4 SE Gfx7-8 parts if
+ * instances are smaller than a primgroup.
+ * Assume indirect draws always use small instances.
+ * This is needed for good VS wave utilization.
+ */
+ if (chip_class <= GFX8 && info->max_se == 4 && multi_instances_smaller_than_primgroup)
+ wd_switch_on_eop = true;
+
+ /* Required on GFX7 and later. */
+ if (info->max_se > 2 && !wd_switch_on_eop)
+ ia_switch_on_eoi = true;
+
+ /* Required by Hawaii and, for some special cases, by GFX8. */
+ if (ia_switch_on_eoi &&
+ (family == CHIP_HAWAII ||
+ (chip_class == GFX8 &&
+ /* max primgroup in wave is always 2 - leave this for documentation */
+ (radv_pipeline_has_gs(cmd_buffer->state.pipeline) || max_primgroup_in_wave != 2))))
+ partial_vs_wave = true;
+
+ /* Instancing bug on Bonaire. */
+ if (family == CHIP_BONAIRE && ia_switch_on_eoi && (instanced_draw || indirect_draw))
+ partial_vs_wave = true;
+
+ /* Hardware requirement when drawing primitives from a stream
+ * output buffer.
+ */
+ if (count_from_stream_output)
+ wd_switch_on_eop = true;
+
+ /* If the WD switch is false, the IA switch must be false too. */
+ assert(wd_switch_on_eop || !ia_switch_on_eop);
+ }
+ /* If SWITCH_ON_EOI is set, PARTIAL_ES_WAVE must be set too. */
+ if (chip_class <= GFX8 && ia_switch_on_eoi)
+ partial_es_wave = true;
+
+ if (radv_pipeline_has_gs(cmd_buffer->state.pipeline)) {
+ /* GS hw bug with single-primitive instances and SWITCH_ON_EOI.
+ * The hw doc says all multi-SE chips are affected, but amdgpu-pro Vulkan
+ * only applies it to Hawaii. Do what amdgpu-pro Vulkan does.
+ */
+ if (family == CHIP_HAWAII && ia_switch_on_eoi) {
+ bool set_vgt_flush = indirect_draw;
+ if (!set_vgt_flush && instanced_draw) {
+ uint32_t num_prims = radv_prims_for_vertices(&prim_vertex_count, draw_vertex_count);
+ if (num_prims <= 1)
+ set_vgt_flush = true;
+ }
+ if (set_vgt_flush)
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VGT_FLUSH;
+ }
+ }
+
+ /* Workaround for a VGT hang when strip primitive types are used with
+ * primitive restart.
+ */
+ if (cmd_buffer->state.pipeline->graphics.prim_restart_enable &&
+ (topology == V_008958_DI_PT_LINESTRIP || topology == V_008958_DI_PT_TRISTRIP ||
+ topology == V_008958_DI_PT_LINESTRIP_ADJ || topology == V_008958_DI_PT_TRISTRIP_ADJ)) {
+ partial_vs_wave = true;
+ }
+
+ return cmd_buffer->state.pipeline->graphics.ia_multi_vgt_param.base |
+ S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) | S_028AA8_SWITCH_ON_EOI(ia_switch_on_eoi) |
+ S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) |
+ S_028AA8_PARTIAL_ES_WAVE_ON(partial_es_wave) |
+ S_028AA8_WD_SWITCH_ON_EOP(chip_class >= GFX7 ? wd_switch_on_eop : 0);
}
-void si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs,
- enum chip_class chip_class,
- bool is_mec,
- unsigned event, unsigned event_flags,
- unsigned dst_sel, unsigned data_sel,
- uint64_t va,
- uint32_t new_fence,
- uint64_t gfx9_eop_bug_va)
+void
+si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum chip_class chip_class, bool is_mec,
+ unsigned event, unsigned event_flags, unsigned dst_sel,
+ unsigned data_sel, uint64_t va, uint32_t new_fence,
+ uint64_t gfx9_eop_bug_va)
{
- unsigned op = EVENT_TYPE(event) |
- EVENT_INDEX(event == V_028A90_CS_DONE ||
- event == V_028A90_PS_DONE ? 6 : 5) |
- event_flags;
- unsigned is_gfx8_mec = is_mec && chip_class < GFX9;
- unsigned sel = EOP_DST_SEL(dst_sel) |
- EOP_DATA_SEL(data_sel);
-
- /* Wait for write confirmation before writing data, but don't send
- * an interrupt. */
- if (data_sel != EOP_DATA_SEL_DISCARD)
- sel |= EOP_INT_SEL(EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM);
-
- if (chip_class >= GFX9 || is_gfx8_mec) {
- /* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion
- * counters) must immediately precede every timestamp event to
- * prevent a GPU hang on GFX9.
- */
- if (chip_class == GFX9 && !is_mec) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
- radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
- radeon_emit(cs, gfx9_eop_bug_va);
- radeon_emit(cs, gfx9_eop_bug_va >> 32);
- }
-
- radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, is_gfx8_mec ? 5 : 6, false));
- radeon_emit(cs, op);
- radeon_emit(cs, sel);
- radeon_emit(cs, va); /* address lo */
- radeon_emit(cs, va >> 32); /* address hi */
- radeon_emit(cs, new_fence); /* immediate data lo */
- radeon_emit(cs, 0); /* immediate data hi */
- if (!is_gfx8_mec)
- radeon_emit(cs, 0); /* unused */
- } else {
- /* On GFX6, EOS events are always emitted with EVENT_WRITE_EOS.
- * On GFX7+, EOS events are emitted with EVENT_WRITE_EOS on
- * the graphics queue, and with RELEASE_MEM on the compute
- * queue.
- */
- if (event == V_028B9C_CS_DONE || event == V_028B9C_PS_DONE) {
- assert(event_flags == 0 &&
- dst_sel == EOP_DST_SEL_MEM &&
- data_sel == EOP_DATA_SEL_VALUE_32BIT);
-
- if (is_mec) {
- radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, 5, false));
- radeon_emit(cs, op);
- radeon_emit(cs, sel);
- radeon_emit(cs, va); /* address lo */
- radeon_emit(cs, va >> 32); /* address hi */
- radeon_emit(cs, new_fence); /* immediate data lo */
- radeon_emit(cs, 0); /* immediate data hi */
- } else {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOS, 3, false));
- radeon_emit(cs, op);
- radeon_emit(cs, va);
- radeon_emit(cs, ((va >> 32) & 0xffff) |
- EOS_DATA_SEL(EOS_DATA_SEL_VALUE_32BIT));
- radeon_emit(cs, new_fence);
- }
- } else {
- if (chip_class == GFX7 ||
- chip_class == GFX8) {
- /* Two EOP events are required to make all
- * engines go idle (and optional cache flushes
- * executed) before the timestamp is written.
- */
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, false));
- radeon_emit(cs, op);
- radeon_emit(cs, va);
- radeon_emit(cs, ((va >> 32) & 0xffff) | sel);
- radeon_emit(cs, 0); /* immediate data */
- radeon_emit(cs, 0); /* unused */
- }
-
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, false));
- radeon_emit(cs, op);
- radeon_emit(cs, va);
- radeon_emit(cs, ((va >> 32) & 0xffff) | sel);
- radeon_emit(cs, new_fence); /* immediate data */
- radeon_emit(cs, 0); /* unused */
- }
- }
+ unsigned op = EVENT_TYPE(event) |
+ EVENT_INDEX(event == V_028A90_CS_DONE || event == V_028A90_PS_DONE ? 6 : 5) |
+ event_flags;
+ unsigned is_gfx8_mec = is_mec && chip_class < GFX9;
+ unsigned sel = EOP_DST_SEL(dst_sel) | EOP_DATA_SEL(data_sel);
+
+ /* Wait for write confirmation before writing data, but don't send
+ * an interrupt. */
+ if (data_sel != EOP_DATA_SEL_DISCARD)
+ sel |= EOP_INT_SEL(EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM);
+
+ if (chip_class >= GFX9 || is_gfx8_mec) {
+ /* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion
+ * counters) must immediately precede every timestamp event to
+ * prevent a GPU hang on GFX9.
+ */
+ if (chip_class == GFX9 && !is_mec) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
+ radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
+ radeon_emit(cs, gfx9_eop_bug_va);
+ radeon_emit(cs, gfx9_eop_bug_va >> 32);
+ }
+
+ radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, is_gfx8_mec ? 5 : 6, false));
+ radeon_emit(cs, op);
+ radeon_emit(cs, sel);
+ radeon_emit(cs, va); /* address lo */
+ radeon_emit(cs, va >> 32); /* address hi */
+ radeon_emit(cs, new_fence); /* immediate data lo */
+ radeon_emit(cs, 0); /* immediate data hi */
+ if (!is_gfx8_mec)
+ radeon_emit(cs, 0); /* unused */
+ } else {
+ /* On GFX6, EOS events are always emitted with EVENT_WRITE_EOS.
+ * On GFX7+, EOS events are emitted with EVENT_WRITE_EOS on
+ * the graphics queue, and with RELEASE_MEM on the compute
+ * queue.
+ */
+ if (event == V_028B9C_CS_DONE || event == V_028B9C_PS_DONE) {
+ assert(event_flags == 0 && dst_sel == EOP_DST_SEL_MEM &&
+ data_sel == EOP_DATA_SEL_VALUE_32BIT);
+
+ if (is_mec) {
+ radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, 5, false));
+ radeon_emit(cs, op);
+ radeon_emit(cs, sel);
+ radeon_emit(cs, va); /* address lo */
+ radeon_emit(cs, va >> 32); /* address hi */
+ radeon_emit(cs, new_fence); /* immediate data lo */
+ radeon_emit(cs, 0); /* immediate data hi */
+ } else {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOS, 3, false));
+ radeon_emit(cs, op);
+ radeon_emit(cs, va);
+ radeon_emit(cs, ((va >> 32) & 0xffff) | EOS_DATA_SEL(EOS_DATA_SEL_VALUE_32BIT));
+ radeon_emit(cs, new_fence);
+ }
+ } else {
+ if (chip_class == GFX7 || chip_class == GFX8) {
+ /* Two EOP events are required to make all
+ * engines go idle (and optional cache flushes
+ * executed) before the timestamp is written.
+ */
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, false));
+ radeon_emit(cs, op);
+ radeon_emit(cs, va);
+ radeon_emit(cs, ((va >> 32) & 0xffff) | sel);
+ radeon_emit(cs, 0); /* immediate data */
+ radeon_emit(cs, 0); /* unused */
+ }
+
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, false));
+ radeon_emit(cs, op);
+ radeon_emit(cs, va);
+ radeon_emit(cs, ((va >> 32) & 0xffff) | sel);
+ radeon_emit(cs, new_fence); /* immediate data */
+ radeon_emit(cs, 0); /* unused */
+ }
+ }
}
void
-radv_cp_wait_mem(struct radeon_cmdbuf *cs, uint32_t op, uint64_t va,
- uint32_t ref, uint32_t mask)
+radv_cp_wait_mem(struct radeon_cmdbuf *cs, uint32_t op, uint64_t va, uint32_t ref, uint32_t mask)
{
- assert(op == WAIT_REG_MEM_EQUAL ||
- op == WAIT_REG_MEM_NOT_EQUAL ||
- op == WAIT_REG_MEM_GREATER_OR_EQUAL);
-
- radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, false));
- radeon_emit(cs, op | WAIT_REG_MEM_MEM_SPACE(1));
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- radeon_emit(cs, ref); /* reference value */
- radeon_emit(cs, mask); /* mask */
- radeon_emit(cs, 4); /* poll interval */
+ assert(op == WAIT_REG_MEM_EQUAL || op == WAIT_REG_MEM_NOT_EQUAL ||
+ op == WAIT_REG_MEM_GREATER_OR_EQUAL);
+
+ radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, false));
+ radeon_emit(cs, op | WAIT_REG_MEM_MEM_SPACE(1));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ radeon_emit(cs, ref); /* reference value */
+ radeon_emit(cs, mask); /* mask */
+ radeon_emit(cs, 4); /* poll interval */
}
static void
-si_emit_acquire_mem(struct radeon_cmdbuf *cs,
- bool is_mec,
- bool is_gfx9,
- unsigned cp_coher_cntl)
+si_emit_acquire_mem(struct radeon_cmdbuf *cs, bool is_mec, bool is_gfx9, unsigned cp_coher_cntl)
{
- if (is_mec || is_gfx9) {
- uint32_t hi_val = is_gfx9 ? 0xffffff : 0xff;
- radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, false) |
- PKT3_SHADER_TYPE_S(is_mec));
- radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
- radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
- radeon_emit(cs, hi_val); /* CP_COHER_SIZE_HI */
- radeon_emit(cs, 0); /* CP_COHER_BASE */
- radeon_emit(cs, 0); /* CP_COHER_BASE_HI */
- radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
- } else {
- /* ACQUIRE_MEM is only required on a compute ring. */
- radeon_emit(cs, PKT3(PKT3_SURFACE_SYNC, 3, false));
- radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
- radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
- radeon_emit(cs, 0); /* CP_COHER_BASE */
- radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
- }
+ if (is_mec || is_gfx9) {
+ uint32_t hi_val = is_gfx9 ? 0xffffff : 0xff;
+ radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, false) | PKT3_SHADER_TYPE_S(is_mec));
+ radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
+ radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
+ radeon_emit(cs, hi_val); /* CP_COHER_SIZE_HI */
+ radeon_emit(cs, 0); /* CP_COHER_BASE */
+ radeon_emit(cs, 0); /* CP_COHER_BASE_HI */
+ radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
+ } else {
+ /* ACQUIRE_MEM is only required on a compute ring. */
+ radeon_emit(cs, PKT3(PKT3_SURFACE_SYNC, 3, false));
+ radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
+ radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
+ radeon_emit(cs, 0); /* CP_COHER_BASE */
+ radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
+ }
}
static void
-gfx10_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
- enum chip_class chip_class,
- uint32_t *flush_cnt,
- uint64_t flush_va,
- bool is_mec,
- enum radv_cmd_flush_bits flush_bits,
- enum rgp_flush_bits *sqtt_flush_bits,
- uint64_t gfx9_eop_bug_va)
+gfx10_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum chip_class chip_class, uint32_t *flush_cnt,
+ uint64_t flush_va, bool is_mec, enum radv_cmd_flush_bits flush_bits,
+ enum rgp_flush_bits *sqtt_flush_bits, uint64_t gfx9_eop_bug_va)
{
- uint32_t gcr_cntl = 0;
- unsigned cb_db_event = 0;
-
- /* We don't need these. */
- assert(!(flush_bits & (RADV_CMD_FLAG_VGT_STREAMOUT_SYNC)));
-
- if (flush_bits & RADV_CMD_FLAG_INV_ICACHE) {
- gcr_cntl |= S_586_GLI_INV(V_586_GLI_ALL);
-
- *sqtt_flush_bits |= RGP_FLUSH_INVAL_ICACHE;
- }
- if (flush_bits & RADV_CMD_FLAG_INV_SCACHE) {
- /* TODO: When writing to the SMEM L1 cache, we need to set SEQ
- * to FORWARD when both L1 and L2 are written out (WB or INV).
- */
- gcr_cntl |= S_586_GL1_INV(1) | S_586_GLK_INV(1);
-
- *sqtt_flush_bits |= RGP_FLUSH_INVAL_SMEM_L0;
- }
- if (flush_bits & RADV_CMD_FLAG_INV_VCACHE) {
- gcr_cntl |= S_586_GL1_INV(1) | S_586_GLV_INV(1);
-
- *sqtt_flush_bits |= RGP_FLUSH_INVAL_VMEM_L0 | RGP_FLUSH_INVAL_L1;
- }
- if (flush_bits & RADV_CMD_FLAG_INV_L2) {
- /* Writeback and invalidate everything in L2. */
- gcr_cntl |= S_586_GL2_INV(1) | S_586_GL2_WB(1) |
- S_586_GLM_INV(1) | S_586_GLM_WB(1);
-
- *sqtt_flush_bits |= RGP_FLUSH_INVAL_L2;
- } else if (flush_bits & RADV_CMD_FLAG_WB_L2) {
- /* Writeback but do not invalidate.
- * GLM doesn't support WB alone. If WB is set, INV must be set too.
- */
- gcr_cntl |= S_586_GL2_WB(1) |
- S_586_GLM_WB(1) | S_586_GLM_INV(1);
-
- *sqtt_flush_bits |= RGP_FLUSH_FLUSH_L2;
- } else if (flush_bits & RADV_CMD_FLAG_INV_L2_METADATA) {
- gcr_cntl |= S_586_GLM_INV(1) | S_586_GLM_WB(1);
- }
-
- if (flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB)) {
- /* TODO: trigger on RADV_CMD_FLAG_FLUSH_AND_INV_CB_META */
- if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB) {
- /* Flush CMASK/FMASK/DCC. Will wait for idle later. */
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_META) |
- EVENT_INDEX(0));
-
- *sqtt_flush_bits |= RGP_FLUSH_FLUSH_CB | RGP_FLUSH_INVAL_CB;
- }
-
- /* TODO: trigger on RADV_CMD_FLAG_FLUSH_AND_INV_DB_META ? */
- if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB) {
- /* Flush HTILE. Will wait for idle later. */
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_DB_META) |
- EVENT_INDEX(0));
-
- *sqtt_flush_bits |= RGP_FLUSH_FLUSH_DB | RGP_FLUSH_INVAL_DB;
- }
-
- /* First flush CB/DB, then L1/L2. */
- gcr_cntl |= S_586_SEQ(V_586_SEQ_FORWARD);
-
- if ((flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB)) ==
- (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB)) {
- cb_db_event = V_028A90_CACHE_FLUSH_AND_INV_TS_EVENT;
- } else if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB) {
- cb_db_event = V_028A90_FLUSH_AND_INV_CB_DATA_TS;
- } else if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB) {
- cb_db_event = V_028A90_FLUSH_AND_INV_DB_DATA_TS;
- } else {
- assert(0);
- }
- } else {
- /* Wait for graphics shaders to go idle if requested. */
- if (flush_bits & RADV_CMD_FLAG_PS_PARTIAL_FLUSH) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
-
- *sqtt_flush_bits |= RGP_FLUSH_PS_PARTIAL_FLUSH;
- } else if (flush_bits & RADV_CMD_FLAG_VS_PARTIAL_FLUSH) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
-
- *sqtt_flush_bits |= RGP_FLUSH_VS_PARTIAL_FLUSH;
- }
- }
-
- if (flush_bits & RADV_CMD_FLAG_CS_PARTIAL_FLUSH) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH | EVENT_INDEX(4)));
-
- *sqtt_flush_bits |= RGP_FLUSH_CS_PARTIAL_FLUSH;
- }
-
- if (cb_db_event) {
- /* CB/DB flush and invalidate (or possibly just a wait for a
- * meta flush) via RELEASE_MEM.
- *
- * Combine this with other cache flushes when possible; this
- * requires affected shaders to be idle, so do it after the
- * CS_PARTIAL_FLUSH before (VS/PS partial flushes are always
- * implied).
- */
- /* Get GCR_CNTL fields, because the encoding is different in RELEASE_MEM. */
- unsigned glm_wb = G_586_GLM_WB(gcr_cntl);
- unsigned glm_inv = G_586_GLM_INV(gcr_cntl);
- unsigned glv_inv = G_586_GLV_INV(gcr_cntl);
- unsigned gl1_inv = G_586_GL1_INV(gcr_cntl);
- assert(G_586_GL2_US(gcr_cntl) == 0);
- assert(G_586_GL2_RANGE(gcr_cntl) == 0);
- assert(G_586_GL2_DISCARD(gcr_cntl) == 0);
- unsigned gl2_inv = G_586_GL2_INV(gcr_cntl);
- unsigned gl2_wb = G_586_GL2_WB(gcr_cntl);
- unsigned gcr_seq = G_586_SEQ(gcr_cntl);
-
- gcr_cntl &= C_586_GLM_WB &
- C_586_GLM_INV &
- C_586_GLV_INV &
- C_586_GL1_INV &
- C_586_GL2_INV &
- C_586_GL2_WB; /* keep SEQ */
-
- assert(flush_cnt);
- (*flush_cnt)++;
-
- si_cs_emit_write_event_eop(cs, chip_class, false, cb_db_event,
- S_490_GLM_WB(glm_wb) |
- S_490_GLM_INV(glm_inv) |
- S_490_GLV_INV(glv_inv) |
- S_490_GL1_INV(gl1_inv) |
- S_490_GL2_INV(gl2_inv) |
- S_490_GL2_WB(gl2_wb) |
- S_490_SEQ(gcr_seq),
- EOP_DST_SEL_MEM,
- EOP_DATA_SEL_VALUE_32BIT,
- flush_va, *flush_cnt,
- gfx9_eop_bug_va);
-
- radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, flush_va,
- *flush_cnt, 0xffffffff);
- }
-
- /* VGT state sync */
- if (flush_bits & RADV_CMD_FLAG_VGT_FLUSH) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
- }
-
- /* Ignore fields that only modify the behavior of other fields. */
- if (gcr_cntl & C_586_GL1_RANGE & C_586_GL2_RANGE & C_586_SEQ) {
- /* Flush caches and wait for the caches to assert idle.
- * The cache flush is executed in the ME, but the PFP waits
- * for completion.
- */
- radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 6, 0));
- radeon_emit(cs, 0); /* CP_COHER_CNTL */
- radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
- radeon_emit(cs, 0xffffff); /* CP_COHER_SIZE_HI */
- radeon_emit(cs, 0); /* CP_COHER_BASE */
- radeon_emit(cs, 0); /* CP_COHER_BASE_HI */
- radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
- radeon_emit(cs, gcr_cntl); /* GCR_CNTL */
- } else if ((cb_db_event ||
- (flush_bits & (RADV_CMD_FLAG_VS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_CS_PARTIAL_FLUSH)))
- && !is_mec) {
- /* We need to ensure that PFP waits as well. */
- radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
- radeon_emit(cs, 0);
-
- *sqtt_flush_bits |= RGP_FLUSH_PFP_SYNC_ME;
- }
-
- if (flush_bits & RADV_CMD_FLAG_START_PIPELINE_STATS) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_PIPELINESTAT_START) |
- EVENT_INDEX(0));
- } else if (flush_bits & RADV_CMD_FLAG_STOP_PIPELINE_STATS) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_PIPELINESTAT_STOP) |
- EVENT_INDEX(0));
- }
+ uint32_t gcr_cntl = 0;
+ unsigned cb_db_event = 0;
+
+ /* We don't need these. */
+ assert(!(flush_bits & (RADV_CMD_FLAG_VGT_STREAMOUT_SYNC)));
+
+ if (flush_bits & RADV_CMD_FLAG_INV_ICACHE) {
+ gcr_cntl |= S_586_GLI_INV(V_586_GLI_ALL);
+
+ *sqtt_flush_bits |= RGP_FLUSH_INVAL_ICACHE;
+ }
+ if (flush_bits & RADV_CMD_FLAG_INV_SCACHE) {
+ /* TODO: When writing to the SMEM L1 cache, we need to set SEQ
+ * to FORWARD when both L1 and L2 are written out (WB or INV).
+ */
+ gcr_cntl |= S_586_GL1_INV(1) | S_586_GLK_INV(1);
+
+ *sqtt_flush_bits |= RGP_FLUSH_INVAL_SMEM_L0;
+ }
+ if (flush_bits & RADV_CMD_FLAG_INV_VCACHE) {
+ gcr_cntl |= S_586_GL1_INV(1) | S_586_GLV_INV(1);
+
+ *sqtt_flush_bits |= RGP_FLUSH_INVAL_VMEM_L0 | RGP_FLUSH_INVAL_L1;
+ }
+ if (flush_bits & RADV_CMD_FLAG_INV_L2) {
+ /* Writeback and invalidate everything in L2. */
+ gcr_cntl |= S_586_GL2_INV(1) | S_586_GL2_WB(1) | S_586_GLM_INV(1) | S_586_GLM_WB(1);
+
+ *sqtt_flush_bits |= RGP_FLUSH_INVAL_L2;
+ } else if (flush_bits & RADV_CMD_FLAG_WB_L2) {
+ /* Writeback but do not invalidate.
+ * GLM doesn't support WB alone. If WB is set, INV must be set too.
+ */
+ gcr_cntl |= S_586_GL2_WB(1) | S_586_GLM_WB(1) | S_586_GLM_INV(1);
+
+ *sqtt_flush_bits |= RGP_FLUSH_FLUSH_L2;
+ } else if (flush_bits & RADV_CMD_FLAG_INV_L2_METADATA) {
+ gcr_cntl |= S_586_GLM_INV(1) | S_586_GLM_WB(1);
+ }
+
+ if (flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB)) {
+ /* TODO: trigger on RADV_CMD_FLAG_FLUSH_AND_INV_CB_META */
+ if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB) {
+ /* Flush CMASK/FMASK/DCC. Will wait for idle later. */
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0));
+
+ *sqtt_flush_bits |= RGP_FLUSH_FLUSH_CB | RGP_FLUSH_INVAL_CB;
+ }
+
+ /* TODO: trigger on RADV_CMD_FLAG_FLUSH_AND_INV_DB_META ? */
+ if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB) {
+ /* Flush HTILE. Will wait for idle later. */
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_DB_META) | EVENT_INDEX(0));
+
+ *sqtt_flush_bits |= RGP_FLUSH_FLUSH_DB | RGP_FLUSH_INVAL_DB;
+ }
+
+ /* First flush CB/DB, then L1/L2. */
+ gcr_cntl |= S_586_SEQ(V_586_SEQ_FORWARD);
+
+ if ((flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB)) ==
+ (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB)) {
+ cb_db_event = V_028A90_CACHE_FLUSH_AND_INV_TS_EVENT;
+ } else if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB) {
+ cb_db_event = V_028A90_FLUSH_AND_INV_CB_DATA_TS;
+ } else if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB) {
+ cb_db_event = V_028A90_FLUSH_AND_INV_DB_DATA_TS;
+ } else {
+ assert(0);
+ }
+ } else {
+ /* Wait for graphics shaders to go idle if requested. */
+ if (flush_bits & RADV_CMD_FLAG_PS_PARTIAL_FLUSH) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
+
+ *sqtt_flush_bits |= RGP_FLUSH_PS_PARTIAL_FLUSH;
+ } else if (flush_bits & RADV_CMD_FLAG_VS_PARTIAL_FLUSH) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
+
+ *sqtt_flush_bits |= RGP_FLUSH_VS_PARTIAL_FLUSH;
+ }
+ }
+
+ if (flush_bits & RADV_CMD_FLAG_CS_PARTIAL_FLUSH) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH) | EVENT_INDEX(4));
+
+ *sqtt_flush_bits |= RGP_FLUSH_CS_PARTIAL_FLUSH;
+ }
+
+ if (cb_db_event) {
+ /* CB/DB flush and invalidate (or possibly just a wait for a
+ * meta flush) via RELEASE_MEM.
+ *
+ * Combine this with other cache flushes when possible; this
+ * requires affected shaders to be idle, so do it after the
+ * preceding CS_PARTIAL_FLUSH (VS/PS partial flushes are always
+ * implied).
+ */
+ /* Get GCR_CNTL fields, because the encoding is different in RELEASE_MEM. */
+ unsigned glm_wb = G_586_GLM_WB(gcr_cntl);
+ unsigned glm_inv = G_586_GLM_INV(gcr_cntl);
+ unsigned glv_inv = G_586_GLV_INV(gcr_cntl);
+ unsigned gl1_inv = G_586_GL1_INV(gcr_cntl);
+ assert(G_586_GL2_US(gcr_cntl) == 0);
+ assert(G_586_GL2_RANGE(gcr_cntl) == 0);
+ assert(G_586_GL2_DISCARD(gcr_cntl) == 0);
+ unsigned gl2_inv = G_586_GL2_INV(gcr_cntl);
+ unsigned gl2_wb = G_586_GL2_WB(gcr_cntl);
+ unsigned gcr_seq = G_586_SEQ(gcr_cntl);
+
+ gcr_cntl &= C_586_GLM_WB & C_586_GLM_INV & C_586_GLV_INV & C_586_GL1_INV & C_586_GL2_INV &
+ C_586_GL2_WB; /* keep SEQ */
+
+ assert(flush_cnt);
+ (*flush_cnt)++;
+
+ si_cs_emit_write_event_eop(
+ cs, chip_class, false, cb_db_event,
+ S_490_GLM_WB(glm_wb) | S_490_GLM_INV(glm_inv) | S_490_GLV_INV(glv_inv) |
+ S_490_GL1_INV(gl1_inv) | S_490_GL2_INV(gl2_inv) | S_490_GL2_WB(gl2_wb) |
+ S_490_SEQ(gcr_seq),
+ EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, flush_va, *flush_cnt, gfx9_eop_bug_va);
+
+ radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, flush_va, *flush_cnt, 0xffffffff);
+ }
+
+ /* VGT state sync */
+ if (flush_bits & RADV_CMD_FLAG_VGT_FLUSH) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
+ }
+
+ /* Ignore fields that only modify the behavior of other fields. */
+ if (gcr_cntl & C_586_GL1_RANGE & C_586_GL2_RANGE & C_586_SEQ) {
+ /* Flush caches and wait for the caches to assert idle.
+ * The cache flush is executed in the ME, but the PFP waits
+ * for completion.
+ */
+ radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 6, 0));
+ radeon_emit(cs, 0); /* CP_COHER_CNTL */
+ radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
+ radeon_emit(cs, 0xffffff); /* CP_COHER_SIZE_HI */
+ radeon_emit(cs, 0); /* CP_COHER_BASE */
+ radeon_emit(cs, 0); /* CP_COHER_BASE_HI */
+ radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
+ radeon_emit(cs, gcr_cntl); /* GCR_CNTL */
+ } else if ((cb_db_event ||
+ (flush_bits & (RADV_CMD_FLAG_VS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
+ RADV_CMD_FLAG_CS_PARTIAL_FLUSH))) &&
+ !is_mec) {
+ /* We need to ensure that PFP waits as well. */
+ radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
+ radeon_emit(cs, 0);
+
+ *sqtt_flush_bits |= RGP_FLUSH_PFP_SYNC_ME;
+ }
+
+ if (flush_bits & RADV_CMD_FLAG_START_PIPELINE_STATS) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_PIPELINESTAT_START) | EVENT_INDEX(0));
+ } else if (flush_bits & RADV_CMD_FLAG_STOP_PIPELINE_STATS) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_PIPELINESTAT_STOP) | EVENT_INDEX(0));
+ }
}
void
-si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
- enum chip_class chip_class,
- uint32_t *flush_cnt,
- uint64_t flush_va,
- bool is_mec,
- enum radv_cmd_flush_bits flush_bits,
- enum rgp_flush_bits *sqtt_flush_bits,
- uint64_t gfx9_eop_bug_va)
+si_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum chip_class chip_class, uint32_t *flush_cnt,
+ uint64_t flush_va, bool is_mec, enum radv_cmd_flush_bits flush_bits,
+ enum rgp_flush_bits *sqtt_flush_bits, uint64_t gfx9_eop_bug_va)
{
- unsigned cp_coher_cntl = 0;
- uint32_t flush_cb_db = flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
- RADV_CMD_FLAG_FLUSH_AND_INV_DB);
-
- if (chip_class >= GFX10) {
- /* GFX10 cache flush handling is quite different. */
- gfx10_cs_emit_cache_flush(cs, chip_class, flush_cnt, flush_va,
- is_mec, flush_bits, sqtt_flush_bits,
- gfx9_eop_bug_va);
- return;
- }
-
- if (flush_bits & RADV_CMD_FLAG_INV_ICACHE) {
- cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1);
- *sqtt_flush_bits |= RGP_FLUSH_INVAL_ICACHE;
- }
- if (flush_bits & RADV_CMD_FLAG_INV_SCACHE) {
- cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1);
- *sqtt_flush_bits |= RGP_FLUSH_INVAL_SMEM_L0;
- }
-
- if (chip_class <= GFX8) {
- if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB) {
- cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) |
- S_0085F0_CB0_DEST_BASE_ENA(1) |
- S_0085F0_CB1_DEST_BASE_ENA(1) |
- S_0085F0_CB2_DEST_BASE_ENA(1) |
- S_0085F0_CB3_DEST_BASE_ENA(1) |
- S_0085F0_CB4_DEST_BASE_ENA(1) |
- S_0085F0_CB5_DEST_BASE_ENA(1) |
- S_0085F0_CB6_DEST_BASE_ENA(1) |
- S_0085F0_CB7_DEST_BASE_ENA(1);
-
- /* Necessary for DCC */
- if (chip_class >= GFX8) {
- si_cs_emit_write_event_eop(cs,
- chip_class,
- is_mec,
- V_028A90_FLUSH_AND_INV_CB_DATA_TS,
- 0,
- EOP_DST_SEL_MEM,
- EOP_DATA_SEL_DISCARD,
- 0, 0,
- gfx9_eop_bug_va);
- }
-
- *sqtt_flush_bits |= RGP_FLUSH_FLUSH_CB | RGP_FLUSH_INVAL_CB;
- }
- if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB) {
- cp_coher_cntl |= S_0085F0_DB_ACTION_ENA(1) |
- S_0085F0_DB_DEST_BASE_ENA(1);
-
- *sqtt_flush_bits |= RGP_FLUSH_FLUSH_DB | RGP_FLUSH_INVAL_DB;
- }
- }
-
- if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB_META) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0));
-
- *sqtt_flush_bits |= RGP_FLUSH_FLUSH_CB | RGP_FLUSH_INVAL_CB;
- }
-
- if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB_META) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_DB_META) | EVENT_INDEX(0));
-
- *sqtt_flush_bits |= RGP_FLUSH_FLUSH_DB | RGP_FLUSH_INVAL_DB;
- }
-
- if (flush_bits & RADV_CMD_FLAG_PS_PARTIAL_FLUSH) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
-
- *sqtt_flush_bits |= RGP_FLUSH_PS_PARTIAL_FLUSH;
- } else if (flush_bits & RADV_CMD_FLAG_VS_PARTIAL_FLUSH) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
-
- *sqtt_flush_bits |= RGP_FLUSH_VS_PARTIAL_FLUSH;
- }
-
- if (flush_bits & RADV_CMD_FLAG_CS_PARTIAL_FLUSH) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH) | EVENT_INDEX(4));
-
- *sqtt_flush_bits |= RGP_FLUSH_CS_PARTIAL_FLUSH;
- }
-
- if (chip_class == GFX9 && flush_cb_db) {
- unsigned cb_db_event, tc_flags;
-
- /* Set the CB/DB flush event. */
- cb_db_event = V_028A90_CACHE_FLUSH_AND_INV_TS_EVENT;
-
- /* These are the only allowed combinations. If you need to
- * do multiple operations at once, do them separately.
- * All operations that invalidate L2 also seem to invalidate
- * metadata. Volatile (VOL) and WC flushes are not listed here.
- *
- * TC | TC_WB = writeback & invalidate L2 & L1
- * TC | TC_WB | TC_NC = writeback & invalidate L2 for MTYPE == NC
- * TC_WB | TC_NC = writeback L2 for MTYPE == NC
- * TC | TC_NC = invalidate L2 for MTYPE == NC
- * TC | TC_MD = writeback & invalidate L2 metadata (DCC, etc.)
- * TCL1 = invalidate L1
- */
- tc_flags = EVENT_TC_ACTION_ENA |
- EVENT_TC_MD_ACTION_ENA;
-
- *sqtt_flush_bits |= RGP_FLUSH_FLUSH_CB | RGP_FLUSH_INVAL_CB |
- RGP_FLUSH_FLUSH_DB | RGP_FLUSH_INVAL_DB;
-
- /* Ideally flush TC together with CB/DB. */
- if (flush_bits & RADV_CMD_FLAG_INV_L2) {
- /* Writeback and invalidate everything in L2 & L1. */
- tc_flags = EVENT_TC_ACTION_ENA |
- EVENT_TC_WB_ACTION_ENA;
-
-
- /* Clear the flags. */
- flush_bits &= ~(RADV_CMD_FLAG_INV_L2 |
- RADV_CMD_FLAG_WB_L2 |
- RADV_CMD_FLAG_INV_VCACHE);
-
- *sqtt_flush_bits |= RGP_FLUSH_INVAL_L2;
- }
-
- assert(flush_cnt);
- (*flush_cnt)++;
-
- si_cs_emit_write_event_eop(cs, chip_class, false, cb_db_event, tc_flags,
- EOP_DST_SEL_MEM,
- EOP_DATA_SEL_VALUE_32BIT,
- flush_va, *flush_cnt,
- gfx9_eop_bug_va);
- radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, flush_va,
- *flush_cnt, 0xffffffff);
- }
-
- /* VGT state sync */
- if (flush_bits & RADV_CMD_FLAG_VGT_FLUSH) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
- }
-
- /* VGT streamout state sync */
- if (flush_bits & RADV_CMD_FLAG_VGT_STREAMOUT_SYNC) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_STREAMOUT_SYNC) | EVENT_INDEX(0));
- }
-
- /* Make sure ME is idle (it executes most packets) before continuing.
- * This prevents read-after-write hazards between PFP and ME.
- */
- if ((cp_coher_cntl ||
- (flush_bits & (RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_INV_VCACHE |
- RADV_CMD_FLAG_INV_L2 |
- RADV_CMD_FLAG_WB_L2))) &&
- !is_mec) {
- radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
- radeon_emit(cs, 0);
-
- *sqtt_flush_bits |= RGP_FLUSH_PFP_SYNC_ME;
- }
-
- if ((flush_bits & RADV_CMD_FLAG_INV_L2) ||
- (chip_class <= GFX7 && (flush_bits & RADV_CMD_FLAG_WB_L2))) {
- si_emit_acquire_mem(cs, is_mec, chip_class == GFX9,
- cp_coher_cntl |
- S_0085F0_TC_ACTION_ENA(1) |
- S_0085F0_TCL1_ACTION_ENA(1) |
- S_0301F0_TC_WB_ACTION_ENA(chip_class >= GFX8));
- cp_coher_cntl = 0;
-
- *sqtt_flush_bits |= RGP_FLUSH_INVAL_L2 | RGP_FLUSH_INVAL_VMEM_L0;
- } else {
- if(flush_bits & RADV_CMD_FLAG_WB_L2) {
- /* WB = write-back
- * NC = apply to non-coherent MTYPEs
- * (i.e. MTYPE <= 1, which is what we use everywhere)
- *
- * WB doesn't work without NC.
- */
- si_emit_acquire_mem(cs, is_mec,
- chip_class == GFX9,
- cp_coher_cntl |
- S_0301F0_TC_WB_ACTION_ENA(1) |
- S_0301F0_TC_NC_ACTION_ENA(1));
- cp_coher_cntl = 0;
-
- *sqtt_flush_bits |= RGP_FLUSH_FLUSH_L2 | RGP_FLUSH_INVAL_VMEM_L0;
- }
- if (flush_bits & RADV_CMD_FLAG_INV_VCACHE) {
- si_emit_acquire_mem(cs, is_mec,
- chip_class == GFX9,
- cp_coher_cntl |
- S_0085F0_TCL1_ACTION_ENA(1));
- cp_coher_cntl = 0;
-
- *sqtt_flush_bits |= RGP_FLUSH_INVAL_VMEM_L0;
- }
- }
-
- /* When one of the DEST_BASE flags is set, SURFACE_SYNC waits for idle.
- * Therefore, it should be last. Done in PFP.
- */
- if (cp_coher_cntl)
- si_emit_acquire_mem(cs, is_mec, chip_class == GFX9, cp_coher_cntl);
-
- if (flush_bits & RADV_CMD_FLAG_START_PIPELINE_STATS) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_PIPELINESTAT_START) |
- EVENT_INDEX(0));
- } else if (flush_bits & RADV_CMD_FLAG_STOP_PIPELINE_STATS) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_PIPELINESTAT_STOP) |
- EVENT_INDEX(0));
- }
+ unsigned cp_coher_cntl = 0;
+ uint32_t flush_cb_db =
+ flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB);
+
+ if (chip_class >= GFX10) {
+ /* GFX10 cache flush handling is quite different. */
+ gfx10_cs_emit_cache_flush(cs, chip_class, flush_cnt, flush_va, is_mec, flush_bits,
+ sqtt_flush_bits, gfx9_eop_bug_va);
+ return;
+ }
+
+ if (flush_bits & RADV_CMD_FLAG_INV_ICACHE) {
+ cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1);
+ *sqtt_flush_bits |= RGP_FLUSH_INVAL_ICACHE;
+ }
+ if (flush_bits & RADV_CMD_FLAG_INV_SCACHE) {
+ cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1);
+ *sqtt_flush_bits |= RGP_FLUSH_INVAL_SMEM_L0;
+ }
+
+ if (chip_class <= GFX8) {
+ if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB) {
+ cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) | S_0085F0_CB0_DEST_BASE_ENA(1) |
+ S_0085F0_CB1_DEST_BASE_ENA(1) | S_0085F0_CB2_DEST_BASE_ENA(1) |
+ S_0085F0_CB3_DEST_BASE_ENA(1) | S_0085F0_CB4_DEST_BASE_ENA(1) |
+ S_0085F0_CB5_DEST_BASE_ENA(1) | S_0085F0_CB6_DEST_BASE_ENA(1) |
+ S_0085F0_CB7_DEST_BASE_ENA(1);
+
+ /* Necessary for DCC */
+ if (chip_class >= GFX8) {
+ si_cs_emit_write_event_eop(cs, chip_class, is_mec, V_028A90_FLUSH_AND_INV_CB_DATA_TS, 0,
+ EOP_DST_SEL_MEM, EOP_DATA_SEL_DISCARD, 0, 0,
+ gfx9_eop_bug_va);
+ }
+
+ *sqtt_flush_bits |= RGP_FLUSH_FLUSH_CB | RGP_FLUSH_INVAL_CB;
+ }
+ if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB) {
+ cp_coher_cntl |= S_0085F0_DB_ACTION_ENA(1) | S_0085F0_DB_DEST_BASE_ENA(1);
+
+ *sqtt_flush_bits |= RGP_FLUSH_FLUSH_DB | RGP_FLUSH_INVAL_DB;
+ }
+ }
+
+ if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB_META) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0));
+
+ *sqtt_flush_bits |= RGP_FLUSH_FLUSH_CB | RGP_FLUSH_INVAL_CB;
+ }
+
+ if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB_META) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_DB_META) | EVENT_INDEX(0));
+
+ *sqtt_flush_bits |= RGP_FLUSH_FLUSH_DB | RGP_FLUSH_INVAL_DB;
+ }
+
+ if (flush_bits & RADV_CMD_FLAG_PS_PARTIAL_FLUSH) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
+
+ *sqtt_flush_bits |= RGP_FLUSH_PS_PARTIAL_FLUSH;
+ } else if (flush_bits & RADV_CMD_FLAG_VS_PARTIAL_FLUSH) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
+
+ *sqtt_flush_bits |= RGP_FLUSH_VS_PARTIAL_FLUSH;
+ }
+
+ if (flush_bits & RADV_CMD_FLAG_CS_PARTIAL_FLUSH) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH) | EVENT_INDEX(4));
+
+ *sqtt_flush_bits |= RGP_FLUSH_CS_PARTIAL_FLUSH;
+ }
+
+ if (chip_class == GFX9 && flush_cb_db) {
+ unsigned cb_db_event, tc_flags;
+
+ /* Set the CB/DB flush event. */
+ cb_db_event = V_028A90_CACHE_FLUSH_AND_INV_TS_EVENT;
+
+ /* These are the only allowed combinations. If you need to
+ * do multiple operations at once, do them separately.
+ * All operations that invalidate L2 also seem to invalidate
+ * metadata. Volatile (VOL) and WC flushes are not listed here.
+ *
+ * TC | TC_WB = writeback & invalidate L2 & L1
+ * TC | TC_WB | TC_NC = writeback & invalidate L2 for MTYPE == NC
+ * TC_WB | TC_NC = writeback L2 for MTYPE == NC
+ * TC | TC_NC = invalidate L2 for MTYPE == NC
+ * TC | TC_MD = writeback & invalidate L2 metadata (DCC, etc.)
+ * TCL1 = invalidate L1
+ */
+ tc_flags = EVENT_TC_ACTION_ENA | EVENT_TC_MD_ACTION_ENA;
+
+ *sqtt_flush_bits |=
+ RGP_FLUSH_FLUSH_CB | RGP_FLUSH_INVAL_CB | RGP_FLUSH_FLUSH_DB | RGP_FLUSH_INVAL_DB;
+
+ /* Ideally flush TC together with CB/DB. */
+ if (flush_bits & RADV_CMD_FLAG_INV_L2) {
+ /* Writeback and invalidate everything in L2 & L1. */
+ tc_flags = EVENT_TC_ACTION_ENA | EVENT_TC_WB_ACTION_ENA;
+
+ /* Clear the flags. */
+ flush_bits &= ~(RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_WB_L2 | RADV_CMD_FLAG_INV_VCACHE);
+
+ *sqtt_flush_bits |= RGP_FLUSH_INVAL_L2;
+ }
+
+ assert(flush_cnt);
+ (*flush_cnt)++;
+
+ si_cs_emit_write_event_eop(cs, chip_class, false, cb_db_event, tc_flags, EOP_DST_SEL_MEM,
+ EOP_DATA_SEL_VALUE_32BIT, flush_va, *flush_cnt, gfx9_eop_bug_va);
+ radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, flush_va, *flush_cnt, 0xffffffff);
+ }
+
+ /* VGT state sync */
+ if (flush_bits & RADV_CMD_FLAG_VGT_FLUSH) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
+ }
+
+ /* VGT streamout state sync */
+ if (flush_bits & RADV_CMD_FLAG_VGT_STREAMOUT_SYNC) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_STREAMOUT_SYNC) | EVENT_INDEX(0));
+ }
+
+ /* Make sure ME is idle (it executes most packets) before continuing.
+ * This prevents read-after-write hazards between PFP and ME.
+ */
+ if ((cp_coher_cntl || (flush_bits & (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
+ RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_WB_L2))) &&
+ !is_mec) {
+ radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
+ radeon_emit(cs, 0);
+
+ *sqtt_flush_bits |= RGP_FLUSH_PFP_SYNC_ME;
+ }
+
+ if ((flush_bits & RADV_CMD_FLAG_INV_L2) ||
+ (chip_class <= GFX7 && (flush_bits & RADV_CMD_FLAG_WB_L2))) {
+ si_emit_acquire_mem(cs, is_mec, chip_class == GFX9,
+ cp_coher_cntl | S_0085F0_TC_ACTION_ENA(1) | S_0085F0_TCL1_ACTION_ENA(1) |
+ S_0301F0_TC_WB_ACTION_ENA(chip_class >= GFX8));
+ cp_coher_cntl = 0;
+
+ *sqtt_flush_bits |= RGP_FLUSH_INVAL_L2 | RGP_FLUSH_INVAL_VMEM_L0;
+ } else {
+ if (flush_bits & RADV_CMD_FLAG_WB_L2) {
+ /* WB = write-back
+ * NC = apply to non-coherent MTYPEs
+ * (i.e. MTYPE <= 1, which is what we use everywhere)
+ *
+ * WB doesn't work without NC.
+ */
+ si_emit_acquire_mem(
+ cs, is_mec, chip_class == GFX9,
+ cp_coher_cntl | S_0301F0_TC_WB_ACTION_ENA(1) | S_0301F0_TC_NC_ACTION_ENA(1));
+ cp_coher_cntl = 0;
+
+ *sqtt_flush_bits |= RGP_FLUSH_FLUSH_L2 | RGP_FLUSH_INVAL_VMEM_L0;
+ }
+ if (flush_bits & RADV_CMD_FLAG_INV_VCACHE) {
+ si_emit_acquire_mem(cs, is_mec, chip_class == GFX9,
+ cp_coher_cntl | S_0085F0_TCL1_ACTION_ENA(1));
+ cp_coher_cntl = 0;
+
+ *sqtt_flush_bits |= RGP_FLUSH_INVAL_VMEM_L0;
+ }
+ }
+
+ /* When one of the DEST_BASE flags is set, SURFACE_SYNC waits for idle.
+ * Therefore, it should be last. Done in PFP.
+ */
+ if (cp_coher_cntl)
+ si_emit_acquire_mem(cs, is_mec, chip_class == GFX9, cp_coher_cntl);
+
+ if (flush_bits & RADV_CMD_FLAG_START_PIPELINE_STATS) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_PIPELINESTAT_START) | EVENT_INDEX(0));
+ } else if (flush_bits & RADV_CMD_FLAG_STOP_PIPELINE_STATS) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_PIPELINESTAT_STOP) | EVENT_INDEX(0));
+ }
}
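
The EOP fence pattern above is: bump *flush_cnt, have the CB/DB flush event write the new value to flush_va at end-of-pipe, then emit a WAIT_REG_MEM that stalls until the memory word equals that value. A minimal host-side model of that handshake, assuming nothing about the real packet layout (all names below are invented stand-ins, not RADV code):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t fence_mem; /* stands in for the dword at flush_va */
static uint32_t flush_cnt; /* stands in for the per-command-buffer fence counter */

/* Models the EOP event: the GPU writes the fence value once prior work has drained. */
static void emit_eop_write(uint32_t value)
{
   fence_mem = value;
}

/* Models WAIT_REG_MEM with WAIT_REG_MEM_EQUAL: block until the value is visible. */
static void wait_mem_equal(uint32_t ref)
{
   while (fence_mem != ref)
      ;
}

int main(void)
{
   flush_cnt++;               /* new fence value for this cache flush */
   emit_eop_write(flush_cnt); /* the cache-flush TS event writes it at end-of-pipe */
   wait_mem_equal(flush_cnt); /* the CP stalls here until the write lands */
   printf("flush %" PRIu32 " completed\n", fence_mem);
   return 0;
}
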
void
si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
{
- bool is_compute = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE;
-
- if (is_compute)
- cmd_buffer->state.flush_bits &= ~(RADV_CMD_FLAG_FLUSH_AND_INV_CB |
- RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
- RADV_CMD_FLAG_FLUSH_AND_INV_DB |
- RADV_CMD_FLAG_FLUSH_AND_INV_DB_META |
- RADV_CMD_FLAG_INV_L2_METADATA |
- RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_VS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_VGT_FLUSH |
- RADV_CMD_FLAG_START_PIPELINE_STATS |
- RADV_CMD_FLAG_STOP_PIPELINE_STATS);
-
- if (!cmd_buffer->state.flush_bits) {
- radv_describe_barrier_end_delayed(cmd_buffer);
- return;
- }
-
- radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 128);
-
- si_cs_emit_cache_flush(cmd_buffer->cs,
- cmd_buffer->device->physical_device->rad_info.chip_class,
- &cmd_buffer->gfx9_fence_idx,
- cmd_buffer->gfx9_fence_va,
- radv_cmd_buffer_uses_mec(cmd_buffer),
- cmd_buffer->state.flush_bits,
- &cmd_buffer->state.sqtt_flush_bits,
- cmd_buffer->gfx9_eop_bug_va);
-
-
- if (unlikely(cmd_buffer->device->trace_bo))
- radv_cmd_buffer_trace_emit(cmd_buffer);
-
- /* Clear the caches that have been flushed to avoid syncing too much
- * when there is some pending active queries.
- */
- cmd_buffer->active_query_flush_bits &= ~cmd_buffer->state.flush_bits;
-
- cmd_buffer->state.flush_bits = 0;
-
- /* If the driver used a compute shader for resetting a query pool, it
- * should be finished at this point.
- */
- cmd_buffer->pending_reset_query = false;
-
- radv_describe_barrier_end_delayed(cmd_buffer);
+ bool is_compute = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE;
+
+ if (is_compute)
+ cmd_buffer->state.flush_bits &=
+ ~(RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
+ RADV_CMD_FLAG_FLUSH_AND_INV_DB | RADV_CMD_FLAG_FLUSH_AND_INV_DB_META |
+ RADV_CMD_FLAG_INV_L2_METADATA | RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
+ RADV_CMD_FLAG_VS_PARTIAL_FLUSH | RADV_CMD_FLAG_VGT_FLUSH |
+ RADV_CMD_FLAG_START_PIPELINE_STATS | RADV_CMD_FLAG_STOP_PIPELINE_STATS);
+
+ if (!cmd_buffer->state.flush_bits) {
+ radv_describe_barrier_end_delayed(cmd_buffer);
+ return;
+ }
+
+ radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 128);
+
+ si_cs_emit_cache_flush(cmd_buffer->cs, cmd_buffer->device->physical_device->rad_info.chip_class,
+ &cmd_buffer->gfx9_fence_idx, cmd_buffer->gfx9_fence_va,
+ radv_cmd_buffer_uses_mec(cmd_buffer), cmd_buffer->state.flush_bits,
+ &cmd_buffer->state.sqtt_flush_bits, cmd_buffer->gfx9_eop_bug_va);
+
+ if (unlikely(cmd_buffer->device->trace_bo))
+ radv_cmd_buffer_trace_emit(cmd_buffer);
+
+ /* Clear the caches that have been flushed to avoid syncing too much
+    * when there are pending active queries.
+ */
+ cmd_buffer->active_query_flush_bits &= ~cmd_buffer->state.flush_bits;
+
+ cmd_buffer->state.flush_bits = 0;
+
+ /* If the driver used a compute shader for resetting a query pool, it
+ * should be finished at this point.
+ */
+ cmd_buffer->pending_reset_query = false;
+
+ radv_describe_barrier_end_delayed(cmd_buffer);
}
/* sets the CP predication state using a boolean stored at va */
void
-si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer,
- bool draw_visible, unsigned pred_op, uint64_t va)
+si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, bool draw_visible,
+ unsigned pred_op, uint64_t va)
{
- uint32_t op = 0;
-
- if (va) {
- assert(pred_op == PREDICATION_OP_BOOL32 ||
- pred_op == PREDICATION_OP_BOOL64);
-
- op = PRED_OP(pred_op);
-
- /* PREDICATION_DRAW_VISIBLE means that if the 32-bit value is
- * zero, all rendering commands are discarded. Otherwise, they
- * are discarded if the value is non zero.
- */
- op |= draw_visible ? PREDICATION_DRAW_VISIBLE :
- PREDICATION_DRAW_NOT_VISIBLE;
- }
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
- radeon_emit(cmd_buffer->cs, PKT3(PKT3_SET_PREDICATION, 2, 0));
- radeon_emit(cmd_buffer->cs, op);
- radeon_emit(cmd_buffer->cs, va);
- radeon_emit(cmd_buffer->cs, va >> 32);
- } else {
- radeon_emit(cmd_buffer->cs, PKT3(PKT3_SET_PREDICATION, 1, 0));
- radeon_emit(cmd_buffer->cs, va);
- radeon_emit(cmd_buffer->cs, op | ((va >> 32) & 0xFF));
- }
+ uint32_t op = 0;
+
+ if (va) {
+ assert(pred_op == PREDICATION_OP_BOOL32 || pred_op == PREDICATION_OP_BOOL64);
+
+ op = PRED_OP(pred_op);
+
+ /* PREDICATION_DRAW_VISIBLE means that if the 32-bit value is
+ * zero, all rendering commands are discarded. Otherwise, they
+       * are discarded if the value is non-zero.
+ */
+ op |= draw_visible ? PREDICATION_DRAW_VISIBLE : PREDICATION_DRAW_NOT_VISIBLE;
+ }
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
+ radeon_emit(cmd_buffer->cs, PKT3(PKT3_SET_PREDICATION, 2, 0));
+ radeon_emit(cmd_buffer->cs, op);
+ radeon_emit(cmd_buffer->cs, va);
+ radeon_emit(cmd_buffer->cs, va >> 32);
+ } else {
+ radeon_emit(cmd_buffer->cs, PKT3(PKT3_SET_PREDICATION, 1, 0));
+ radeon_emit(cmd_buffer->cs, va);
+ radeon_emit(cmd_buffer->cs, op | ((va >> 32) & 0xFF));
+ }
}
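
The two packet layouts above differ in how the 64-bit address is carried: GFX9+ uses three dwords with the full high half, while older parts fold the top 8 address bits into the same dword as the op. A standalone sketch of that packing (the op value is invented for illustration):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
   uint64_t va = 0x123456789ULL; /* illustrative GPU virtual address */
   uint32_t op = 0x110;          /* illustrative predication op bits, not a real encoding */

   /* GFX9+ layout: op, then the full 64-bit address split across two dwords. */
   uint32_t gfx9[3] = {op, (uint32_t)va, (uint32_t)(va >> 32)};

   /* Older layout: address low dword, then op sharing a dword with address bits [39:32]. */
   uint32_t gfx6[2] = {(uint32_t)va, op | (uint32_t)((va >> 32) & 0xFF)};

   printf("gfx9: %08" PRIx32 " %08" PRIx32 " %08" PRIx32 "\n", gfx9[0], gfx9[1], gfx9[2]);
   printf("gfx6: %08" PRIx32 " %08" PRIx32 "\n", gfx6[0], gfx6[1]);
   return 0;
}
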
/* Set this if you want the 3D engine to wait until CP DMA is done.
* It should be set on the last CP DMA packet. */
-#define CP_DMA_SYNC (1 << 0)
+#define CP_DMA_SYNC (1 << 0)
/* Set this if the source data was used as a destination in a previous CP DMA
* packet. It's for preventing a read-after-write (RAW) hazard between two
* CP DMA packets. */
-#define CP_DMA_RAW_WAIT (1 << 1)
-#define CP_DMA_USE_L2 (1 << 2)
-#define CP_DMA_CLEAR (1 << 3)
+#define CP_DMA_RAW_WAIT (1 << 1)
+#define CP_DMA_USE_L2 (1 << 2)
+#define CP_DMA_CLEAR (1 << 3)
/* Alignment for optimal performance. */
-#define SI_CPDMA_ALIGNMENT 32
+#define SI_CPDMA_ALIGNMENT 32
/* The max number of bytes that can be copied per packet. */
-static inline unsigned cp_dma_max_byte_count(struct radv_cmd_buffer *cmd_buffer)
+static inline unsigned
+cp_dma_max_byte_count(struct radv_cmd_buffer *cmd_buffer)
{
- unsigned max = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9 ?
- S_415_BYTE_COUNT_GFX9(~0u) :
- S_415_BYTE_COUNT_GFX6(~0u);
+ unsigned max = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9
+ ? S_415_BYTE_COUNT_GFX9(~0u)
+ : S_415_BYTE_COUNT_GFX6(~0u);
- /* make it aligned for optimal performance */
- return max & ~(SI_CPDMA_ALIGNMENT - 1);
+ /* make it aligned for optimal performance */
+ return max & ~(SI_CPDMA_ALIGNMENT - 1);
}
/* Emit a CP DMA packet to do a copy from one buffer to another, or to clear
* a buffer. The size must fit in bits [20:0]. If CP_DMA_CLEAR is set, src_va is a 32-bit
* clear value.
*/
-static void si_emit_cp_dma(struct radv_cmd_buffer *cmd_buffer,
- uint64_t dst_va, uint64_t src_va,
- unsigned size, unsigned flags)
+static void
+si_emit_cp_dma(struct radv_cmd_buffer *cmd_buffer, uint64_t dst_va, uint64_t src_va, unsigned size,
+ unsigned flags)
{
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- uint32_t header = 0, command = 0;
-
- assert(size <= cp_dma_max_byte_count(cmd_buffer));
-
- radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 9);
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9)
- command |= S_415_BYTE_COUNT_GFX9(size);
- else
- command |= S_415_BYTE_COUNT_GFX6(size);
-
- /* Sync flags. */
- if (flags & CP_DMA_SYNC)
- header |= S_411_CP_SYNC(1);
- else {
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9)
- command |= S_415_DISABLE_WR_CONFIRM_GFX9(1);
- else
- command |= S_415_DISABLE_WR_CONFIRM_GFX6(1);
- }
-
- if (flags & CP_DMA_RAW_WAIT)
- command |= S_415_RAW_WAIT(1);
-
- /* Src and dst flags. */
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9 &&
- !(flags & CP_DMA_CLEAR) &&
- src_va == dst_va)
- header |= S_411_DST_SEL(V_411_NOWHERE); /* prefetch only */
- else if (flags & CP_DMA_USE_L2)
- header |= S_411_DST_SEL(V_411_DST_ADDR_TC_L2);
-
- if (flags & CP_DMA_CLEAR)
- header |= S_411_SRC_SEL(V_411_DATA);
- else if (flags & CP_DMA_USE_L2)
- header |= S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2);
-
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) {
- radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, cmd_buffer->state.predicating));
- radeon_emit(cs, header);
- radeon_emit(cs, src_va); /* SRC_ADDR_LO [31:0] */
- radeon_emit(cs, src_va >> 32); /* SRC_ADDR_HI [31:0] */
- radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */
- radeon_emit(cs, dst_va >> 32); /* DST_ADDR_HI [31:0] */
- radeon_emit(cs, command);
- } else {
- assert(!(flags & CP_DMA_USE_L2));
- header |= S_411_SRC_ADDR_HI(src_va >> 32);
- radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, cmd_buffer->state.predicating));
- radeon_emit(cs, src_va); /* SRC_ADDR_LO [31:0] */
- radeon_emit(cs, header); /* SRC_ADDR_HI [15:0] + flags. */
- radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */
- radeon_emit(cs, (dst_va >> 32) & 0xffff); /* DST_ADDR_HI [15:0] */
- radeon_emit(cs, command);
- }
-
- /* CP DMA is executed in ME, but index buffers are read by PFP.
- * This ensures that ME (CP DMA) is idle before PFP starts fetching
- * indices. If we wanted to execute CP DMA in PFP, this packet
- * should precede it.
- */
- if (flags & CP_DMA_SYNC) {
- if (cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) {
- radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, cmd_buffer->state.predicating));
- radeon_emit(cs, 0);
- }
-
- /* CP will see the sync flag and wait for all DMAs to complete. */
- cmd_buffer->state.dma_is_busy = false;
- }
-
- if (unlikely(cmd_buffer->device->trace_bo))
- radv_cmd_buffer_trace_emit(cmd_buffer);
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ uint32_t header = 0, command = 0;
+
+ assert(size <= cp_dma_max_byte_count(cmd_buffer));
+
+ radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 9);
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9)
+ command |= S_415_BYTE_COUNT_GFX9(size);
+ else
+ command |= S_415_BYTE_COUNT_GFX6(size);
+
+ /* Sync flags. */
+ if (flags & CP_DMA_SYNC)
+ header |= S_411_CP_SYNC(1);
+ else {
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9)
+ command |= S_415_DISABLE_WR_CONFIRM_GFX9(1);
+ else
+ command |= S_415_DISABLE_WR_CONFIRM_GFX6(1);
+ }
+
+ if (flags & CP_DMA_RAW_WAIT)
+ command |= S_415_RAW_WAIT(1);
+
+ /* Src and dst flags. */
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9 &&
+ !(flags & CP_DMA_CLEAR) && src_va == dst_va)
+ header |= S_411_DST_SEL(V_411_NOWHERE); /* prefetch only */
+ else if (flags & CP_DMA_USE_L2)
+ header |= S_411_DST_SEL(V_411_DST_ADDR_TC_L2);
+
+ if (flags & CP_DMA_CLEAR)
+ header |= S_411_SRC_SEL(V_411_DATA);
+ else if (flags & CP_DMA_USE_L2)
+ header |= S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2);
+
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) {
+ radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, cmd_buffer->state.predicating));
+ radeon_emit(cs, header);
+ radeon_emit(cs, src_va); /* SRC_ADDR_LO [31:0] */
+ radeon_emit(cs, src_va >> 32); /* SRC_ADDR_HI [31:0] */
+ radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */
+ radeon_emit(cs, dst_va >> 32); /* DST_ADDR_HI [31:0] */
+ radeon_emit(cs, command);
+ } else {
+ assert(!(flags & CP_DMA_USE_L2));
+ header |= S_411_SRC_ADDR_HI(src_va >> 32);
+ radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, cmd_buffer->state.predicating));
+ radeon_emit(cs, src_va); /* SRC_ADDR_LO [31:0] */
+ radeon_emit(cs, header); /* SRC_ADDR_HI [15:0] + flags. */
+ radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */
+ radeon_emit(cs, (dst_va >> 32) & 0xffff); /* DST_ADDR_HI [15:0] */
+ radeon_emit(cs, command);
+ }
+
+ /* CP DMA is executed in ME, but index buffers are read by PFP.
+ * This ensures that ME (CP DMA) is idle before PFP starts fetching
+ * indices. If we wanted to execute CP DMA in PFP, this packet
+ * should precede it.
+ */
+ if (flags & CP_DMA_SYNC) {
+ if (cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) {
+ radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, cmd_buffer->state.predicating));
+ radeon_emit(cs, 0);
+ }
+
+ /* CP will see the sync flag and wait for all DMAs to complete. */
+ cmd_buffer->state.dma_is_busy = false;
+ }
+
+ if (unlikely(cmd_buffer->device->trace_bo))
+ radv_cmd_buffer_trace_emit(cmd_buffer);
}
-void si_cp_dma_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
- unsigned size)
+void
+si_cp_dma_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va, unsigned size)
{
- uint64_t aligned_va = va & ~(SI_CPDMA_ALIGNMENT - 1);
- uint64_t aligned_size = ((va + size + SI_CPDMA_ALIGNMENT -1) & ~(SI_CPDMA_ALIGNMENT - 1)) - aligned_va;
+ uint64_t aligned_va = va & ~(SI_CPDMA_ALIGNMENT - 1);
+ uint64_t aligned_size =
+ ((va + size + SI_CPDMA_ALIGNMENT - 1) & ~(SI_CPDMA_ALIGNMENT - 1)) - aligned_va;
- si_emit_cp_dma(cmd_buffer, aligned_va, aligned_va,
- aligned_size, CP_DMA_USE_L2);
+ si_emit_cp_dma(cmd_buffer, aligned_va, aligned_va, aligned_size, CP_DMA_USE_L2);
}
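
si_cp_dma_prefetch rounds the start address down and the end address up to SI_CPDMA_ALIGNMENT so a single aligned prefetch covers the requested range. A standalone check of that arithmetic with illustrative values:

#include <assert.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define SI_CPDMA_ALIGNMENT 32

int main(void)
{
   uint64_t va = 0x1007, size = 100; /* illustrative request */

   uint64_t aligned_va = va & ~(uint64_t)(SI_CPDMA_ALIGNMENT - 1);
   uint64_t aligned_size =
      ((va + size + SI_CPDMA_ALIGNMENT - 1) & ~(uint64_t)(SI_CPDMA_ALIGNMENT - 1)) - aligned_va;

   /* The aligned range fully covers the requested one and keeps the alignment. */
   assert(aligned_va <= va);
   assert(aligned_va + aligned_size >= va + size);
   assert(aligned_va % SI_CPDMA_ALIGNMENT == 0);
   assert(aligned_size % SI_CPDMA_ALIGNMENT == 0);

   printf("prefetch [0x%" PRIx64 ", +%" PRIu64 ") for request [0x%" PRIx64 ", +%" PRIu64 ")\n",
          aligned_va, aligned_size, va, size);
   return 0;
}
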
-static void si_cp_dma_prepare(struct radv_cmd_buffer *cmd_buffer, uint64_t byte_count,
- uint64_t remaining_size, unsigned *flags)
+static void
+si_cp_dma_prepare(struct radv_cmd_buffer *cmd_buffer, uint64_t byte_count, uint64_t remaining_size,
+ unsigned *flags)
{
- /* Flush the caches for the first copy only.
- * Also wait for the previous CP DMA operations.
- */
- if (cmd_buffer->state.flush_bits) {
- si_emit_cache_flush(cmd_buffer);
- *flags |= CP_DMA_RAW_WAIT;
- }
-
- /* Do the synchronization after the last dma, so that all data
- * is written to memory.
- */
- if (byte_count == remaining_size)
- *flags |= CP_DMA_SYNC;
+ /* Flush the caches for the first copy only.
+ * Also wait for the previous CP DMA operations.
+ */
+ if (cmd_buffer->state.flush_bits) {
+ si_emit_cache_flush(cmd_buffer);
+ *flags |= CP_DMA_RAW_WAIT;
+ }
+
+ /* Do the synchronization after the last dma, so that all data
+ * is written to memory.
+ */
+ if (byte_count == remaining_size)
+ *flags |= CP_DMA_SYNC;
}
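
si_cp_dma_prepare is called once per chunk of a larger operation: only the first chunk issued while a cache flush is still pending gets CP_DMA_RAW_WAIT, and only the chunk whose byte count equals the remaining size gets CP_DMA_SYNC. A standalone model of that sequencing for a three-chunk transfer (the flag names and the pending-flush boolean are invented stand-ins):

#include <stdbool.h>
#include <stdio.h>

#define CHUNK_SYNC     (1u << 0)
#define CHUNK_RAW_WAIT (1u << 1)

static bool pending_flush = true; /* stands in for cmd_buffer->state.flush_bits != 0 */

static unsigned prepare(unsigned byte_count, unsigned remaining)
{
   unsigned flags = 0;
   if (pending_flush) {         /* flush caches before the first chunk only */
      pending_flush = false;    /* the flush clears the pending bits */
      flags |= CHUNK_RAW_WAIT;
   }
   if (byte_count == remaining) /* synchronize after the last chunk */
      flags |= CHUNK_SYNC;
   return flags;
}

int main(void)
{
   unsigned chunks[] = {256, 256, 128};
   unsigned remaining = 256 + 256 + 128;

   for (unsigned i = 0; i < 3; i++) {
      unsigned flags = prepare(chunks[i], remaining);
      printf("chunk %u: raw_wait=%d sync=%d\n", i, !!(flags & CHUNK_RAW_WAIT),
             !!(flags & CHUNK_SYNC));
      remaining -= chunks[i];
   }
   return 0;
}
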
-static void si_cp_dma_realign_engine(struct radv_cmd_buffer *cmd_buffer, unsigned size)
+static void
+si_cp_dma_realign_engine(struct radv_cmd_buffer *cmd_buffer, unsigned size)
{
- uint64_t va;
- uint32_t offset;
- unsigned dma_flags = 0;
- unsigned buf_size = SI_CPDMA_ALIGNMENT * 2;
- void *ptr;
+ uint64_t va;
+ uint32_t offset;
+ unsigned dma_flags = 0;
+ unsigned buf_size = SI_CPDMA_ALIGNMENT * 2;
+ void *ptr;
- assert(size < SI_CPDMA_ALIGNMENT);
+ assert(size < SI_CPDMA_ALIGNMENT);
- radv_cmd_buffer_upload_alloc(cmd_buffer, buf_size, &offset, &ptr);
+ radv_cmd_buffer_upload_alloc(cmd_buffer, buf_size, &offset, &ptr);
- va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
- va += offset;
+ va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
+ va += offset;
- si_cp_dma_prepare(cmd_buffer, size, size, &dma_flags);
+ si_cp_dma_prepare(cmd_buffer, size, size, &dma_flags);
- si_emit_cp_dma(cmd_buffer, va, va + SI_CPDMA_ALIGNMENT, size,
- dma_flags);
+ si_emit_cp_dma(cmd_buffer, va, va + SI_CPDMA_ALIGNMENT, size, dma_flags);
}
-void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer,
- uint64_t src_va, uint64_t dest_va,
- uint64_t size)
+void
+si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uint64_t dest_va,
+ uint64_t size)
{
- uint64_t main_src_va, main_dest_va;
- uint64_t skipped_size = 0, realign_size = 0;
-
- /* Assume that we are not going to sync after the last DMA operation. */
- cmd_buffer->state.dma_is_busy = true;
-
- if (cmd_buffer->device->physical_device->rad_info.family <= CHIP_CARRIZO ||
- cmd_buffer->device->physical_device->rad_info.family == CHIP_STONEY) {
- /* If the size is not aligned, we must add a dummy copy at the end
- * just to align the internal counter. Otherwise, the DMA engine
- * would slow down by an order of magnitude for following copies.
- */
- if (size % SI_CPDMA_ALIGNMENT)
- realign_size = SI_CPDMA_ALIGNMENT - (size % SI_CPDMA_ALIGNMENT);
-
- /* If the copy begins unaligned, we must start copying from the next
- * aligned block and the skipped part should be copied after everything
- * else has been copied. Only the src alignment matters, not dst.
- */
- if (src_va % SI_CPDMA_ALIGNMENT) {
- skipped_size = SI_CPDMA_ALIGNMENT - (src_va % SI_CPDMA_ALIGNMENT);
- /* The main part will be skipped if the size is too small. */
- skipped_size = MIN2(skipped_size, size);
- size -= skipped_size;
- }
- }
- main_src_va = src_va + skipped_size;
- main_dest_va = dest_va + skipped_size;
-
- while (size) {
- unsigned dma_flags = 0;
- unsigned byte_count = MIN2(size, cp_dma_max_byte_count(cmd_buffer));
-
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
- /* DMA operations via L2 are coherent and faster.
- * TODO: GFX7-GFX8 should also support this but it
- * requires tests/benchmarks.
- *
- * Also enable on GFX9 so we can use L2 at rest on GFX9+. On Raven
- * this didn't seem to be worse.
- *
- * Note that we only use CP DMA for sizes < RADV_BUFFER_OPS_CS_THRESHOLD,
- * which is 4k at the moment, so this is really unlikely to cause
- * significant thrashing.
- */
- dma_flags |= CP_DMA_USE_L2;
- }
-
- si_cp_dma_prepare(cmd_buffer, byte_count,
- size + skipped_size + realign_size,
- &dma_flags);
-
- dma_flags &= ~CP_DMA_SYNC;
-
- si_emit_cp_dma(cmd_buffer, main_dest_va, main_src_va,
- byte_count, dma_flags);
-
- size -= byte_count;
- main_src_va += byte_count;
- main_dest_va += byte_count;
- }
-
- if (skipped_size) {
- unsigned dma_flags = 0;
-
- si_cp_dma_prepare(cmd_buffer, skipped_size,
- size + skipped_size + realign_size,
- &dma_flags);
-
- si_emit_cp_dma(cmd_buffer, dest_va, src_va,
- skipped_size, dma_flags);
- }
- if (realign_size)
- si_cp_dma_realign_engine(cmd_buffer, realign_size);
+ uint64_t main_src_va, main_dest_va;
+ uint64_t skipped_size = 0, realign_size = 0;
+
+ /* Assume that we are not going to sync after the last DMA operation. */
+ cmd_buffer->state.dma_is_busy = true;
+
+ if (cmd_buffer->device->physical_device->rad_info.family <= CHIP_CARRIZO ||
+ cmd_buffer->device->physical_device->rad_info.family == CHIP_STONEY) {
+ /* If the size is not aligned, we must add a dummy copy at the end
+ * just to align the internal counter. Otherwise, the DMA engine
+ * would slow down by an order of magnitude for following copies.
+ */
+ if (size % SI_CPDMA_ALIGNMENT)
+ realign_size = SI_CPDMA_ALIGNMENT - (size % SI_CPDMA_ALIGNMENT);
+
+ /* If the copy begins unaligned, we must start copying from the next
+ * aligned block and the skipped part should be copied after everything
+ * else has been copied. Only the src alignment matters, not dst.
+ */
+ if (src_va % SI_CPDMA_ALIGNMENT) {
+ skipped_size = SI_CPDMA_ALIGNMENT - (src_va % SI_CPDMA_ALIGNMENT);
+ /* The main part will be skipped if the size is too small. */
+ skipped_size = MIN2(skipped_size, size);
+ size -= skipped_size;
+ }
+ }
+ main_src_va = src_va + skipped_size;
+ main_dest_va = dest_va + skipped_size;
+
+ while (size) {
+ unsigned dma_flags = 0;
+ unsigned byte_count = MIN2(size, cp_dma_max_byte_count(cmd_buffer));
+
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
+ /* DMA operations via L2 are coherent and faster.
+ * TODO: GFX7-GFX8 should also support this but it
+ * requires tests/benchmarks.
+ *
+ * Also enable on GFX9 so we can use L2 at rest on GFX9+. On Raven
+ * this didn't seem to be worse.
+ *
+ * Note that we only use CP DMA for sizes < RADV_BUFFER_OPS_CS_THRESHOLD,
+ * which is 4k at the moment, so this is really unlikely to cause
+ * significant thrashing.
+ */
+ dma_flags |= CP_DMA_USE_L2;
+ }
+
+ si_cp_dma_prepare(cmd_buffer, byte_count, size + skipped_size + realign_size, &dma_flags);
+
+ dma_flags &= ~CP_DMA_SYNC;
+
+ si_emit_cp_dma(cmd_buffer, main_dest_va, main_src_va, byte_count, dma_flags);
+
+ size -= byte_count;
+ main_src_va += byte_count;
+ main_dest_va += byte_count;
+ }
+
+ if (skipped_size) {
+ unsigned dma_flags = 0;
+
+ si_cp_dma_prepare(cmd_buffer, skipped_size, size + skipped_size + realign_size, &dma_flags);
+
+ si_emit_cp_dma(cmd_buffer, dest_va, src_va, skipped_size, dma_flags);
+ }
+ if (realign_size)
+ si_cp_dma_realign_engine(cmd_buffer, realign_size);
}
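
On the older parts checked above, an unaligned source address forces a separate head copy (skipped_size) and an unaligned total size forces a dummy tail copy (realign_size) to keep the DMA engine's internal counter aligned. A standalone computation of both quantities for one illustrative request:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define SI_CPDMA_ALIGNMENT 32
#define MIN2(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
   uint64_t src_va = 0x2005, size = 100; /* illustrative request */
   uint64_t skipped_size = 0, realign_size = 0;

   /* Unaligned size: append a dummy copy so the engine's internal counter realigns. */
   if (size % SI_CPDMA_ALIGNMENT)
      realign_size = SI_CPDMA_ALIGNMENT - (size % SI_CPDMA_ALIGNMENT);

   /* Unaligned source: copy the head separately so the main copy starts aligned. */
   if (src_va % SI_CPDMA_ALIGNMENT) {
      skipped_size = MIN2(SI_CPDMA_ALIGNMENT - (src_va % SI_CPDMA_ALIGNMENT), size);
      size -= skipped_size;
   }

   printf("main copy: %" PRIu64 " bytes from 0x%" PRIx64 "\n", size, src_va + skipped_size);
   printf("head copy: %" PRIu64 " bytes, realign dummy: %" PRIu64 " bytes\n", skipped_size,
          realign_size);
   return 0;
}
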
-void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
- uint64_t size, unsigned value)
+void
+si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t size,
+ unsigned value)
{
- if (!size)
- return;
+ if (!size)
+ return;
- assert(va % 4 == 0 && size % 4 == 0);
+ assert(va % 4 == 0 && size % 4 == 0);
- /* Assume that we are not going to sync after the last DMA operation. */
- cmd_buffer->state.dma_is_busy = true;
+ /* Assume that we are not going to sync after the last DMA operation. */
+ cmd_buffer->state.dma_is_busy = true;
- while (size) {
- unsigned byte_count = MIN2(size, cp_dma_max_byte_count(cmd_buffer));
- unsigned dma_flags = CP_DMA_CLEAR;
+ while (size) {
+ unsigned byte_count = MIN2(size, cp_dma_max_byte_count(cmd_buffer));
+ unsigned dma_flags = CP_DMA_CLEAR;
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
- /* DMA operations via L2 are coherent and faster.
- * TODO: GFX7-GFX8 should also support this but it
- * requires tests/benchmarks.
- *
- * Also enable on GFX9 so we can use L2 at rest on GFX9+.
- */
- dma_flags |= CP_DMA_USE_L2;
- }
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
+ /* DMA operations via L2 are coherent and faster.
+ * TODO: GFX7-GFX8 should also support this but it
+ * requires tests/benchmarks.
+ *
+ * Also enable on GFX9 so we can use L2 at rest on GFX9+.
+ */
+ dma_flags |= CP_DMA_USE_L2;
+ }
- si_cp_dma_prepare(cmd_buffer, byte_count, size, &dma_flags);
+ si_cp_dma_prepare(cmd_buffer, byte_count, size, &dma_flags);
- /* Emit the clear packet. */
- si_emit_cp_dma(cmd_buffer, va, value, byte_count,
- dma_flags);
+ /* Emit the clear packet. */
+ si_emit_cp_dma(cmd_buffer, va, value, byte_count, dma_flags);
- size -= byte_count;
- va += byte_count;
- }
+ size -= byte_count;
+ va += byte_count;
+ }
}
-void si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer)
+void
+si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer)
{
- if (cmd_buffer->device->physical_device->rad_info.chip_class < GFX7)
- return;
+ if (cmd_buffer->device->physical_device->rad_info.chip_class < GFX7)
+ return;
- if (!cmd_buffer->state.dma_is_busy)
- return;
+ if (!cmd_buffer->state.dma_is_busy)
+ return;
- /* Issue a dummy DMA that copies zero bytes.
- *
- * The DMA engine will see that there's no work to do and skip this
- * DMA request, however, the CP will see the sync flag and still wait
- * for all DMAs to complete.
- */
- si_emit_cp_dma(cmd_buffer, 0, 0, 0, CP_DMA_SYNC);
+ /* Issue a dummy DMA that copies zero bytes.
+ *
+ * The DMA engine will see that there's no work to do and skip this
+ * DMA request, however, the CP will see the sync flag and still wait
+ * for all DMAs to complete.
+ */
+ si_emit_cp_dma(cmd_buffer, 0, 0, 0, CP_DMA_SYNC);
- cmd_buffer->state.dma_is_busy = false;
+ cmd_buffer->state.dma_is_busy = false;
}
/* For MSAA sample positions. */
-#define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y) \
- ((((unsigned)(s0x) & 0xf) << 0) | (((unsigned)(s0y) & 0xf) << 4) | \
- (((unsigned)(s1x) & 0xf) << 8) | (((unsigned)(s1y) & 0xf) << 12) | \
- (((unsigned)(s2x) & 0xf) << 16) | (((unsigned)(s2y) & 0xf) << 20) | \
- (((unsigned)(s3x) & 0xf) << 24) | (((unsigned)(s3y) & 0xf) << 28))
+#define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y) \
+ ((((unsigned)(s0x)&0xf) << 0) | (((unsigned)(s0y)&0xf) << 4) | (((unsigned)(s1x)&0xf) << 8) | \
+ (((unsigned)(s1y)&0xf) << 12) | (((unsigned)(s2x)&0xf) << 16) | \
+ (((unsigned)(s2y)&0xf) << 20) | (((unsigned)(s3x)&0xf) << 24) | (((unsigned)(s3y)&0xf) << 28))
/* For obtaining location coordinates from registers */
-#define SEXT4(x) ((int)((x) | ((x) & 0x8 ? 0xfffffff0 : 0)))
-#define GET_SFIELD(reg, index) SEXT4(((reg) >> ((index) * 4)) & 0xf)
-#define GET_SX(reg, index) GET_SFIELD((reg)[(index) / 4], ((index) % 4) * 2)
-#define GET_SY(reg, index) GET_SFIELD((reg)[(index) / 4], ((index) % 4) * 2 + 1)
+#define SEXT4(x) ((int)((x) | ((x)&0x8 ? 0xfffffff0 : 0)))
+#define GET_SFIELD(reg, index) SEXT4(((reg) >> ((index)*4)) & 0xf)
+#define GET_SX(reg, index) GET_SFIELD((reg)[(index) / 4], ((index) % 4) * 2)
+#define GET_SY(reg, index) GET_SFIELD((reg)[(index) / 4], ((index) % 4) * 2 + 1)
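
Each 32-bit sample-locations register packs four (x, y) pairs as signed 4-bit nibbles; SEXT4/GET_SX/GET_SY reverse that packing. A standalone round-trip test, repeating the macro definitions so it compiles on its own:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y)                                          \
   ((((unsigned)(s0x)&0xf) << 0) | (((unsigned)(s0y)&0xf) << 4) | (((unsigned)(s1x)&0xf) << 8) |   \
    (((unsigned)(s1y)&0xf) << 12) | (((unsigned)(s2x)&0xf) << 16) |                                \
    (((unsigned)(s2y)&0xf) << 20) | (((unsigned)(s3x)&0xf) << 24) | (((unsigned)(s3y)&0xf) << 28))
#define SEXT4(x)               ((int)((x) | ((x)&0x8 ? 0xfffffff0 : 0)))
#define GET_SFIELD(reg, index) SEXT4(((reg) >> ((index)*4)) & 0xf)
#define GET_SX(reg, index)     GET_SFIELD((reg)[(index) / 4], ((index) % 4) * 2)
#define GET_SY(reg, index)     GET_SFIELD((reg)[(index) / 4], ((index) % 4) * 2 + 1)

int main(void)
{
   /* The 4x MSAA table below: samples (-2,-6) (6,-2) (-6,2) (2,6). */
   const uint32_t locs[1] = {FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6)};

   for (unsigned i = 0; i < 4; i++)
      printf("sample %u: (%d, %d)\n", i, GET_SX(locs, i), GET_SY(locs, i));

   assert(GET_SX(locs, 2) == -6 && GET_SY(locs, 2) == 2); /* signed nibbles round-trip */
   return 0;
}
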
/* 1x MSAA */
-static const uint32_t sample_locs_1x =
- FILL_SREG(0, 0, 0, 0, 0, 0, 0, 0);
+static const uint32_t sample_locs_1x = FILL_SREG(0, 0, 0, 0, 0, 0, 0, 0);
static const unsigned max_dist_1x = 0;
static const uint64_t centroid_priority_1x = 0x0000000000000000ull;
/* 2xMSAA */
-static const uint32_t sample_locs_2x =
- FILL_SREG(4,4, -4, -4, 0, 0, 0, 0);
+static const uint32_t sample_locs_2x = FILL_SREG(4, 4, -4, -4, 0, 0, 0, 0);
static const unsigned max_dist_2x = 4;
static const uint64_t centroid_priority_2x = 0x1010101010101010ull;
/* 4xMSAA */
-static const uint32_t sample_locs_4x =
- FILL_SREG(-2,-6, 6, -2, -6, 2, 2, 6);
+static const uint32_t sample_locs_4x = FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6);
static const unsigned max_dist_4x = 6;
static const uint64_t centroid_priority_4x = 0x3210321032103210ull;
/* 8xMSAA */
static const uint32_t sample_locs_8x[] = {
- FILL_SREG( 1,-3, -1, 3, 5, 1, -3,-5),
- FILL_SREG(-5, 5, -7,-1, 3, 7, 7,-7),
- /* The following are unused by hardware, but we emit them to IBs
- * instead of multiple SET_CONTEXT_REG packets. */
- 0,
- 0,
+ FILL_SREG(1, -3, -1, 3, 5, 1, -3, -5),
+ FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7),
+ /* The following are unused by hardware, but we emit them to IBs
+ * instead of multiple SET_CONTEXT_REG packets. */
+ 0,
+ 0,
};
static const unsigned max_dist_8x = 7;
static const uint64_t centroid_priority_8x = 0x7654321076543210ull;
-unsigned radv_get_default_max_sample_dist(int log_samples)
+unsigned
+radv_get_default_max_sample_dist(int log_samples)
{
- unsigned max_dist[] = {
- max_dist_1x,
- max_dist_2x,
- max_dist_4x,
- max_dist_8x,
- };
- return max_dist[log_samples];
+ unsigned max_dist[] = {
+ max_dist_1x,
+ max_dist_2x,
+ max_dist_4x,
+ max_dist_8x,
+ };
+ return max_dist[log_samples];
}
-void radv_emit_default_sample_locations(struct radeon_cmdbuf *cs, int nr_samples)
+void
+radv_emit_default_sample_locations(struct radeon_cmdbuf *cs, int nr_samples)
{
- switch (nr_samples) {
- default:
- case 1:
- radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);
- radeon_emit(cs, (uint32_t)centroid_priority_1x);
- radeon_emit(cs, centroid_priority_1x >> 32);
- radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_1x);
- radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_1x);
- radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_1x);
- radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_1x);
- break;
- case 2:
- radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);
- radeon_emit(cs, (uint32_t)centroid_priority_2x);
- radeon_emit(cs, centroid_priority_2x >> 32);
- radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_2x);
- radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_2x);
- radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_2x);
- radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_2x);
- break;
- case 4:
- radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);
- radeon_emit(cs, (uint32_t)centroid_priority_4x);
- radeon_emit(cs, centroid_priority_4x >> 32);
- radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_4x);
- radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_4x);
- radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_4x);
- radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_4x);
- break;
- case 8:
- radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);
- radeon_emit(cs, (uint32_t)centroid_priority_8x);
- radeon_emit(cs, centroid_priority_8x >> 32);
- radeon_set_context_reg_seq(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 14);
- radeon_emit_array(cs, sample_locs_8x, 4);
- radeon_emit_array(cs, sample_locs_8x, 4);
- radeon_emit_array(cs, sample_locs_8x, 4);
- radeon_emit_array(cs, sample_locs_8x, 2);
- break;
- }
+ switch (nr_samples) {
+ default:
+ case 1:
+ radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);
+ radeon_emit(cs, (uint32_t)centroid_priority_1x);
+ radeon_emit(cs, centroid_priority_1x >> 32);
+ radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_1x);
+ radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_1x);
+ radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_1x);
+ radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_1x);
+ break;
+ case 2:
+ radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);
+ radeon_emit(cs, (uint32_t)centroid_priority_2x);
+ radeon_emit(cs, centroid_priority_2x >> 32);
+ radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_2x);
+ radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_2x);
+ radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_2x);
+ radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_2x);
+ break;
+ case 4:
+ radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);
+ radeon_emit(cs, (uint32_t)centroid_priority_4x);
+ radeon_emit(cs, centroid_priority_4x >> 32);
+ radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_4x);
+ radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_4x);
+ radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_4x);
+ radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_4x);
+ break;
+ case 8:
+ radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);
+ radeon_emit(cs, (uint32_t)centroid_priority_8x);
+ radeon_emit(cs, centroid_priority_8x >> 32);
+ radeon_set_context_reg_seq(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 14);
+ radeon_emit_array(cs, sample_locs_8x, 4);
+ radeon_emit_array(cs, sample_locs_8x, 4);
+ radeon_emit_array(cs, sample_locs_8x, 4);
+ radeon_emit_array(cs, sample_locs_8x, 2);
+ break;
+ }
}
-static void radv_get_sample_position(struct radv_device *device,
- unsigned sample_count,
- unsigned sample_index, float *out_value)
+static void
+radv_get_sample_position(struct radv_device *device, unsigned sample_count, unsigned sample_index,
+ float *out_value)
{
- const uint32_t *sample_locs;
-
- switch (sample_count) {
- case 1:
- default:
- sample_locs = &sample_locs_1x;
- break;
- case 2:
- sample_locs = &sample_locs_2x;
- break;
- case 4:
- sample_locs = &sample_locs_4x;
- break;
- case 8:
- sample_locs = sample_locs_8x;
- break;
- }
-
- out_value[0] = (GET_SX(sample_locs, sample_index) + 8) / 16.0f;
- out_value[1] = (GET_SY(sample_locs, sample_index) + 8) / 16.0f;
+ const uint32_t *sample_locs;
+
+ switch (sample_count) {
+ case 1:
+ default:
+ sample_locs = &sample_locs_1x;
+ break;
+ case 2:
+ sample_locs = &sample_locs_2x;
+ break;
+ case 4:
+ sample_locs = &sample_locs_4x;
+ break;
+ case 8:
+ sample_locs = sample_locs_8x;
+ break;
+ }
+
+ out_value[0] = (GET_SX(sample_locs, sample_index) + 8) / 16.0f;
+ out_value[1] = (GET_SY(sample_locs, sample_index) + 8) / 16.0f;
}
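
The hardware offsets are signed 1/16-pixel units in [-8, 7]; adding 8 and dividing by 16, as above, maps them to the [0, 1) sample positions reported to the API. A quick standalone check using the 4x table values:

#include <stdio.h>

int main(void)
{
   /* 4x MSAA hardware offsets from the table above, in 1/16-pixel units. */
   const int locs_4x[4][2] = {{-2, -6}, {6, -2}, {-6, 2}, {2, 6}};

   for (int i = 0; i < 4; i++) {
      float x = (locs_4x[i][0] + 8) / 16.0f; /* sample 0 -> 0.375 */
      float y = (locs_4x[i][1] + 8) / 16.0f; /* sample 0 -> 0.125 */
      printf("sample %d: (%.4f, %.4f)\n", i, x, y);
   }
   return 0;
}
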
-void radv_device_init_msaa(struct radv_device *device)
+void
+radv_device_init_msaa(struct radv_device *device)
{
- int i;
+ int i;
- radv_get_sample_position(device, 1, 0, device->sample_locations_1x[0]);
+ radv_get_sample_position(device, 1, 0, device->sample_locations_1x[0]);
- for (i = 0; i < 2; i++)
- radv_get_sample_position(device, 2, i, device->sample_locations_2x[i]);
- for (i = 0; i < 4; i++)
- radv_get_sample_position(device, 4, i, device->sample_locations_4x[i]);
- for (i = 0; i < 8; i++)
- radv_get_sample_position(device, 8, i, device->sample_locations_8x[i]);
+ for (i = 0; i < 2; i++)
+ radv_get_sample_position(device, 2, i, device->sample_locations_2x[i]);
+ for (i = 0; i < 4; i++)
+ radv_get_sample_position(device, 4, i, device->sample_locations_4x[i]);
+ for (i = 0; i < 8; i++)
+ radv_get_sample_position(device, 8, i, device->sample_locations_8x[i]);
}
diff --git a/src/amd/vulkan/vk_format.h b/src/amd/vulkan/vk_format.h
index d6132a5b96b..d58180c0196 100644
--- a/src/amd/vulkan/vk_format.h
+++ b/src/amd/vulkan/vk_format.h
@@ -28,13 +28,14 @@
#define VK_FORMAT_H
#include <assert.h>
-#include <vulkan/vulkan.h>
#include <util/macros.h>
#include <vulkan/util/vk_format.h>
+#include <vulkan/vulkan.h>
-static inline const struct util_format_description *vk_format_description(VkFormat format)
+static inline const struct util_format_description *
+vk_format_description(VkFormat format)
{
- return util_format_description(vk_format_to_pipe_format(format));
+ return util_format_description(vk_format_to_pipe_format(format));
}
/**
@@ -43,7 +44,7 @@ static inline const struct util_format_description *vk_format_description(VkForm
static inline unsigned
vk_format_get_blocksizebits(VkFormat format)
{
- return util_format_get_blocksizebits(vk_format_to_pipe_format(format));
+ return util_format_get_blocksizebits(vk_format_to_pipe_format(format));
}
/**
@@ -52,19 +53,19 @@ vk_format_get_blocksizebits(VkFormat format)
static inline unsigned
vk_format_get_blocksize(VkFormat format)
{
- return util_format_get_blocksize(vk_format_to_pipe_format(format));
+ return util_format_get_blocksize(vk_format_to_pipe_format(format));
}
static inline unsigned
vk_format_get_blockwidth(VkFormat format)
{
- return util_format_get_blockwidth(vk_format_to_pipe_format(format));
+ return util_format_get_blockwidth(vk_format_to_pipe_format(format));
}
static inline unsigned
vk_format_get_blockheight(VkFormat format)
{
- return util_format_get_blockheight(vk_format_to_pipe_format(format));
+ return util_format_get_blockheight(vk_format_to_pipe_format(format));
}
/**
@@ -74,259 +75,256 @@ vk_format_get_blockheight(VkFormat format)
static inline int
vk_format_get_first_non_void_channel(VkFormat format)
{
- return util_format_get_first_non_void_channel(vk_format_to_pipe_format(format));
+ return util_format_get_first_non_void_channel(vk_format_to_pipe_format(format));
}
static inline enum pipe_swizzle
-radv_swizzle_conv(VkComponentSwizzle component, const unsigned char chan[4], VkComponentSwizzle vk_swiz)
+radv_swizzle_conv(VkComponentSwizzle component, const unsigned char chan[4],
+ VkComponentSwizzle vk_swiz)
{
- if (vk_swiz == VK_COMPONENT_SWIZZLE_IDENTITY)
- vk_swiz = component;
- switch (vk_swiz) {
- case VK_COMPONENT_SWIZZLE_ZERO:
- return PIPE_SWIZZLE_0;
- case VK_COMPONENT_SWIZZLE_ONE:
- return PIPE_SWIZZLE_1;
- case VK_COMPONENT_SWIZZLE_R:
- case VK_COMPONENT_SWIZZLE_G:
- case VK_COMPONENT_SWIZZLE_B:
- case VK_COMPONENT_SWIZZLE_A:
- return (enum pipe_swizzle)chan[vk_swiz - VK_COMPONENT_SWIZZLE_R];
- default:
- unreachable("Illegal swizzle");
- }
+ if (vk_swiz == VK_COMPONENT_SWIZZLE_IDENTITY)
+ vk_swiz = component;
+ switch (vk_swiz) {
+ case VK_COMPONENT_SWIZZLE_ZERO:
+ return PIPE_SWIZZLE_0;
+ case VK_COMPONENT_SWIZZLE_ONE:
+ return PIPE_SWIZZLE_1;
+ case VK_COMPONENT_SWIZZLE_R:
+ case VK_COMPONENT_SWIZZLE_G:
+ case VK_COMPONENT_SWIZZLE_B:
+ case VK_COMPONENT_SWIZZLE_A:
+ return (enum pipe_swizzle)chan[vk_swiz - VK_COMPONENT_SWIZZLE_R];
+ default:
+ unreachable("Illegal swizzle");
+ }
}
-static inline void vk_format_compose_swizzles(const VkComponentMapping *mapping,
- const unsigned char swz[4],
- enum pipe_swizzle dst[4])
+static inline void
+vk_format_compose_swizzles(const VkComponentMapping *mapping, const unsigned char swz[4],
+ enum pipe_swizzle dst[4])
{
- dst[0] = radv_swizzle_conv(VK_COMPONENT_SWIZZLE_R, swz, mapping->r);
- dst[1] = radv_swizzle_conv(VK_COMPONENT_SWIZZLE_G, swz, mapping->g);
- dst[2] = radv_swizzle_conv(VK_COMPONENT_SWIZZLE_B, swz, mapping->b);
- dst[3] = radv_swizzle_conv(VK_COMPONENT_SWIZZLE_A, swz, mapping->a);
+ dst[0] = radv_swizzle_conv(VK_COMPONENT_SWIZZLE_R, swz, mapping->r);
+ dst[1] = radv_swizzle_conv(VK_COMPONENT_SWIZZLE_G, swz, mapping->g);
+ dst[2] = radv_swizzle_conv(VK_COMPONENT_SWIZZLE_B, swz, mapping->b);
+ dst[3] = radv_swizzle_conv(VK_COMPONENT_SWIZZLE_A, swz, mapping->a);
}
static inline bool
vk_format_is_compressed(VkFormat format)
{
- return util_format_is_compressed(vk_format_to_pipe_format(format));
+ return util_format_is_compressed(vk_format_to_pipe_format(format));
}
static inline bool
vk_format_is_subsampled(VkFormat format)
{
- return util_format_is_subsampled_422(vk_format_to_pipe_format(format));
+ return util_format_is_subsampled_422(vk_format_to_pipe_format(format));
}
static inline VkFormat
vk_format_depth_only(VkFormat format)
{
- switch (format) {
- case VK_FORMAT_D16_UNORM_S8_UINT:
- return VK_FORMAT_D16_UNORM;
- case VK_FORMAT_D24_UNORM_S8_UINT:
- return VK_FORMAT_X8_D24_UNORM_PACK32;
- case VK_FORMAT_D32_SFLOAT_S8_UINT:
- return VK_FORMAT_D32_SFLOAT;
- default:
- return format;
- }
+ switch (format) {
+ case VK_FORMAT_D16_UNORM_S8_UINT:
+ return VK_FORMAT_D16_UNORM;
+ case VK_FORMAT_D24_UNORM_S8_UINT:
+ return VK_FORMAT_X8_D24_UNORM_PACK32;
+ case VK_FORMAT_D32_SFLOAT_S8_UINT:
+ return VK_FORMAT_D32_SFLOAT;
+ default:
+ return format;
+ }
}
static inline bool
vk_format_is_int(VkFormat format)
{
- return util_format_is_pure_integer(vk_format_to_pipe_format(format));
+ return util_format_is_pure_integer(vk_format_to_pipe_format(format));
}
static inline bool
vk_format_is_uint(VkFormat format)
{
- return util_format_is_pure_uint(vk_format_to_pipe_format(format));
+ return util_format_is_pure_uint(vk_format_to_pipe_format(format));
}
static inline bool
vk_format_is_sint(VkFormat format)
{
- return util_format_is_pure_sint(vk_format_to_pipe_format(format));
+ return util_format_is_pure_sint(vk_format_to_pipe_format(format));
}
static inline bool
vk_format_is_unorm(VkFormat format)
{
- return util_format_is_unorm(vk_format_to_pipe_format(format));
+ return util_format_is_unorm(vk_format_to_pipe_format(format));
}
static inline bool
vk_format_is_srgb(VkFormat format)
{
- return util_format_is_srgb(vk_format_to_pipe_format(format));
+ return util_format_is_srgb(vk_format_to_pipe_format(format));
}
static inline VkFormat
vk_format_no_srgb(VkFormat format)
{
- switch(format) {
- case VK_FORMAT_R8_SRGB:
- return VK_FORMAT_R8_UNORM;
- case VK_FORMAT_R8G8_SRGB:
- return VK_FORMAT_R8G8_UNORM;
- case VK_FORMAT_R8G8B8_SRGB:
- return VK_FORMAT_R8G8B8_UNORM;
- case VK_FORMAT_B8G8R8_SRGB:
- return VK_FORMAT_B8G8R8_UNORM;
- case VK_FORMAT_R8G8B8A8_SRGB:
- return VK_FORMAT_R8G8B8A8_UNORM;
- case VK_FORMAT_B8G8R8A8_SRGB:
- return VK_FORMAT_B8G8R8A8_UNORM;
- case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
- return VK_FORMAT_A8B8G8R8_UNORM_PACK32;
- case VK_FORMAT_BC1_RGB_SRGB_BLOCK:
- return VK_FORMAT_BC1_RGB_UNORM_BLOCK;
- case VK_FORMAT_BC1_RGBA_SRGB_BLOCK:
- return VK_FORMAT_BC1_RGBA_UNORM_BLOCK;
- case VK_FORMAT_BC2_SRGB_BLOCK:
- return VK_FORMAT_BC2_UNORM_BLOCK;
- case VK_FORMAT_BC3_SRGB_BLOCK:
- return VK_FORMAT_BC3_UNORM_BLOCK;
- case VK_FORMAT_BC7_SRGB_BLOCK:
- return VK_FORMAT_BC7_UNORM_BLOCK;
- case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
- return VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK;
- case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
- return VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK;
- case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
- return VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK;
- default:
- assert(!vk_format_is_srgb(format));
- return format;
- }
+ switch (format) {
+ case VK_FORMAT_R8_SRGB:
+ return VK_FORMAT_R8_UNORM;
+ case VK_FORMAT_R8G8_SRGB:
+ return VK_FORMAT_R8G8_UNORM;
+ case VK_FORMAT_R8G8B8_SRGB:
+ return VK_FORMAT_R8G8B8_UNORM;
+ case VK_FORMAT_B8G8R8_SRGB:
+ return VK_FORMAT_B8G8R8_UNORM;
+ case VK_FORMAT_R8G8B8A8_SRGB:
+ return VK_FORMAT_R8G8B8A8_UNORM;
+ case VK_FORMAT_B8G8R8A8_SRGB:
+ return VK_FORMAT_B8G8R8A8_UNORM;
+ case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
+ return VK_FORMAT_A8B8G8R8_UNORM_PACK32;
+ case VK_FORMAT_BC1_RGB_SRGB_BLOCK:
+ return VK_FORMAT_BC1_RGB_UNORM_BLOCK;
+ case VK_FORMAT_BC1_RGBA_SRGB_BLOCK:
+ return VK_FORMAT_BC1_RGBA_UNORM_BLOCK;
+ case VK_FORMAT_BC2_SRGB_BLOCK:
+ return VK_FORMAT_BC2_UNORM_BLOCK;
+ case VK_FORMAT_BC3_SRGB_BLOCK:
+ return VK_FORMAT_BC3_UNORM_BLOCK;
+ case VK_FORMAT_BC7_SRGB_BLOCK:
+ return VK_FORMAT_BC7_UNORM_BLOCK;
+ case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
+ return VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK;
+ case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
+ return VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK;
+ case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
+ return VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK;
+ default:
+ assert(!vk_format_is_srgb(format));
+ return format;
+ }
}
static inline VkFormat
vk_format_stencil_only(VkFormat format)
{
- return VK_FORMAT_S8_UINT;
+ return VK_FORMAT_S8_UINT;
}
static inline unsigned
-vk_format_get_component_bits(VkFormat format,
- enum util_format_colorspace colorspace,
- unsigned component)
+vk_format_get_component_bits(VkFormat format, enum util_format_colorspace colorspace,
+ unsigned component)
{
- const struct util_format_description *desc = vk_format_description(format);
- enum util_format_colorspace desc_colorspace;
-
- assert(format);
- if (!format) {
- return 0;
- }
-
- assert(component < 4);
-
- /* Treat RGB and SRGB as equivalent. */
- if (colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
- colorspace = UTIL_FORMAT_COLORSPACE_RGB;
- }
- if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
- desc_colorspace = UTIL_FORMAT_COLORSPACE_RGB;
- } else {
- desc_colorspace = desc->colorspace;
- }
-
- if (desc_colorspace != colorspace) {
- return 0;
- }
-
- switch (desc->swizzle[component]) {
- case PIPE_SWIZZLE_X:
- return desc->channel[0].size;
- case PIPE_SWIZZLE_Y:
- return desc->channel[1].size;
- case PIPE_SWIZZLE_Z:
- return desc->channel[2].size;
- case PIPE_SWIZZLE_W:
- return desc->channel[3].size;
- default:
- return 0;
- }
+ const struct util_format_description *desc = vk_format_description(format);
+ enum util_format_colorspace desc_colorspace;
+
+ assert(format);
+ if (!format) {
+ return 0;
+ }
+
+ assert(component < 4);
+
+ /* Treat RGB and SRGB as equivalent. */
+ if (colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
+ colorspace = UTIL_FORMAT_COLORSPACE_RGB;
+ }
+ if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
+ desc_colorspace = UTIL_FORMAT_COLORSPACE_RGB;
+ } else {
+ desc_colorspace = desc->colorspace;
+ }
+
+ if (desc_colorspace != colorspace) {
+ return 0;
+ }
+
+ switch (desc->swizzle[component]) {
+ case PIPE_SWIZZLE_X:
+ return desc->channel[0].size;
+ case PIPE_SWIZZLE_Y:
+ return desc->channel[1].size;
+ case PIPE_SWIZZLE_Z:
+ return desc->channel[2].size;
+ case PIPE_SWIZZLE_W:
+ return desc->channel[3].size;
+ default:
+ return 0;
+ }
}
static inline VkFormat
vk_to_non_srgb_format(VkFormat format)
{
- switch(format) {
- case VK_FORMAT_R8_SRGB :
- return VK_FORMAT_R8_UNORM;
- case VK_FORMAT_R8G8_SRGB:
- return VK_FORMAT_R8G8_UNORM;
- case VK_FORMAT_R8G8B8_SRGB:
- return VK_FORMAT_R8G8B8_UNORM;
- case VK_FORMAT_B8G8R8_SRGB:
- return VK_FORMAT_B8G8R8_UNORM;
- case VK_FORMAT_R8G8B8A8_SRGB :
- return VK_FORMAT_R8G8B8A8_UNORM;
- case VK_FORMAT_B8G8R8A8_SRGB:
- return VK_FORMAT_B8G8R8A8_UNORM;
- case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
- return VK_FORMAT_A8B8G8R8_UNORM_PACK32;
- default:
- return format;
- }
+ switch (format) {
+ case VK_FORMAT_R8_SRGB:
+ return VK_FORMAT_R8_UNORM;
+ case VK_FORMAT_R8G8_SRGB:
+ return VK_FORMAT_R8G8_UNORM;
+ case VK_FORMAT_R8G8B8_SRGB:
+ return VK_FORMAT_R8G8B8_UNORM;
+ case VK_FORMAT_B8G8R8_SRGB:
+ return VK_FORMAT_B8G8R8_UNORM;
+ case VK_FORMAT_R8G8B8A8_SRGB:
+ return VK_FORMAT_R8G8B8A8_UNORM;
+ case VK_FORMAT_B8G8R8A8_SRGB:
+ return VK_FORMAT_B8G8R8A8_UNORM;
+ case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
+ return VK_FORMAT_A8B8G8R8_UNORM_PACK32;
+ default:
+ return format;
+ }
}
static inline unsigned
vk_format_get_nr_components(VkFormat format)
{
- return util_format_get_nr_components(vk_format_to_pipe_format(format));
+ return util_format_get_nr_components(vk_format_to_pipe_format(format));
}
static inline unsigned
vk_format_get_plane_count(VkFormat format)
{
- return util_format_get_num_planes(vk_format_to_pipe_format(format));
+ return util_format_get_num_planes(vk_format_to_pipe_format(format));
}
static inline unsigned
-vk_format_get_plane_width(VkFormat format, unsigned plane,
- unsigned width)
+vk_format_get_plane_width(VkFormat format, unsigned plane, unsigned width)
{
- return util_format_get_plane_width(vk_format_to_pipe_format(format), plane, width);
+ return util_format_get_plane_width(vk_format_to_pipe_format(format), plane, width);
}
static inline unsigned
-vk_format_get_plane_height(VkFormat format, unsigned plane,
- unsigned height)
+vk_format_get_plane_height(VkFormat format, unsigned plane, unsigned height)
{
- return util_format_get_plane_height(vk_format_to_pipe_format(format), plane, height);
+ return util_format_get_plane_height(vk_format_to_pipe_format(format), plane, height);
}
static inline VkFormat
vk_format_get_plane_format(VkFormat format, unsigned plane_id)
{
- assert(plane_id < vk_format_get_plane_count(format));
-
- switch(format) {
- case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
- case VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM:
- case VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM:
- return VK_FORMAT_R8_UNORM;
- case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
- case VK_FORMAT_G8_B8R8_2PLANE_422_UNORM:
- return plane_id ? VK_FORMAT_R8G8_UNORM : VK_FORMAT_R8_UNORM;
- case VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM:
- case VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM:
- case VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM:
- return VK_FORMAT_R16_UNORM;
- case VK_FORMAT_G16_B16R16_2PLANE_420_UNORM:
- case VK_FORMAT_G16_B16R16_2PLANE_422_UNORM:
- return plane_id ? VK_FORMAT_R16G16_UNORM : VK_FORMAT_R16_UNORM;
- default:
- assert(vk_format_get_plane_count(format) == 1);
- return format;
- }
+ assert(plane_id < vk_format_get_plane_count(format));
+
+ switch (format) {
+ case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
+ case VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM:
+ case VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM:
+ return VK_FORMAT_R8_UNORM;
+ case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
+ case VK_FORMAT_G8_B8R8_2PLANE_422_UNORM:
+ return plane_id ? VK_FORMAT_R8G8_UNORM : VK_FORMAT_R8_UNORM;
+ case VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM:
+ case VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM:
+ case VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM:
+ return VK_FORMAT_R16_UNORM;
+ case VK_FORMAT_G16_B16R16_2PLANE_420_UNORM:
+ case VK_FORMAT_G16_B16R16_2PLANE_422_UNORM:
+ return plane_id ? VK_FORMAT_R16G16_UNORM : VK_FORMAT_R16_UNORM;
+ default:
+ assert(vk_format_get_plane_count(format) == 1);
+ return format;
+ }
}
-
#endif /* VK_FORMAT_H */
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
index 2fc391d3c2d..e96bcf23ee8 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
@@ -31,1025 +31,1018 @@
#include "radv_amdgpu_bo.h"
#include <amdgpu.h>
-#include "drm-uapi/amdgpu_drm.h"
#include <inttypes.h>
#include <pthread.h>
#include <unistd.h>
+#include "drm-uapi/amdgpu_drm.h"
+#include "util/os_time.h"
#include "util/u_atomic.h"
-#include "util/u_memory.h"
#include "util/u_math.h"
-#include "util/os_time.h"
+#include "util/u_memory.h"
-static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys *_ws,
- struct radeon_winsys_bo *_bo);
+static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo);
static int
-radv_amdgpu_bo_va_op(struct radv_amdgpu_winsys *ws,
- amdgpu_bo_handle bo,
- uint64_t offset,
- uint64_t size,
- uint64_t addr,
- uint32_t bo_flags,
- uint64_t internal_flags,
- uint32_t ops)
+radv_amdgpu_bo_va_op(struct radv_amdgpu_winsys *ws, amdgpu_bo_handle bo, uint64_t offset,
+ uint64_t size, uint64_t addr, uint32_t bo_flags, uint64_t internal_flags,
+ uint32_t ops)
{
- uint64_t flags = internal_flags;
- if (bo) {
- flags = AMDGPU_VM_PAGE_READABLE |
- AMDGPU_VM_PAGE_EXECUTABLE;
+ uint64_t flags = internal_flags;
+ if (bo) {
+ flags = AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_EXECUTABLE;
- if ((bo_flags & RADEON_FLAG_VA_UNCACHED) &&
- ws->info.chip_class >= GFX9)
- flags |= AMDGPU_VM_MTYPE_UC;
+ if ((bo_flags & RADEON_FLAG_VA_UNCACHED) && ws->info.chip_class >= GFX9)
+ flags |= AMDGPU_VM_MTYPE_UC;
- if (!(bo_flags & RADEON_FLAG_READ_ONLY))
- flags |= AMDGPU_VM_PAGE_WRITEABLE;
- }
+ if (!(bo_flags & RADEON_FLAG_READ_ONLY))
+ flags |= AMDGPU_VM_PAGE_WRITEABLE;
+ }
- size = align64(size, getpagesize());
+ size = align64(size, getpagesize());
- return amdgpu_bo_va_op_raw(ws->dev, bo, offset, size, addr,
- flags, ops);
+ return amdgpu_bo_va_op_raw(ws->dev, bo, offset, size, addr, flags, ops);
}
static void
-radv_amdgpu_winsys_virtual_map(struct radv_amdgpu_winsys *ws,
- struct radv_amdgpu_winsys_bo *bo,
+radv_amdgpu_winsys_virtual_map(struct radv_amdgpu_winsys *ws, struct radv_amdgpu_winsys_bo *bo,
const struct radv_amdgpu_map_range *range)
{
- uint64_t internal_flags = 0;
- assert(range->size);
-
- if (!range->bo) {
- if (!ws->info.has_sparse_vm_mappings)
- return;
-
- internal_flags |= AMDGPU_VM_PAGE_PRT;
- } else
- p_atomic_inc(&range->bo->ref_count);
-
- int r = radv_amdgpu_bo_va_op(ws, range->bo ? range->bo->bo : NULL,
- range->bo_offset, range->size,
- range->offset + bo->base.va, 0,
- internal_flags, AMDGPU_VA_OP_MAP);
- if (r)
- abort();
+ uint64_t internal_flags = 0;
+ assert(range->size);
+
+ if (!range->bo) {
+ if (!ws->info.has_sparse_vm_mappings)
+ return;
+
+ internal_flags |= AMDGPU_VM_PAGE_PRT;
+ } else
+ p_atomic_inc(&range->bo->ref_count);
+
+ int r = radv_amdgpu_bo_va_op(ws, range->bo ? range->bo->bo : NULL, range->bo_offset, range->size,
+ range->offset + bo->base.va, 0, internal_flags, AMDGPU_VA_OP_MAP);
+ if (r)
+ abort();
}
static void
-radv_amdgpu_winsys_virtual_unmap(struct radv_amdgpu_winsys *ws,
- struct radv_amdgpu_winsys_bo *bo,
+radv_amdgpu_winsys_virtual_unmap(struct radv_amdgpu_winsys *ws, struct radv_amdgpu_winsys_bo *bo,
const struct radv_amdgpu_map_range *range)
{
- uint64_t internal_flags = 0;
- assert(range->size);
-
- if (!range->bo) {
- if(!ws->info.has_sparse_vm_mappings)
- return;
-
- /* Even though this is an unmap, if we don't set this flag,
- AMDGPU is going to complain about the missing buffer. */
- internal_flags |= AMDGPU_VM_PAGE_PRT;
- }
-
- int r = radv_amdgpu_bo_va_op(ws, range->bo ? range->bo->bo : NULL,
- range->bo_offset, range->size,
- range->offset + bo->base.va, 0, internal_flags,
- AMDGPU_VA_OP_UNMAP);
- if (r)
- abort();
-
- if (range->bo)
- ws->base.buffer_destroy(&ws->base, (struct radeon_winsys_bo *)range->bo);
+ uint64_t internal_flags = 0;
+ assert(range->size);
+
+ if (!range->bo) {
+ if (!ws->info.has_sparse_vm_mappings)
+ return;
+
+ /* Even though this is an unmap, if we don't set this flag,
+ AMDGPU is going to complain about the missing buffer. */
+ internal_flags |= AMDGPU_VM_PAGE_PRT;
+ }
+
+ int r = radv_amdgpu_bo_va_op(ws, range->bo ? range->bo->bo : NULL, range->bo_offset, range->size,
+ range->offset + bo->base.va, 0, internal_flags, AMDGPU_VA_OP_UNMAP);
+ if (r)
+ abort();
+
+ if (range->bo)
+ ws->base.buffer_destroy(&ws->base, (struct radeon_winsys_bo *)range->bo);
}
-static int bo_comparator(const void *ap, const void *bp) {
- struct radv_amdgpu_bo *a = *(struct radv_amdgpu_bo *const *)ap;
- struct radv_amdgpu_bo *b = *(struct radv_amdgpu_bo *const *)bp;
- return (a > b) ? 1 : (a < b) ? -1 : 0;
+static int
+bo_comparator(const void *ap, const void *bp)
+{
+ struct radv_amdgpu_bo *a = *(struct radv_amdgpu_bo *const *)ap;
+ struct radv_amdgpu_bo *b = *(struct radv_amdgpu_bo *const *)bp;
+ return (a > b) ? 1 : (a < b) ? -1 : 0;
}
static VkResult
radv_amdgpu_winsys_rebuild_bo_list(struct radv_amdgpu_winsys_bo *bo)
{
- if (bo->bo_capacity < bo->range_count) {
- uint32_t new_count = MAX2(bo->bo_capacity * 2, bo->range_count);
- struct radv_amdgpu_winsys_bo **bos =
- realloc(bo->bos, new_count * sizeof(struct radv_amdgpu_winsys_bo *));
- if (!bos)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
- bo->bos = bos;
- bo->bo_capacity = new_count;
- }
-
- uint32_t temp_bo_count = 0;
- for (uint32_t i = 0; i < bo->range_count; ++i)
- if (bo->ranges[i].bo)
- bo->bos[temp_bo_count++] = bo->ranges[i].bo;
-
- qsort(bo->bos, temp_bo_count, sizeof(struct radv_amdgpu_winsys_bo *), &bo_comparator);
-
- uint32_t final_bo_count = 1;
- for (uint32_t i = 1; i < temp_bo_count; ++i)
- if (bo->bos[i] != bo->bos[i - 1])
- bo->bos[final_bo_count++] = bo->bos[i];
-
- bo->bo_count = final_bo_count;
-
- return VK_SUCCESS;
+ if (bo->bo_capacity < bo->range_count) {
+ uint32_t new_count = MAX2(bo->bo_capacity * 2, bo->range_count);
+ struct radv_amdgpu_winsys_bo **bos =
+ realloc(bo->bos, new_count * sizeof(struct radv_amdgpu_winsys_bo *));
+ if (!bos)
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+ bo->bos = bos;
+ bo->bo_capacity = new_count;
+ }
+
+ uint32_t temp_bo_count = 0;
+ for (uint32_t i = 0; i < bo->range_count; ++i)
+ if (bo->ranges[i].bo)
+ bo->bos[temp_bo_count++] = bo->ranges[i].bo;
+
+ qsort(bo->bos, temp_bo_count, sizeof(struct radv_amdgpu_winsys_bo *), &bo_comparator);
+
+ uint32_t final_bo_count = 1;
+ for (uint32_t i = 1; i < temp_bo_count; ++i)
+ if (bo->bos[i] != bo->bos[i - 1])
+ bo->bos[final_bo_count++] = bo->bos[i];
+
+ bo->bo_count = final_bo_count;
+
+ return VK_SUCCESS;
}
static VkResult
-radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys *_ws,
- struct radeon_winsys_bo *_parent,
- uint64_t offset, uint64_t size,
- struct radeon_winsys_bo *_bo, uint64_t bo_offset)
+radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys *_ws, struct radeon_winsys_bo *_parent,
+ uint64_t offset, uint64_t size, struct radeon_winsys_bo *_bo,
+ uint64_t bo_offset)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
- struct radv_amdgpu_winsys_bo *parent = (struct radv_amdgpu_winsys_bo *)_parent;
- struct radv_amdgpu_winsys_bo *bo = (struct radv_amdgpu_winsys_bo*)_bo;
- int range_count_delta, new_idx;
- int first = 0, last;
- struct radv_amdgpu_map_range new_first, new_last;
- VkResult result;
-
- assert(parent->is_virtual);
- assert(!bo || !bo->is_virtual);
-
- /* We have at most 2 new ranges (1 by the bind, and another one by splitting a range that contains the newly bound range). */
- if (parent->range_capacity - parent->range_count < 2) {
- uint32_t range_capacity = parent->range_capacity + 2;
- struct radv_amdgpu_map_range *ranges =
- realloc(parent->ranges,
- range_capacity * sizeof(struct radv_amdgpu_map_range));
- if (!ranges)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
- parent->ranges = ranges;
- parent->range_capacity = range_capacity;
- }
-
- /*
- * [first, last] is exactly the range of ranges that either overlap the
- * new parent, or are adjacent to it. This corresponds to the bind ranges
- * that may change.
- */
- while(first + 1 < parent->range_count && parent->ranges[first].offset + parent->ranges[first].size < offset)
- ++first;
-
- last = first;
- while(last + 1 < parent->range_count && parent->ranges[last + 1].offset <= offset + size)
- ++last;
-
- /* Whether the first or last range are going to be totally removed or just
- * resized/left alone. Note that in the case of first == last, we will split
- * this into a part before and after the new range. The remove flag is then
- * whether to not create the corresponding split part. */
- bool remove_first = parent->ranges[first].offset == offset;
- bool remove_last = parent->ranges[last].offset + parent->ranges[last].size == offset + size;
- bool unmapped_first = false;
-
- assert(parent->ranges[first].offset <= offset);
- assert(parent->ranges[last].offset + parent->ranges[last].size >= offset + size);
-
- /* Try to merge the new range with the first range. */
- if (parent->ranges[first].bo == bo && (!bo || offset - bo_offset == parent->ranges[first].offset - parent->ranges[first].bo_offset)) {
- size += offset - parent->ranges[first].offset;
- offset = parent->ranges[first].offset;
- bo_offset = parent->ranges[first].bo_offset;
- remove_first = true;
- }
-
- /* Try to merge the new range with the last range. */
- if (parent->ranges[last].bo == bo && (!bo || offset - bo_offset == parent->ranges[last].offset - parent->ranges[last].bo_offset)) {
- size = parent->ranges[last].offset + parent->ranges[last].size - offset;
- remove_last = true;
- }
-
- range_count_delta = 1 - (last - first + 1) + !remove_first + !remove_last;
- new_idx = first + !remove_first;
-
- /* Any range between first and last is going to be entirely covered by the new range so just unmap them. */
- for (int i = first + 1; i < last; ++i)
- radv_amdgpu_winsys_virtual_unmap(ws, parent, parent->ranges + i);
-
- /* If the first/last range are not left alone we unmap then and optionally map
- * them again after modifications. Not that this implicitly can do the splitting
- * if first == last. */
- new_first = parent->ranges[first];
- new_last = parent->ranges[last];
-
- if (parent->ranges[first].offset + parent->ranges[first].size > offset || remove_first) {
- radv_amdgpu_winsys_virtual_unmap(ws, parent, parent->ranges + first);
- unmapped_first = true;
-
- if (!remove_first) {
- new_first.size = offset - new_first.offset;
- radv_amdgpu_winsys_virtual_map(ws, parent, &new_first);
- }
- }
-
- if (parent->ranges[last].offset < offset + size || remove_last) {
- if (first != last || !unmapped_first)
- radv_amdgpu_winsys_virtual_unmap(ws, parent, parent->ranges + last);
-
- if (!remove_last) {
- new_last.size -= offset + size - new_last.offset;
- new_last.bo_offset += (offset + size - new_last.offset);
- new_last.offset = offset + size;
- radv_amdgpu_winsys_virtual_map(ws, parent, &new_last);
- }
- }
-
- /* Moves the range list after last to account for the changed number of ranges. */
- memmove(parent->ranges + last + 1 + range_count_delta, parent->ranges + last + 1,
- sizeof(struct radv_amdgpu_map_range) * (parent->range_count - last - 1));
-
- if (!remove_first)
- parent->ranges[first] = new_first;
-
- if (!remove_last)
- parent->ranges[new_idx + 1] = new_last;
-
- /* Actually set up the new range. */
- parent->ranges[new_idx].offset = offset;
- parent->ranges[new_idx].size = size;
- parent->ranges[new_idx].bo = bo;
- parent->ranges[new_idx].bo_offset = bo_offset;
-
- radv_amdgpu_winsys_virtual_map(ws, parent, parent->ranges + new_idx);
-
- parent->range_count += range_count_delta;
-
- result = radv_amdgpu_winsys_rebuild_bo_list(parent);
- if (result != VK_SUCCESS)
- return result;
-
- return VK_SUCCESS;
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ struct radv_amdgpu_winsys_bo *parent = (struct radv_amdgpu_winsys_bo *)_parent;
+ struct radv_amdgpu_winsys_bo *bo = (struct radv_amdgpu_winsys_bo *)_bo;
+ int range_count_delta, new_idx;
+ int first = 0, last;
+ struct radv_amdgpu_map_range new_first, new_last;
+ VkResult result;
+
+ assert(parent->is_virtual);
+ assert(!bo || !bo->is_virtual);
+
+ /* We have at most 2 new ranges (1 by the bind, and another one by splitting a range that
+ * contains the newly bound range). */
+ if (parent->range_capacity - parent->range_count < 2) {
+ uint32_t range_capacity = parent->range_capacity + 2;
+ struct radv_amdgpu_map_range *ranges =
+ realloc(parent->ranges, range_capacity * sizeof(struct radv_amdgpu_map_range));
+ if (!ranges)
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+ parent->ranges = ranges;
+ parent->range_capacity = range_capacity;
+ }
+
+ /*
+ * [first, last] is exactly the range of ranges that either overlap the
+ * new parent, or are adjacent to it. This corresponds to the bind ranges
+ * that may change.
+ */
+ while (first + 1 < parent->range_count &&
+ parent->ranges[first].offset + parent->ranges[first].size < offset)
+ ++first;
+
+ last = first;
+ while (last + 1 < parent->range_count && parent->ranges[last + 1].offset <= offset + size)
+ ++last;
+
+ /* Whether the first or last range is going to be totally removed or just
+ * resized/left alone. Note that in the case of first == last, we will split
+ * this into a part before and after the new range. The remove flag then
+ * indicates that the corresponding split part should not be created. */
+ bool remove_first = parent->ranges[first].offset == offset;
+ bool remove_last = parent->ranges[last].offset + parent->ranges[last].size == offset + size;
+ bool unmapped_first = false;
+
+ assert(parent->ranges[first].offset <= offset);
+ assert(parent->ranges[last].offset + parent->ranges[last].size >= offset + size);
+
+ /* Try to merge the new range with the first range. */
+ if (parent->ranges[first].bo == bo &&
+ (!bo ||
+ offset - bo_offset == parent->ranges[first].offset - parent->ranges[first].bo_offset)) {
+ size += offset - parent->ranges[first].offset;
+ offset = parent->ranges[first].offset;
+ bo_offset = parent->ranges[first].bo_offset;
+ remove_first = true;
+ }
+
+ /* Try to merge the new range with the last range. */
+ if (parent->ranges[last].bo == bo &&
+ (!bo ||
+ offset - bo_offset == parent->ranges[last].offset - parent->ranges[last].bo_offset)) {
+ size = parent->ranges[last].offset + parent->ranges[last].size - offset;
+ remove_last = true;
+ }
+
+ range_count_delta = 1 - (last - first + 1) + !remove_first + !remove_last;
+ new_idx = first + !remove_first;
+
+ /* Any ranges between first and last are going to be entirely covered by the new range, so
+ * just unmap them. */
+ for (int i = first + 1; i < last; ++i)
+ radv_amdgpu_winsys_virtual_unmap(ws, parent, parent->ranges + i);
+
+ /* If the first/last ranges are not left alone, we unmap them and optionally map
+ * them again after modifications. Note that this implicitly can do the splitting
+ * if first == last. */
+ new_first = parent->ranges[first];
+ new_last = parent->ranges[last];
+
+ if (parent->ranges[first].offset + parent->ranges[first].size > offset || remove_first) {
+ radv_amdgpu_winsys_virtual_unmap(ws, parent, parent->ranges + first);
+ unmapped_first = true;
+
+ if (!remove_first) {
+ new_first.size = offset - new_first.offset;
+ radv_amdgpu_winsys_virtual_map(ws, parent, &new_first);
+ }
+ }
+
+ if (parent->ranges[last].offset < offset + size || remove_last) {
+ if (first != last || !unmapped_first)
+ radv_amdgpu_winsys_virtual_unmap(ws, parent, parent->ranges + last);
+
+ if (!remove_last) {
+ new_last.size -= offset + size - new_last.offset;
+ new_last.bo_offset += (offset + size - new_last.offset);
+ new_last.offset = offset + size;
+ radv_amdgpu_winsys_virtual_map(ws, parent, &new_last);
+ }
+ }
+
+ /* Moves the range list after last to account for the changed number of ranges. */
+ memmove(parent->ranges + last + 1 + range_count_delta, parent->ranges + last + 1,
+ sizeof(struct radv_amdgpu_map_range) * (parent->range_count - last - 1));
+
+ if (!remove_first)
+ parent->ranges[first] = new_first;
+
+ if (!remove_last)
+ parent->ranges[new_idx + 1] = new_last;
+
+ /* Actually set up the new range. */
+ parent->ranges[new_idx].offset = offset;
+ parent->ranges[new_idx].size = size;
+ parent->ranges[new_idx].bo = bo;
+ parent->ranges[new_idx].bo_offset = bo_offset;
+
+ radv_amdgpu_winsys_virtual_map(ws, parent, parent->ranges + new_idx);
+
+ parent->range_count += range_count_delta;
+
+ result = radv_amdgpu_winsys_rebuild_bo_list(parent);
+ if (result != VK_SUCCESS)
+ return result;
+
+ return VK_SUCCESS;
}
struct radv_amdgpu_winsys_bo_log {
- struct list_head list;
- uint64_t va;
- uint64_t size;
- uint64_t timestamp; /* CPU timestamp */
- uint8_t is_virtual : 1;
- uint8_t destroyed : 1;
+ struct list_head list;
+ uint64_t va;
+ uint64_t size;
+ uint64_t timestamp; /* CPU timestamp */
+ uint8_t is_virtual : 1;
+ uint8_t destroyed : 1;
};
-static void radv_amdgpu_log_bo(struct radv_amdgpu_winsys *ws,
- struct radv_amdgpu_winsys_bo *bo,
- bool destroyed)
+static void
+radv_amdgpu_log_bo(struct radv_amdgpu_winsys *ws, struct radv_amdgpu_winsys_bo *bo, bool destroyed)
{
- struct radv_amdgpu_winsys_bo_log *bo_log = NULL;
+ struct radv_amdgpu_winsys_bo_log *bo_log = NULL;
- if (!ws->debug_log_bos)
- return;
+ if (!ws->debug_log_bos)
+ return;
- bo_log = malloc(sizeof(*bo_log));
- if (!bo_log)
- return;
+ bo_log = malloc(sizeof(*bo_log));
+ if (!bo_log)
+ return;
- bo_log->va = bo->base.va;
- bo_log->size = bo->size;
- bo_log->timestamp = os_time_get_nano();
- bo_log->is_virtual = bo->is_virtual;
- bo_log->destroyed = destroyed;
+ bo_log->va = bo->base.va;
+ bo_log->size = bo->size;
+ bo_log->timestamp = os_time_get_nano();
+ bo_log->is_virtual = bo->is_virtual;
+ bo_log->destroyed = destroyed;
- u_rwlock_wrlock(&ws->log_bo_list_lock);
- list_addtail(&bo_log->list, &ws->log_bo_list);
- u_rwlock_wrunlock(&ws->log_bo_list_lock);
+ u_rwlock_wrlock(&ws->log_bo_list_lock);
+ list_addtail(&bo_log->list, &ws->log_bo_list);
+ u_rwlock_wrunlock(&ws->log_bo_list_lock);
}
-static int radv_amdgpu_global_bo_list_add(struct radv_amdgpu_winsys *ws,
- struct radv_amdgpu_winsys_bo *bo)
+static int
+radv_amdgpu_global_bo_list_add(struct radv_amdgpu_winsys *ws, struct radv_amdgpu_winsys_bo *bo)
{
- u_rwlock_wrlock(&ws->global_bo_list.lock);
- if (ws->global_bo_list.count == ws->global_bo_list.capacity) {
- unsigned capacity = MAX2(4, ws->global_bo_list.capacity * 2);
- void *data = realloc(ws->global_bo_list.bos,
- capacity * sizeof(struct radv_amdgpu_winsys_bo *));
- if (!data) {
- u_rwlock_wrunlock(&ws->global_bo_list.lock);
- return VK_ERROR_OUT_OF_HOST_MEMORY;
- }
-
- ws->global_bo_list.bos = (struct radv_amdgpu_winsys_bo **)data;
- ws->global_bo_list.capacity = capacity;
- }
-
- ws->global_bo_list.bos[ws->global_bo_list.count++] = bo;
- bo->base.use_global_list = true;
- u_rwlock_wrunlock(&ws->global_bo_list.lock);
- return VK_SUCCESS;
+ u_rwlock_wrlock(&ws->global_bo_list.lock);
+ if (ws->global_bo_list.count == ws->global_bo_list.capacity) {
+ unsigned capacity = MAX2(4, ws->global_bo_list.capacity * 2);
+ void *data =
+ realloc(ws->global_bo_list.bos, capacity * sizeof(struct radv_amdgpu_winsys_bo *));
+ if (!data) {
+ u_rwlock_wrunlock(&ws->global_bo_list.lock);
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+ }
+
+ ws->global_bo_list.bos = (struct radv_amdgpu_winsys_bo **)data;
+ ws->global_bo_list.capacity = capacity;
+ }
+
+ ws->global_bo_list.bos[ws->global_bo_list.count++] = bo;
+ bo->base.use_global_list = true;
+ u_rwlock_wrunlock(&ws->global_bo_list.lock);
+ return VK_SUCCESS;
}
-static void radv_amdgpu_global_bo_list_del(struct radv_amdgpu_winsys *ws,
- struct radv_amdgpu_winsys_bo *bo)
+static void
+radv_amdgpu_global_bo_list_del(struct radv_amdgpu_winsys *ws, struct radv_amdgpu_winsys_bo *bo)
{
- u_rwlock_wrlock(&ws->global_bo_list.lock);
- for(unsigned i = ws->global_bo_list.count; i-- > 0;) {
- if (ws->global_bo_list.bos[i] == bo) {
- ws->global_bo_list.bos[i] = ws->global_bo_list.bos[ws->global_bo_list.count - 1];
- --ws->global_bo_list.count;
- bo->base.use_global_list = false;
- break;
- }
- }
- u_rwlock_wrunlock(&ws->global_bo_list.lock);
+ u_rwlock_wrlock(&ws->global_bo_list.lock);
+ for (unsigned i = ws->global_bo_list.count; i-- > 0;) {
+ if (ws->global_bo_list.bos[i] == bo) {
+ ws->global_bo_list.bos[i] = ws->global_bo_list.bos[ws->global_bo_list.count - 1];
+ --ws->global_bo_list.count;
+ bo->base.use_global_list = false;
+ break;
+ }
+ }
+ u_rwlock_wrunlock(&ws->global_bo_list.lock);
}
-static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys *_ws,
- struct radeon_winsys_bo *_bo)
+static void
+radv_amdgpu_winsys_bo_destroy(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
- struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
-
- if (p_atomic_dec_return(&bo->ref_count))
- return;
-
- radv_amdgpu_log_bo(ws, bo, true);
-
- if (bo->is_virtual) {
- for (uint32_t i = 0; i < bo->range_count; ++i) {
- radv_amdgpu_winsys_virtual_unmap(ws, bo, bo->ranges + i);
- }
- free(bo->bos);
- free(bo->ranges);
- } else {
- if (ws->debug_all_bos)
- radv_amdgpu_global_bo_list_del(ws, bo);
- radv_amdgpu_bo_va_op(ws, bo->bo, 0, bo->size, bo->base.va,
- 0, 0, AMDGPU_VA_OP_UNMAP);
- amdgpu_bo_free(bo->bo);
- }
-
- if (bo->base.initial_domain & RADEON_DOMAIN_VRAM) {
- if (bo->base.vram_no_cpu_access) {
- p_atomic_add(&ws->allocated_vram,
- -align64(bo->size, ws->info.gart_page_size));
- } else {
- p_atomic_add(&ws->allocated_vram_vis,
- -align64(bo->size, ws->info.gart_page_size));
- }
- }
-
- if (bo->base.initial_domain & RADEON_DOMAIN_GTT)
- p_atomic_add(&ws->allocated_gtt,
- -align64(bo->size, ws->info.gart_page_size));
-
- amdgpu_va_range_free(bo->va_handle);
- FREE(bo);
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
+
+ if (p_atomic_dec_return(&bo->ref_count))
+ return;
+
+ radv_amdgpu_log_bo(ws, bo, true);
+
+ if (bo->is_virtual) {
+ for (uint32_t i = 0; i < bo->range_count; ++i) {
+ radv_amdgpu_winsys_virtual_unmap(ws, bo, bo->ranges + i);
+ }
+ free(bo->bos);
+ free(bo->ranges);
+ } else {
+ if (ws->debug_all_bos)
+ radv_amdgpu_global_bo_list_del(ws, bo);
+ radv_amdgpu_bo_va_op(ws, bo->bo, 0, bo->size, bo->base.va, 0, 0, AMDGPU_VA_OP_UNMAP);
+ amdgpu_bo_free(bo->bo);
+ }
+
+ if (bo->base.initial_domain & RADEON_DOMAIN_VRAM) {
+ if (bo->base.vram_no_cpu_access) {
+ p_atomic_add(&ws->allocated_vram, -align64(bo->size, ws->info.gart_page_size));
+ } else {
+ p_atomic_add(&ws->allocated_vram_vis, -align64(bo->size, ws->info.gart_page_size));
+ }
+ }
+
+ if (bo->base.initial_domain & RADEON_DOMAIN_GTT)
+ p_atomic_add(&ws->allocated_gtt, -align64(bo->size, ws->info.gart_page_size));
+
+ amdgpu_va_range_free(bo->va_handle);
+ FREE(bo);
}
static struct radeon_winsys_bo *
-radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
- uint64_t size,
- unsigned alignment,
- enum radeon_bo_domain initial_domain,
- enum radeon_bo_flag flags,
- unsigned priority)
+radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws, uint64_t size, unsigned alignment,
+ enum radeon_bo_domain initial_domain, enum radeon_bo_flag flags,
+ unsigned priority)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
- struct radv_amdgpu_winsys_bo *bo;
- struct amdgpu_bo_alloc_request request = {0};
- struct radv_amdgpu_map_range *ranges = NULL;
- amdgpu_bo_handle buf_handle;
- uint64_t va = 0;
- amdgpu_va_handle va_handle;
- int r;
- bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
- if (!bo) {
- return NULL;
- }
-
- unsigned virt_alignment = alignment;
- if (size >= ws->info.pte_fragment_size)
- virt_alignment = MAX2(virt_alignment, ws->info.pte_fragment_size);
-
- r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
- size, virt_alignment, 0, &va, &va_handle,
- (flags & RADEON_FLAG_32BIT ? AMDGPU_VA_RANGE_32_BIT : 0) |
- AMDGPU_VA_RANGE_HIGH);
- if (r)
- goto error_va_alloc;
-
- bo->base.va = va;
- bo->va_handle = va_handle;
- bo->size = size;
- bo->is_virtual = !!(flags & RADEON_FLAG_VIRTUAL);
- bo->ref_count = 1;
-
- if (flags & RADEON_FLAG_VIRTUAL) {
- ranges = realloc(NULL, sizeof(struct radv_amdgpu_map_range));
- if (!ranges)
- goto error_ranges_alloc;
-
- bo->ranges = ranges;
- bo->range_count = 1;
- bo->range_capacity = 1;
-
- bo->ranges[0].offset = 0;
- bo->ranges[0].size = size;
- bo->ranges[0].bo = NULL;
- bo->ranges[0].bo_offset = 0;
-
- radv_amdgpu_winsys_virtual_map(ws, bo, bo->ranges);
- radv_amdgpu_log_bo(ws, bo, false);
-
- return (struct radeon_winsys_bo *)bo;
- }
-
- request.alloc_size = size;
- request.phys_alignment = alignment;
-
- if (initial_domain & RADEON_DOMAIN_VRAM) {
- request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;
-
- /* Since VRAM and GTT have almost the same performance on
- * APUs, we could just set GTT. However, in order to decrease
- * GTT(RAM) usage, which is shared with the OS, allow VRAM
- * placements too. The idea is not to use VRAM usefully, but
- * to use it so that it's not unused and wasted.
- *
- * Furthermore, even on discrete GPUs this is beneficial. If
- * both GTT and VRAM are set then AMDGPU still prefers VRAM
- * for the initial placement, but it makes the buffers
- * spillable. Otherwise AMDGPU tries to place the buffers in
- * VRAM really hard to the extent that we are getting a lot
- * of unnecessary movement. This helps significantly when
- * e.g. Horizon Zero Dawn allocates more memory than we have
- * VRAM.
- */
- request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
- }
-
- if (initial_domain & RADEON_DOMAIN_GTT)
- request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
- if (initial_domain & RADEON_DOMAIN_GDS)
- request.preferred_heap |= AMDGPU_GEM_DOMAIN_GDS;
- if (initial_domain & RADEON_DOMAIN_OA)
- request.preferred_heap |= AMDGPU_GEM_DOMAIN_OA;
-
- if (flags & RADEON_FLAG_CPU_ACCESS)
- request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
- if (flags & RADEON_FLAG_NO_CPU_ACCESS) {
- bo->base.vram_no_cpu_access = initial_domain & RADEON_DOMAIN_VRAM;
- request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
- }
- if (flags & RADEON_FLAG_GTT_WC)
- request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
- if (!(flags & RADEON_FLAG_IMPLICIT_SYNC))
- request.flags |= AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
- if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
- (ws->use_local_bos || (flags & RADEON_FLAG_PREFER_LOCAL_BO))) {
- bo->base.is_local = true;
- request.flags |= AMDGPU_GEM_CREATE_VM_ALWAYS_VALID;
- }
-
- /* this won't do anything on pre 4.9 kernels */
- if (initial_domain & RADEON_DOMAIN_VRAM) {
- if (ws->zero_all_vram_allocs || (flags & RADEON_FLAG_ZERO_VRAM))
- request.flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED;
- }
-
- r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
- if (r) {
- fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
- fprintf(stderr, "amdgpu: size : %"PRIu64" bytes\n", size);
- fprintf(stderr, "amdgpu: alignment : %u bytes\n", alignment);
- fprintf(stderr, "amdgpu: domains : %u\n", initial_domain);
- goto error_bo_alloc;
- }
-
- r = radv_amdgpu_bo_va_op(ws, buf_handle, 0, size, va, flags, 0,
- AMDGPU_VA_OP_MAP);
- if (r)
- goto error_va_map;
-
- bo->bo = buf_handle;
- bo->base.initial_domain = initial_domain;
- bo->base.use_global_list = bo->base.is_local;
- bo->is_shared = false;
- bo->priority = priority;
-
- r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
- assert(!r);
-
- if (initial_domain & RADEON_DOMAIN_VRAM) {
- /* Buffers allocated in VRAM with the NO_CPU_ACCESS flag
- * aren't mappable and they are counted as part of the VRAM
- * counter.
- *
- * Otherwise, buffers with the CPU_ACCESS flag or without any
- * of both (imported buffers) are counted as part of the VRAM
- * visible counter because they can be mapped.
- */
- if (bo->base.vram_no_cpu_access) {
- p_atomic_add(&ws->allocated_vram,
- align64(bo->size, ws->info.gart_page_size));
- } else {
- p_atomic_add(&ws->allocated_vram_vis,
- align64(bo->size, ws->info.gart_page_size));
- }
- }
-
- if (initial_domain & RADEON_DOMAIN_GTT)
- p_atomic_add(&ws->allocated_gtt,
- align64(bo->size, ws->info.gart_page_size));
-
- if (ws->debug_all_bos)
- radv_amdgpu_global_bo_list_add(ws, bo);
- radv_amdgpu_log_bo(ws, bo, false);
-
- return (struct radeon_winsys_bo *)bo;
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ struct radv_amdgpu_winsys_bo *bo;
+ struct amdgpu_bo_alloc_request request = {0};
+ struct radv_amdgpu_map_range *ranges = NULL;
+ amdgpu_bo_handle buf_handle;
+ uint64_t va = 0;
+ amdgpu_va_handle va_handle;
+ int r;
+ bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
+ if (!bo) {
+ return NULL;
+ }
+
+ unsigned virt_alignment = alignment;
+ if (size >= ws->info.pte_fragment_size)
+ virt_alignment = MAX2(virt_alignment, ws->info.pte_fragment_size);
+
+ r = amdgpu_va_range_alloc(
+ ws->dev, amdgpu_gpu_va_range_general, size, virt_alignment, 0, &va, &va_handle,
+ (flags & RADEON_FLAG_32BIT ? AMDGPU_VA_RANGE_32_BIT : 0) | AMDGPU_VA_RANGE_HIGH);
+ if (r)
+ goto error_va_alloc;
+
+ bo->base.va = va;
+ bo->va_handle = va_handle;
+ bo->size = size;
+ bo->is_virtual = !!(flags & RADEON_FLAG_VIRTUAL);
+ bo->ref_count = 1;
+
+ if (flags & RADEON_FLAG_VIRTUAL) {
+ ranges = realloc(NULL, sizeof(struct radv_amdgpu_map_range));
+ if (!ranges)
+ goto error_ranges_alloc;
+
+ bo->ranges = ranges;
+ bo->range_count = 1;
+ bo->range_capacity = 1;
+
+ bo->ranges[0].offset = 0;
+ bo->ranges[0].size = size;
+ bo->ranges[0].bo = NULL;
+ bo->ranges[0].bo_offset = 0;
+
+ radv_amdgpu_winsys_virtual_map(ws, bo, bo->ranges);
+ radv_amdgpu_log_bo(ws, bo, false);
+
+ return (struct radeon_winsys_bo *)bo;
+ }
+
+ request.alloc_size = size;
+ request.phys_alignment = alignment;
+
+ if (initial_domain & RADEON_DOMAIN_VRAM) {
+ request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;
+
+ /* Since VRAM and GTT have almost the same performance on
+ * APUs, we could just set GTT. However, in order to decrease
+ * GTT(RAM) usage, which is shared with the OS, allow VRAM
+ * placements too. The idea is not that VRAM will be used more
+ * effectively, but that it is used at all instead of being left
+ * unused and wasted.
+ *
+ * Furthermore, even on discrete GPUs this is beneficial. If
+ * both GTT and VRAM are set then AMDGPU still prefers VRAM
+ * for the initial placement, but it makes the buffers
+ * spillable. Otherwise AMDGPU tries to place the buffers in
+ * VRAM really hard to the extent that we are getting a lot
+ * of unnecessary movement. This helps significantly when
+ * e.g. Horizon Zero Dawn allocates more memory than we have
+ * VRAM.
+ */
+ request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
+ }
+
+ if (initial_domain & RADEON_DOMAIN_GTT)
+ request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
+ if (initial_domain & RADEON_DOMAIN_GDS)
+ request.preferred_heap |= AMDGPU_GEM_DOMAIN_GDS;
+ if (initial_domain & RADEON_DOMAIN_OA)
+ request.preferred_heap |= AMDGPU_GEM_DOMAIN_OA;
+
+ if (flags & RADEON_FLAG_CPU_ACCESS)
+ request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+ if (flags & RADEON_FLAG_NO_CPU_ACCESS) {
+ bo->base.vram_no_cpu_access = initial_domain & RADEON_DOMAIN_VRAM;
+ request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
+ }
+ if (flags & RADEON_FLAG_GTT_WC)
+ request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+ if (!(flags & RADEON_FLAG_IMPLICIT_SYNC))
+ request.flags |= AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
+ if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
+ (ws->use_local_bos || (flags & RADEON_FLAG_PREFER_LOCAL_BO))) {
+ bo->base.is_local = true;
+ request.flags |= AMDGPU_GEM_CREATE_VM_ALWAYS_VALID;
+ }
+
+ /* This won't do anything on pre-4.9 kernels. */
+ if (initial_domain & RADEON_DOMAIN_VRAM) {
+ if (ws->zero_all_vram_allocs || (flags & RADEON_FLAG_ZERO_VRAM))
+ request.flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED;
+ }
+
+ r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
+ if (r) {
+ fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
+ fprintf(stderr, "amdgpu: size : %" PRIu64 " bytes\n", size);
+ fprintf(stderr, "amdgpu: alignment : %u bytes\n", alignment);
+ fprintf(stderr, "amdgpu: domains : %u\n", initial_domain);
+ goto error_bo_alloc;
+ }
+
+ r = radv_amdgpu_bo_va_op(ws, buf_handle, 0, size, va, flags, 0, AMDGPU_VA_OP_MAP);
+ if (r)
+ goto error_va_map;
+
+ bo->bo = buf_handle;
+ bo->base.initial_domain = initial_domain;
+ bo->base.use_global_list = bo->base.is_local;
+ bo->is_shared = false;
+ bo->priority = priority;
+
+ r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
+ assert(!r);
+
+ if (initial_domain & RADEON_DOMAIN_VRAM) {
+ /* Buffers allocated in VRAM with the NO_CPU_ACCESS flag
+ * aren't mappable and they are counted as part of the VRAM
+ * counter.
+ *
+ * Otherwise, buffers with the CPU_ACCESS flag or with neither
+ * flag (imported buffers) are counted as part of the VRAM
+ * visible counter because they can be mapped.
+ */
+ if (bo->base.vram_no_cpu_access) {
+ p_atomic_add(&ws->allocated_vram, align64(bo->size, ws->info.gart_page_size));
+ } else {
+ p_atomic_add(&ws->allocated_vram_vis, align64(bo->size, ws->info.gart_page_size));
+ }
+ }
+
+ if (initial_domain & RADEON_DOMAIN_GTT)
+ p_atomic_add(&ws->allocated_gtt, align64(bo->size, ws->info.gart_page_size));
+
+ if (ws->debug_all_bos)
+ radv_amdgpu_global_bo_list_add(ws, bo);
+ radv_amdgpu_log_bo(ws, bo, false);
+
+ return (struct radeon_winsys_bo *)bo;
error_va_map:
- amdgpu_bo_free(buf_handle);
+ amdgpu_bo_free(buf_handle);
error_bo_alloc:
- free(ranges);
+ free(ranges);
error_ranges_alloc:
- amdgpu_va_range_free(va_handle);
+ amdgpu_va_range_free(va_handle);
error_va_alloc:
- FREE(bo);
- return NULL;
+ FREE(bo);
+ return NULL;
}
static void *
radv_amdgpu_winsys_bo_map(struct radeon_winsys_bo *_bo)
{
- struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
- int ret;
- void *data;
- ret = amdgpu_bo_cpu_map(bo->bo, &data);
- if (ret)
- return NULL;
- return data;
+ struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
+ int ret;
+ void *data;
+ ret = amdgpu_bo_cpu_map(bo->bo, &data);
+ if (ret)
+ return NULL;
+ return data;
}
static void
radv_amdgpu_winsys_bo_unmap(struct radeon_winsys_bo *_bo)
{
- struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
- amdgpu_bo_cpu_unmap(bo->bo);
+ struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
+ amdgpu_bo_cpu_unmap(bo->bo);
}
static uint64_t
-radv_amdgpu_get_optimal_vm_alignment(struct radv_amdgpu_winsys *ws,
- uint64_t size, unsigned alignment)
+radv_amdgpu_get_optimal_vm_alignment(struct radv_amdgpu_winsys *ws, uint64_t size,
+ unsigned alignment)
{
- uint64_t vm_alignment = alignment;
-
- /* Increase the VM alignment for faster address translation. */
- if (size >= ws->info.pte_fragment_size)
- vm_alignment = MAX2(vm_alignment, ws->info.pte_fragment_size);
-
- /* Gfx9: Increase the VM alignment to the most significant bit set
- * in the size for faster address translation.
- */
- if (ws->info.chip_class >= GFX9) {
- unsigned msb = util_last_bit64(size); /* 0 = no bit is set */
- uint64_t msb_alignment = msb ? 1ull << (msb - 1) : 0;
-
- vm_alignment = MAX2(vm_alignment, msb_alignment);
- }
- return vm_alignment;
+ uint64_t vm_alignment = alignment;
+
+ /* Increase the VM alignment for faster address translation. */
+ if (size >= ws->info.pte_fragment_size)
+ vm_alignment = MAX2(vm_alignment, ws->info.pte_fragment_size);
+
+ /* Gfx9: Increase the VM alignment to the most significant bit set
+ * in the size for faster address translation.
+ */
+ if (ws->info.chip_class >= GFX9) {
+ unsigned msb = util_last_bit64(size); /* 0 = no bit is set */
+ uint64_t msb_alignment = msb ? 1ull << (msb - 1) : 0;
+
+ vm_alignment = MAX2(vm_alignment, msb_alignment);
+ }
+ return vm_alignment;
}
static struct radeon_winsys_bo *
-radv_amdgpu_winsys_bo_from_ptr(struct radeon_winsys *_ws,
- void *pointer,
- uint64_t size,
- unsigned priority)
+radv_amdgpu_winsys_bo_from_ptr(struct radeon_winsys *_ws, void *pointer, uint64_t size,
+ unsigned priority)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
- amdgpu_bo_handle buf_handle;
- struct radv_amdgpu_winsys_bo *bo;
- uint64_t va;
- amdgpu_va_handle va_handle;
- uint64_t vm_alignment;
-
- bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
- if (!bo)
- return NULL;
-
- if (amdgpu_create_bo_from_user_mem(ws->dev, pointer, size, &buf_handle))
- goto error;
-
- /* Using the optimal VM alignment also fixes GPU hangs for buffers that
- * are imported.
- */
- vm_alignment = radv_amdgpu_get_optimal_vm_alignment(ws, size,
- ws->info.gart_page_size);
-
- if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
- size, vm_alignment, 0, &va, &va_handle,
- AMDGPU_VA_RANGE_HIGH))
- goto error_va_alloc;
-
- if (amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP))
- goto error_va_map;
-
- /* Initialize it */
- bo->base.va = va;
- bo->va_handle = va_handle;
- bo->size = size;
- bo->ref_count = 1;
- bo->bo = buf_handle;
- bo->base.initial_domain = RADEON_DOMAIN_GTT;
- bo->base.use_global_list = false;
- bo->priority = priority;
-
- ASSERTED int r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
- assert(!r);
-
- p_atomic_add(&ws->allocated_gtt,
- align64(bo->size, ws->info.gart_page_size));
-
- if (ws->debug_all_bos)
- radv_amdgpu_global_bo_list_add(ws, bo);
- radv_amdgpu_log_bo(ws, bo, false);
-
- return (struct radeon_winsys_bo *)bo;
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ amdgpu_bo_handle buf_handle;
+ struct radv_amdgpu_winsys_bo *bo;
+ uint64_t va;
+ amdgpu_va_handle va_handle;
+ uint64_t vm_alignment;
+
+ bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
+ if (!bo)
+ return NULL;
+
+ if (amdgpu_create_bo_from_user_mem(ws->dev, pointer, size, &buf_handle))
+ goto error;
+
+ /* Using the optimal VM alignment also fixes GPU hangs for buffers that
+ * are imported.
+ */
+ vm_alignment = radv_amdgpu_get_optimal_vm_alignment(ws, size, ws->info.gart_page_size);
+
+ if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, size, vm_alignment, 0, &va,
+ &va_handle, AMDGPU_VA_RANGE_HIGH))
+ goto error_va_alloc;
+
+ if (amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP))
+ goto error_va_map;
+
+ /* Initialize it */
+ bo->base.va = va;
+ bo->va_handle = va_handle;
+ bo->size = size;
+ bo->ref_count = 1;
+ bo->bo = buf_handle;
+ bo->base.initial_domain = RADEON_DOMAIN_GTT;
+ bo->base.use_global_list = false;
+ bo->priority = priority;
+
+ ASSERTED int r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
+ assert(!r);
+
+ p_atomic_add(&ws->allocated_gtt, align64(bo->size, ws->info.gart_page_size));
+
+ if (ws->debug_all_bos)
+ radv_amdgpu_global_bo_list_add(ws, bo);
+ radv_amdgpu_log_bo(ws, bo, false);
+
+ return (struct radeon_winsys_bo *)bo;
error_va_map:
- amdgpu_va_range_free(va_handle);
+ amdgpu_va_range_free(va_handle);
error_va_alloc:
- amdgpu_bo_free(buf_handle);
+ amdgpu_bo_free(buf_handle);
error:
- FREE(bo);
- return NULL;
+ FREE(bo);
+ return NULL;
}
static struct radeon_winsys_bo *
-radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws,
- int fd, unsigned priority,
- uint64_t *alloc_size)
+radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws, int fd, unsigned priority,
+ uint64_t *alloc_size)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
- struct radv_amdgpu_winsys_bo *bo;
- uint64_t va;
- amdgpu_va_handle va_handle;
- enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
- struct amdgpu_bo_import_result result = {0};
- struct amdgpu_bo_info info = {0};
- enum radeon_bo_domain initial = 0;
- int r;
- bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
- if (!bo)
- return NULL;
-
- r = amdgpu_bo_import(ws->dev, type, fd, &result);
- if (r)
- goto error;
-
- r = amdgpu_bo_query_info(result.buf_handle, &info);
- if (r)
- goto error_query;
-
- if (alloc_size) {
- *alloc_size = info.alloc_size;
- }
-
- r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
- result.alloc_size, 1 << 20, 0, &va, &va_handle,
- AMDGPU_VA_RANGE_HIGH);
- if (r)
- goto error_query;
-
- r = radv_amdgpu_bo_va_op(ws, result.buf_handle, 0, result.alloc_size,
- va, 0, 0, AMDGPU_VA_OP_MAP);
- if (r)
- goto error_va_map;
-
- if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
- initial |= RADEON_DOMAIN_VRAM;
- if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
- initial |= RADEON_DOMAIN_GTT;
-
- bo->bo = result.buf_handle;
- bo->base.va = va;
- bo->va_handle = va_handle;
- bo->base.initial_domain = initial;
- bo->base.use_global_list = false;
- bo->size = result.alloc_size;
- bo->is_shared = true;
- bo->priority = priority;
- bo->ref_count = 1;
-
- r = amdgpu_bo_export(result.buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
- assert(!r);
-
- if (bo->base.initial_domain & RADEON_DOMAIN_VRAM)
- p_atomic_add(&ws->allocated_vram,
- align64(bo->size, ws->info.gart_page_size));
- if (bo->base.initial_domain & RADEON_DOMAIN_GTT)
- p_atomic_add(&ws->allocated_gtt,
- align64(bo->size, ws->info.gart_page_size));
-
- if (ws->debug_all_bos)
- radv_amdgpu_global_bo_list_add(ws, bo);
- radv_amdgpu_log_bo(ws, bo, false);
-
- return (struct radeon_winsys_bo *)bo;
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ struct radv_amdgpu_winsys_bo *bo;
+ uint64_t va;
+ amdgpu_va_handle va_handle;
+ enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
+ struct amdgpu_bo_import_result result = {0};
+ struct amdgpu_bo_info info = {0};
+ enum radeon_bo_domain initial = 0;
+ int r;
+ bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
+ if (!bo)
+ return NULL;
+
+ r = amdgpu_bo_import(ws->dev, type, fd, &result);
+ if (r)
+ goto error;
+
+ r = amdgpu_bo_query_info(result.buf_handle, &info);
+ if (r)
+ goto error_query;
+
+ if (alloc_size) {
+ *alloc_size = info.alloc_size;
+ }
+
+ r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, result.alloc_size, 1 << 20, 0,
+ &va, &va_handle, AMDGPU_VA_RANGE_HIGH);
+ if (r)
+ goto error_query;
+
+ r =
+ radv_amdgpu_bo_va_op(ws, result.buf_handle, 0, result.alloc_size, va, 0, 0, AMDGPU_VA_OP_MAP);
+ if (r)
+ goto error_va_map;
+
+ if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
+ initial |= RADEON_DOMAIN_VRAM;
+ if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
+ initial |= RADEON_DOMAIN_GTT;
+
+ bo->bo = result.buf_handle;
+ bo->base.va = va;
+ bo->va_handle = va_handle;
+ bo->base.initial_domain = initial;
+ bo->base.use_global_list = false;
+ bo->size = result.alloc_size;
+ bo->is_shared = true;
+ bo->priority = priority;
+ bo->ref_count = 1;
+
+ r = amdgpu_bo_export(result.buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
+ assert(!r);
+
+ if (bo->base.initial_domain & RADEON_DOMAIN_VRAM)
+ p_atomic_add(&ws->allocated_vram, align64(bo->size, ws->info.gart_page_size));
+ if (bo->base.initial_domain & RADEON_DOMAIN_GTT)
+ p_atomic_add(&ws->allocated_gtt, align64(bo->size, ws->info.gart_page_size));
+
+ if (ws->debug_all_bos)
+ radv_amdgpu_global_bo_list_add(ws, bo);
+ radv_amdgpu_log_bo(ws, bo, false);
+
+ return (struct radeon_winsys_bo *)bo;
error_va_map:
- amdgpu_va_range_free(va_handle);
+ amdgpu_va_range_free(va_handle);
error_query:
- amdgpu_bo_free(result.buf_handle);
+ amdgpu_bo_free(result.buf_handle);
error:
- FREE(bo);
- return NULL;
+ FREE(bo);
+ return NULL;
}
static bool
-radv_amdgpu_winsys_get_fd(struct radeon_winsys *_ws,
- struct radeon_winsys_bo *_bo,
- int *fd)
+radv_amdgpu_winsys_get_fd(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo, int *fd)
{
- struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
- enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
- int r;
- unsigned handle;
- r = amdgpu_bo_export(bo->bo, type, &handle);
- if (r)
- return false;
-
- *fd = (int)handle;
- bo->is_shared = true;
- return true;
+ struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
+ enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
+ int r;
+ unsigned handle;
+ r = amdgpu_bo_export(bo->bo, type, &handle);
+ if (r)
+ return false;
+
+ *fd = (int)handle;
+ bo->is_shared = true;
+ return true;
}
static bool
-radv_amdgpu_bo_get_flags_from_fd(struct radeon_winsys *_ws, int fd,
- enum radeon_bo_domain *domains,
+radv_amdgpu_bo_get_flags_from_fd(struct radeon_winsys *_ws, int fd, enum radeon_bo_domain *domains,
enum radeon_bo_flag *flags)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
- struct amdgpu_bo_import_result result = {0};
- struct amdgpu_bo_info info = {0};
- int r;
-
- *domains = 0;
- *flags = 0;
-
- r = amdgpu_bo_import(ws->dev, amdgpu_bo_handle_type_dma_buf_fd, fd, &result);
- if (r)
- return false;
-
- r = amdgpu_bo_query_info(result.buf_handle, &info);
- amdgpu_bo_free(result.buf_handle);
- if (r)
- return false;
-
- if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
- *domains |= RADEON_DOMAIN_VRAM;
- if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
- *domains |= RADEON_DOMAIN_GTT;
- if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GDS)
- *domains |= RADEON_DOMAIN_GDS;
- if (info.preferred_heap & AMDGPU_GEM_DOMAIN_OA)
- *domains |= RADEON_DOMAIN_OA;
-
- if (info.alloc_flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
- *flags |= RADEON_FLAG_CPU_ACCESS;
- if (info.alloc_flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
- *flags |= RADEON_FLAG_NO_CPU_ACCESS;
- if (!(info.alloc_flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC))
- *flags |= RADEON_FLAG_IMPLICIT_SYNC;
- if (info.alloc_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
- *flags |= RADEON_FLAG_GTT_WC;
- if (info.alloc_flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
- *flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_PREFER_LOCAL_BO;
- if (info.alloc_flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
- *flags |= RADEON_FLAG_ZERO_VRAM;
- return true;
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ struct amdgpu_bo_import_result result = {0};
+ struct amdgpu_bo_info info = {0};
+ int r;
+
+ *domains = 0;
+ *flags = 0;
+
+ r = amdgpu_bo_import(ws->dev, amdgpu_bo_handle_type_dma_buf_fd, fd, &result);
+ if (r)
+ return false;
+
+ r = amdgpu_bo_query_info(result.buf_handle, &info);
+ amdgpu_bo_free(result.buf_handle);
+ if (r)
+ return false;
+
+ if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
+ *domains |= RADEON_DOMAIN_VRAM;
+ if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
+ *domains |= RADEON_DOMAIN_GTT;
+ if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GDS)
+ *domains |= RADEON_DOMAIN_GDS;
+ if (info.preferred_heap & AMDGPU_GEM_DOMAIN_OA)
+ *domains |= RADEON_DOMAIN_OA;
+
+ if (info.alloc_flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
+ *flags |= RADEON_FLAG_CPU_ACCESS;
+ if (info.alloc_flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
+ *flags |= RADEON_FLAG_NO_CPU_ACCESS;
+ if (!(info.alloc_flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC))
+ *flags |= RADEON_FLAG_IMPLICIT_SYNC;
+ if (info.alloc_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
+ *flags |= RADEON_FLAG_GTT_WC;
+ if (info.alloc_flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
+ *flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_PREFER_LOCAL_BO;
+ if (info.alloc_flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
+ *flags |= RADEON_FLAG_ZERO_VRAM;
+ return true;
}
-static unsigned eg_tile_split(unsigned tile_split)
+static unsigned
+eg_tile_split(unsigned tile_split)
{
- switch (tile_split) {
- case 0: tile_split = 64; break;
- case 1: tile_split = 128; break;
- case 2: tile_split = 256; break;
- case 3: tile_split = 512; break;
- default:
- case 4: tile_split = 1024; break;
- case 5: tile_split = 2048; break;
- case 6: tile_split = 4096; break;
- }
- return tile_split;
+ switch (tile_split) {
+ case 0:
+ tile_split = 64;
+ break;
+ case 1:
+ tile_split = 128;
+ break;
+ case 2:
+ tile_split = 256;
+ break;
+ case 3:
+ tile_split = 512;
+ break;
+ default:
+ case 4:
+ tile_split = 1024;
+ break;
+ case 5:
+ tile_split = 2048;
+ break;
+ case 6:
+ tile_split = 4096;
+ break;
+ }
+ return tile_split;
}
-static unsigned radv_eg_tile_split_rev(unsigned eg_tile_split)
+static unsigned
+radv_eg_tile_split_rev(unsigned eg_tile_split)
{
- switch (eg_tile_split) {
- case 64: return 0;
- case 128: return 1;
- case 256: return 2;
- case 512: return 3;
- default:
- case 1024: return 4;
- case 2048: return 5;
- case 4096: return 6;
- }
+ switch (eg_tile_split) {
+ case 64:
+ return 0;
+ case 128:
+ return 1;
+ case 256:
+ return 2;
+ case 512:
+ return 3;
+ default:
+ case 1024:
+ return 4;
+ case 2048:
+ return 5;
+ case 4096:
+ return 6;
+ }
}
-#define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_SHIFT 45
-#define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_MASK 0x3
+#define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_SHIFT 45
+#define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_MASK 0x3
static void
-radv_amdgpu_winsys_bo_set_metadata(struct radeon_winsys *_ws,
- struct radeon_winsys_bo *_bo,
- struct radeon_bo_metadata *md)
+radv_amdgpu_winsys_bo_set_metadata(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo,
+ struct radeon_bo_metadata *md)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
- struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
- struct amdgpu_bo_metadata metadata = {0};
- uint64_t tiling_flags = 0;
-
- if (ws->info.chip_class >= GFX9) {
- tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, md->u.gfx9.swizzle_mode);
- tiling_flags |= AMDGPU_TILING_SET(DCC_OFFSET_256B, md->u.gfx9.dcc_offset_256b);
- tiling_flags |= AMDGPU_TILING_SET(DCC_PITCH_MAX, md->u.gfx9.dcc_pitch_max);
- tiling_flags |= AMDGPU_TILING_SET(DCC_INDEPENDENT_64B, md->u.gfx9.dcc_independent_64b_blocks);
- tiling_flags |= AMDGPU_TILING_SET(DCC_INDEPENDENT_128B, md->u.gfx9.dcc_independent_128b_blocks);
- tiling_flags |= AMDGPU_TILING_SET(DCC_MAX_COMPRESSED_BLOCK_SIZE, md->u.gfx9.dcc_max_compressed_block_size);
- tiling_flags |= AMDGPU_TILING_SET(SCANOUT, md->u.gfx9.scanout);
- } else {
- if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
- tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
- else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
- tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
- else
- tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */
-
- tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, md->u.legacy.pipe_config);
- tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(md->u.legacy.bankw));
- tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(md->u.legacy.bankh));
- if (md->u.legacy.tile_split)
- tiling_flags |= AMDGPU_TILING_SET(TILE_SPLIT, radv_eg_tile_split_rev(md->u.legacy.tile_split));
- tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(md->u.legacy.mtilea));
- tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(md->u.legacy.num_banks)-1);
-
- if (md->u.legacy.scanout)
- tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
- else
- tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */
- }
-
- metadata.tiling_info = tiling_flags;
- metadata.size_metadata = md->size_metadata;
- memcpy(metadata.umd_metadata, md->metadata, sizeof(md->metadata));
-
- amdgpu_bo_set_metadata(bo->bo, &metadata);
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
+ struct amdgpu_bo_metadata metadata = {0};
+ uint64_t tiling_flags = 0;
+
+ if (ws->info.chip_class >= GFX9) {
+ tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, md->u.gfx9.swizzle_mode);
+ tiling_flags |= AMDGPU_TILING_SET(DCC_OFFSET_256B, md->u.gfx9.dcc_offset_256b);
+ tiling_flags |= AMDGPU_TILING_SET(DCC_PITCH_MAX, md->u.gfx9.dcc_pitch_max);
+ tiling_flags |= AMDGPU_TILING_SET(DCC_INDEPENDENT_64B, md->u.gfx9.dcc_independent_64b_blocks);
+ tiling_flags |=
+ AMDGPU_TILING_SET(DCC_INDEPENDENT_128B, md->u.gfx9.dcc_independent_128b_blocks);
+ tiling_flags |=
+ AMDGPU_TILING_SET(DCC_MAX_COMPRESSED_BLOCK_SIZE, md->u.gfx9.dcc_max_compressed_block_size);
+ tiling_flags |= AMDGPU_TILING_SET(SCANOUT, md->u.gfx9.scanout);
+ } else {
+ if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
+ tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
+ else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
+ tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
+ else
+ tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */
+
+ tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, md->u.legacy.pipe_config);
+ tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(md->u.legacy.bankw));
+ tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(md->u.legacy.bankh));
+ if (md->u.legacy.tile_split)
+ tiling_flags |=
+ AMDGPU_TILING_SET(TILE_SPLIT, radv_eg_tile_split_rev(md->u.legacy.tile_split));
+ tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(md->u.legacy.mtilea));
+ tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(md->u.legacy.num_banks) - 1);
+
+ if (md->u.legacy.scanout)
+ tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
+ else
+ tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */
+ }
+
+ metadata.tiling_info = tiling_flags;
+ metadata.size_metadata = md->size_metadata;
+ memcpy(metadata.umd_metadata, md->metadata, sizeof(md->metadata));
+
+ amdgpu_bo_set_metadata(bo->bo, &metadata);
}
static void
-radv_amdgpu_winsys_bo_get_metadata(struct radeon_winsys *_ws,
- struct radeon_winsys_bo *_bo,
+radv_amdgpu_winsys_bo_get_metadata(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo,
struct radeon_bo_metadata *md)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
- struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
- struct amdgpu_bo_info info = {0};
-
- int r = amdgpu_bo_query_info(bo->bo, &info);
- if (r)
- return;
-
- uint64_t tiling_flags = info.metadata.tiling_info;
-
- if (ws->info.chip_class >= GFX9) {
- md->u.gfx9.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE);
- md->u.gfx9.scanout = AMDGPU_TILING_GET(tiling_flags, SCANOUT);
- } else {
- md->u.legacy.microtile = RADEON_LAYOUT_LINEAR;
- md->u.legacy.macrotile = RADEON_LAYOUT_LINEAR;
-
- if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4) /* 2D_TILED_THIN1 */
- md->u.legacy.macrotile = RADEON_LAYOUT_TILED;
- else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */
- md->u.legacy.microtile = RADEON_LAYOUT_TILED;
-
- md->u.legacy.pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
- md->u.legacy.bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
- md->u.legacy.bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
- md->u.legacy.tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT));
- md->u.legacy.mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
- md->u.legacy.num_banks = 2 << AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
- md->u.legacy.scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */
- }
-
- md->size_metadata = info.metadata.size_metadata;
- memcpy(md->metadata, info.metadata.umd_metadata, sizeof(md->metadata));
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
+ struct amdgpu_bo_info info = {0};
+
+ int r = amdgpu_bo_query_info(bo->bo, &info);
+ if (r)
+ return;
+
+ uint64_t tiling_flags = info.metadata.tiling_info;
+
+ if (ws->info.chip_class >= GFX9) {
+ md->u.gfx9.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE);
+ md->u.gfx9.scanout = AMDGPU_TILING_GET(tiling_flags, SCANOUT);
+ } else {
+ md->u.legacy.microtile = RADEON_LAYOUT_LINEAR;
+ md->u.legacy.macrotile = RADEON_LAYOUT_LINEAR;
+
+ if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4) /* 2D_TILED_THIN1 */
+ md->u.legacy.macrotile = RADEON_LAYOUT_TILED;
+ else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */
+ md->u.legacy.microtile = RADEON_LAYOUT_TILED;
+
+ md->u.legacy.pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
+ md->u.legacy.bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
+ md->u.legacy.bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
+ md->u.legacy.tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT));
+ md->u.legacy.mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
+ md->u.legacy.num_banks = 2 << AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
+ md->u.legacy.scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */
+ }
+
+ md->size_metadata = info.metadata.size_metadata;
+ memcpy(md->metadata, info.metadata.umd_metadata, sizeof(md->metadata));
}
static VkResult
-radv_amdgpu_winsys_bo_make_resident(struct radeon_winsys *_ws,
- struct radeon_winsys_bo *_bo,
- bool resident)
+radv_amdgpu_winsys_bo_make_resident(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo,
+ bool resident)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
- struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
- VkResult result = VK_SUCCESS;
-
- /* Do not add the BO to the global list if it's a local BO because the
- * kernel maintains a list for us.
- */
- if (bo->base.is_local)
- return VK_SUCCESS;
-
- /* Do not add the BO twice to the global list if the allbos debug
- * option is enabled.
- */
- if (ws->debug_all_bos)
- return VK_SUCCESS;
-
- if (resident) {
- result = radv_amdgpu_global_bo_list_add(ws, bo);
- } else {
- radv_amdgpu_global_bo_list_del(ws, bo);
- }
-
- return result;
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
+ VkResult result = VK_SUCCESS;
+
+ /* Do not add the BO to the global list if it's a local BO because the
+ * kernel maintains a list for us.
+ */
+ if (bo->base.is_local)
+ return VK_SUCCESS;
+
+ /* Do not add the BO twice to the global list if the allbos debug
+ * option is enabled.
+ */
+ if (ws->debug_all_bos)
+ return VK_SUCCESS;
+
+ if (resident) {
+ result = radv_amdgpu_global_bo_list_add(ws, bo);
+ } else {
+ radv_amdgpu_global_bo_list_del(ws, bo);
+ }
+
+ return result;
}
-static int radv_amdgpu_bo_va_compare(const void *a, const void *b)
+static int
+radv_amdgpu_bo_va_compare(const void *a, const void *b)
{
- const struct radv_amdgpu_winsys_bo *bo_a = *(const struct radv_amdgpu_winsys_bo * const*)a;
- const struct radv_amdgpu_winsys_bo *bo_b = *(const struct radv_amdgpu_winsys_bo * const*)b;
- return bo_a->base.va < bo_b->base.va ? -1 : bo_a->base.va > bo_b->base.va ? 1 : 0;
+ const struct radv_amdgpu_winsys_bo *bo_a = *(const struct radv_amdgpu_winsys_bo *const *)a;
+ const struct radv_amdgpu_winsys_bo *bo_b = *(const struct radv_amdgpu_winsys_bo *const *)b;
+ return bo_a->base.va < bo_b->base.va ? -1 : bo_a->base.va > bo_b->base.va ? 1 : 0;
}
-static void radv_amdgpu_dump_bo_log(struct radeon_winsys *_ws, FILE *file)
+static void
+radv_amdgpu_dump_bo_log(struct radeon_winsys *_ws, FILE *file)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
- struct radv_amdgpu_winsys_bo_log *bo_log;
-
- if (!ws->debug_log_bos)
- return;
-
- u_rwlock_rdlock(&ws->log_bo_list_lock);
- LIST_FOR_EACH_ENTRY(bo_log, &ws->log_bo_list, list) {
- fprintf(file, "timestamp=%llu, VA=%.16llx-%.16llx, destroyed=%d, is_virtual=%d\n",
- (long long)bo_log->timestamp, (long long)bo_log->va,
- (long long)(bo_log->va + bo_log->size),
- bo_log->destroyed, bo_log->is_virtual);
- }
- u_rwlock_rdunlock(&ws->log_bo_list_lock);
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ struct radv_amdgpu_winsys_bo_log *bo_log;
+
+ if (!ws->debug_log_bos)
+ return;
+
+ u_rwlock_rdlock(&ws->log_bo_list_lock);
+ LIST_FOR_EACH_ENTRY (bo_log, &ws->log_bo_list, list) {
+ fprintf(file, "timestamp=%llu, VA=%.16llx-%.16llx, destroyed=%d, is_virtual=%d\n",
+ (long long)bo_log->timestamp, (long long)bo_log->va,
+ (long long)(bo_log->va + bo_log->size), bo_log->destroyed, bo_log->is_virtual);
+ }
+ u_rwlock_rdunlock(&ws->log_bo_list_lock);
}
-static void radv_amdgpu_dump_bo_ranges(struct radeon_winsys *_ws, FILE *file)
+static void
+radv_amdgpu_dump_bo_ranges(struct radeon_winsys *_ws, FILE *file)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
- if (ws->debug_all_bos) {
- struct radv_amdgpu_winsys_bo **bos = NULL;
- int i = 0;
-
- u_rwlock_rdlock(&ws->global_bo_list.lock);
- bos = malloc(sizeof(*bos) * ws->global_bo_list.count);
- if (!bos) {
- u_rwlock_rdunlock(&ws->global_bo_list.lock);
- fprintf(file, " Failed to allocate memory to sort VA ranges for dumping\n");
- return;
- }
-
- for (i = 0; i < ws->global_bo_list.count; i++) {
- bos[i] = ws->global_bo_list.bos[i];
- }
- qsort(bos, ws->global_bo_list.count, sizeof(bos[0]), radv_amdgpu_bo_va_compare);
-
- for (i = 0; i < ws->global_bo_list.count; ++i) {
- fprintf(file, " VA=%.16llx-%.16llx, handle=%d%s\n",
- (long long)bos[i]->base.va, (long long)(bos[i]->base.va + bos[i]->size),
- bos[i]->bo_handle, bos[i]->is_virtual ? " sparse" : "");
- }
- free(bos);
- u_rwlock_rdunlock(&ws->global_bo_list.lock);
- } else
- fprintf(file, " To get BO VA ranges, please specify RADV_DEBUG=allbos\n");
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ if (ws->debug_all_bos) {
+ struct radv_amdgpu_winsys_bo **bos = NULL;
+ int i = 0;
+
+ u_rwlock_rdlock(&ws->global_bo_list.lock);
+ bos = malloc(sizeof(*bos) * ws->global_bo_list.count);
+ if (!bos) {
+ u_rwlock_rdunlock(&ws->global_bo_list.lock);
+ fprintf(file, " Failed to allocate memory to sort VA ranges for dumping\n");
+ return;
+ }
+
+ for (i = 0; i < ws->global_bo_list.count; i++) {
+ bos[i] = ws->global_bo_list.bos[i];
+ }
+ qsort(bos, ws->global_bo_list.count, sizeof(bos[0]), radv_amdgpu_bo_va_compare);
+
+ for (i = 0; i < ws->global_bo_list.count; ++i) {
+ fprintf(file, " VA=%.16llx-%.16llx, handle=%d%s\n", (long long)bos[i]->base.va,
+ (long long)(bos[i]->base.va + bos[i]->size), bos[i]->bo_handle,
+ bos[i]->is_virtual ? " sparse" : "");
+ }
+ free(bos);
+ u_rwlock_rdunlock(&ws->global_bo_list.lock);
+ } else
+ fprintf(file, " To get BO VA ranges, please specify RADV_DEBUG=allbos\n");
}
-void radv_amdgpu_bo_init_functions(struct radv_amdgpu_winsys *ws)
+void
+radv_amdgpu_bo_init_functions(struct radv_amdgpu_winsys *ws)
{
- ws->base.buffer_create = radv_amdgpu_winsys_bo_create;
- ws->base.buffer_destroy = radv_amdgpu_winsys_bo_destroy;
- ws->base.buffer_map = radv_amdgpu_winsys_bo_map;
- ws->base.buffer_unmap = radv_amdgpu_winsys_bo_unmap;
- ws->base.buffer_from_ptr = radv_amdgpu_winsys_bo_from_ptr;
- ws->base.buffer_from_fd = radv_amdgpu_winsys_bo_from_fd;
- ws->base.buffer_get_fd = radv_amdgpu_winsys_get_fd;
- ws->base.buffer_set_metadata = radv_amdgpu_winsys_bo_set_metadata;
- ws->base.buffer_get_metadata = radv_amdgpu_winsys_bo_get_metadata;
- ws->base.buffer_virtual_bind = radv_amdgpu_winsys_bo_virtual_bind;
- ws->base.buffer_get_flags_from_fd = radv_amdgpu_bo_get_flags_from_fd;
- ws->base.buffer_make_resident = radv_amdgpu_winsys_bo_make_resident;
- ws->base.dump_bo_ranges = radv_amdgpu_dump_bo_ranges;
- ws->base.dump_bo_log = radv_amdgpu_dump_bo_log;
+ ws->base.buffer_create = radv_amdgpu_winsys_bo_create;
+ ws->base.buffer_destroy = radv_amdgpu_winsys_bo_destroy;
+ ws->base.buffer_map = radv_amdgpu_winsys_bo_map;
+ ws->base.buffer_unmap = radv_amdgpu_winsys_bo_unmap;
+ ws->base.buffer_from_ptr = radv_amdgpu_winsys_bo_from_ptr;
+ ws->base.buffer_from_fd = radv_amdgpu_winsys_bo_from_fd;
+ ws->base.buffer_get_fd = radv_amdgpu_winsys_get_fd;
+ ws->base.buffer_set_metadata = radv_amdgpu_winsys_bo_set_metadata;
+ ws->base.buffer_get_metadata = radv_amdgpu_winsys_bo_get_metadata;
+ ws->base.buffer_virtual_bind = radv_amdgpu_winsys_bo_virtual_bind;
+ ws->base.buffer_get_flags_from_fd = radv_amdgpu_bo_get_flags_from_fd;
+ ws->base.buffer_make_resident = radv_amdgpu_winsys_bo_make_resident;
+ ws->base.dump_bo_ranges = radv_amdgpu_dump_bo_ranges;
+ ws->base.dump_bo_log = radv_amdgpu_dump_bo_log;
}
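
For readers skimming the reformatted buffer_set_metadata/buffer_get_metadata pair above: the 64-bit tiling_info word is packed and unpacked with the AMDGPU_TILING_SET/AMDGPU_TILING_GET macros from drm-uapi/amdgpu_drm.h. Below is a minimal standalone sketch of that round trip, illustrative only and not taken from the patch; the helper names are made up and only the GFX9 swizzle-mode and scanout fields are shown.

#include <stdbool.h>
#include <stdint.h>

#include "drm-uapi/amdgpu_drm.h"

/* Pack a GFX9-style swizzle mode and scanout flag into tiling_info, the same
 * way radv_amdgpu_winsys_bo_set_metadata() does above. */
static uint64_t
pack_gfx9_tiling(uint32_t swizzle_mode, bool scanout)
{
   uint64_t tiling_flags = 0;

   tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, swizzle_mode);
   tiling_flags |= AMDGPU_TILING_SET(SCANOUT, scanout);
   return tiling_flags;
}

/* Recover the same fields, mirroring radv_amdgpu_winsys_bo_get_metadata(). */
static void
unpack_gfx9_tiling(uint64_t tiling_flags, uint32_t *swizzle_mode, bool *scanout)
{
   *swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE);
   *scanout = AMDGPU_TILING_GET(tiling_flags, SCANOUT);
}

The legacy (pre-GFX9) path packs ARRAY_MODE, PIPE_CONFIG, the bank geometry and the micro-tile mode into the same word with the same macros, which is why the set and get paths stay symmetric.
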
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h
index 6284484261d..0beaa7ef727 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h
@@ -31,46 +31,45 @@
#include "radv_amdgpu_winsys.h"
-
struct radv_amdgpu_map_range {
- uint64_t offset;
- uint64_t size;
- struct radv_amdgpu_winsys_bo *bo;
- uint64_t bo_offset;
+ uint64_t offset;
+ uint64_t size;
+ struct radv_amdgpu_winsys_bo *bo;
+ uint64_t bo_offset;
};
struct radv_amdgpu_winsys_bo {
- struct radeon_winsys_bo base;
- amdgpu_va_handle va_handle;
- uint64_t size;
- bool is_virtual;
- uint8_t priority;
- int ref_count;
+ struct radeon_winsys_bo base;
+ amdgpu_va_handle va_handle;
+ uint64_t size;
+ bool is_virtual;
+ uint8_t priority;
+ int ref_count;
- union {
- /* physical bo */
- struct {
- amdgpu_bo_handle bo;
- bool is_shared;
- uint32_t bo_handle;
- };
- /* virtual bo */
- struct {
- struct radv_amdgpu_map_range *ranges;
- uint32_t range_count;
- uint32_t range_capacity;
+ union {
+ /* physical bo */
+ struct {
+ amdgpu_bo_handle bo;
+ bool is_shared;
+ uint32_t bo_handle;
+ };
+ /* virtual bo */
+ struct {
+ struct radv_amdgpu_map_range *ranges;
+ uint32_t range_count;
+ uint32_t range_capacity;
- struct radv_amdgpu_winsys_bo **bos;
- uint32_t bo_count;
- uint32_t bo_capacity;
- };
- };
+ struct radv_amdgpu_winsys_bo **bos;
+ uint32_t bo_count;
+ uint32_t bo_capacity;
+ };
+ };
};
-static inline
-struct radv_amdgpu_winsys_bo *radv_amdgpu_winsys_bo(struct radeon_winsys_bo *bo)
+static inline struct radv_amdgpu_winsys_bo *
+radv_amdgpu_winsys_bo(struct radeon_winsys_bo *bo)
{
- return (struct radv_amdgpu_winsys_bo *)bo;
+ return (struct radv_amdgpu_winsys_bo *)bo;
}
void radv_amdgpu_bo_init_functions(struct radv_amdgpu_winsys *ws);
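
The radv_amdgpu_winsys_bo() helper above is the usual embedded-base downcast: struct radeon_winsys_bo is the first member of the amdgpu-specific wrapper, so the generic handle can be cast back to the wrapper. A minimal sketch of the idiom follows, with hypothetical type names that do not exist in the tree.

#include <stddef.h>
#include <stdint.h>

struct base_bo {
   uint64_t va;
};

struct impl_bo {
   struct base_bo base; /* must stay the first member for the cast below */
   int impl_state;
};

/* offsetof(impl_bo, base) == 0, so a base pointer is also an impl pointer. */
_Static_assert(offsetof(struct impl_bo, base) == 0, "base must be the first member");

static inline struct impl_bo *
impl_bo(struct base_bo *bo)
{
   return (struct impl_bo *)bo;
}
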
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
index 5ecbe777a59..7347f04c8b2 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
@@ -22,1255 +22,1220 @@
* IN THE SOFTWARE.
*/
-#include <stdlib.h>
#include <amdgpu.h>
-#include "drm-uapi/amdgpu_drm.h"
#include <assert.h>
-#include <pthread.h>
#include <errno.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include "drm-uapi/amdgpu_drm.h"
#include "util/u_memory.h"
#include "ac_debug.h"
-#include "radv_radeon_winsys.h"
-#include "radv_amdgpu_cs.h"
#include "radv_amdgpu_bo.h"
+#include "radv_amdgpu_cs.h"
+#include "radv_radeon_winsys.h"
#include "sid.h"
-
-enum {
- VIRTUAL_BUFFER_HASH_TABLE_SIZE = 1024
-};
+enum { VIRTUAL_BUFFER_HASH_TABLE_SIZE = 1024 };
struct radv_amdgpu_cs {
- struct radeon_cmdbuf base;
- struct radv_amdgpu_winsys *ws;
-
- struct amdgpu_cs_ib_info ib;
-
- struct radeon_winsys_bo *ib_buffer;
- uint8_t *ib_mapped;
- unsigned max_num_buffers;
- unsigned num_buffers;
- struct drm_amdgpu_bo_list_entry *handles;
-
- struct radeon_winsys_bo **old_ib_buffers;
- unsigned num_old_ib_buffers;
- unsigned max_num_old_ib_buffers;
- unsigned *ib_size_ptr;
- VkResult status;
- bool is_chained;
-
- int buffer_hash_table[1024];
- unsigned hw_ip;
-
- unsigned num_virtual_buffers;
- unsigned max_num_virtual_buffers;
- struct radeon_winsys_bo **virtual_buffers;
- int *virtual_buffer_hash_table;
-
- /* For chips that don't support chaining. */
- struct radeon_cmdbuf *old_cs_buffers;
- unsigned num_old_cs_buffers;
+ struct radeon_cmdbuf base;
+ struct radv_amdgpu_winsys *ws;
+
+ struct amdgpu_cs_ib_info ib;
+
+ struct radeon_winsys_bo *ib_buffer;
+ uint8_t *ib_mapped;
+ unsigned max_num_buffers;
+ unsigned num_buffers;
+ struct drm_amdgpu_bo_list_entry *handles;
+
+ struct radeon_winsys_bo **old_ib_buffers;
+ unsigned num_old_ib_buffers;
+ unsigned max_num_old_ib_buffers;
+ unsigned *ib_size_ptr;
+ VkResult status;
+ bool is_chained;
+
+ int buffer_hash_table[1024];
+ unsigned hw_ip;
+
+ unsigned num_virtual_buffers;
+ unsigned max_num_virtual_buffers;
+ struct radeon_winsys_bo **virtual_buffers;
+ int *virtual_buffer_hash_table;
+
+ /* For chips that don't support chaining. */
+ struct radeon_cmdbuf *old_cs_buffers;
+ unsigned num_old_cs_buffers;
};
static inline struct radv_amdgpu_cs *
radv_amdgpu_cs(struct radeon_cmdbuf *base)
{
- return (struct radv_amdgpu_cs*)base;
+ return (struct radv_amdgpu_cs *)base;
}
-static int ring_to_hw_ip(enum ring_type ring)
+static int
+ring_to_hw_ip(enum ring_type ring)
{
- switch (ring) {
- case RING_GFX:
- return AMDGPU_HW_IP_GFX;
- case RING_DMA:
- return AMDGPU_HW_IP_DMA;
- case RING_COMPUTE:
- return AMDGPU_HW_IP_COMPUTE;
- default:
- unreachable("unsupported ring");
- }
+ switch (ring) {
+ case RING_GFX:
+ return AMDGPU_HW_IP_GFX;
+ case RING_DMA:
+ return AMDGPU_HW_IP_DMA;
+ case RING_COMPUTE:
+ return AMDGPU_HW_IP_COMPUTE;
+ default:
+ unreachable("unsupported ring");
+ }
}
struct radv_amdgpu_cs_request {
- /** Specify HW IP block type to which to send the IB. */
- unsigned ip_type;
-
- /** IP instance index if there are several IPs of the same type. */
- unsigned ip_instance;
-
- /**
- * Specify ring index of the IP. We could have several rings
- * in the same IP. E.g. 0 for SDMA0 and 1 for SDMA1.
- */
- uint32_t ring;
-
- /**
- * BO list handles used by this request.
- */
- struct drm_amdgpu_bo_list_entry *handles;
- uint32_t num_handles;
-
- /** Number of IBs to submit in the field ibs. */
- uint32_t number_of_ibs;
-
- /**
- * IBs to submit. Those IBs will be submit together as single entity
- */
- struct amdgpu_cs_ib_info *ibs;
-
- /**
- * The returned sequence number for the command submission
- */
- uint64_t seq_no;
+ /** Specify HW IP block type to which to send the IB. */
+ unsigned ip_type;
+
+ /** IP instance index if there are several IPs of the same type. */
+ unsigned ip_instance;
+
+ /**
+ * Specify ring index of the IP. We could have several rings
+ * in the same IP. E.g. 0 for SDMA0 and 1 for SDMA1.
+ */
+ uint32_t ring;
+
+ /**
+ * BO list handles used by this request.
+ */
+ struct drm_amdgpu_bo_list_entry *handles;
+ uint32_t num_handles;
+
+ /** Number of IBs to submit in the field ibs. */
+ uint32_t number_of_ibs;
+
+ /**
+    * IBs to submit. Those IBs will be submitted together as a single entity.
+ */
+ struct amdgpu_cs_ib_info *ibs;
+
+ /**
+ * The returned sequence number for the command submission
+ */
+ uint64_t seq_no;
};
-
static int radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx,
- struct radv_amdgpu_cs_request *request,
- struct radv_winsys_sem_info *sem_info);
+ struct radv_amdgpu_cs_request *request,
+ struct radv_winsys_sem_info *sem_info);
-static void radv_amdgpu_request_to_fence(struct radv_amdgpu_ctx *ctx,
- struct radv_amdgpu_fence *fence,
- struct radv_amdgpu_cs_request *req)
+static void
+radv_amdgpu_request_to_fence(struct radv_amdgpu_ctx *ctx, struct radv_amdgpu_fence *fence,
+ struct radv_amdgpu_cs_request *req)
{
- fence->fence.context = ctx->ctx;
- fence->fence.ip_type = req->ip_type;
- fence->fence.ip_instance = req->ip_instance;
- fence->fence.ring = req->ring;
- fence->fence.fence = req->seq_no;
- fence->user_ptr = (volatile uint64_t*)(ctx->fence_map + req->ip_type * MAX_RINGS_PER_TYPE + req->ring);
+ fence->fence.context = ctx->ctx;
+ fence->fence.ip_type = req->ip_type;
+ fence->fence.ip_instance = req->ip_instance;
+ fence->fence.ring = req->ring;
+ fence->fence.fence = req->seq_no;
+ fence->user_ptr =
+ (volatile uint64_t *)(ctx->fence_map + req->ip_type * MAX_RINGS_PER_TYPE + req->ring);
}
-static void radv_amdgpu_cs_destroy(struct radeon_cmdbuf *rcs)
+static void
+radv_amdgpu_cs_destroy(struct radeon_cmdbuf *rcs)
{
- struct radv_amdgpu_cs *cs = radv_amdgpu_cs(rcs);
-
- if (cs->ib_buffer)
- cs->ws->base.buffer_destroy(&cs->ws->base, cs->ib_buffer);
- else
- free(cs->base.buf);
-
- for (unsigned i = 0; i < cs->num_old_ib_buffers; ++i)
- cs->ws->base.buffer_destroy(&cs->ws->base, cs->old_ib_buffers[i]);
-
- for (unsigned i = 0; i < cs->num_old_cs_buffers; ++i) {
- free(cs->old_cs_buffers[i].buf);
- }
-
- free(cs->old_cs_buffers);
- free(cs->old_ib_buffers);
- free(cs->virtual_buffers);
- free(cs->virtual_buffer_hash_table);
- free(cs->handles);
- free(cs);
+ struct radv_amdgpu_cs *cs = radv_amdgpu_cs(rcs);
+
+ if (cs->ib_buffer)
+ cs->ws->base.buffer_destroy(&cs->ws->base, cs->ib_buffer);
+ else
+ free(cs->base.buf);
+
+ for (unsigned i = 0; i < cs->num_old_ib_buffers; ++i)
+ cs->ws->base.buffer_destroy(&cs->ws->base, cs->old_ib_buffers[i]);
+
+ for (unsigned i = 0; i < cs->num_old_cs_buffers; ++i) {
+ free(cs->old_cs_buffers[i].buf);
+ }
+
+ free(cs->old_cs_buffers);
+ free(cs->old_ib_buffers);
+ free(cs->virtual_buffers);
+ free(cs->virtual_buffer_hash_table);
+ free(cs->handles);
+ free(cs);
}
-static void radv_amdgpu_init_cs(struct radv_amdgpu_cs *cs,
- enum ring_type ring_type)
+static void
+radv_amdgpu_init_cs(struct radv_amdgpu_cs *cs, enum ring_type ring_type)
{
- for (int i = 0; i < ARRAY_SIZE(cs->buffer_hash_table); ++i)
- cs->buffer_hash_table[i] = -1;
+ for (int i = 0; i < ARRAY_SIZE(cs->buffer_hash_table); ++i)
+ cs->buffer_hash_table[i] = -1;
- cs->hw_ip = ring_to_hw_ip(ring_type);
+ cs->hw_ip = ring_to_hw_ip(ring_type);
}
static struct radeon_cmdbuf *
-radv_amdgpu_cs_create(struct radeon_winsys *ws,
- enum ring_type ring_type)
+radv_amdgpu_cs_create(struct radeon_winsys *ws, enum ring_type ring_type)
{
- struct radv_amdgpu_cs *cs;
- uint32_t ib_size = 20 * 1024 * 4;
- cs = calloc(1, sizeof(struct radv_amdgpu_cs));
- if (!cs)
- return NULL;
-
- cs->ws = radv_amdgpu_winsys(ws);
- radv_amdgpu_init_cs(cs, ring_type);
-
- if (cs->ws->use_ib_bos) {
- cs->ib_buffer = ws->buffer_create(ws, ib_size, 0,
- cs->ws->cs_bo_domain,
- RADEON_FLAG_CPU_ACCESS |
- RADEON_FLAG_NO_INTERPROCESS_SHARING |
- RADEON_FLAG_READ_ONLY |
- RADEON_FLAG_GTT_WC,
- RADV_BO_PRIORITY_CS);
- if (!cs->ib_buffer) {
- free(cs);
- return NULL;
- }
-
- cs->ib_mapped = ws->buffer_map(cs->ib_buffer);
- if (!cs->ib_mapped) {
- ws->buffer_destroy(ws, cs->ib_buffer);
- free(cs);
- return NULL;
- }
-
- cs->ib.ib_mc_address = radv_amdgpu_winsys_bo(cs->ib_buffer)->base.va;
- cs->base.buf = (uint32_t *)cs->ib_mapped;
- cs->base.max_dw = ib_size / 4 - 4;
- cs->ib_size_ptr = &cs->ib.size;
- cs->ib.size = 0;
-
- ws->cs_add_buffer(&cs->base, cs->ib_buffer);
- } else {
- uint32_t *buf = malloc(16384);
- if (!buf) {
- free(cs);
- return NULL;
- }
- cs->base.buf = buf;
- cs->base.max_dw = 4096;
- }
-
- return &cs->base;
+ struct radv_amdgpu_cs *cs;
+ uint32_t ib_size = 20 * 1024 * 4;
+ cs = calloc(1, sizeof(struct radv_amdgpu_cs));
+ if (!cs)
+ return NULL;
+
+ cs->ws = radv_amdgpu_winsys(ws);
+ radv_amdgpu_init_cs(cs, ring_type);
+
+ if (cs->ws->use_ib_bos) {
+ cs->ib_buffer =
+ ws->buffer_create(ws, ib_size, 0, cs->ws->cs_bo_domain,
+ RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING |
+ RADEON_FLAG_READ_ONLY | RADEON_FLAG_GTT_WC,
+ RADV_BO_PRIORITY_CS);
+ if (!cs->ib_buffer) {
+ free(cs);
+ return NULL;
+ }
+
+ cs->ib_mapped = ws->buffer_map(cs->ib_buffer);
+ if (!cs->ib_mapped) {
+ ws->buffer_destroy(ws, cs->ib_buffer);
+ free(cs);
+ return NULL;
+ }
+
+ cs->ib.ib_mc_address = radv_amdgpu_winsys_bo(cs->ib_buffer)->base.va;
+ cs->base.buf = (uint32_t *)cs->ib_mapped;
+ cs->base.max_dw = ib_size / 4 - 4;
+ cs->ib_size_ptr = &cs->ib.size;
+ cs->ib.size = 0;
+
+ ws->cs_add_buffer(&cs->base, cs->ib_buffer);
+ } else {
+ uint32_t *buf = malloc(16384);
+ if (!buf) {
+ free(cs);
+ return NULL;
+ }
+ cs->base.buf = buf;
+ cs->base.max_dw = 4096;
+ }
+
+ return &cs->base;
}
-static void radv_amdgpu_cs_grow(struct radeon_cmdbuf *_cs, size_t min_size)
+static void
+radv_amdgpu_cs_grow(struct radeon_cmdbuf *_cs, size_t min_size)
{
- struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
-
- if (cs->status != VK_SUCCESS) {
- cs->base.cdw = 0;
- return;
- }
-
- if (!cs->ws->use_ib_bos) {
- const uint64_t limit_dws = 0xffff8;
- uint64_t ib_dws = MAX2(cs->base.cdw + min_size,
- MIN2(cs->base.max_dw * 2, limit_dws));
-
- /* The total ib size cannot exceed limit_dws dwords. */
- if (ib_dws > limit_dws)
- {
- /* The maximum size in dwords has been reached,
- * try to allocate a new one.
- */
- struct radeon_cmdbuf *old_cs_buffers =
- realloc(cs->old_cs_buffers,
- (cs->num_old_cs_buffers + 1) * sizeof(*cs->old_cs_buffers));
- if (!old_cs_buffers) {
- cs->status = VK_ERROR_OUT_OF_HOST_MEMORY;
- cs->base.cdw = 0;
- return;
- }
- cs->old_cs_buffers = old_cs_buffers;
-
- /* Store the current one for submitting it later. */
- cs->old_cs_buffers[cs->num_old_cs_buffers].cdw = cs->base.cdw;
- cs->old_cs_buffers[cs->num_old_cs_buffers].max_dw = cs->base.max_dw;
- cs->old_cs_buffers[cs->num_old_cs_buffers].buf = cs->base.buf;
- cs->num_old_cs_buffers++;
-
- /* Reset the cs, it will be re-allocated below. */
- cs->base.cdw = 0;
- cs->base.buf = NULL;
-
- /* Re-compute the number of dwords to allocate. */
- ib_dws = MAX2(cs->base.cdw + min_size,
- MIN2(cs->base.max_dw * 2, limit_dws));
- if (ib_dws > limit_dws) {
- fprintf(stderr, "amdgpu: Too high number of "
- "dwords to allocate\n");
- cs->status = VK_ERROR_OUT_OF_HOST_MEMORY;
- return;
- }
- }
-
- uint32_t *new_buf = realloc(cs->base.buf, ib_dws * 4);
- if (new_buf) {
- cs->base.buf = new_buf;
- cs->base.max_dw = ib_dws;
- } else {
- cs->status = VK_ERROR_OUT_OF_HOST_MEMORY;
- cs->base.cdw = 0;
- }
- return;
- }
-
- uint64_t ib_size = MAX2(min_size * 4 + 16, cs->base.max_dw * 4 * 2);
-
- /* max that fits in the chain size field. */
- ib_size = MIN2(ib_size, 0xfffff);
-
- while (!cs->base.cdw || (cs->base.cdw & 7) != 4)
- radeon_emit(&cs->base, PKT3_NOP_PAD);
-
- *cs->ib_size_ptr |= cs->base.cdw + 4;
-
- if (cs->num_old_ib_buffers == cs->max_num_old_ib_buffers) {
- unsigned max_num_old_ib_buffers =
- MAX2(1, cs->max_num_old_ib_buffers * 2);
- struct radeon_winsys_bo **old_ib_buffers =
- realloc(cs->old_ib_buffers,
- max_num_old_ib_buffers * sizeof(void*));
- if (!old_ib_buffers) {
- cs->status = VK_ERROR_OUT_OF_HOST_MEMORY;
- return;
- }
- cs->max_num_old_ib_buffers = max_num_old_ib_buffers;
- cs->old_ib_buffers = old_ib_buffers;
- }
-
- cs->old_ib_buffers[cs->num_old_ib_buffers++] = cs->ib_buffer;
-
- cs->ib_buffer = cs->ws->base.buffer_create(&cs->ws->base, ib_size, 0,
- cs->ws->cs_bo_domain,
- RADEON_FLAG_CPU_ACCESS |
- RADEON_FLAG_NO_INTERPROCESS_SHARING |
- RADEON_FLAG_READ_ONLY |
- RADEON_FLAG_GTT_WC,
- RADV_BO_PRIORITY_CS);
-
- if (!cs->ib_buffer) {
- cs->base.cdw = 0;
- cs->status = VK_ERROR_OUT_OF_DEVICE_MEMORY;
- cs->ib_buffer = cs->old_ib_buffers[--cs->num_old_ib_buffers];
- }
-
- cs->ib_mapped = cs->ws->base.buffer_map(cs->ib_buffer);
- if (!cs->ib_mapped) {
- cs->ws->base.buffer_destroy(&cs->ws->base, cs->ib_buffer);
- cs->base.cdw = 0;
-
- /* VK_ERROR_MEMORY_MAP_FAILED is not valid for vkEndCommandBuffer. */
- cs->status = VK_ERROR_OUT_OF_DEVICE_MEMORY;
- cs->ib_buffer = cs->old_ib_buffers[--cs->num_old_ib_buffers];
- }
-
- cs->ws->base.cs_add_buffer(&cs->base, cs->ib_buffer);
-
- radeon_emit(&cs->base, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
- radeon_emit(&cs->base, radv_amdgpu_winsys_bo(cs->ib_buffer)->base.va);
- radeon_emit(&cs->base, radv_amdgpu_winsys_bo(cs->ib_buffer)->base.va >> 32);
- radeon_emit(&cs->base, S_3F2_CHAIN(1) | S_3F2_VALID(1));
-
- cs->ib_size_ptr = cs->base.buf + cs->base.cdw - 1;
-
- cs->base.buf = (uint32_t *)cs->ib_mapped;
- cs->base.cdw = 0;
- cs->base.max_dw = ib_size / 4 - 4;
-
+ struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
+
+ if (cs->status != VK_SUCCESS) {
+ cs->base.cdw = 0;
+ return;
+ }
+
+ if (!cs->ws->use_ib_bos) {
+ const uint64_t limit_dws = 0xffff8;
+ uint64_t ib_dws = MAX2(cs->base.cdw + min_size, MIN2(cs->base.max_dw * 2, limit_dws));
+
+ /* The total ib size cannot exceed limit_dws dwords. */
+ if (ib_dws > limit_dws) {
+         /* The maximum size in dwords has been reached;
+          * try to allocate a new buffer.
+ */
+ struct radeon_cmdbuf *old_cs_buffers =
+ realloc(cs->old_cs_buffers, (cs->num_old_cs_buffers + 1) * sizeof(*cs->old_cs_buffers));
+ if (!old_cs_buffers) {
+ cs->status = VK_ERROR_OUT_OF_HOST_MEMORY;
+ cs->base.cdw = 0;
+ return;
+ }
+ cs->old_cs_buffers = old_cs_buffers;
+
+ /* Store the current one for submitting it later. */
+ cs->old_cs_buffers[cs->num_old_cs_buffers].cdw = cs->base.cdw;
+ cs->old_cs_buffers[cs->num_old_cs_buffers].max_dw = cs->base.max_dw;
+ cs->old_cs_buffers[cs->num_old_cs_buffers].buf = cs->base.buf;
+ cs->num_old_cs_buffers++;
+
+         /* Reset the cs; it will be re-allocated below. */
+ cs->base.cdw = 0;
+ cs->base.buf = NULL;
+
+ /* Re-compute the number of dwords to allocate. */
+ ib_dws = MAX2(cs->base.cdw + min_size, MIN2(cs->base.max_dw * 2, limit_dws));
+ if (ib_dws > limit_dws) {
+ fprintf(stderr, "amdgpu: Too high number of "
+ "dwords to allocate\n");
+ cs->status = VK_ERROR_OUT_OF_HOST_MEMORY;
+ return;
+ }
+ }
+
+ uint32_t *new_buf = realloc(cs->base.buf, ib_dws * 4);
+ if (new_buf) {
+ cs->base.buf = new_buf;
+ cs->base.max_dw = ib_dws;
+ } else {
+ cs->status = VK_ERROR_OUT_OF_HOST_MEMORY;
+ cs->base.cdw = 0;
+ }
+ return;
+ }
+
+ uint64_t ib_size = MAX2(min_size * 4 + 16, cs->base.max_dw * 4 * 2);
+
+ /* max that fits in the chain size field. */
+ ib_size = MIN2(ib_size, 0xfffff);
+
+ while (!cs->base.cdw || (cs->base.cdw & 7) != 4)
+ radeon_emit(&cs->base, PKT3_NOP_PAD);
+
+ *cs->ib_size_ptr |= cs->base.cdw + 4;
+
+ if (cs->num_old_ib_buffers == cs->max_num_old_ib_buffers) {
+ unsigned max_num_old_ib_buffers = MAX2(1, cs->max_num_old_ib_buffers * 2);
+ struct radeon_winsys_bo **old_ib_buffers =
+ realloc(cs->old_ib_buffers, max_num_old_ib_buffers * sizeof(void *));
+ if (!old_ib_buffers) {
+ cs->status = VK_ERROR_OUT_OF_HOST_MEMORY;
+ return;
+ }
+ cs->max_num_old_ib_buffers = max_num_old_ib_buffers;
+ cs->old_ib_buffers = old_ib_buffers;
+ }
+
+ cs->old_ib_buffers[cs->num_old_ib_buffers++] = cs->ib_buffer;
+
+ cs->ib_buffer =
+ cs->ws->base.buffer_create(&cs->ws->base, ib_size, 0, cs->ws->cs_bo_domain,
+ RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING |
+ RADEON_FLAG_READ_ONLY | RADEON_FLAG_GTT_WC,
+ RADV_BO_PRIORITY_CS);
+
+ if (!cs->ib_buffer) {
+ cs->base.cdw = 0;
+ cs->status = VK_ERROR_OUT_OF_DEVICE_MEMORY;
+ cs->ib_buffer = cs->old_ib_buffers[--cs->num_old_ib_buffers];
+ }
+
+ cs->ib_mapped = cs->ws->base.buffer_map(cs->ib_buffer);
+ if (!cs->ib_mapped) {
+ cs->ws->base.buffer_destroy(&cs->ws->base, cs->ib_buffer);
+ cs->base.cdw = 0;
+
+ /* VK_ERROR_MEMORY_MAP_FAILED is not valid for vkEndCommandBuffer. */
+ cs->status = VK_ERROR_OUT_OF_DEVICE_MEMORY;
+ cs->ib_buffer = cs->old_ib_buffers[--cs->num_old_ib_buffers];
+ }
+
+ cs->ws->base.cs_add_buffer(&cs->base, cs->ib_buffer);
+
+ radeon_emit(&cs->base, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
+ radeon_emit(&cs->base, radv_amdgpu_winsys_bo(cs->ib_buffer)->base.va);
+ radeon_emit(&cs->base, radv_amdgpu_winsys_bo(cs->ib_buffer)->base.va >> 32);
+ radeon_emit(&cs->base, S_3F2_CHAIN(1) | S_3F2_VALID(1));
+
+ cs->ib_size_ptr = cs->base.buf + cs->base.cdw - 1;
+
+ cs->base.buf = (uint32_t *)cs->ib_mapped;
+ cs->base.cdw = 0;
+ cs->base.max_dw = ib_size / 4 - 4;
}
-static VkResult radv_amdgpu_cs_finalize(struct radeon_cmdbuf *_cs)
+static VkResult
+radv_amdgpu_cs_finalize(struct radeon_cmdbuf *_cs)
{
- struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
+ struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
- if (cs->ws->use_ib_bos) {
- while (!cs->base.cdw || (cs->base.cdw & 7) != 0)
- radeon_emit(&cs->base, PKT3_NOP_PAD);
+ if (cs->ws->use_ib_bos) {
+ while (!cs->base.cdw || (cs->base.cdw & 7) != 0)
+ radeon_emit(&cs->base, PKT3_NOP_PAD);
- *cs->ib_size_ptr |= cs->base.cdw;
+ *cs->ib_size_ptr |= cs->base.cdw;
- cs->is_chained = false;
- }
+ cs->is_chained = false;
+ }
- return cs->status;
+ return cs->status;
}
-static void radv_amdgpu_cs_reset(struct radeon_cmdbuf *_cs)
+static void
+radv_amdgpu_cs_reset(struct radeon_cmdbuf *_cs)
{
- struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
- cs->base.cdw = 0;
- cs->status = VK_SUCCESS;
-
- for (unsigned i = 0; i < cs->num_buffers; ++i) {
- unsigned hash = cs->handles[i].bo_handle &
- (ARRAY_SIZE(cs->buffer_hash_table) - 1);
- cs->buffer_hash_table[hash] = -1;
- }
-
- for (unsigned i = 0; i < cs->num_virtual_buffers; ++i) {
- unsigned hash = ((uintptr_t)cs->virtual_buffers[i] >> 6) & (VIRTUAL_BUFFER_HASH_TABLE_SIZE - 1);
- cs->virtual_buffer_hash_table[hash] = -1;
- }
-
- cs->num_buffers = 0;
- cs->num_virtual_buffers = 0;
-
- if (cs->ws->use_ib_bos) {
- cs->ws->base.cs_add_buffer(&cs->base, cs->ib_buffer);
-
- for (unsigned i = 0; i < cs->num_old_ib_buffers; ++i)
- cs->ws->base.buffer_destroy(&cs->ws->base, cs->old_ib_buffers[i]);
-
- cs->num_old_ib_buffers = 0;
- cs->ib.ib_mc_address = radv_amdgpu_winsys_bo(cs->ib_buffer)->base.va;
- cs->ib_size_ptr = &cs->ib.size;
- cs->ib.size = 0;
- } else {
- for (unsigned i = 0; i < cs->num_old_cs_buffers; ++i) {
- struct radeon_cmdbuf *rcs = &cs->old_cs_buffers[i];
- free(rcs->buf);
- }
-
- free(cs->old_cs_buffers);
- cs->old_cs_buffers = NULL;
- cs->num_old_cs_buffers = 0;
- }
+ struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
+ cs->base.cdw = 0;
+ cs->status = VK_SUCCESS;
+
+ for (unsigned i = 0; i < cs->num_buffers; ++i) {
+ unsigned hash = cs->handles[i].bo_handle & (ARRAY_SIZE(cs->buffer_hash_table) - 1);
+ cs->buffer_hash_table[hash] = -1;
+ }
+
+ for (unsigned i = 0; i < cs->num_virtual_buffers; ++i) {
+ unsigned hash =
+ ((uintptr_t)cs->virtual_buffers[i] >> 6) & (VIRTUAL_BUFFER_HASH_TABLE_SIZE - 1);
+ cs->virtual_buffer_hash_table[hash] = -1;
+ }
+
+ cs->num_buffers = 0;
+ cs->num_virtual_buffers = 0;
+
+ if (cs->ws->use_ib_bos) {
+ cs->ws->base.cs_add_buffer(&cs->base, cs->ib_buffer);
+
+ for (unsigned i = 0; i < cs->num_old_ib_buffers; ++i)
+ cs->ws->base.buffer_destroy(&cs->ws->base, cs->old_ib_buffers[i]);
+
+ cs->num_old_ib_buffers = 0;
+ cs->ib.ib_mc_address = radv_amdgpu_winsys_bo(cs->ib_buffer)->base.va;
+ cs->ib_size_ptr = &cs->ib.size;
+ cs->ib.size = 0;
+ } else {
+ for (unsigned i = 0; i < cs->num_old_cs_buffers; ++i) {
+ struct radeon_cmdbuf *rcs = &cs->old_cs_buffers[i];
+ free(rcs->buf);
+ }
+
+ free(cs->old_cs_buffers);
+ cs->old_cs_buffers = NULL;
+ cs->num_old_cs_buffers = 0;
+ }
}
-static int radv_amdgpu_cs_find_buffer(struct radv_amdgpu_cs *cs,
- uint32_t bo)
+static int
+radv_amdgpu_cs_find_buffer(struct radv_amdgpu_cs *cs, uint32_t bo)
{
- unsigned hash = bo & (ARRAY_SIZE(cs->buffer_hash_table) - 1);
- int index = cs->buffer_hash_table[hash];
+ unsigned hash = bo & (ARRAY_SIZE(cs->buffer_hash_table) - 1);
+ int index = cs->buffer_hash_table[hash];
- if (index == -1)
- return -1;
+ if (index == -1)
+ return -1;
- if (cs->handles[index].bo_handle == bo)
- return index;
+ if (cs->handles[index].bo_handle == bo)
+ return index;
- for (unsigned i = 0; i < cs->num_buffers; ++i) {
- if (cs->handles[i].bo_handle == bo) {
- cs->buffer_hash_table[hash] = i;
- return i;
- }
- }
+ for (unsigned i = 0; i < cs->num_buffers; ++i) {
+ if (cs->handles[i].bo_handle == bo) {
+ cs->buffer_hash_table[hash] = i;
+ return i;
+ }
+ }
- return -1;
+ return -1;
}
-static void radv_amdgpu_cs_add_buffer_internal(struct radv_amdgpu_cs *cs,
- uint32_t bo, uint8_t priority)
+static void
+radv_amdgpu_cs_add_buffer_internal(struct radv_amdgpu_cs *cs, uint32_t bo, uint8_t priority)
{
- unsigned hash;
- int index = radv_amdgpu_cs_find_buffer(cs, bo);
-
- if (index != -1)
- return;
-
- if (cs->num_buffers == cs->max_num_buffers) {
- unsigned new_count = MAX2(1, cs->max_num_buffers * 2);
- struct drm_amdgpu_bo_list_entry *new_entries =
- realloc(cs->handles, new_count * sizeof(struct drm_amdgpu_bo_list_entry));
- if (new_entries) {
- cs->max_num_buffers = new_count;
- cs->handles = new_entries;
- } else {
- cs->status = VK_ERROR_OUT_OF_HOST_MEMORY;
- return;
- }
- }
-
- cs->handles[cs->num_buffers].bo_handle = bo;
- cs->handles[cs->num_buffers].bo_priority = priority;
-
- hash = bo & (ARRAY_SIZE(cs->buffer_hash_table) - 1);
- cs->buffer_hash_table[hash] = cs->num_buffers;
-
- ++cs->num_buffers;
+ unsigned hash;
+ int index = radv_amdgpu_cs_find_buffer(cs, bo);
+
+ if (index != -1)
+ return;
+
+ if (cs->num_buffers == cs->max_num_buffers) {
+ unsigned new_count = MAX2(1, cs->max_num_buffers * 2);
+ struct drm_amdgpu_bo_list_entry *new_entries =
+ realloc(cs->handles, new_count * sizeof(struct drm_amdgpu_bo_list_entry));
+ if (new_entries) {
+ cs->max_num_buffers = new_count;
+ cs->handles = new_entries;
+ } else {
+ cs->status = VK_ERROR_OUT_OF_HOST_MEMORY;
+ return;
+ }
+ }
+
+ cs->handles[cs->num_buffers].bo_handle = bo;
+ cs->handles[cs->num_buffers].bo_priority = priority;
+
+ hash = bo & (ARRAY_SIZE(cs->buffer_hash_table) - 1);
+ cs->buffer_hash_table[hash] = cs->num_buffers;
+
+ ++cs->num_buffers;
}
-static void radv_amdgpu_cs_add_virtual_buffer(struct radeon_cmdbuf *_cs,
- struct radeon_winsys_bo *bo)
+static void
+radv_amdgpu_cs_add_virtual_buffer(struct radeon_cmdbuf *_cs, struct radeon_winsys_bo *bo)
{
- struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
- unsigned hash = ((uintptr_t)bo >> 6) & (VIRTUAL_BUFFER_HASH_TABLE_SIZE - 1);
-
-
- if (!cs->virtual_buffer_hash_table) {
- int *virtual_buffer_hash_table =
- malloc(VIRTUAL_BUFFER_HASH_TABLE_SIZE * sizeof(int));
- if (!virtual_buffer_hash_table) {
- cs->status = VK_ERROR_OUT_OF_HOST_MEMORY;
- return;
- }
- cs->virtual_buffer_hash_table = virtual_buffer_hash_table;
-
- for (int i = 0; i < VIRTUAL_BUFFER_HASH_TABLE_SIZE; ++i)
- cs->virtual_buffer_hash_table[i] = -1;
- }
-
- if (cs->virtual_buffer_hash_table[hash] >= 0) {
- int idx = cs->virtual_buffer_hash_table[hash];
- if (cs->virtual_buffers[idx] == bo) {
- return;
- }
- for (unsigned i = 0; i < cs->num_virtual_buffers; ++i) {
- if (cs->virtual_buffers[i] == bo) {
- cs->virtual_buffer_hash_table[hash] = i;
- return;
- }
- }
- }
-
- if(cs->max_num_virtual_buffers <= cs->num_virtual_buffers) {
- unsigned max_num_virtual_buffers =
- MAX2(2, cs->max_num_virtual_buffers * 2);
- struct radeon_winsys_bo **virtual_buffers =
- realloc(cs->virtual_buffers,
- sizeof(struct radeon_winsys_bo*) * max_num_virtual_buffers);
- if (!virtual_buffers) {
- cs->status = VK_ERROR_OUT_OF_HOST_MEMORY;
- return;
- }
- cs->max_num_virtual_buffers = max_num_virtual_buffers;
- cs->virtual_buffers = virtual_buffers;
- }
-
- cs->virtual_buffers[cs->num_virtual_buffers] = bo;
-
- cs->virtual_buffer_hash_table[hash] = cs->num_virtual_buffers;
- ++cs->num_virtual_buffers;
-
+ struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
+ unsigned hash = ((uintptr_t)bo >> 6) & (VIRTUAL_BUFFER_HASH_TABLE_SIZE - 1);
+
+ if (!cs->virtual_buffer_hash_table) {
+ int *virtual_buffer_hash_table = malloc(VIRTUAL_BUFFER_HASH_TABLE_SIZE * sizeof(int));
+ if (!virtual_buffer_hash_table) {
+ cs->status = VK_ERROR_OUT_OF_HOST_MEMORY;
+ return;
+ }
+ cs->virtual_buffer_hash_table = virtual_buffer_hash_table;
+
+ for (int i = 0; i < VIRTUAL_BUFFER_HASH_TABLE_SIZE; ++i)
+ cs->virtual_buffer_hash_table[i] = -1;
+ }
+
+ if (cs->virtual_buffer_hash_table[hash] >= 0) {
+ int idx = cs->virtual_buffer_hash_table[hash];
+ if (cs->virtual_buffers[idx] == bo) {
+ return;
+ }
+ for (unsigned i = 0; i < cs->num_virtual_buffers; ++i) {
+ if (cs->virtual_buffers[i] == bo) {
+ cs->virtual_buffer_hash_table[hash] = i;
+ return;
+ }
+ }
+ }
+
+ if (cs->max_num_virtual_buffers <= cs->num_virtual_buffers) {
+ unsigned max_num_virtual_buffers = MAX2(2, cs->max_num_virtual_buffers * 2);
+ struct radeon_winsys_bo **virtual_buffers =
+ realloc(cs->virtual_buffers, sizeof(struct radeon_winsys_bo *) * max_num_virtual_buffers);
+ if (!virtual_buffers) {
+ cs->status = VK_ERROR_OUT_OF_HOST_MEMORY;
+ return;
+ }
+ cs->max_num_virtual_buffers = max_num_virtual_buffers;
+ cs->virtual_buffers = virtual_buffers;
+ }
+
+ cs->virtual_buffers[cs->num_virtual_buffers] = bo;
+
+ cs->virtual_buffer_hash_table[hash] = cs->num_virtual_buffers;
+ ++cs->num_virtual_buffers;
}
-static void radv_amdgpu_cs_add_buffer(struct radeon_cmdbuf *_cs,
- struct radeon_winsys_bo *_bo)
+static void
+radv_amdgpu_cs_add_buffer(struct radeon_cmdbuf *_cs, struct radeon_winsys_bo *_bo)
{
- struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
- struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
+ struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
+ struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
- if (cs->status != VK_SUCCESS)
- return;
+ if (cs->status != VK_SUCCESS)
+ return;
- if (bo->is_virtual) {
- radv_amdgpu_cs_add_virtual_buffer(_cs, _bo);
- return;
- }
+ if (bo->is_virtual) {
+ radv_amdgpu_cs_add_virtual_buffer(_cs, _bo);
+ return;
+ }
- radv_amdgpu_cs_add_buffer_internal(cs, bo->bo_handle, bo->priority);
+ radv_amdgpu_cs_add_buffer_internal(cs, bo->bo_handle, bo->priority);
}
-static void radv_amdgpu_cs_execute_secondary(struct radeon_cmdbuf *_parent,
- struct radeon_cmdbuf *_child)
+static void
+radv_amdgpu_cs_execute_secondary(struct radeon_cmdbuf *_parent, struct radeon_cmdbuf *_child)
{
- struct radv_amdgpu_cs *parent = radv_amdgpu_cs(_parent);
- struct radv_amdgpu_cs *child = radv_amdgpu_cs(_child);
-
- if (parent->status != VK_SUCCESS || child->status != VK_SUCCESS)
- return;
-
- for (unsigned i = 0; i < child->num_buffers; ++i) {
- radv_amdgpu_cs_add_buffer_internal(parent,
- child->handles[i].bo_handle,
- child->handles[i].bo_priority);
- }
-
- for (unsigned i = 0; i < child->num_virtual_buffers; ++i) {
- radv_amdgpu_cs_add_buffer(&parent->base, child->virtual_buffers[i]);
- }
-
- if (parent->ws->use_ib_bos) {
- if (parent->base.cdw + 4 > parent->base.max_dw)
- radv_amdgpu_cs_grow(&parent->base, 4);
-
- radeon_emit(&parent->base, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
- radeon_emit(&parent->base, child->ib.ib_mc_address);
- radeon_emit(&parent->base, child->ib.ib_mc_address >> 32);
- radeon_emit(&parent->base, child->ib.size);
- } else {
- if (parent->base.cdw + child->base.cdw > parent->base.max_dw)
- radv_amdgpu_cs_grow(&parent->base, child->base.cdw);
-
- memcpy(parent->base.buf + parent->base.cdw, child->base.buf, 4 * child->base.cdw);
- parent->base.cdw += child->base.cdw;
- }
+ struct radv_amdgpu_cs *parent = radv_amdgpu_cs(_parent);
+ struct radv_amdgpu_cs *child = radv_amdgpu_cs(_child);
+
+ if (parent->status != VK_SUCCESS || child->status != VK_SUCCESS)
+ return;
+
+ for (unsigned i = 0; i < child->num_buffers; ++i) {
+ radv_amdgpu_cs_add_buffer_internal(parent, child->handles[i].bo_handle,
+ child->handles[i].bo_priority);
+ }
+
+ for (unsigned i = 0; i < child->num_virtual_buffers; ++i) {
+ radv_amdgpu_cs_add_buffer(&parent->base, child->virtual_buffers[i]);
+ }
+
+ if (parent->ws->use_ib_bos) {
+ if (parent->base.cdw + 4 > parent->base.max_dw)
+ radv_amdgpu_cs_grow(&parent->base, 4);
+
+ radeon_emit(&parent->base, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
+ radeon_emit(&parent->base, child->ib.ib_mc_address);
+ radeon_emit(&parent->base, child->ib.ib_mc_address >> 32);
+ radeon_emit(&parent->base, child->ib.size);
+ } else {
+ if (parent->base.cdw + child->base.cdw > parent->base.max_dw)
+ radv_amdgpu_cs_grow(&parent->base, child->base.cdw);
+
+ memcpy(parent->base.buf + parent->base.cdw, child->base.buf, 4 * child->base.cdw);
+ parent->base.cdw += child->base.cdw;
+ }
}
static VkResult
-radv_amdgpu_get_bo_list(struct radv_amdgpu_winsys *ws,
- struct radeon_cmdbuf **cs_array,
- unsigned count,
- struct radv_amdgpu_winsys_bo **extra_bo_array,
- unsigned num_extra_bo,
- struct radeon_cmdbuf *extra_cs,
- unsigned *rnum_handles,
- struct drm_amdgpu_bo_list_entry **rhandles)
+radv_amdgpu_get_bo_list(struct radv_amdgpu_winsys *ws, struct radeon_cmdbuf **cs_array,
+ unsigned count, struct radv_amdgpu_winsys_bo **extra_bo_array,
+ unsigned num_extra_bo, struct radeon_cmdbuf *extra_cs,
+ unsigned *rnum_handles, struct drm_amdgpu_bo_list_entry **rhandles)
{
- struct drm_amdgpu_bo_list_entry *handles = NULL;
- unsigned num_handles = 0;
-
- if (ws->debug_all_bos) {
- handles = malloc(sizeof(handles[0]) * ws->global_bo_list.count);
- if (!handles) {
- return VK_ERROR_OUT_OF_HOST_MEMORY;
- }
-
- for (uint32_t i = 0; i < ws->global_bo_list.count; i++) {
- handles[i].bo_handle = ws->global_bo_list.bos[i]->bo_handle;
- handles[i].bo_priority = ws->global_bo_list.bos[i]->priority;
- num_handles++;
- }
- } else if (count == 1 && !num_extra_bo && !extra_cs &&
- !radv_amdgpu_cs(cs_array[0])->num_virtual_buffers &&
- !ws->global_bo_list.count) {
- struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[0];
- if (cs->num_buffers == 0)
- return VK_SUCCESS;
-
- handles = malloc(sizeof(handles[0]) * cs->num_buffers);
- if (!handles)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
-
- memcpy(handles, cs->handles,
- sizeof(handles[0]) * cs->num_buffers);
- num_handles = cs->num_buffers;
- } else {
- unsigned total_buffer_count = num_extra_bo;
- num_handles = num_extra_bo;
- for (unsigned i = 0; i < count; ++i) {
- struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[i];
- total_buffer_count += cs->num_buffers;
- for (unsigned j = 0; j < cs->num_virtual_buffers; ++j)
- total_buffer_count += radv_amdgpu_winsys_bo(cs->virtual_buffers[j])->bo_count;
- }
-
- if (extra_cs) {
- total_buffer_count += ((struct radv_amdgpu_cs*)extra_cs)->num_buffers;
- }
-
- total_buffer_count += ws->global_bo_list.count;
-
- if (total_buffer_count == 0)
- return VK_SUCCESS;
-
- handles = malloc(sizeof(handles[0]) * total_buffer_count);
- if (!handles)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
-
- for (unsigned i = 0; i < num_extra_bo; i++) {
- handles[i].bo_handle = extra_bo_array[i]->bo_handle;
- handles[i].bo_priority = extra_bo_array[i]->priority;
- }
-
- for (unsigned i = 0; i < count + !!extra_cs; ++i) {
- struct radv_amdgpu_cs *cs;
-
- if (i == count)
- cs = (struct radv_amdgpu_cs*)extra_cs;
- else
- cs = (struct radv_amdgpu_cs*)cs_array[i];
-
- if (!cs->num_buffers)
- continue;
-
- if (num_handles == 0 && !cs->num_virtual_buffers) {
- memcpy(handles, cs->handles, cs->num_buffers * sizeof(struct drm_amdgpu_bo_list_entry));
- num_handles = cs->num_buffers;
- continue;
- }
- int unique_bo_so_far = num_handles;
- for (unsigned j = 0; j < cs->num_buffers; ++j) {
- bool found = false;
- for (unsigned k = 0; k < unique_bo_so_far; ++k) {
- if (handles[k].bo_handle == cs->handles[j].bo_handle) {
- found = true;
- break;
- }
- }
- if (!found) {
- handles[num_handles] = cs->handles[j];
- ++num_handles;
- }
- }
- for (unsigned j = 0; j < cs->num_virtual_buffers; ++j) {
- struct radv_amdgpu_winsys_bo *virtual_bo = radv_amdgpu_winsys_bo(cs->virtual_buffers[j]);
- for(unsigned k = 0; k < virtual_bo->bo_count; ++k) {
- struct radv_amdgpu_winsys_bo *bo = virtual_bo->bos[k];
- bool found = false;
- for (unsigned m = 0; m < num_handles; ++m) {
- if (handles[m].bo_handle == bo->bo_handle) {
- found = true;
- break;
- }
- }
- if (!found) {
- handles[num_handles].bo_handle = bo->bo_handle;
- handles[num_handles].bo_priority = bo->priority;
- ++num_handles;
- }
- }
- }
- }
-
- unsigned unique_bo_so_far = num_handles;
- for (unsigned i = 0; i < ws->global_bo_list.count; ++i) {
- struct radv_amdgpu_winsys_bo *bo = ws->global_bo_list.bos[i];
- bool found = false;
- for (unsigned j = 0; j < unique_bo_so_far; ++j) {
- if (bo->bo_handle == handles[j].bo_handle) {
- found = true;
- break;
- }
- }
- if (!found) {
- handles[num_handles].bo_handle = bo->bo_handle;
- handles[num_handles].bo_priority = bo->priority;
- ++num_handles;
- }
- }
- }
-
- *rhandles = handles;
- *rnum_handles = num_handles;
-
- return VK_SUCCESS;
+ struct drm_amdgpu_bo_list_entry *handles = NULL;
+ unsigned num_handles = 0;
+
+ if (ws->debug_all_bos) {
+ handles = malloc(sizeof(handles[0]) * ws->global_bo_list.count);
+ if (!handles) {
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+ }
+
+ for (uint32_t i = 0; i < ws->global_bo_list.count; i++) {
+ handles[i].bo_handle = ws->global_bo_list.bos[i]->bo_handle;
+ handles[i].bo_priority = ws->global_bo_list.bos[i]->priority;
+ num_handles++;
+ }
+ } else if (count == 1 && !num_extra_bo && !extra_cs &&
+ !radv_amdgpu_cs(cs_array[0])->num_virtual_buffers && !ws->global_bo_list.count) {
+ struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)cs_array[0];
+ if (cs->num_buffers == 0)
+ return VK_SUCCESS;
+
+ handles = malloc(sizeof(handles[0]) * cs->num_buffers);
+ if (!handles)
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+ memcpy(handles, cs->handles, sizeof(handles[0]) * cs->num_buffers);
+ num_handles = cs->num_buffers;
+ } else {
+ unsigned total_buffer_count = num_extra_bo;
+ num_handles = num_extra_bo;
+ for (unsigned i = 0; i < count; ++i) {
+ struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)cs_array[i];
+ total_buffer_count += cs->num_buffers;
+ for (unsigned j = 0; j < cs->num_virtual_buffers; ++j)
+ total_buffer_count += radv_amdgpu_winsys_bo(cs->virtual_buffers[j])->bo_count;
+ }
+
+ if (extra_cs) {
+ total_buffer_count += ((struct radv_amdgpu_cs *)extra_cs)->num_buffers;
+ }
+
+ total_buffer_count += ws->global_bo_list.count;
+
+ if (total_buffer_count == 0)
+ return VK_SUCCESS;
+
+ handles = malloc(sizeof(handles[0]) * total_buffer_count);
+ if (!handles)
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+ for (unsigned i = 0; i < num_extra_bo; i++) {
+ handles[i].bo_handle = extra_bo_array[i]->bo_handle;
+ handles[i].bo_priority = extra_bo_array[i]->priority;
+ }
+
+ for (unsigned i = 0; i < count + !!extra_cs; ++i) {
+ struct radv_amdgpu_cs *cs;
+
+ if (i == count)
+ cs = (struct radv_amdgpu_cs *)extra_cs;
+ else
+ cs = (struct radv_amdgpu_cs *)cs_array[i];
+
+ if (!cs->num_buffers)
+ continue;
+
+ if (num_handles == 0 && !cs->num_virtual_buffers) {
+ memcpy(handles, cs->handles, cs->num_buffers * sizeof(struct drm_amdgpu_bo_list_entry));
+ num_handles = cs->num_buffers;
+ continue;
+ }
+ int unique_bo_so_far = num_handles;
+ for (unsigned j = 0; j < cs->num_buffers; ++j) {
+ bool found = false;
+ for (unsigned k = 0; k < unique_bo_so_far; ++k) {
+ if (handles[k].bo_handle == cs->handles[j].bo_handle) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ handles[num_handles] = cs->handles[j];
+ ++num_handles;
+ }
+ }
+ for (unsigned j = 0; j < cs->num_virtual_buffers; ++j) {
+ struct radv_amdgpu_winsys_bo *virtual_bo =
+ radv_amdgpu_winsys_bo(cs->virtual_buffers[j]);
+ for (unsigned k = 0; k < virtual_bo->bo_count; ++k) {
+ struct radv_amdgpu_winsys_bo *bo = virtual_bo->bos[k];
+ bool found = false;
+ for (unsigned m = 0; m < num_handles; ++m) {
+ if (handles[m].bo_handle == bo->bo_handle) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ handles[num_handles].bo_handle = bo->bo_handle;
+ handles[num_handles].bo_priority = bo->priority;
+ ++num_handles;
+ }
+ }
+ }
+ }
+
+ unsigned unique_bo_so_far = num_handles;
+ for (unsigned i = 0; i < ws->global_bo_list.count; ++i) {
+ struct radv_amdgpu_winsys_bo *bo = ws->global_bo_list.bos[i];
+ bool found = false;
+ for (unsigned j = 0; j < unique_bo_so_far; ++j) {
+ if (bo->bo_handle == handles[j].bo_handle) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ handles[num_handles].bo_handle = bo->bo_handle;
+ handles[num_handles].bo_priority = bo->priority;
+ ++num_handles;
+ }
+ }
+ }
+
+ *rhandles = handles;
+ *rnum_handles = num_handles;
+
+ return VK_SUCCESS;
}
-static void radv_assign_last_submit(struct radv_amdgpu_ctx *ctx,
- struct radv_amdgpu_cs_request *request)
+static void
+radv_assign_last_submit(struct radv_amdgpu_ctx *ctx, struct radv_amdgpu_cs_request *request)
{
- radv_amdgpu_request_to_fence(ctx,
- &ctx->last_submission[request->ip_type][request->ring],
- request);
+ radv_amdgpu_request_to_fence(ctx, &ctx->last_submission[request->ip_type][request->ring],
+ request);
}
static VkResult
-radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,
- int queue_idx,
- struct radv_winsys_sem_info *sem_info,
- struct radeon_cmdbuf **cs_array,
- unsigned cs_count,
- struct radeon_cmdbuf *initial_preamble_cs)
+radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx, int queue_idx,
+ struct radv_winsys_sem_info *sem_info,
+ struct radeon_cmdbuf **cs_array, unsigned cs_count,
+ struct radeon_cmdbuf *initial_preamble_cs)
{
- struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
- struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[0]);
- struct radv_amdgpu_winsys *aws = cs0->ws;
- struct drm_amdgpu_bo_list_entry *handles = NULL;
- struct radv_amdgpu_cs_request request;
- struct amdgpu_cs_ib_info ibs[2];
- unsigned number_of_ibs = 1;
- unsigned num_handles = 0;
- VkResult result;
-
- for (unsigned i = cs_count; i--;) {
- struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]);
-
- if (cs->is_chained) {
- *cs->ib_size_ptr -= 4;
- cs->is_chained = false;
- }
-
- if (i + 1 < cs_count) {
- struct radv_amdgpu_cs *next = radv_amdgpu_cs(cs_array[i + 1]);
- assert(cs->base.cdw + 4 <= cs->base.max_dw);
-
- cs->is_chained = true;
- *cs->ib_size_ptr += 4;
-
- cs->base.buf[cs->base.cdw + 0] = PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0);
- cs->base.buf[cs->base.cdw + 1] = next->ib.ib_mc_address;
- cs->base.buf[cs->base.cdw + 2] = next->ib.ib_mc_address >> 32;
- cs->base.buf[cs->base.cdw + 3] = S_3F2_CHAIN(1) | S_3F2_VALID(1) | next->ib.size;
- }
- }
-
- u_rwlock_rdlock(&aws->global_bo_list.lock);
-
- /* Get the BO list. */
- result = radv_amdgpu_get_bo_list(cs0->ws, cs_array, cs_count, NULL, 0,
- initial_preamble_cs,
- &num_handles, &handles);
- if (result != VK_SUCCESS)
- goto fail;
-
- /* Configure the CS request. */
- if (initial_preamble_cs) {
- ibs[0] = radv_amdgpu_cs(initial_preamble_cs)->ib;
- ibs[1] = cs0->ib;
- number_of_ibs++;
- } else {
- ibs[0] = cs0->ib;
- }
-
- request.ip_type = cs0->hw_ip;
- request.ip_instance = 0;
- request.ring = queue_idx;
- request.number_of_ibs = number_of_ibs;
- request.ibs = ibs;
- request.handles = handles;
- request.num_handles = num_handles;
-
- /* Submit the CS. */
- result = radv_amdgpu_cs_submit(ctx, &request, sem_info);
-
- free(request.handles);
-
- if (result != VK_SUCCESS)
- goto fail;
-
- radv_assign_last_submit(ctx, &request);
+ struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
+ struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[0]);
+ struct radv_amdgpu_winsys *aws = cs0->ws;
+ struct drm_amdgpu_bo_list_entry *handles = NULL;
+ struct radv_amdgpu_cs_request request;
+ struct amdgpu_cs_ib_info ibs[2];
+ unsigned number_of_ibs = 1;
+ unsigned num_handles = 0;
+ VkResult result;
+
+ for (unsigned i = cs_count; i--;) {
+ struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]);
+
+ if (cs->is_chained) {
+ *cs->ib_size_ptr -= 4;
+ cs->is_chained = false;
+ }
+
+ if (i + 1 < cs_count) {
+ struct radv_amdgpu_cs *next = radv_amdgpu_cs(cs_array[i + 1]);
+ assert(cs->base.cdw + 4 <= cs->base.max_dw);
+
+ cs->is_chained = true;
+ *cs->ib_size_ptr += 4;
+
+ cs->base.buf[cs->base.cdw + 0] = PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0);
+ cs->base.buf[cs->base.cdw + 1] = next->ib.ib_mc_address;
+ cs->base.buf[cs->base.cdw + 2] = next->ib.ib_mc_address >> 32;
+ cs->base.buf[cs->base.cdw + 3] = S_3F2_CHAIN(1) | S_3F2_VALID(1) | next->ib.size;
+ }
+ }
+
+ u_rwlock_rdlock(&aws->global_bo_list.lock);
+
+ /* Get the BO list. */
+ result = radv_amdgpu_get_bo_list(cs0->ws, cs_array, cs_count, NULL, 0, initial_preamble_cs,
+ &num_handles, &handles);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ /* Configure the CS request. */
+ if (initial_preamble_cs) {
+ ibs[0] = radv_amdgpu_cs(initial_preamble_cs)->ib;
+ ibs[1] = cs0->ib;
+ number_of_ibs++;
+ } else {
+ ibs[0] = cs0->ib;
+ }
+
+ request.ip_type = cs0->hw_ip;
+ request.ip_instance = 0;
+ request.ring = queue_idx;
+ request.number_of_ibs = number_of_ibs;
+ request.ibs = ibs;
+ request.handles = handles;
+ request.num_handles = num_handles;
+
+ /* Submit the CS. */
+ result = radv_amdgpu_cs_submit(ctx, &request, sem_info);
+
+ free(request.handles);
+
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ radv_assign_last_submit(ctx, &request);
fail:
- u_rwlock_rdunlock(&aws->global_bo_list.lock);
- return result;
+ u_rwlock_rdunlock(&aws->global_bo_list.lock);
+ return result;
}
static VkResult
-radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
- int queue_idx,
- struct radv_winsys_sem_info *sem_info,
- struct radeon_cmdbuf **cs_array,
- unsigned cs_count,
- struct radeon_cmdbuf *initial_preamble_cs)
+radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx, int queue_idx,
+ struct radv_winsys_sem_info *sem_info,
+ struct radeon_cmdbuf **cs_array, unsigned cs_count,
+ struct radeon_cmdbuf *initial_preamble_cs)
{
- struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
- struct drm_amdgpu_bo_list_entry *handles = NULL;
- struct radv_amdgpu_cs_request request;
- struct amdgpu_cs_ib_info *ibs;
- struct radv_amdgpu_cs *cs0;
- struct radv_amdgpu_winsys *aws;
- unsigned num_handles = 0;
- unsigned number_of_ibs;
- VkResult result;
-
- assert(cs_count);
- cs0 = radv_amdgpu_cs(cs_array[0]);
- aws = cs0->ws;
-
- /* Compute the number of IBs for this submit. */
- number_of_ibs = cs_count + !!initial_preamble_cs;
-
- u_rwlock_rdlock(&aws->global_bo_list.lock);
-
- /* Get the BO list. */
- result = radv_amdgpu_get_bo_list(cs0->ws, &cs_array[0], cs_count, NULL, 0,
- initial_preamble_cs,
- &num_handles, &handles);
- if (result != VK_SUCCESS) {
- goto fail;
- }
-
- ibs = malloc(number_of_ibs * sizeof(*ibs));
- if (!ibs) {
- free(handles);
- result = VK_ERROR_OUT_OF_HOST_MEMORY;
- goto fail;
- }
-
- /* Configure the CS request. */
- if (initial_preamble_cs)
- ibs[0] = radv_amdgpu_cs(initial_preamble_cs)->ib;
-
- for (unsigned i = 0; i < cs_count; i++) {
- struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]);
-
- ibs[i + !!initial_preamble_cs] = cs->ib;
-
- if (cs->is_chained) {
- *cs->ib_size_ptr -= 4;
- cs->is_chained = false;
- }
- }
-
- request.ip_type = cs0->hw_ip;
- request.ip_instance = 0;
- request.ring = queue_idx;
- request.handles = handles;
- request.num_handles = num_handles;
- request.number_of_ibs = number_of_ibs;
- request.ibs = ibs;
-
- /* Submit the CS. */
- result = radv_amdgpu_cs_submit(ctx, &request, sem_info);
-
- free(request.handles);
- free(ibs);
-
- if (result != VK_SUCCESS)
- goto fail;
-
- radv_assign_last_submit(ctx, &request);
+ struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
+ struct drm_amdgpu_bo_list_entry *handles = NULL;
+ struct radv_amdgpu_cs_request request;
+ struct amdgpu_cs_ib_info *ibs;
+ struct radv_amdgpu_cs *cs0;
+ struct radv_amdgpu_winsys *aws;
+ unsigned num_handles = 0;
+ unsigned number_of_ibs;
+ VkResult result;
+
+ assert(cs_count);
+ cs0 = radv_amdgpu_cs(cs_array[0]);
+ aws = cs0->ws;
+
+ /* Compute the number of IBs for this submit. */
+ number_of_ibs = cs_count + !!initial_preamble_cs;
+
+ u_rwlock_rdlock(&aws->global_bo_list.lock);
+
+ /* Get the BO list. */
+ result = radv_amdgpu_get_bo_list(cs0->ws, &cs_array[0], cs_count, NULL, 0, initial_preamble_cs,
+ &num_handles, &handles);
+ if (result != VK_SUCCESS) {
+ goto fail;
+ }
+
+ ibs = malloc(number_of_ibs * sizeof(*ibs));
+ if (!ibs) {
+ free(handles);
+ result = VK_ERROR_OUT_OF_HOST_MEMORY;
+ goto fail;
+ }
+
+ /* Configure the CS request. */
+ if (initial_preamble_cs)
+ ibs[0] = radv_amdgpu_cs(initial_preamble_cs)->ib;
+
+ for (unsigned i = 0; i < cs_count; i++) {
+ struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]);
+
+ ibs[i + !!initial_preamble_cs] = cs->ib;
+
+ if (cs->is_chained) {
+ *cs->ib_size_ptr -= 4;
+ cs->is_chained = false;
+ }
+ }
+
+ request.ip_type = cs0->hw_ip;
+ request.ip_instance = 0;
+ request.ring = queue_idx;
+ request.handles = handles;
+ request.num_handles = num_handles;
+ request.number_of_ibs = number_of_ibs;
+ request.ibs = ibs;
+
+ /* Submit the CS. */
+ result = radv_amdgpu_cs_submit(ctx, &request, sem_info);
+
+ free(request.handles);
+ free(ibs);
+
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ radv_assign_last_submit(ctx, &request);
fail:
- u_rwlock_rdunlock(&aws->global_bo_list.lock);
- return result;
+ u_rwlock_rdunlock(&aws->global_bo_list.lock);
+ return result;
}
static VkResult
-radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
- int queue_idx,
- struct radv_winsys_sem_info *sem_info,
- struct radeon_cmdbuf **cs_array,
- unsigned cs_count,
- struct radeon_cmdbuf *initial_preamble_cs,
- struct radeon_cmdbuf *continue_preamble_cs)
+radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx, int queue_idx,
+ struct radv_winsys_sem_info *sem_info,
+ struct radeon_cmdbuf **cs_array, unsigned cs_count,
+ struct radeon_cmdbuf *initial_preamble_cs,
+ struct radeon_cmdbuf *continue_preamble_cs)
{
- struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
- struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[0]);
- struct radeon_winsys *ws = (struct radeon_winsys*)cs0->ws;
- struct radv_amdgpu_winsys *aws = cs0->ws;
- struct radv_amdgpu_cs_request request;
- uint32_t pad_word = PKT3_NOP_PAD;
- bool emit_signal_sem = sem_info->cs_emit_signal;
- VkResult result;
-
- if (radv_amdgpu_winsys(ws)->info.chip_class == GFX6)
- pad_word = 0x80000000;
-
- assert(cs_count);
-
- for (unsigned i = 0; i < cs_count;) {
- struct amdgpu_cs_ib_info *ibs;
- struct radeon_winsys_bo **bos;
- struct radeon_cmdbuf *preamble_cs = i ? continue_preamble_cs : initial_preamble_cs;
- struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]);
- struct drm_amdgpu_bo_list_entry *handles = NULL;
- unsigned num_handles = 0;
- unsigned number_of_ibs;
- uint32_t *ptr;
- unsigned cnt = 0;
- unsigned pad_words = 0;
-
- /* Compute the number of IBs for this submit. */
- number_of_ibs = cs->num_old_cs_buffers + 1;
-
- ibs = malloc(number_of_ibs * sizeof(*ibs));
- if (!ibs)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
-
- bos = malloc(number_of_ibs * sizeof(*bos));
- if (!bos) {
- free(ibs);
- return VK_ERROR_OUT_OF_HOST_MEMORY;
- }
-
- if (number_of_ibs > 1) {
- /* Special path when the maximum size in dwords has
- * been reached because we need to handle more than one
- * IB per submit.
- */
- struct radeon_cmdbuf **new_cs_array;
- unsigned idx = 0;
-
- new_cs_array = malloc(cs->num_old_cs_buffers *
- sizeof(*new_cs_array));
- assert(new_cs_array);
-
- for (unsigned j = 0; j < cs->num_old_cs_buffers; j++)
- new_cs_array[idx++] = &cs->old_cs_buffers[j];
- new_cs_array[idx++] = cs_array[i];
-
- for (unsigned j = 0; j < number_of_ibs; j++) {
- struct radeon_cmdbuf *rcs = new_cs_array[j];
- bool needs_preamble = preamble_cs && j == 0;
- unsigned size = 0;
-
- if (needs_preamble)
- size += preamble_cs->cdw;
- size += rcs->cdw;
-
- assert(size < 0xffff8);
-
- while (!size || (size & 7)) {
- size++;
- pad_words++;
- }
-
- bos[j] = ws->buffer_create(ws, 4 * size, 4096,
- aws->cs_bo_domain,
- RADEON_FLAG_CPU_ACCESS |
- RADEON_FLAG_NO_INTERPROCESS_SHARING |
- RADEON_FLAG_READ_ONLY,
- RADV_BO_PRIORITY_CS);
- ptr = ws->buffer_map(bos[j]);
-
- if (needs_preamble) {
- memcpy(ptr, preamble_cs->buf, preamble_cs->cdw * 4);
- ptr += preamble_cs->cdw;
- }
-
- memcpy(ptr, rcs->buf, 4 * rcs->cdw);
- ptr += rcs->cdw;
-
- for (unsigned k = 0; k < pad_words; ++k)
- *ptr++ = pad_word;
-
- ibs[j].size = size;
- ibs[j].ib_mc_address = radv_buffer_get_va(bos[j]);
- ibs[j].flags = 0;
- }
-
- cnt++;
- free(new_cs_array);
- } else {
- unsigned size = 0;
-
- if (preamble_cs)
- size += preamble_cs->cdw;
-
- while (i + cnt < cs_count && 0xffff8 - size >= radv_amdgpu_cs(cs_array[i + cnt])->base.cdw) {
- size += radv_amdgpu_cs(cs_array[i + cnt])->base.cdw;
- ++cnt;
- }
-
- while (!size || (size & 7)) {
- size++;
- pad_words++;
- }
- assert(cnt);
-
- bos[0] = ws->buffer_create(ws, 4 * size, 4096,
- aws->cs_bo_domain,
- RADEON_FLAG_CPU_ACCESS |
- RADEON_FLAG_NO_INTERPROCESS_SHARING |
- RADEON_FLAG_READ_ONLY,
- RADV_BO_PRIORITY_CS);
- ptr = ws->buffer_map(bos[0]);
-
- if (preamble_cs) {
- memcpy(ptr, preamble_cs->buf, preamble_cs->cdw * 4);
- ptr += preamble_cs->cdw;
- }
-
- for (unsigned j = 0; j < cnt; ++j) {
- struct radv_amdgpu_cs *cs2 = radv_amdgpu_cs(cs_array[i + j]);
- memcpy(ptr, cs2->base.buf, 4 * cs2->base.cdw);
- ptr += cs2->base.cdw;
-
- }
-
- for (unsigned j = 0; j < pad_words; ++j)
- *ptr++ = pad_word;
-
- ibs[0].size = size;
- ibs[0].ib_mc_address = radv_buffer_get_va(bos[0]);
- ibs[0].flags = 0;
- }
-
- u_rwlock_rdlock(&aws->global_bo_list.lock);
-
- result = radv_amdgpu_get_bo_list(cs0->ws, &cs_array[i], cnt,
- (struct radv_amdgpu_winsys_bo **)bos,
- number_of_ibs, preamble_cs,
- &num_handles, &handles);
- if (result != VK_SUCCESS) {
- free(ibs);
- free(bos);
- u_rwlock_rdunlock(&aws->global_bo_list.lock);
- return result;
- }
-
- request.ip_type = cs0->hw_ip;
- request.ip_instance = 0;
- request.ring = queue_idx;
- request.handles = handles;
- request.num_handles = num_handles;
- request.number_of_ibs = number_of_ibs;
- request.ibs = ibs;
-
- sem_info->cs_emit_signal = (i == cs_count - cnt) ? emit_signal_sem : false;
- result = radv_amdgpu_cs_submit(ctx, &request, sem_info);
-
- free(request.handles);
- u_rwlock_rdunlock(&aws->global_bo_list.lock);
-
- for (unsigned j = 0; j < number_of_ibs; j++) {
- ws->buffer_destroy(ws, bos[j]);
- }
-
- free(ibs);
- free(bos);
-
- if (result != VK_SUCCESS)
- return result;
-
- i += cnt;
- }
-
- radv_assign_last_submit(ctx, &request);
-
- return VK_SUCCESS;
+ struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
+ struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[0]);
+ struct radeon_winsys *ws = (struct radeon_winsys *)cs0->ws;
+ struct radv_amdgpu_winsys *aws = cs0->ws;
+ struct radv_amdgpu_cs_request request;
+ uint32_t pad_word = PKT3_NOP_PAD;
+ bool emit_signal_sem = sem_info->cs_emit_signal;
+ VkResult result;
+
+ if (radv_amdgpu_winsys(ws)->info.chip_class == GFX6)
+ pad_word = 0x80000000;
+
+ assert(cs_count);
+
+ for (unsigned i = 0; i < cs_count;) {
+ struct amdgpu_cs_ib_info *ibs;
+ struct radeon_winsys_bo **bos;
+ struct radeon_cmdbuf *preamble_cs = i ? continue_preamble_cs : initial_preamble_cs;
+ struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]);
+ struct drm_amdgpu_bo_list_entry *handles = NULL;
+ unsigned num_handles = 0;
+ unsigned number_of_ibs;
+ uint32_t *ptr;
+ unsigned cnt = 0;
+ unsigned pad_words = 0;
+
+ /* Compute the number of IBs for this submit. */
+ number_of_ibs = cs->num_old_cs_buffers + 1;
+
+ ibs = malloc(number_of_ibs * sizeof(*ibs));
+ if (!ibs)
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+ bos = malloc(number_of_ibs * sizeof(*bos));
+ if (!bos) {
+ free(ibs);
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+ }
+
+ if (number_of_ibs > 1) {
+ /* Special path when the maximum size in dwords has
+ * been reached because we need to handle more than one
+ * IB per submit.
+ */
+ struct radeon_cmdbuf **new_cs_array;
+ unsigned idx = 0;
+
+ new_cs_array = malloc(cs->num_old_cs_buffers * sizeof(*new_cs_array));
+ assert(new_cs_array);
+
+ for (unsigned j = 0; j < cs->num_old_cs_buffers; j++)
+ new_cs_array[idx++] = &cs->old_cs_buffers[j];
+ new_cs_array[idx++] = cs_array[i];
+
+ for (unsigned j = 0; j < number_of_ibs; j++) {
+ struct radeon_cmdbuf *rcs = new_cs_array[j];
+ bool needs_preamble = preamble_cs && j == 0;
+ unsigned size = 0;
+
+ if (needs_preamble)
+ size += preamble_cs->cdw;
+ size += rcs->cdw;
+
+ assert(size < 0xffff8);
+
+ while (!size || (size & 7)) {
+ size++;
+ pad_words++;
+ }
+
+ bos[j] = ws->buffer_create(
+ ws, 4 * size, 4096, aws->cs_bo_domain,
+ RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY,
+ RADV_BO_PRIORITY_CS);
+ ptr = ws->buffer_map(bos[j]);
+
+ if (needs_preamble) {
+ memcpy(ptr, preamble_cs->buf, preamble_cs->cdw * 4);
+ ptr += preamble_cs->cdw;
+ }
+
+ memcpy(ptr, rcs->buf, 4 * rcs->cdw);
+ ptr += rcs->cdw;
+
+ for (unsigned k = 0; k < pad_words; ++k)
+ *ptr++ = pad_word;
+
+ ibs[j].size = size;
+ ibs[j].ib_mc_address = radv_buffer_get_va(bos[j]);
+ ibs[j].flags = 0;
+ }
+
+ cnt++;
+ free(new_cs_array);
+ } else {
+ unsigned size = 0;
+
+ if (preamble_cs)
+ size += preamble_cs->cdw;
+
+ while (i + cnt < cs_count &&
+ 0xffff8 - size >= radv_amdgpu_cs(cs_array[i + cnt])->base.cdw) {
+ size += radv_amdgpu_cs(cs_array[i + cnt])->base.cdw;
+ ++cnt;
+ }
+
+ while (!size || (size & 7)) {
+ size++;
+ pad_words++;
+ }
+ assert(cnt);
+
+ bos[0] = ws->buffer_create(
+ ws, 4 * size, 4096, aws->cs_bo_domain,
+ RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY,
+ RADV_BO_PRIORITY_CS);
+ ptr = ws->buffer_map(bos[0]);
+
+ if (preamble_cs) {
+ memcpy(ptr, preamble_cs->buf, preamble_cs->cdw * 4);
+ ptr += preamble_cs->cdw;
+ }
+
+ for (unsigned j = 0; j < cnt; ++j) {
+ struct radv_amdgpu_cs *cs2 = radv_amdgpu_cs(cs_array[i + j]);
+ memcpy(ptr, cs2->base.buf, 4 * cs2->base.cdw);
+ ptr += cs2->base.cdw;
+ }
+
+ for (unsigned j = 0; j < pad_words; ++j)
+ *ptr++ = pad_word;
+
+ ibs[0].size = size;
+ ibs[0].ib_mc_address = radv_buffer_get_va(bos[0]);
+ ibs[0].flags = 0;
+ }
+
+ u_rwlock_rdlock(&aws->global_bo_list.lock);
+
+ result =
+ radv_amdgpu_get_bo_list(cs0->ws, &cs_array[i], cnt, (struct radv_amdgpu_winsys_bo **)bos,
+ number_of_ibs, preamble_cs, &num_handles, &handles);
+ if (result != VK_SUCCESS) {
+ free(ibs);
+ free(bos);
+ u_rwlock_rdunlock(&aws->global_bo_list.lock);
+ return result;
+ }
+
+ request.ip_type = cs0->hw_ip;
+ request.ip_instance = 0;
+ request.ring = queue_idx;
+ request.handles = handles;
+ request.num_handles = num_handles;
+ request.number_of_ibs = number_of_ibs;
+ request.ibs = ibs;
+
+ sem_info->cs_emit_signal = (i == cs_count - cnt) ? emit_signal_sem : false;
+ result = radv_amdgpu_cs_submit(ctx, &request, sem_info);
+
+ free(request.handles);
+ u_rwlock_rdunlock(&aws->global_bo_list.lock);
+
+ for (unsigned j = 0; j < number_of_ibs; j++) {
+ ws->buffer_destroy(ws, bos[j]);
+ }
+
+ free(ibs);
+ free(bos);
+
+ if (result != VK_SUCCESS)
+ return result;
+
+ i += cnt;
+ }
+
+ radv_assign_last_submit(ctx, &request);
+
+ return VK_SUCCESS;
}
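The sysmem path above copies each command stream into a freshly allocated CPU-visible buffer and appends NOP dwords (0x80000000 on GFX6, PKT3_NOP_PAD otherwise) until the copy is a non-zero multiple of eight dwords. A minimal standalone sketch of that padding rule, with an invented helper name:

   #include <stdio.h>

   /* Mirrors the `while (!size || (size & 7))` loops above: grow the IB size
    * to a non-zero multiple of 8 dwords and count the padding words needed. */
   static unsigned
   padded_ib_size(unsigned size_dw, unsigned *pad_words)
   {
      *pad_words = 0;
      while (!size_dw || (size_dw & 7)) {
         size_dw++;
         (*pad_words)++;
      }
      return size_dw;
   }

   int
   main(void)
   {
      unsigned pad;
      unsigned size = padded_ib_size(13, &pad);
      printf("%u dwords, %u pad words\n", size, pad); /* 16 dwords, 3 pad words */
      return 0;
   }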
-static VkResult radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx,
- int queue_idx,
- struct radeon_cmdbuf **cs_array,
- unsigned cs_count,
- struct radeon_cmdbuf *initial_preamble_cs,
- struct radeon_cmdbuf *continue_preamble_cs,
- struct radv_winsys_sem_info *sem_info,
- bool can_patch)
+static VkResult
+radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx, int queue_idx,
+ struct radeon_cmdbuf **cs_array, unsigned cs_count,
+ struct radeon_cmdbuf *initial_preamble_cs,
+ struct radeon_cmdbuf *continue_preamble_cs,
+ struct radv_winsys_sem_info *sem_info, bool can_patch)
{
- struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[0]);
- VkResult result;
-
- assert(sem_info);
- if (!cs->ws->use_ib_bos) {
- result = radv_amdgpu_winsys_cs_submit_sysmem(_ctx, queue_idx, sem_info, cs_array,
- cs_count, initial_preamble_cs, continue_preamble_cs);
- } else if (can_patch) {
- result = radv_amdgpu_winsys_cs_submit_chained(_ctx, queue_idx, sem_info, cs_array,
- cs_count, initial_preamble_cs);
- } else {
- result = radv_amdgpu_winsys_cs_submit_fallback(_ctx, queue_idx, sem_info, cs_array,
- cs_count, initial_preamble_cs);
- }
-
- return result;
+ struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[0]);
+ VkResult result;
+
+ assert(sem_info);
+ if (!cs->ws->use_ib_bos) {
+ result = radv_amdgpu_winsys_cs_submit_sysmem(_ctx, queue_idx, sem_info, cs_array, cs_count,
+ initial_preamble_cs, continue_preamble_cs);
+ } else if (can_patch) {
+ result = radv_amdgpu_winsys_cs_submit_chained(_ctx, queue_idx, sem_info, cs_array, cs_count,
+ initial_preamble_cs);
+ } else {
+ result = radv_amdgpu_winsys_cs_submit_fallback(_ctx, queue_idx, sem_info, cs_array, cs_count,
+ initial_preamble_cs);
+ }
+
+ return result;
}
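For reference, the dispatcher above reduces to a three-way choice driven by two flags. A minimal sketch with invented names (the real functions also forward the semaphore info and preamble command streams):

   #include <stdbool.h>

   enum submit_path { SUBMIT_SYSMEM, SUBMIT_CHAINED, SUBMIT_FALLBACK };

   /* Sysmem when IB buffer objects are unusable (use_ib_bos is only set on
    * GFX7 and newer), chained when the command streams may be patched to jump
    * into one another, otherwise a fallback that places every IB in a single
    * request without chaining. */
   static enum submit_path
   pick_submit_path(bool use_ib_bos, bool can_patch)
   {
      if (!use_ib_bos)
         return SUBMIT_SYSMEM;
      if (can_patch)
         return SUBMIT_CHAINED;
      return SUBMIT_FALLBACK;
   }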
-static void *radv_amdgpu_winsys_get_cpu_addr(void *_cs, uint64_t addr)
+static void *
+radv_amdgpu_winsys_get_cpu_addr(void *_cs, uint64_t addr)
{
- struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs;
- void *ret = NULL;
-
- if (!cs->ib_buffer)
- return NULL;
- for (unsigned i = 0; i <= cs->num_old_ib_buffers; ++i) {
- struct radv_amdgpu_winsys_bo *bo;
-
- bo = (struct radv_amdgpu_winsys_bo*)
- (i == cs->num_old_ib_buffers ? cs->ib_buffer : cs->old_ib_buffers[i]);
- if (addr >= bo->base.va && addr - bo->base.va < bo->size) {
- if (amdgpu_bo_cpu_map(bo->bo, &ret) == 0)
- return (char *)ret + (addr - bo->base.va);
- }
- }
- u_rwlock_rdlock(&cs->ws->global_bo_list.lock);
- for (uint32_t i = 0; i < cs->ws->global_bo_list.count; i++) {
- struct radv_amdgpu_winsys_bo *bo = cs->ws->global_bo_list.bos[i];
- if (addr >= bo->base.va && addr - bo->base.va < bo->size) {
- if (amdgpu_bo_cpu_map(bo->bo, &ret) == 0) {
- u_rwlock_rdunlock(&cs->ws->global_bo_list.lock);
- return (char *)ret + (addr - bo->base.va);
- }
- }
- }
- u_rwlock_rdunlock(&cs->ws->global_bo_list.lock);
-
- return ret;
+ struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs;
+ void *ret = NULL;
+
+ if (!cs->ib_buffer)
+ return NULL;
+ for (unsigned i = 0; i <= cs->num_old_ib_buffers; ++i) {
+ struct radv_amdgpu_winsys_bo *bo;
+
+ bo = (struct radv_amdgpu_winsys_bo *)(i == cs->num_old_ib_buffers ? cs->ib_buffer
+ : cs->old_ib_buffers[i]);
+ if (addr >= bo->base.va && addr - bo->base.va < bo->size) {
+ if (amdgpu_bo_cpu_map(bo->bo, &ret) == 0)
+ return (char *)ret + (addr - bo->base.va);
+ }
+ }
+ u_rwlock_rdlock(&cs->ws->global_bo_list.lock);
+ for (uint32_t i = 0; i < cs->ws->global_bo_list.count; i++) {
+ struct radv_amdgpu_winsys_bo *bo = cs->ws->global_bo_list.bos[i];
+ if (addr >= bo->base.va && addr - bo->base.va < bo->size) {
+ if (amdgpu_bo_cpu_map(bo->bo, &ret) == 0) {
+ u_rwlock_rdunlock(&cs->ws->global_bo_list.lock);
+ return (char *)ret + (addr - bo->base.va);
+ }
+ }
+ }
+ u_rwlock_rdunlock(&cs->ws->global_bo_list.lock);
+
+ return ret;
}
-static void radv_amdgpu_winsys_cs_dump(struct radeon_cmdbuf *_cs,
- FILE* file,
- const int *trace_ids, int trace_id_count)
+static void
+radv_amdgpu_winsys_cs_dump(struct radeon_cmdbuf *_cs, FILE *file, const int *trace_ids,
+ int trace_id_count)
{
- struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs;
- void *ib = cs->base.buf;
- int num_dw = cs->base.cdw;
-
- if (cs->ws->use_ib_bos) {
- ib = radv_amdgpu_winsys_get_cpu_addr(cs, cs->ib.ib_mc_address);
- num_dw = cs->ib.size;
- }
- assert(ib);
- ac_parse_ib(file, ib, num_dw, trace_ids, trace_id_count, "main IB",
- cs->ws->info.chip_class, radv_amdgpu_winsys_get_cpu_addr, cs);
+ struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs;
+ void *ib = cs->base.buf;
+ int num_dw = cs->base.cdw;
+
+ if (cs->ws->use_ib_bos) {
+ ib = radv_amdgpu_winsys_get_cpu_addr(cs, cs->ib.ib_mc_address);
+ num_dw = cs->ib.size;
+ }
+ assert(ib);
+ ac_parse_ib(file, ib, num_dw, trace_ids, trace_id_count, "main IB", cs->ws->info.chip_class,
+ radv_amdgpu_winsys_get_cpu_addr, cs);
}
-static uint32_t radv_to_amdgpu_priority(enum radeon_ctx_priority radv_priority)
+static uint32_t
+radv_to_amdgpu_priority(enum radeon_ctx_priority radv_priority)
{
- switch (radv_priority) {
- case RADEON_CTX_PRIORITY_REALTIME:
- return AMDGPU_CTX_PRIORITY_VERY_HIGH;
- case RADEON_CTX_PRIORITY_HIGH:
- return AMDGPU_CTX_PRIORITY_HIGH;
- case RADEON_CTX_PRIORITY_MEDIUM:
- return AMDGPU_CTX_PRIORITY_NORMAL;
- case RADEON_CTX_PRIORITY_LOW:
- return AMDGPU_CTX_PRIORITY_LOW;
- default:
- unreachable("Invalid context priority");
- }
+ switch (radv_priority) {
+ case RADEON_CTX_PRIORITY_REALTIME:
+ return AMDGPU_CTX_PRIORITY_VERY_HIGH;
+ case RADEON_CTX_PRIORITY_HIGH:
+ return AMDGPU_CTX_PRIORITY_HIGH;
+ case RADEON_CTX_PRIORITY_MEDIUM:
+ return AMDGPU_CTX_PRIORITY_NORMAL;
+ case RADEON_CTX_PRIORITY_LOW:
+ return AMDGPU_CTX_PRIORITY_LOW;
+ default:
+ unreachable("Invalid context priority");
+ }
}
-static VkResult radv_amdgpu_ctx_create(struct radeon_winsys *_ws,
- enum radeon_ctx_priority priority,
- struct radeon_winsys_ctx **rctx)
+static VkResult
+radv_amdgpu_ctx_create(struct radeon_winsys *_ws, enum radeon_ctx_priority priority,
+ struct radeon_winsys_ctx **rctx)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
- struct radv_amdgpu_ctx *ctx = CALLOC_STRUCT(radv_amdgpu_ctx);
- uint32_t amdgpu_priority = radv_to_amdgpu_priority(priority);
- VkResult result;
- int r;
-
- if (!ctx)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
-
- r = amdgpu_cs_ctx_create2(ws->dev, amdgpu_priority, &ctx->ctx);
- if (r && r == -EACCES) {
- result = VK_ERROR_NOT_PERMITTED_EXT;
- goto fail_create;
- } else if (r) {
- fprintf(stderr, "amdgpu: radv_amdgpu_cs_ctx_create2 failed. (%i)\n", r);
- result = VK_ERROR_OUT_OF_HOST_MEMORY;
- goto fail_create;
- }
- ctx->ws = ws;
-
- assert(AMDGPU_HW_IP_NUM * MAX_RINGS_PER_TYPE * sizeof(uint64_t) <= 4096);
- ctx->fence_bo = ws->base.buffer_create(&ws->base, 4096, 8,
- RADEON_DOMAIN_GTT,
- RADEON_FLAG_CPU_ACCESS |
- RADEON_FLAG_NO_INTERPROCESS_SHARING,
- RADV_BO_PRIORITY_CS);
- if (!ctx->fence_bo) {
- result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
- goto fail_alloc;
- }
-
- ctx->fence_map = (uint64_t *)ws->base.buffer_map(ctx->fence_bo);
- if (!ctx->fence_map) {
- result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
- goto fail_map;
- }
-
- memset(ctx->fence_map, 0, 4096);
-
- *rctx = (struct radeon_winsys_ctx *)ctx;
- return VK_SUCCESS;
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ struct radv_amdgpu_ctx *ctx = CALLOC_STRUCT(radv_amdgpu_ctx);
+ uint32_t amdgpu_priority = radv_to_amdgpu_priority(priority);
+ VkResult result;
+ int r;
+
+ if (!ctx)
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+ r = amdgpu_cs_ctx_create2(ws->dev, amdgpu_priority, &ctx->ctx);
+ if (r && r == -EACCES) {
+ result = VK_ERROR_NOT_PERMITTED_EXT;
+ goto fail_create;
+ } else if (r) {
+ fprintf(stderr, "amdgpu: radv_amdgpu_cs_ctx_create2 failed. (%i)\n", r);
+ result = VK_ERROR_OUT_OF_HOST_MEMORY;
+ goto fail_create;
+ }
+ ctx->ws = ws;
+
+ assert(AMDGPU_HW_IP_NUM * MAX_RINGS_PER_TYPE * sizeof(uint64_t) <= 4096);
+ ctx->fence_bo = ws->base.buffer_create(
+ &ws->base, 4096, 8, RADEON_DOMAIN_GTT,
+ RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING, RADV_BO_PRIORITY_CS);
+ if (!ctx->fence_bo) {
+ result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
+ goto fail_alloc;
+ }
+
+ ctx->fence_map = (uint64_t *)ws->base.buffer_map(ctx->fence_bo);
+ if (!ctx->fence_map) {
+ result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
+ goto fail_map;
+ }
+
+ memset(ctx->fence_map, 0, 4096);
+
+ *rctx = (struct radeon_winsys_ctx *)ctx;
+ return VK_SUCCESS;
fail_map:
- ws->base.buffer_destroy(&ws->base, ctx->fence_bo);
+ ws->base.buffer_destroy(&ws->base, ctx->fence_bo);
fail_alloc:
- amdgpu_cs_ctx_free(ctx->ctx);
+ amdgpu_cs_ctx_free(ctx->ctx);
fail_create:
- FREE(ctx);
- return result;
+ FREE(ctx);
+ return result;
}
-static void radv_amdgpu_ctx_destroy(struct radeon_winsys_ctx *rwctx)
+static void
+radv_amdgpu_ctx_destroy(struct radeon_winsys_ctx *rwctx)
{
- struct radv_amdgpu_ctx *ctx = (struct radv_amdgpu_ctx *)rwctx;
- ctx->ws->base.buffer_destroy(&ctx->ws->base, ctx->fence_bo);
- amdgpu_cs_ctx_free(ctx->ctx);
- FREE(ctx);
+ struct radv_amdgpu_ctx *ctx = (struct radv_amdgpu_ctx *)rwctx;
+ ctx->ws->base.buffer_destroy(&ctx->ws->base, ctx->fence_bo);
+ amdgpu_cs_ctx_free(ctx->ctx);
+ FREE(ctx);
}
-static bool radv_amdgpu_ctx_wait_idle(struct radeon_winsys_ctx *rwctx,
- enum ring_type ring_type, int ring_index)
+static bool
+radv_amdgpu_ctx_wait_idle(struct radeon_winsys_ctx *rwctx, enum ring_type ring_type, int ring_index)
{
- struct radv_amdgpu_ctx *ctx = (struct radv_amdgpu_ctx *)rwctx;
- int ip_type = ring_to_hw_ip(ring_type);
+ struct radv_amdgpu_ctx *ctx = (struct radv_amdgpu_ctx *)rwctx;
+ int ip_type = ring_to_hw_ip(ring_type);
- if (ctx->last_submission[ip_type][ring_index].fence.fence) {
- uint32_t expired;
- int ret = amdgpu_cs_query_fence_status(&ctx->last_submission[ip_type][ring_index].fence,
- 1000000000ull, 0, &expired);
+ if (ctx->last_submission[ip_type][ring_index].fence.fence) {
+ uint32_t expired;
+ int ret = amdgpu_cs_query_fence_status(&ctx->last_submission[ip_type][ring_index].fence,
+ 1000000000ull, 0, &expired);
- if (ret || !expired)
- return false;
- }
+ if (ret || !expired)
+ return false;
+ }
- return true;
+ return true;
}
-static void *radv_amdgpu_cs_alloc_syncobj_chunk(struct radv_winsys_sem_counts *counts,
- const uint32_t *syncobj_override,
- struct drm_amdgpu_cs_chunk *chunk, int chunk_id)
+static void *
+radv_amdgpu_cs_alloc_syncobj_chunk(struct radv_winsys_sem_counts *counts,
+ const uint32_t *syncobj_override,
+ struct drm_amdgpu_cs_chunk *chunk, int chunk_id)
{
- const uint32_t *src = syncobj_override ? syncobj_override : counts->syncobj;
- struct drm_amdgpu_cs_chunk_sem *syncobj = malloc(sizeof(struct drm_amdgpu_cs_chunk_sem) * counts->syncobj_count);
- if (!syncobj)
- return NULL;
-
- for (unsigned i = 0; i < counts->syncobj_count; i++) {
- struct drm_amdgpu_cs_chunk_sem *sem = &syncobj[i];
- sem->handle = src[i];
- }
-
- chunk->chunk_id = chunk_id;
- chunk->length_dw = sizeof(struct drm_amdgpu_cs_chunk_sem) / 4 * counts->syncobj_count;
- chunk->chunk_data = (uint64_t)(uintptr_t)syncobj;
- return syncobj;
+ const uint32_t *src = syncobj_override ? syncobj_override : counts->syncobj;
+ struct drm_amdgpu_cs_chunk_sem *syncobj =
+ malloc(sizeof(struct drm_amdgpu_cs_chunk_sem) * counts->syncobj_count);
+ if (!syncobj)
+ return NULL;
+
+ for (unsigned i = 0; i < counts->syncobj_count; i++) {
+ struct drm_amdgpu_cs_chunk_sem *sem = &syncobj[i];
+ sem->handle = src[i];
+ }
+
+ chunk->chunk_id = chunk_id;
+ chunk->length_dw = sizeof(struct drm_amdgpu_cs_chunk_sem) / 4 * counts->syncobj_count;
+ chunk->chunk_data = (uint64_t)(uintptr_t)syncobj;
+ return syncobj;
}
static void *
@@ -1278,442 +1243,431 @@ radv_amdgpu_cs_alloc_timeline_syncobj_chunk(struct radv_winsys_sem_counts *count
const uint32_t *syncobj_override,
struct drm_amdgpu_cs_chunk *chunk, int chunk_id)
{
- const uint32_t *src = syncobj_override ? syncobj_override : counts->syncobj;
- struct drm_amdgpu_cs_chunk_syncobj *syncobj = malloc(sizeof(struct drm_amdgpu_cs_chunk_syncobj) *
- (counts->syncobj_count + counts->timeline_syncobj_count));
- if (!syncobj)
- return NULL;
-
- for (unsigned i = 0; i < counts->syncobj_count; i++) {
- struct drm_amdgpu_cs_chunk_syncobj *sem = &syncobj[i];
- sem->handle = src[i];
- sem->flags = 0;
- sem->point = 0;
- }
-
- for (unsigned i = 0; i < counts->timeline_syncobj_count; i++) {
- struct drm_amdgpu_cs_chunk_syncobj *sem = &syncobj[i + counts->syncobj_count];
- sem->handle = counts->syncobj[i + counts->syncobj_count];
- sem->flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;
- sem->point = counts->points[i];
- }
-
- chunk->chunk_id = chunk_id;
- chunk->length_dw = sizeof(struct drm_amdgpu_cs_chunk_syncobj) / 4 *
- (counts->syncobj_count + counts->timeline_syncobj_count);
- chunk->chunk_data = (uint64_t)(uintptr_t)syncobj;
- return syncobj;
+ const uint32_t *src = syncobj_override ? syncobj_override : counts->syncobj;
+ struct drm_amdgpu_cs_chunk_syncobj *syncobj =
+ malloc(sizeof(struct drm_amdgpu_cs_chunk_syncobj) *
+ (counts->syncobj_count + counts->timeline_syncobj_count));
+ if (!syncobj)
+ return NULL;
+
+ for (unsigned i = 0; i < counts->syncobj_count; i++) {
+ struct drm_amdgpu_cs_chunk_syncobj *sem = &syncobj[i];
+ sem->handle = src[i];
+ sem->flags = 0;
+ sem->point = 0;
+ }
+
+ for (unsigned i = 0; i < counts->timeline_syncobj_count; i++) {
+ struct drm_amdgpu_cs_chunk_syncobj *sem = &syncobj[i + counts->syncobj_count];
+ sem->handle = counts->syncobj[i + counts->syncobj_count];
+ sem->flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;
+ sem->point = counts->points[i];
+ }
+
+ chunk->chunk_id = chunk_id;
+ chunk->length_dw = sizeof(struct drm_amdgpu_cs_chunk_syncobj) / 4 *
+ (counts->syncobj_count + counts->timeline_syncobj_count);
+ chunk->chunk_data = (uint64_t)(uintptr_t)syncobj;
+ return syncobj;
}
-static int radv_amdgpu_cache_alloc_syncobjs(struct radv_amdgpu_winsys *ws, unsigned count, uint32_t *dst)
+static int
+radv_amdgpu_cache_alloc_syncobjs(struct radv_amdgpu_winsys *ws, unsigned count, uint32_t *dst)
{
- pthread_mutex_lock(&ws->syncobj_lock);
- if (count > ws->syncobj_capacity) {
- if (ws->syncobj_capacity > UINT32_MAX / 2)
- goto fail;
-
- unsigned new_capacity = MAX2(count, ws->syncobj_capacity * 2);
- uint32_t *n = realloc(ws->syncobj, new_capacity * sizeof(*ws->syncobj));
- if (!n)
- goto fail;
- ws->syncobj_capacity = new_capacity;
- ws->syncobj = n;
- }
-
- while(ws->syncobj_count < count) {
- int r = amdgpu_cs_create_syncobj(ws->dev, ws->syncobj + ws->syncobj_count);
- if (r)
- goto fail;
- ++ws->syncobj_count;
- }
-
- for (unsigned i = 0; i < count; ++i)
- dst[i] = ws->syncobj[--ws->syncobj_count];
-
- pthread_mutex_unlock(&ws->syncobj_lock);
- return 0;
+ pthread_mutex_lock(&ws->syncobj_lock);
+ if (count > ws->syncobj_capacity) {
+ if (ws->syncobj_capacity > UINT32_MAX / 2)
+ goto fail;
+
+ unsigned new_capacity = MAX2(count, ws->syncobj_capacity * 2);
+ uint32_t *n = realloc(ws->syncobj, new_capacity * sizeof(*ws->syncobj));
+ if (!n)
+ goto fail;
+ ws->syncobj_capacity = new_capacity;
+ ws->syncobj = n;
+ }
+
+ while (ws->syncobj_count < count) {
+ int r = amdgpu_cs_create_syncobj(ws->dev, ws->syncobj + ws->syncobj_count);
+ if (r)
+ goto fail;
+ ++ws->syncobj_count;
+ }
+
+ for (unsigned i = 0; i < count; ++i)
+ dst[i] = ws->syncobj[--ws->syncobj_count];
+
+ pthread_mutex_unlock(&ws->syncobj_lock);
+ return 0;
fail:
- pthread_mutex_unlock(&ws->syncobj_lock);
- return -ENOMEM;
+ pthread_mutex_unlock(&ws->syncobj_lock);
+ return -ENOMEM;
}
-static void radv_amdgpu_cache_free_syncobjs(struct radv_amdgpu_winsys *ws, unsigned count, uint32_t *src)
+static void
+radv_amdgpu_cache_free_syncobjs(struct radv_amdgpu_winsys *ws, unsigned count, uint32_t *src)
{
- pthread_mutex_lock(&ws->syncobj_lock);
-
- uint32_t cache_count = MIN2(count, UINT32_MAX - ws->syncobj_count);
- if (cache_count + ws->syncobj_count > ws->syncobj_capacity) {
- unsigned new_capacity = MAX2(ws->syncobj_count + cache_count, ws->syncobj_capacity * 2);
- uint32_t* n = realloc(ws->syncobj, new_capacity * sizeof(*ws->syncobj));
- if (n) {
- ws->syncobj_capacity = new_capacity;
- ws->syncobj = n;
- }
- }
-
- for (unsigned i = 0; i < count; ++i) {
- if (ws->syncobj_count < ws->syncobj_capacity)
- ws->syncobj[ws->syncobj_count++] = src[i];
- else
- amdgpu_cs_destroy_syncobj(ws->dev, src[i]);
- }
-
- pthread_mutex_unlock(&ws->syncobj_lock);
-
+ pthread_mutex_lock(&ws->syncobj_lock);
+
+ uint32_t cache_count = MIN2(count, UINT32_MAX - ws->syncobj_count);
+ if (cache_count + ws->syncobj_count > ws->syncobj_capacity) {
+ unsigned new_capacity = MAX2(ws->syncobj_count + cache_count, ws->syncobj_capacity * 2);
+ uint32_t *n = realloc(ws->syncobj, new_capacity * sizeof(*ws->syncobj));
+ if (n) {
+ ws->syncobj_capacity = new_capacity;
+ ws->syncobj = n;
+ }
+ }
+
+ for (unsigned i = 0; i < count; ++i) {
+ if (ws->syncobj_count < ws->syncobj_capacity)
+ ws->syncobj[ws->syncobj_count++] = src[i];
+ else
+ amdgpu_cs_destroy_syncobj(ws->dev, src[i]);
+ }
+
+ pthread_mutex_unlock(&ws->syncobj_lock);
}
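The two helpers above form a small mutex-protected LIFO cache of kernel syncobj handles: the alloc side creates new handles only when the cache holds fewer than requested and then pops from the top, while the free side pushes handles back and destroys whatever no longer fits. A condensed sketch of the free side, with illustrative types and the kernel call left as a comment:

   #include <pthread.h>
   #include <stdint.h>
   #include <stdlib.h>

   struct syncobj_cache {
      pthread_mutex_t lock;
      uint32_t *handles;
      unsigned count, capacity;
   };

   static void
   cache_free_syncobjs(struct syncobj_cache *c, unsigned n, const uint32_t *src)
   {
      pthread_mutex_lock(&c->lock);

      /* Try to grow the backing array so the returned handles fit. */
      if (c->count + n > c->capacity) {
         unsigned new_cap = c->capacity * 2 > c->count + n ? c->capacity * 2 : c->count + n;
         uint32_t *tmp = realloc(c->handles, new_cap * sizeof(*tmp));
         if (tmp) {
            c->handles = tmp;
            c->capacity = new_cap;
         }
      }

      /* Cache what fits; the real driver destroys the rest with
       * amdgpu_cs_destroy_syncobj(). */
      for (unsigned i = 0; i < n; i++) {
         if (c->count < c->capacity)
            c->handles[c->count++] = src[i];
      }

      pthread_mutex_unlock(&c->lock);
   }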
-static int radv_amdgpu_cs_prepare_syncobjs(struct radv_amdgpu_winsys *ws,
- struct radv_winsys_sem_counts *counts,
- uint32_t **out_syncobjs)
+static int
+radv_amdgpu_cs_prepare_syncobjs(struct radv_amdgpu_winsys *ws,
+ struct radv_winsys_sem_counts *counts, uint32_t **out_syncobjs)
{
- int r = 0;
-
- if (!ws->info.has_timeline_syncobj || !counts->syncobj_count) {
- *out_syncobjs = NULL;
- return 0;
- }
-
- *out_syncobjs = malloc(counts->syncobj_count * sizeof(**out_syncobjs));
- if (!*out_syncobjs)
- return -ENOMEM;
-
- r = radv_amdgpu_cache_alloc_syncobjs(ws, counts->syncobj_count, *out_syncobjs);
- if (r)
- return r;
-
- for (unsigned i = 0; i < counts->syncobj_count; ++i) {
- r = amdgpu_cs_syncobj_transfer(ws->dev, (*out_syncobjs)[i], 0, counts->syncobj[i], 0, DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT);
- if (r)
- goto fail;
- }
-
- r = amdgpu_cs_syncobj_reset(ws->dev, counts->syncobj, counts->syncobj_reset_count);
- if (r)
- goto fail;
-
- return 0;
+ int r = 0;
+
+ if (!ws->info.has_timeline_syncobj || !counts->syncobj_count) {
+ *out_syncobjs = NULL;
+ return 0;
+ }
+
+ *out_syncobjs = malloc(counts->syncobj_count * sizeof(**out_syncobjs));
+ if (!*out_syncobjs)
+ return -ENOMEM;
+
+ r = radv_amdgpu_cache_alloc_syncobjs(ws, counts->syncobj_count, *out_syncobjs);
+ if (r)
+ return r;
+
+ for (unsigned i = 0; i < counts->syncobj_count; ++i) {
+ r = amdgpu_cs_syncobj_transfer(ws->dev, (*out_syncobjs)[i], 0, counts->syncobj[i], 0,
+ DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT);
+ if (r)
+ goto fail;
+ }
+
+ r = amdgpu_cs_syncobj_reset(ws->dev, counts->syncobj, counts->syncobj_reset_count);
+ if (r)
+ goto fail;
+
+ return 0;
fail:
- radv_amdgpu_cache_free_syncobjs(ws, counts->syncobj_count, *out_syncobjs);
- free(*out_syncobjs);
- *out_syncobjs = NULL;
- return r;
+ radv_amdgpu_cache_free_syncobjs(ws, counts->syncobj_count, *out_syncobjs);
+ free(*out_syncobjs);
+ *out_syncobjs = NULL;
+ return r;
}
static VkResult
-radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx,
- struct radv_amdgpu_cs_request *request,
- struct radv_winsys_sem_info *sem_info)
+radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx, struct radv_amdgpu_cs_request *request,
+ struct radv_winsys_sem_info *sem_info)
{
- int r;
- int num_chunks;
- int size;
- struct drm_amdgpu_cs_chunk *chunks;
- struct drm_amdgpu_cs_chunk_data *chunk_data;
- struct drm_amdgpu_bo_list_in bo_list_in;
- void *wait_syncobj = NULL, *signal_syncobj = NULL;
- uint32_t *in_syncobjs = NULL;
- int i;
- uint32_t bo_list = 0;
- VkResult result = VK_SUCCESS;
-
- size = request->number_of_ibs + 2 /* user fence */ + 4;
-
- chunks = malloc(sizeof(chunks[0]) * size);
- if (!chunks)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
-
- size = request->number_of_ibs + 1 /* user fence */;
-
- chunk_data = malloc(sizeof(chunk_data[0]) * size);
- if (!chunk_data) {
- result = VK_ERROR_OUT_OF_HOST_MEMORY;
- goto error_out;
- }
-
- num_chunks = request->number_of_ibs;
- for (i = 0; i < request->number_of_ibs; i++) {
- struct amdgpu_cs_ib_info *ib;
- chunks[i].chunk_id = AMDGPU_CHUNK_ID_IB;
- chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
- chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i];
-
- ib = &request->ibs[i];
-
- chunk_data[i].ib_data._pad = 0;
- chunk_data[i].ib_data.va_start = ib->ib_mc_address;
- chunk_data[i].ib_data.ib_bytes = ib->size * 4;
- chunk_data[i].ib_data.ip_type = request->ip_type;
- chunk_data[i].ib_data.ip_instance = request->ip_instance;
- chunk_data[i].ib_data.ring = request->ring;
- chunk_data[i].ib_data.flags = ib->flags;
- }
-
- i = num_chunks++;
- chunks[i].chunk_id = AMDGPU_CHUNK_ID_FENCE;
- chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_fence) / 4;
- chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i];
-
- struct amdgpu_cs_fence_info fence_info;
- fence_info.handle = radv_amdgpu_winsys_bo(ctx->fence_bo)->bo;
- fence_info.offset = (request->ip_type * MAX_RINGS_PER_TYPE + request->ring) * sizeof(uint64_t);
- amdgpu_cs_chunk_fence_info_to_data(&fence_info, &chunk_data[i]);
-
- if ((sem_info->wait.syncobj_count || sem_info->wait.timeline_syncobj_count) && sem_info->cs_emit_wait) {
- r = radv_amdgpu_cs_prepare_syncobjs(ctx->ws, &sem_info->wait, &in_syncobjs);
- if (r)
- goto error_out;
-
- if (ctx->ws->info.has_timeline_syncobj) {
- wait_syncobj = radv_amdgpu_cs_alloc_timeline_syncobj_chunk(&sem_info->wait,
- in_syncobjs,
- &chunks[num_chunks],
- AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT);
- } else {
- wait_syncobj = radv_amdgpu_cs_alloc_syncobj_chunk(&sem_info->wait,
- in_syncobjs,
- &chunks[num_chunks],
- AMDGPU_CHUNK_ID_SYNCOBJ_IN);
- }
- if (!wait_syncobj) {
- result = VK_ERROR_OUT_OF_HOST_MEMORY;
- goto error_out;
- }
- num_chunks++;
-
- sem_info->cs_emit_wait = false;
- }
-
- if ((sem_info->signal.syncobj_count || sem_info->signal.timeline_syncobj_count) && sem_info->cs_emit_signal) {
- if (ctx->ws->info.has_timeline_syncobj) {
- signal_syncobj = radv_amdgpu_cs_alloc_timeline_syncobj_chunk(&sem_info->signal,
- NULL,
- &chunks[num_chunks],
- AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL);
- } else {
- signal_syncobj = radv_amdgpu_cs_alloc_syncobj_chunk(&sem_info->signal,
- NULL,
- &chunks[num_chunks],
- AMDGPU_CHUNK_ID_SYNCOBJ_OUT);
- }
- if (!signal_syncobj) {
- result = VK_ERROR_OUT_OF_HOST_MEMORY;
- goto error_out;
- }
- num_chunks++;
- }
-
- /* Standard path passing the buffer list via the CS ioctl. */
- bo_list_in.operation = ~0;
- bo_list_in.list_handle = ~0;
- bo_list_in.bo_number = request->num_handles;
- bo_list_in.bo_info_size = sizeof(struct drm_amdgpu_bo_list_entry);
- bo_list_in.bo_info_ptr = (uint64_t)(uintptr_t)request->handles;
-
- chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_BO_HANDLES;
- chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_bo_list_in) / 4;
- chunks[num_chunks].chunk_data = (uintptr_t)&bo_list_in;
- num_chunks++;
-
- r = amdgpu_cs_submit_raw2(ctx->ws->dev,
- ctx->ctx,
- bo_list,
- num_chunks,
- chunks,
- &request->seq_no);
-
- if (r) {
- if (r == -ENOMEM) {
- fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
- result = VK_ERROR_OUT_OF_HOST_MEMORY;
- } else if (r == -ECANCELED) {
- fprintf(stderr, "amdgpu: The CS has been cancelled because the context is lost.\n");
- result = VK_ERROR_DEVICE_LOST;
- } else {
- fprintf(stderr, "amdgpu: The CS has been rejected, "
- "see dmesg for more information (%i).\n", r);
- result = VK_ERROR_UNKNOWN;
- }
- }
-
- if (bo_list)
- amdgpu_bo_list_destroy_raw(ctx->ws->dev, bo_list);
+ int r;
+ int num_chunks;
+ int size;
+ struct drm_amdgpu_cs_chunk *chunks;
+ struct drm_amdgpu_cs_chunk_data *chunk_data;
+ struct drm_amdgpu_bo_list_in bo_list_in;
+ void *wait_syncobj = NULL, *signal_syncobj = NULL;
+ uint32_t *in_syncobjs = NULL;
+ int i;
+ uint32_t bo_list = 0;
+ VkResult result = VK_SUCCESS;
+
+ size = request->number_of_ibs + 2 /* user fence */ + 4;
+
+ chunks = malloc(sizeof(chunks[0]) * size);
+ if (!chunks)
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+ size = request->number_of_ibs + 1 /* user fence */;
+
+ chunk_data = malloc(sizeof(chunk_data[0]) * size);
+ if (!chunk_data) {
+ result = VK_ERROR_OUT_OF_HOST_MEMORY;
+ goto error_out;
+ }
+
+ num_chunks = request->number_of_ibs;
+ for (i = 0; i < request->number_of_ibs; i++) {
+ struct amdgpu_cs_ib_info *ib;
+ chunks[i].chunk_id = AMDGPU_CHUNK_ID_IB;
+ chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
+ chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i];
+
+ ib = &request->ibs[i];
+
+ chunk_data[i].ib_data._pad = 0;
+ chunk_data[i].ib_data.va_start = ib->ib_mc_address;
+ chunk_data[i].ib_data.ib_bytes = ib->size * 4;
+ chunk_data[i].ib_data.ip_type = request->ip_type;
+ chunk_data[i].ib_data.ip_instance = request->ip_instance;
+ chunk_data[i].ib_data.ring = request->ring;
+ chunk_data[i].ib_data.flags = ib->flags;
+ }
+
+ i = num_chunks++;
+ chunks[i].chunk_id = AMDGPU_CHUNK_ID_FENCE;
+ chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_fence) / 4;
+ chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i];
+
+ struct amdgpu_cs_fence_info fence_info;
+ fence_info.handle = radv_amdgpu_winsys_bo(ctx->fence_bo)->bo;
+ fence_info.offset = (request->ip_type * MAX_RINGS_PER_TYPE + request->ring) * sizeof(uint64_t);
+ amdgpu_cs_chunk_fence_info_to_data(&fence_info, &chunk_data[i]);
+
+ if ((sem_info->wait.syncobj_count || sem_info->wait.timeline_syncobj_count) &&
+ sem_info->cs_emit_wait) {
+ r = radv_amdgpu_cs_prepare_syncobjs(ctx->ws, &sem_info->wait, &in_syncobjs);
+ if (r)
+ goto error_out;
+
+ if (ctx->ws->info.has_timeline_syncobj) {
+ wait_syncobj = radv_amdgpu_cs_alloc_timeline_syncobj_chunk(
+ &sem_info->wait, in_syncobjs, &chunks[num_chunks],
+ AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT);
+ } else {
+ wait_syncobj = radv_amdgpu_cs_alloc_syncobj_chunk(
+ &sem_info->wait, in_syncobjs, &chunks[num_chunks], AMDGPU_CHUNK_ID_SYNCOBJ_IN);
+ }
+ if (!wait_syncobj) {
+ result = VK_ERROR_OUT_OF_HOST_MEMORY;
+ goto error_out;
+ }
+ num_chunks++;
+
+ sem_info->cs_emit_wait = false;
+ }
+
+ if ((sem_info->signal.syncobj_count || sem_info->signal.timeline_syncobj_count) &&
+ sem_info->cs_emit_signal) {
+ if (ctx->ws->info.has_timeline_syncobj) {
+ signal_syncobj = radv_amdgpu_cs_alloc_timeline_syncobj_chunk(
+ &sem_info->signal, NULL, &chunks[num_chunks], AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL);
+ } else {
+ signal_syncobj = radv_amdgpu_cs_alloc_syncobj_chunk(
+ &sem_info->signal, NULL, &chunks[num_chunks], AMDGPU_CHUNK_ID_SYNCOBJ_OUT);
+ }
+ if (!signal_syncobj) {
+ result = VK_ERROR_OUT_OF_HOST_MEMORY;
+ goto error_out;
+ }
+ num_chunks++;
+ }
+
+ /* Standard path passing the buffer list via the CS ioctl. */
+ bo_list_in.operation = ~0;
+ bo_list_in.list_handle = ~0;
+ bo_list_in.bo_number = request->num_handles;
+ bo_list_in.bo_info_size = sizeof(struct drm_amdgpu_bo_list_entry);
+ bo_list_in.bo_info_ptr = (uint64_t)(uintptr_t)request->handles;
+
+ chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_BO_HANDLES;
+ chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_bo_list_in) / 4;
+ chunks[num_chunks].chunk_data = (uintptr_t)&bo_list_in;
+ num_chunks++;
+
+ r = amdgpu_cs_submit_raw2(ctx->ws->dev, ctx->ctx, bo_list, num_chunks, chunks, &request->seq_no);
+
+ if (r) {
+ if (r == -ENOMEM) {
+ fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
+ result = VK_ERROR_OUT_OF_HOST_MEMORY;
+ } else if (r == -ECANCELED) {
+ fprintf(stderr, "amdgpu: The CS has been cancelled because the context is lost.\n");
+ result = VK_ERROR_DEVICE_LOST;
+ } else {
+ fprintf(stderr,
+ "amdgpu: The CS has been rejected, "
+ "see dmesg for more information (%i).\n",
+ r);
+ result = VK_ERROR_UNKNOWN;
+ }
+ }
+
+ if (bo_list)
+ amdgpu_bo_list_destroy_raw(ctx->ws->dev, bo_list);
error_out:
- if (in_syncobjs) {
- radv_amdgpu_cache_free_syncobjs(ctx->ws, sem_info->wait.syncobj_count, in_syncobjs);
- free(in_syncobjs);
- }
- free(chunks);
- free(chunk_data);
- free(wait_syncobj);
- free(signal_syncobj);
- return result;
+ if (in_syncobjs) {
+ radv_amdgpu_cache_free_syncobjs(ctx->ws, sem_info->wait.syncobj_count, in_syncobjs);
+ free(in_syncobjs);
+ }
+ free(chunks);
+ free(chunk_data);
+ free(wait_syncobj);
+ free(signal_syncobj);
+ return result;
}
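The ioctl payload assembled above always follows the same order: one IB chunk per entry in request->ibs, the user-fence chunk, an optional wait-syncobj chunk, an optional signal-syncobj chunk, and finally the BO-handles chunk; the `number_of_ibs + 2 /* user fence */ + 4` allocation simply reserves room for the worst case. A tiny sketch of that accounting with an invented helper:

   #include <stdbool.h>

   /* Count the chunks a submission carries, in the order radv_amdgpu_cs_submit()
    * fills its chunk array. */
   static unsigned
   count_cs_chunks(unsigned number_of_ibs, bool emit_wait, bool emit_signal)
   {
      unsigned n = number_of_ibs; /* AMDGPU_CHUNK_ID_IB, one per IB */
      n++;                        /* AMDGPU_CHUNK_ID_FENCE (user fence) */
      if (emit_wait)
         n++;                     /* SYNCOBJ_IN or SYNCOBJ_TIMELINE_WAIT */
      if (emit_signal)
         n++;                     /* SYNCOBJ_OUT or SYNCOBJ_TIMELINE_SIGNAL */
      n++;                        /* AMDGPU_CHUNK_ID_BO_HANDLES */
      return n;
   }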
-static int radv_amdgpu_create_syncobj(struct radeon_winsys *_ws,
- bool create_signaled,
- uint32_t *handle)
+static int
+radv_amdgpu_create_syncobj(struct radeon_winsys *_ws, bool create_signaled, uint32_t *handle)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
- uint32_t flags = 0;
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ uint32_t flags = 0;
- if (create_signaled)
- flags |= DRM_SYNCOBJ_CREATE_SIGNALED;
+ if (create_signaled)
+ flags |= DRM_SYNCOBJ_CREATE_SIGNALED;
- return amdgpu_cs_create_syncobj2(ws->dev, flags, handle);
+ return amdgpu_cs_create_syncobj2(ws->dev, flags, handle);
}
-static void radv_amdgpu_destroy_syncobj(struct radeon_winsys *_ws,
- uint32_t handle)
+static void
+radv_amdgpu_destroy_syncobj(struct radeon_winsys *_ws, uint32_t handle)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
- amdgpu_cs_destroy_syncobj(ws->dev, handle);
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ amdgpu_cs_destroy_syncobj(ws->dev, handle);
}
-static void radv_amdgpu_reset_syncobj(struct radeon_winsys *_ws,
- uint32_t handle)
+static void
+radv_amdgpu_reset_syncobj(struct radeon_winsys *_ws, uint32_t handle)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
- amdgpu_cs_syncobj_reset(ws->dev, &handle, 1);
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ amdgpu_cs_syncobj_reset(ws->dev, &handle, 1);
}
-static void radv_amdgpu_signal_syncobj(struct radeon_winsys *_ws,
- uint32_t handle, uint64_t point)
+static void
+radv_amdgpu_signal_syncobj(struct radeon_winsys *_ws, uint32_t handle, uint64_t point)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
- if (point)
- amdgpu_cs_syncobj_timeline_signal(ws->dev, &handle, &point, 1);
- else
- amdgpu_cs_syncobj_signal(ws->dev, &handle, 1);
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ if (point)
+ amdgpu_cs_syncobj_timeline_signal(ws->dev, &handle, &point, 1);
+ else
+ amdgpu_cs_syncobj_signal(ws->dev, &handle, 1);
}
-static VkResult radv_amdgpu_query_syncobj(struct radeon_winsys *_ws,
- uint32_t handle, uint64_t *point)
+static VkResult
+radv_amdgpu_query_syncobj(struct radeon_winsys *_ws, uint32_t handle, uint64_t *point)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
- int ret = amdgpu_cs_syncobj_query(ws->dev, &handle, point, 1);
- if (ret == 0)
- return VK_SUCCESS;
- else if (ret == -ENOMEM)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
- else {
- /* Remaining error are driver internal issues: EFAULT for
- * dangling pointers and ENOENT for non-existing syncobj. */
- fprintf(stderr, "amdgpu: internal error in radv_amdgpu_query_syncobj. (%d)\n", ret);
- return VK_ERROR_UNKNOWN;
- }
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ int ret = amdgpu_cs_syncobj_query(ws->dev, &handle, point, 1);
+ if (ret == 0)
+ return VK_SUCCESS;
+ else if (ret == -ENOMEM)
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+ else {
+      /* Remaining errors are driver-internal issues: EFAULT for
+       * dangling pointers and ENOENT for a non-existent syncobj. */
+ fprintf(stderr, "amdgpu: internal error in radv_amdgpu_query_syncobj. (%d)\n", ret);
+ return VK_ERROR_UNKNOWN;
+ }
}
-static bool radv_amdgpu_wait_syncobj(struct radeon_winsys *_ws, const uint32_t *handles,
- uint32_t handle_count, bool wait_all, uint64_t timeout)
+static bool
+radv_amdgpu_wait_syncobj(struct radeon_winsys *_ws, const uint32_t *handles, uint32_t handle_count,
+ bool wait_all, uint64_t timeout)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
- uint32_t tmp;
-
- /* The timeouts are signed, while vulkan timeouts are unsigned. */
- timeout = MIN2(timeout, INT64_MAX);
-
- int ret = amdgpu_cs_syncobj_wait(ws->dev, (uint32_t*)handles, handle_count, timeout,
- DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT |
- (wait_all ? DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL : 0),
- &tmp);
- if (ret == 0) {
- return true;
- } else if (ret == -ETIME) {
- return false;
- } else {
- fprintf(stderr, "amdgpu: radv_amdgpu_wait_syncobj failed!\nerrno: %d\n", errno);
- return false;
- }
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ uint32_t tmp;
+
+ /* The timeouts are signed, while vulkan timeouts are unsigned. */
+ timeout = MIN2(timeout, INT64_MAX);
+
+ int ret = amdgpu_cs_syncobj_wait(
+ ws->dev, (uint32_t *)handles, handle_count, timeout,
+ DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT | (wait_all ? DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL : 0),
+ &tmp);
+ if (ret == 0) {
+ return true;
+ } else if (ret == -ETIME) {
+ return false;
+ } else {
+ fprintf(stderr, "amdgpu: radv_amdgpu_wait_syncobj failed!\nerrno: %d\n", errno);
+ return false;
+ }
}
-static bool radv_amdgpu_wait_timeline_syncobj(struct radeon_winsys *_ws, const uint32_t *handles,
- const uint64_t *points, uint32_t handle_count,
- bool wait_all, bool available, uint64_t timeout)
+static bool
+radv_amdgpu_wait_timeline_syncobj(struct radeon_winsys *_ws, const uint32_t *handles,
+ const uint64_t *points, uint32_t handle_count, bool wait_all,
+ bool available, uint64_t timeout)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
-
- /* The timeouts are signed, while vulkan timeouts are unsigned. */
- timeout = MIN2(timeout, INT64_MAX);
-
- int ret = amdgpu_cs_syncobj_timeline_wait(ws->dev, (uint32_t*)handles, (uint64_t*)points,
- handle_count, timeout,
- DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT |
- (wait_all ? DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL : 0) |
- (available ? DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE : 0),
- NULL);
- if (ret == 0) {
- return true;
- } else if (ret == -ETIME) {
- return false;
- } else {
- fprintf(stderr, "amdgpu: radv_amdgpu_wait_syncobj failed! (%d)\n", errno);
- return false;
- }
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+
+ /* The timeouts are signed, while vulkan timeouts are unsigned. */
+ timeout = MIN2(timeout, INT64_MAX);
+
+ int ret = amdgpu_cs_syncobj_timeline_wait(
+ ws->dev, (uint32_t *)handles, (uint64_t *)points, handle_count, timeout,
+ DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT | (wait_all ? DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL : 0) |
+ (available ? DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE : 0),
+ NULL);
+ if (ret == 0) {
+ return true;
+ } else if (ret == -ETIME) {
+ return false;
+ } else {
+ fprintf(stderr, "amdgpu: radv_amdgpu_wait_syncobj failed! (%d)\n", errno);
+ return false;
+ }
}
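Both wait helpers clamp the caller's timeout before passing it to the kernel, since Vulkan timeouts are unsigned 64-bit values while the DRM syncobj waits take a signed one. As a standalone sketch (helper name invented):

   #include <stdint.h>

   /* Equivalent of the MIN2(timeout, INT64_MAX) clamp in the wait helpers. */
   static uint64_t
   clamp_wait_timeout(uint64_t vk_timeout)
   {
      return vk_timeout > (uint64_t)INT64_MAX ? (uint64_t)INT64_MAX : vk_timeout;
   }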
-
-static int radv_amdgpu_export_syncobj(struct radeon_winsys *_ws,
- uint32_t syncobj,
- int *fd)
+static int
+radv_amdgpu_export_syncobj(struct radeon_winsys *_ws, uint32_t syncobj, int *fd)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
- return amdgpu_cs_export_syncobj(ws->dev, syncobj, fd);
+ return amdgpu_cs_export_syncobj(ws->dev, syncobj, fd);
}
-static int radv_amdgpu_import_syncobj(struct radeon_winsys *_ws,
- int fd,
- uint32_t *syncobj)
+static int
+radv_amdgpu_import_syncobj(struct radeon_winsys *_ws, int fd, uint32_t *syncobj)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
- return amdgpu_cs_import_syncobj(ws->dev, fd, syncobj);
+ return amdgpu_cs_import_syncobj(ws->dev, fd, syncobj);
}
-
-static int radv_amdgpu_export_syncobj_to_sync_file(struct radeon_winsys *_ws,
- uint32_t syncobj,
- int *fd)
+static int
+radv_amdgpu_export_syncobj_to_sync_file(struct radeon_winsys *_ws, uint32_t syncobj, int *fd)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
- return amdgpu_cs_syncobj_export_sync_file(ws->dev, syncobj, fd);
+ return amdgpu_cs_syncobj_export_sync_file(ws->dev, syncobj, fd);
}
-static int radv_amdgpu_import_syncobj_from_sync_file(struct radeon_winsys *_ws,
- uint32_t syncobj,
- int fd)
+static int
+radv_amdgpu_import_syncobj_from_sync_file(struct radeon_winsys *_ws, uint32_t syncobj, int fd)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
- return amdgpu_cs_syncobj_import_sync_file(ws->dev, syncobj, fd);
+ return amdgpu_cs_syncobj_import_sync_file(ws->dev, syncobj, fd);
}
-void radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys *ws)
+void
+radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys *ws)
{
- ws->base.ctx_create = radv_amdgpu_ctx_create;
- ws->base.ctx_destroy = radv_amdgpu_ctx_destroy;
- ws->base.ctx_wait_idle = radv_amdgpu_ctx_wait_idle;
- ws->base.cs_create = radv_amdgpu_cs_create;
- ws->base.cs_destroy = radv_amdgpu_cs_destroy;
- ws->base.cs_grow = radv_amdgpu_cs_grow;
- ws->base.cs_finalize = radv_amdgpu_cs_finalize;
- ws->base.cs_reset = radv_amdgpu_cs_reset;
- ws->base.cs_add_buffer = radv_amdgpu_cs_add_buffer;
- ws->base.cs_execute_secondary = radv_amdgpu_cs_execute_secondary;
- ws->base.cs_submit = radv_amdgpu_winsys_cs_submit;
- ws->base.cs_dump = radv_amdgpu_winsys_cs_dump;
- ws->base.create_syncobj = radv_amdgpu_create_syncobj;
- ws->base.destroy_syncobj = radv_amdgpu_destroy_syncobj;
- ws->base.reset_syncobj = radv_amdgpu_reset_syncobj;
- ws->base.signal_syncobj = radv_amdgpu_signal_syncobj;
- ws->base.query_syncobj = radv_amdgpu_query_syncobj;
- ws->base.wait_syncobj = radv_amdgpu_wait_syncobj;
- ws->base.wait_timeline_syncobj = radv_amdgpu_wait_timeline_syncobj;
- ws->base.export_syncobj = radv_amdgpu_export_syncobj;
- ws->base.import_syncobj = radv_amdgpu_import_syncobj;
- ws->base.export_syncobj_to_sync_file = radv_amdgpu_export_syncobj_to_sync_file;
- ws->base.import_syncobj_from_sync_file = radv_amdgpu_import_syncobj_from_sync_file;
+ ws->base.ctx_create = radv_amdgpu_ctx_create;
+ ws->base.ctx_destroy = radv_amdgpu_ctx_destroy;
+ ws->base.ctx_wait_idle = radv_amdgpu_ctx_wait_idle;
+ ws->base.cs_create = radv_amdgpu_cs_create;
+ ws->base.cs_destroy = radv_amdgpu_cs_destroy;
+ ws->base.cs_grow = radv_amdgpu_cs_grow;
+ ws->base.cs_finalize = radv_amdgpu_cs_finalize;
+ ws->base.cs_reset = radv_amdgpu_cs_reset;
+ ws->base.cs_add_buffer = radv_amdgpu_cs_add_buffer;
+ ws->base.cs_execute_secondary = radv_amdgpu_cs_execute_secondary;
+ ws->base.cs_submit = radv_amdgpu_winsys_cs_submit;
+ ws->base.cs_dump = radv_amdgpu_winsys_cs_dump;
+ ws->base.create_syncobj = radv_amdgpu_create_syncobj;
+ ws->base.destroy_syncobj = radv_amdgpu_destroy_syncobj;
+ ws->base.reset_syncobj = radv_amdgpu_reset_syncobj;
+ ws->base.signal_syncobj = radv_amdgpu_signal_syncobj;
+ ws->base.query_syncobj = radv_amdgpu_query_syncobj;
+ ws->base.wait_syncobj = radv_amdgpu_wait_syncobj;
+ ws->base.wait_timeline_syncobj = radv_amdgpu_wait_timeline_syncobj;
+ ws->base.export_syncobj = radv_amdgpu_export_syncobj;
+ ws->base.import_syncobj = radv_amdgpu_import_syncobj;
+ ws->base.export_syncobj_to_sync_file = radv_amdgpu_export_syncobj_to_sync_file;
+ ws->base.import_syncobj_from_sync_file = radv_amdgpu_import_syncobj_from_sync_file;
}
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.h b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.h
index 135d4faf943..a901afbf5d8 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.h
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.h
@@ -29,37 +29,34 @@
#ifndef RADV_AMDGPU_CS_H
#define RADV_AMDGPU_CS_H
-#include <string.h>
-#include <stdint.h>
-#include <assert.h>
#include <amdgpu.h>
+#include <assert.h>
+#include <stdint.h>
+#include <string.h>
-#include "radv_radeon_winsys.h"
#include "radv_amdgpu_winsys.h"
+#include "radv_radeon_winsys.h"
-enum {
- MAX_RINGS_PER_TYPE = 8
-};
-
+enum { MAX_RINGS_PER_TYPE = 8 };
struct radv_amdgpu_fence {
- struct amdgpu_cs_fence fence;
- volatile uint64_t *user_ptr;
+ struct amdgpu_cs_fence fence;
+ volatile uint64_t *user_ptr;
};
struct radv_amdgpu_ctx {
- struct radv_amdgpu_winsys *ws;
- amdgpu_context_handle ctx;
- struct radv_amdgpu_fence last_submission[AMDGPU_HW_IP_DMA + 1][MAX_RINGS_PER_TYPE];
+ struct radv_amdgpu_winsys *ws;
+ amdgpu_context_handle ctx;
+ struct radv_amdgpu_fence last_submission[AMDGPU_HW_IP_DMA + 1][MAX_RINGS_PER_TYPE];
- struct radeon_winsys_bo *fence_bo;
- uint64_t *fence_map;
+ struct radeon_winsys_bo *fence_bo;
+ uint64_t *fence_map;
};
static inline struct radv_amdgpu_ctx *
radv_amdgpu_ctx(struct radeon_winsys_ctx *base)
{
- return (struct radv_amdgpu_ctx *)base;
+ return (struct radv_amdgpu_ctx *)base;
}
void radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys *ws);
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c
index 598baa2addc..d1f0cd6cbc6 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c
@@ -28,77 +28,77 @@
#include <errno.h>
-#include "radv_private.h"
#include "util/bitset.h"
-#include "radv_amdgpu_winsys.h"
#include "radv_amdgpu_surface.h"
+#include "radv_amdgpu_winsys.h"
+#include "radv_private.h"
#include "sid.h"
#include "ac_surface.h"
-static int radv_amdgpu_surface_sanity(const struct ac_surf_info *surf_info,
- const struct radeon_surf *surf)
+static int
+radv_amdgpu_surface_sanity(const struct ac_surf_info *surf_info, const struct radeon_surf *surf)
{
- unsigned type = RADEON_SURF_GET(surf->flags, TYPE);
+ unsigned type = RADEON_SURF_GET(surf->flags, TYPE);
- if (!surf->blk_w || !surf->blk_h)
- return -EINVAL;
+ if (!surf->blk_w || !surf->blk_h)
+ return -EINVAL;
- switch (type) {
- case RADEON_SURF_TYPE_1D:
- if (surf_info->height > 1)
- return -EINVAL;
- /* fall through */
- case RADEON_SURF_TYPE_2D:
- case RADEON_SURF_TYPE_CUBEMAP:
- if (surf_info->depth > 1 || surf_info->array_size > 1)
- return -EINVAL;
- break;
- case RADEON_SURF_TYPE_3D:
- if (surf_info->array_size > 1)
- return -EINVAL;
- break;
- case RADEON_SURF_TYPE_1D_ARRAY:
- if (surf_info->height > 1)
- return -EINVAL;
- /* fall through */
- case RADEON_SURF_TYPE_2D_ARRAY:
- if (surf_info->depth > 1)
- return -EINVAL;
- break;
- default:
- return -EINVAL;
- }
- return 0;
+ switch (type) {
+ case RADEON_SURF_TYPE_1D:
+ if (surf_info->height > 1)
+ return -EINVAL;
+ /* fall through */
+ case RADEON_SURF_TYPE_2D:
+ case RADEON_SURF_TYPE_CUBEMAP:
+ if (surf_info->depth > 1 || surf_info->array_size > 1)
+ return -EINVAL;
+ break;
+ case RADEON_SURF_TYPE_3D:
+ if (surf_info->array_size > 1)
+ return -EINVAL;
+ break;
+ case RADEON_SURF_TYPE_1D_ARRAY:
+ if (surf_info->height > 1)
+ return -EINVAL;
+ /* fall through */
+ case RADEON_SURF_TYPE_2D_ARRAY:
+ if (surf_info->depth > 1)
+ return -EINVAL;
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
}
-static int radv_amdgpu_winsys_surface_init(struct radeon_winsys *_ws,
- const struct ac_surf_info *surf_info,
- struct radeon_surf *surf)
+static int
+radv_amdgpu_winsys_surface_init(struct radeon_winsys *_ws, const struct ac_surf_info *surf_info,
+ struct radeon_surf *surf)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
- unsigned mode, type;
- int r;
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ unsigned mode, type;
+ int r;
- r = radv_amdgpu_surface_sanity(surf_info, surf);
- if (r)
- return r;
+ r = radv_amdgpu_surface_sanity(surf_info, surf);
+ if (r)
+ return r;
- type = RADEON_SURF_GET(surf->flags, TYPE);
- mode = RADEON_SURF_GET(surf->flags, MODE);
+ type = RADEON_SURF_GET(surf->flags, TYPE);
+ mode = RADEON_SURF_GET(surf->flags, MODE);
- struct ac_surf_config config;
+ struct ac_surf_config config;
- memcpy(&config.info, surf_info, sizeof(config.info));
- config.is_1d = type == RADEON_SURF_TYPE_1D ||
- type == RADEON_SURF_TYPE_1D_ARRAY;
- config.is_3d = type == RADEON_SURF_TYPE_3D;
- config.is_cube = type == RADEON_SURF_TYPE_CUBEMAP;
+ memcpy(&config.info, surf_info, sizeof(config.info));
+ config.is_1d = type == RADEON_SURF_TYPE_1D || type == RADEON_SURF_TYPE_1D_ARRAY;
+ config.is_3d = type == RADEON_SURF_TYPE_3D;
+ config.is_cube = type == RADEON_SURF_TYPE_CUBEMAP;
- return ac_compute_surface(ws->addrlib, &ws->info, &config, mode, surf);
+ return ac_compute_surface(ws->addrlib, &ws->info, &config, mode, surf);
}
-void radv_amdgpu_surface_init_functions(struct radv_amdgpu_winsys *ws)
+void
+radv_amdgpu_surface_init_functions(struct radv_amdgpu_winsys *ws)
{
- ws->base.surface_init = radv_amdgpu_winsys_surface_init;
+ ws->base.surface_init = radv_amdgpu_winsys_surface_init;
}
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.h b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.h
index a5652a32570..90bc2b97883 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.h
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.h
@@ -27,6 +27,8 @@
#include <amdgpu.h>
+struct radv_amdgpu_winsys;
+
void radv_amdgpu_surface_init_functions(struct radv_amdgpu_winsys *ws);
#endif /* RADV_AMDGPU_SURFACE_H */
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c
index 0a15b6bc287..ad5953aeb11 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c
@@ -25,255 +25,246 @@
* IN THE SOFTWARE.
*/
#include "radv_amdgpu_winsys.h"
-#include "radv_amdgpu_winsys_public.h"
-#include "radv_amdgpu_surface.h"
-#include "radv_debug.h"
-#include "ac_surface.h"
-#include "xf86drm.h"
+#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "drm-uapi/amdgpu_drm.h"
-#include <assert.h>
-#include "radv_amdgpu_cs.h"
+#include "ac_surface.h"
#include "radv_amdgpu_bo.h"
+#include "radv_amdgpu_cs.h"
#include "radv_amdgpu_surface.h"
+#include "radv_amdgpu_winsys_public.h"
+#include "radv_debug.h"
+#include "xf86drm.h"
static bool
do_winsys_init(struct radv_amdgpu_winsys *ws, int fd)
{
- if (!ac_query_gpu_info(fd, ws->dev, &ws->info, &ws->amdinfo))
- return false;
-
- if (ws->info.drm_minor < 35) {
- fprintf(stderr, "radv: DRM 3.35+ is required (Linux kernel 4.15+)\n");
- return false;
- }
-
- /* LLVM 11 is required for GFX10.3. */
- if (ws->info.chip_class == GFX10_3 && ws->use_llvm && LLVM_VERSION_MAJOR < 11) {
- fprintf(stderr, "radv: GFX 10.3 requires LLVM 11 or higher\n");
- return false;
- }
-
- /* LLVM 9.0 is required for GFX10. */
- if (ws->info.chip_class == GFX10 && ws->use_llvm && LLVM_VERSION_MAJOR < 9) {
- fprintf(stderr, "radv: Navi family support requires LLVM 9 or higher\n");
- return false;
- }
-
- ws->addrlib = ac_addrlib_create(&ws->info, &ws->info.max_alignment);
- if (!ws->addrlib) {
- fprintf(stderr, "amdgpu: Cannot create addrlib.\n");
- return false;
- }
-
- ws->info.num_rings[RING_DMA] = MIN2(ws->info.num_rings[RING_DMA], MAX_RINGS_PER_TYPE);
- ws->info.num_rings[RING_COMPUTE] = MIN2(ws->info.num_rings[RING_COMPUTE], MAX_RINGS_PER_TYPE);
-
- ws->use_ib_bos = ws->info.chip_class >= GFX7;
- return true;
+ if (!ac_query_gpu_info(fd, ws->dev, &ws->info, &ws->amdinfo))
+ return false;
+
+ if (ws->info.drm_minor < 35) {
+ fprintf(stderr, "radv: DRM 3.35+ is required (Linux kernel 4.15+)\n");
+ return false;
+ }
+
+ /* LLVM 11 is required for GFX10.3. */
+ if (ws->info.chip_class == GFX10_3 && ws->use_llvm && LLVM_VERSION_MAJOR < 11) {
+ fprintf(stderr, "radv: GFX 10.3 requires LLVM 11 or higher\n");
+ return false;
+ }
+
+ /* LLVM 9.0 is required for GFX10. */
+ if (ws->info.chip_class == GFX10 && ws->use_llvm && LLVM_VERSION_MAJOR < 9) {
+ fprintf(stderr, "radv: Navi family support requires LLVM 9 or higher\n");
+ return false;
+ }
+
+ ws->addrlib = ac_addrlib_create(&ws->info, &ws->info.max_alignment);
+ if (!ws->addrlib) {
+ fprintf(stderr, "amdgpu: Cannot create addrlib.\n");
+ return false;
+ }
+
+ ws->info.num_rings[RING_DMA] = MIN2(ws->info.num_rings[RING_DMA], MAX_RINGS_PER_TYPE);
+ ws->info.num_rings[RING_COMPUTE] = MIN2(ws->info.num_rings[RING_COMPUTE], MAX_RINGS_PER_TYPE);
+
+ ws->use_ib_bos = ws->info.chip_class >= GFX7;
+ return true;
}
-static void radv_amdgpu_winsys_query_info(struct radeon_winsys *rws,
- struct radeon_info *info)
+static void
+radv_amdgpu_winsys_query_info(struct radeon_winsys *rws, struct radeon_info *info)
{
- *info = ((struct radv_amdgpu_winsys *)rws)->info;
+ *info = ((struct radv_amdgpu_winsys *)rws)->info;
}
-static uint64_t radv_amdgpu_winsys_query_value(struct radeon_winsys *rws,
- enum radeon_value_id value)
+static uint64_t
+radv_amdgpu_winsys_query_value(struct radeon_winsys *rws, enum radeon_value_id value)
{
- struct radv_amdgpu_winsys *ws = (struct radv_amdgpu_winsys *)rws;
- struct amdgpu_heap_info heap;
- uint64_t retval = 0;
-
- switch (value) {
- case RADEON_ALLOCATED_VRAM:
- return ws->allocated_vram;
- case RADEON_ALLOCATED_VRAM_VIS:
- return ws->allocated_vram_vis;
- case RADEON_ALLOCATED_GTT:
- return ws->allocated_gtt;
- case RADEON_TIMESTAMP:
- amdgpu_query_info(ws->dev, AMDGPU_INFO_TIMESTAMP, 8, &retval);
- return retval;
- case RADEON_NUM_BYTES_MOVED:
- amdgpu_query_info(ws->dev, AMDGPU_INFO_NUM_BYTES_MOVED,
- 8, &retval);
- return retval;
- case RADEON_NUM_EVICTIONS:
- amdgpu_query_info(ws->dev, AMDGPU_INFO_NUM_EVICTIONS,
- 8, &retval);
- return retval;
- case RADEON_NUM_VRAM_CPU_PAGE_FAULTS:
- amdgpu_query_info(ws->dev, AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS,
- 8, &retval);
- return retval;
- case RADEON_VRAM_USAGE:
- amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_VRAM,
- 0, &heap);
- return heap.heap_usage;
- case RADEON_VRAM_VIS_USAGE:
- amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_VRAM,
- AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
- &heap);
- return heap.heap_usage;
- case RADEON_GTT_USAGE:
- amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_GTT,
- 0, &heap);
- return heap.heap_usage;
- case RADEON_GPU_TEMPERATURE:
- amdgpu_query_sensor_info(ws->dev, AMDGPU_INFO_SENSOR_GPU_TEMP,
- 4, &retval);
- return retval;
- case RADEON_CURRENT_SCLK:
- amdgpu_query_sensor_info(ws->dev, AMDGPU_INFO_SENSOR_GFX_SCLK,
- 4, &retval);
- return retval;
- case RADEON_CURRENT_MCLK:
- amdgpu_query_sensor_info(ws->dev, AMDGPU_INFO_SENSOR_GFX_MCLK,
- 4, &retval);
- return retval;
- default:
- unreachable("invalid query value");
- }
-
- return 0;
+ struct radv_amdgpu_winsys *ws = (struct radv_amdgpu_winsys *)rws;
+ struct amdgpu_heap_info heap;
+ uint64_t retval = 0;
+
+ switch (value) {
+ case RADEON_ALLOCATED_VRAM:
+ return ws->allocated_vram;
+ case RADEON_ALLOCATED_VRAM_VIS:
+ return ws->allocated_vram_vis;
+ case RADEON_ALLOCATED_GTT:
+ return ws->allocated_gtt;
+ case RADEON_TIMESTAMP:
+ amdgpu_query_info(ws->dev, AMDGPU_INFO_TIMESTAMP, 8, &retval);
+ return retval;
+ case RADEON_NUM_BYTES_MOVED:
+ amdgpu_query_info(ws->dev, AMDGPU_INFO_NUM_BYTES_MOVED, 8, &retval);
+ return retval;
+ case RADEON_NUM_EVICTIONS:
+ amdgpu_query_info(ws->dev, AMDGPU_INFO_NUM_EVICTIONS, 8, &retval);
+ return retval;
+ case RADEON_NUM_VRAM_CPU_PAGE_FAULTS:
+ amdgpu_query_info(ws->dev, AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS, 8, &retval);
+ return retval;
+ case RADEON_VRAM_USAGE:
+ amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_VRAM, 0, &heap);
+ return heap.heap_usage;
+ case RADEON_VRAM_VIS_USAGE:
+ amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
+ &heap);
+ return heap.heap_usage;
+ case RADEON_GTT_USAGE:
+ amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_GTT, 0, &heap);
+ return heap.heap_usage;
+ case RADEON_GPU_TEMPERATURE:
+ amdgpu_query_sensor_info(ws->dev, AMDGPU_INFO_SENSOR_GPU_TEMP, 4, &retval);
+ return retval;
+ case RADEON_CURRENT_SCLK:
+ amdgpu_query_sensor_info(ws->dev, AMDGPU_INFO_SENSOR_GFX_SCLK, 4, &retval);
+ return retval;
+ case RADEON_CURRENT_MCLK:
+ amdgpu_query_sensor_info(ws->dev, AMDGPU_INFO_SENSOR_GFX_MCLK, 4, &retval);
+ return retval;
+ default:
+ unreachable("invalid query value");
+ }
+
+ return 0;
}
-static bool radv_amdgpu_winsys_read_registers(struct radeon_winsys *rws,
- unsigned reg_offset,
- unsigned num_registers, uint32_t *out)
+static bool
+radv_amdgpu_winsys_read_registers(struct radeon_winsys *rws, unsigned reg_offset,
+ unsigned num_registers, uint32_t *out)
{
- struct radv_amdgpu_winsys *ws = (struct radv_amdgpu_winsys*)rws;
+ struct radv_amdgpu_winsys *ws = (struct radv_amdgpu_winsys *)rws;
- return amdgpu_read_mm_registers(ws->dev, reg_offset / 4, num_registers,
- 0xffffffff, 0, out) == 0;
+ return amdgpu_read_mm_registers(ws->dev, reg_offset / 4, num_registers, 0xffffffff, 0, out) == 0;
}
-static const char *radv_amdgpu_winsys_get_chip_name(struct radeon_winsys *rws)
+static const char *
+radv_amdgpu_winsys_get_chip_name(struct radeon_winsys *rws)
{
- amdgpu_device_handle dev = ((struct radv_amdgpu_winsys *)rws)->dev;
+ amdgpu_device_handle dev = ((struct radv_amdgpu_winsys *)rws)->dev;
- return amdgpu_get_marketing_name(dev);
+ return amdgpu_get_marketing_name(dev);
}
static simple_mtx_t winsys_creation_mutex = _SIMPLE_MTX_INITIALIZER_NP;
static struct hash_table *winsyses = NULL;
-static void radv_amdgpu_winsys_destroy(struct radeon_winsys *rws)
+static void
+radv_amdgpu_winsys_destroy(struct radeon_winsys *rws)
{
- struct radv_amdgpu_winsys *ws = (struct radv_amdgpu_winsys*)rws;
- bool destroy = false;
-
- simple_mtx_lock(&winsys_creation_mutex);
- if (!--ws->refcount) {
- _mesa_hash_table_remove_key(winsyses, ws->dev);
-
- /* Clean the hashtable up if empty, though there is no
- * empty function. */
- if (_mesa_hash_table_num_entries(winsyses) == 0) {
- _mesa_hash_table_destroy(winsyses, NULL);
- winsyses = NULL;
- }
-
- destroy = true;
- }
- simple_mtx_unlock(&winsys_creation_mutex);
- if (!destroy)
- return;
-
- for (unsigned i = 0; i < ws->syncobj_count; ++i)
- amdgpu_cs_destroy_syncobj(ws->dev, ws->syncobj[i]);
- free(ws->syncobj);
-
- u_rwlock_destroy(&ws->global_bo_list.lock);
- free(ws->global_bo_list.bos);
-
- pthread_mutex_destroy(&ws->syncobj_lock);
- u_rwlock_destroy(&ws->log_bo_list_lock);
- ac_addrlib_destroy(ws->addrlib);
- amdgpu_device_deinitialize(ws->dev);
- FREE(rws);
+ struct radv_amdgpu_winsys *ws = (struct radv_amdgpu_winsys *)rws;
+ bool destroy = false;
+
+ simple_mtx_lock(&winsys_creation_mutex);
+ if (!--ws->refcount) {
+ _mesa_hash_table_remove_key(winsyses, ws->dev);
+
+ /* Clean the hashtable up if empty, though there is no
+ * empty function. */
+ if (_mesa_hash_table_num_entries(winsyses) == 0) {
+ _mesa_hash_table_destroy(winsyses, NULL);
+ winsyses = NULL;
+ }
+
+ destroy = true;
+ }
+ simple_mtx_unlock(&winsys_creation_mutex);
+ if (!destroy)
+ return;
+
+ for (unsigned i = 0; i < ws->syncobj_count; ++i)
+ amdgpu_cs_destroy_syncobj(ws->dev, ws->syncobj[i]);
+ free(ws->syncobj);
+
+ u_rwlock_destroy(&ws->global_bo_list.lock);
+ free(ws->global_bo_list.bos);
+
+ pthread_mutex_destroy(&ws->syncobj_lock);
+ u_rwlock_destroy(&ws->log_bo_list_lock);
+ ac_addrlib_destroy(ws->addrlib);
+ amdgpu_device_deinitialize(ws->dev);
+ FREE(rws);
}
struct radeon_winsys *
radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags)
{
- uint32_t drm_major, drm_minor, r;
- amdgpu_device_handle dev;
- struct radv_amdgpu_winsys *ws = NULL;
-
- r = amdgpu_device_initialize(fd, &drm_major, &drm_minor, &dev);
- if (r)
- return NULL;
-
- /* We have to keep this lock till insertion. */
- simple_mtx_lock(&winsys_creation_mutex);
- if (!winsyses)
- winsyses = _mesa_pointer_hash_table_create(NULL);
- if (!winsyses)
- goto fail;
-
- struct hash_entry *entry = _mesa_hash_table_search(winsyses, dev);
- if (entry) {
- ws = (struct radv_amdgpu_winsys *)entry->data;
- ++ws->refcount;
- }
-
- if (ws) {
- simple_mtx_unlock(&winsys_creation_mutex);
- amdgpu_device_deinitialize(dev);
- return &ws->base;
- }
-
- ws = calloc(1, sizeof(struct radv_amdgpu_winsys));
- if (!ws)
- goto fail;
-
- ws->refcount = 1;
- ws->dev = dev;
- ws->info.drm_major = drm_major;
- ws->info.drm_minor = drm_minor;
- if (!do_winsys_init(ws, fd))
- goto winsys_fail;
-
- ws->debug_all_bos = !!(debug_flags & RADV_DEBUG_ALL_BOS);
- ws->debug_log_bos = debug_flags & RADV_DEBUG_HANG;
- if (debug_flags & RADV_DEBUG_NO_IBS)
- ws->use_ib_bos = false;
-
- ws->use_local_bos = perftest_flags & RADV_PERFTEST_LOCAL_BOS;
- ws->zero_all_vram_allocs = debug_flags & RADV_DEBUG_ZERO_VRAM;
- ws->use_llvm = debug_flags & RADV_DEBUG_LLVM;
- ws->cs_bo_domain = radv_cmdbuffer_domain(&ws->info, perftest_flags);
- u_rwlock_init(&ws->global_bo_list.lock);
- list_inithead(&ws->log_bo_list);
- u_rwlock_init(&ws->log_bo_list_lock);
- pthread_mutex_init(&ws->syncobj_lock, NULL);
- ws->base.query_info = radv_amdgpu_winsys_query_info;
- ws->base.query_value = radv_amdgpu_winsys_query_value;
- ws->base.read_registers = radv_amdgpu_winsys_read_registers;
- ws->base.get_chip_name = radv_amdgpu_winsys_get_chip_name;
- ws->base.destroy = radv_amdgpu_winsys_destroy;
- radv_amdgpu_bo_init_functions(ws);
- radv_amdgpu_cs_init_functions(ws);
- radv_amdgpu_surface_init_functions(ws);
-
- _mesa_hash_table_insert(winsyses, dev, ws);
- simple_mtx_unlock(&winsys_creation_mutex);
-
- return &ws->base;
+ uint32_t drm_major, drm_minor, r;
+ amdgpu_device_handle dev;
+ struct radv_amdgpu_winsys *ws = NULL;
+
+ r = amdgpu_device_initialize(fd, &drm_major, &drm_minor, &dev);
+ if (r)
+ return NULL;
+
+ /* We have to keep this lock till insertion. */
+ simple_mtx_lock(&winsys_creation_mutex);
+ if (!winsyses)
+ winsyses = _mesa_pointer_hash_table_create(NULL);
+ if (!winsyses)
+ goto fail;
+
+ struct hash_entry *entry = _mesa_hash_table_search(winsyses, dev);
+ if (entry) {
+ ws = (struct radv_amdgpu_winsys *)entry->data;
+ ++ws->refcount;
+ }
+
+ if (ws) {
+ simple_mtx_unlock(&winsys_creation_mutex);
+ amdgpu_device_deinitialize(dev);
+ return &ws->base;
+ }
+
+ ws = calloc(1, sizeof(struct radv_amdgpu_winsys));
+ if (!ws)
+ goto fail;
+
+ ws->refcount = 1;
+ ws->dev = dev;
+ ws->info.drm_major = drm_major;
+ ws->info.drm_minor = drm_minor;
+ if (!do_winsys_init(ws, fd))
+ goto winsys_fail;
+
+ ws->debug_all_bos = !!(debug_flags & RADV_DEBUG_ALL_BOS);
+ ws->debug_log_bos = debug_flags & RADV_DEBUG_HANG;
+ if (debug_flags & RADV_DEBUG_NO_IBS)
+ ws->use_ib_bos = false;
+
+ ws->use_local_bos = perftest_flags & RADV_PERFTEST_LOCAL_BOS;
+ ws->zero_all_vram_allocs = debug_flags & RADV_DEBUG_ZERO_VRAM;
+ ws->use_llvm = debug_flags & RADV_DEBUG_LLVM;
+ ws->cs_bo_domain = radv_cmdbuffer_domain(&ws->info, perftest_flags);
+ u_rwlock_init(&ws->global_bo_list.lock);
+ list_inithead(&ws->log_bo_list);
+ u_rwlock_init(&ws->log_bo_list_lock);
+ pthread_mutex_init(&ws->syncobj_lock, NULL);
+ ws->base.query_info = radv_amdgpu_winsys_query_info;
+ ws->base.query_value = radv_amdgpu_winsys_query_value;
+ ws->base.read_registers = radv_amdgpu_winsys_read_registers;
+ ws->base.get_chip_name = radv_amdgpu_winsys_get_chip_name;
+ ws->base.destroy = radv_amdgpu_winsys_destroy;
+ radv_amdgpu_bo_init_functions(ws);
+ radv_amdgpu_cs_init_functions(ws);
+ radv_amdgpu_surface_init_functions(ws);
+
+ _mesa_hash_table_insert(winsyses, dev, ws);
+ simple_mtx_unlock(&winsys_creation_mutex);
+
+ return &ws->base;
winsys_fail:
- free(ws);
+ free(ws);
fail:
- if (winsyses && _mesa_hash_table_num_entries(winsyses) == 0) {
- _mesa_hash_table_destroy(winsyses, NULL);
- winsyses = NULL;
- }
- simple_mtx_unlock(&winsys_creation_mutex);
- amdgpu_device_deinitialize(dev);
- return NULL;
+ if (winsyses && _mesa_hash_table_num_entries(winsyses) == 0) {
+ _mesa_hash_table_destroy(winsyses, NULL);
+ winsyses = NULL;
+ }
+ simple_mtx_unlock(&winsys_creation_mutex);
+ amdgpu_device_deinitialize(dev);
+ return NULL;
}
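
Beyond the reindentation, radv_amdgpu_winsys_create() and radv_amdgpu_winsys_destroy() keep their existing shape: one reference-counted winsys per amdgpu device handle, looked up in a pointer hash table while winsys_creation_mutex is held, and torn down only when the last reference drops. A self-contained sketch of that pattern is below, with a linked list standing in for _mesa_pointer_hash_table and a plain void * for amdgpu_device_handle; all names in it are hypothetical.

/* Illustrative sketch, not part of this commit: the refcounted
 * one-winsys-per-device pattern used by the amdgpu winsys. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct winsys {
   void *dev;           /* key: the device handle                     */
   unsigned refcount;   /* how many callers share this winsys         */
   struct winsys *next; /* intrusive list, standing in for the table  */
};

static pthread_mutex_t creation_mutex = PTHREAD_MUTEX_INITIALIZER;
static struct winsys *winsyses;

static struct winsys *
winsys_create(void *dev)
{
   pthread_mutex_lock(&creation_mutex);

   /* Reuse an existing winsys for the same device, like the hash lookup. */
   for (struct winsys *existing = winsyses; existing; existing = existing->next) {
      if (existing->dev == dev) {
         existing->refcount++;
         pthread_mutex_unlock(&creation_mutex);
         return existing;
      }
   }

   /* Keep the lock until insertion, so two threads cannot race a second
    * winsys for the same device into existence. */
   struct winsys *ws = calloc(1, sizeof(*ws));
   if (ws) {
      ws->dev = dev;
      ws->refcount = 1;
      ws->next = winsyses;
      winsyses = ws;
   }
   pthread_mutex_unlock(&creation_mutex);
   return ws;
}

static void
winsys_destroy(struct winsys *ws)
{
   pthread_mutex_lock(&creation_mutex);
   if (--ws->refcount) {
      pthread_mutex_unlock(&creation_mutex);
      return; /* another user still holds a reference */
   }

   /* Unlink under the lock, free outside it — same shape as the driver code. */
   for (struct winsys **p = &winsyses; *p; p = &(*p)->next) {
      if (*p == ws) {
         *p = ws->next;
         break;
      }
   }
   pthread_mutex_unlock(&creation_mutex);
   free(ws);
}

int
main(void)
{
   int device; /* fake device handle */
   struct winsys *a = winsys_create(&device);
   struct winsys *b = winsys_create(&device);
   printf("shared: %d\n", a == b); /* prints 1: the second create reuses the first */
   winsys_destroy(b);
   winsys_destroy(a);
   return 0;
}
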
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h
index 3c8987a1ab9..ba9cf500cd5 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h
@@ -28,57 +28,57 @@
#ifndef RADV_AMDGPU_WINSYS_H
#define RADV_AMDGPU_WINSYS_H
-#include "radv_radeon_winsys.h"
-#include "ac_gpu_info.h"
#include <amdgpu.h>
+#include <pthread.h>
#include "util/list.h"
#include "util/rwlock.h"
-#include <pthread.h>
+#include "ac_gpu_info.h"
+#include "radv_radeon_winsys.h"
struct radv_amdgpu_winsys {
- struct radeon_winsys base;
- amdgpu_device_handle dev;
+ struct radeon_winsys base;
+ amdgpu_device_handle dev;
- struct radeon_info info;
- struct amdgpu_gpu_info amdinfo;
- struct ac_addrlib *addrlib;
+ struct radeon_info info;
+ struct amdgpu_gpu_info amdinfo;
+ struct ac_addrlib *addrlib;
- bool debug_all_bos;
- bool debug_log_bos;
- bool use_ib_bos;
- enum radeon_bo_domain cs_bo_domain;
- bool zero_all_vram_allocs;
- bool use_local_bos;
- bool use_llvm;
+ bool debug_all_bos;
+ bool debug_log_bos;
+ bool use_ib_bos;
+ enum radeon_bo_domain cs_bo_domain;
+ bool zero_all_vram_allocs;
+ bool use_local_bos;
+ bool use_llvm;
- uint64_t allocated_vram;
- uint64_t allocated_vram_vis;
- uint64_t allocated_gtt;
+ uint64_t allocated_vram;
+ uint64_t allocated_vram_vis;
+ uint64_t allocated_gtt;
- /* Global BO list */
- struct {
- struct radv_amdgpu_winsys_bo **bos;
- uint32_t count;
- uint32_t capacity;
- struct u_rwlock lock;
- } global_bo_list;
+ /* Global BO list */
+ struct {
+ struct radv_amdgpu_winsys_bo **bos;
+ uint32_t count;
+ uint32_t capacity;
+ struct u_rwlock lock;
+ } global_bo_list;
- /* syncobj cache */
- pthread_mutex_t syncobj_lock;
- uint32_t *syncobj;
- uint32_t syncobj_count, syncobj_capacity;
+ /* syncobj cache */
+ pthread_mutex_t syncobj_lock;
+ uint32_t *syncobj;
+ uint32_t syncobj_count, syncobj_capacity;
- /* BO log */
- struct u_rwlock log_bo_list_lock;
- struct list_head log_bo_list;
+ /* BO log */
+ struct u_rwlock log_bo_list_lock;
+ struct list_head log_bo_list;
- uint32_t refcount;
+ uint32_t refcount;
};
static inline struct radv_amdgpu_winsys *
radv_amdgpu_winsys(struct radeon_winsys *base)
{
- return (struct radv_amdgpu_winsys*)base;
+ return (struct radv_amdgpu_winsys *)base;
}
#endif /* RADV_AMDGPU_WINSYS_H */
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys_public.h b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys_public.h
index 790a404d24a..84fe347ec3e 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys_public.h
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys_public.h
@@ -30,7 +30,7 @@
#define RADV_AMDGPU_WINSYS_PUBLIC_H
struct radeon_winsys *radv_amdgpu_winsys_create(int fd, uint64_t debug_flags,
- uint64_t perftest_flags);
+ uint64_t perftest_flags);
struct radeon_winsys *radv_dummy_winsys_create(void);
diff --git a/src/amd/vulkan/winsys/null/radv_null_bo.c b/src/amd/vulkan/winsys/null/radv_null_bo.c
index 318b498a23c..3c8d12431a7 100644
--- a/src/amd/vulkan/winsys/null/radv_null_bo.c
+++ b/src/amd/vulkan/winsys/null/radv_null_bo.c
@@ -29,34 +29,31 @@
#include "util/u_memory.h"
static struct radeon_winsys_bo *
-radv_null_winsys_bo_create(struct radeon_winsys *_ws,
- uint64_t size,
- unsigned alignment,
- enum radeon_bo_domain initial_domain,
- enum radeon_bo_flag flags,
- unsigned priority)
+radv_null_winsys_bo_create(struct radeon_winsys *_ws, uint64_t size, unsigned alignment,
+ enum radeon_bo_domain initial_domain, enum radeon_bo_flag flags,
+ unsigned priority)
{
- struct radv_null_winsys_bo *bo;
+ struct radv_null_winsys_bo *bo;
- bo = CALLOC_STRUCT(radv_null_winsys_bo);
- if (!bo)
- return NULL;
+ bo = CALLOC_STRUCT(radv_null_winsys_bo);
+ if (!bo)
+ return NULL;
- bo->ptr = malloc(size);
- if (!bo->ptr)
- goto error_ptr_alloc;
+ bo->ptr = malloc(size);
+ if (!bo->ptr)
+ goto error_ptr_alloc;
- return (struct radeon_winsys_bo *)bo;
+ return (struct radeon_winsys_bo *)bo;
error_ptr_alloc:
- FREE(bo);
- return NULL;
+ FREE(bo);
+ return NULL;
}
static void *
radv_null_winsys_bo_map(struct radeon_winsys_bo *_bo)
{
- struct radv_null_winsys_bo *bo = radv_null_winsys_bo(_bo);
- return bo->ptr;
+ struct radv_null_winsys_bo *bo = radv_null_winsys_bo(_bo);
+ return bo->ptr;
}
static void
@@ -64,18 +61,19 @@ radv_null_winsys_bo_unmap(struct radeon_winsys_bo *_bo)
{
}
-static void radv_null_winsys_bo_destroy(struct radeon_winsys *_ws,
- struct radeon_winsys_bo *_bo)
+static void
+radv_null_winsys_bo_destroy(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo)
{
- struct radv_null_winsys_bo *bo = radv_null_winsys_bo(_bo);
- FREE(bo->ptr);
- FREE(bo);
+ struct radv_null_winsys_bo *bo = radv_null_winsys_bo(_bo);
+ FREE(bo->ptr);
+ FREE(bo);
}
-void radv_null_bo_init_functions(struct radv_null_winsys *ws)
+void
+radv_null_bo_init_functions(struct radv_null_winsys *ws)
{
- ws->base.buffer_create = radv_null_winsys_bo_create;
- ws->base.buffer_destroy = radv_null_winsys_bo_destroy;
- ws->base.buffer_map = radv_null_winsys_bo_map;
- ws->base.buffer_unmap = radv_null_winsys_bo_unmap;
+ ws->base.buffer_create = radv_null_winsys_bo_create;
+ ws->base.buffer_destroy = radv_null_winsys_bo_destroy;
+ ws->base.buffer_map = radv_null_winsys_bo_map;
+ ws->base.buffer_unmap = radv_null_winsys_bo_unmap;
}
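
The null-winsys BOs stay what they were: host allocations with no GPU behind them. Create CALLOCs the wrapper and mallocs the storage, map simply returns the pointer, unmap is a no-op, and destroy frees both. A minimal stand-alone sketch of that malloc-backed buffer-object shape follows; the buf_* names are hypothetical, not the driver's.

/* Illustrative sketch, not part of this commit: a buffer object that is
 * nothing but host memory, mirroring radv_null_winsys_bo_*. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct buf {
   void *ptr;
};

static struct buf *
buf_create(uint64_t size)
{
   struct buf *bo = calloc(1, sizeof(*bo));
   if (!bo)
      return NULL;
   bo->ptr = malloc(size); /* the "GPU" allocation is plain host memory */
   if (!bo->ptr) {
      free(bo);
      return NULL;
   }
   return bo;
}

static void *
buf_map(struct buf *bo)
{
   return bo->ptr; /* mapping is trivial: the pointer is already CPU-visible */
}

static void
buf_destroy(struct buf *bo)
{
   free(bo->ptr);
   free(bo);
}

int
main(void)
{
   struct buf *bo = buf_create(64);
   if (!bo)
      return 1;
   memcpy(buf_map(bo), "hello", 6);
   printf("%s\n", (char *)buf_map(bo));
   buf_destroy(bo);
   return 0;
}
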
diff --git a/src/amd/vulkan/winsys/null/radv_null_bo.h b/src/amd/vulkan/winsys/null/radv_null_bo.h
index 2f2f8b711d6..49a7440b613 100644
--- a/src/amd/vulkan/winsys/null/radv_null_bo.h
+++ b/src/amd/vulkan/winsys/null/radv_null_bo.h
@@ -31,15 +31,15 @@
#include "radv_null_winsys.h"
struct radv_null_winsys_bo {
- struct radeon_winsys_bo base;
- struct radv_null_winsys *ws;
- void *ptr;
+ struct radeon_winsys_bo base;
+ struct radv_null_winsys *ws;
+ void *ptr;
};
-static inline
-struct radv_null_winsys_bo *radv_null_winsys_bo(struct radeon_winsys_bo *bo)
+static inline struct radv_null_winsys_bo *
+radv_null_winsys_bo(struct radeon_winsys_bo *bo)
{
- return (struct radv_null_winsys_bo *)bo;
+ return (struct radv_null_winsys_bo *)bo;
}
void radv_null_bo_init_functions(struct radv_null_winsys *ws);
diff --git a/src/amd/vulkan/winsys/null/radv_null_cs.c b/src/amd/vulkan/winsys/null/radv_null_cs.c
index 5788cbf92cb..f5a186fe04a 100644
--- a/src/amd/vulkan/winsys/null/radv_null_cs.c
+++ b/src/amd/vulkan/winsys/null/radv_null_cs.c
@@ -29,73 +29,75 @@
#include "util/u_memory.h"
struct radv_null_cs {
- struct radeon_cmdbuf base;
- struct radv_null_winsys *ws;
+ struct radeon_cmdbuf base;
+ struct radv_null_winsys *ws;
};
static inline struct radv_null_cs *
radv_null_cs(struct radeon_cmdbuf *base)
{
- return (struct radv_null_cs*)base;
+ return (struct radv_null_cs *)base;
}
-static VkResult radv_null_ctx_create(struct radeon_winsys *_ws,
- enum radeon_ctx_priority priority,
- struct radeon_winsys_ctx **rctx)
+static VkResult
+radv_null_ctx_create(struct radeon_winsys *_ws, enum radeon_ctx_priority priority,
+ struct radeon_winsys_ctx **rctx)
{
- struct radv_null_ctx *ctx = CALLOC_STRUCT(radv_null_ctx);
+ struct radv_null_ctx *ctx = CALLOC_STRUCT(radv_null_ctx);
- if (!ctx)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
+ if (!ctx)
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
- *rctx = (struct radeon_winsys_ctx *)ctx;
- return VK_SUCCESS;
+ *rctx = (struct radeon_winsys_ctx *)ctx;
+ return VK_SUCCESS;
}
-static void radv_null_ctx_destroy(struct radeon_winsys_ctx *rwctx)
+static void
+radv_null_ctx_destroy(struct radeon_winsys_ctx *rwctx)
{
- struct radv_null_ctx *ctx = (struct radv_null_ctx *)rwctx;
- FREE(ctx);
+ struct radv_null_ctx *ctx = (struct radv_null_ctx *)rwctx;
+ FREE(ctx);
}
static struct radeon_cmdbuf *
-radv_null_cs_create(struct radeon_winsys *ws,
- enum ring_type ring_type)
+radv_null_cs_create(struct radeon_winsys *ws, enum ring_type ring_type)
{
- struct radv_null_cs *cs = calloc(1, sizeof(struct radv_null_cs));
- if (!cs)
- return NULL;
+ struct radv_null_cs *cs = calloc(1, sizeof(struct radv_null_cs));
+ if (!cs)
+ return NULL;
- cs->ws = radv_null_winsys(ws);
+ cs->ws = radv_null_winsys(ws);
- cs->base.buf = malloc(16384);
- cs->base.max_dw = 4096;
- if (!cs->base.buf) {
- FREE(cs);
- return NULL;
- }
+ cs->base.buf = malloc(16384);
+ cs->base.max_dw = 4096;
+ if (!cs->base.buf) {
+ FREE(cs);
+ return NULL;
+ }
- return &cs->base;
+ return &cs->base;
}
-static VkResult radv_null_cs_finalize(struct radeon_cmdbuf *_cs)
+static VkResult
+radv_null_cs_finalize(struct radeon_cmdbuf *_cs)
{
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
-static void radv_null_cs_destroy(struct radeon_cmdbuf *rcs)
+static void
+radv_null_cs_destroy(struct radeon_cmdbuf *rcs)
{
- struct radv_null_cs *cs = radv_null_cs(rcs);
- FREE(cs->base.buf);
- FREE(cs);
+ struct radv_null_cs *cs = radv_null_cs(rcs);
+ FREE(cs->base.buf);
+ FREE(cs);
}
-void radv_null_cs_init_functions(struct radv_null_winsys *ws)
+void
+radv_null_cs_init_functions(struct radv_null_winsys *ws)
{
- ws->base.ctx_create = radv_null_ctx_create;
- ws->base.ctx_destroy = radv_null_ctx_destroy;
- ws->base.cs_create = radv_null_cs_create;
- ws->base.cs_finalize = radv_null_cs_finalize;
- ws->base.cs_destroy = radv_null_cs_destroy;
-
+ ws->base.ctx_create = radv_null_ctx_create;
+ ws->base.ctx_destroy = radv_null_ctx_destroy;
+ ws->base.cs_create = radv_null_cs_create;
+ ws->base.cs_finalize = radv_null_cs_finalize;
+ ws->base.cs_destroy = radv_null_cs_destroy;
}
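
radv_null_cs_create() still pairs malloc(16384) with max_dw = 4096: the buffer is allocated in bytes but consumed as 32-bit dwords, so 16384 / 4 = 4096 words of capacity. Below is a small sketch of a bounds-checked dword emitter over such a buffer; the cs struct and cs_emit() are hypothetical stand-ins, not radeon_cmdbuf or radeon_emit().

/* Illustrative sketch, not part of this commit: a byte-sized allocation
 * consumed as dwords, with a capacity check on emit. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct cs {
   uint32_t *buf;   /* command words        */
   unsigned cdw;    /* words written so far */
   unsigned max_dw; /* capacity, in dwords  */
};

static int
cs_emit(struct cs *cs, uint32_t value)
{
   if (cs->cdw >= cs->max_dw)
      return 0; /* full: a real winsys would grow or flush here */
   cs->buf[cs->cdw++] = value;
   return 1;
}

int
main(void)
{
   struct cs cs = {
      .buf = malloc(16384), /* 16384 bytes ...   */
      .max_dw = 4096,       /* ... = 4096 dwords */
   };
   if (!cs.buf)
      return 1;

   cs_emit(&cs, 0xdeadbeef);
   printf("%u/%u dwords used\n", cs.cdw, cs.max_dw);
   free(cs.buf);
   return 0;
}
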
diff --git a/src/amd/vulkan/winsys/null/radv_null_cs.h b/src/amd/vulkan/winsys/null/radv_null_cs.h
index 344e9502ff6..cfb467ebdd1 100644
--- a/src/amd/vulkan/winsys/null/radv_null_cs.h
+++ b/src/amd/vulkan/winsys/null/radv_null_cs.h
@@ -28,17 +28,17 @@
#ifndef RADV_NULL_CS_H
#define RADV_NULL_CS_H
-#include "radv_radeon_winsys.h"
#include "radv_null_winsys.h"
+#include "radv_radeon_winsys.h"
struct radv_null_ctx {
- struct radv_null_winsys *ws;
+ struct radv_null_winsys *ws;
};
static inline struct radv_null_ctx *
radv_null_ctx(struct radeon_winsys_ctx *base)
{
- return (struct radv_null_ctx *)base;
+ return (struct radv_null_ctx *)base;
}
void radv_null_cs_init_functions(struct radv_null_winsys *ws);
diff --git a/src/amd/vulkan/winsys/null/radv_null_winsys.c b/src/amd/vulkan/winsys/null/radv_null_winsys.c
index 5df0b13f493..6afd02e8d6e 100644
--- a/src/amd/vulkan/winsys/null/radv_null_winsys.c
+++ b/src/amd/vulkan/winsys/null/radv_null_winsys.c
@@ -33,129 +33,131 @@
/* Hardcode some GPU info that are needed for the driver or for some tools. */
static const struct {
- uint32_t pci_id;
- uint32_t num_render_backends;
- bool has_dedicated_vram;
+ uint32_t pci_id;
+ uint32_t num_render_backends;
+ bool has_dedicated_vram;
} gpu_info[] = {
- [CHIP_TAHITI] = { 0x6780, 8, true },
- [CHIP_PITCAIRN] = { 0x6800, 8, true },
- [CHIP_VERDE] = { 0x6820, 4, true },
- [CHIP_OLAND] = { 0x6060, 2, true },
- [CHIP_HAINAN] = { 0x6660, 2, true },
- [CHIP_BONAIRE] = { 0x6640, 4, true },
- [CHIP_KAVERI] = { 0x1304, 2, false },
- [CHIP_KABINI] = { 0x9830, 2, false },
- [CHIP_HAWAII] = { 0x67A0, 16, true },
- [CHIP_TONGA] = { 0x6920, 8, true },
- [CHIP_ICELAND] = { 0x6900, 2, true },
- [CHIP_CARRIZO] = { 0x9870, 2, false },
- [CHIP_FIJI] = { 0x7300, 16, true },
- [CHIP_STONEY] = { 0x98E4, 2, false },
- [CHIP_POLARIS10] = { 0x67C0, 8, true },
- [CHIP_POLARIS11] = { 0x67E0, 4, true },
- [CHIP_POLARIS12] = { 0x6980, 4, true },
- [CHIP_VEGAM] = { 0x694C, 4, true },
- [CHIP_VEGA10] = { 0x6860, 16, true },
- [CHIP_VEGA12] = { 0x69A0, 8, true },
- [CHIP_VEGA20] = { 0x66A0, 16, true },
- [CHIP_RAVEN] = { 0x15DD, 2, false },
- [CHIP_RENOIR] = { 0x1636, 2, false },
- [CHIP_ARCTURUS] = { 0x738C, 2, true },
- [CHIP_NAVI10] = { 0x7310, 16, true },
- [CHIP_NAVI12] = { 0x7360, 8, true },
- [CHIP_NAVI14] = { 0x7340, 8, true },
- [CHIP_SIENNA_CICHLID] = { 0x73A0, 8, true },
- [CHIP_VANGOGH] = { 0x163F, 8, false },
- [CHIP_NAVY_FLOUNDER] = { 0x73C0, 8, true },
- [CHIP_DIMGREY_CAVEFISH] = { 0x73E0, 8, true },
+ [CHIP_TAHITI] = {0x6780, 8, true},
+ [CHIP_PITCAIRN] = {0x6800, 8, true},
+ [CHIP_VERDE] = {0x6820, 4, true},
+ [CHIP_OLAND] = {0x6060, 2, true},
+ [CHIP_HAINAN] = {0x6660, 2, true},
+ [CHIP_BONAIRE] = {0x6640, 4, true},
+ [CHIP_KAVERI] = {0x1304, 2, false},
+ [CHIP_KABINI] = {0x9830, 2, false},
+ [CHIP_HAWAII] = {0x67A0, 16, true},
+ [CHIP_TONGA] = {0x6920, 8, true},
+ [CHIP_ICELAND] = {0x6900, 2, true},
+ [CHIP_CARRIZO] = {0x9870, 2, false},
+ [CHIP_FIJI] = {0x7300, 16, true},
+ [CHIP_STONEY] = {0x98E4, 2, false},
+ [CHIP_POLARIS10] = {0x67C0, 8, true},
+ [CHIP_POLARIS11] = {0x67E0, 4, true},
+ [CHIP_POLARIS12] = {0x6980, 4, true},
+ [CHIP_VEGAM] = {0x694C, 4, true},
+ [CHIP_VEGA10] = {0x6860, 16, true},
+ [CHIP_VEGA12] = {0x69A0, 8, true},
+ [CHIP_VEGA20] = {0x66A0, 16, true},
+ [CHIP_RAVEN] = {0x15DD, 2, false},
+ [CHIP_RENOIR] = {0x1636, 2, false},
+ [CHIP_ARCTURUS] = {0x738C, 2, true},
+ [CHIP_NAVI10] = {0x7310, 16, true},
+ [CHIP_NAVI12] = {0x7360, 8, true},
+ [CHIP_NAVI14] = {0x7340, 8, true},
+ [CHIP_SIENNA_CICHLID] = {0x73A0, 8, true},
+ [CHIP_VANGOGH] = {0x163F, 8, false},
+ [CHIP_NAVY_FLOUNDER] = {0x73C0, 8, true},
+ [CHIP_DIMGREY_CAVEFISH] = {0x73E0, 8, true},
};
-static void radv_null_winsys_query_info(struct radeon_winsys *rws,
- struct radeon_info *info)
+static void
+radv_null_winsys_query_info(struct radeon_winsys *rws, struct radeon_info *info)
{
- const char *family = getenv("RADV_FORCE_FAMILY");
- unsigned i;
-
- info->chip_class = CLASS_UNKNOWN;
- info->family = CHIP_UNKNOWN;
-
- for (i = CHIP_TAHITI; i < CHIP_LAST; i++) {
- if (!strcmp(family, ac_get_family_name(i))) {
- /* Override family and chip_class. */
- info->family = i;
- info->name = "OVERRIDDEN";
-
- if (i >= CHIP_SIENNA_CICHLID)
- info->chip_class = GFX10_3;
- else if (i >= CHIP_NAVI10)
- info->chip_class = GFX10;
- else if (i >= CHIP_VEGA10)
- info->chip_class = GFX9;
- else if (i >= CHIP_TONGA)
- info->chip_class = GFX8;
- else if (i >= CHIP_BONAIRE)
- info->chip_class = GFX7;
- else
- info->chip_class = GFX6;
- }
- }
-
- if (info->family == CHIP_UNKNOWN) {
- fprintf(stderr, "radv: Unknown family: %s\n", family);
- abort();
- }
-
- info->pci_id = gpu_info[info->family].pci_id;
- info->max_se = 4;
- info->num_se = 4;
- if (info->chip_class >= GFX10_3)
- info->max_wave64_per_simd = 16;
- else if (info->chip_class >= GFX10)
- info->max_wave64_per_simd = 20;
- else if (info->family >= CHIP_POLARIS10 && info->family <= CHIP_VEGAM)
- info->max_wave64_per_simd = 8;
- else
- info->max_wave64_per_simd = 10;
-
- if (info->chip_class >= GFX10)
- info->num_physical_sgprs_per_simd = 128 * info->max_wave64_per_simd * 2;
- else if (info->chip_class >= GFX8)
- info->num_physical_sgprs_per_simd = 800;
- else
- info->num_physical_sgprs_per_simd = 512;
-
- info->num_physical_wave64_vgprs_per_simd = info->chip_class >= GFX10 ? 512 : 256;
- info->num_simd_per_compute_unit = info->chip_class >= GFX10 ? 2 : 4;
- info->lds_size_per_workgroup = info->chip_class >= GFX10 ? 128 * 1024 : 64 * 1024;
- info->lds_encode_granularity = info->chip_class >= GFX7 ? 128 * 4 : 64 * 4;
- info->lds_alloc_granularity = info->chip_class >= GFX10_3 ? 256 * 4 : info->lds_encode_granularity;
- info->max_render_backends = gpu_info[info->family].num_render_backends;
-
- info->has_dedicated_vram = gpu_info[info->family].has_dedicated_vram;
- info->has_packed_math_16bit = info->chip_class >= GFX9;
-
- info->has_image_load_dcc_bug = info->family == CHIP_DIMGREY_CAVEFISH ||
- info->family == CHIP_VANGOGH;
+ const char *family = getenv("RADV_FORCE_FAMILY");
+ unsigned i;
+
+ info->chip_class = CLASS_UNKNOWN;
+ info->family = CHIP_UNKNOWN;
+
+ for (i = CHIP_TAHITI; i < CHIP_LAST; i++) {
+ if (!strcmp(family, ac_get_family_name(i))) {
+ /* Override family and chip_class. */
+ info->family = i;
+ info->name = "OVERRIDDEN";
+
+ if (i >= CHIP_SIENNA_CICHLID)
+ info->chip_class = GFX10_3;
+ else if (i >= CHIP_NAVI10)
+ info->chip_class = GFX10;
+ else if (i >= CHIP_VEGA10)
+ info->chip_class = GFX9;
+ else if (i >= CHIP_TONGA)
+ info->chip_class = GFX8;
+ else if (i >= CHIP_BONAIRE)
+ info->chip_class = GFX7;
+ else
+ info->chip_class = GFX6;
+ }
+ }
+
+ if (info->family == CHIP_UNKNOWN) {
+ fprintf(stderr, "radv: Unknown family: %s\n", family);
+ abort();
+ }
+
+ info->pci_id = gpu_info[info->family].pci_id;
+ info->max_se = 4;
+ info->num_se = 4;
+ if (info->chip_class >= GFX10_3)
+ info->max_wave64_per_simd = 16;
+ else if (info->chip_class >= GFX10)
+ info->max_wave64_per_simd = 20;
+ else if (info->family >= CHIP_POLARIS10 && info->family <= CHIP_VEGAM)
+ info->max_wave64_per_simd = 8;
+ else
+ info->max_wave64_per_simd = 10;
+
+ if (info->chip_class >= GFX10)
+ info->num_physical_sgprs_per_simd = 128 * info->max_wave64_per_simd * 2;
+ else if (info->chip_class >= GFX8)
+ info->num_physical_sgprs_per_simd = 800;
+ else
+ info->num_physical_sgprs_per_simd = 512;
+
+ info->num_physical_wave64_vgprs_per_simd = info->chip_class >= GFX10 ? 512 : 256;
+ info->num_simd_per_compute_unit = info->chip_class >= GFX10 ? 2 : 4;
+ info->lds_size_per_workgroup = info->chip_class >= GFX10 ? 128 * 1024 : 64 * 1024;
+ info->lds_encode_granularity = info->chip_class >= GFX7 ? 128 * 4 : 64 * 4;
+ info->lds_alloc_granularity =
+ info->chip_class >= GFX10_3 ? 256 * 4 : info->lds_encode_granularity;
+ info->max_render_backends = gpu_info[info->family].num_render_backends;
+
+ info->has_dedicated_vram = gpu_info[info->family].has_dedicated_vram;
+ info->has_packed_math_16bit = info->chip_class >= GFX9;
+
+ info->has_image_load_dcc_bug =
+ info->family == CHIP_DIMGREY_CAVEFISH || info->family == CHIP_VANGOGH;
}
-static void radv_null_winsys_destroy(struct radeon_winsys *rws)
+static void
+radv_null_winsys_destroy(struct radeon_winsys *rws)
{
- FREE(rws);
+ FREE(rws);
}
struct radeon_winsys *
radv_null_winsys_create()
{
- struct radv_null_winsys *ws;
+ struct radv_null_winsys *ws;
- ws = calloc(1, sizeof(struct radv_null_winsys));
- if (!ws)
- return NULL;
+ ws = calloc(1, sizeof(struct radv_null_winsys));
+ if (!ws)
+ return NULL;
- ws->base.destroy = radv_null_winsys_destroy;
- ws->base.query_info = radv_null_winsys_query_info;
- radv_null_bo_init_functions(ws);
- radv_null_cs_init_functions(ws);
+ ws->base.destroy = radv_null_winsys_destroy;
+ ws->base.query_info = radv_null_winsys_query_info;
+ radv_null_bo_init_functions(ws);
+ radv_null_cs_init_functions(ws);
- return &ws->base;
+ return &ws->base;
}
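
radv_null_winsys_query_info() keeps deriving chip_class from the RADV_FORCE_FAMILY name purely by where that family sits in the CHIP_* enum ordering. That ladder is easy to lift out on its own; the reduced enum below is a hypothetical stand-in for amd_family.h and keeps only the thresholds the code actually tests.

/* Illustrative sketch, not part of this commit: the family-to-chip_class
 * ladder from the override path, with stand-in names. */
#include <stdio.h>

enum gfx_family {      /* ordered like the real CHIP_* thresholds */
   FAM_TAHITI,         /* GFX6 baseline      */
   FAM_BONAIRE,        /* first GFX7 part    */
   FAM_TONGA,          /* first GFX8 part    */
   FAM_VEGA10,         /* first GFX9 part    */
   FAM_NAVI10,         /* first GFX10 part   */
   FAM_SIENNA_CICHLID, /* first GFX10.3 part */
};

static const char *
chip_class_for(enum gfx_family f)
{
   if (f >= FAM_SIENNA_CICHLID)
      return "GFX10_3";
   if (f >= FAM_NAVI10)
      return "GFX10";
   if (f >= FAM_VEGA10)
      return "GFX9";
   if (f >= FAM_TONGA)
      return "GFX8";
   if (f >= FAM_BONAIRE)
      return "GFX7";
   return "GFX6";
}

int
main(void)
{
   printf("%s\n", chip_class_for(FAM_NAVI10)); /* prints GFX10 */
   return 0;
}
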
diff --git a/src/amd/vulkan/winsys/null/radv_null_winsys.h b/src/amd/vulkan/winsys/null/radv_null_winsys.h
index b763875ddaa..c29b6ce5862 100644
--- a/src/amd/vulkan/winsys/null/radv_null_winsys.h
+++ b/src/amd/vulkan/winsys/null/radv_null_winsys.h
@@ -28,18 +28,18 @@
#ifndef RADV_NULL_WINSYS_H
#define RADV_NULL_WINSYS_H
-#include "radv_radeon_winsys.h"
-#include "ac_gpu_info.h"
#include "util/list.h"
+#include "ac_gpu_info.h"
+#include "radv_radeon_winsys.h"
struct radv_null_winsys {
- struct radeon_winsys base;
+ struct radeon_winsys base;
};
static inline struct radv_null_winsys *
radv_null_winsys(struct radeon_winsys *base)
{
- return (struct radv_null_winsys*)base;
+ return (struct radv_null_winsys *)base;
}
#endif /* RADV_NULL_WINSYS_H */