/*
 * Copyright © 2020 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "radv_private.h"
#include "radv_shader.h"

#include "ac_rgp.h"
#include "ac_sqtt.h"

/*
 * Marker writers.
 *
 * Each helper below fills one rgp_sqtt_marker_* structure and hands it to
 * radv_emit_thread_trace_userdata() together with its size in dwords.
 */

/* Emit a general-API marker for api_type with is_end left at zero. */
static void
radv_write_begin_general_api_marker(struct radv_cmd_buffer *cmd_buffer,
                                    enum rgp_sqtt_marker_general_api_type api_type)
{
   struct rgp_sqtt_marker_general_api marker = {0};
   struct radeon_cmdbuf *cs = cmd_buffer->cs;

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API;
   marker.api_type = api_type;

   radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
}

/* Emit a general-API marker for api_type with is_end set. */
static void
radv_write_end_general_api_marker(struct radv_cmd_buffer *cmd_buffer,
                                  enum rgp_sqtt_marker_general_api_type api_type)
{
   struct rgp_sqtt_marker_general_api marker = {0};
   struct radeon_cmdbuf *cs = cmd_buffer->cs;

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API;
   marker.api_type = api_type;
   marker.is_end = 1;

   radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
}

/*
 * Emit an event marker and consume one event id from the command buffer
 * (state.num_events is post-incremented).
 *
 * UINT_MAX is treated as "not provided" for the three user-data indices:
 * if either the vertex or the instance offset is UINT_MAX both are reset to
 * 0, and a UINT_MAX draw index falls back to the (possibly reset) vertex
 * offset index.
 */
static void
radv_write_event_marker(struct radv_cmd_buffer *cmd_buffer,
                        enum rgp_sqtt_marker_event_type api_type,
                        uint32_t vertex_offset_user_data,
                        uint32_t instance_offset_user_data,
                        uint32_t draw_index_user_data)
{
   struct rgp_sqtt_marker_event marker = {0};
   struct radeon_cmdbuf *cs = cmd_buffer->cs;

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_EVENT;
   marker.api_type = api_type;
   marker.cmd_id = cmd_buffer->state.num_events++;
   marker.cb_id = 0;

   if (vertex_offset_user_data == UINT_MAX || instance_offset_user_data == UINT_MAX) {
      vertex_offset_user_data = 0;
      instance_offset_user_data = 0;
   }

   if (draw_index_user_data == UINT_MAX)
      draw_index_user_data = vertex_offset_user_data;

   marker.vertex_offset_reg_idx = vertex_offset_user_data;
   marker.instance_offset_reg_idx = instance_offset_user_data;
   marker.draw_index_reg_idx = draw_index_user_data;

   radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
}

/*
 * Emit an event marker that additionally carries the x/y/z dimensions
 * (has_thread_dims is set). Also consumes one event id.
 */
static void
radv_write_event_with_dims_marker(struct radv_cmd_buffer *cmd_buffer,
                                  enum rgp_sqtt_marker_event_type api_type,
                                  uint32_t x, uint32_t y, uint32_t z)
{
   struct rgp_sqtt_marker_event_with_dims marker = {0};
   struct radeon_cmdbuf *cs = cmd_buffer->cs;

   marker.event.identifier = RGP_SQTT_MARKER_IDENTIFIER_EVENT;
   marker.event.api_type = api_type;
   marker.event.cmd_id = cmd_buffer->state.num_events++;
   marker.event.cb_id = 0;
   marker.event.has_thread_dims = 1;

   marker.thread_x = x;
   marker.thread_y = y;
   marker.thread_z = z;

   radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
}

/*
 * Emit a user event marker.
 *
 * For UserEventPop, str must be NULL and only the fixed-size marker is
 * emitted. For every other type, str must be non-NULL; the marker is followed
 * by the string bytes, zero-padded up to a multiple of 4 bytes (the string
 * terminator itself is not copied, only the padding zeroes follow it).
 */
static void
radv_write_user_event_marker(struct radv_cmd_buffer *cmd_buffer,
                             enum rgp_sqtt_marker_user_event_type type,
                             const char *str)
{
   struct radeon_cmdbuf *cs = cmd_buffer->cs;

   if (type == UserEventPop) {
      assert(str == NULL);

      struct rgp_sqtt_marker_user_event marker = {0};
      marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT;
      marker.data_type = type;

      radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
   } else {
      assert(str != NULL);

      unsigned len = strlen(str);
      struct rgp_sqtt_marker_user_event_with_length marker = {0};
      marker.user_event.identifier = RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT;
      marker.user_event.data_type = type;
      marker.length = align(len, 4);

      /* Header + padded string are assembled in one temporary stack buffer. */
      uint8_t *buffer = alloca(sizeof(marker) + marker.length);
      memset(buffer, 0, sizeof(marker) + marker.length);
      memcpy(buffer, &marker, sizeof(marker));
      memcpy(buffer + sizeof(marker), str, len);

      radv_emit_thread_trace_userdata(cmd_buffer->device, cs, buffer,
                                      sizeof(marker) / 4 + marker.length / 4);
   }
}

/*
 * radv_describe_*() entry points.
 *
 * The ones that emit markers return early when the device has no thread
 * trace BO (device->thread_trace.bo is NULL).
 */

/*
 * Emit the command-buffer start marker. The device pointer value is used as
 * the 64-bit device id, and the graphics queue flag is only added for
 * RADV_QUEUE_GENERAL.
 */
void
radv_describe_begin_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
{
   uint64_t device_id = (uintptr_t)cmd_buffer->device;
   struct rgp_sqtt_marker_cb_start marker = {0};
   struct radeon_cmdbuf *cs = cmd_buffer->cs;

   if (likely(!cmd_buffer->device->thread_trace.bo))
      return;

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_CB_START;
   marker.cb_id = 0;
   marker.device_id_low = device_id;
   marker.device_id_high = device_id >> 32;
   marker.queue = cmd_buffer->queue_family_index;
   marker.queue_flags = VK_QUEUE_COMPUTE_BIT |
                        VK_QUEUE_TRANSFER_BIT |
                        VK_QUEUE_SPARSE_BINDING_BIT;

   if (cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL)
      marker.queue_flags |= VK_QUEUE_GRAPHICS_BIT;

   radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
}

/* Emit the command-buffer end marker (same device id as the start marker). */
void
radv_describe_end_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
{
   uint64_t device_id = (uintptr_t)cmd_buffer->device;
   struct rgp_sqtt_marker_cb_end marker = {0};
   struct radeon_cmdbuf *cs = cmd_buffer->cs;

   if (likely(!cmd_buffer->device->thread_trace.bo))
      return;

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_CB_END;
   marker.cb_id = 0;
   marker.device_id_low = device_id;
   marker.device_id_high = device_id >> 32;

   radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
}

/* Emit an event marker for the current event type, without user-data indices. */
void
radv_describe_draw(struct radv_cmd_buffer *cmd_buffer)
{
   if (likely(!cmd_buffer->device->thread_trace.bo))
      return;

   radv_write_event_marker(cmd_buffer, cmd_buffer->state.current_event_type,
                           UINT_MAX, UINT_MAX, UINT_MAX);
}

/* Emit an event marker for the current event type carrying x/y/z. */
void
radv_describe_dispatch(struct radv_cmd_buffer *cmd_buffer, int x, int y, int z)
{
   if (likely(!cmd_buffer->device->thread_trace.bo))
      return;

   radv_write_event_with_dims_marker(cmd_buffer, cmd_buffer->state.current_event_type,
                                     x, y, z);
}

/*
 * Select the event type used by subsequent describe calls: color clear if
 * the color aspect bit is present, depth/stencil clear otherwise.
 */
void
radv_describe_begin_render_pass_clear(struct radv_cmd_buffer *cmd_buffer,
                                      VkImageAspectFlagBits aspects)
{
   cmd_buffer->state.current_event_type = (aspects & VK_IMAGE_ASPECT_COLOR_BIT) ?
      EventRenderPassColorClear : EventRenderPassDepthStencilClear;
}

/* Reset the current event type to EventInternalUnknown. */
void
radv_describe_end_render_pass_clear(struct radv_cmd_buffer *cmd_buffer)
{
   cmd_buffer->state.current_event_type = EventInternalUnknown;
}

/* Set the current event type to EventRenderPassResolve. */
void
radv_describe_begin_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer)
{
   cmd_buffer->state.current_event_type = EventRenderPassResolve;
}

/* Reset the current event type to EventInternalUnknown. */
void
radv_describe_end_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer)
{
   cmd_buffer->state.current_event_type = EventInternalUnknown;
}

/*
 * Emit the barrier-end marker that radv_describe_barrier_end() deferred.
 *
 * Does nothing unless a barrier end is pending. The marker reports the number
 * of layout transitions counted so far and one flag per bit found in
 * state.sqtt_flush_bits; the transition counter is reset afterwards.
 */
void
radv_describe_barrier_end_delayed(struct radv_cmd_buffer *cmd_buffer)
{
   struct rgp_sqtt_marker_barrier_end marker = {0};
   struct radeon_cmdbuf *cs = cmd_buffer->cs;

   if (likely(!cmd_buffer->device->thread_trace.bo) ||
       !cmd_buffer->state.pending_sqtt_barrier_end)
      return;

   cmd_buffer->state.pending_sqtt_barrier_end = false;

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_END;
   marker.cb_id = 0;

   marker.num_layout_transitions = cmd_buffer->state.num_layout_transitions;

   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_WAIT_ON_EOP_TS)
      marker.wait_on_eop_ts = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_VS_PARTIAL_FLUSH)
      marker.vs_partial_flush = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_PS_PARTIAL_FLUSH)
      marker.ps_partial_flush = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_CS_PARTIAL_FLUSH)
      marker.cs_partial_flush = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_PFP_SYNC_ME)
      marker.pfp_sync_me = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_SYNC_CP_DMA)
      marker.sync_cp_dma = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_VMEM_L0)
      marker.inval_tcp = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_ICACHE)
      marker.inval_sqI = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_SMEM_L0)
      marker.inval_sqK = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_L2)
      marker.flush_tcc = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_L2)
      marker.inval_tcc = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_CB)
      marker.flush_cb = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_CB)
      marker.inval_cb = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_DB)
      marker.flush_db = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_DB)
      marker.inval_db = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_L1)
      marker.inval_gl1 = true;

   radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);

   cmd_buffer->state.num_layout_transitions = 0;
}

/*
 * Emit a barrier-start marker carrying `reason`. Any still-pending barrier
 * end is flushed first, and the accumulated flush bits are cleared.
 */
void
radv_describe_barrier_start(struct radv_cmd_buffer *cmd_buffer,
                            enum rgp_barrier_reason reason)
{
   struct rgp_sqtt_marker_barrier_start marker = {0};
   struct radeon_cmdbuf *cs = cmd_buffer->cs;

   if (likely(!cmd_buffer->device->thread_trace.bo))
      return;

   radv_describe_barrier_end_delayed(cmd_buffer);
   cmd_buffer->state.sqtt_flush_bits = 0;

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_START;
   marker.cb_id = 0;
   marker.dword02 = reason;

   radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
}

/*
 * Only records that a barrier end is pending; the marker itself is written
 * later by radv_describe_barrier_end_delayed().
 */
void
radv_describe_barrier_end(struct radv_cmd_buffer *cmd_buffer)
{
   cmd_buffer->state.pending_sqtt_barrier_end = true;
}

/*
 * Emit a layout-transition marker mirroring the flags in
 * barrier->layout_transitions and bump the per-barrier transition counter.
 */
void
radv_describe_layout_transition(struct radv_cmd_buffer *cmd_buffer,
                                const struct radv_barrier_data *barrier)
{
   struct rgp_sqtt_marker_layout_transition marker = {0};
   struct radeon_cmdbuf *cs = cmd_buffer->cs;

   if (likely(!cmd_buffer->device->thread_trace.bo))
      return;

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_LAYOUT_TRANSITION;
   marker.depth_stencil_expand = barrier->layout_transitions.depth_stencil_expand;
   marker.htile_hiz_range_expand = barrier->layout_transitions.htile_hiz_range_expand;
   marker.depth_stencil_resummarize = barrier->layout_transitions.depth_stencil_resummarize;
   marker.dcc_decompress = barrier->layout_transitions.dcc_decompress;
   marker.fmask_decompress = barrier->layout_transitions.fmask_decompress;
   marker.fast_clear_eliminate = barrier->layout_transitions.fast_clear_eliminate;
   marker.fmask_color_expand = barrier->layout_transitions.fmask_color_expand;
   marker.init_mask_ram = barrier->layout_transitions.init_mask_ram;

   radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);

   cmd_buffer->state.num_layout_transitions++;
}

/*
 * Emit a bind-pipeline marker holding the bind point and the 64-bit pipeline
 * hash split into two dwords.
 */
static void
radv_describe_pipeline_bind(struct radv_cmd_buffer *cmd_buffer,
                            VkPipelineBindPoint pipelineBindPoint,
                            struct radv_pipeline *pipeline)
{
   struct rgp_sqtt_marker_pipeline_bind marker = {0};
   struct radeon_cmdbuf *cs = cmd_buffer->cs;

   if (likely(!cmd_buffer->device->thread_trace.bo))
      return;

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BIND_PIPELINE;
   marker.cb_id = 0;
   marker.bind_point = pipelineBindPoint;
   marker.api_pso_hash[0] = pipeline->pipeline_hash;
   marker.api_pso_hash[1] = pipeline->pipeline_hash >> 32;

   radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
}

/* TODO: Improve the way to trigger capture (overlay, etc). */
/*
 * Per-present capture state machine, driven from sqtt_QueuePresentKHR().
 *
 * State is kept in function-local statics (shared by every queue that calls
 * this). If a capture is running it is ended, the queue is idled and the
 * trace is dumped; if the trace could not be retrieved a new capture is
 * requested right away. When no capture is running, one is started if the
 * frame counter matches thread_trace.start_frame, if the trigger file exists
 * and could be removed (non-Windows only), or if a re-capture was requested.
 */
static void
radv_handle_thread_trace(VkQueue _queue)
{
   RADV_FROM_HANDLE(radv_queue, queue, _queue);
   static bool thread_trace_enabled = false;
   static uint64_t num_frames = 0;
   bool resize_trigger = false;

   if (thread_trace_enabled) {
      struct ac_thread_trace thread_trace = {0};

      radv_end_thread_trace(queue);
      thread_trace_enabled = false;

      /* TODO: Do something better than this whole sync. */
      radv_QueueWaitIdle(_queue);

      if (radv_get_thread_trace(queue, &thread_trace)) {
         ac_dump_rgp_capture(&queue->device->physical_device->rad_info,
                             &thread_trace, &queue->device->thread_trace);
      } else {
         /* Trigger a new capture if the driver failed to get
          * the trace because the buffer was too small.
          */
         resize_trigger = true;
      }
   }

   if (!thread_trace_enabled) {
      bool frame_trigger = num_frames == queue->device->thread_trace.start_frame;
      bool file_trigger = false;
#ifndef _WIN32
      if (queue->device->thread_trace.trigger_file &&
          access(queue->device->thread_trace.trigger_file, W_OK) == 0) {
         if (unlink(queue->device->thread_trace.trigger_file) == 0) {
            file_trigger = true;
         } else {
            /* Do not enable tracing if we cannot remove the file,
             * because by then we'll trace every frame ... */
            fprintf(stderr, "RADV: could not remove thread trace trigger file, ignoring\n");
         }
      }
#endif

      if (frame_trigger || file_trigger || resize_trigger) {
         /* FIXME: SQTT on compute hangs. */
         if (queue->queue_family_index == RADV_QUEUE_COMPUTE) {
            fprintf(stderr, "RADV: Capturing a SQTT trace on the compute "
                            "queue is currently broken and might hang! "
                            "Please, disable presenting on compute if "
                            "you can.\n");
            /* Note: this return also skips the num_frames increment below. */
            return;
         }

         radv_begin_thread_trace(queue);
         assert(!thread_trace_enabled);
         thread_trace_enabled = true;
      }
   }
   num_frames++;
}

/*
 * Present through radv_QueuePresentKHR() and, only when that returned
 * VK_SUCCESS, run the capture state machine for this queue.
 */
VkResult
sqtt_QueuePresentKHR(VkQueue _queue, const VkPresentInfoKHR *pPresentInfo)
{
   VkResult result;

   result = radv_QueuePresentKHR(_queue, pPresentInfo);
   if (result != VK_SUCCESS)
      return result;

   radv_handle_thread_trace(_queue);

   return VK_SUCCESS;
}

/*
 * Event commands.
 *
 * EVENT_MARKER_ALIAS wraps radv_Cmd<cmd_name>() between begin/end general-API
 * markers for ApiCmd<api_name>, and sets state.current_event_type to
 * EventCmd<api_name> for the duration of the call. It declares a local
 * `cmd_buffer` from the `commandBuffer` parameter, so it must be used in a
 * function that has one and can only be expanded once per scope.
 */
#define EVENT_MARKER_ALIAS(cmd_name, api_name, ...)                     \
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);        \
   radv_write_begin_general_api_marker(cmd_buffer, ApiCmd##api_name);   \
   cmd_buffer->state.current_event_type = EventCmd##api_name;           \
   radv_Cmd##cmd_name(__VA_ARGS__);                                     \
   cmd_buffer->state.current_event_type = EventInternalUnknown;         \
   radv_write_end_general_api_marker(cmd_buffer, ApiCmd##api_name);

#define EVENT_MARKER(cmd_name, ...) \
   EVENT_MARKER_ALIAS(cmd_name, cmd_name, __VA_ARGS__);

void
sqtt_CmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount,
             uint32_t instanceCount, uint32_t firstVertex,
             uint32_t firstInstance)
{
   EVENT_MARKER(Draw, commandBuffer, vertexCount, instanceCount,
                firstVertex, firstInstance);
}

void
sqtt_CmdDrawIndexed(VkCommandBuffer commandBuffer, uint32_t indexCount,
                    uint32_t instanceCount, uint32_t firstIndex,
                    int32_t vertexOffset, uint32_t firstInstance)
{
   EVENT_MARKER(DrawIndexed, commandBuffer, indexCount, instanceCount,
                firstIndex, vertexOffset, firstInstance);
}

void
sqtt_CmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer,
                     VkDeviceSize offset, uint32_t drawCount,
                     uint32_t stride)
{
   EVENT_MARKER(DrawIndirect, commandBuffer, buffer, offset, drawCount,
                stride);
}

void
sqtt_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer,
                            VkDeviceSize offset, uint32_t drawCount,
                            uint32_t stride)
{
   EVENT_MARKER(DrawIndexedIndirect, commandBuffer, buffer, offset,
                drawCount, stride);
}

void
sqtt_CmdDrawIndirectCount(VkCommandBuffer commandBuffer, VkBuffer buffer,
                          VkDeviceSize offset, VkBuffer countBuffer,
                          VkDeviceSize countBufferOffset,
                          uint32_t maxDrawCount, uint32_t stride)
{
   EVENT_MARKER(DrawIndirectCount, commandBuffer, buffer, offset,
                countBuffer, countBufferOffset, maxDrawCount, stride);
}

void
sqtt_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer,
                                 VkBuffer buffer, VkDeviceSize offset,
                                 VkBuffer countBuffer,
                                 VkDeviceSize countBufferOffset,
                                 uint32_t maxDrawCount, uint32_t stride)
{
   EVENT_MARKER(DrawIndexedIndirectCount, commandBuffer, buffer, offset,
                countBuffer, countBufferOffset, maxDrawCount, stride);
}

void
sqtt_CmdDispatch(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y,
                 uint32_t z)
{
   EVENT_MARKER(Dispatch, commandBuffer, x, y, z);
}

void
sqtt_CmdDispatchIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer,
                         VkDeviceSize offset)
{
   EVENT_MARKER(DispatchIndirect, commandBuffer, buffer, offset);
}

void
sqtt_CmdCopyBuffer2KHR(VkCommandBuffer commandBuffer,
                       const VkCopyBufferInfo2KHR *pCopyBufferInfo)
{
   EVENT_MARKER_ALIAS(CopyBuffer2KHR, CopyBuffer, commandBuffer,
                      pCopyBufferInfo);
}

void
sqtt_CmdFillBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer,
                   VkDeviceSize dstOffset, VkDeviceSize fillSize,
                   uint32_t data)
{
   EVENT_MARKER(FillBuffer, commandBuffer, dstBuffer, dstOffset, fillSize,
                data);
}

void
sqtt_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer,
                     VkDeviceSize dstOffset, VkDeviceSize dataSize,
                     const void *pData)
{
   EVENT_MARKER(UpdateBuffer, commandBuffer, dstBuffer, dstOffset,
                dataSize, pData);
}

void
sqtt_CmdCopyImage2KHR(VkCommandBuffer commandBuffer,
                      const VkCopyImageInfo2KHR *pCopyImageInfo)
{
   EVENT_MARKER_ALIAS(CopyImage2KHR, CopyImage, commandBuffer,
                      pCopyImageInfo);
}

void
sqtt_CmdCopyBufferToImage2KHR(VkCommandBuffer commandBuffer,
                              const VkCopyBufferToImageInfo2KHR *pCopyBufferToImageInfo)
{
   EVENT_MARKER_ALIAS(CopyBufferToImage2KHR, CopyBufferToImage,
                      commandBuffer, pCopyBufferToImageInfo);
}

void
sqtt_CmdCopyImageToBuffer2KHR(VkCommandBuffer commandBuffer,
                              const VkCopyImageToBufferInfo2KHR *pCopyImageToBufferInfo)
{
   EVENT_MARKER_ALIAS(CopyImageToBuffer2KHR, CopyImageToBuffer,
                      commandBuffer, pCopyImageToBufferInfo);
}

void
sqtt_CmdBlitImage2KHR(VkCommandBuffer commandBuffer,
                      const VkBlitImageInfo2KHR *pBlitImageInfo)
{
   EVENT_MARKER_ALIAS(BlitImage2KHR, BlitImage, commandBuffer,
                      pBlitImageInfo);
}

void
sqtt_CmdClearColorImage(VkCommandBuffer commandBuffer, VkImage image_h,
                        VkImageLayout imageLayout,
                        const VkClearColorValue *pColor,
                        uint32_t rangeCount,
                        const VkImageSubresourceRange *pRanges)
{
   EVENT_MARKER(ClearColorImage, commandBuffer, image_h, imageLayout,
                pColor, rangeCount, pRanges);
}

void
sqtt_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
                               VkImage image_h, VkImageLayout imageLayout,
                               const VkClearDepthStencilValue *pDepthStencil,
                               uint32_t rangeCount,
                               const VkImageSubresourceRange *pRanges)
{
   EVENT_MARKER(ClearDepthStencilImage, commandBuffer, image_h,
                imageLayout, pDepthStencil, rangeCount, pRanges);
}

void
sqtt_CmdClearAttachments(VkCommandBuffer commandBuffer,
                         uint32_t attachmentCount,
                         const VkClearAttachment *pAttachments,
                         uint32_t rectCount, const VkClearRect *pRects)
{
   EVENT_MARKER(ClearAttachments, commandBuffer, attachmentCount,
                pAttachments, rectCount, pRects);
}

void
sqtt_CmdResolveImage2KHR(VkCommandBuffer commandBuffer,
                         const VkResolveImageInfo2KHR *pResolveImageInfo)
{
   EVENT_MARKER_ALIAS(ResolveImage2KHR, ResolveImage, commandBuffer,
                      pResolveImageInfo);
}

void
sqtt_CmdWaitEvents(VkCommandBuffer commandBuffer, uint32_t eventCount,
                   const VkEvent *pEvents,
                   VkPipelineStageFlags srcStageMask,
                   VkPipelineStageFlags dstStageMask,
                   uint32_t memoryBarrierCount,
                   const VkMemoryBarrier *pMemoryBarriers,
                   uint32_t bufferMemoryBarrierCount,
                   const VkBufferMemoryBarrier *pBufferMemoryBarriers,
                   uint32_t imageMemoryBarrierCount,
                   const VkImageMemoryBarrier *pImageMemoryBarriers)
{
   EVENT_MARKER(WaitEvents, commandBuffer, eventCount, pEvents,
                srcStageMask, dstStageMask, memoryBarrierCount,
                pMemoryBarriers, bufferMemoryBarrierCount,
                pBufferMemoryBarriers, imageMemoryBarrierCount,
                pImageMemoryBarriers);
}

void
sqtt_CmdPipelineBarrier(VkCommandBuffer commandBuffer,
                        VkPipelineStageFlags srcStageMask,
                        VkPipelineStageFlags destStageMask,
                        VkBool32 byRegion,
                        uint32_t memoryBarrierCount,
                        const VkMemoryBarrier *pMemoryBarriers,
                        uint32_t bufferMemoryBarrierCount,
                        const VkBufferMemoryBarrier *pBufferMemoryBarriers,
                        uint32_t imageMemoryBarrierCount,
                        const VkImageMemoryBarrier *pImageMemoryBarriers)
{
   EVENT_MARKER(PipelineBarrier, commandBuffer, srcStageMask,
                destStageMask, byRegion, memoryBarrierCount,
                pMemoryBarriers, bufferMemoryBarrierCount,
                pBufferMemoryBarriers, imageMemoryBarrierCount,
                pImageMemoryBarriers);
}

void
sqtt_CmdResetQueryPool(VkCommandBuffer commandBuffer, VkQueryPool queryPool,
                       uint32_t firstQuery, uint32_t queryCount)
{
   EVENT_MARKER(ResetQueryPool, commandBuffer, queryPool, firstQuery,
                queryCount);
}

void
sqtt_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer,
                             VkQueryPool queryPool, uint32_t firstQuery,
                             uint32_t queryCount, VkBuffer dstBuffer,
                             VkDeviceSize dstOffset, VkDeviceSize stride,
                             VkQueryResultFlags flags)
{
   EVENT_MARKER(CopyQueryPoolResults, commandBuffer, queryPool, firstQuery,
                queryCount, dstBuffer, dstOffset, stride, flags);
}

#undef EVENT_MARKER

/*
 * Non-event commands.
 *
 * API_MARKER_ALIAS wraps radv_Cmd<cmd_name>() between begin/end general-API
 * markers for ApiCmd<api_name>, without touching the current event type.
 * Like EVENT_MARKER_ALIAS it declares a local `cmd_buffer`.
 */
#define API_MARKER_ALIAS(cmd_name, api_name, ...)                       \
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);        \
   radv_write_begin_general_api_marker(cmd_buffer, ApiCmd##api_name);   \
   radv_Cmd##cmd_name(__VA_ARGS__);                                     \
   radv_write_end_general_api_marker(cmd_buffer, ApiCmd##api_name);

#define API_MARKER(cmd_name, ...) \
   API_MARKER_ALIAS(cmd_name, cmd_name, __VA_ARGS__);

/*
 * Whether pipeline tracking is requested: true when the
 * RADV_THREAD_TRACE_PIPELINE environment variable is set (to any value).
 * The environment is queried on every call.
 */
static bool
radv_sqtt_dump_pipeline(void)
{
   return getenv("RADV_THREAD_TRACE_PIPELINE") != NULL;
}

void
sqtt_CmdBindPipeline(VkCommandBuffer commandBuffer,
                     VkPipelineBindPoint pipelineBindPoint,
                     VkPipeline _pipeline)
{
   RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);

   API_MARKER(BindPipeline, commandBuffer, pipelineBindPoint, _pipeline);

   /* The bind marker is written after the wrapped call and its end marker. */
   if (radv_sqtt_dump_pipeline())
      radv_describe_pipeline_bind(cmd_buffer, pipelineBindPoint, pipeline);
}

void
sqtt_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
                           VkPipelineBindPoint pipelineBindPoint,
                           VkPipelineLayout layout, uint32_t firstSet,
                           uint32_t descriptorSetCount,
                           const VkDescriptorSet *pDescriptorSets,
                           uint32_t dynamicOffsetCount,
                           const uint32_t *pDynamicOffsets)
{
   API_MARKER(BindDescriptorSets, commandBuffer, pipelineBindPoint,
              layout, firstSet, descriptorSetCount,
              pDescriptorSets, dynamicOffsetCount, pDynamicOffsets);
}

void
sqtt_CmdBindIndexBuffer(VkCommandBuffer commandBuffer, VkBuffer buffer,
                        VkDeviceSize offset, VkIndexType indexType)
{
   API_MARKER(BindIndexBuffer, commandBuffer, buffer, offset, indexType);
}

void
sqtt_CmdBindVertexBuffers(VkCommandBuffer commandBuffer,
                          uint32_t firstBinding, uint32_t bindingCount,
                          const VkBuffer *pBuffers,
                          const VkDeviceSize *pOffsets)
{
   API_MARKER(BindVertexBuffers, commandBuffer, firstBinding,
              bindingCount, pBuffers, pOffsets);
}

void
sqtt_CmdBeginQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool,
                   uint32_t query, VkQueryControlFlags flags)
{
   API_MARKER(BeginQuery, commandBuffer, queryPool, query, flags);
}

void
sqtt_CmdEndQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool,
                 uint32_t query)
{
   API_MARKER(EndQuery, commandBuffer, queryPool, query);
}

void
sqtt_CmdWriteTimestamp(VkCommandBuffer commandBuffer,
                       VkPipelineStageFlagBits pipelineStage,
                       VkQueryPool queryPool, uint32_t flags)
{
   API_MARKER(WriteTimestamp, commandBuffer, pipelineStage, queryPool,
              flags);
}

void
sqtt_CmdPushConstants(VkCommandBuffer commandBuffer, VkPipelineLayout layout,
                      VkShaderStageFlags stageFlags, uint32_t offset,
                      uint32_t size, const void *pValues)
{
   API_MARKER(PushConstants, commandBuffer, layout, stageFlags, offset,
              size, pValues);
}

void
sqtt_CmdBeginRenderPass2(VkCommandBuffer commandBuffer,
                         const VkRenderPassBeginInfo *pRenderPassBeginInfo,
                         const VkSubpassBeginInfo *pSubpassBeginInfo)
{
   API_MARKER_ALIAS(BeginRenderPass2, BeginRenderPass, commandBuffer,
                    pRenderPassBeginInfo, pSubpassBeginInfo);
}

void
sqtt_CmdNextSubpass2(VkCommandBuffer commandBuffer,
                     const VkSubpassBeginInfo *pSubpassBeginInfo,
                     const VkSubpassEndInfo *pSubpassEndInfo)
{
   API_MARKER_ALIAS(NextSubpass2, NextSubpass, commandBuffer,
                    pSubpassBeginInfo, pSubpassEndInfo);
}

void
sqtt_CmdEndRenderPass2(VkCommandBuffer commandBuffer,
                       const VkSubpassEndInfo *pSubpassEndInfo)
{
   API_MARKER_ALIAS(EndRenderPass2, EndRenderPass, commandBuffer,
                    pSubpassEndInfo);
}

void
sqtt_CmdExecuteCommands(VkCommandBuffer commandBuffer,
                        uint32_t commandBufferCount,
                        const VkCommandBuffer *pCmdBuffers)
{
   API_MARKER(ExecuteCommands, commandBuffer, commandBufferCount,
              pCmdBuffers);
}

void
sqtt_CmdSetViewport(VkCommandBuffer commandBuffer, uint32_t firstViewport,
                    uint32_t viewportCount, const VkViewport *pViewports)
{
   API_MARKER(SetViewport, commandBuffer, firstViewport, viewportCount,
              pViewports);
}

void
sqtt_CmdSetScissor(VkCommandBuffer commandBuffer, uint32_t firstScissor,
                   uint32_t scissorCount, const VkRect2D *pScissors)
{
   API_MARKER(SetScissor, commandBuffer, firstScissor, scissorCount,
              pScissors);
}

void
sqtt_CmdSetLineWidth(VkCommandBuffer commandBuffer, float lineWidth)
{
   API_MARKER(SetLineWidth, commandBuffer, lineWidth);
}

void
sqtt_CmdSetDepthBias(VkCommandBuffer commandBuffer,
                     float depthBiasConstantFactor, float depthBiasClamp,
                     float depthBiasSlopeFactor)
{
   API_MARKER(SetDepthBias, commandBuffer, depthBiasConstantFactor,
              depthBiasClamp, depthBiasSlopeFactor);
}

void
sqtt_CmdSetBlendConstants(VkCommandBuffer commandBuffer,
                          const float blendConstants[4])
{
   API_MARKER(SetBlendConstants, commandBuffer, blendConstants);
}

void
sqtt_CmdSetDepthBounds(VkCommandBuffer commandBuffer, float minDepthBounds,
                       float maxDepthBounds)
{
   API_MARKER(SetDepthBounds, commandBuffer, minDepthBounds,
              maxDepthBounds);
}

void
sqtt_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer,
                              VkStencilFaceFlags faceMask,
                              uint32_t compareMask)
{
   API_MARKER(SetStencilCompareMask, commandBuffer, faceMask, compareMask);
}

void
sqtt_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer,
                            VkStencilFaceFlags faceMask,
                            uint32_t writeMask)
{
   API_MARKER(SetStencilWriteMask, commandBuffer, faceMask, writeMask);
}

void
sqtt_CmdSetStencilReference(VkCommandBuffer commandBuffer,
                            VkStencilFaceFlags faceMask,
                            uint32_t reference)
{
   API_MARKER(SetStencilReference, commandBuffer, faceMask, reference);
}

/* VK_EXT_debug_marker */

/* Emit a UserEventPush marker carrying pMarkerInfo->pMarkerName. */
void
sqtt_CmdDebugMarkerBeginEXT(VkCommandBuffer commandBuffer,
                            const VkDebugMarkerMarkerInfoEXT *pMarkerInfo)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   radv_write_user_event_marker(cmd_buffer, UserEventPush,
                                pMarkerInfo->pMarkerName);
}

/* Emit a UserEventPop marker (no string payload). */
void
sqtt_CmdDebugMarkerEndEXT(VkCommandBuffer commandBuffer)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
}

/* Emit a UserEventTrigger marker carrying pMarkerInfo->pMarkerName. */
void
sqtt_CmdDebugMarkerInsertEXT(VkCommandBuffer commandBuffer,
                             const VkDebugMarkerMarkerInfoEXT *pMarkerInfo)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   radv_write_user_event_marker(cmd_buffer, UserEventTrigger,
                                pMarkerInfo->pMarkerName);
}

VkResult
sqtt_DebugMarkerSetObjectNameEXT(VkDevice device,
                                 const VkDebugMarkerObjectNameInfoEXT *pNameInfo)
{
   /* no-op */
   return VK_SUCCESS;
}

VkResult
sqtt_DebugMarkerSetObjectTagEXT(VkDevice device,
                                const VkDebugMarkerObjectTagInfoEXT *pTagInfo)
{
   /* no-op */
   return VK_SUCCESS;
}

/* Pipelines */

/*
 * Map a Mesa shader stage of `pipeline` to the RGP hardware stage. For vertex
 * and tess-eval shaders the answer depends on the shader's info flags
 * (as_ls / as_es / is_ngg). pipeline->shaders[stage] must be non-NULL for
 * those two stages since it is dereferenced.
 */
static enum rgp_hardware_stages
radv_mesa_to_rgp_shader_stage(struct radv_pipeline *pipeline,
                              gl_shader_stage stage)
{
   struct radv_shader_variant *shader = pipeline->shaders[stage];

   switch (stage) {
   case MESA_SHADER_VERTEX:
      if (shader->info.vs.as_ls)
         return RGP_HW_STAGE_LS;
      else if (shader->info.vs.as_es)
         return RGP_HW_STAGE_ES;
      else if (shader->info.is_ngg)
         return RGP_HW_STAGE_GS;
      else
         return RGP_HW_STAGE_VS;
   case MESA_SHADER_TESS_CTRL:
      return RGP_HW_STAGE_HS;
   case MESA_SHADER_TESS_EVAL:
      if (shader->info.tes.as_es)
         return RGP_HW_STAGE_ES;
      else if (shader->info.is_ngg)
         return RGP_HW_STAGE_GS;
      else
         return RGP_HW_STAGE_VS;
   case MESA_SHADER_GEOMETRY:
      return RGP_HW_STAGE_GS;
   case MESA_SHADER_FRAGMENT:
      return RGP_HW_STAGE_PS;
   case MESA_SHADER_COMPUTE:
      return RGP_HW_STAGE_CS;
   default:
      unreachable("invalid mesa shader stage");
   }
}

/*
 * Build a code object record for `pipeline` (one heap copy of the code per
 * present shader stage) and append it to the device's code object list.
 *
 * Returns VK_SUCCESS, or VK_ERROR_OUT_OF_HOST_MEMORY if any allocation
 * fails; in that case nothing is added to the list and every allocation made
 * so far is released.
 */
static VkResult
radv_add_code_object(struct radv_device *device,
                     struct radv_pipeline *pipeline)
{
   struct ac_thread_trace_data *thread_trace_data = &device->thread_trace;
   struct rgp_code_object *code_object = &thread_trace_data->rgp_code_object;
   struct rgp_code_object_record *record;

   record = malloc(sizeof(*record));
   if (!record)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   record->shader_stages_mask = 0;
   record->num_shaders_combined = 0;
   record->pipeline_hash[0] = pipeline->pipeline_hash;
   record->pipeline_hash[1] = pipeline->pipeline_hash;

   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
      struct radv_shader_variant *shader = pipeline->shaders[i];
      uint8_t *code;
      uint64_t va;

      if (!shader)
         continue;

      code = malloc(shader->code_size);
      if (!code) {
         /* Release the code copies already made for earlier stages;
          * shader_stages_mask has one bit per stage filled in so far.
          */
         uint32_t mask = record->shader_stages_mask;
         while (mask) {
            int s = u_bit_scan(&mask);
            free(record->shader_data[s].code);
         }
         free(record);
         return VK_ERROR_OUT_OF_HOST_MEMORY;
      }
      memcpy(code, shader->code_ptr, shader->code_size);

      va = radv_buffer_get_va(shader->bo) + shader->bo_offset;

      /* The shader pointer value is used as the per-shader hash. */
      record->shader_data[i].hash[0] = (uint64_t)(uintptr_t)shader;
      record->shader_data[i].hash[1] = (uint64_t)(uintptr_t)shader >> 32;
      record->shader_data[i].code_size = shader->code_size;
      record->shader_data[i].code = code;
      record->shader_data[i].vgpr_count = shader->config.num_vgprs;
      record->shader_data[i].sgpr_count = shader->config.num_sgprs;
      /* Only the low 48 bits of the VA are kept. */
      record->shader_data[i].base_address = va & 0xffffffffffff;
      record->shader_data[i].elf_symbol_offset = 0;
      record->shader_data[i].hw_stage = radv_mesa_to_rgp_shader_stage(pipeline, i);
      record->shader_data[i].is_combined = false;

      record->shader_stages_mask |= (1 << i);
      record->num_shaders_combined++;
   }

   simple_mtx_lock(&code_object->lock);
   list_addtail(&record->list, &code_object->record);
   code_object->record_count++;
   simple_mtx_unlock(&code_object->lock);

   return VK_SUCCESS;
}

/*
 * Register `pipeline` with the thread trace data: PSO correlation, code
 * object loader event (at the lowest shader VA of the pipeline) and code
 * object record.
 *
 * Returns VK_SUCCESS or VK_ERROR_OUT_OF_HOST_MEMORY. On failure, records
 * added by earlier steps are left in place; radv_unregister_pipeline()
 * removes whatever was added.
 */
static VkResult
radv_register_pipeline(struct radv_device *device,
                       struct radv_pipeline *pipeline)
{
   uint64_t base_va = UINT64_MAX;

   if (!ac_sqtt_add_pso_correlation(&device->thread_trace, pipeline->pipeline_hash))
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   /* Find the lowest shader BO VA. */
   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
      struct radv_shader_variant *shader = pipeline->shaders[i];
      uint64_t va;

      if (!shader)
         continue;

      va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
      base_va = MIN2(base_va, va);
   }

   if (!ac_sqtt_add_code_object_loader_event(&device->thread_trace,
                                             pipeline->pipeline_hash, base_va))
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   /* Propagate the VkResult as-is; it must not be squeezed through a bool,
    * which would turn a negative error code into 1 (VK_NOT_READY).
    */
   return radv_add_code_object(device, pipeline);
}

/*
 * Remove the first PSO correlation, loader event and code object record
 * whose hash matches pipeline->pipeline_hash, freeing them (and, for the code
 * object, the per-stage code copies). Missing records are silently ignored.
 */
static void
radv_unregister_pipeline(struct radv_device *device,
                         struct radv_pipeline *pipeline)
{
   struct ac_thread_trace_data *thread_trace_data = &device->thread_trace;
   struct rgp_pso_correlation *pso_correlation = &thread_trace_data->rgp_pso_correlation;
   struct rgp_loader_events *loader_events = &thread_trace_data->rgp_loader_events;
   struct rgp_code_object *code_object = &thread_trace_data->rgp_code_object;

   /* Destroy the PSO correlation record. */
   simple_mtx_lock(&pso_correlation->lock);
   list_for_each_entry_safe(struct rgp_pso_correlation_record, record,
                            &pso_correlation->record, list) {
      if (record->pipeline_hash[0] == pipeline->pipeline_hash) {
         pso_correlation->record_count--;
         list_del(&record->list);
         free(record);
         break;
      }
   }
   simple_mtx_unlock(&pso_correlation->lock);

   /* Destroy the code object loader record. */
   simple_mtx_lock(&loader_events->lock);
   list_for_each_entry_safe(struct rgp_loader_events_record, record,
                            &loader_events->record, list) {
      if (record->code_object_hash[0] == pipeline->pipeline_hash) {
         loader_events->record_count--;
         list_del(&record->list);
         free(record);
         break;
      }
   }
   simple_mtx_unlock(&loader_events->lock);

   /* Destroy the code object record. */
   simple_mtx_lock(&code_object->lock);
   list_for_each_entry_safe(struct rgp_code_object_record, record,
                            &code_object->record, list) {
      if (record->pipeline_hash[0] == pipeline->pipeline_hash) {
         uint32_t mask = record->shader_stages_mask;
         int i;

         /* Free the disassembly. */
         while (mask) {
            i = u_bit_scan(&mask);
            free(record->shader_data[i].code);
         }

         code_object->record_count--;
         list_del(&record->list);
         free(record);
         break;
      }
   }
   simple_mtx_unlock(&code_object->lock);
}

/*
 * Create the pipelines through radv_CreateGraphicsPipelines() and, when
 * pipeline tracking is requested, register every non-NULL result. If a
 * registration fails, all `count` pipelines are destroyed, their handles are
 * reset to VK_NULL_HANDLE and the registration error is returned.
 */
VkResult
sqtt_CreateGraphicsPipelines(VkDevice _device, VkPipelineCache pipelineCache,
                             uint32_t count,
                             const VkGraphicsPipelineCreateInfo *pCreateInfos,
                             const VkAllocationCallbacks *pAllocator,
                             VkPipeline *pPipelines)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   VkResult result;

   result = radv_CreateGraphicsPipelines(_device, pipelineCache, count,
                                         pCreateInfos, pAllocator, pPipelines);
   if (result != VK_SUCCESS)
      return result;

   if (radv_sqtt_dump_pipeline()) {
      for (unsigned i = 0; i < count; i++) {
         RADV_FROM_HANDLE(radv_pipeline, pipeline, pPipelines[i]);

         if (!pipeline)
            continue;

         result = radv_register_pipeline(device, pipeline);
         if (result != VK_SUCCESS)
            goto fail;
      }
   }

   return VK_SUCCESS;

fail:
   for (unsigned i = 0; i < count; i++) {
      sqtt_DestroyPipeline(_device, pPipelines[i], pAllocator);
      pPipelines[i] = VK_NULL_HANDLE;
   }
   return result;
}

/*
 * Compute counterpart of sqtt_CreateGraphicsPipelines(): same registration
 * and same all-or-nothing cleanup on registration failure.
 */
VkResult
sqtt_CreateComputePipelines(VkDevice _device, VkPipelineCache pipelineCache,
                            uint32_t count,
                            const VkComputePipelineCreateInfo *pCreateInfos,
                            const VkAllocationCallbacks *pAllocator,
                            VkPipeline *pPipelines)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   VkResult result;

   result = radv_CreateComputePipelines(_device, pipelineCache, count,
                                        pCreateInfos, pAllocator, pPipelines);
   if (result != VK_SUCCESS)
      return result;

   if (radv_sqtt_dump_pipeline()) {
      for (unsigned i = 0; i < count; i++) {
         RADV_FROM_HANDLE(radv_pipeline, pipeline, pPipelines[i]);

         if (!pipeline)
            continue;

         result = radv_register_pipeline(device, pipeline);
         if (result != VK_SUCCESS)
            goto fail;
      }
   }

   return VK_SUCCESS;

fail:
   for (unsigned i = 0; i < count; i++) {
      sqtt_DestroyPipeline(_device, pPipelines[i], pAllocator);
      pPipelines[i] = VK_NULL_HANDLE;
   }
   return result;
}

/*
 * Unregister the pipeline (when pipeline tracking is requested) and destroy
 * it through radv_DestroyPipeline(). A null handle is a no-op.
 */
void
sqtt_DestroyPipeline(VkDevice _device, VkPipeline _pipeline,
                     const VkAllocationCallbacks *pAllocator)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);

   if (!_pipeline)
      return;

   if (radv_sqtt_dump_pipeline())
      radv_unregister_pipeline(device, pipeline);

   radv_DestroyPipeline(_device, _pipeline, pAllocator);
}

#undef API_MARKER