From 88f42ea93e0dbe16ef56c965083d2d9324a423cf Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Tue, 9 Feb 2021 14:07:45 +0100 Subject: radv: emit the trap handler registers earlier Directly into the GFX init IB. Signed-off-by: Samuel Pitoiset Reviewed-by: Bas Nieuwenhuizen Part-of: --- src/amd/vulkan/radv_device.c | 42 ------------------------------------------ src/amd/vulkan/si_cmd_buffer.c | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 42 deletions(-) diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 828548f766e..ffd2696a92e 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -3629,47 +3629,6 @@ radv_emit_global_shader_pointers(struct radv_queue *queue, } } -static void -radv_emit_trap_handler(struct radv_queue *queue, - struct radeon_cmdbuf *cs, - struct radeon_winsys_bo *tma_bo) -{ - struct radv_device *device = queue->device; - struct radeon_winsys_bo *tba_bo; - uint64_t tba_va, tma_va; - - if (!device->trap_handler_shader || !tma_bo) - return; - - tba_bo = device->trap_handler_shader->bo; - - tba_va = radv_buffer_get_va(tba_bo) + device->trap_handler_shader->bo_offset; - tma_va = radv_buffer_get_va(tma_bo); - - if (queue->queue_family_index == RADV_QUEUE_GENERAL) { - uint32_t regs[] = {R_00B000_SPI_SHADER_TBA_LO_PS, - R_00B100_SPI_SHADER_TBA_LO_VS, - R_00B200_SPI_SHADER_TBA_LO_GS, - R_00B300_SPI_SHADER_TBA_LO_ES, - R_00B400_SPI_SHADER_TBA_LO_HS, - R_00B500_SPI_SHADER_TBA_LO_LS}; - - for (int i = 0; i < ARRAY_SIZE(regs); ++i) { - radeon_set_sh_reg_seq(cs, regs[i], 4); - radeon_emit(cs, tba_va >> 8); - radeon_emit(cs, tba_va >> 40); - radeon_emit(cs, tma_va >> 8); - radeon_emit(cs, tma_va >> 40); - } - } else { - radeon_set_sh_reg_seq(cs, R_00B838_COMPUTE_TBA_LO, 4); - radeon_emit(cs, tba_va >> 8); - radeon_emit(cs, tba_va >> 40); - radeon_emit(cs, tma_va >> 8); - radeon_emit(cs, tma_va >> 40); - } -} - static void radv_init_graphics_state(struct radeon_cmdbuf *cs, struct radv_queue *queue) { @@ -3975,7 +3934,6 @@ radv_get_preamble_cs(struct radv_queue *queue, compute_scratch_waves, compute_scratch_bo); radv_emit_graphics_scratch(queue, cs, scratch_size_per_wave, scratch_waves, scratch_bo); - radv_emit_trap_handler(queue, cs, queue->device->tma_bo); if (gds_bo) radv_cs_add_buffer(queue->device->ws, cs, gds_bo); diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c index e529167b25c..39ac7abc198 100644 --- a/src/amd/vulkan/si_cmd_buffer.c +++ b/src/amd/vulkan/si_cmd_buffer.c @@ -140,6 +140,22 @@ si_emit_compute(struct radv_device *device, radeon_set_config_reg(cs, R_00950C_TA_CS_BC_BASE_ADDR, bc_va >> 8); } } + + if (device->tma_bo) { + uint64_t tba_va, tma_va; + + assert(device->physical_device->rad_info.chip_class == GFX8); + + tba_va = radv_buffer_get_va(device->trap_handler_shader->bo) + + device->trap_handler_shader->bo_offset; + tma_va = radv_buffer_get_va(device->tma_bo); + + radeon_set_sh_reg_seq(cs, R_00B838_COMPUTE_TBA_LO, 4); + radeon_emit(cs, tba_va >> 8); + radeon_emit(cs, tba_va >> 40); + radeon_emit(cs, tma_va >> 8); + radeon_emit(cs, tma_va >> 40); + } } /* 12.4 fixed-point */ @@ -605,6 +621,31 @@ si_emit_graphics(struct radv_device *device, S_028818_VPORT_Y_SCALE_ENA(1) | S_028818_VPORT_Y_OFFSET_ENA(1) | S_028818_VPORT_Z_SCALE_ENA(1) | S_028818_VPORT_Z_OFFSET_ENA(1)); + if (device->tma_bo) { + uint64_t tba_va, tma_va; + + assert(device->physical_device->rad_info.chip_class == GFX8); + + tba_va = radv_buffer_get_va(device->trap_handler_shader->bo) + + device->trap_handler_shader->bo_offset; + tma_va = radv_buffer_get_va(device->tma_bo); + + uint32_t regs[] = {R_00B000_SPI_SHADER_TBA_LO_PS, + R_00B100_SPI_SHADER_TBA_LO_VS, + R_00B200_SPI_SHADER_TBA_LO_GS, + R_00B300_SPI_SHADER_TBA_LO_ES, + R_00B400_SPI_SHADER_TBA_LO_HS, + R_00B500_SPI_SHADER_TBA_LO_LS}; + + for (i = 0; i < ARRAY_SIZE(regs); ++i) { + radeon_set_sh_reg_seq(cs, regs[i], 4); + radeon_emit(cs, tba_va >> 8); + radeon_emit(cs, tba_va >> 40); + radeon_emit(cs, tma_va >> 8); + radeon_emit(cs, tma_va >> 40); + } + } + si_emit_compute(device, cs); } -- cgit v1.2.3