-rw-r--r--   src/freedreno/vulkan/tu_clear_blit.cc  |  25
-rw-r--r--   src/freedreno/vulkan/tu_cmd_buffer.cc  |  49
-rw-r--r--   src/freedreno/vulkan/tu_cmd_buffer.h   |  14
-rw-r--r--   src/freedreno/vulkan/tu_device.cc      |   7
-rw-r--r--   src/freedreno/vulkan/tu_pass.cc        |   4
-rw-r--r--   src/freedreno/vulkan/tu_pass.h         |   1
-rw-r--r--   src/freedreno/vulkan/tu_pipeline.cc    |  96
-rw-r--r--   src/freedreno/vulkan/tu_pipeline.h     |  36
-rw-r--r--   src/freedreno/vulkan/tu_shader.cc      | 101
-rw-r--r--   src/freedreno/vulkan/tu_shader.h       |  11
10 files changed, 285 insertions, 59 deletions
diff --git a/src/freedreno/vulkan/tu_clear_blit.cc b/src/freedreno/vulkan/tu_clear_blit.cc
index 798eadf0aba..cf21073e35a 100644
--- a/src/freedreno/vulkan/tu_clear_blit.cc
+++ b/src/freedreno/vulkan/tu_clear_blit.cc
@@ -3959,10 +3959,9 @@ fdm_apply_sysmem_clear_coords(struct tu_cmd_buffer *cmd,
 {
    const struct apply_sysmem_clear_coords_state *state =
       (const struct apply_sysmem_clear_coords_state *)data;
-   assert(state->view < views);
-   VkExtent2D frag_area = frag_areas[state->view];
-   VkRect2D bin = bins[state->view];
+   VkExtent2D frag_area = frag_areas[MIN2(state->view, views - 1)];
+   VkRect2D bin = bins[MIN2(state->view, views - 1)];
    VkOffset2D offset = tu_fdm_per_bin_offset(frag_area, bin, common_bin_offset);
@@ -4233,10 +4232,9 @@ fdm_apply_gmem_clear_coords(struct tu_cmd_buffer *cmd,
 {
    const struct apply_gmem_clear_coords_state *state =
       (const struct apply_gmem_clear_coords_state *)data;
-   assert(state->view < views);
-   VkExtent2D frag_area = frag_areas[state->view];
-   VkRect2D bin = bins[state->view];
+   VkExtent2D frag_area = frag_areas[MIN2(state->view, views - 1)];
+   VkRect2D bin = bins[MIN2(state->view, views - 1)];
    VkOffset2D offset = tu_fdm_per_bin_offset(frag_area, bin, common_bin_offset);
@@ -4869,9 +4867,8 @@ fdm_apply_load_coords(struct tu_cmd_buffer *cmd,
 {
    const struct apply_load_coords_state *state =
      (const struct apply_load_coords_state *)data;
-   assert(state->view < views);
-   VkExtent2D frag_area = frag_areas[state->view];
-   VkRect2D bin = bins[state->view];
+   VkExtent2D frag_area = frag_areas[MIN2(state->view, views - 1)];
+   VkRect2D bin = bins[MIN2(state->view, views - 1)];
    assert(bin.extent.width % frag_area.width == 0);
    assert(bin.extent.height % frag_area.height == 0);
@@ -4923,7 +4920,7 @@ load_3d_blit(struct tu_cmd_buffer *cmd,
    for_each_layer(i, att->clear_views, cmd->state.framebuffer->layers) {
       if (cmd->state.pass->has_fdm) {
          struct apply_load_coords_state state = {
-            .view = att->clear_views ? i : 0,
+            .view = i,
          };
          tu_create_fdm_bin_patchpoint(cmd, cs, 4, TU_FDM_SKIP_BINNING,
                                       fdm_apply_load_coords, state);
@@ -5342,9 +5339,8 @@ fdm_apply_store_coords(struct tu_cmd_buffer *cmd,
 {
    const struct apply_store_coords_state *state =
      (const struct apply_store_coords_state *)data;
-   assert(state->view < views);
-   VkExtent2D frag_area = frag_areas[state->view];
-   VkRect2D bin = bins[state->view];
+   VkExtent2D frag_area = frag_areas[MIN2(state->view, views - 1)];
+   VkRect2D bin = bins[MIN2(state->view, views - 1)];
    /* The bin width/height must be a multiple of the frag_area to make sure
     * that the scaling happens correctly. This means there may be some
@@ -5496,9 +5492,8 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
    for_each_layer (i, layer_mask, layers) {
       if (cmd->state.pass->has_fdm) {
-         unsigned view = layer_mask ? i : 0;
          struct apply_store_coords_state state = {
-            .view = view,
+            .view = i,
          };
          tu_create_fdm_bin_patchpoint(cmd, cs, 8, TU_FDM_SKIP_BINNING,
                                       fdm_apply_store_coords, state);
diff --git a/src/freedreno/vulkan/tu_cmd_buffer.cc b/src/freedreno/vulkan/tu_cmd_buffer.cc
index 8d0d3b4fc93..f0e9cc7b0f9 100644
--- a/src/freedreno/vulkan/tu_cmd_buffer.cc
+++ b/src/freedreno/vulkan/tu_cmd_buffer.cc
@@ -1224,6 +1224,13 @@ tu_bin_offset(VkOffset2D fdm_offset, const struct tu_tiling_config *tiling)
    };
 }
 
+static uint32_t
+tu_fdm_num_layers(const struct tu_cmd_buffer *cmd)
+{
+   return cmd->state.pass->num_views ? cmd->state.pass->num_views :
+      (cmd->state.fdm_per_layer ? cmd->state.framebuffer->layers : 1);
+}
+
 template <chip CHIP>
 static void
 tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
@@ -1301,8 +1308,7 @@ tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
    tu_cs_emit(cs, 0x0);
 
    if (fdm) {
-      unsigned views =
-         cmd->state.pass->num_views ? cmd->state.pass->num_views : 1;
+      unsigned views = tu_fdm_num_layers(cmd);
       VkRect2D bin = {
          { x1, y1 },
         { (x2 - x1) * tile->extent.width, (y2 - y1) * tile->extent.height }
@@ -1991,7 +1997,7 @@ tu6_emit_binning_pass(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
    */
   if ((!(cmd->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT) ||
        fdm_offsets) && cmd->fdm_bin_patchpoints.size != 0) {
-      unsigned num_views = MAX2(cmd->state.pass->num_views, 1);
+      unsigned num_views = tu_fdm_num_layers(cmd);
       VkExtent2D unscaled_frag_areas[num_views];
       VkRect2D bins[num_views];
       for (unsigned i = 0; i < num_views; i++) {
@@ -2675,8 +2681,7 @@ tu_calc_frag_area(struct tu_cmd_buffer *cmd,
    const uint32_t x2 = MIN2(x1 + tiling->tile0.width, MAX_VIEWPORT_SIZE);
    const uint32_t y2 = MIN2(y1 + tiling->tile0.height, MAX_VIEWPORT_SIZE);
 
-   unsigned views =
-      cmd->state.pass->num_views ? cmd->state.pass->num_views : 1;
+   unsigned views = tu_fdm_num_layers(cmd);
    const struct tu_framebuffer *fb = cmd->state.framebuffer;
    struct tu_frag_area raw_areas[views];
    if (fdm) {
@@ -2894,8 +2899,7 @@ tu_render_pipe_fdm(struct tu_cmd_buffer *cmd, uint32_t pipe,
 {
    uint32_t width = tx2 - tx1;
    uint32_t height = ty2 - ty1;
-   unsigned views =
-      cmd->state.pass->num_views ? cmd->state.pass->num_views : 1;
+   unsigned views = tu_fdm_num_layers(cmd);
    bool has_abs_mask =
       cmd->device->physical_device->info->a7xx.has_abs_bin_mask;
@@ -4491,6 +4495,15 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
          pipeline->shaders[MESA_SHADER_FRAGMENT]->fs.has_fdm;
    }
 
+   if (pipeline->program.per_layer_viewport != cmd->state.per_layer_viewport ||
+       pipeline->shaders[MESA_SHADER_FRAGMENT]->fs.max_fdm_layers !=
+       cmd->state.max_fdm_layers) {
+      cmd->state.per_layer_viewport = pipeline->program.per_layer_viewport;
+      cmd->state.max_fdm_layers =
+         pipeline->shaders[MESA_SHADER_FRAGMENT]->fs.max_fdm_layers;
+      cmd->state.dirty |= TU_CMD_DIRTY_FDM;
+   }
+
    if (pipeline->program.per_view_viewport != cmd->state.per_view_viewport ||
        pipeline->program.fake_single_viewport != cmd->state.fake_single_viewport) {
       cmd->state.per_view_viewport = pipeline->program.per_view_viewport;
@@ -5554,6 +5567,7 @@ tu_CmdBeginRenderPass2(VkCommandBuffer commandBuffer,
    cmd->state.subpass = pass->subpasses;
    cmd->state.framebuffer = fb;
    cmd->state.render_area = pRenderPassBegin->renderArea;
+   cmd->state.fdm_per_layer = pass->has_layered_fdm;
 
    if (pass->attachment_count > 0) {
       VK_MULTIALLOC(ma);
@@ -5622,6 +5636,8 @@ tu_CmdBeginRendering(VkCommandBuffer commandBuffer,
    cmd->state.subpass = &cmd->dynamic_subpass;
    cmd->state.framebuffer = &cmd->dynamic_framebuffer;
    cmd->state.render_area = pRenderingInfo->renderArea;
+   cmd->state.fdm_per_layer =
+      pRenderingInfo->flags & VK_RENDERING_PER_LAYER_FRAGMENT_DENSITY_BIT_VALVE;
    cmd->state.blit_cache_cleaned = false;
 
    cmd->state.attachments = cmd->dynamic_attachments;
@@ -6417,11 +6433,14 @@ fdm_apply_fs_params(struct tu_cmd_buffer *cmd,
 
    unsigned num_consts = state->num_consts;
    for (unsigned i = 0; i < num_consts; i++) {
-      assert(i < views);
-      VkExtent2D area = frag_areas[i];
-      VkRect2D bin = bins[i];
+      /* FDM per layer may be enabled in the shader but not in the renderpass,
+       * in which case views will be 1 and we have to replicate the one view
+       * to all of the layers.
+       */
+      VkExtent2D area = frag_areas[MIN2(i, views - 1)];
+      VkRect2D bin = bins[MIN2(i, views - 1)];
       VkOffset2D offset = tu_fdm_per_bin_offset(area, bin, common_bin_offset);
-
+
       tu_cs_emit(cs, area.width);
       tu_cs_emit(cs, area.height);
       tu_cs_emit(cs, fui(offset.x));
@@ -7984,8 +8003,7 @@ tu_CmdEndRenderPass2(VkCommandBuffer commandBuffer,
    VkOffset2D test_offsets[MAX_VIEWS];
    if (TU_DEBUG(FDM) && TU_DEBUG(FDM_OFFSET)) {
-      for (unsigned i = 0;
-           i < MAX2(cmd_buffer->state.pass->num_views, 1); i++) {
+      for (unsigned i = 0; i < tu_fdm_num_layers(cmd_buffer); i++) {
         test_offsets[i] = { 64, 64 };
      }
      fdm_offsets = test_offsets;
@@ -8030,8 +8048,7 @@ tu_CmdEndRendering2EXT(VkCommandBuffer commandBuffer,
    VkOffset2D test_offsets[MAX_VIEWS];
    if (TU_DEBUG(FDM) && TU_DEBUG(FDM_OFFSET)) {
-      for (unsigned i = 0;
-           i < MAX2(cmd_buffer->state.pass->num_views, 1); i++) {
+      for (unsigned i = 0; i < tu_fdm_num_layers(cmd_buffer); i++) {
         test_offsets[i] = { 64, 64 };
      }
      fdm_offsets = test_offsets;
@@ -8047,7 +8064,7 @@ tu_CmdEndRendering2EXT(VkCommandBuffer commandBuffer,
       if (fdm_offsets) {
          memcpy(cmd_buffer->pre_chain.fdm_offsets, fdm_offsets,
                 sizeof(VkOffset2D) *
-                MAX2(cmd_buffer->state.pass->num_views, 1));
+                tu_fdm_num_layers(cmd_buffer));
       }
 
       /* Even we don't call tu_cmd_render here, renderpass is finished
diff --git a/src/freedreno/vulkan/tu_cmd_buffer.h b/src/freedreno/vulkan/tu_cmd_buffer.h
index a2570255ab2..6f095f415be 100644
--- a/src/freedreno/vulkan/tu_cmd_buffer.h
+++ b/src/freedreno/vulkan/tu_cmd_buffer.h
@@ -456,9 +456,23 @@ struct tu_cmd_state
    bool has_fdm;
    /* See tu_pipeline::per_view_viewport */
    bool per_view_viewport;
+   /* See tu_pipeline::per_layer_viewport */
+   bool per_layer_viewport;
    /* See tu_pipeline::fake_single_viewport */
    bool fake_single_viewport;
 
+   /* If per_layer_viewport is true, the maximum number of layers rendered to.
+    * We need to save this because we might not necessarily know the number of
+    * layers in some corner cases and we need to know this in order to know
+    * how many viewports to emit.
+    */
+   uint8_t max_fdm_layers;
+
+   /* Set in CmdBeginRendering/CmdBeginRenderPass2, whether the FDM should be
+    * sampled per layer.
+    */
+   bool fdm_per_layer;
+
    /* saved states to re-emit in TU_CMD_DIRTY_DRAW_STATE case */
    struct tu_draw_state dynamic_state[TU_DYNAMIC_STATE_COUNT];
    struct tu_draw_state vertex_buffers;
diff --git a/src/freedreno/vulkan/tu_device.cc b/src/freedreno/vulkan/tu_device.cc
index cbc2034557d..0316928a202 100644
--- a/src/freedreno/vulkan/tu_device.cc
+++ b/src/freedreno/vulkan/tu_device.cc
@@ -352,6 +352,7 @@ get_device_extensions(const struct tu_physical_device *device,
       .IMG_filter_cubic = device->info->a6xx.has_tex_filter_cubic,
       .NV_compute_shader_derivatives = device->info->chip >= 7,
       .QCOM_fragment_density_map_offset = true,
+      .VALVE_fragment_density_map_layered = true,
       .VALVE_mutable_descriptor_type = true,
    } };
@@ -778,6 +779,9 @@ tu_get_features(struct tu_physical_device *pdevice,
    /* VK_KHR_unified_layouts */
    features->unifiedImageLayouts = true;
    features->unifiedImageLayoutsVideo = false;
+
+   /* VK_VALVE_fragment_density_map_layered */
+   features->fragmentDensityMapLayered = true;
 }
 
 static void
@@ -1436,6 +1440,9 @@ tu_get_properties(struct tu_physical_device *pdevice,
    props->fragmentDensityOffsetGranularity = (VkExtent2D) {
       TU_FDM_OFFSET_GRANULARITY, TU_FDM_OFFSET_GRANULARITY };
+
+   /* VK_VALVE_fragment_density_map_layered */
+   props->maxFragmentDensityMapLayers = MAX_VIEWS;
 }
 
 static const struct vk_pipeline_cache_object_ops *const cache_import_ops[] = {
diff --git a/src/freedreno/vulkan/tu_pass.cc b/src/freedreno/vulkan/tu_pass.cc
index ea4ea3583dd..09f8d6a0f6d 100644
--- a/src/freedreno/vulkan/tu_pass.cc
+++ b/src/freedreno/vulkan/tu_pass.cc
@@ -999,6 +999,8 @@ tu_CreateRenderPass2(VkDevice _device,
       pass->fragment_density_map.attachment =
          fdm_info->fragmentDensityMapAttachment.attachment;
       pass->has_fdm = true;
+      if (pCreateInfo->flags & VK_RENDER_PASS_CREATE_PER_LAYER_FRAGMENT_DENSITY_BIT_VALVE)
+         pass->has_layered_fdm = true;
    } else {
       pass->fragment_density_map.attachment = VK_ATTACHMENT_UNUSED;
    }
@@ -1344,6 +1346,8 @@ tu_setup_dynamic_render_pass(struct tu_cmd_buffer *cmd_buffer,
                               VK_ATTACHMENT_STORE_OP_DONT_CARE,
                               VK_ATTACHMENT_STORE_OP_DONT_CARE);
       pass->has_fdm = true;
+      if (info->flags & VK_RENDERING_PER_LAYER_FRAGMENT_DENSITY_BIT_VALVE)
+         pass->has_layered_fdm = true;
    } else {
       pass->fragment_density_map.attachment = VK_ATTACHMENT_UNUSED;
       pass->has_fdm = false;
diff --git a/src/freedreno/vulkan/tu_pass.h b/src/freedreno/vulkan/tu_pass.h
index fbe36d838cb..612c5e5402e 100644
--- a/src/freedreno/vulkan/tu_pass.h
+++ b/src/freedreno/vulkan/tu_pass.h
@@ -139,6 +139,7 @@ struct tu_render_pass
    bool has_cond_load_store;
    bool has_fdm;
    bool allow_ib2_skipping;
+   bool has_layered_fdm;
 
    struct tu_subpass_barrier end_barrier;
    struct tu_subpass subpasses[0];
diff --git a/src/freedreno/vulkan/tu_pipeline.cc b/src/freedreno/vulkan/tu_pipeline.cc
index 80c29d6119d..2e8932293be 100644
--- a/src/freedreno/vulkan/tu_pipeline.cc
+++ b/src/freedreno/vulkan/tu_pipeline.cc
@@ -282,6 +282,8 @@ struct tu_pipeline_builder
    VkShaderStageFlags active_stages;
 
    bool fragment_density_map;
+   bool fdm_per_layer;
+   uint8_t max_fdm_layers;
 
    struct vk_graphics_pipeline_all_state all_state;
    struct vk_graphics_pipeline_state graphics_state;
@@ -1799,6 +1801,16 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
       VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) {
      keys[MESA_SHADER_VERTEX].multiview_mask =
         builder->graphics_state.rp->view_mask;
+
+      gl_shader_stage last_pre_rast_stage = MESA_SHADER_VERTEX;
+      for (int i = MESA_SHADER_GEOMETRY; i >= MESA_SHADER_VERTEX; i--) {
+         if (nir[i]) {
+            last_pre_rast_stage = (gl_shader_stage)i;
+            break;
+         }
+      }
+
+      keys[last_pre_rast_stage].fdm_per_layer = builder->fdm_per_layer;
    }
 
    if (builder->state & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) {
@@ -1806,6 +1818,9 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
         builder->graphics_state.rp->view_mask;
      keys[MESA_SHADER_FRAGMENT].fragment_density_map =
         builder->fragment_density_map;
+      keys[MESA_SHADER_FRAGMENT].fdm_per_layer =
+         builder->fdm_per_layer;
+      keys[MESA_SHADER_FRAGMENT].max_fdm_layers = builder->max_fdm_layers;
      keys[MESA_SHADER_FRAGMENT].unscaled_input_fragcoord =
         builder->unscaled_input_fragcoord;
@@ -2309,20 +2324,27 @@ tu_emit_program_state(struct tu_cs *sub_cs,
    tu6_emit_vpc<CHIP>(&prog_cs, vs, hs, ds, gs, fs);
    prog->vpc_state = tu_cs_end_draw_state(sub_cs, &prog_cs);
 
-   const struct ir3_shader_variant *last_shader;
-   if (gs)
-      last_shader = gs;
-   else if (ds)
-      last_shader = ds;
-   else
-      last_shader = vs;
+   const struct ir3_shader_variant *last_variant;
+   const struct tu_shader *last_shader;
+   if (gs) {
+      last_shader = shaders[MESA_SHADER_GEOMETRY];
+      last_variant = gs;
+   } else if (ds) {
+      last_shader = shaders[MESA_SHADER_TESS_EVAL];
+      last_variant = ds;
+   } else {
+      last_shader = shaders[MESA_SHADER_VERTEX];
+      last_variant = vs;
+   }
 
    prog->per_view_viewport =
-      !last_shader->writes_viewport &&
+      !last_variant->writes_viewport &&
       shaders[MESA_SHADER_FRAGMENT]->fs.has_fdm &&
       dev->physical_device->info->a6xx.has_per_view_viewport;
-   prog->fake_single_viewport = prog->per_view_viewport;
-   prog->writes_shading_rate = last_shader->writes_shading_rate;
+   prog->per_layer_viewport = last_shader->per_layer_viewport;
+   prog->fake_single_viewport = prog->per_view_viewport ||
+                                prog->per_layer_viewport;
+   prog->writes_shading_rate = last_variant->writes_shading_rate;
    prog->reads_shading_rate = fs->reads_shading_rate;
    prog->accesses_smask = fs->reads_smask || fs->writes_smask;
 }
@@ -2613,9 +2635,15 @@ fdm_apply_viewports(struct tu_cmd_buffer *cmd, struct tu_cs *cs, void *data,
        * same across all views, we can pick any view. However the number
        * of viewports and number of views is not guaranteed the same, so we
        * need to pick the 0'th view which always exists to be safe.
+       *
+       * If FDM per layer is enabled in the shader but disabled by the
+       * renderpass, views will be 1 and we also have to replicate the 0'th
+       * view to every view.
        */
-      VkExtent2D frag_area = state->share_scale ? frag_areas[0] : frag_areas[i];
-      VkRect2D bin = state->share_scale ? bins[0] : bins[i];
+      VkExtent2D frag_area =
+         (state->share_scale || views == 1) ? frag_areas[0] : frag_areas[i];
+      VkRect2D bin =
+         (state->share_scale || views == 1) ? bins[0] : bins[i];
       /* Implement fake_single_viewport by replicating viewport 0 across all
        * views.
       */
@@ -2655,16 +2683,20 @@ tu6_emit_viewport_fdm(struct tu_cs *cs, struct tu_cmd_buffer *cmd,
    struct apply_viewport_state state = {
       .vp = *vp,
       .rs = *rs,
-      .share_scale = !cmd->state.per_view_viewport,
+      .share_scale = !cmd->state.per_view_viewport &&
+                     !cmd->state.per_layer_viewport,
       .fake_single_viewport = cmd->state.fake_single_viewport,
    };
-   if (!state.share_scale)
+   if (cmd->state.per_view_viewport)
       state.vp.viewport_count = num_views;
+   else if (cmd->state.per_layer_viewport)
+      state.vp.viewport_count = cmd->state.max_fdm_layers;
    unsigned size = TU_CALLX(cmd->device, tu6_viewport_size)(cmd->device, &state.vp, &state.rs);
    tu_cs_begin_sub_stream(&cmd->sub_cs, size, cs);
    tu_create_fdm_bin_patchpoint(cmd, cs, size, TU_FDM_NONE, fdm_apply_viewports, state);
-   cmd->state.rp.shared_viewport |= !cmd->state.per_view_viewport;
+   cmd->state.rp.shared_viewport |= !cmd->state.per_view_viewport &&
+      !cmd->state.program.per_layer_viewport;
 }
 
 static const enum mesa_vk_dynamic_graphics_state tu_scissor_state[] = {
@@ -2723,8 +2755,10 @@ fdm_apply_scissors(struct tu_cmd_buffer *cmd, struct tu_cs *cs, void *data,
    struct vk_viewport_state vp = state->vp;
 
    for (unsigned i = 0; i < vp.scissor_count; i++) {
-      VkExtent2D frag_area = state->share_scale ? frag_areas[0] : frag_areas[i];
-      VkRect2D bin = state->share_scale ? bins[0] : bins[i];
+      VkExtent2D frag_area =
+         (state->share_scale || views == 1) ? frag_areas[0] : frag_areas[i];
+      VkRect2D bin =
+         (state->share_scale || views == 1) ? bins[0] : bins[i];
       VkRect2D scissor = state->fake_single_viewport ?
          state->vp.scissors[0] : state->vp.scissors[i];
@@ -2768,11 +2802,14 @@ tu6_emit_scissor_fdm(struct tu_cs *cs, struct tu_cmd_buffer *cmd,
    unsigned num_views = MAX2(cmd->state.pass->num_views, 1);
    struct apply_viewport_state state = {
       .vp = *vp,
-      .share_scale = !cmd->state.per_view_viewport,
+      .share_scale = !cmd->state.per_view_viewport &&
+                     !cmd->state.per_layer_viewport,
       .fake_single_viewport = cmd->state.fake_single_viewport,
    };
-   if (!state.share_scale)
+   if (cmd->state.per_view_viewport)
       state.vp.scissor_count = num_views;
+   else if (cmd->state.per_layer_viewport)
+      state.vp.scissor_count = cmd->state.max_fdm_layers;
    unsigned size = TU_CALLX(cmd->device, tu6_scissor_size)(cmd->device, &state.vp);
    tu_cs_begin_sub_stream(&cmd->sub_cs, size, cs);
    tu_create_fdm_bin_patchpoint(cmd, cs, size, TU_FDM_NONE, fdm_apply_scissors,
@@ -3692,7 +3729,8 @@ tu_pipeline_builder_emit_state(struct tu_pipeline_builder *builder,
     * to set viewport and stencil state dynamically.
     */
    bool no_per_view_viewport = pipeline_contains_all_shader_state(pipeline) &&
-      !pipeline->program.per_view_viewport;
+      !pipeline->program.per_view_viewport &&
+      !pipeline->program.per_layer_viewport;
    DRAW_STATE_COND(viewport, TU_DYNAMIC_STATE_VIEWPORT, no_per_view_viewport,
                    builder->graphics_state.vp,
                    builder->graphics_state.rs);
@@ -3912,7 +3950,7 @@ tu_emit_draw_state(struct tu_cmd_buffer *cmd)
                   (TU_CMD_DIRTY_FDM |                                   \
                    TU_CMD_DIRTY_PER_VIEW_VIEWPORT))) &&                 \
       !(cmd->state.pipeline_draw_states & (1u << id))) {                \
-      if (cmd->state.has_fdm) {                                         \
+      if (cmd->state.has_fdm || cmd->state.per_layer_viewport) {        \
         tu_cs_set_writeable(&cmd->sub_cs, true);                        \
         tu6_emit_##name##_fdm(&cs, cmd, __VA_ARGS__);                   \
         cmd->state.dynamic_state[id] =                                  \
@@ -4501,6 +4539,11 @@ tu_pipeline_builder_init_graphics(
         VK_PIPELINE_CREATE_2_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR;
    }
 
+   if (pass->has_layered_fdm) {
+      rp_flags |=
+         VK_PIPELINE_CREATE_2_PER_LAYER_FRAGMENT_DENSITY_BIT_VALVE;
+   }
+
    builder->unscaled_input_fragcoord = 0;
    for (unsigned i = 0; i < subpass->input_count; i++) {
       /* Input attachments stored in GMEM must be loaded with unscaled
@@ -4526,6 +4569,17 @@ tu_pipeline_builder_init_graphics(
      builder->fragment_density_map = (builder->graphics_state.pipeline_flags &
         VK_PIPELINE_CREATE_2_RENDERING_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT) ||
         TU_DEBUG(FDM);
+      builder->fdm_per_layer = (builder->graphics_state.pipeline_flags &
+         VK_PIPELINE_CREATE_2_PER_LAYER_FRAGMENT_DENSITY_BIT_VALVE);
+      if (builder->fdm_per_layer) {
+         const VkPipelineFragmentDensityMapLayeredCreateInfoVALVE *fdm_layered_info =
+            vk_find_struct_const(create_info->pNext,
+                                 PIPELINE_FRAGMENT_DENSITY_MAP_LAYERED_CREATE_INFO_VALVE);
+         if (fdm_layered_info) {
+            builder->max_fdm_layers =
+               fdm_layered_info->maxFragmentDensityMapLayers;
+         }
+      }
    }
 }
 
diff --git a/src/freedreno/vulkan/tu_pipeline.h b/src/freedreno/vulkan/tu_pipeline.h
index 53ec4723b3d..2c4573702e2 100644
--- a/src/freedreno/vulkan/tu_pipeline.h
+++ b/src/freedreno/vulkan/tu_pipeline.h
@@ -109,18 +109,46 @@ struct tu_program_state
 
    unsigned dynamic_descriptor_offsets[MAX_SETS];
 
+   /* With FDM, we control the fragment area by overriding the viewport and
+    * scissor. In order to have different areas for different views, we
+    * need to have a viewport/scissor per FDM layer. There are various
+    * possible scenarios based on the shader and whether multiview or
+    * per-layer sampling is enabled, that are communicated to the driver
+    * via the struct members below:
+    *
+    * - The shader writes gl_ViewportIndex, managing multiple viewports in
+    *   a way that may not correspond to FDM layer:
+    *   - Set everything to false. The driver will set shared_scale and
+    *     apply the same scaling to all viewports/scissors.
+    * - Multiview is enabled:
+    *   - Set per_view_viewport.
+    *   - Set fake_single_viewport to splat viewport 0 to all viewports.
+    *   - (Not implemented yet) if the user requests per-view
+    *     viewports, don't set fake_single_viewport and let the user
+    *     set multiple viewports that are transformed independently.
+    * - Multiview is not enabled and per-layer FDM sampling is enabled:
+    *   - Inject code into shader and set per_layer_viewport.
+    *   - Set fake_single_viewport to splat viewport 0 to all viewports.
+    */
+
    /* Whether the per-view-viewport feature should be enabled in HW. This
     * implicitly adds gl_ViewIndex to gl_ViewportIndex so that from a HW
     * point of view (but not necessarily the user's point of view!) there
    * is a viewport per view.
    */
    bool per_view_viewport;
-   /* If per_view_viewport is true and this is true, the app has provided
-    * a single viewport and we need to fake it by duplicating the viewport
-    * across views before transforming each viewport separately using FDM
-    * state.
+   /* Whether gl_ViewportIndex has been set to gl_Layer, so that from a HW
+    * point of view (but not necessarily the user's point of view!) there
+    * is a viewport per view.
+    */
+   bool per_layer_viewport;
+   /* If per_view_viewport or per_layer_viewport is true and this is true,
+    * the app has provided a single viewport and we need to fake it by
+    * duplicating the viewport across views before transforming each
+    * viewport separately using FDM state.
     */
    bool fake_single_viewport;
+
    bool writes_shading_rate;
    bool reads_shading_rate;
    bool accesses_smask;
diff --git a/src/freedreno/vulkan/tu_shader.cc b/src/freedreno/vulkan/tu_shader.cc
index f9af424f5f9..ace7a825f37 100644
--- a/src/freedreno/vulkan/tu_shader.cc
+++ b/src/freedreno/vulkan/tu_shader.cc
@@ -1012,6 +1012,7 @@ tu_lower_io(nir_shader *shader, struct tu_device *dev,
 struct lower_fdm_options {
    unsigned num_views;
    bool adjust_fragcoord;
+   bool use_layer;
 };
 
 static bool
@@ -1039,14 +1040,16 @@ lower_fdm_instr(struct nir_builder *b, nir_instr *instr, void *data)
    nir_def *view;
    if (options->num_views > 1) {
+      gl_varying_slot slot = options->use_layer ?
+         VARYING_SLOT_LAYER : VARYING_SLOT_VIEW_INDEX;
       nir_variable *view_var =
          nir_find_variable_with_location(b->shader, nir_var_shader_in,
-                                         VARYING_SLOT_VIEW_INDEX);
+                                         slot);
 
       if (view_var == NULL) {
          view_var = nir_variable_create(b->shader, nir_var_shader_in,
                                         glsl_int_type(), NULL);
-         view_var->data.location = VARYING_SLOT_VIEW_INDEX;
+         view_var->data.location = slot;
          view_var->data.interpolation = INTERP_MODE_FLAT;
         view_var->data.driver_location = b->shader->num_inputs++;
      }
@@ -1139,6 +1142,81 @@ tu_nir_lower_ssbo_descriptor(nir_shader *shader,
                               (void *)dev);
 }
 
+struct lower_fdm_state {
+   nir_variable *layer_var;
+   nir_variable *viewport_var;
+};
+
+static bool
+lower_layered_fdm_instr(nir_builder *b, nir_intrinsic_instr *intrin,
+                        void *cb)
+{
+   struct lower_fdm_state *state = (struct lower_fdm_state *)cb;
+   if (intrin->intrinsic != nir_intrinsic_store_deref)
+      return false;
+
+   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+   if (!nir_deref_mode_is(deref, nir_var_shader_out))
+      return false;
+
+   nir_variable *var = nir_deref_instr_get_variable(deref);
+   if (var != state->layer_var)
+      return false;
+
+   /* Ok, we've finally got a store to gl_Layer. Mirror a store to
+    * gl_ViewportIndex.
+    */
+   if (!state->viewport_var) {
+      state->viewport_var =
+         nir_create_variable_with_location(b->shader,
+                                           nir_var_shader_out,
+                                           VARYING_SLOT_VIEWPORT,
+                                           glsl_int_type());
+      state->viewport_var->data.interpolation = INTERP_MODE_FLAT;
+   }
+
+   b->cursor = nir_after_instr(&intrin->instr);
+   nir_store_var(b, state->viewport_var, intrin->src[1].ssa, 0x1);
+   return true;
+}
+
+static bool
+tu_nir_lower_layered_fdm(nir_shader *shader,
+                         bool *per_layer_viewport)
+{
+   nir_function_impl *entrypoint = nir_shader_get_entrypoint(shader);
+
+   /* If viewport is already written, there's nothing to do and we will fall
+    * back.
+    */
+   if (shader->info.outputs_written & VARYING_BIT_VIEWPORT) {
+      *per_layer_viewport = false;
+      return nir_no_progress(entrypoint);
+   }
+
+   *per_layer_viewport = true;
+
+   struct lower_fdm_state state = {};
+
+   state.layer_var =
+      nir_find_variable_with_location(shader, nir_var_shader_out,
+                                      VARYING_SLOT_LAYER);
+
+   /* If layer is never written, it will get the default value of 0 and we can
+    * also leave the viewport with the default value of 0.
+    */
+   if (!state.layer_var)
+      return nir_no_progress(entrypoint);
+
+   state.viewport_var =
+      nir_find_variable_with_location(shader, nir_var_shader_out,
+                                      VARYING_SLOT_VIEWPORT);
+
+
+   return nir_shader_intrinsics_pass(shader, lower_layered_fdm_instr,
+                                     nir_metadata_control_flow, &state);
+}
+
 static void
 shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
 {
@@ -2451,6 +2529,7 @@ tu_shader_serialize(struct vk_pipeline_cache_object *object,
               sizeof(shader->dynamic_descriptor_sizes));
    blob_write_uint32(blob, shader->view_mask);
    blob_write_uint8(blob, shader->active_desc_sets);
+   blob_write_uint8(blob, shader->per_layer_viewport);
 
    ir3_store_variant(blob, shader->variant);
 
@@ -2496,6 +2575,7 @@ tu_shader_deserialize(struct vk_pipeline_cache *cache,
              sizeof(shader->dynamic_descriptor_sizes));
    shader->view_mask = blob_read_uint32(blob);
    shader->active_desc_sets = blob_read_uint8(blob);
+   shader->per_layer_viewport = blob_read_uint8(blob);
 
    shader->variant = ir3_retrieve_variant(blob, dev->compiler, NULL);
 
@@ -2568,11 +2648,26 @@ tu_shader_create(struct tu_device *dev,
    * lower input attachment coordinates except if unscaled.
    */
   const struct lower_fdm_options fdm_options = {
-      .num_views = MAX2(util_last_bit(key->multiview_mask), 1),
+      .num_views = MAX2(key->multiview_mask ?
+                        util_last_bit(key->multiview_mask) :
+                        key->max_fdm_layers, 1),
      .adjust_fragcoord = key->fragment_density_map,
+      .use_layer = !key->multiview_mask,
   };
   NIR_PASS(_, nir, tu_nir_lower_fdm, &fdm_options);
 
+   if (nir->info.stage != MESA_SHADER_FRAGMENT &&
+       nir->info.stage != MESA_SHADER_COMPUTE &&
+       !key->multiview_mask &&
+       key->fdm_per_layer) {
+      NIR_PASS(_, nir, tu_nir_lower_layered_fdm, &shader->per_layer_viewport);
+   }
+
+   if (nir->info.stage == MESA_SHADER_FRAGMENT &&
+       key->fdm_per_layer) {
+      shader->fs.max_fdm_layers = key->max_fdm_layers;
+   }
+
   /* Note that nir_opt_barrier_modes here breaks tests such as
    * dEQP-VK.memory_model.message_passing.ext.u32.coherent.fence_atomic.atomicwrite.device.payload_local.image.guard_local.buffer.vert
    */
diff --git a/src/freedreno/vulkan/tu_shader.h b/src/freedreno/vulkan/tu_shader.h
index 983796f79d7..6e9066a989d 100644
--- a/src/freedreno/vulkan/tu_shader.h
+++ b/src/freedreno/vulkan/tu_shader.h
@@ -82,6 +82,11 @@ struct tu_shader
    */
   int dynamic_descriptor_sizes[MAX_SETS];
 
+   /* For all shader types other than FS, store whether the viewport was
+    * rewritten to equal the layer.
+    */
+   bool per_layer_viewport;
+
   union {
      struct {
         unsigned patch_type;
@@ -99,6 +104,10 @@ struct tu_shader
           uint32_t status;
           bool force_late_z;
        } lrz;
+
+        /* If per_layer_viewport is true, the maximum number of layers written to.
+         */
+        uint8_t max_fdm_layers;
      } fs;
   };
 };
@@ -106,8 +115,10 @@ struct tu_shader
 struct tu_shader_key {
    unsigned multiview_mask;
    uint16_t read_only_input_attachments;
+   uint8_t max_fdm_layers;
    bool force_sample_interp;
    bool fragment_density_map;
+   bool fdm_per_layer;
    bool dynamic_renderpass;
    uint8_t unscaled_input_fragcoord;
    bool robust_storage_access2;
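
For reference, the application-side opt-in that this series wires up looks roughly like the sketch below. It is not part of the patch: the VALVE identifiers (VK_PIPELINE_CREATE_2_PER_LAYER_FRAGMENT_DENSITY_BIT_VALVE, VkPipelineFragmentDensityMapLayeredCreateInfoVALVE, maxFragmentDensityMapLayers, VK_RENDERING_PER_LAYER_FRAGMENT_DENSITY_BIT_VALVE) are taken from the diff above, while the helper name, the flags2 chaining, and the assumption that maintenance5/Vulkan 1.4 is available are illustrative and should be checked against the published VK_VALVE_fragment_density_map_layered spec.

#include <vulkan/vulkan.h>

/* Hypothetical helper: chain the per-layer FDM create-info onto an
 * otherwise-complete graphics pipeline create-info. layer_count must not
 * exceed the maxFragmentDensityMapLayers property exposed by
 * tu_get_properties() above; the driver stores it as fs.max_fdm_layers. */
static VkResult
create_layered_fdm_pipeline(VkDevice device,
                            VkGraphicsPipelineCreateInfo info, /* filled by caller */
                            uint32_t layer_count,
                            VkPipeline *out_pipeline)
{
   VkPipelineFragmentDensityMapLayeredCreateInfoVALVE layered = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_FRAGMENT_DENSITY_MAP_LAYERED_CREATE_INFO_VALVE,
      .pNext = (void *)info.pNext,
      .maxFragmentDensityMapLayers = layer_count,
   };
   VkPipelineCreateFlags2CreateInfo flags2 = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_CREATE_FLAGS_2_CREATE_INFO,
      .pNext = &layered,
      /* The flag tu_pipeline_builder_init_graphics() checks. */
      .flags = VK_PIPELINE_CREATE_2_PER_LAYER_FRAGMENT_DENSITY_BIT_VALVE,
   };
   info.pNext = &flags2;

   return vkCreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, &info, NULL,
                                    out_pipeline);
}

/* With dynamic rendering, the render-pass side opt-in is a flag on
 * VkRenderingInfo, which tu_CmdBeginRendering() reads into
 * cmd->state.fdm_per_layer:
 *
 *    rendering_info.flags |= VK_RENDERING_PER_LAYER_FRAGMENT_DENSITY_BIT_VALVE;
 */

The legacy render-pass path instead sets VK_RENDER_PASS_CREATE_PER_LAYER_FRAGMENT_DENSITY_BIT_VALVE on the render pass create flags, which tu_CreateRenderPass2() translates into pass->has_layered_fdm in the hunk above.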