-rw-r--r--   src/freedreno/vulkan/tu_clear_blit.cc    25
-rw-r--r--   src/freedreno/vulkan/tu_cmd_buffer.cc    49
-rw-r--r--   src/freedreno/vulkan/tu_cmd_buffer.h     14
-rw-r--r--   src/freedreno/vulkan/tu_device.cc         7
-rw-r--r--   src/freedreno/vulkan/tu_pass.cc           4
-rw-r--r--   src/freedreno/vulkan/tu_pass.h            1
-rw-r--r--   src/freedreno/vulkan/tu_pipeline.cc      96
-rw-r--r--   src/freedreno/vulkan/tu_pipeline.h       36
-rw-r--r--   src/freedreno/vulkan/tu_shader.cc       101
-rw-r--r--   src/freedreno/vulkan/tu_shader.h         11
10 files changed, 285 insertions, 59 deletions
diff --git a/src/freedreno/vulkan/tu_clear_blit.cc b/src/freedreno/vulkan/tu_clear_blit.cc
index 798eadf0aba..cf21073e35a 100644
--- a/src/freedreno/vulkan/tu_clear_blit.cc
+++ b/src/freedreno/vulkan/tu_clear_blit.cc
@@ -3959,10 +3959,9 @@ fdm_apply_sysmem_clear_coords(struct tu_cmd_buffer *cmd,
{
const struct apply_sysmem_clear_coords_state *state =
(const struct apply_sysmem_clear_coords_state *)data;
- assert(state->view < views);
- VkExtent2D frag_area = frag_areas[state->view];
- VkRect2D bin = bins[state->view];
+ VkExtent2D frag_area = frag_areas[MIN2(state->view, views - 1)];
+ VkRect2D bin = bins[MIN2(state->view, views - 1)];
VkOffset2D offset = tu_fdm_per_bin_offset(frag_area, bin, common_bin_offset);
@@ -4233,10 +4232,9 @@ fdm_apply_gmem_clear_coords(struct tu_cmd_buffer *cmd,
{
const struct apply_gmem_clear_coords_state *state =
(const struct apply_gmem_clear_coords_state *)data;
- assert(state->view < views);
- VkExtent2D frag_area = frag_areas[state->view];
- VkRect2D bin = bins[state->view];
+ VkExtent2D frag_area = frag_areas[MIN2(state->view, views - 1)];
+ VkRect2D bin = bins[MIN2(state->view, views - 1)];
VkOffset2D offset = tu_fdm_per_bin_offset(frag_area, bin, common_bin_offset);
@@ -4869,9 +4867,8 @@ fdm_apply_load_coords(struct tu_cmd_buffer *cmd,
{
const struct apply_load_coords_state *state =
(const struct apply_load_coords_state *)data;
- assert(state->view < views);
- VkExtent2D frag_area = frag_areas[state->view];
- VkRect2D bin = bins[state->view];
+ VkExtent2D frag_area = frag_areas[MIN2(state->view, views - 1)];
+ VkRect2D bin = bins[MIN2(state->view, views - 1)];
assert(bin.extent.width % frag_area.width == 0);
assert(bin.extent.height % frag_area.height == 0);
@@ -4923,7 +4920,7 @@ load_3d_blit(struct tu_cmd_buffer *cmd,
for_each_layer(i, att->clear_views, cmd->state.framebuffer->layers) {
if (cmd->state.pass->has_fdm) {
struct apply_load_coords_state state = {
- .view = att->clear_views ? i : 0,
+ .view = i,
};
tu_create_fdm_bin_patchpoint(cmd, cs, 4, TU_FDM_SKIP_BINNING,
fdm_apply_load_coords, state);
@@ -5342,9 +5339,8 @@ fdm_apply_store_coords(struct tu_cmd_buffer *cmd,
{
const struct apply_store_coords_state *state =
(const struct apply_store_coords_state *)data;
- assert(state->view < views);
- VkExtent2D frag_area = frag_areas[state->view];
- VkRect2D bin = bins[state->view];
+ VkExtent2D frag_area = frag_areas[MIN2(state->view, views - 1)];
+ VkRect2D bin = bins[MIN2(state->view, views - 1)];
/* The bin width/height must be a multiple of the frag_area to make sure
* that the scaling happens correctly. This means there may be some
@@ -5496,9 +5492,8 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
for_each_layer (i, layer_mask, layers) {
if (cmd->state.pass->has_fdm) {
- unsigned view = layer_mask ? i : 0;
struct apply_store_coords_state state = {
- .view = view,
+ .view = i,
};
tu_create_fdm_bin_patchpoint(cmd, cs, 8, TU_FDM_SKIP_BINNING,
fdm_apply_store_coords, state);
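
The hunks above relax the per-view asserts into a clamp: with per-layer FDM the patchpoint's view index can exceed the number of FDM entries the render pass actually provides (for example when the shaders were built for layered FDM but the pass itself is not layered), and the single entry has to be replicated across all layers. A minimal sketch of that indexing rule, using a hypothetical helper name that is not part of the driver:

#include <stdint.h>

#define MIN2(a, b) ((a) < (b) ? (a) : (b))

/* Hypothetical helper, for illustration only: map a per-layer/per-view index
 * onto however many FDM entries the render pass provided. With views == 1
 * every layer resolves to entry 0 (the single view is replicated); otherwise
 * in-range indices map to themselves. */
static inline uint32_t
tu_fdm_entry_index(uint32_t view, uint32_t views)
{
   return MIN2(view, views - 1);
}
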
diff --git a/src/freedreno/vulkan/tu_cmd_buffer.cc b/src/freedreno/vulkan/tu_cmd_buffer.cc
index 8d0d3b4fc93..f0e9cc7b0f9 100644
--- a/src/freedreno/vulkan/tu_cmd_buffer.cc
+++ b/src/freedreno/vulkan/tu_cmd_buffer.cc
@@ -1224,6 +1224,13 @@ tu_bin_offset(VkOffset2D fdm_offset, const struct tu_tiling_config *tiling)
};
}
+static uint32_t
+tu_fdm_num_layers(const struct tu_cmd_buffer *cmd)
+{
+ return cmd->state.pass->num_views ? cmd->state.pass->num_views :
+ (cmd->state.fdm_per_layer ? cmd->state.framebuffer->layers : 1);
+}
+
template <chip CHIP>
static void
tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
@@ -1301,8 +1308,7 @@ tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
tu_cs_emit(cs, 0x0);
if (fdm) {
- unsigned views =
- cmd->state.pass->num_views ? cmd->state.pass->num_views : 1;
+ unsigned views = tu_fdm_num_layers(cmd);
VkRect2D bin = {
{ x1, y1 },
{ (x2 - x1) * tile->extent.width, (y2 - y1) * tile->extent.height }
@@ -1991,7 +1997,7 @@ tu6_emit_binning_pass(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
*/
if ((!(cmd->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT) ||
fdm_offsets) && cmd->fdm_bin_patchpoints.size != 0) {
- unsigned num_views = MAX2(cmd->state.pass->num_views, 1);
+ unsigned num_views = tu_fdm_num_layers(cmd);
VkExtent2D unscaled_frag_areas[num_views];
VkRect2D bins[num_views];
for (unsigned i = 0; i < num_views; i++) {
@@ -2675,8 +2681,7 @@ tu_calc_frag_area(struct tu_cmd_buffer *cmd,
const uint32_t x2 = MIN2(x1 + tiling->tile0.width, MAX_VIEWPORT_SIZE);
const uint32_t y2 = MIN2(y1 + tiling->tile0.height, MAX_VIEWPORT_SIZE);
- unsigned views =
- cmd->state.pass->num_views ? cmd->state.pass->num_views : 1;
+ unsigned views = tu_fdm_num_layers(cmd);
const struct tu_framebuffer *fb = cmd->state.framebuffer;
struct tu_frag_area raw_areas[views];
if (fdm) {
@@ -2894,8 +2899,7 @@ tu_render_pipe_fdm(struct tu_cmd_buffer *cmd, uint32_t pipe,
{
uint32_t width = tx2 - tx1;
uint32_t height = ty2 - ty1;
- unsigned views =
- cmd->state.pass->num_views ? cmd->state.pass->num_views : 1;
+ unsigned views = tu_fdm_num_layers(cmd);
bool has_abs_mask =
cmd->device->physical_device->info->a7xx.has_abs_bin_mask;
@@ -4491,6 +4495,15 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
pipeline->shaders[MESA_SHADER_FRAGMENT]->fs.has_fdm;
}
+ if (pipeline->program.per_layer_viewport != cmd->state.per_layer_viewport ||
+ pipeline->shaders[MESA_SHADER_FRAGMENT]->fs.max_fdm_layers !=
+ cmd->state.max_fdm_layers) {
+ cmd->state.per_layer_viewport = pipeline->program.per_layer_viewport;
+ cmd->state.max_fdm_layers =
+ pipeline->shaders[MESA_SHADER_FRAGMENT]->fs.max_fdm_layers;
+ cmd->state.dirty |= TU_CMD_DIRTY_FDM;
+ }
+
if (pipeline->program.per_view_viewport != cmd->state.per_view_viewport ||
pipeline->program.fake_single_viewport != cmd->state.fake_single_viewport) {
cmd->state.per_view_viewport = pipeline->program.per_view_viewport;
@@ -5554,6 +5567,7 @@ tu_CmdBeginRenderPass2(VkCommandBuffer commandBuffer,
cmd->state.subpass = pass->subpasses;
cmd->state.framebuffer = fb;
cmd->state.render_area = pRenderPassBegin->renderArea;
+ cmd->state.fdm_per_layer = pass->has_layered_fdm;
if (pass->attachment_count > 0) {
VK_MULTIALLOC(ma);
@@ -5622,6 +5636,8 @@ tu_CmdBeginRendering(VkCommandBuffer commandBuffer,
cmd->state.subpass = &cmd->dynamic_subpass;
cmd->state.framebuffer = &cmd->dynamic_framebuffer;
cmd->state.render_area = pRenderingInfo->renderArea;
+ cmd->state.fdm_per_layer =
+ pRenderingInfo->flags & VK_RENDERING_PER_LAYER_FRAGMENT_DENSITY_BIT_VALVE;
cmd->state.blit_cache_cleaned = false;
cmd->state.attachments = cmd->dynamic_attachments;
@@ -6417,11 +6433,14 @@ fdm_apply_fs_params(struct tu_cmd_buffer *cmd,
unsigned num_consts = state->num_consts;
for (unsigned i = 0; i < num_consts; i++) {
- assert(i < views);
- VkExtent2D area = frag_areas[i];
- VkRect2D bin = bins[i];
+ /* FDM per layer may be enabled in the shader but not in the renderpass,
+ * in which case views will be 1 and we have to replicate the one view
+ * to all of the layers.
+ */
+ VkExtent2D area = frag_areas[MIN2(i, views - 1)];
+ VkRect2D bin = bins[MIN2(i, views - 1)];
VkOffset2D offset = tu_fdm_per_bin_offset(area, bin, common_bin_offset);
-
+
tu_cs_emit(cs, area.width);
tu_cs_emit(cs, area.height);
tu_cs_emit(cs, fui(offset.x));
@@ -7984,8 +8003,7 @@ tu_CmdEndRenderPass2(VkCommandBuffer commandBuffer,
VkOffset2D test_offsets[MAX_VIEWS];
if (TU_DEBUG(FDM) && TU_DEBUG(FDM_OFFSET)) {
- for (unsigned i = 0;
- i < MAX2(cmd_buffer->state.pass->num_views, 1); i++) {
+ for (unsigned i = 0; i < tu_fdm_num_layers(cmd_buffer); i++) {
test_offsets[i] = { 64, 64 };
}
fdm_offsets = test_offsets;
@@ -8030,8 +8048,7 @@ tu_CmdEndRendering2EXT(VkCommandBuffer commandBuffer,
VkOffset2D test_offsets[MAX_VIEWS];
if (TU_DEBUG(FDM) && TU_DEBUG(FDM_OFFSET)) {
- for (unsigned i = 0;
- i < MAX2(cmd_buffer->state.pass->num_views, 1); i++) {
+ for (unsigned i = 0; i < tu_fdm_num_layers(cmd_buffer); i++) {
test_offsets[i] = { 64, 64 };
}
fdm_offsets = test_offsets;
@@ -8047,7 +8064,7 @@ tu_CmdEndRendering2EXT(VkCommandBuffer commandBuffer,
if (fdm_offsets) {
memcpy(cmd_buffer->pre_chain.fdm_offsets,
fdm_offsets, sizeof(VkOffset2D) *
- MAX2(cmd_buffer->state.pass->num_views, 1));
+ tu_fdm_num_layers(cmd_buffer));
}
/* Even we don't call tu_cmd_render here, renderpass is finished
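
For reference, the new VK_RENDERING_PER_LAYER_FRAGMENT_DENSITY_BIT_VALVE handling in tu_CmdBeginRendering corresponds to the application passing that flag in VkRenderingInfo::flags. A rough sketch of the application side, assuming the usual VK_EXT_fragment_density_map dynamic-rendering chaining; fdm_view, color_att and the dimensions are placeholders, not taken from this commit:

#include <vulkan/vulkan.h>

static void
begin_layered_fdm_rendering(VkCommandBuffer cmd_buf, VkImageView fdm_view,
                            const VkRenderingAttachmentInfo *color_att,
                            uint32_t width, uint32_t height, uint32_t layers)
{
   /* Layered (arrayed) fragment density map attachment. */
   VkRenderingFragmentDensityMapAttachmentInfoEXT fdm_att = {
      .sType = VK_STRUCTURE_TYPE_RENDERING_FRAGMENT_DENSITY_MAP_ATTACHMENT_INFO_EXT,
      .imageView = fdm_view,
      .imageLayout = VK_IMAGE_LAYOUT_FRAGMENT_DENSITY_MAP_OPTIMAL_EXT,
   };

   /* The per-layer bit is what cmd->state.fdm_per_layer picks up above. */
   VkRenderingInfo rendering_info = {
      .sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
      .pNext = &fdm_att,
      .flags = VK_RENDERING_PER_LAYER_FRAGMENT_DENSITY_BIT_VALVE,
      .renderArea = { { 0, 0 }, { width, height } },
      .layerCount = layers,
      .colorAttachmentCount = 1,
      .pColorAttachments = color_att,
   };

   vkCmdBeginRendering(cmd_buf, &rendering_info);
}
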
diff --git a/src/freedreno/vulkan/tu_cmd_buffer.h b/src/freedreno/vulkan/tu_cmd_buffer.h
index a2570255ab2..6f095f415be 100644
--- a/src/freedreno/vulkan/tu_cmd_buffer.h
+++ b/src/freedreno/vulkan/tu_cmd_buffer.h
@@ -456,9 +456,23 @@ struct tu_cmd_state
bool has_fdm;
/* See tu_pipeline::per_view_viewport */
bool per_view_viewport;
+ /* See tu_pipeline::per_layer_viewport */
+ bool per_layer_viewport;
/* See tu_pipeline::fake_single_viewport */
bool fake_single_viewport;
+ /* If per_layer_viewport is true, the maximum number of layers rendered to.
+ * We have to save this because in some corner cases we don't otherwise know
+ * the number of layers, and we need it to decide how many viewports to
+ * emit.
+ */
+ uint8_t max_fdm_layers;
+
+ /* Set in CmdBeginRendering/CmdBeginRenderPass2, whether the FDM should be
+ * sampled per layer.
+ */
+ bool fdm_per_layer;
+
/* saved states to re-emit in TU_CMD_DIRTY_DRAW_STATE case */
struct tu_draw_state dynamic_state[TU_DYNAMIC_STATE_COUNT];
struct tu_draw_state vertex_buffers;
diff --git a/src/freedreno/vulkan/tu_device.cc b/src/freedreno/vulkan/tu_device.cc
index cbc2034557d..0316928a202 100644
--- a/src/freedreno/vulkan/tu_device.cc
+++ b/src/freedreno/vulkan/tu_device.cc
@@ -352,6 +352,7 @@ get_device_extensions(const struct tu_physical_device *device,
.IMG_filter_cubic = device->info->a6xx.has_tex_filter_cubic,
.NV_compute_shader_derivatives = device->info->chip >= 7,
.QCOM_fragment_density_map_offset = true,
+ .VALVE_fragment_density_map_layered = true,
.VALVE_mutable_descriptor_type = true,
} };
@@ -778,6 +779,9 @@ tu_get_features(struct tu_physical_device *pdevice,
/* VK_KHR_unified_layouts */
features->unifiedImageLayouts = true;
features->unifiedImageLayoutsVideo = false;
+
+ /* VK_VALVE_fragment_density_map_layered */
+ features->fragmentDensityMapLayered = true;
}
static void
@@ -1436,6 +1440,9 @@ tu_get_properties(struct tu_physical_device *pdevice,
props->fragmentDensityOffsetGranularity = (VkExtent2D) {
TU_FDM_OFFSET_GRANULARITY, TU_FDM_OFFSET_GRANULARITY
};
+
+ /* VK_VALVE_fragment_density_map_layered */
+ props->maxFragmentDensityMapLayers = MAX_VIEWS;
}
static const struct vk_pipeline_cache_object_ops *const cache_import_ops[] = {
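
The feature and property added above are reachable through the standard VkPhysicalDeviceFeatures2 / VkPhysicalDeviceProperties2 query path. A query sketch; the VALVE struct and sType names are assumed to follow the usual extension naming pattern, and only fragmentDensityMapLayered and maxFragmentDensityMapLayers come from this change:

#include <vulkan/vulkan.h>

static void
query_layered_fdm_support(VkPhysicalDevice pdev)
{
   VkPhysicalDeviceFragmentDensityMapLayeredFeaturesVALVE fdm_layered_features = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_DENSITY_MAP_LAYERED_FEATURES_VALVE,
   };
   VkPhysicalDeviceFeatures2 features2 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
      .pNext = &fdm_layered_features,
   };
   vkGetPhysicalDeviceFeatures2(pdev, &features2);

   VkPhysicalDeviceFragmentDensityMapLayeredPropertiesVALVE fdm_layered_props = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_DENSITY_MAP_LAYERED_PROPERTIES_VALVE,
   };
   VkPhysicalDeviceProperties2 props2 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
      .pNext = &fdm_layered_props,
   };
   vkGetPhysicalDeviceProperties2(pdev, &props2);

   /* fdm_layered_features.fragmentDensityMapLayered and
    * fdm_layered_props.maxFragmentDensityMapLayers (MAX_VIEWS on turnip) can
    * now be checked before enabling per-layer FDM. */
}
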
diff --git a/src/freedreno/vulkan/tu_pass.cc b/src/freedreno/vulkan/tu_pass.cc
index ea4ea3583dd..09f8d6a0f6d 100644
--- a/src/freedreno/vulkan/tu_pass.cc
+++ b/src/freedreno/vulkan/tu_pass.cc
@@ -999,6 +999,8 @@ tu_CreateRenderPass2(VkDevice _device,
pass->fragment_density_map.attachment =
fdm_info->fragmentDensityMapAttachment.attachment;
pass->has_fdm = true;
+ if (pCreateInfo->flags & VK_RENDER_PASS_CREATE_PER_LAYER_FRAGMENT_DENSITY_BIT_VALVE)
+ pass->has_layered_fdm = true;
} else {
pass->fragment_density_map.attachment = VK_ATTACHMENT_UNUSED;
}
@@ -1344,6 +1346,8 @@ tu_setup_dynamic_render_pass(struct tu_cmd_buffer *cmd_buffer,
VK_ATTACHMENT_STORE_OP_DONT_CARE,
VK_ATTACHMENT_STORE_OP_DONT_CARE);
pass->has_fdm = true;
+ if (info->flags & VK_RENDERING_PER_LAYER_FRAGMENT_DENSITY_BIT_VALVE)
+ pass->has_layered_fdm = true;
} else {
pass->fragment_density_map.attachment = VK_ATTACHMENT_UNUSED;
pass->has_fdm = false;
diff --git a/src/freedreno/vulkan/tu_pass.h b/src/freedreno/vulkan/tu_pass.h
index fbe36d838cb..612c5e5402e 100644
--- a/src/freedreno/vulkan/tu_pass.h
+++ b/src/freedreno/vulkan/tu_pass.h
@@ -139,6 +139,7 @@ struct tu_render_pass
bool has_cond_load_store;
bool has_fdm;
bool allow_ib2_skipping;
+ bool has_layered_fdm;
struct tu_subpass_barrier end_barrier;
struct tu_subpass subpasses[0];
diff --git a/src/freedreno/vulkan/tu_pipeline.cc b/src/freedreno/vulkan/tu_pipeline.cc
index 80c29d6119d..2e8932293be 100644
--- a/src/freedreno/vulkan/tu_pipeline.cc
+++ b/src/freedreno/vulkan/tu_pipeline.cc
@@ -282,6 +282,8 @@ struct tu_pipeline_builder
VkShaderStageFlags active_stages;
bool fragment_density_map;
+ bool fdm_per_layer;
+ uint8_t max_fdm_layers;
struct vk_graphics_pipeline_all_state all_state;
struct vk_graphics_pipeline_state graphics_state;
@@ -1799,6 +1801,16 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) {
keys[MESA_SHADER_VERTEX].multiview_mask =
builder->graphics_state.rp->view_mask;
+
+ gl_shader_stage last_pre_rast_stage = MESA_SHADER_VERTEX;
+ for (int i = MESA_SHADER_GEOMETRY; i >= MESA_SHADER_VERTEX; i--) {
+ if (nir[i]) {
+ last_pre_rast_stage = (gl_shader_stage)i;
+ break;
+ }
+ }
+
+ keys[last_pre_rast_stage].fdm_per_layer = builder->fdm_per_layer;
}
if (builder->state & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) {
@@ -1806,6 +1818,9 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
builder->graphics_state.rp->view_mask;
keys[MESA_SHADER_FRAGMENT].fragment_density_map =
builder->fragment_density_map;
+ keys[MESA_SHADER_FRAGMENT].fdm_per_layer =
+ builder->fdm_per_layer;
+ keys[MESA_SHADER_FRAGMENT].max_fdm_layers = builder->max_fdm_layers;
keys[MESA_SHADER_FRAGMENT].unscaled_input_fragcoord =
builder->unscaled_input_fragcoord;
@@ -2309,20 +2324,27 @@ tu_emit_program_state(struct tu_cs *sub_cs,
tu6_emit_vpc<CHIP>(&prog_cs, vs, hs, ds, gs, fs);
prog->vpc_state = tu_cs_end_draw_state(sub_cs, &prog_cs);
- const struct ir3_shader_variant *last_shader;
- if (gs)
- last_shader = gs;
- else if (ds)
- last_shader = ds;
- else
- last_shader = vs;
+ const struct ir3_shader_variant *last_variant;
+ const struct tu_shader *last_shader;
+ if (gs) {
+ last_shader = shaders[MESA_SHADER_GEOMETRY];
+ last_variant = gs;
+ } else if (ds) {
+ last_shader = shaders[MESA_SHADER_TESS_EVAL];
+ last_variant = ds;
+ } else {
+ last_shader = shaders[MESA_SHADER_VERTEX];
+ last_variant = vs;
+ }
prog->per_view_viewport =
- !last_shader->writes_viewport &&
+ !last_variant->writes_viewport &&
shaders[MESA_SHADER_FRAGMENT]->fs.has_fdm &&
dev->physical_device->info->a6xx.has_per_view_viewport;
- prog->fake_single_viewport = prog->per_view_viewport;
- prog->writes_shading_rate = last_shader->writes_shading_rate;
+ prog->per_layer_viewport = last_shader->per_layer_viewport;
+ prog->fake_single_viewport = prog->per_view_viewport ||
+ prog->per_layer_viewport;
+ prog->writes_shading_rate = last_variant->writes_shading_rate;
prog->reads_shading_rate = fs->reads_shading_rate;
prog->accesses_smask = fs->reads_smask || fs->writes_smask;
}
@@ -2613,9 +2635,15 @@ fdm_apply_viewports(struct tu_cmd_buffer *cmd, struct tu_cs *cs, void *data,
* same across all views, we can pick any view. However the number
* of viewports and number of views is not guaranteed the same, so we
* need to pick the 0'th view which always exists to be safe.
+ *
+ * If FDM per layer is enabled in the shader but disabled by the
+ * renderpass, views will be 1 and we also have to replicate the 0'th
+ * view to every view.
*/
- VkExtent2D frag_area = state->share_scale ? frag_areas[0] : frag_areas[i];
- VkRect2D bin = state->share_scale ? bins[0] : bins[i];
+ VkExtent2D frag_area =
+ (state->share_scale || views == 1) ? frag_areas[0] : frag_areas[i];
+ VkRect2D bin =
+ (state->share_scale || views == 1) ? bins[0] : bins[i];
/* Implement fake_single_viewport by replicating viewport 0 across all
* views.
*/
@@ -2655,16 +2683,20 @@ tu6_emit_viewport_fdm(struct tu_cs *cs, struct tu_cmd_buffer *cmd,
struct apply_viewport_state state = {
.vp = *vp,
.rs = *rs,
- .share_scale = !cmd->state.per_view_viewport,
+ .share_scale = !cmd->state.per_view_viewport &&
+ !cmd->state.per_layer_viewport,
.fake_single_viewport = cmd->state.fake_single_viewport,
};
- if (!state.share_scale)
+ if (cmd->state.per_view_viewport)
state.vp.viewport_count = num_views;
+ else if (cmd->state.per_layer_viewport)
+ state.vp.viewport_count = cmd->state.max_fdm_layers;
unsigned size = TU_CALLX(cmd->device, tu6_viewport_size)(cmd->device, &state.vp, &state.rs);
tu_cs_begin_sub_stream(&cmd->sub_cs, size, cs);
tu_create_fdm_bin_patchpoint(cmd, cs, size, TU_FDM_NONE,
fdm_apply_viewports, state);
- cmd->state.rp.shared_viewport |= !cmd->state.per_view_viewport;
+ cmd->state.rp.shared_viewport |= !cmd->state.per_view_viewport &&
+ !cmd->state.program.per_layer_viewport;
}
static const enum mesa_vk_dynamic_graphics_state tu_scissor_state[] = {
@@ -2723,8 +2755,10 @@ fdm_apply_scissors(struct tu_cmd_buffer *cmd, struct tu_cs *cs, void *data,
struct vk_viewport_state vp = state->vp;
for (unsigned i = 0; i < vp.scissor_count; i++) {
- VkExtent2D frag_area = state->share_scale ? frag_areas[0] : frag_areas[i];
- VkRect2D bin = state->share_scale ? bins[0] : bins[i];
+ VkExtent2D frag_area =
+ (state->share_scale || views == 1) ? frag_areas[0] : frag_areas[i];
+ VkRect2D bin =
+ (state->share_scale || views == 1) ? bins[0] : bins[i];
VkRect2D scissor =
state->fake_single_viewport ? state->vp.scissors[0] : state->vp.scissors[i];
@@ -2768,11 +2802,14 @@ tu6_emit_scissor_fdm(struct tu_cs *cs, struct tu_cmd_buffer *cmd,
unsigned num_views = MAX2(cmd->state.pass->num_views, 1);
struct apply_viewport_state state = {
.vp = *vp,
- .share_scale = !cmd->state.per_view_viewport,
+ .share_scale = !cmd->state.per_view_viewport &&
+ !cmd->state.per_layer_viewport,
.fake_single_viewport = cmd->state.fake_single_viewport,
};
- if (!state.share_scale)
+ if (cmd->state.per_view_viewport)
state.vp.scissor_count = num_views;
+ else if (cmd->state.per_layer_viewport)
+ state.vp.scissor_count = cmd->state.max_fdm_layers;
unsigned size = TU_CALLX(cmd->device, tu6_scissor_size)(cmd->device, &state.vp);
tu_cs_begin_sub_stream(&cmd->sub_cs, size, cs);
tu_create_fdm_bin_patchpoint(cmd, cs, size, TU_FDM_NONE, fdm_apply_scissors,
@@ -3692,7 +3729,8 @@ tu_pipeline_builder_emit_state(struct tu_pipeline_builder *builder,
* to set viewport and stencil state dynamically.
*/
bool no_per_view_viewport = pipeline_contains_all_shader_state(pipeline) &&
- !pipeline->program.per_view_viewport;
+ !pipeline->program.per_view_viewport &&
+ !pipeline->program.per_layer_viewport;
DRAW_STATE_COND(viewport, TU_DYNAMIC_STATE_VIEWPORT, no_per_view_viewport,
builder->graphics_state.vp,
builder->graphics_state.rs);
@@ -3912,7 +3950,7 @@ tu_emit_draw_state(struct tu_cmd_buffer *cmd)
(TU_CMD_DIRTY_FDM | \
TU_CMD_DIRTY_PER_VIEW_VIEWPORT))) && \
!(cmd->state.pipeline_draw_states & (1u << id))) { \
- if (cmd->state.has_fdm) { \
+ if (cmd->state.has_fdm || cmd->state.per_layer_viewport) { \
tu_cs_set_writeable(&cmd->sub_cs, true); \
tu6_emit_##name##_fdm(&cs, cmd, __VA_ARGS__); \
cmd->state.dynamic_state[id] = \
@@ -4501,6 +4539,11 @@ tu_pipeline_builder_init_graphics(
VK_PIPELINE_CREATE_2_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR;
}
+ if (pass->has_layered_fdm) {
+ rp_flags |=
+ VK_PIPELINE_CREATE_2_PER_LAYER_FRAGMENT_DENSITY_BIT_VALVE;
+ }
+
builder->unscaled_input_fragcoord = 0;
for (unsigned i = 0; i < subpass->input_count; i++) {
/* Input attachments stored in GMEM must be loaded with unscaled
@@ -4526,6 +4569,17 @@ tu_pipeline_builder_init_graphics(
builder->fragment_density_map = (builder->graphics_state.pipeline_flags &
VK_PIPELINE_CREATE_2_RENDERING_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT) ||
TU_DEBUG(FDM);
+ builder->fdm_per_layer = (builder->graphics_state.pipeline_flags &
+ VK_PIPELINE_CREATE_2_PER_LAYER_FRAGMENT_DENSITY_BIT_VALVE);
+ if (builder->fdm_per_layer) {
+ const VkPipelineFragmentDensityMapLayeredCreateInfoVALVE *fdm_layered_info =
+ vk_find_struct_const(create_info->pNext,
+ PIPELINE_FRAGMENT_DENSITY_MAP_LAYERED_CREATE_INFO_VALVE);
+ if (fdm_layered_info) {
+ builder->max_fdm_layers =
+ fdm_layered_info->maxFragmentDensityMapLayers;
+ }
+ }
}
}
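
tu_pipeline_builder_init_graphics above reads the per-layer FDM pipeline flag and the maxFragmentDensityMapLayers limit out of the pipeline create chain. A sketch of the corresponding application-side chaining; the layered-FDM sType token and the use of VkPipelineCreateFlags2CreateInfoKHR are assumptions following the usual patterns, while the two VALVE names and maxFragmentDensityMapLayers appear in this commit:

#include <vulkan/vulkan.h>

static VkResult
create_layered_fdm_pipeline(VkDevice device,
                            VkGraphicsPipelineCreateInfo pipeline_info,
                            uint32_t max_fdm_layers, VkPipeline *pipeline)
{
   /* Limit read by tu_pipeline_builder_init_graphics above. */
   VkPipelineFragmentDensityMapLayeredCreateInfoVALVE fdm_layered_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_FRAGMENT_DENSITY_MAP_LAYERED_CREATE_INFO_VALVE,
      .maxFragmentDensityMapLayers = max_fdm_layers,
   };

   /* Flag that sets builder->fdm_per_layer, passed via create flags 2. */
   VkPipelineCreateFlags2CreateInfoKHR flags2 = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_CREATE_FLAGS_2_CREATE_INFO_KHR,
      .pNext = &fdm_layered_info,
      .flags = VK_PIPELINE_CREATE_2_PER_LAYER_FRAGMENT_DENSITY_BIT_VALVE,
   };

   /* Keep whatever the caller already chained, then prepend our structs. */
   fdm_layered_info.pNext = pipeline_info.pNext;
   pipeline_info.pNext = &flags2;

   return vkCreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, &pipeline_info,
                                    NULL, pipeline);
}
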
diff --git a/src/freedreno/vulkan/tu_pipeline.h b/src/freedreno/vulkan/tu_pipeline.h
index 53ec4723b3d..2c4573702e2 100644
--- a/src/freedreno/vulkan/tu_pipeline.h
+++ b/src/freedreno/vulkan/tu_pipeline.h
@@ -109,18 +109,46 @@ struct tu_program_state
unsigned dynamic_descriptor_offsets[MAX_SETS];
+ /* With FDM, we control the fragment area by overriding the viewport and
+ * scissor. To have different areas for different views, we need a
+ * viewport/scissor per FDM layer. There are various possible scenarios,
+ * based on the shader and whether multiview or per-layer sampling is
+ * enabled, which are communicated to the driver
+ * via the struct members below:
+ *
+ * - The shader writes gl_ViewportIndex, managing multiple viewports in
+ * a way that may not correspond to FDM layer:
+ * - Set everything to false. The driver will set shared_scale and
+ * apply the same scaling to all viewports/scissors.
+ * - Multiview is enabled:
+ * - Set per_view_viewport.
+ * - Set fake_single_viewport to splat viewport 0 to all viewports.
+ * - (Not implemented yet) if the user requests per-view
+ * viewports, don't set fake_single_viewport and let the user
+ * set multiple viewports that are transformed independently.
+ * - Multiview is not enabled and per-layer FDM sampling is enabled:
+ * - Inject code into shader and set per_layer_viewport.
+ * - Set fake_single_viewport to splat viewport 0 to all viewports.
+ */
+
/* Whether the per-view-viewport feature should be enabled in HW. This
* implicitly adds gl_ViewIndex to gl_ViewportIndex so that from a HW
* point of view (but not necessarily the user's point of view!) there
* is a viewport per view.
*/
bool per_view_viewport;
- /* If per_view_viewport is true and this is true, the app has provided
- * a single viewport and we need to fake it by duplicating the viewport
- * across views before transforming each viewport separately using FDM
- * state.
+ /* Whether gl_ViewportIndex has been set to gl_Layer, so that from a HW
+ * point of view (but not necessarily the user's point of view!) there
+ * is a viewport per layer.
+ */
+ bool per_layer_viewport;
+ /* If per_view_viewport or per_layer_viewport is true and this is true,
+ * the app has provided a single viewport and we need to fake it by
+ * duplicating the viewport across views before transforming each
+ * viewport separately using FDM state.
*/
bool fake_single_viewport;
+
bool writes_shading_rate;
bool reads_shading_rate;
bool accesses_smask;
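
The comment block added above encodes a small decision table; restated as illustrative code (this helper does not exist in the driver, and the inputs are simplified booleans — the real logic also checks has_fdm and the per-view-viewport HW capability):

#include <stdbool.h>

struct viewport_mode {
   bool per_view_viewport;
   bool per_layer_viewport;
   bool fake_single_viewport; /* splat viewport 0 across all viewports */
};

static struct viewport_mode
choose_viewport_mode(bool shader_writes_viewport_index, bool multiview,
                     bool fdm_per_layer)
{
   struct viewport_mode mode = { false, false, false };

   if (shader_writes_viewport_index) {
      /* The shader manages viewports itself: keep everything false and let
       * the driver apply the same (shared) scale to every viewport/scissor. */
      return mode;
   }

   if (multiview) {
      /* HW adds gl_ViewIndex to gl_ViewportIndex: one viewport per view. */
      mode.per_view_viewport = true;
      mode.fake_single_viewport = true;
   } else if (fdm_per_layer) {
      /* gl_ViewportIndex is rewritten to gl_Layer: one viewport per layer. */
      mode.per_layer_viewport = true;
      mode.fake_single_viewport = true;
   }

   return mode;
}

In the fake_single_viewport cases, fdm_apply_viewports/fdm_apply_scissors then replicate the app's single viewport 0 before transforming each copy with its own FDM scale.
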
diff --git a/src/freedreno/vulkan/tu_shader.cc b/src/freedreno/vulkan/tu_shader.cc
index f9af424f5f9..ace7a825f37 100644
--- a/src/freedreno/vulkan/tu_shader.cc
+++ b/src/freedreno/vulkan/tu_shader.cc
@@ -1012,6 +1012,7 @@ tu_lower_io(nir_shader *shader, struct tu_device *dev,
struct lower_fdm_options {
unsigned num_views;
bool adjust_fragcoord;
+ bool use_layer;
};
static bool
@@ -1039,14 +1040,16 @@ lower_fdm_instr(struct nir_builder *b, nir_instr *instr, void *data)
nir_def *view;
if (options->num_views > 1) {
+ gl_varying_slot slot = options->use_layer ?
+ VARYING_SLOT_LAYER : VARYING_SLOT_VIEW_INDEX;
nir_variable *view_var =
nir_find_variable_with_location(b->shader, nir_var_shader_in,
- VARYING_SLOT_VIEW_INDEX);
+ slot);
if (view_var == NULL) {
view_var = nir_variable_create(b->shader, nir_var_shader_in,
glsl_int_type(), NULL);
- view_var->data.location = VARYING_SLOT_VIEW_INDEX;
+ view_var->data.location = slot;
view_var->data.interpolation = INTERP_MODE_FLAT;
view_var->data.driver_location = b->shader->num_inputs++;
}
@@ -1139,6 +1142,81 @@ tu_nir_lower_ssbo_descriptor(nir_shader *shader,
(void *)dev);
}
+struct lower_fdm_state {
+ nir_variable *layer_var;
+ nir_variable *viewport_var;
+};
+
+static bool
+lower_layered_fdm_instr(nir_builder *b, nir_intrinsic_instr *intrin,
+ void *cb)
+{
+ struct lower_fdm_state *state = (struct lower_fdm_state *)cb;
+ if (intrin->intrinsic != nir_intrinsic_store_deref)
+ return false;
+
+ nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+ if (!nir_deref_mode_is(deref, nir_var_shader_out))
+ return false;
+
+ nir_variable *var = nir_deref_instr_get_variable(deref);
+ if (var != state->layer_var)
+ return false;
+
+ /* Ok, we've finally got a store to gl_Layer. Mirror a store to
+ * gl_ViewportIndex.
+ */
+ if (!state->viewport_var) {
+ state->viewport_var =
+ nir_create_variable_with_location(b->shader,
+ nir_var_shader_out,
+ VARYING_SLOT_VIEWPORT,
+ glsl_int_type());
+ state->viewport_var->data.interpolation = INTERP_MODE_FLAT;
+ }
+
+ b->cursor = nir_after_instr(&intrin->instr);
+ nir_store_var(b, state->viewport_var, intrin->src[1].ssa, 0x1);
+ return true;
+}
+
+static bool
+tu_nir_lower_layered_fdm(nir_shader *shader,
+ bool *per_layer_viewport)
+{
+ nir_function_impl *entrypoint = nir_shader_get_entrypoint(shader);
+
+ /* If the viewport is already written, there's nothing to do and we will fall
+ * back.
+ */
+ if (shader->info.outputs_written & VARYING_BIT_VIEWPORT) {
+ *per_layer_viewport = false;
+ return nir_no_progress(entrypoint);
+ }
+
+ *per_layer_viewport = true;
+
+ struct lower_fdm_state state = {};
+
+ state.layer_var =
+ nir_find_variable_with_location(shader, nir_var_shader_out,
+ VARYING_SLOT_LAYER);
+
+ /* If layer is never written, it will get the default value of 0 and we can
+ * also leave the viewport with the default value of 0.
+ */
+ if (!state.layer_var)
+ return nir_no_progress(entrypoint);
+
+ state.viewport_var =
+ nir_find_variable_with_location(shader, nir_var_shader_out,
+ VARYING_SLOT_VIEWPORT);
+
+
+ return nir_shader_intrinsics_pass(shader, lower_layered_fdm_instr,
+ nir_metadata_control_flow, &state);
+}
+
static void
shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
{
@@ -2451,6 +2529,7 @@ tu_shader_serialize(struct vk_pipeline_cache_object *object,
sizeof(shader->dynamic_descriptor_sizes));
blob_write_uint32(blob, shader->view_mask);
blob_write_uint8(blob, shader->active_desc_sets);
+ blob_write_uint8(blob, shader->per_layer_viewport);
ir3_store_variant(blob, shader->variant);
@@ -2496,6 +2575,7 @@ tu_shader_deserialize(struct vk_pipeline_cache *cache,
sizeof(shader->dynamic_descriptor_sizes));
shader->view_mask = blob_read_uint32(blob);
shader->active_desc_sets = blob_read_uint8(blob);
+ shader->per_layer_viewport = blob_read_uint8(blob);
shader->variant = ir3_retrieve_variant(blob, dev->compiler, NULL);
@@ -2568,11 +2648,26 @@ tu_shader_create(struct tu_device *dev,
* lower input attachment coordinates except if unscaled.
*/
const struct lower_fdm_options fdm_options = {
- .num_views = MAX2(util_last_bit(key->multiview_mask), 1),
+ .num_views = MAX2(key->multiview_mask ?
+ util_last_bit(key->multiview_mask) :
+ key->max_fdm_layers, 1),
.adjust_fragcoord = key->fragment_density_map,
+ .use_layer = !key->multiview_mask,
};
NIR_PASS(_, nir, tu_nir_lower_fdm, &fdm_options);
+ if (nir->info.stage != MESA_SHADER_FRAGMENT &&
+ nir->info.stage != MESA_SHADER_COMPUTE &&
+ !key->multiview_mask &&
+ key->fdm_per_layer) {
+ NIR_PASS(_, nir, tu_nir_lower_layered_fdm, &shader->per_layer_viewport);
+ }
+
+ if (nir->info.stage == MESA_SHADER_FRAGMENT &&
+ key->fdm_per_layer) {
+ shader->fs.max_fdm_layers = key->max_fdm_layers;
+ }
+
/* Note that nir_opt_barrier_modes here breaks tests such as
* dEQP-VK.memory_model.message_passing.ext.u32.coherent.fence_atomic.atomicwrite.device.payload_local.image.guard_local.buffer.vert
*/
diff --git a/src/freedreno/vulkan/tu_shader.h b/src/freedreno/vulkan/tu_shader.h
index 983796f79d7..6e9066a989d 100644
--- a/src/freedreno/vulkan/tu_shader.h
+++ b/src/freedreno/vulkan/tu_shader.h
@@ -82,6 +82,11 @@ struct tu_shader
*/
int dynamic_descriptor_sizes[MAX_SETS];
+ /* For all shader types other than FS, store whether the viewport was
+ * rewritten to equal the layer.
+ */
+ bool per_layer_viewport;
+
union {
struct {
unsigned patch_type;
@@ -99,6 +104,10 @@ struct tu_shader
uint32_t status;
bool force_late_z;
} lrz;
+
+ /* If per_layer_viewport is true, the maximum number of layers written to.
+ */
+ uint8_t max_fdm_layers;
} fs;
};
};
@@ -106,8 +115,10 @@ struct tu_shader
struct tu_shader_key {
unsigned multiview_mask;
uint16_t read_only_input_attachments;
+ uint8_t max_fdm_layers;
bool force_sample_interp;
bool fragment_density_map;
+ bool fdm_per_layer;
bool dynamic_renderpass;
uint8_t unscaled_input_fragcoord;
bool robust_storage_access2;