Diffstat (limited to 'src/gallium/frontends/lavapipe/lvp_pipeline.c')
-rw-r--r--  src/gallium/frontends/lavapipe/lvp_pipeline.c  | 1830
1 file changed, 1155 insertions(+), 675 deletions(-)
diff --git a/src/gallium/frontends/lavapipe/lvp_pipeline.c b/src/gallium/frontends/lavapipe/lvp_pipeline.c
index 012ac0bd048..0c63e57b3b7 100644
--- a/src/gallium/frontends/lavapipe/lvp_pipeline.c
+++ b/src/gallium/frontends/lavapipe/lvp_pipeline.c
@@ -22,10 +22,15 @@
*/
#include "lvp_private.h"
+#include "vk_nir_convert_ycbcr.h"
+#include "vk_pipeline.h"
+#include "vk_render_pass.h"
#include "vk_util.h"
#include "glsl_types.h"
+#include "util/os_time.h"
#include "spirv/nir_spirv.h"
#include "nir/nir_builder.h"
+#include "nir/nir_serialize.h"
#include "lvp_lower_vulkan_resource.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
@@ -33,12 +38,75 @@
#define SPIR_V_MAGIC_NUMBER 0x07230203
-#define LVP_PIPELINE_DUP(dst, src, type, count) do { \
- type *temp = ralloc_array(mem_ctx, type, count); \
- if (!temp) return VK_ERROR_OUT_OF_HOST_MEMORY; \
- memcpy(temp, (src), sizeof(type) * count); \
- dst = temp; \
- } while(0)
+#define MAX_DYNAMIC_STATES 72
+
+typedef void (*cso_destroy_func)(struct pipe_context*, void*);
+
+static void
+shader_destroy(struct lvp_device *device, struct lvp_shader *shader, bool locked)
+{
+ if (!shader->pipeline_nir)
+ return;
+ gl_shader_stage stage = shader->pipeline_nir->nir->info.stage;
+ cso_destroy_func destroy[] = {
+ device->queue.ctx->delete_vs_state,
+ device->queue.ctx->delete_tcs_state,
+ device->queue.ctx->delete_tes_state,
+ device->queue.ctx->delete_gs_state,
+ device->queue.ctx->delete_fs_state,
+ device->queue.ctx->delete_compute_state,
+ device->queue.ctx->delete_ts_state,
+ device->queue.ctx->delete_ms_state,
+ };
+
+ if (!locked)
+ simple_mtx_lock(&device->queue.lock);
+
+ set_foreach(&shader->inlines.variants, entry) {
+ struct lvp_inline_variant *variant = (void*)entry->key;
+ destroy[stage](device->queue.ctx, variant->cso);
+ free(variant);
+ }
+ ralloc_free(shader->inlines.variants.table);
+
+ if (shader->shader_cso)
+ destroy[stage](device->queue.ctx, shader->shader_cso);
+ if (shader->tess_ccw_cso)
+ destroy[stage](device->queue.ctx, shader->tess_ccw_cso);
+
+ if (!locked)
+ simple_mtx_unlock(&device->queue.lock);
+
+ lvp_pipeline_nir_ref(&shader->pipeline_nir, NULL);
+ lvp_pipeline_nir_ref(&shader->tess_ccw, NULL);
+}
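
Review note: the destroy[] table above is indexed directly by gl_shader_stage, so it relies on the enum ordering VERTEX, TESS_CTRL, TESS_EVAL, GEOMETRY, FRAGMENT, COMPUTE, TASK, MESH. A compile-time guard like the following (not part of the patch, shown only as a sketch) would make that assumption explicit:

    /* Hypothetical guard: initializer order of destroy[] must match
     * the gl_shader_stage enum from shader_enums.h. */
    STATIC_ASSERT(MESA_SHADER_VERTEX == 0);
    STATIC_ASSERT(MESA_SHADER_MESH == 7);
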
+
+void
+lvp_pipeline_destroy(struct lvp_device *device, struct lvp_pipeline *pipeline, bool locked)
+{
+ lvp_forall_stage(i)
+ shader_destroy(device, &pipeline->shaders[i], locked);
+
+ if (pipeline->layout)
+ vk_pipeline_layout_unref(&device->vk, &pipeline->layout->vk);
+
+ for (unsigned i = 0; i < pipeline->num_groups; i++) {
+ LVP_FROM_HANDLE(lvp_pipeline, p, pipeline->groups[i]);
+ lvp_pipeline_destroy(device, p, locked);
+ }
+
+ if (pipeline->rt.stages) {
+ for (uint32_t i = 0; i < pipeline->rt.stage_count; i++)
+ lvp_pipeline_nir_ref(pipeline->rt.stages + i, NULL);
+ }
+
+ free(pipeline->rt.stages);
+ free(pipeline->rt.groups);
+
+ vk_free(&device->vk.alloc, pipeline->state_data);
+ vk_object_base_finish(&pipeline->base);
+ vk_free(&device->vk.alloc, pipeline);
+}
VKAPI_ATTR void VKAPI_CALL lvp_DestroyPipeline(
VkDevice _device,
@@ -51,431 +119,187 @@ VKAPI_ATTR void VKAPI_CALL lvp_DestroyPipeline(
if (!_pipeline)
return;
- if (pipeline->shader_cso[PIPE_SHADER_VERTEX])
- device->queue.ctx->delete_vs_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_VERTEX]);
- if (pipeline->shader_cso[PIPE_SHADER_FRAGMENT])
- device->queue.ctx->delete_fs_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_FRAGMENT]);
- if (pipeline->shader_cso[PIPE_SHADER_GEOMETRY])
- device->queue.ctx->delete_gs_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_GEOMETRY]);
- if (pipeline->shader_cso[PIPE_SHADER_TESS_CTRL])
- device->queue.ctx->delete_tcs_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_TESS_CTRL]);
- if (pipeline->shader_cso[PIPE_SHADER_TESS_EVAL])
- device->queue.ctx->delete_tes_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_TESS_EVAL]);
- if (pipeline->shader_cso[PIPE_SHADER_COMPUTE])
- device->queue.ctx->delete_compute_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_COMPUTE]);
-
- ralloc_free(pipeline->mem_ctx);
- vk_object_base_finish(&pipeline->base);
- vk_free2(&device->vk.alloc, pAllocator, pipeline);
+ if (pipeline->used) {
+ simple_mtx_lock(&device->queue.lock);
+ util_dynarray_append(&device->queue.pipeline_destroys, struct lvp_pipeline*, pipeline);
+ simple_mtx_unlock(&device->queue.lock);
+ } else {
+ lvp_pipeline_destroy(device, pipeline, false);
+ }
}
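
Review note: lvp_DestroyPipeline now defers destruction of pipelines that have already been bound; they are appended to queue.pipeline_destroys and reaped later on the queue thread, which is why lvp_pipeline_destroy takes a `locked` flag. A minimal sketch of such a drain, with a hypothetical helper name (the real drain lives in the queue code and may differ):

    static void
    drain_pipeline_destroys(struct lvp_device *device)
    {
       simple_mtx_lock(&device->queue.lock);
       util_dynarray_foreach(&device->queue.pipeline_destroys,
                             struct lvp_pipeline *, p)
          lvp_pipeline_destroy(device, *p, true /* queue lock already held */);
       util_dynarray_clear(&device->queue.pipeline_destroys);
       simple_mtx_unlock(&device->queue.lock);
    }
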
-static VkResult
-deep_copy_shader_stage(void *mem_ctx,
- struct VkPipelineShaderStageCreateInfo *dst,
- const struct VkPipelineShaderStageCreateInfo *src)
+static void
+shared_var_info(const struct glsl_type *type, unsigned *size, unsigned *align)
{
- dst->sType = src->sType;
- dst->pNext = NULL;
- dst->flags = src->flags;
- dst->stage = src->stage;
- dst->module = src->module;
- dst->pName = src->pName;
- dst->pSpecializationInfo = NULL;
- if (src->pSpecializationInfo) {
- const VkSpecializationInfo *src_spec = src->pSpecializationInfo;
- VkSpecializationInfo *dst_spec = ralloc_size(mem_ctx, sizeof(VkSpecializationInfo) +
- src_spec->mapEntryCount * sizeof(VkSpecializationMapEntry) +
- src_spec->dataSize);
- VkSpecializationMapEntry *maps = (VkSpecializationMapEntry *)(dst_spec + 1);
- dst_spec->pMapEntries = maps;
- void *pdata = (void *)(dst_spec->pMapEntries + src_spec->mapEntryCount);
- dst_spec->pData = pdata;
-
-
- dst_spec->mapEntryCount = src_spec->mapEntryCount;
- dst_spec->dataSize = src_spec->dataSize;
- memcpy(pdata, src_spec->pData, src->pSpecializationInfo->dataSize);
- memcpy(maps, src_spec->pMapEntries, src_spec->mapEntryCount * sizeof(VkSpecializationMapEntry));
- dst->pSpecializationInfo = dst_spec;
- }
- return VK_SUCCESS;
+ assert(glsl_type_is_vector_or_scalar(type));
+
+ uint32_t comp_size = glsl_type_is_boolean(type)
+ ? 4 : glsl_get_bit_size(type) / 8;
+ unsigned length = glsl_get_vector_elements(type);
+ *size = comp_size * length,
+ *align = comp_size;
}
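
Review note: shared_var_info reports size/alignment for vector or scalar shared variables, storing booleans as 32-bit values and aligning to the component size. Worked examples derived from those rules:

    /* vec3 of 32-bit floats: comp_size = 4, length = 3 -> size 12, align 4 */
    /* bvec2: booleans stored as 4 bytes each           -> size 8,  align 4 */
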
-static VkResult
-deep_copy_vertex_input_state(void *mem_ctx,
- struct VkPipelineVertexInputStateCreateInfo *dst,
- const struct VkPipelineVertexInputStateCreateInfo *src)
+static bool
+remove_barriers_impl(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
- dst->sType = src->sType;
- dst->pNext = NULL;
- dst->flags = src->flags;
- dst->vertexBindingDescriptionCount = src->vertexBindingDescriptionCount;
-
- LVP_PIPELINE_DUP(dst->pVertexBindingDescriptions,
- src->pVertexBindingDescriptions,
- VkVertexInputBindingDescription,
- src->vertexBindingDescriptionCount);
-
- dst->vertexAttributeDescriptionCount = src->vertexAttributeDescriptionCount;
-
- LVP_PIPELINE_DUP(dst->pVertexAttributeDescriptions,
- src->pVertexAttributeDescriptions,
- VkVertexInputAttributeDescription,
- src->vertexAttributeDescriptionCount);
-
- if (src->pNext) {
- vk_foreach_struct(ext, src->pNext) {
- switch (ext->sType) {
- case VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT: {
- VkPipelineVertexInputDivisorStateCreateInfoEXT *ext_src = (VkPipelineVertexInputDivisorStateCreateInfoEXT *)ext;
- VkPipelineVertexInputDivisorStateCreateInfoEXT *ext_dst = ralloc(mem_ctx, VkPipelineVertexInputDivisorStateCreateInfoEXT);
-
- ext_dst->sType = ext_src->sType;
- ext_dst->vertexBindingDivisorCount = ext_src->vertexBindingDivisorCount;
-
- LVP_PIPELINE_DUP(ext_dst->pVertexBindingDivisors,
- ext_src->pVertexBindingDivisors,
- VkVertexInputBindingDivisorDescriptionEXT,
- ext_src->vertexBindingDivisorCount);
-
- dst->pNext = ext_dst;
- break;
- }
- default:
- break;
- }
- }
+ if (intr->intrinsic != nir_intrinsic_barrier)
+ return false;
+ if (data) {
+ if (nir_intrinsic_execution_scope(intr) != SCOPE_NONE)
+ return false;
+
+ if (nir_intrinsic_memory_scope(intr) == SCOPE_WORKGROUP ||
+ nir_intrinsic_memory_scope(intr) == SCOPE_DEVICE ||
+ nir_intrinsic_memory_scope(intr) == SCOPE_QUEUE_FAMILY)
+ return false;
}
- return VK_SUCCESS;
+ nir_instr_remove(&intr->instr);
+ return true;
}
static bool
-dynamic_state_contains(const VkPipelineDynamicStateCreateInfo *src, VkDynamicState state)
+remove_barriers(nir_shader *nir, bool is_compute)
{
- if (!src)
- return false;
-
- for (unsigned i = 0; i < src->dynamicStateCount; i++)
- if (src->pDynamicStates[i] == state)
- return true;
- return false;
+ return nir_shader_intrinsics_pass(nir, remove_barriers_impl,
+ nir_metadata_dominance,
+ (void*)is_compute);
}
-static VkResult
-deep_copy_viewport_state(void *mem_ctx,
- const VkPipelineDynamicStateCreateInfo *dyn_state,
- VkPipelineViewportStateCreateInfo *dst,
- const VkPipelineViewportStateCreateInfo *src)
+static bool
+lower_demote_impl(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
- dst->sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
- dst->pNext = NULL;
- dst->pViewports = NULL;
- dst->pScissors = NULL;
-
- if (!dynamic_state_contains(dyn_state, VK_DYNAMIC_STATE_VIEWPORT) &&
- !dynamic_state_contains(dyn_state, VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT_EXT)) {
- LVP_PIPELINE_DUP(dst->pViewports,
- src->pViewports,
- VkViewport,
- src->viewportCount);
+ if (intr->intrinsic == nir_intrinsic_demote || intr->intrinsic == nir_intrinsic_terminate) {
+ intr->intrinsic = nir_intrinsic_discard;
+ return true;
}
- if (!dynamic_state_contains(dyn_state, VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT_EXT))
- dst->viewportCount = src->viewportCount;
- else
- dst->viewportCount = 0;
-
- if (!dynamic_state_contains(dyn_state, VK_DYNAMIC_STATE_SCISSOR) &&
- !dynamic_state_contains(dyn_state, VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT_EXT)) {
- if (src->pScissors)
- LVP_PIPELINE_DUP(dst->pScissors,
- src->pScissors,
- VkRect2D,
- src->scissorCount);
+ if (intr->intrinsic == nir_intrinsic_demote_if || intr->intrinsic == nir_intrinsic_terminate_if) {
+ intr->intrinsic = nir_intrinsic_discard_if;
+ return true;
}
- if (!dynamic_state_contains(dyn_state, VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT_EXT))
- dst->scissorCount = src->scissorCount;
- else
- dst->scissorCount = 0;
+ return false;
+}
- return VK_SUCCESS;
+static bool
+lower_demote(nir_shader *nir)
+{
+ return nir_shader_intrinsics_pass(nir, lower_demote_impl,
+ nir_metadata_dominance, NULL);
}
-static VkResult
-deep_copy_color_blend_state(void *mem_ctx,
- VkPipelineColorBlendStateCreateInfo *dst,
- const VkPipelineColorBlendStateCreateInfo *src)
+static bool
+find_tex(const nir_instr *instr, const void *data_cb)
{
- dst->sType = src->sType;
- dst->pNext = NULL;
- dst->flags = src->flags;
- dst->logicOpEnable = src->logicOpEnable;
- dst->logicOp = src->logicOp;
+ if (instr->type == nir_instr_type_tex)
+ return true;
+ return false;
+}
- LVP_PIPELINE_DUP(dst->pAttachments,
- src->pAttachments,
- VkPipelineColorBlendAttachmentState,
- src->attachmentCount);
- dst->attachmentCount = src->attachmentCount;
+static nir_def *
+fixup_tex_instr(struct nir_builder *b, nir_instr *instr, void *data_cb)
+{
+ nir_tex_instr *tex_instr = nir_instr_as_tex(instr);
+ unsigned offset = 0;
- memcpy(&dst->blendConstants, &src->blendConstants, sizeof(float) * 4);
+ int idx = nir_tex_instr_src_index(tex_instr, nir_tex_src_texture_offset);
+ if (idx == -1)
+ return NULL;
- return VK_SUCCESS;
-}
+ if (!nir_src_is_const(tex_instr->src[idx].src))
+ return NULL;
+ offset = nir_src_comp_as_uint(tex_instr->src[idx].src, 0);
-static VkResult
-deep_copy_dynamic_state(void *mem_ctx,
- VkPipelineDynamicStateCreateInfo *dst,
- const VkPipelineDynamicStateCreateInfo *src)
-{
- dst->sType = src->sType;
- dst->pNext = NULL;
- dst->flags = src->flags;
-
- LVP_PIPELINE_DUP(dst->pDynamicStates,
- src->pDynamicStates,
- VkDynamicState,
- src->dynamicStateCount);
- dst->dynamicStateCount = src->dynamicStateCount;
- return VK_SUCCESS;
+ nir_tex_instr_remove_src(tex_instr, idx);
+ tex_instr->texture_index += offset;
+ return NIR_LOWER_INSTR_PROGRESS;
}
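
Review note: fixup_tex_instr folds a constant nir_tex_src_texture_offset back into texture_index so the backend sees a direct texture binding. For example (hypothetical values):

    /* before: texture_index = 2, constant texture_offset src = 3
     * after:  texture_index = 5, offset src removed              */
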
-
-static VkResult
-deep_copy_rasterization_state(void *mem_ctx,
- VkPipelineRasterizationStateCreateInfo *dst,
- const VkPipelineRasterizationStateCreateInfo *src)
+static bool
+lvp_nir_fixup_indirect_tex(nir_shader *shader)
{
- memcpy(dst, src, sizeof(VkPipelineRasterizationStateCreateInfo));
- dst->pNext = NULL;
-
- if (src->pNext) {
- vk_foreach_struct(ext, src->pNext) {
- switch (ext->sType) {
- case VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT: {
- VkPipelineRasterizationDepthClipStateCreateInfoEXT *ext_src = (VkPipelineRasterizationDepthClipStateCreateInfoEXT *)ext;
- VkPipelineRasterizationDepthClipStateCreateInfoEXT *ext_dst = ralloc(mem_ctx, VkPipelineRasterizationDepthClipStateCreateInfoEXT);
- ext_dst->sType = ext_src->sType;
- ext_dst->flags = ext_src->flags;
- ext_dst->depthClipEnable = ext_src->depthClipEnable;
- dst->pNext = ext_dst;
- break;
- }
- default:
- break;
- }
- }
- }
- return VK_SUCCESS;
+ return nir_shader_lower_instructions(shader, find_tex, fixup_tex_instr, NULL);
}
-static VkResult
-deep_copy_graphics_create_info(void *mem_ctx,
- VkGraphicsPipelineCreateInfo *dst,
- const VkGraphicsPipelineCreateInfo *src)
+static void
+optimize(nir_shader *nir)
{
- int i;
- VkResult result;
- VkPipelineShaderStageCreateInfo *stages;
- VkPipelineVertexInputStateCreateInfo *vertex_input;
- VkPipelineRasterizationStateCreateInfo *rasterization_state;
- LVP_FROM_HANDLE(lvp_render_pass, pass, src->renderPass);
-
- dst->sType = src->sType;
- dst->pNext = NULL;
- dst->flags = src->flags;
- dst->layout = src->layout;
- dst->renderPass = src->renderPass;
- dst->subpass = src->subpass;
- dst->basePipelineHandle = src->basePipelineHandle;
- dst->basePipelineIndex = src->basePipelineIndex;
-
- /* pStages */
- VkShaderStageFlags stages_present = 0;
- dst->stageCount = src->stageCount;
- stages = ralloc_array(mem_ctx, VkPipelineShaderStageCreateInfo, dst->stageCount);
- for (i = 0 ; i < dst->stageCount; i++) {
- result = deep_copy_shader_stage(mem_ctx, &stages[i], &src->pStages[i]);
- if (result != VK_SUCCESS)
- return result;
- stages_present |= src->pStages[i].stage;
- }
- dst->pStages = stages;
+ bool progress = false;
+ do {
+ progress = false;
- /* pVertexInputState */
- if (!dynamic_state_contains(src->pDynamicState, VK_DYNAMIC_STATE_VERTEX_INPUT_EXT)) {
- vertex_input = ralloc(mem_ctx, VkPipelineVertexInputStateCreateInfo);
- result = deep_copy_vertex_input_state(mem_ctx, vertex_input,
- src->pVertexInputState);
- if (result != VK_SUCCESS)
- return result;
- dst->pVertexInputState = vertex_input;
- } else
- dst->pVertexInputState = NULL;
-
- /* pInputAssemblyState */
- LVP_PIPELINE_DUP(dst->pInputAssemblyState,
- src->pInputAssemblyState,
- VkPipelineInputAssemblyStateCreateInfo,
- 1);
-
- /* pTessellationState */
- if (src->pTessellationState &&
- (stages_present & (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)) ==
- (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)) {
- LVP_PIPELINE_DUP(dst->pTessellationState,
- src->pTessellationState,
- VkPipelineTessellationStateCreateInfo,
- 1);
- }
+ NIR_PASS(progress, nir, nir_lower_flrp, 32|64, true);
+ NIR_PASS(progress, nir, nir_split_array_vars, nir_var_function_temp);
+ NIR_PASS(progress, nir, nir_shrink_vec_array_vars, nir_var_function_temp);
+ NIR_PASS(progress, nir, nir_opt_deref);
+ NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
- /* pViewportState */
- bool rasterization_disabled = !dynamic_state_contains(src->pDynamicState, VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE_EXT) &&
- src->pRasterizationState->rasterizerDiscardEnable;
- if (src->pViewportState && !rasterization_disabled) {
- VkPipelineViewportStateCreateInfo *viewport_state;
- viewport_state = ralloc(mem_ctx, VkPipelineViewportStateCreateInfo);
- if (!viewport_state)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
- deep_copy_viewport_state(mem_ctx, src->pDynamicState,
- viewport_state, src->pViewportState);
- dst->pViewportState = viewport_state;
- } else
- dst->pViewportState = NULL;
-
- /* pRasterizationState */
- rasterization_state = ralloc(mem_ctx, VkPipelineRasterizationStateCreateInfo);
- if (!rasterization_state)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
- deep_copy_rasterization_state(mem_ctx, rasterization_state, src->pRasterizationState);
- dst->pRasterizationState = rasterization_state;
-
- /* pMultisampleState */
- if (src->pMultisampleState && !rasterization_disabled) {
- VkPipelineMultisampleStateCreateInfo* ms_state;
- ms_state = ralloc_size(mem_ctx, sizeof(VkPipelineMultisampleStateCreateInfo) + sizeof(VkSampleMask));
- if (!ms_state)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
- /* does samplemask need deep copy? */
- memcpy(ms_state, src->pMultisampleState, sizeof(VkPipelineMultisampleStateCreateInfo));
- if (src->pMultisampleState->pSampleMask) {
- VkSampleMask *sample_mask = (VkSampleMask *)(ms_state + 1);
- sample_mask[0] = src->pMultisampleState->pSampleMask[0];
- ms_state->pSampleMask = sample_mask;
- }
- dst->pMultisampleState = ms_state;
- } else
- dst->pMultisampleState = NULL;
-
- /* pDepthStencilState */
- if (src->pDepthStencilState && !rasterization_disabled && pass->has_zs_attachment) {
- LVP_PIPELINE_DUP(dst->pDepthStencilState,
- src->pDepthStencilState,
- VkPipelineDepthStencilStateCreateInfo,
- 1);
- } else
- dst->pDepthStencilState = NULL;
-
- /* pColorBlendState */
- if (src->pColorBlendState && !rasterization_disabled && pass->has_color_attachment) {
- VkPipelineColorBlendStateCreateInfo* cb_state;
-
- cb_state = ralloc(mem_ctx, VkPipelineColorBlendStateCreateInfo);
- if (!cb_state)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
- deep_copy_color_blend_state(mem_ctx, cb_state, src->pColorBlendState);
- dst->pColorBlendState = cb_state;
- } else
- dst->pColorBlendState = NULL;
-
- if (src->pDynamicState) {
- VkPipelineDynamicStateCreateInfo* dyn_state;
-
- /* pDynamicState */
- dyn_state = ralloc(mem_ctx, VkPipelineDynamicStateCreateInfo);
- if (!dyn_state)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
- deep_copy_dynamic_state(mem_ctx, dyn_state, src->pDynamicState);
- dst->pDynamicState = dyn_state;
- } else
- dst->pDynamicState = NULL;
+ NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
- return VK_SUCCESS;
-}
+ NIR_PASS(progress, nir, nir_copy_prop);
+ NIR_PASS(progress, nir, nir_opt_dce);
+ NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
-static VkResult
-deep_copy_compute_create_info(void *mem_ctx,
- VkComputePipelineCreateInfo *dst,
- const VkComputePipelineCreateInfo *src)
-{
- VkResult result;
- dst->sType = src->sType;
- dst->pNext = NULL;
- dst->flags = src->flags;
- dst->layout = src->layout;
- dst->basePipelineHandle = src->basePipelineHandle;
- dst->basePipelineIndex = src->basePipelineIndex;
-
- result = deep_copy_shader_stage(mem_ctx, &dst->stage, &src->stage);
- if (result != VK_SUCCESS)
- return result;
- return VK_SUCCESS;
-}
+ NIR_PASS(progress, nir, nir_opt_algebraic);
+ NIR_PASS(progress, nir, nir_opt_constant_folding);
-static inline unsigned
-st_shader_stage_to_ptarget(gl_shader_stage stage)
-{
- switch (stage) {
- case MESA_SHADER_VERTEX:
- return PIPE_SHADER_VERTEX;
- case MESA_SHADER_FRAGMENT:
- return PIPE_SHADER_FRAGMENT;
- case MESA_SHADER_GEOMETRY:
- return PIPE_SHADER_GEOMETRY;
- case MESA_SHADER_TESS_CTRL:
- return PIPE_SHADER_TESS_CTRL;
- case MESA_SHADER_TESS_EVAL:
- return PIPE_SHADER_TESS_EVAL;
- case MESA_SHADER_COMPUTE:
- return PIPE_SHADER_COMPUTE;
- default:
- break;
- }
+ NIR_PASS(progress, nir, nir_opt_remove_phis);
+ bool loop = false;
+ NIR_PASS(loop, nir, nir_opt_loop);
+ progress |= loop;
+ if (loop) {
+ /* If nir_opt_loop makes progress, then we need to clean
+ * things up if we want any hope of nir_opt_if or nir_opt_loop_unroll
+ * to make progress.
+ */
+ NIR_PASS(progress, nir, nir_copy_prop);
+ NIR_PASS(progress, nir, nir_opt_dce);
+ NIR_PASS(progress, nir, nir_opt_remove_phis);
+ }
+ NIR_PASS(progress, nir, nir_opt_if, nir_opt_if_optimize_phi_true_false);
+ NIR_PASS(progress, nir, nir_opt_dead_cf);
+ NIR_PASS(progress, nir, nir_opt_conditional_discard);
+ NIR_PASS(progress, nir, nir_opt_remove_phis);
+ NIR_PASS(progress, nir, nir_opt_cse);
+ NIR_PASS(progress, nir, nir_opt_undef);
- assert(!"should not be reached");
- return PIPE_SHADER_VERTEX;
+ NIR_PASS(progress, nir, nir_opt_deref);
+ NIR_PASS(progress, nir, nir_lower_alu_to_scalar, NULL, NULL);
+ NIR_PASS(progress, nir, nir_opt_loop_unroll);
+ NIR_PASS(progress, nir, lvp_nir_fixup_indirect_tex);
+ } while (progress);
}
-static void
-shared_var_info(const struct glsl_type *type, unsigned *size, unsigned *align)
+void
+lvp_shader_optimize(nir_shader *nir)
{
- assert(glsl_type_is_vector_or_scalar(type));
+ optimize(nir);
+ NIR_PASS_V(nir, nir_lower_var_copies);
+ NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
+ NIR_PASS_V(nir, nir_opt_dce);
+ nir_sweep(nir);
+}
- uint32_t comp_size = glsl_type_is_boolean(type)
- ? 4 : glsl_get_bit_size(type) / 8;
- unsigned length = glsl_get_vector_elements(type);
- *size = comp_size * length,
- *align = comp_size;
+struct lvp_pipeline_nir *
+lvp_create_pipeline_nir(nir_shader *nir)
+{
+ struct lvp_pipeline_nir *pipeline_nir = ralloc(NULL, struct lvp_pipeline_nir);
+ pipeline_nir->nir = nir;
+ pipeline_nir->ref_cnt = 1;
+ return pipeline_nir;
}
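
Review note: lvp_pipeline_nir wraps a nir_shader with a reference count so shaders can be shared between pipeline libraries. The ref/unref helper used throughout this file lives in lvp_private.h; a sketch of the replace-and-release pattern it implements (illustrative, not the verbatim helper):

    static inline void
    lvp_pipeline_nir_ref(struct lvp_pipeline_nir **dst,
                         struct lvp_pipeline_nir *src)
    {
       struct lvp_pipeline_nir *old = *dst;
       if (old == src)
          return;
       if (src)
          src->ref_cnt++;
       if (old && --old->ref_cnt == 0) {
          ralloc_free(old->nir);
          ralloc_free(old);
       }
       *dst = src;
    }
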
-static void
-lvp_shader_compile_to_ir(struct lvp_pipeline *pipeline,
- struct vk_shader_module *module,
- const char *entrypoint_name,
- gl_shader_stage stage,
- const VkSpecializationInfo *spec_info)
+static VkResult
+compile_spirv(struct lvp_device *pdevice, const VkPipelineShaderStageCreateInfo *sinfo, nir_shader **nir)
{
- nir_shader *nir;
- const nir_shader_compiler_options *drv_options = pipeline->device->pscreen->get_compiler_options(pipeline->device->pscreen, PIPE_SHADER_IR_NIR, st_shader_stage_to_ptarget(stage));
- bool progress;
- uint32_t *spirv = (uint32_t *) module->data;
- assert(spirv[0] == SPIR_V_MAGIC_NUMBER);
- assert(module->size % 4 == 0);
+ gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
+ assert(stage <= LVP_SHADER_STAGES && stage != MESA_SHADER_NONE);
+ VkResult result;
- uint32_t num_spec_entries = 0;
- struct nir_spirv_specialization *spec_entries =
- vk_spec_info_to_nir_spirv(spec_info, &num_spec_entries);
+#ifdef VK_ENABLE_BETA_EXTENSIONS
+ const VkPipelineShaderStageNodeCreateInfoAMDX *node_info = vk_find_struct_const(
+ sinfo->pNext, PIPELINE_SHADER_STAGE_NODE_CREATE_INFO_AMDX);
+#endif
- struct lvp_device *pdevice = pipeline->device;
const struct spirv_to_nir_options spirv_options = {
.environment = NIR_SPIRV_VULKAN,
.caps = {
@@ -483,6 +307,11 @@ lvp_shader_compile_to_ir(struct lvp_pipeline *pipeline,
.int16 = true,
.int64 = (pdevice->pscreen->get_param(pdevice->pscreen, PIPE_CAP_INT64) == 1),
.tessellation = true,
+ .float_controls = true,
+ .float32_atomic_add = true,
+#if LLVM_VERSION_MAJOR >= 15
+ .float32_atomic_min_max = true,
+#endif
.image_ms_array = true,
.image_read_without_format = true,
.image_write_without_format = true,
@@ -497,6 +326,7 @@ lvp_shader_compile_to_ir(struct lvp_pipeline *pipeline,
.device_group = true,
.draw_parameters = true,
.shader_viewport_index_layer = true,
+ .shader_clock = true,
.multiview = true,
.physical_storage_buffer_address = true,
.int64_atomics = true,
@@ -504,59 +334,103 @@ lvp_shader_compile_to_ir(struct lvp_pipeline *pipeline,
.subgroup_basic = true,
.subgroup_ballot = true,
.subgroup_quad = true,
+#if LLVM_VERSION_MAJOR >= 10
+ .subgroup_shuffle = true,
+#endif
.subgroup_vote = true,
+ .vk_memory_model = true,
+ .vk_memory_model_device_scope = true,
.int8 = true,
.float16 = true,
+ .demote_to_helper_invocation = true,
+ .mesh_shading = true,
+ .descriptor_array_dynamic_indexing = true,
+ .descriptor_array_non_uniform_indexing = true,
+ .descriptor_indexing = true,
+ .runtime_descriptor_array = true,
+ .shader_enqueue = true,
+ .ray_query = true,
+ .ray_cull_mask = true,
+ .ray_tracing = true,
+ .ray_tracing_position_fetch = true,
},
- .ubo_addr_format = nir_address_format_32bit_index_offset,
- .ssbo_addr_format = nir_address_format_32bit_index_offset,
+ .ubo_addr_format = nir_address_format_vec2_index_32bit_offset,
+ .ssbo_addr_format = nir_address_format_vec2_index_32bit_offset,
.phys_ssbo_addr_format = nir_address_format_64bit_global,
.push_const_addr_format = nir_address_format_logical,
.shared_addr_format = nir_address_format_32bit_offset,
- .frag_coord_is_sysval = false,
+ .constant_addr_format = nir_address_format_64bit_global,
+#ifdef VK_ENABLE_BETA_EXTENSIONS
+ .shader_index = node_info ? node_info->index : 0,
+#endif
};
- nir = spirv_to_nir(spirv, module->size / 4,
- spec_entries, num_spec_entries,
- stage, entrypoint_name, &spirv_options, drv_options);
+ result = vk_pipeline_shader_stage_to_nir(&pdevice->vk, sinfo,
+ &spirv_options, pdevice->physical_device->drv_options[stage],
+ NULL, nir);
+ return result;
+}
- if (!nir) {
- free(spec_entries);
- return;
+static bool
+inline_variant_equals(const void *a, const void *b)
+{
+ const struct lvp_inline_variant *av = a, *bv = b;
+ assert(av->mask == bv->mask);
+ u_foreach_bit(slot, av->mask) {
+ if (memcmp(av->vals[slot], bv->vals[slot], sizeof(av->vals[slot])))
+ return false;
}
- nir_validate_shader(nir, NULL);
+ return true;
+}
- free(spec_entries);
+static const struct vk_ycbcr_conversion_state *
+lvp_ycbcr_conversion_lookup(const void *data, uint32_t set, uint32_t binding, uint32_t array_index)
+{
+ const struct lvp_pipeline_layout *layout = data;
- NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
- NIR_PASS_V(nir, nir_lower_returns);
- NIR_PASS_V(nir, nir_inline_functions);
- NIR_PASS_V(nir, nir_copy_prop);
- NIR_PASS_V(nir, nir_opt_deref);
+ const struct lvp_descriptor_set_layout *set_layout = container_of(layout->vk.set_layouts[set], struct lvp_descriptor_set_layout, vk);
+ const struct lvp_descriptor_set_binding_layout *binding_layout = &set_layout->binding[binding];
+ if (!binding_layout->immutable_samplers)
+ return NULL;
- /* Pick off the single entrypoint that we want */
- foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
- if (!func->is_entrypoint)
- exec_node_remove(&func->node);
- }
- assert(exec_list_length(&nir->functions) == 1);
+ struct vk_ycbcr_conversion *ycbcr_conversion = binding_layout->immutable_samplers[array_index]->vk.ycbcr_conversion;
+ return ycbcr_conversion ? &ycbcr_conversion->state : NULL;
+}
- NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);
- NIR_PASS_V(nir, nir_split_var_copies);
- NIR_PASS_V(nir, nir_split_per_member_structs);
+/* pipeline is NULL for shader objects. */
+static void
+lvp_shader_lower(struct lvp_device *pdevice, struct lvp_pipeline *pipeline, nir_shader *nir, struct lvp_pipeline_layout *layout)
+{
+ if (nir->info.stage != MESA_SHADER_TESS_CTRL)
+ NIR_PASS_V(nir, remove_barriers, nir->info.stage == MESA_SHADER_COMPUTE || nir->info.stage == MESA_SHADER_MESH || nir->info.stage == MESA_SHADER_TASK);
- NIR_PASS_V(nir, nir_remove_dead_variables,
- nir_var_shader_in | nir_var_shader_out | nir_var_system_value, NULL);
+ const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = {
+ .frag_coord = true,
+ .point_coord = true,
+ };
+ NIR_PASS_V(nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);
+
+ struct nir_lower_subgroups_options subgroup_opts = {0};
+ subgroup_opts.lower_quad = true;
+ subgroup_opts.ballot_components = 1;
+ subgroup_opts.ballot_bit_size = 32;
+ subgroup_opts.lower_inverse_ballot = true;
+ NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_opts);
- if (stage == MESA_SHADER_FRAGMENT)
+ if (nir->info.stage == MESA_SHADER_FRAGMENT)
lvp_lower_input_attachments(nir, false);
NIR_PASS_V(nir, nir_lower_system_values);
- NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);
+ NIR_PASS_V(nir, nir_lower_is_helper_invocation);
+ NIR_PASS_V(nir, lower_demote);
- NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
- NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_uniform, NULL);
+ const struct nir_lower_compute_system_values_options compute_system_values = {0};
+ NIR_PASS_V(nir, nir_lower_compute_system_values, &compute_system_values);
- lvp_lower_pipeline_layout(pipeline->device, pipeline->layout, nir);
+ NIR_PASS_V(nir, nir_remove_dead_variables,
+ nir_var_uniform | nir_var_image, NULL);
+
+ optimize(nir);
+ nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
NIR_PASS_V(nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(nir), true, true);
NIR_PASS_V(nir, nir_split_var_copies);
@@ -567,17 +441,39 @@ lvp_shader_compile_to_ir(struct lvp_pipeline *pipeline,
NIR_PASS_V(nir, nir_lower_explicit_io,
nir_var_mem_ubo | nir_var_mem_ssbo,
- nir_address_format_32bit_index_offset);
+ nir_address_format_vec2_index_32bit_offset);
NIR_PASS_V(nir, nir_lower_explicit_io,
- nir_var_mem_global,
+ nir_var_mem_global | nir_var_mem_constant,
nir_address_format_64bit_global);
- if (nir->info.stage == MESA_SHADER_COMPUTE) {
+ if (nir->info.stage == MESA_SHADER_COMPUTE)
+ lvp_lower_exec_graph(pipeline, nir);
+
+ NIR_PASS(_, nir, nir_vk_lower_ycbcr_tex, lvp_ycbcr_conversion_lookup, layout);
+
+ nir_lower_non_uniform_access_options options = {
+ .types = nir_lower_non_uniform_ubo_access | nir_lower_non_uniform_texture_access | nir_lower_non_uniform_image_access,
+ };
+ NIR_PASS(_, nir, nir_lower_non_uniform_access, &options);
+
+ lvp_lower_pipeline_layout(pdevice, layout, nir);
+
+ NIR_PASS(_, nir, lvp_nir_lower_ray_queries);
+
+ if (nir->info.stage == MESA_SHADER_COMPUTE ||
+ nir->info.stage == MESA_SHADER_TASK ||
+ nir->info.stage == MESA_SHADER_MESH) {
NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_mem_shared, shared_var_info);
NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_shared, nir_address_format_32bit_offset);
}
+ if (nir->info.stage == MESA_SHADER_TASK ||
+ nir->info.stage == MESA_SHADER_MESH) {
+ NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_mem_task_payload, shared_var_info);
+ NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_task_payload, nir_address_format_32bit_offset);
+ }
+
NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
if (nir->info.stage == MESA_SHADER_VERTEX ||
@@ -587,52 +483,23 @@ lvp_shader_compile_to_ir(struct lvp_pipeline *pipeline,
NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, true);
}
- do {
- progress = false;
-
- NIR_PASS(progress, nir, nir_lower_flrp, 32|64, true);
- NIR_PASS(progress, nir, nir_split_array_vars, nir_var_function_temp);
- NIR_PASS(progress, nir, nir_shrink_vec_array_vars, nir_var_function_temp);
- NIR_PASS(progress, nir, nir_opt_deref);
- NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
-
- NIR_PASS(progress, nir, nir_copy_prop);
- NIR_PASS(progress, nir, nir_opt_dce);
- NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
-
- NIR_PASS(progress, nir, nir_opt_algebraic);
- NIR_PASS(progress, nir, nir_opt_constant_folding);
-
- NIR_PASS(progress, nir, nir_opt_remove_phis);
- bool trivial_continues = false;
- NIR_PASS(trivial_continues, nir, nir_opt_trivial_continues);
- progress |= trivial_continues;
- if (trivial_continues) {
- /* If nir_opt_trivial_continues makes progress, then we need to clean
- * things up if we want any hope of nir_opt_if or nir_opt_loop_unroll
- * to make progress.
- */
- NIR_PASS(progress, nir, nir_copy_prop);
- NIR_PASS(progress, nir, nir_opt_dce);
- NIR_PASS(progress, nir, nir_opt_remove_phis);
- }
- NIR_PASS(progress, nir, nir_opt_if, true);
- NIR_PASS(progress, nir, nir_opt_dead_cf);
- NIR_PASS(progress, nir, nir_opt_conditional_discard);
- NIR_PASS(progress, nir, nir_opt_remove_phis);
- NIR_PASS(progress, nir, nir_opt_cse);
- NIR_PASS(progress, nir, nir_opt_undef);
-
- NIR_PASS(progress, nir, nir_opt_deref);
- NIR_PASS(progress, nir, nir_lower_alu_to_scalar, NULL, NULL);
- } while (progress);
+ /* TODO: also optimize the tex srcs. see radeonSI for reference */
+ /* Skip if there are potentially conflicting rounding modes */
+ struct nir_opt_16bit_tex_image_options opt_16bit_options = {
+ .rounding_mode = nir_rounding_mode_undef,
+ .opt_tex_dest_types = nir_type_float | nir_type_uint | nir_type_int,
+ };
+ NIR_PASS_V(nir, nir_opt_16bit_tex_image, &opt_16bit_options);
- NIR_PASS_V(nir, nir_lower_var_copies);
- NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
- NIR_PASS_V(nir, nir_opt_dce);
- nir_sweep(nir);
+ /* Lower texture OPs llvmpipe supports to reduce the amount of sample
+ * functions that need to be pre-compiled.
+ */
+ const nir_lower_tex_options tex_options = {
+ .lower_txd = true,
+ };
+ NIR_PASS(_, nir, nir_lower_tex, &tex_options);
- nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
+ lvp_shader_optimize(nir);
if (nir->info.stage != MESA_SHADER_VERTEX)
nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, nir->info.stage);
@@ -644,13 +511,43 @@ lvp_shader_compile_to_ir(struct lvp_pipeline *pipeline,
}
nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
nir->info.stage);
- pipeline->pipeline_nir[stage] = nir;
}
-static void fill_shader_prog(struct pipe_shader_state *state, gl_shader_stage stage, struct lvp_pipeline *pipeline)
+VkResult
+lvp_spirv_to_nir(struct lvp_pipeline *pipeline, const VkPipelineShaderStageCreateInfo *sinfo,
+ nir_shader **out_nir)
+{
+ VkResult result = compile_spirv(pipeline->device, sinfo, out_nir);
+ if (result == VK_SUCCESS)
+ lvp_shader_lower(pipeline->device, pipeline, *out_nir, pipeline->layout);
+
+ return result;
+}
+
+void
+lvp_shader_init(struct lvp_shader *shader, nir_shader *nir)
+{
+ nir_function_impl *impl = nir_shader_get_entrypoint(nir);
+ if (impl->ssa_alloc > 100) //skip for small shaders
+ shader->inlines.must_inline = lvp_find_inlinable_uniforms(shader, nir);
+ shader->pipeline_nir = lvp_create_pipeline_nir(nir);
+ if (shader->inlines.can_inline)
+ _mesa_set_init(&shader->inlines.variants, NULL, NULL, inline_variant_equals);
+}
+
+static VkResult
+lvp_shader_compile_to_ir(struct lvp_pipeline *pipeline,
+ const VkPipelineShaderStageCreateInfo *sinfo)
{
- state->type = PIPE_SHADER_IR_NIR;
- state->ir.nir = pipeline->pipeline_nir[stage];
+ gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
+ assert(stage <= LVP_SHADER_STAGES && stage != MESA_SHADER_NONE);
+ nir_shader *nir;
+ VkResult result = lvp_spirv_to_nir(pipeline, sinfo, &nir);
+ if (result == VK_SUCCESS) {
+ struct lvp_shader *shader = &pipeline->shaders[stage];
+ lvp_shader_init(shader, nir);
+ }
+ return result;
}
static void
@@ -684,108 +581,228 @@ merge_tess_info(struct shader_info *tes_info,
tcs_info->tess.spacing == tes_info->tess.spacing);
tes_info->tess.spacing |= tcs_info->tess.spacing;
- assert(tcs_info->tess.primitive_mode == 0 ||
- tes_info->tess.primitive_mode == 0 ||
- tcs_info->tess.primitive_mode == tes_info->tess.primitive_mode);
- tes_info->tess.primitive_mode |= tcs_info->tess.primitive_mode;
+ assert(tcs_info->tess._primitive_mode == 0 ||
+ tes_info->tess._primitive_mode == 0 ||
+ tcs_info->tess._primitive_mode == tes_info->tess._primitive_mode);
+ tes_info->tess._primitive_mode |= tcs_info->tess._primitive_mode;
tes_info->tess.ccw |= tcs_info->tess.ccw;
tes_info->tess.point_mode |= tcs_info->tess.point_mode;
}
-static gl_shader_stage
-lvp_shader_stage(VkShaderStageFlagBits stage)
+static void
+lvp_shader_xfb_init(struct lvp_shader *shader)
{
- switch (stage) {
- case VK_SHADER_STAGE_VERTEX_BIT:
- return MESA_SHADER_VERTEX;
- case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
- return MESA_SHADER_TESS_CTRL;
- case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
- return MESA_SHADER_TESS_EVAL;
- case VK_SHADER_STAGE_GEOMETRY_BIT:
- return MESA_SHADER_GEOMETRY;
- case VK_SHADER_STAGE_FRAGMENT_BIT:
- return MESA_SHADER_FRAGMENT;
- case VK_SHADER_STAGE_COMPUTE_BIT:
- return MESA_SHADER_COMPUTE;
- default:
- unreachable("invalid VkShaderStageFlagBits");
- return MESA_SHADER_NONE;
+ nir_xfb_info *xfb_info = shader->pipeline_nir->nir->xfb_info;
+ if (xfb_info) {
+ uint8_t output_mapping[VARYING_SLOT_TESS_MAX];
+ memset(output_mapping, 0, sizeof(output_mapping));
+
+ nir_foreach_shader_out_variable(var, shader->pipeline_nir->nir) {
+ unsigned slots = nir_variable_count_slots(var, var->type);
+ for (unsigned i = 0; i < slots; i++)
+ output_mapping[var->data.location + i] = var->data.driver_location + i;
+ }
+
+ shader->stream_output.num_outputs = xfb_info->output_count;
+ for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
+ if (xfb_info->buffers_written & (1 << i)) {
+ shader->stream_output.stride[i] = xfb_info->buffers[i].stride / 4;
+ }
+ }
+ for (unsigned i = 0; i < xfb_info->output_count; i++) {
+ shader->stream_output.output[i].output_buffer = xfb_info->outputs[i].buffer;
+ shader->stream_output.output[i].dst_offset = xfb_info->outputs[i].offset / 4;
+ shader->stream_output.output[i].register_index = output_mapping[xfb_info->outputs[i].location];
+ shader->stream_output.output[i].num_components = util_bitcount(xfb_info->outputs[i].component_mask);
+ shader->stream_output.output[i].start_component = xfb_info->outputs[i].component_offset;
+ shader->stream_output.output[i].stream = xfb_info->buffer_to_stream[xfb_info->outputs[i].buffer];
+ }
+
}
}
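
Review note: nir_xfb_info stores buffer strides and output offsets in bytes, while pipe_stream_output_info counts in 32-bit dwords, hence the divisions by 4 above. A worked example with hypothetical values:

    /* xfb buffer 0: stride 32 bytes, output written at byte offset 16 */
    stream_output.stride[0]            = 32 / 4;  /* 8 dwords per vertex */
    stream_output.output[0].dst_offset = 16 / 4;  /* starts at dword 4   */
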
-static VkResult
-lvp_pipeline_compile(struct lvp_pipeline *pipeline,
- gl_shader_stage stage)
+static void
+lvp_pipeline_xfb_init(struct lvp_pipeline *pipeline)
+{
+ gl_shader_stage stage = MESA_SHADER_VERTEX;
+ if (pipeline->shaders[MESA_SHADER_GEOMETRY].pipeline_nir)
+ stage = MESA_SHADER_GEOMETRY;
+ else if (pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir)
+ stage = MESA_SHADER_TESS_EVAL;
+ else if (pipeline->shaders[MESA_SHADER_MESH].pipeline_nir)
+ stage = MESA_SHADER_MESH;
+ pipeline->last_vertex = stage;
+ lvp_shader_xfb_init(&pipeline->shaders[stage]);
+}
+
+static void *
+lvp_shader_compile_stage(struct lvp_device *device, struct lvp_shader *shader, nir_shader *nir)
{
- struct lvp_device *device = pipeline->device;
- device->physical_device->pscreen->finalize_nir(device->physical_device->pscreen, pipeline->pipeline_nir[stage]);
- if (stage == MESA_SHADER_COMPUTE) {
+ if (nir->info.stage == MESA_SHADER_COMPUTE) {
struct pipe_compute_state shstate = {0};
- shstate.prog = (void *)pipeline->pipeline_nir[MESA_SHADER_COMPUTE];
+ shstate.prog = nir;
shstate.ir_type = PIPE_SHADER_IR_NIR;
- shstate.req_local_mem = pipeline->pipeline_nir[MESA_SHADER_COMPUTE]->info.shared_size;
- pipeline->shader_cso[PIPE_SHADER_COMPUTE] = device->queue.ctx->create_compute_state(device->queue.ctx, &shstate);
+ shstate.static_shared_mem = nir->info.shared_size;
+ return device->queue.ctx->create_compute_state(device->queue.ctx, &shstate);
} else {
struct pipe_shader_state shstate = {0};
- fill_shader_prog(&shstate, stage, pipeline);
-
- if (stage == MESA_SHADER_VERTEX ||
- stage == MESA_SHADER_GEOMETRY ||
- stage == MESA_SHADER_TESS_EVAL) {
- nir_xfb_info *xfb_info = nir_gather_xfb_info(pipeline->pipeline_nir[stage], NULL);
- if (xfb_info) {
- uint8_t output_mapping[VARYING_SLOT_TESS_MAX];
- memset(output_mapping, 0, sizeof(output_mapping));
-
- nir_foreach_shader_out_variable(var, pipeline->pipeline_nir[stage]) {
- unsigned slots = var->data.compact ? DIV_ROUND_UP(glsl_get_length(var->type), 4)
- : glsl_count_attribute_slots(var->type, false);
- for (unsigned i = 0; i < slots; i++)
- output_mapping[var->data.location + i] = var->data.driver_location + i;
- }
-
- shstate.stream_output.num_outputs = xfb_info->output_count;
- for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
- if (xfb_info->buffers_written & (1 << i)) {
- shstate.stream_output.stride[i] = xfb_info->buffers[i].stride / 4;
- }
- }
- for (unsigned i = 0; i < xfb_info->output_count; i++) {
- shstate.stream_output.output[i].output_buffer = xfb_info->outputs[i].buffer;
- shstate.stream_output.output[i].dst_offset = xfb_info->outputs[i].offset / 4;
- shstate.stream_output.output[i].register_index = output_mapping[xfb_info->outputs[i].location];
- shstate.stream_output.output[i].num_components = util_bitcount(xfb_info->outputs[i].component_mask);
- shstate.stream_output.output[i].start_component = ffs(xfb_info->outputs[i].component_mask) - 1;
- shstate.stream_output.output[i].stream = xfb_info->buffer_to_stream[xfb_info->outputs[i].buffer];
- }
-
- ralloc_free(xfb_info);
- }
- }
+ shstate.type = PIPE_SHADER_IR_NIR;
+ shstate.ir.nir = nir;
+ memcpy(&shstate.stream_output, &shader->stream_output, sizeof(shstate.stream_output));
- switch (stage) {
+ switch (nir->info.stage) {
case MESA_SHADER_FRAGMENT:
- pipeline->shader_cso[PIPE_SHADER_FRAGMENT] = device->queue.ctx->create_fs_state(device->queue.ctx, &shstate);
- break;
+ return device->queue.ctx->create_fs_state(device->queue.ctx, &shstate);
case MESA_SHADER_VERTEX:
- pipeline->shader_cso[PIPE_SHADER_VERTEX] = device->queue.ctx->create_vs_state(device->queue.ctx, &shstate);
- break;
+ return device->queue.ctx->create_vs_state(device->queue.ctx, &shstate);
case MESA_SHADER_GEOMETRY:
- pipeline->shader_cso[PIPE_SHADER_GEOMETRY] = device->queue.ctx->create_gs_state(device->queue.ctx, &shstate);
- break;
+ return device->queue.ctx->create_gs_state(device->queue.ctx, &shstate);
case MESA_SHADER_TESS_CTRL:
- pipeline->shader_cso[PIPE_SHADER_TESS_CTRL] = device->queue.ctx->create_tcs_state(device->queue.ctx, &shstate);
- break;
+ return device->queue.ctx->create_tcs_state(device->queue.ctx, &shstate);
case MESA_SHADER_TESS_EVAL:
- pipeline->shader_cso[PIPE_SHADER_TESS_EVAL] = device->queue.ctx->create_tes_state(device->queue.ctx, &shstate);
- break;
+ return device->queue.ctx->create_tes_state(device->queue.ctx, &shstate);
+ case MESA_SHADER_TASK:
+ return device->queue.ctx->create_ts_state(device->queue.ctx, &shstate);
+ case MESA_SHADER_MESH:
+ return device->queue.ctx->create_ms_state(device->queue.ctx, &shstate);
default:
unreachable("illegal shader");
break;
}
}
- return VK_SUCCESS;
+ return NULL;
+}
+
+void *
+lvp_shader_compile(struct lvp_device *device, struct lvp_shader *shader, nir_shader *nir, bool locked)
+{
+ device->physical_device->pscreen->finalize_nir(device->physical_device->pscreen, nir);
+
+ if (!locked)
+ simple_mtx_lock(&device->queue.lock);
+
+ void *state = lvp_shader_compile_stage(device, shader, nir);
+
+ if (!locked)
+ simple_mtx_unlock(&device->queue.lock);
+
+ return state;
+}
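
Review note: callers hand lvp_shader_compile a clone of the cached NIR so the original stays reusable for later variants, and the gallium CSO is created under the queue lock unless the caller already holds it. Usage mirrors lvp_pipeline_shaders_compile further down:

    /* caller does not already hold device->queue.lock */
    void *cso = lvp_shader_compile(device, shader,
                                   nir_shader_clone(NULL, shader->pipeline_nir->nir),
                                   false);
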
+
+#ifndef NDEBUG
+static bool
+layouts_equal(const struct lvp_descriptor_set_layout *a, const struct lvp_descriptor_set_layout *b)
+{
+ const uint8_t *pa = (const uint8_t*)a, *pb = (const uint8_t*)b;
+ uint32_t hash_start_offset = sizeof(struct vk_descriptor_set_layout);
+ uint32_t binding_offset = offsetof(struct lvp_descriptor_set_layout, binding);
+ /* base equal */
+ if (memcmp(pa + hash_start_offset, pb + hash_start_offset, binding_offset - hash_start_offset))
+ return false;
+
+ /* bindings equal */
+ if (a->binding_count != b->binding_count)
+ return false;
+ size_t binding_size = a->binding_count * sizeof(struct lvp_descriptor_set_binding_layout);
+ const struct lvp_descriptor_set_binding_layout *la = a->binding;
+ const struct lvp_descriptor_set_binding_layout *lb = b->binding;
+ if (memcmp(la, lb, binding_size)) {
+ for (unsigned i = 0; i < a->binding_count; i++) {
+ if (memcmp(&la[i], &lb[i], offsetof(struct lvp_descriptor_set_binding_layout, immutable_samplers)))
+ return false;
+ }
+ }
+
+ /* immutable sampler equal */
+ if (a->immutable_sampler_count != b->immutable_sampler_count)
+ return false;
+ if (a->immutable_sampler_count) {
+ size_t sampler_size = a->immutable_sampler_count * sizeof(struct lvp_sampler *);
+ if (memcmp(pa + binding_offset + binding_size, pb + binding_offset + binding_size, sampler_size)) {
+ struct lvp_sampler **sa = (struct lvp_sampler **)(pa + binding_offset);
+ struct lvp_sampler **sb = (struct lvp_sampler **)(pb + binding_offset);
+ for (unsigned i = 0; i < a->immutable_sampler_count; i++) {
+ if (memcmp(sa[i], sb[i], sizeof(struct lvp_sampler)))
+ return false;
+ }
+ }
+ }
+ return true;
+}
+#endif
+
+static void
+merge_layouts(struct vk_device *device, struct lvp_pipeline *dst, struct lvp_pipeline_layout *src)
+{
+ if (!src)
+ return;
+ if (dst->layout) {
+ /* these must match */
+ ASSERTED VkPipelineCreateFlags src_flag = src->vk.create_flags & VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT;
+ ASSERTED VkPipelineCreateFlags dst_flag = dst->layout->vk.create_flags & VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT;
+ assert(src_flag == dst_flag);
+ }
+ /* always try to reuse existing layout: independent sets bit doesn't guarantee independent sets */
+ if (!dst->layout) {
+ dst->layout = (struct lvp_pipeline_layout*)vk_pipeline_layout_ref(&src->vk);
+ return;
+ }
+ /* this is a big optimization when hit */
+ if (dst->layout == src)
+ return;
+#ifndef NDEBUG
+ /* verify that layouts match */
+ const struct lvp_pipeline_layout *smaller = dst->layout->vk.set_count < src->vk.set_count ? dst->layout : src;
+ const struct lvp_pipeline_layout *bigger = smaller == dst->layout ? src : dst->layout;
+ for (unsigned i = 0; i < smaller->vk.set_count; i++) {
+ if (!smaller->vk.set_layouts[i] || !bigger->vk.set_layouts[i] ||
+ smaller->vk.set_layouts[i] == bigger->vk.set_layouts[i])
+ continue;
+
+ const struct lvp_descriptor_set_layout *smaller_set_layout =
+ vk_to_lvp_descriptor_set_layout(smaller->vk.set_layouts[i]);
+ const struct lvp_descriptor_set_layout *bigger_set_layout =
+ vk_to_lvp_descriptor_set_layout(bigger->vk.set_layouts[i]);
+
+ assert(!smaller_set_layout->binding_count ||
+ !bigger_set_layout->binding_count ||
+ layouts_equal(smaller_set_layout, bigger_set_layout));
+ }
+#endif
+ /* must be independent sets with different layouts: reallocate to avoid modifying original layout */
+ struct lvp_pipeline_layout *old_layout = dst->layout;
+ dst->layout = vk_zalloc(&device->alloc, sizeof(struct lvp_pipeline_layout), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ memcpy(dst->layout, old_layout, sizeof(struct lvp_pipeline_layout));
+ dst->layout->vk.ref_cnt = 1;
+ for (unsigned i = 0; i < dst->layout->vk.set_count; i++) {
+ if (dst->layout->vk.set_layouts[i])
+ vk_descriptor_set_layout_ref(dst->layout->vk.set_layouts[i]);
+ }
+ vk_pipeline_layout_unref(device, &old_layout->vk);
+
+ for (unsigned i = 0; i < src->vk.set_count; i++) {
+ if (!dst->layout->vk.set_layouts[i]) {
+ dst->layout->vk.set_layouts[i] = src->vk.set_layouts[i];
+ if (dst->layout->vk.set_layouts[i])
+ vk_descriptor_set_layout_ref(src->vk.set_layouts[i]);
+ }
+ }
+ dst->layout->vk.set_count = MAX2(dst->layout->vk.set_count,
+ src->vk.set_count);
+ dst->layout->push_constant_size += src->push_constant_size;
+ dst->layout->push_constant_stages |= src->push_constant_stages;
+}
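
Review note: merge_layouts implements the VK_EXT_graphics_pipeline_library independent-sets merge: missing set layouts are filled in from the other partial layout and push-constant state is unioned. Illustrative example (hypothetical layouts):

    /* pre-raster library: set layouts [A, NULL, C], push size 16
     * fragment library:   set layouts [NULL, B, NULL], push size 8
     * merged result:      [A, B, C], set_count 3, push size 24    */
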
+
+static void
+copy_shader_sanitized(struct lvp_shader *dst, const struct lvp_shader *src)
+{
+ *dst = *src;
+ dst->pipeline_nir = NULL; //this gets handled later
+ dst->tess_ccw = NULL; //this gets handled later
+ assert(!dst->shader_cso);
+ assert(!dst->tess_ccw_cso);
+ if (src->inlines.can_inline)
+ _mesa_set_init(&dst->inlines.variants, NULL, NULL, inline_variant_equals);
}
static VkResult
@@ -793,111 +810,186 @@ lvp_graphics_pipeline_init(struct lvp_pipeline *pipeline,
struct lvp_device *device,
struct lvp_pipeline_cache *cache,
const VkGraphicsPipelineCreateInfo *pCreateInfo,
- const VkAllocationCallbacks *alloc)
+ VkPipelineCreateFlagBits2KHR flags)
{
- if (alloc == NULL)
- alloc = &device->vk.alloc;
- pipeline->device = device;
- pipeline->layout = lvp_pipeline_layout_from_handle(pCreateInfo->layout);
- pipeline->force_min_sample = false;
+ pipeline->type = LVP_PIPELINE_GRAPHICS;
- pipeline->mem_ctx = ralloc_context(NULL);
- /* recreate createinfo */
- deep_copy_graphics_create_info(pipeline->mem_ctx, &pipeline->graphics_create_info, pCreateInfo);
- pipeline->is_compute_pipeline = false;
-
- const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *pv_state =
- vk_find_struct_const(pCreateInfo->pRasterizationState,
- PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT);
- pipeline->provoking_vertex_last = pv_state && pv_state->provokingVertexMode == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT;
-
- const VkPipelineRasterizationLineStateCreateInfoEXT *line_state =
- vk_find_struct_const(pCreateInfo->pRasterizationState,
- PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT);
- if (line_state) {
- /* always draw bresenham if !smooth */
- pipeline->line_stipple_enable = line_state->stippledLineEnable;
- pipeline->line_smooth = line_state->lineRasterizationMode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT;
- pipeline->disable_multisample = line_state->lineRasterizationMode == VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT ||
- line_state->lineRasterizationMode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT;
- pipeline->line_rectangular = line_state->lineRasterizationMode != VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT;
- if (pipeline->line_stipple_enable) {
- if (!dynamic_state_contains(pipeline->graphics_create_info.pDynamicState, VK_DYNAMIC_STATE_LINE_STIPPLE_EXT)) {
- pipeline->line_stipple_factor = line_state->lineStippleFactor - 1;
- pipeline->line_stipple_pattern = line_state->lineStipplePattern;
- } else {
- pipeline->line_stipple_factor = 0;
- pipeline->line_stipple_pattern = UINT16_MAX;
- }
+ VkResult result;
+
+ const VkGraphicsPipelineLibraryCreateInfoEXT *libinfo = vk_find_struct_const(pCreateInfo,
+ GRAPHICS_PIPELINE_LIBRARY_CREATE_INFO_EXT);
+ const VkPipelineLibraryCreateInfoKHR *libstate = vk_find_struct_const(pCreateInfo,
+ PIPELINE_LIBRARY_CREATE_INFO_KHR);
+ const VkGraphicsPipelineLibraryFlagsEXT layout_stages = VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT |
+ VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT;
+ if (libinfo)
+ pipeline->stages = libinfo->flags;
+ else if (!libstate)
+ pipeline->stages = VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT |
+ VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT |
+ VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT |
+ VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT;
+
+ if (flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR)
+ pipeline->library = true;
+
+ struct lvp_pipeline_layout *layout = lvp_pipeline_layout_from_handle(pCreateInfo->layout);
+
+ if (!layout || !(layout->vk.create_flags & VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT))
+ /* this is a regular pipeline with no partials: directly reuse */
+ pipeline->layout = layout ? (void*)vk_pipeline_layout_ref(&layout->vk) : NULL;
+ else if (pipeline->stages & layout_stages) {
+ if ((pipeline->stages & layout_stages) == layout_stages)
+ /* this has all the layout stages: directly reuse */
+ pipeline->layout = (void*)vk_pipeline_layout_ref(&layout->vk);
+ else {
+ /* this is a partial: copy for later merging to avoid modifying another layout */
+ merge_layouts(&device->vk, pipeline, layout);
}
- } else
- pipeline->line_rectangular = true;
-
- if (!dynamic_state_contains(pipeline->graphics_create_info.pDynamicState, VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT)) {
- const VkPipelineColorWriteCreateInfoEXT *cw_state =
- vk_find_struct_const(pCreateInfo->pColorBlendState, PIPELINE_COLOR_WRITE_CREATE_INFO_EXT);
- if (cw_state) {
- for (unsigned i = 0; i < cw_state->attachmentCount; i++)
- if (!cw_state->pColorWriteEnables[i]) {
- VkPipelineColorBlendAttachmentState *att = (void*)&pipeline->graphics_create_info.pColorBlendState->pAttachments[i];
- att->colorWriteMask = 0;
+ }
+
+ if (libstate) {
+ for (unsigned i = 0; i < libstate->libraryCount; i++) {
+ LVP_FROM_HANDLE(lvp_pipeline, p, libstate->pLibraries[i]);
+ vk_graphics_pipeline_state_merge(&pipeline->graphics_state,
+ &p->graphics_state);
+ if (p->stages & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) {
+ pipeline->line_smooth = p->line_smooth;
+ pipeline->disable_multisample = p->disable_multisample;
+ pipeline->line_rectangular = p->line_rectangular;
+ memcpy(pipeline->shaders, p->shaders, sizeof(struct lvp_shader) * 4);
+ memcpy(&pipeline->shaders[MESA_SHADER_TASK], &p->shaders[MESA_SHADER_TASK], sizeof(struct lvp_shader) * 2);
+ lvp_forall_gfx_stage(i) {
+ if (i == MESA_SHADER_FRAGMENT)
+ continue;
+ copy_shader_sanitized(&pipeline->shaders[i], &p->shaders[i]);
}
+ }
+ if (p->stages & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) {
+ pipeline->force_min_sample = p->force_min_sample;
+ copy_shader_sanitized(&pipeline->shaders[MESA_SHADER_FRAGMENT], &p->shaders[MESA_SHADER_FRAGMENT]);
+ }
+ if (p->stages & layout_stages) {
+ if (!layout || (layout->vk.create_flags & VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT))
+ merge_layouts(&device->vk, pipeline, p->layout);
+ }
+ pipeline->stages |= p->stages;
}
}
+ result = vk_graphics_pipeline_state_fill(&device->vk,
+ &pipeline->graphics_state,
+ pCreateInfo, NULL, 0, NULL, NULL,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT,
+ &pipeline->state_data);
+ if (result != VK_SUCCESS)
+ return result;
+
+ assert(pipeline->library || pipeline->stages & (VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT |
+ VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT |
+ VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT));
+
+ pipeline->device = device;
for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
- VK_FROM_HANDLE(vk_shader_module, module,
- pCreateInfo->pStages[i].module);
- gl_shader_stage stage = lvp_shader_stage(pCreateInfo->pStages[i].stage);
- lvp_shader_compile_to_ir(pipeline, module,
- pCreateInfo->pStages[i].pName,
- stage,
- pCreateInfo->pStages[i].pSpecializationInfo);
- if (!pipeline->pipeline_nir[stage])
- return VK_ERROR_FEATURE_NOT_PRESENT;
- }
+ const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i];
+ gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
+ if (stage == MESA_SHADER_FRAGMENT) {
+ if (!(pipeline->stages & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT))
+ continue;
+ } else {
+ if (!(pipeline->stages & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT))
+ continue;
+ }
+ result = lvp_shader_compile_to_ir(pipeline, sinfo);
+ if (result != VK_SUCCESS)
+ goto fail;
- if (pipeline->pipeline_nir[MESA_SHADER_FRAGMENT]) {
- if (pipeline->pipeline_nir[MESA_SHADER_FRAGMENT]->info.fs.uses_sample_qualifier ||
- BITSET_TEST(pipeline->pipeline_nir[MESA_SHADER_FRAGMENT]->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID) ||
- BITSET_TEST(pipeline->pipeline_nir[MESA_SHADER_FRAGMENT]->info.system_values_read, SYSTEM_VALUE_SAMPLE_POS))
- pipeline->force_min_sample = true;
+ switch (stage) {
+ case MESA_SHADER_FRAGMENT:
+ if (pipeline->shaders[MESA_SHADER_FRAGMENT].pipeline_nir->nir->info.fs.uses_sample_shading)
+ pipeline->force_min_sample = true;
+ break;
+ default: break;
+ }
+ }
+ if (pCreateInfo->stageCount && pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir) {
+ nir_lower_patch_vertices(pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir->nir, pipeline->shaders[MESA_SHADER_TESS_CTRL].pipeline_nir->nir->info.tess.tcs_vertices_out, NULL);
+ merge_tess_info(&pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir->nir->info, &pipeline->shaders[MESA_SHADER_TESS_CTRL].pipeline_nir->nir->info);
+ if (BITSET_TEST(pipeline->graphics_state.dynamic,
+ MESA_VK_DYNAMIC_TS_DOMAIN_ORIGIN)) {
+ pipeline->shaders[MESA_SHADER_TESS_EVAL].tess_ccw = lvp_create_pipeline_nir(nir_shader_clone(NULL, pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir->nir));
+ pipeline->shaders[MESA_SHADER_TESS_EVAL].tess_ccw->nir->info.tess.ccw = !pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir->nir->info.tess.ccw;
+ } else if (pipeline->graphics_state.ts &&
+ pipeline->graphics_state.ts->domain_origin == VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT) {
+ pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir->nir->info.tess.ccw = !pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir->nir->info.tess.ccw;
+ }
}
- if (pipeline->pipeline_nir[MESA_SHADER_TESS_CTRL]) {
- nir_lower_patch_vertices(pipeline->pipeline_nir[MESA_SHADER_TESS_EVAL], pipeline->pipeline_nir[MESA_SHADER_TESS_CTRL]->info.tess.tcs_vertices_out, NULL);
- merge_tess_info(&pipeline->pipeline_nir[MESA_SHADER_TESS_EVAL]->info, &pipeline->pipeline_nir[MESA_SHADER_TESS_CTRL]->info);
- const VkPipelineTessellationDomainOriginStateCreateInfo *domain_origin_state =
- vk_find_struct_const(pCreateInfo->pTessellationState,
- PIPELINE_TESSELLATION_DOMAIN_ORIGIN_STATE_CREATE_INFO);
- if (!domain_origin_state || domain_origin_state->domainOrigin == VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT)
- pipeline->pipeline_nir[MESA_SHADER_TESS_EVAL]->info.tess.ccw = !pipeline->pipeline_nir[MESA_SHADER_TESS_EVAL]->info.tess.ccw;
+ if (libstate) {
+ for (unsigned i = 0; i < libstate->libraryCount; i++) {
+ LVP_FROM_HANDLE(lvp_pipeline, p, libstate->pLibraries[i]);
+ if (p->stages & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) {
+ if (p->shaders[MESA_SHADER_FRAGMENT].pipeline_nir)
+ lvp_pipeline_nir_ref(&pipeline->shaders[MESA_SHADER_FRAGMENT].pipeline_nir, p->shaders[MESA_SHADER_FRAGMENT].pipeline_nir);
+ }
+ if (p->stages & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) {
+ lvp_forall_gfx_stage(j) {
+ if (j == MESA_SHADER_FRAGMENT)
+ continue;
+ if (p->shaders[j].pipeline_nir)
+ lvp_pipeline_nir_ref(&pipeline->shaders[j].pipeline_nir, p->shaders[j].pipeline_nir);
+ }
+ if (p->shaders[MESA_SHADER_TESS_EVAL].tess_ccw)
+ lvp_pipeline_nir_ref(&pipeline->shaders[MESA_SHADER_TESS_EVAL].tess_ccw, p->shaders[MESA_SHADER_TESS_EVAL].tess_ccw);
+ }
+ }
+ } else if (pipeline->stages & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) {
+ const struct vk_rasterization_state *rs = pipeline->graphics_state.rs;
+ if (rs) {
+ /* always draw Bresenham if !smooth; both Bresenham and smooth lines disable multisample rasterization */
+ pipeline->line_smooth = rs->line.mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_KHR;
+ pipeline->disable_multisample = rs->line.mode == VK_LINE_RASTERIZATION_MODE_BRESENHAM_KHR ||
+ rs->line.mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_KHR;
+ pipeline->line_rectangular = rs->line.mode != VK_LINE_RASTERIZATION_MODE_BRESENHAM_KHR;
+ } else
+ pipeline->line_rectangular = true;
+ lvp_pipeline_xfb_init(pipeline);
}
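+ /* complete pipelines (no libraries involved) can compile their shaders immediately; libraries defer */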
+ if (!libstate && !pipeline->library)
+ lvp_pipeline_shaders_compile(pipeline, false);
- pipeline->gs_output_lines = pipeline->pipeline_nir[MESA_SHADER_GEOMETRY] &&
- pipeline->pipeline_nir[MESA_SHADER_GEOMETRY]->info.gs.output_primitive == GL_LINES;
-
+ return VK_SUCCESS;
- bool has_fragment_shader = false;
- for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
- gl_shader_stage stage = lvp_shader_stage(pCreateInfo->pStages[i].stage);
- lvp_pipeline_compile(pipeline, stage);
- if (stage == MESA_SHADER_FRAGMENT)
- has_fragment_shader = true;
+fail:
+ for (unsigned i = 0; i < ARRAY_SIZE(pipeline->shaders); i++) {
+ lvp_pipeline_nir_ref(&pipeline->shaders[i].pipeline_nir, NULL);
}
+ vk_free(&device->vk.alloc, pipeline->state_data);
- if (has_fragment_shader == false) {
- /* create a dummy fragment shader for this pipeline. */
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL,
- "dummy_frag");
+ return result;
+}
- pipeline->pipeline_nir[MESA_SHADER_FRAGMENT] = b.shader;
- struct pipe_shader_state shstate = {0};
- shstate.type = PIPE_SHADER_IR_NIR;
- shstate.ir.nir = pipeline->pipeline_nir[MESA_SHADER_FRAGMENT];
- pipeline->shader_cso[PIPE_SHADER_FRAGMENT] = device->queue.ctx->create_fs_state(device->queue.ctx, &shstate);
+void
+lvp_pipeline_shaders_compile(struct lvp_pipeline *pipeline, bool locked)
+{
+ if (pipeline->compiled)
+ return;
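+ /* compile each stage's NIR into a gallium CSO unless it must be re-inlined per draw */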
+ for (uint32_t i = 0; i < ARRAY_SIZE(pipeline->shaders); i++) {
+ if (!pipeline->shaders[i].pipeline_nir)
+ continue;
+
+ gl_shader_stage stage = i;
+ assert(stage == pipeline->shaders[i].pipeline_nir->nir->info.stage);
+
+ if (!pipeline->shaders[stage].inlines.can_inline) {
+ pipeline->shaders[stage].shader_cso = lvp_shader_compile(pipeline->device, &pipeline->shaders[stage],
+ nir_shader_clone(NULL, pipeline->shaders[stage].pipeline_nir->nir), locked);
+ /* only the TES owns a tess_ccw variant; compile it once alongside the TES CSO instead of once per stage */
+ if (stage == MESA_SHADER_TESS_EVAL && pipeline->shaders[MESA_SHADER_TESS_EVAL].tess_ccw)
+ pipeline->shaders[MESA_SHADER_TESS_EVAL].tess_ccw_cso = lvp_shader_compile(pipeline->device, &pipeline->shaders[MESA_SHADER_TESS_EVAL],
+ nir_shader_clone(NULL, pipeline->shaders[MESA_SHADER_TESS_EVAL].tess_ccw->nir), locked);
+ }
}
- return VK_SUCCESS;
+ pipeline->compiled = true;
}
static VkResult
@@ -905,8 +997,9 @@ lvp_graphics_pipeline_create(
VkDevice _device,
VkPipelineCache _cache,
const VkGraphicsPipelineCreateInfo *pCreateInfo,
- const VkAllocationCallbacks *pAllocator,
- VkPipeline *pPipeline)
+ VkPipelineCreateFlagBits2KHR flags,
+ VkPipeline *pPipeline,
+ bool group)
{
LVP_FROM_HANDLE(lvp_device, device, _device);
LVP_FROM_HANDLE(lvp_pipeline_cache, cache, _cache);
@@ -915,19 +1008,50 @@ lvp_graphics_pipeline_create(
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);
- pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
+ size_t size = 0;
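+ /* VK_NV_device_generated_commands: reserve a trailing array for the per-group pipeline handles */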
+ const VkGraphicsPipelineShaderGroupsCreateInfoNV *groupinfo = vk_find_struct_const(pCreateInfo, GRAPHICS_PIPELINE_SHADER_GROUPS_CREATE_INFO_NV);
+ if (!group && groupinfo)
+ size += (groupinfo->groupCount + groupinfo->pipelineCount) * sizeof(VkPipeline);
+
+ pipeline = vk_zalloc(&device->vk.alloc, sizeof(*pipeline) + size, 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (pipeline == NULL)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
vk_object_base_init(&device->vk, &pipeline->base,
VK_OBJECT_TYPE_PIPELINE);
- result = lvp_graphics_pipeline_init(pipeline, device, cache, pCreateInfo,
- pAllocator);
+ uint64_t t0 = os_time_get_nano();
+ result = lvp_graphics_pipeline_init(pipeline, device, cache, pCreateInfo, flags);
if (result != VK_SUCCESS) {
- vk_free2(&device->vk.alloc, pAllocator, pipeline);
+ vk_free(&device->vk.alloc, pipeline);
return result;
}
+ if (!group && groupinfo) {
+ VkGraphicsPipelineCreateInfo pci = *pCreateInfo;
+ for (unsigned i = 0; i < groupinfo->groupCount; i++) {
+ const VkGraphicsShaderGroupCreateInfoNV *g = &groupinfo->pGroups[i];
+ pci.pVertexInputState = g->pVertexInputState;
+ pci.pTessellationState = g->pTessellationState;
+ pci.pStages = g->pStages;
+ pci.stageCount = g->stageCount;
+ result = lvp_graphics_pipeline_create(_device, _cache, &pci, flags, &pipeline->groups[i], true);
+ if (result != VK_SUCCESS) {
+ lvp_pipeline_destroy(device, pipeline, false);
+ return result;
+ }
+ pipeline->num_groups++;
+ }
+ for (unsigned i = 0; i < groupinfo->pipelineCount; i++)
+ pipeline->groups[pipeline->num_groups + i] = groupinfo->pPipelines[i];
+ pipeline->num_groups_total = groupinfo->groupCount + groupinfo->pipelineCount;
+ }
+
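+ /* report the measured wall-clock creation time via VkPipelineCreationFeedback */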
+ VkPipelineCreationFeedbackCreateInfo *feedback = (void*)vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
+ if (feedback && !group) {
+ feedback->pPipelineCreationFeedback->duration = os_time_get_nano() - t0;
+ feedback->pPipelineCreationFeedback->flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
+ memset(feedback->pPipelineStageCreationFeedbacks, 0, sizeof(VkPipelineCreationFeedback) * feedback->pipelineStageCreationFeedbackCount);
+ }
*pPipeline = lvp_pipeline_to_handle(pipeline);
@@ -946,16 +1070,27 @@ VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateGraphicsPipelines(
unsigned i = 0;
for (; i < count; i++) {
- VkResult r;
- r = lvp_graphics_pipeline_create(_device,
- pipelineCache,
- &pCreateInfos[i],
- pAllocator, &pPipelines[i]);
+ VkResult r = VK_PIPELINE_COMPILE_REQUIRED;
+ VkPipelineCreateFlagBits2KHR flags = vk_graphics_pipeline_create_flags(&pCreateInfos[i]);
+
+ if (!(flags & VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR))
+ r = lvp_graphics_pipeline_create(_device,
+ pipelineCache,
+ &pCreateInfos[i],
+ flags,
+ &pPipelines[i],
+ false);
if (r != VK_SUCCESS) {
result = r;
pPipelines[i] = VK_NULL_HANDLE;
+ if (flags & VK_PIPELINE_CREATE_2_EARLY_RETURN_ON_FAILURE_BIT_KHR)
+ break;
}
}
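+ /* on failure, every remaining entry must be written as VK_NULL_HANDLE */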
+ if (result != VK_SUCCESS) {
+ for (; i < count; i++)
+ pPipelines[i] = VK_NULL_HANDLE;
+ }
return result;
}
@@ -964,29 +1099,23 @@ static VkResult
lvp_compute_pipeline_init(struct lvp_pipeline *pipeline,
struct lvp_device *device,
struct lvp_pipeline_cache *cache,
- const VkComputePipelineCreateInfo *pCreateInfo,
- const VkAllocationCallbacks *alloc)
+ const VkComputePipelineCreateInfo *pCreateInfo)
{
- VK_FROM_HANDLE(vk_shader_module, module,
- pCreateInfo->stage.module);
- if (alloc == NULL)
- alloc = &device->vk.alloc;
pipeline->device = device;
pipeline->layout = lvp_pipeline_layout_from_handle(pCreateInfo->layout);
+ vk_pipeline_layout_ref(&pipeline->layout->vk);
pipeline->force_min_sample = false;
- pipeline->mem_ctx = ralloc_context(NULL);
- deep_copy_compute_create_info(pipeline->mem_ctx,
- &pipeline->compute_create_info, pCreateInfo);
- pipeline->is_compute_pipeline = true;
-
- lvp_shader_compile_to_ir(pipeline, module,
- pCreateInfo->stage.pName,
- MESA_SHADER_COMPUTE,
- pCreateInfo->stage.pSpecializationInfo);
- if (!pipeline->pipeline_nir[MESA_SHADER_COMPUTE])
- return VK_ERROR_FEATURE_NOT_PRESENT;
- lvp_pipeline_compile(pipeline, MESA_SHADER_COMPUTE);
+ pipeline->type = LVP_PIPELINE_COMPUTE;
+
+ VkResult result = lvp_shader_compile_to_ir(pipeline, &pCreateInfo->stage);
+ if (result != VK_SUCCESS)
+ return result;
+
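+ /* compute pipelines have a single stage: compile it now unless it needs per-draw inlining */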
+ struct lvp_shader *shader = &pipeline->shaders[MESA_SHADER_COMPUTE];
+ if (!shader->inlines.can_inline)
+ shader->shader_cso = lvp_shader_compile(pipeline->device, shader, nir_shader_clone(NULL, shader->pipeline_nir->nir), false);
+ pipeline->compiled = true;
return VK_SUCCESS;
}
@@ -995,7 +1124,7 @@ lvp_compute_pipeline_create(
VkDevice _device,
VkPipelineCache _cache,
const VkComputePipelineCreateInfo *pCreateInfo,
- const VkAllocationCallbacks *pAllocator,
+ VkPipelineCreateFlagBits2KHR flags,
VkPipeline *pPipeline)
{
LVP_FROM_HANDLE(lvp_device, device, _device);
@@ -1005,20 +1134,27 @@ lvp_compute_pipeline_create(
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO);
- pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
+ pipeline = vk_zalloc(&device->vk.alloc, sizeof(*pipeline), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (pipeline == NULL)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
vk_object_base_init(&device->vk, &pipeline->base,
VK_OBJECT_TYPE_PIPELINE);
- result = lvp_compute_pipeline_init(pipeline, device, cache, pCreateInfo,
- pAllocator);
+ uint64_t t0 = os_time_get_nano();
+ result = lvp_compute_pipeline_init(pipeline, device, cache, pCreateInfo);
if (result != VK_SUCCESS) {
- vk_free2(&device->vk.alloc, pAllocator, pipeline);
+ vk_free(&device->vk.alloc, pipeline);
return result;
}
+ const VkPipelineCreationFeedbackCreateInfo *feedback = (void*)vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
+ if (feedback) {
+ feedback->pPipelineCreationFeedback->duration = os_time_get_nano() - t0;
+ feedback->pPipelineCreationFeedback->flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
+ memset(feedback->pPipelineStageCreationFeedbacks, 0, sizeof(VkPipelineCreationFeedback) * feedback->pipelineStageCreationFeedbackCount);
+ }
+
*pPipeline = lvp_pipeline_to_handle(pipeline);
return VK_SUCCESS;
@@ -1036,16 +1172,360 @@ VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateComputePipelines(
unsigned i = 0;
for (; i < count; i++) {
- VkResult r;
- r = lvp_compute_pipeline_create(_device,
- pipelineCache,
- &pCreateInfos[i],
- pAllocator, &pPipelines[i]);
+ VkResult r = VK_PIPELINE_COMPILE_REQUIRED;
+ VkPipelineCreateFlagBits2KHR flags = vk_compute_pipeline_create_flags(&pCreateInfos[i]);
+
+ if (!(flags & VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR))
+ r = lvp_compute_pipeline_create(_device,
+ pipelineCache,
+ &pCreateInfos[i],
+ flags,
+ &pPipelines[i]);
+ if (r != VK_SUCCESS) {
+ result = r;
+ pPipelines[i] = VK_NULL_HANDLE;
+ if (flags & VK_PIPELINE_CREATE_2_EARLY_RETURN_ON_FAILURE_BIT_KHR)
+ break;
+ }
+ }
+ if (result != VK_SUCCESS) {
+ for (; i < count; i++)
+ pPipelines[i] = VK_NULL_HANDLE;
+ }
+
+ return result;
+}
+
+VKAPI_ATTR void VKAPI_CALL lvp_DestroyShaderEXT(
+ VkDevice _device,
+ VkShaderEXT _shader,
+ const VkAllocationCallbacks* pAllocator)
+{
+ LVP_FROM_HANDLE(lvp_device, device, _device);
+ LVP_FROM_HANDLE(lvp_shader, shader, _shader);
+
+ if (!shader)
+ return;
+ shader_destroy(device, shader, false);
+
+ vk_pipeline_layout_unref(&device->vk, &shader->layout->vk);
+ blob_finish(&shader->blob);
+ vk_object_base_finish(&shader->base);
+ vk_free2(&device->vk.alloc, pAllocator, shader);
+}
+
+static VkShaderEXT
+create_shader_object(struct lvp_device *device, const VkShaderCreateInfoEXT *pCreateInfo, const VkAllocationCallbacks *pAllocator)
+{
+ nir_shader *nir = NULL;
+ gl_shader_stage stage = vk_to_mesa_shader_stage(pCreateInfo->stage);
+ assert(stage < LVP_SHADER_STAGES && stage != MESA_SHADER_NONE);
+ if (pCreateInfo->codeType == VK_SHADER_CODE_TYPE_SPIRV_EXT) {
+ VkShaderModuleCreateInfo minfo = {
+ VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
+ NULL,
+ 0,
+ pCreateInfo->codeSize,
+ pCreateInfo->pCode,
+ };
+ VkPipelineShaderStageCreateFlags flags = 0;
+ if (pCreateInfo->flags & VK_SHADER_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT)
+ flags |= VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT;
+ if (pCreateInfo->flags & VK_SHADER_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT)
+ flags |= VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT;
+ VkPipelineShaderStageCreateInfo sinfo = {
+ VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ &minfo,
+ flags,
+ pCreateInfo->stage,
+ VK_NULL_HANDLE,
+ pCreateInfo->pName,
+ pCreateInfo->pSpecializationInfo,
+ };
+ VkResult result = compile_spirv(device, &sinfo, &nir);
+ if (result != VK_SUCCESS)
+ goto fail;
+ nir->info.separate_shader = true;
+ } else {
+ assert(pCreateInfo->codeType == VK_SHADER_CODE_TYPE_BINARY_EXT);
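+ /* binary layout: [cache UUID | SHA1 of payload | serialized NIR]; validate both before deserializing */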
+ if (pCreateInfo->codeSize < SHA1_DIGEST_LENGTH + VK_UUID_SIZE + 1)
+ return VK_NULL_HANDLE;
+ struct blob_reader blob;
+ const uint8_t *data = pCreateInfo->pCode;
+ uint8_t uuid[VK_UUID_SIZE];
+ lvp_device_get_cache_uuid(uuid);
+ if (memcmp(uuid, data, VK_UUID_SIZE))
+ return VK_NULL_HANDLE;
+ size_t size = pCreateInfo->codeSize - SHA1_DIGEST_LENGTH - VK_UUID_SIZE;
+ unsigned char sha1[SHA1_DIGEST_LENGTH];
+
+ struct mesa_sha1 sctx;
+ _mesa_sha1_init(&sctx);
+ _mesa_sha1_update(&sctx, data + SHA1_DIGEST_LENGTH + VK_UUID_SIZE, size);
+ _mesa_sha1_final(&sctx, sha1);
+ if (memcmp(sha1, data + VK_UUID_SIZE, SHA1_DIGEST_LENGTH))
+ return VK_NULL_HANDLE;
+
+ blob_reader_init(&blob, data + SHA1_DIGEST_LENGTH + VK_UUID_SIZE, size);
+ nir = nir_deserialize(NULL, device->pscreen->get_compiler_options(device->pscreen, PIPE_SHADER_IR_NIR, stage), &blob);
+ if (!nir)
+ goto fail;
+ }
+ if (!nir_shader_get_entrypoint(nir))
+ goto fail;
+ struct lvp_shader *shader = vk_object_zalloc(&device->vk, pAllocator, sizeof(struct lvp_shader), VK_OBJECT_TYPE_SHADER_EXT);
+ if (!shader)
+ goto fail;
+ blob_init(&shader->blob);
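+ /* shader objects carry their own layout built from the provided set layouts and push constant ranges */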
+ VkPipelineLayoutCreateInfo pci = {
+ VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ NULL,
+ 0,
+ pCreateInfo->setLayoutCount,
+ pCreateInfo->pSetLayouts,
+ pCreateInfo->pushConstantRangeCount,
+ pCreateInfo->pPushConstantRanges,
+ };
+ shader->layout = lvp_pipeline_layout_create(device, &pci, pAllocator);
+
+ if (pCreateInfo->codeType == VK_SHADER_CODE_TYPE_SPIRV_EXT)
+ lvp_shader_lower(device, NULL, nir, shader->layout);
+
+ lvp_shader_init(shader, nir);
+
+ lvp_shader_xfb_init(shader);
+ if (stage == MESA_SHADER_TESS_EVAL) {
+ /* spec requires that all tess modes are set in both shaders */
+ nir_lower_patch_vertices(shader->pipeline_nir->nir, shader->pipeline_nir->nir->info.tess.tcs_vertices_out, NULL);
+ shader->tess_ccw = lvp_create_pipeline_nir(nir_shader_clone(NULL, shader->pipeline_nir->nir));
+ shader->tess_ccw->nir->info.tess.ccw = !shader->pipeline_nir->nir->info.tess.ccw;
+ shader->tess_ccw_cso = lvp_shader_compile(device, shader, nir_shader_clone(NULL, shader->tess_ccw->nir), false);
+ } else if (stage == MESA_SHADER_FRAGMENT && nir->info.fs.uses_fbfetch_output) {
+ /* fragment shaders with fbfetch outputs are (currently) illegal for shader objects */
+ assert(!nir->info.fs.uses_fbfetch_output);
+ shader_destroy(device, shader, false);
+
+ vk_object_base_finish(&shader->base);
+ vk_free2(&device->vk.alloc, pAllocator, shader);
+ return VK_NULL_HANDLE;
+ }
+ nir_serialize(&shader->blob, nir, true);
+ shader->shader_cso = lvp_shader_compile(device, shader, nir_shader_clone(NULL, nir), false);
+ return lvp_shader_to_handle(shader);
+fail:
+ ralloc_free(nir);
+ return VK_NULL_HANDLE;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateShadersEXT(
+ VkDevice _device,
+ uint32_t createInfoCount,
+ const VkShaderCreateInfoEXT* pCreateInfos,
+ const VkAllocationCallbacks* pAllocator,
+ VkShaderEXT* pShaders)
+{
+ LVP_FROM_HANDLE(lvp_device, device, _device);
+ unsigned i;
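+ /* VK_EXT_shader_object: a bad binary reports INCOMPATIBLE_SHADER_BINARY and zeroes the remaining handles */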
+ for (i = 0; i < createInfoCount; i++) {
+ pShaders[i] = create_shader_object(device, &pCreateInfos[i], pAllocator);
+ if (!pShaders[i]) {
+ if (pCreateInfos[i].codeType == VK_SHADER_CODE_TYPE_BINARY_EXT) {
+ if (i < createInfoCount - 1)
+ memset(&pShaders[i + 1], 0, (createInfoCount - i - 1) * sizeof(VkShaderEXT));
+ return vk_error(device, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT);
+ }
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+ }
+ return VK_SUCCESS;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL lvp_GetShaderBinaryDataEXT(
+ VkDevice device,
+ VkShaderEXT _shader,
+ size_t* pDataSize,
+ void* pData)
+{
+ LVP_FROM_HANDLE(lvp_shader, shader, _shader);
+ VkResult ret = VK_SUCCESS;
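+ /* two-call idiom: report the required size when pData is NULL, otherwise write the blob */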
+ if (pData) {
+ if (*pDataSize < shader->blob.size + SHA1_DIGEST_LENGTH + VK_UUID_SIZE) {
+ ret = VK_INCOMPLETE;
+ *pDataSize = 0;
+ } else {
+ *pDataSize = MIN2(*pDataSize, shader->blob.size + SHA1_DIGEST_LENGTH + VK_UUID_SIZE);
+ uint8_t *data = pData;
+ lvp_device_get_cache_uuid(data);
+ struct mesa_sha1 sctx;
+ _mesa_sha1_init(&sctx);
+ _mesa_sha1_update(&sctx, shader->blob.data, shader->blob.size);
+ _mesa_sha1_final(&sctx, data + VK_UUID_SIZE);
+ memcpy(data + SHA1_DIGEST_LENGTH + VK_UUID_SIZE, shader->blob.data, shader->blob.size);
+ }
+ } else {
+ *pDataSize = shader->blob.size + SHA1_DIGEST_LENGTH + VK_UUID_SIZE;
+ }
+ return ret;
+}
+
+#ifdef VK_ENABLE_BETA_EXTENSIONS
+static VkResult
+lvp_exec_graph_pipeline_create(VkDevice _device, VkPipelineCache _cache,
+ const VkExecutionGraphPipelineCreateInfoAMDX *create_info,
+ VkPipelineCreateFlagBits2KHR flags,
+ VkPipeline *out_pipeline)
+{
+ LVP_FROM_HANDLE(lvp_device, device, _device);
+ struct lvp_pipeline *pipeline;
+ VkResult result;
+
+ assert(create_info->sType == VK_STRUCTURE_TYPE_EXECUTION_GRAPH_PIPELINE_CREATE_INFO_AMDX);
+
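+ /* groups contributed by linked libraries are appended after this pipeline's own stages */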
+ uint32_t stage_count = create_info->stageCount;
+ if (create_info->pLibraryInfo) {
+ for (uint32_t i = 0; i < create_info->pLibraryInfo->libraryCount; i++) {
+ VK_FROM_HANDLE(lvp_pipeline, library, create_info->pLibraryInfo->pLibraries[i]);
+ stage_count += library->num_groups;
+ }
+ }
+
+ pipeline = vk_zalloc(&device->vk.alloc, sizeof(*pipeline) + stage_count * sizeof(VkPipeline), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (!pipeline)
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ vk_object_base_init(&device->vk, &pipeline->base,
+ VK_OBJECT_TYPE_PIPELINE);
+
+ uint64_t t0 = os_time_get_nano();
+
+ pipeline->type = LVP_PIPELINE_EXEC_GRAPH;
+ pipeline->layout = lvp_pipeline_layout_from_handle(create_info->layout);
+
+ pipeline->exec_graph.scratch_size = 0;
+ pipeline->num_groups = stage_count;
+
+ uint32_t stage_index = 0;
+ for (uint32_t i = 0; i < create_info->stageCount; i++) {
+ const VkPipelineShaderStageNodeCreateInfoAMDX *node_info = vk_find_struct_const(
+ create_info->pStages[i].pNext, PIPELINE_SHADER_STAGE_NODE_CREATE_INFO_AMDX);
+
+ VkComputePipelineCreateInfo stage_create_info = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .flags = create_info->flags,
+ .stage = create_info->pStages[i],
+ .layout = create_info->layout,
+ };
+
+ result = lvp_compute_pipeline_create(_device, _cache, &stage_create_info, flags, &pipeline->groups[i]);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VK_FROM_HANDLE(lvp_pipeline, stage, pipeline->groups[i]);
+ nir_shader *nir = stage->shaders[MESA_SHADER_COMPUTE].pipeline_nir->nir;
+
+ if (node_info) {
+ stage->exec_graph.name = node_info->pName;
+ stage->exec_graph.index = node_info->index;
+ }
+
+ /* TODO: Add a shader-info NIR pass to figure out how many payloads the shader creates. */
+ stage->exec_graph.scratch_size = nir->info.cs.node_payloads_size * 256;
+ pipeline->exec_graph.scratch_size = MAX2(pipeline->exec_graph.scratch_size, stage->exec_graph.scratch_size);
+
+ stage_index++;
+ }
+
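+ /* append the groups of each linked library and take the max of their scratch sizes */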
+ if (create_info->pLibraryInfo) {
+ for (uint32_t i = 0; i < create_info->pLibraryInfo->libraryCount; i++) {
+ VK_FROM_HANDLE(lvp_pipeline, library, create_info->pLibraryInfo->pLibraries[i]);
+ for (uint32_t j = 0; j < library->num_groups; j++) {
+ /* TODO: Do we need reference counting? */
+ pipeline->groups[stage_index] = library->groups[j];
+ stage_index++;
+ }
+ pipeline->exec_graph.scratch_size = MAX2(pipeline->exec_graph.scratch_size, library->exec_graph.scratch_size);
+ }
+ }
+
+ const VkPipelineCreationFeedbackCreateInfo *feedback = (void*)vk_find_struct_const(create_info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
+ if (feedback) {
+ feedback->pPipelineCreationFeedback->duration = os_time_get_nano() - t0;
+ feedback->pPipelineCreationFeedback->flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
+ memset(feedback->pPipelineStageCreationFeedbacks, 0, sizeof(VkPipelineCreationFeedback) * feedback->pipelineStageCreationFeedbackCount);
+ }
+
+ *out_pipeline = lvp_pipeline_to_handle(pipeline);
+
+ return VK_SUCCESS;
+
+fail:
+ for (uint32_t i = 0; i < stage_count; i++)
+ lvp_DestroyPipeline(_device, pipeline->groups[i], NULL);
+
+ vk_free(&device->vk.alloc, pipeline);
+
+ return result;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+lvp_CreateExecutionGraphPipelinesAMDX(VkDevice device, VkPipelineCache pipelineCache,
+ uint32_t createInfoCount,
+ const VkExecutionGraphPipelineCreateInfoAMDX *pCreateInfos,
+ const VkAllocationCallbacks *pAllocator,
+ VkPipeline *pPipelines)
+{
+ VkResult result = VK_SUCCESS;
+ uint32_t i = 0;
+
+ for (; i < createInfoCount; i++) {
+ VkPipelineCreateFlagBits2KHR flags = vk_graph_pipeline_create_flags(&pCreateInfos[i]);
+
+ VkResult r = VK_PIPELINE_COMPILE_REQUIRED;
+ if (!(flags & VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR))
+ r = lvp_exec_graph_pipeline_create(device, pipelineCache, &pCreateInfos[i], flags, &pPipelines[i]);
if (r != VK_SUCCESS) {
result = r;
pPipelines[i] = VK_NULL_HANDLE;
+ if (flags & VK_PIPELINE_CREATE_2_EARLY_RETURN_ON_FAILURE_BIT_KHR)
+ break;
}
}
+ if (result != VK_SUCCESS) {
+ for (; i < createInfoCount; i++)
+ pPipelines[i] = VK_NULL_HANDLE;
+ }
return result;
}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+lvp_GetExecutionGraphPipelineScratchSizeAMDX(VkDevice device, VkPipeline executionGraph,
+ VkExecutionGraphPipelineScratchSizeAMDX *pSizeInfo)
+{
+ VK_FROM_HANDLE(lvp_pipeline, pipeline, executionGraph);
+ pSizeInfo->size = MAX2(pipeline->exec_graph.scratch_size * 32, 16);
+ return VK_SUCCESS;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+lvp_GetExecutionGraphPipelineNodeIndexAMDX(VkDevice device, VkPipeline executionGraph,
+ const VkPipelineShaderStageNodeCreateInfoAMDX *pNodeInfo,
+ uint32_t *pNodeIndex)
+{
+ VK_FROM_HANDLE(lvp_pipeline, pipeline, executionGraph);
+
+ for (uint32_t i = 0; i < pipeline->num_groups; i++) {
+ VK_FROM_HANDLE(lvp_pipeline, stage, pipeline->groups[i]);
+ if (stage->exec_graph.index == pNodeInfo->index &&
+ !strcmp(stage->exec_graph.name, pNodeInfo->pName)) {
+ *pNodeIndex = i;
+ return VK_SUCCESS;
+ }
+ }
+
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+}
+#endif