Diffstat (limited to 'src/gallium/frontends/lavapipe/lvp_pipeline.c')
-rw-r--r-- | src/gallium/frontends/lavapipe/lvp_pipeline.c | 1830
1 file changed, 1155 insertions, 675 deletions
diff --git a/src/gallium/frontends/lavapipe/lvp_pipeline.c b/src/gallium/frontends/lavapipe/lvp_pipeline.c index 012ac0bd048..0c63e57b3b7 100644 --- a/src/gallium/frontends/lavapipe/lvp_pipeline.c +++ b/src/gallium/frontends/lavapipe/lvp_pipeline.c @@ -22,10 +22,15 @@ */ #include "lvp_private.h" +#include "vk_nir_convert_ycbcr.h" +#include "vk_pipeline.h" +#include "vk_render_pass.h" #include "vk_util.h" #include "glsl_types.h" +#include "util/os_time.h" #include "spirv/nir_spirv.h" #include "nir/nir_builder.h" +#include "nir/nir_serialize.h" #include "lvp_lower_vulkan_resource.h" #include "pipe/p_state.h" #include "pipe/p_context.h" @@ -33,12 +38,75 @@ #define SPIR_V_MAGIC_NUMBER 0x07230203 -#define LVP_PIPELINE_DUP(dst, src, type, count) do { \ - type *temp = ralloc_array(mem_ctx, type, count); \ - if (!temp) return VK_ERROR_OUT_OF_HOST_MEMORY; \ - memcpy(temp, (src), sizeof(type) * count); \ - dst = temp; \ - } while(0) +#define MAX_DYNAMIC_STATES 72 + +typedef void (*cso_destroy_func)(struct pipe_context*, void*); + +static void +shader_destroy(struct lvp_device *device, struct lvp_shader *shader, bool locked) +{ + if (!shader->pipeline_nir) + return; + gl_shader_stage stage = shader->pipeline_nir->nir->info.stage; + cso_destroy_func destroy[] = { + device->queue.ctx->delete_vs_state, + device->queue.ctx->delete_tcs_state, + device->queue.ctx->delete_tes_state, + device->queue.ctx->delete_gs_state, + device->queue.ctx->delete_fs_state, + device->queue.ctx->delete_compute_state, + device->queue.ctx->delete_ts_state, + device->queue.ctx->delete_ms_state, + }; + + if (!locked) + simple_mtx_lock(&device->queue.lock); + + set_foreach(&shader->inlines.variants, entry) { + struct lvp_inline_variant *variant = (void*)entry->key; + destroy[stage](device->queue.ctx, variant->cso); + free(variant); + } + ralloc_free(shader->inlines.variants.table); + + if (shader->shader_cso) + destroy[stage](device->queue.ctx, shader->shader_cso); + if (shader->tess_ccw_cso) + destroy[stage](device->queue.ctx, shader->tess_ccw_cso); + + if (!locked) + simple_mtx_unlock(&device->queue.lock); + + lvp_pipeline_nir_ref(&shader->pipeline_nir, NULL); + lvp_pipeline_nir_ref(&shader->tess_ccw, NULL); +} + +void +lvp_pipeline_destroy(struct lvp_device *device, struct lvp_pipeline *pipeline, bool locked) +{ + lvp_forall_stage(i) + shader_destroy(device, &pipeline->shaders[i], locked); + + if (pipeline->layout) + vk_pipeline_layout_unref(&device->vk, &pipeline->layout->vk); + + for (unsigned i = 0; i < pipeline->num_groups; i++) { + LVP_FROM_HANDLE(lvp_pipeline, p, pipeline->groups[i]); + lvp_pipeline_destroy(device, p, locked); + } + + if (pipeline->rt.stages) { + for (uint32_t i = 0; i < pipeline->rt.stage_count; i++) + lvp_pipeline_nir_ref(pipeline->rt.stages + i, NULL); + } + + free(pipeline->rt.stages); + free(pipeline->rt.groups); + + vk_free(&device->vk.alloc, pipeline->state_data); + vk_object_base_finish(&pipeline->base); + vk_free(&device->vk.alloc, pipeline); +} VKAPI_ATTR void VKAPI_CALL lvp_DestroyPipeline( VkDevice _device, @@ -51,431 +119,187 @@ VKAPI_ATTR void VKAPI_CALL lvp_DestroyPipeline( if (!_pipeline) return; - if (pipeline->shader_cso[PIPE_SHADER_VERTEX]) - device->queue.ctx->delete_vs_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_VERTEX]); - if (pipeline->shader_cso[PIPE_SHADER_FRAGMENT]) - device->queue.ctx->delete_fs_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_FRAGMENT]); - if (pipeline->shader_cso[PIPE_SHADER_GEOMETRY]) - 
device->queue.ctx->delete_gs_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_GEOMETRY]); - if (pipeline->shader_cso[PIPE_SHADER_TESS_CTRL]) - device->queue.ctx->delete_tcs_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_TESS_CTRL]); - if (pipeline->shader_cso[PIPE_SHADER_TESS_EVAL]) - device->queue.ctx->delete_tes_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_TESS_EVAL]); - if (pipeline->shader_cso[PIPE_SHADER_COMPUTE]) - device->queue.ctx->delete_compute_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_COMPUTE]); - - ralloc_free(pipeline->mem_ctx); - vk_object_base_finish(&pipeline->base); - vk_free2(&device->vk.alloc, pAllocator, pipeline); + if (pipeline->used) { + simple_mtx_lock(&device->queue.lock); + util_dynarray_append(&device->queue.pipeline_destroys, struct lvp_pipeline*, pipeline); + simple_mtx_unlock(&device->queue.lock); + } else { + lvp_pipeline_destroy(device, pipeline, false); + } } -static VkResult -deep_copy_shader_stage(void *mem_ctx, - struct VkPipelineShaderStageCreateInfo *dst, - const struct VkPipelineShaderStageCreateInfo *src) +static void +shared_var_info(const struct glsl_type *type, unsigned *size, unsigned *align) { - dst->sType = src->sType; - dst->pNext = NULL; - dst->flags = src->flags; - dst->stage = src->stage; - dst->module = src->module; - dst->pName = src->pName; - dst->pSpecializationInfo = NULL; - if (src->pSpecializationInfo) { - const VkSpecializationInfo *src_spec = src->pSpecializationInfo; - VkSpecializationInfo *dst_spec = ralloc_size(mem_ctx, sizeof(VkSpecializationInfo) + - src_spec->mapEntryCount * sizeof(VkSpecializationMapEntry) + - src_spec->dataSize); - VkSpecializationMapEntry *maps = (VkSpecializationMapEntry *)(dst_spec + 1); - dst_spec->pMapEntries = maps; - void *pdata = (void *)(dst_spec->pMapEntries + src_spec->mapEntryCount); - dst_spec->pData = pdata; - - - dst_spec->mapEntryCount = src_spec->mapEntryCount; - dst_spec->dataSize = src_spec->dataSize; - memcpy(pdata, src_spec->pData, src->pSpecializationInfo->dataSize); - memcpy(maps, src_spec->pMapEntries, src_spec->mapEntryCount * sizeof(VkSpecializationMapEntry)); - dst->pSpecializationInfo = dst_spec; - } - return VK_SUCCESS; + assert(glsl_type_is_vector_or_scalar(type)); + + uint32_t comp_size = glsl_type_is_boolean(type) + ? 
4 : glsl_get_bit_size(type) / 8; + unsigned length = glsl_get_vector_elements(type); + *size = comp_size * length, + *align = comp_size; } -static VkResult -deep_copy_vertex_input_state(void *mem_ctx, - struct VkPipelineVertexInputStateCreateInfo *dst, - const struct VkPipelineVertexInputStateCreateInfo *src) +static bool +remove_barriers_impl(nir_builder *b, nir_intrinsic_instr *intr, void *data) { - dst->sType = src->sType; - dst->pNext = NULL; - dst->flags = src->flags; - dst->vertexBindingDescriptionCount = src->vertexBindingDescriptionCount; - - LVP_PIPELINE_DUP(dst->pVertexBindingDescriptions, - src->pVertexBindingDescriptions, - VkVertexInputBindingDescription, - src->vertexBindingDescriptionCount); - - dst->vertexAttributeDescriptionCount = src->vertexAttributeDescriptionCount; - - LVP_PIPELINE_DUP(dst->pVertexAttributeDescriptions, - src->pVertexAttributeDescriptions, - VkVertexInputAttributeDescription, - src->vertexAttributeDescriptionCount); - - if (src->pNext) { - vk_foreach_struct(ext, src->pNext) { - switch (ext->sType) { - case VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT: { - VkPipelineVertexInputDivisorStateCreateInfoEXT *ext_src = (VkPipelineVertexInputDivisorStateCreateInfoEXT *)ext; - VkPipelineVertexInputDivisorStateCreateInfoEXT *ext_dst = ralloc(mem_ctx, VkPipelineVertexInputDivisorStateCreateInfoEXT); - - ext_dst->sType = ext_src->sType; - ext_dst->vertexBindingDivisorCount = ext_src->vertexBindingDivisorCount; - - LVP_PIPELINE_DUP(ext_dst->pVertexBindingDivisors, - ext_src->pVertexBindingDivisors, - VkVertexInputBindingDivisorDescriptionEXT, - ext_src->vertexBindingDivisorCount); - - dst->pNext = ext_dst; - break; - } - default: - break; - } - } + if (intr->intrinsic != nir_intrinsic_barrier) + return false; + if (data) { + if (nir_intrinsic_execution_scope(intr) != SCOPE_NONE) + return false; + + if (nir_intrinsic_memory_scope(intr) == SCOPE_WORKGROUP || + nir_intrinsic_memory_scope(intr) == SCOPE_DEVICE || + nir_intrinsic_memory_scope(intr) == SCOPE_QUEUE_FAMILY) + return false; } - return VK_SUCCESS; + nir_instr_remove(&intr->instr); + return true; } static bool -dynamic_state_contains(const VkPipelineDynamicStateCreateInfo *src, VkDynamicState state) +remove_barriers(nir_shader *nir, bool is_compute) { - if (!src) - return false; - - for (unsigned i = 0; i < src->dynamicStateCount; i++) - if (src->pDynamicStates[i] == state) - return true; - return false; + return nir_shader_intrinsics_pass(nir, remove_barriers_impl, + nir_metadata_dominance, + (void*)is_compute); } -static VkResult -deep_copy_viewport_state(void *mem_ctx, - const VkPipelineDynamicStateCreateInfo *dyn_state, - VkPipelineViewportStateCreateInfo *dst, - const VkPipelineViewportStateCreateInfo *src) +static bool +lower_demote_impl(nir_builder *b, nir_intrinsic_instr *intr, void *data) { - dst->sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; - dst->pNext = NULL; - dst->pViewports = NULL; - dst->pScissors = NULL; - - if (!dynamic_state_contains(dyn_state, VK_DYNAMIC_STATE_VIEWPORT) && - !dynamic_state_contains(dyn_state, VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT_EXT)) { - LVP_PIPELINE_DUP(dst->pViewports, - src->pViewports, - VkViewport, - src->viewportCount); + if (intr->intrinsic == nir_intrinsic_demote || intr->intrinsic == nir_intrinsic_terminate) { + intr->intrinsic = nir_intrinsic_discard; + return true; } - if (!dynamic_state_contains(dyn_state, VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT_EXT)) - dst->viewportCount = src->viewportCount; - else - dst->viewportCount 
= 0; - - if (!dynamic_state_contains(dyn_state, VK_DYNAMIC_STATE_SCISSOR) && - !dynamic_state_contains(dyn_state, VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT_EXT)) { - if (src->pScissors) - LVP_PIPELINE_DUP(dst->pScissors, - src->pScissors, - VkRect2D, - src->scissorCount); + if (intr->intrinsic == nir_intrinsic_demote_if || intr->intrinsic == nir_intrinsic_terminate_if) { + intr->intrinsic = nir_intrinsic_discard_if; + return true; } - if (!dynamic_state_contains(dyn_state, VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT_EXT)) - dst->scissorCount = src->scissorCount; - else - dst->scissorCount = 0; + return false; +} - return VK_SUCCESS; +static bool +lower_demote(nir_shader *nir) +{ + return nir_shader_intrinsics_pass(nir, lower_demote_impl, + nir_metadata_dominance, NULL); } -static VkResult -deep_copy_color_blend_state(void *mem_ctx, - VkPipelineColorBlendStateCreateInfo *dst, - const VkPipelineColorBlendStateCreateInfo *src) +static bool +find_tex(const nir_instr *instr, const void *data_cb) { - dst->sType = src->sType; - dst->pNext = NULL; - dst->flags = src->flags; - dst->logicOpEnable = src->logicOpEnable; - dst->logicOp = src->logicOp; + if (instr->type == nir_instr_type_tex) + return true; + return false; +} - LVP_PIPELINE_DUP(dst->pAttachments, - src->pAttachments, - VkPipelineColorBlendAttachmentState, - src->attachmentCount); - dst->attachmentCount = src->attachmentCount; +static nir_def * +fixup_tex_instr(struct nir_builder *b, nir_instr *instr, void *data_cb) +{ + nir_tex_instr *tex_instr = nir_instr_as_tex(instr); + unsigned offset = 0; - memcpy(&dst->blendConstants, &src->blendConstants, sizeof(float) * 4); + int idx = nir_tex_instr_src_index(tex_instr, nir_tex_src_texture_offset); + if (idx == -1) + return NULL; - return VK_SUCCESS; -} + if (!nir_src_is_const(tex_instr->src[idx].src)) + return NULL; + offset = nir_src_comp_as_uint(tex_instr->src[idx].src, 0); -static VkResult -deep_copy_dynamic_state(void *mem_ctx, - VkPipelineDynamicStateCreateInfo *dst, - const VkPipelineDynamicStateCreateInfo *src) -{ - dst->sType = src->sType; - dst->pNext = NULL; - dst->flags = src->flags; - - LVP_PIPELINE_DUP(dst->pDynamicStates, - src->pDynamicStates, - VkDynamicState, - src->dynamicStateCount); - dst->dynamicStateCount = src->dynamicStateCount; - return VK_SUCCESS; + nir_tex_instr_remove_src(tex_instr, idx); + tex_instr->texture_index += offset; + return NIR_LOWER_INSTR_PROGRESS; } - -static VkResult -deep_copy_rasterization_state(void *mem_ctx, - VkPipelineRasterizationStateCreateInfo *dst, - const VkPipelineRasterizationStateCreateInfo *src) +static bool +lvp_nir_fixup_indirect_tex(nir_shader *shader) { - memcpy(dst, src, sizeof(VkPipelineRasterizationStateCreateInfo)); - dst->pNext = NULL; - - if (src->pNext) { - vk_foreach_struct(ext, src->pNext) { - switch (ext->sType) { - case VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT: { - VkPipelineRasterizationDepthClipStateCreateInfoEXT *ext_src = (VkPipelineRasterizationDepthClipStateCreateInfoEXT *)ext; - VkPipelineRasterizationDepthClipStateCreateInfoEXT *ext_dst = ralloc(mem_ctx, VkPipelineRasterizationDepthClipStateCreateInfoEXT); - ext_dst->sType = ext_src->sType; - ext_dst->flags = ext_src->flags; - ext_dst->depthClipEnable = ext_src->depthClipEnable; - dst->pNext = ext_dst; - break; - } - default: - break; - } - } - } - return VK_SUCCESS; + return nir_shader_lower_instructions(shader, find_tex, fixup_tex_instr, NULL); } -static VkResult -deep_copy_graphics_create_info(void *mem_ctx, - VkGraphicsPipelineCreateInfo *dst, 
- const VkGraphicsPipelineCreateInfo *src) +static void +optimize(nir_shader *nir) { - int i; - VkResult result; - VkPipelineShaderStageCreateInfo *stages; - VkPipelineVertexInputStateCreateInfo *vertex_input; - VkPipelineRasterizationStateCreateInfo *rasterization_state; - LVP_FROM_HANDLE(lvp_render_pass, pass, src->renderPass); - - dst->sType = src->sType; - dst->pNext = NULL; - dst->flags = src->flags; - dst->layout = src->layout; - dst->renderPass = src->renderPass; - dst->subpass = src->subpass; - dst->basePipelineHandle = src->basePipelineHandle; - dst->basePipelineIndex = src->basePipelineIndex; - - /* pStages */ - VkShaderStageFlags stages_present = 0; - dst->stageCount = src->stageCount; - stages = ralloc_array(mem_ctx, VkPipelineShaderStageCreateInfo, dst->stageCount); - for (i = 0 ; i < dst->stageCount; i++) { - result = deep_copy_shader_stage(mem_ctx, &stages[i], &src->pStages[i]); - if (result != VK_SUCCESS) - return result; - stages_present |= src->pStages[i].stage; - } - dst->pStages = stages; + bool progress = false; + do { + progress = false; - /* pVertexInputState */ - if (!dynamic_state_contains(src->pDynamicState, VK_DYNAMIC_STATE_VERTEX_INPUT_EXT)) { - vertex_input = ralloc(mem_ctx, VkPipelineVertexInputStateCreateInfo); - result = deep_copy_vertex_input_state(mem_ctx, vertex_input, - src->pVertexInputState); - if (result != VK_SUCCESS) - return result; - dst->pVertexInputState = vertex_input; - } else - dst->pVertexInputState = NULL; - - /* pInputAssemblyState */ - LVP_PIPELINE_DUP(dst->pInputAssemblyState, - src->pInputAssemblyState, - VkPipelineInputAssemblyStateCreateInfo, - 1); - - /* pTessellationState */ - if (src->pTessellationState && - (stages_present & (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)) == - (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)) { - LVP_PIPELINE_DUP(dst->pTessellationState, - src->pTessellationState, - VkPipelineTessellationStateCreateInfo, - 1); - } + NIR_PASS(progress, nir, nir_lower_flrp, 32|64, true); + NIR_PASS(progress, nir, nir_split_array_vars, nir_var_function_temp); + NIR_PASS(progress, nir, nir_shrink_vec_array_vars, nir_var_function_temp); + NIR_PASS(progress, nir, nir_opt_deref); + NIR_PASS(progress, nir, nir_lower_vars_to_ssa); - /* pViewportState */ - bool rasterization_disabled = !dynamic_state_contains(src->pDynamicState, VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE_EXT) && - src->pRasterizationState->rasterizerDiscardEnable; - if (src->pViewportState && !rasterization_disabled) { - VkPipelineViewportStateCreateInfo *viewport_state; - viewport_state = ralloc(mem_ctx, VkPipelineViewportStateCreateInfo); - if (!viewport_state) - return VK_ERROR_OUT_OF_HOST_MEMORY; - deep_copy_viewport_state(mem_ctx, src->pDynamicState, - viewport_state, src->pViewportState); - dst->pViewportState = viewport_state; - } else - dst->pViewportState = NULL; - - /* pRasterizationState */ - rasterization_state = ralloc(mem_ctx, VkPipelineRasterizationStateCreateInfo); - if (!rasterization_state) - return VK_ERROR_OUT_OF_HOST_MEMORY; - deep_copy_rasterization_state(mem_ctx, rasterization_state, src->pRasterizationState); - dst->pRasterizationState = rasterization_state; - - /* pMultisampleState */ - if (src->pMultisampleState && !rasterization_disabled) { - VkPipelineMultisampleStateCreateInfo* ms_state; - ms_state = ralloc_size(mem_ctx, sizeof(VkPipelineMultisampleStateCreateInfo) + sizeof(VkSampleMask)); - if (!ms_state) - return VK_ERROR_OUT_OF_HOST_MEMORY; - /* 
does samplemask need deep copy? */ - memcpy(ms_state, src->pMultisampleState, sizeof(VkPipelineMultisampleStateCreateInfo)); - if (src->pMultisampleState->pSampleMask) { - VkSampleMask *sample_mask = (VkSampleMask *)(ms_state + 1); - sample_mask[0] = src->pMultisampleState->pSampleMask[0]; - ms_state->pSampleMask = sample_mask; - } - dst->pMultisampleState = ms_state; - } else - dst->pMultisampleState = NULL; - - /* pDepthStencilState */ - if (src->pDepthStencilState && !rasterization_disabled && pass->has_zs_attachment) { - LVP_PIPELINE_DUP(dst->pDepthStencilState, - src->pDepthStencilState, - VkPipelineDepthStencilStateCreateInfo, - 1); - } else - dst->pDepthStencilState = NULL; - - /* pColorBlendState */ - if (src->pColorBlendState && !rasterization_disabled && pass->has_color_attachment) { - VkPipelineColorBlendStateCreateInfo* cb_state; - - cb_state = ralloc(mem_ctx, VkPipelineColorBlendStateCreateInfo); - if (!cb_state) - return VK_ERROR_OUT_OF_HOST_MEMORY; - deep_copy_color_blend_state(mem_ctx, cb_state, src->pColorBlendState); - dst->pColorBlendState = cb_state; - } else - dst->pColorBlendState = NULL; - - if (src->pDynamicState) { - VkPipelineDynamicStateCreateInfo* dyn_state; - - /* pDynamicState */ - dyn_state = ralloc(mem_ctx, VkPipelineDynamicStateCreateInfo); - if (!dyn_state) - return VK_ERROR_OUT_OF_HOST_MEMORY; - deep_copy_dynamic_state(mem_ctx, dyn_state, src->pDynamicState); - dst->pDynamicState = dyn_state; - } else - dst->pDynamicState = NULL; + NIR_PASS(progress, nir, nir_opt_copy_prop_vars); - return VK_SUCCESS; -} + NIR_PASS(progress, nir, nir_copy_prop); + NIR_PASS(progress, nir, nir_opt_dce); + NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true); -static VkResult -deep_copy_compute_create_info(void *mem_ctx, - VkComputePipelineCreateInfo *dst, - const VkComputePipelineCreateInfo *src) -{ - VkResult result; - dst->sType = src->sType; - dst->pNext = NULL; - dst->flags = src->flags; - dst->layout = src->layout; - dst->basePipelineHandle = src->basePipelineHandle; - dst->basePipelineIndex = src->basePipelineIndex; - - result = deep_copy_shader_stage(mem_ctx, &dst->stage, &src->stage); - if (result != VK_SUCCESS) - return result; - return VK_SUCCESS; -} + NIR_PASS(progress, nir, nir_opt_algebraic); + NIR_PASS(progress, nir, nir_opt_constant_folding); -static inline unsigned -st_shader_stage_to_ptarget(gl_shader_stage stage) -{ - switch (stage) { - case MESA_SHADER_VERTEX: - return PIPE_SHADER_VERTEX; - case MESA_SHADER_FRAGMENT: - return PIPE_SHADER_FRAGMENT; - case MESA_SHADER_GEOMETRY: - return PIPE_SHADER_GEOMETRY; - case MESA_SHADER_TESS_CTRL: - return PIPE_SHADER_TESS_CTRL; - case MESA_SHADER_TESS_EVAL: - return PIPE_SHADER_TESS_EVAL; - case MESA_SHADER_COMPUTE: - return PIPE_SHADER_COMPUTE; - default: - break; - } + NIR_PASS(progress, nir, nir_opt_remove_phis); + bool loop = false; + NIR_PASS(loop, nir, nir_opt_loop); + progress |= loop; + if (loop) { + /* If nir_opt_loop makes progress, then we need to clean + * things up if we want any hope of nir_opt_if or nir_opt_loop_unroll + * to make progress. 
+ */ + NIR_PASS(progress, nir, nir_copy_prop); + NIR_PASS(progress, nir, nir_opt_dce); + NIR_PASS(progress, nir, nir_opt_remove_phis); + } + NIR_PASS(progress, nir, nir_opt_if, nir_opt_if_optimize_phi_true_false); + NIR_PASS(progress, nir, nir_opt_dead_cf); + NIR_PASS(progress, nir, nir_opt_conditional_discard); + NIR_PASS(progress, nir, nir_opt_remove_phis); + NIR_PASS(progress, nir, nir_opt_cse); + NIR_PASS(progress, nir, nir_opt_undef); - assert(!"should not be reached"); - return PIPE_SHADER_VERTEX; + NIR_PASS(progress, nir, nir_opt_deref); + NIR_PASS(progress, nir, nir_lower_alu_to_scalar, NULL, NULL); + NIR_PASS(progress, nir, nir_opt_loop_unroll); + NIR_PASS(progress, nir, lvp_nir_fixup_indirect_tex); + } while (progress); } -static void -shared_var_info(const struct glsl_type *type, unsigned *size, unsigned *align) +void +lvp_shader_optimize(nir_shader *nir) { - assert(glsl_type_is_vector_or_scalar(type)); + optimize(nir); + NIR_PASS_V(nir, nir_lower_var_copies); + NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL); + NIR_PASS_V(nir, nir_opt_dce); + nir_sweep(nir); +} - uint32_t comp_size = glsl_type_is_boolean(type) - ? 4 : glsl_get_bit_size(type) / 8; - unsigned length = glsl_get_vector_elements(type); - *size = comp_size * length, - *align = comp_size; +struct lvp_pipeline_nir * +lvp_create_pipeline_nir(nir_shader *nir) +{ + struct lvp_pipeline_nir *pipeline_nir = ralloc(NULL, struct lvp_pipeline_nir); + pipeline_nir->nir = nir; + pipeline_nir->ref_cnt = 1; + return pipeline_nir; } -static void -lvp_shader_compile_to_ir(struct lvp_pipeline *pipeline, - struct vk_shader_module *module, - const char *entrypoint_name, - gl_shader_stage stage, - const VkSpecializationInfo *spec_info) +static VkResult +compile_spirv(struct lvp_device *pdevice, const VkPipelineShaderStageCreateInfo *sinfo, nir_shader **nir) { - nir_shader *nir; - const nir_shader_compiler_options *drv_options = pipeline->device->pscreen->get_compiler_options(pipeline->device->pscreen, PIPE_SHADER_IR_NIR, st_shader_stage_to_ptarget(stage)); - bool progress; - uint32_t *spirv = (uint32_t *) module->data; - assert(spirv[0] == SPIR_V_MAGIC_NUMBER); - assert(module->size % 4 == 0); + gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage); + assert(stage <= LVP_SHADER_STAGES && stage != MESA_SHADER_NONE); + VkResult result; - uint32_t num_spec_entries = 0; - struct nir_spirv_specialization *spec_entries = - vk_spec_info_to_nir_spirv(spec_info, &num_spec_entries); +#ifdef VK_ENABLE_BETA_EXTENSIONS + const VkPipelineShaderStageNodeCreateInfoAMDX *node_info = vk_find_struct_const( + sinfo->pNext, PIPELINE_SHADER_STAGE_NODE_CREATE_INFO_AMDX); +#endif - struct lvp_device *pdevice = pipeline->device; const struct spirv_to_nir_options spirv_options = { .environment = NIR_SPIRV_VULKAN, .caps = { @@ -483,6 +307,11 @@ lvp_shader_compile_to_ir(struct lvp_pipeline *pipeline, .int16 = true, .int64 = (pdevice->pscreen->get_param(pdevice->pscreen, PIPE_CAP_INT64) == 1), .tessellation = true, + .float_controls = true, + .float32_atomic_add = true, +#if LLVM_VERSION_MAJOR >= 15 + .float32_atomic_min_max = true, +#endif .image_ms_array = true, .image_read_without_format = true, .image_write_without_format = true, @@ -497,6 +326,7 @@ lvp_shader_compile_to_ir(struct lvp_pipeline *pipeline, .device_group = true, .draw_parameters = true, .shader_viewport_index_layer = true, + .shader_clock = true, .multiview = true, .physical_storage_buffer_address = true, .int64_atomics = true, @@ -504,59 +334,103 @@ 
lvp_shader_compile_to_ir(struct lvp_pipeline *pipeline, .subgroup_basic = true, .subgroup_ballot = true, .subgroup_quad = true, +#if LLVM_VERSION_MAJOR >= 10 + .subgroup_shuffle = true, +#endif .subgroup_vote = true, + .vk_memory_model = true, + .vk_memory_model_device_scope = true, .int8 = true, .float16 = true, + .demote_to_helper_invocation = true, + .mesh_shading = true, + .descriptor_array_dynamic_indexing = true, + .descriptor_array_non_uniform_indexing = true, + .descriptor_indexing = true, + .runtime_descriptor_array = true, + .shader_enqueue = true, + .ray_query = true, + .ray_cull_mask = true, + .ray_tracing = true, + .ray_tracing_position_fetch = true, }, - .ubo_addr_format = nir_address_format_32bit_index_offset, - .ssbo_addr_format = nir_address_format_32bit_index_offset, + .ubo_addr_format = nir_address_format_vec2_index_32bit_offset, + .ssbo_addr_format = nir_address_format_vec2_index_32bit_offset, .phys_ssbo_addr_format = nir_address_format_64bit_global, .push_const_addr_format = nir_address_format_logical, .shared_addr_format = nir_address_format_32bit_offset, - .frag_coord_is_sysval = false, + .constant_addr_format = nir_address_format_64bit_global, +#ifdef VK_ENABLE_BETA_EXTENSIONS + .shader_index = node_info ? node_info->index : 0, +#endif }; - nir = spirv_to_nir(spirv, module->size / 4, - spec_entries, num_spec_entries, - stage, entrypoint_name, &spirv_options, drv_options); + result = vk_pipeline_shader_stage_to_nir(&pdevice->vk, sinfo, + &spirv_options, pdevice->physical_device->drv_options[stage], + NULL, nir); + return result; +} - if (!nir) { - free(spec_entries); - return; +static bool +inline_variant_equals(const void *a, const void *b) +{ + const struct lvp_inline_variant *av = a, *bv = b; + assert(av->mask == bv->mask); + u_foreach_bit(slot, av->mask) { + if (memcmp(av->vals[slot], bv->vals[slot], sizeof(av->vals[slot]))) + return false; } - nir_validate_shader(nir, NULL); + return true; +} - free(spec_entries); +static const struct vk_ycbcr_conversion_state * +lvp_ycbcr_conversion_lookup(const void *data, uint32_t set, uint32_t binding, uint32_t array_index) +{ + const struct lvp_pipeline_layout *layout = data; - NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp); - NIR_PASS_V(nir, nir_lower_returns); - NIR_PASS_V(nir, nir_inline_functions); - NIR_PASS_V(nir, nir_copy_prop); - NIR_PASS_V(nir, nir_opt_deref); + const struct lvp_descriptor_set_layout *set_layout = container_of(layout->vk.set_layouts[set], struct lvp_descriptor_set_layout, vk); + const struct lvp_descriptor_set_binding_layout *binding_layout = &set_layout->binding[binding]; + if (!binding_layout->immutable_samplers) + return NULL; - /* Pick off the single entrypoint that we want */ - foreach_list_typed_safe(nir_function, func, node, &nir->functions) { - if (!func->is_entrypoint) - exec_node_remove(&func->node); - } - assert(exec_list_length(&nir->functions) == 1); + struct vk_ycbcr_conversion *ycbcr_conversion = binding_layout->immutable_samplers[array_index]->vk.ycbcr_conversion; + return ycbcr_conversion ? &ycbcr_conversion->state : NULL; +} - NIR_PASS_V(nir, nir_lower_variable_initializers, ~0); - NIR_PASS_V(nir, nir_split_var_copies); - NIR_PASS_V(nir, nir_split_per_member_structs); +/* pipeline is NULL for shader objects. 
*/ +static void +lvp_shader_lower(struct lvp_device *pdevice, struct lvp_pipeline *pipeline, nir_shader *nir, struct lvp_pipeline_layout *layout) +{ + if (nir->info.stage != MESA_SHADER_TESS_CTRL) + NIR_PASS_V(nir, remove_barriers, nir->info.stage == MESA_SHADER_COMPUTE || nir->info.stage == MESA_SHADER_MESH || nir->info.stage == MESA_SHADER_TASK); - NIR_PASS_V(nir, nir_remove_dead_variables, - nir_var_shader_in | nir_var_shader_out | nir_var_system_value, NULL); + const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = { + .frag_coord = true, + .point_coord = true, + }; + NIR_PASS_V(nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings); + + struct nir_lower_subgroups_options subgroup_opts = {0}; + subgroup_opts.lower_quad = true; + subgroup_opts.ballot_components = 1; + subgroup_opts.ballot_bit_size = 32; + subgroup_opts.lower_inverse_ballot = true; + NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_opts); - if (stage == MESA_SHADER_FRAGMENT) + if (nir->info.stage == MESA_SHADER_FRAGMENT) lvp_lower_input_attachments(nir, false); NIR_PASS_V(nir, nir_lower_system_values); - NIR_PASS_V(nir, nir_lower_compute_system_values, NULL); + NIR_PASS_V(nir, nir_lower_is_helper_invocation); + NIR_PASS_V(nir, lower_demote); - NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays); - NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_uniform, NULL); + const struct nir_lower_compute_system_values_options compute_system_values = {0}; + NIR_PASS_V(nir, nir_lower_compute_system_values, &compute_system_values); - lvp_lower_pipeline_layout(pipeline->device, pipeline->layout, nir); + NIR_PASS_V(nir, nir_remove_dead_variables, + nir_var_uniform | nir_var_image, NULL); + + optimize(nir); + nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); NIR_PASS_V(nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(nir), true, true); NIR_PASS_V(nir, nir_split_var_copies); @@ -567,17 +441,39 @@ lvp_shader_compile_to_ir(struct lvp_pipeline *pipeline, NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo | nir_var_mem_ssbo, - nir_address_format_32bit_index_offset); + nir_address_format_vec2_index_32bit_offset); NIR_PASS_V(nir, nir_lower_explicit_io, - nir_var_mem_global, + nir_var_mem_global | nir_var_mem_constant, nir_address_format_64bit_global); - if (nir->info.stage == MESA_SHADER_COMPUTE) { + if (nir->info.stage == MESA_SHADER_COMPUTE) + lvp_lower_exec_graph(pipeline, nir); + + NIR_PASS(_, nir, nir_vk_lower_ycbcr_tex, lvp_ycbcr_conversion_lookup, layout); + + nir_lower_non_uniform_access_options options = { + .types = nir_lower_non_uniform_ubo_access | nir_lower_non_uniform_texture_access | nir_lower_non_uniform_image_access, + }; + NIR_PASS(_, nir, nir_lower_non_uniform_access, &options); + + lvp_lower_pipeline_layout(pdevice, layout, nir); + + NIR_PASS(_, nir, lvp_nir_lower_ray_queries); + + if (nir->info.stage == MESA_SHADER_COMPUTE || + nir->info.stage == MESA_SHADER_TASK || + nir->info.stage == MESA_SHADER_MESH) { NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_mem_shared, shared_var_info); NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_shared, nir_address_format_32bit_offset); } + if (nir->info.stage == MESA_SHADER_TASK || + nir->info.stage == MESA_SHADER_MESH) { + NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_mem_task_payload, shared_var_info); + NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_task_payload, nir_address_format_32bit_offset); + } + NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL); if (nir->info.stage == 
MESA_SHADER_VERTEX || @@ -587,52 +483,23 @@ lvp_shader_compile_to_ir(struct lvp_pipeline *pipeline, NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, true); } - do { - progress = false; - - NIR_PASS(progress, nir, nir_lower_flrp, 32|64, true); - NIR_PASS(progress, nir, nir_split_array_vars, nir_var_function_temp); - NIR_PASS(progress, nir, nir_shrink_vec_array_vars, nir_var_function_temp); - NIR_PASS(progress, nir, nir_opt_deref); - NIR_PASS(progress, nir, nir_lower_vars_to_ssa); - - NIR_PASS(progress, nir, nir_copy_prop); - NIR_PASS(progress, nir, nir_opt_dce); - NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true); - - NIR_PASS(progress, nir, nir_opt_algebraic); - NIR_PASS(progress, nir, nir_opt_constant_folding); - - NIR_PASS(progress, nir, nir_opt_remove_phis); - bool trivial_continues = false; - NIR_PASS(trivial_continues, nir, nir_opt_trivial_continues); - progress |= trivial_continues; - if (trivial_continues) { - /* If nir_opt_trivial_continues makes progress, then we need to clean - * things up if we want any hope of nir_opt_if or nir_opt_loop_unroll - * to make progress. - */ - NIR_PASS(progress, nir, nir_copy_prop); - NIR_PASS(progress, nir, nir_opt_dce); - NIR_PASS(progress, nir, nir_opt_remove_phis); - } - NIR_PASS(progress, nir, nir_opt_if, true); - NIR_PASS(progress, nir, nir_opt_dead_cf); - NIR_PASS(progress, nir, nir_opt_conditional_discard); - NIR_PASS(progress, nir, nir_opt_remove_phis); - NIR_PASS(progress, nir, nir_opt_cse); - NIR_PASS(progress, nir, nir_opt_undef); - - NIR_PASS(progress, nir, nir_opt_deref); - NIR_PASS(progress, nir, nir_lower_alu_to_scalar, NULL, NULL); - } while (progress); + // TODO: also optimize the tex srcs. see radeonSI for reference */ + /* Skip if there are potentially conflicting rounding modes */ + struct nir_opt_16bit_tex_image_options opt_16bit_options = { + .rounding_mode = nir_rounding_mode_undef, + .opt_tex_dest_types = nir_type_float | nir_type_uint | nir_type_int, + }; + NIR_PASS_V(nir, nir_opt_16bit_tex_image, &opt_16bit_options); - NIR_PASS_V(nir, nir_lower_var_copies); - NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL); - NIR_PASS_V(nir, nir_opt_dce); - nir_sweep(nir); + /* Lower texture OPs llvmpipe supports to reduce the amount of sample + * functions that need to be pre-compiled. 
+ */ + const nir_lower_tex_options tex_options = { + .lower_txd = true, + }; + NIR_PASS(_, nir, nir_lower_tex, &tex_options); - nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); + lvp_shader_optimize(nir); if (nir->info.stage != MESA_SHADER_VERTEX) nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, nir->info.stage); @@ -644,13 +511,43 @@ lvp_shader_compile_to_ir(struct lvp_pipeline *pipeline, } nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs, nir->info.stage); - pipeline->pipeline_nir[stage] = nir; } -static void fill_shader_prog(struct pipe_shader_state *state, gl_shader_stage stage, struct lvp_pipeline *pipeline) +VkResult +lvp_spirv_to_nir(struct lvp_pipeline *pipeline, const VkPipelineShaderStageCreateInfo *sinfo, + nir_shader **out_nir) +{ + VkResult result = compile_spirv(pipeline->device, sinfo, out_nir); + if (result == VK_SUCCESS) + lvp_shader_lower(pipeline->device, pipeline, *out_nir, pipeline->layout); + + return result; +} + +void +lvp_shader_init(struct lvp_shader *shader, nir_shader *nir) +{ + nir_function_impl *impl = nir_shader_get_entrypoint(nir); + if (impl->ssa_alloc > 100) //skip for small shaders + shader->inlines.must_inline = lvp_find_inlinable_uniforms(shader, nir); + shader->pipeline_nir = lvp_create_pipeline_nir(nir); + if (shader->inlines.can_inline) + _mesa_set_init(&shader->inlines.variants, NULL, NULL, inline_variant_equals); +} + +static VkResult +lvp_shader_compile_to_ir(struct lvp_pipeline *pipeline, + const VkPipelineShaderStageCreateInfo *sinfo) { - state->type = PIPE_SHADER_IR_NIR; - state->ir.nir = pipeline->pipeline_nir[stage]; + gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage); + assert(stage <= LVP_SHADER_STAGES && stage != MESA_SHADER_NONE); + nir_shader *nir; + VkResult result = lvp_spirv_to_nir(pipeline, sinfo, &nir); + if (result == VK_SUCCESS) { + struct lvp_shader *shader = &pipeline->shaders[stage]; + lvp_shader_init(shader, nir); + } + return result; } static void @@ -684,108 +581,228 @@ merge_tess_info(struct shader_info *tes_info, tcs_info->tess.spacing == tes_info->tess.spacing); tes_info->tess.spacing |= tcs_info->tess.spacing; - assert(tcs_info->tess.primitive_mode == 0 || - tes_info->tess.primitive_mode == 0 || - tcs_info->tess.primitive_mode == tes_info->tess.primitive_mode); - tes_info->tess.primitive_mode |= tcs_info->tess.primitive_mode; + assert(tcs_info->tess._primitive_mode == 0 || + tes_info->tess._primitive_mode == 0 || + tcs_info->tess._primitive_mode == tes_info->tess._primitive_mode); + tes_info->tess._primitive_mode |= tcs_info->tess._primitive_mode; tes_info->tess.ccw |= tcs_info->tess.ccw; tes_info->tess.point_mode |= tcs_info->tess.point_mode; } -static gl_shader_stage -lvp_shader_stage(VkShaderStageFlagBits stage) +static void +lvp_shader_xfb_init(struct lvp_shader *shader) { - switch (stage) { - case VK_SHADER_STAGE_VERTEX_BIT: - return MESA_SHADER_VERTEX; - case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: - return MESA_SHADER_TESS_CTRL; - case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: - return MESA_SHADER_TESS_EVAL; - case VK_SHADER_STAGE_GEOMETRY_BIT: - return MESA_SHADER_GEOMETRY; - case VK_SHADER_STAGE_FRAGMENT_BIT: - return MESA_SHADER_FRAGMENT; - case VK_SHADER_STAGE_COMPUTE_BIT: - return MESA_SHADER_COMPUTE; - default: - unreachable("invalid VkShaderStageFlagBits"); - return MESA_SHADER_NONE; + nir_xfb_info *xfb_info = shader->pipeline_nir->nir->xfb_info; + if (xfb_info) { + uint8_t output_mapping[VARYING_SLOT_TESS_MAX]; + memset(output_mapping, 
0, sizeof(output_mapping)); + + nir_foreach_shader_out_variable(var, shader->pipeline_nir->nir) { + unsigned slots = nir_variable_count_slots(var, var->type); + for (unsigned i = 0; i < slots; i++) + output_mapping[var->data.location + i] = var->data.driver_location + i; + } + + shader->stream_output.num_outputs = xfb_info->output_count; + for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) { + if (xfb_info->buffers_written & (1 << i)) { + shader->stream_output.stride[i] = xfb_info->buffers[i].stride / 4; + } + } + for (unsigned i = 0; i < xfb_info->output_count; i++) { + shader->stream_output.output[i].output_buffer = xfb_info->outputs[i].buffer; + shader->stream_output.output[i].dst_offset = xfb_info->outputs[i].offset / 4; + shader->stream_output.output[i].register_index = output_mapping[xfb_info->outputs[i].location]; + shader->stream_output.output[i].num_components = util_bitcount(xfb_info->outputs[i].component_mask); + shader->stream_output.output[i].start_component = xfb_info->outputs[i].component_offset; + shader->stream_output.output[i].stream = xfb_info->buffer_to_stream[xfb_info->outputs[i].buffer]; + } + } } -static VkResult -lvp_pipeline_compile(struct lvp_pipeline *pipeline, - gl_shader_stage stage) +static void +lvp_pipeline_xfb_init(struct lvp_pipeline *pipeline) +{ + gl_shader_stage stage = MESA_SHADER_VERTEX; + if (pipeline->shaders[MESA_SHADER_GEOMETRY].pipeline_nir) + stage = MESA_SHADER_GEOMETRY; + else if (pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir) + stage = MESA_SHADER_TESS_EVAL; + else if (pipeline->shaders[MESA_SHADER_MESH].pipeline_nir) + stage = MESA_SHADER_MESH; + pipeline->last_vertex = stage; + lvp_shader_xfb_init(&pipeline->shaders[stage]); +} + +static void * +lvp_shader_compile_stage(struct lvp_device *device, struct lvp_shader *shader, nir_shader *nir) { - struct lvp_device *device = pipeline->device; - device->physical_device->pscreen->finalize_nir(device->physical_device->pscreen, pipeline->pipeline_nir[stage]); - if (stage == MESA_SHADER_COMPUTE) { + if (nir->info.stage == MESA_SHADER_COMPUTE) { struct pipe_compute_state shstate = {0}; - shstate.prog = (void *)pipeline->pipeline_nir[MESA_SHADER_COMPUTE]; + shstate.prog = nir; shstate.ir_type = PIPE_SHADER_IR_NIR; - shstate.req_local_mem = pipeline->pipeline_nir[MESA_SHADER_COMPUTE]->info.shared_size; - pipeline->shader_cso[PIPE_SHADER_COMPUTE] = device->queue.ctx->create_compute_state(device->queue.ctx, &shstate); + shstate.static_shared_mem = nir->info.shared_size; + return device->queue.ctx->create_compute_state(device->queue.ctx, &shstate); } else { struct pipe_shader_state shstate = {0}; - fill_shader_prog(&shstate, stage, pipeline); - - if (stage == MESA_SHADER_VERTEX || - stage == MESA_SHADER_GEOMETRY || - stage == MESA_SHADER_TESS_EVAL) { - nir_xfb_info *xfb_info = nir_gather_xfb_info(pipeline->pipeline_nir[stage], NULL); - if (xfb_info) { - uint8_t output_mapping[VARYING_SLOT_TESS_MAX]; - memset(output_mapping, 0, sizeof(output_mapping)); - - nir_foreach_shader_out_variable(var, pipeline->pipeline_nir[stage]) { - unsigned slots = var->data.compact ? 
DIV_ROUND_UP(glsl_get_length(var->type), 4) - : glsl_count_attribute_slots(var->type, false); - for (unsigned i = 0; i < slots; i++) - output_mapping[var->data.location + i] = var->data.driver_location + i; - } - - shstate.stream_output.num_outputs = xfb_info->output_count; - for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) { - if (xfb_info->buffers_written & (1 << i)) { - shstate.stream_output.stride[i] = xfb_info->buffers[i].stride / 4; - } - } - for (unsigned i = 0; i < xfb_info->output_count; i++) { - shstate.stream_output.output[i].output_buffer = xfb_info->outputs[i].buffer; - shstate.stream_output.output[i].dst_offset = xfb_info->outputs[i].offset / 4; - shstate.stream_output.output[i].register_index = output_mapping[xfb_info->outputs[i].location]; - shstate.stream_output.output[i].num_components = util_bitcount(xfb_info->outputs[i].component_mask); - shstate.stream_output.output[i].start_component = ffs(xfb_info->outputs[i].component_mask) - 1; - shstate.stream_output.output[i].stream = xfb_info->buffer_to_stream[xfb_info->outputs[i].buffer]; - } - - ralloc_free(xfb_info); - } - } + shstate.type = PIPE_SHADER_IR_NIR; + shstate.ir.nir = nir; + memcpy(&shstate.stream_output, &shader->stream_output, sizeof(shstate.stream_output)); - switch (stage) { + switch (nir->info.stage) { case MESA_SHADER_FRAGMENT: - pipeline->shader_cso[PIPE_SHADER_FRAGMENT] = device->queue.ctx->create_fs_state(device->queue.ctx, &shstate); - break; + return device->queue.ctx->create_fs_state(device->queue.ctx, &shstate); case MESA_SHADER_VERTEX: - pipeline->shader_cso[PIPE_SHADER_VERTEX] = device->queue.ctx->create_vs_state(device->queue.ctx, &shstate); - break; + return device->queue.ctx->create_vs_state(device->queue.ctx, &shstate); case MESA_SHADER_GEOMETRY: - pipeline->shader_cso[PIPE_SHADER_GEOMETRY] = device->queue.ctx->create_gs_state(device->queue.ctx, &shstate); - break; + return device->queue.ctx->create_gs_state(device->queue.ctx, &shstate); case MESA_SHADER_TESS_CTRL: - pipeline->shader_cso[PIPE_SHADER_TESS_CTRL] = device->queue.ctx->create_tcs_state(device->queue.ctx, &shstate); - break; + return device->queue.ctx->create_tcs_state(device->queue.ctx, &shstate); case MESA_SHADER_TESS_EVAL: - pipeline->shader_cso[PIPE_SHADER_TESS_EVAL] = device->queue.ctx->create_tes_state(device->queue.ctx, &shstate); - break; + return device->queue.ctx->create_tes_state(device->queue.ctx, &shstate); + case MESA_SHADER_TASK: + return device->queue.ctx->create_ts_state(device->queue.ctx, &shstate); + case MESA_SHADER_MESH: + return device->queue.ctx->create_ms_state(device->queue.ctx, &shstate); default: unreachable("illegal shader"); break; } } - return VK_SUCCESS; + return NULL; +} + +void * +lvp_shader_compile(struct lvp_device *device, struct lvp_shader *shader, nir_shader *nir, bool locked) +{ + device->physical_device->pscreen->finalize_nir(device->physical_device->pscreen, nir); + + if (!locked) + simple_mtx_lock(&device->queue.lock); + + void *state = lvp_shader_compile_stage(device, shader, nir); + + if (!locked) + simple_mtx_unlock(&device->queue.lock); + + return state; +} + +#ifndef NDEBUG +static bool +layouts_equal(const struct lvp_descriptor_set_layout *a, const struct lvp_descriptor_set_layout *b) +{ + const uint8_t *pa = (const uint8_t*)a, *pb = (const uint8_t*)b; + uint32_t hash_start_offset = sizeof(struct vk_descriptor_set_layout); + uint32_t binding_offset = offsetof(struct lvp_descriptor_set_layout, binding); + /* base equal */ + if (memcmp(pa + hash_start_offset, pb + hash_start_offset, 
binding_offset - hash_start_offset)) + return false; + + /* bindings equal */ + if (a->binding_count != b->binding_count) + return false; + size_t binding_size = a->binding_count * sizeof(struct lvp_descriptor_set_binding_layout); + const struct lvp_descriptor_set_binding_layout *la = a->binding; + const struct lvp_descriptor_set_binding_layout *lb = b->binding; + if (memcmp(la, lb, binding_size)) { + for (unsigned i = 0; i < a->binding_count; i++) { + if (memcmp(&la[i], &lb[i], offsetof(struct lvp_descriptor_set_binding_layout, immutable_samplers))) + return false; + } + } + + /* immutable sampler equal */ + if (a->immutable_sampler_count != b->immutable_sampler_count) + return false; + if (a->immutable_sampler_count) { + size_t sampler_size = a->immutable_sampler_count * sizeof(struct lvp_sampler *); + if (memcmp(pa + binding_offset + binding_size, pb + binding_offset + binding_size, sampler_size)) { + struct lvp_sampler **sa = (struct lvp_sampler **)(pa + binding_offset); + struct lvp_sampler **sb = (struct lvp_sampler **)(pb + binding_offset); + for (unsigned i = 0; i < a->immutable_sampler_count; i++) { + if (memcmp(sa[i], sb[i], sizeof(struct lvp_sampler))) + return false; + } + } + } + return true; +} +#endif + +static void +merge_layouts(struct vk_device *device, struct lvp_pipeline *dst, struct lvp_pipeline_layout *src) +{ + if (!src) + return; + if (dst->layout) { + /* these must match */ + ASSERTED VkPipelineCreateFlags src_flag = src->vk.create_flags & VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT; + ASSERTED VkPipelineCreateFlags dst_flag = dst->layout->vk.create_flags & VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT; + assert(src_flag == dst_flag); + } + /* always try to reuse existing layout: independent sets bit doesn't guarantee independent sets */ + if (!dst->layout) { + dst->layout = (struct lvp_pipeline_layout*)vk_pipeline_layout_ref(&src->vk); + return; + } + /* this is a big optimization when hit */ + if (dst->layout == src) + return; +#ifndef NDEBUG + /* verify that layouts match */ + const struct lvp_pipeline_layout *smaller = dst->layout->vk.set_count < src->vk.set_count ? dst->layout : src; + const struct lvp_pipeline_layout *bigger = smaller == dst->layout ? 
src : dst->layout; + for (unsigned i = 0; i < smaller->vk.set_count; i++) { + if (!smaller->vk.set_layouts[i] || !bigger->vk.set_layouts[i] || + smaller->vk.set_layouts[i] == bigger->vk.set_layouts[i]) + continue; + + const struct lvp_descriptor_set_layout *smaller_set_layout = + vk_to_lvp_descriptor_set_layout(smaller->vk.set_layouts[i]); + const struct lvp_descriptor_set_layout *bigger_set_layout = + vk_to_lvp_descriptor_set_layout(bigger->vk.set_layouts[i]); + + assert(!smaller_set_layout->binding_count || + !bigger_set_layout->binding_count || + layouts_equal(smaller_set_layout, bigger_set_layout)); + } +#endif + /* must be independent sets with different layouts: reallocate to avoid modifying original layout */ + struct lvp_pipeline_layout *old_layout = dst->layout; + dst->layout = vk_zalloc(&device->alloc, sizeof(struct lvp_pipeline_layout), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + memcpy(dst->layout, old_layout, sizeof(struct lvp_pipeline_layout)); + dst->layout->vk.ref_cnt = 1; + for (unsigned i = 0; i < dst->layout->vk.set_count; i++) { + if (dst->layout->vk.set_layouts[i]) + vk_descriptor_set_layout_ref(dst->layout->vk.set_layouts[i]); + } + vk_pipeline_layout_unref(device, &old_layout->vk); + + for (unsigned i = 0; i < src->vk.set_count; i++) { + if (!dst->layout->vk.set_layouts[i]) { + dst->layout->vk.set_layouts[i] = src->vk.set_layouts[i]; + if (dst->layout->vk.set_layouts[i]) + vk_descriptor_set_layout_ref(src->vk.set_layouts[i]); + } + } + dst->layout->vk.set_count = MAX2(dst->layout->vk.set_count, + src->vk.set_count); + dst->layout->push_constant_size += src->push_constant_size; + dst->layout->push_constant_stages |= src->push_constant_stages; +} + +static void +copy_shader_sanitized(struct lvp_shader *dst, const struct lvp_shader *src) +{ + *dst = *src; + dst->pipeline_nir = NULL; //this gets handled later + dst->tess_ccw = NULL; //this gets handled later + assert(!dst->shader_cso); + assert(!dst->tess_ccw_cso); + if (src->inlines.can_inline) + _mesa_set_init(&dst->inlines.variants, NULL, NULL, inline_variant_equals); } static VkResult @@ -793,111 +810,186 @@ lvp_graphics_pipeline_init(struct lvp_pipeline *pipeline, struct lvp_device *device, struct lvp_pipeline_cache *cache, const VkGraphicsPipelineCreateInfo *pCreateInfo, - const VkAllocationCallbacks *alloc) + VkPipelineCreateFlagBits2KHR flags) { - if (alloc == NULL) - alloc = &device->vk.alloc; - pipeline->device = device; - pipeline->layout = lvp_pipeline_layout_from_handle(pCreateInfo->layout); - pipeline->force_min_sample = false; + pipeline->type = LVP_PIPELINE_GRAPHICS; - pipeline->mem_ctx = ralloc_context(NULL); - /* recreate createinfo */ - deep_copy_graphics_create_info(pipeline->mem_ctx, &pipeline->graphics_create_info, pCreateInfo); - pipeline->is_compute_pipeline = false; - - const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *pv_state = - vk_find_struct_const(pCreateInfo->pRasterizationState, - PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT); - pipeline->provoking_vertex_last = pv_state && pv_state->provokingVertexMode == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT; - - const VkPipelineRasterizationLineStateCreateInfoEXT *line_state = - vk_find_struct_const(pCreateInfo->pRasterizationState, - PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT); - if (line_state) { - /* always draw bresenham if !smooth */ - pipeline->line_stipple_enable = line_state->stippledLineEnable; - pipeline->line_smooth = line_state->lineRasterizationMode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT; 
- pipeline->disable_multisample = line_state->lineRasterizationMode == VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT || - line_state->lineRasterizationMode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT; - pipeline->line_rectangular = line_state->lineRasterizationMode != VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT; - if (pipeline->line_stipple_enable) { - if (!dynamic_state_contains(pipeline->graphics_create_info.pDynamicState, VK_DYNAMIC_STATE_LINE_STIPPLE_EXT)) { - pipeline->line_stipple_factor = line_state->lineStippleFactor - 1; - pipeline->line_stipple_pattern = line_state->lineStipplePattern; - } else { - pipeline->line_stipple_factor = 0; - pipeline->line_stipple_pattern = UINT16_MAX; - } + VkResult result; + + const VkGraphicsPipelineLibraryCreateInfoEXT *libinfo = vk_find_struct_const(pCreateInfo, + GRAPHICS_PIPELINE_LIBRARY_CREATE_INFO_EXT); + const VkPipelineLibraryCreateInfoKHR *libstate = vk_find_struct_const(pCreateInfo, + PIPELINE_LIBRARY_CREATE_INFO_KHR); + const VkGraphicsPipelineLibraryFlagsEXT layout_stages = VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT | + VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT; + if (libinfo) + pipeline->stages = libinfo->flags; + else if (!libstate) + pipeline->stages = VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT | + VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT | + VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT | + VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT; + + if (flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR) + pipeline->library = true; + + struct lvp_pipeline_layout *layout = lvp_pipeline_layout_from_handle(pCreateInfo->layout); + + if (!layout || !(layout->vk.create_flags & VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT)) + /* this is a regular pipeline with no partials: directly reuse */ + pipeline->layout = layout ? 
(void*)vk_pipeline_layout_ref(&layout->vk) : NULL; + else if (pipeline->stages & layout_stages) { + if ((pipeline->stages & layout_stages) == layout_stages) + /* this has all the layout stages: directly reuse */ + pipeline->layout = (void*)vk_pipeline_layout_ref(&layout->vk); + else { + /* this is a partial: copy for later merging to avoid modifying another layout */ + merge_layouts(&device->vk, pipeline, layout); } - } else - pipeline->line_rectangular = true; - - if (!dynamic_state_contains(pipeline->graphics_create_info.pDynamicState, VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT)) { - const VkPipelineColorWriteCreateInfoEXT *cw_state = - vk_find_struct_const(pCreateInfo->pColorBlendState, PIPELINE_COLOR_WRITE_CREATE_INFO_EXT); - if (cw_state) { - for (unsigned i = 0; i < cw_state->attachmentCount; i++) - if (!cw_state->pColorWriteEnables[i]) { - VkPipelineColorBlendAttachmentState *att = (void*)&pipeline->graphics_create_info.pColorBlendState->pAttachments[i]; - att->colorWriteMask = 0; + } + + if (libstate) { + for (unsigned i = 0; i < libstate->libraryCount; i++) { + LVP_FROM_HANDLE(lvp_pipeline, p, libstate->pLibraries[i]); + vk_graphics_pipeline_state_merge(&pipeline->graphics_state, + &p->graphics_state); + if (p->stages & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) { + pipeline->line_smooth = p->line_smooth; + pipeline->disable_multisample = p->disable_multisample; + pipeline->line_rectangular = p->line_rectangular; + memcpy(pipeline->shaders, p->shaders, sizeof(struct lvp_shader) * 4); + memcpy(&pipeline->shaders[MESA_SHADER_TASK], &p->shaders[MESA_SHADER_TASK], sizeof(struct lvp_shader) * 2); + lvp_forall_gfx_stage(i) { + if (i == MESA_SHADER_FRAGMENT) + continue; + copy_shader_sanitized(&pipeline->shaders[i], &p->shaders[i]); } + } + if (p->stages & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) { + pipeline->force_min_sample = p->force_min_sample; + copy_shader_sanitized(&pipeline->shaders[MESA_SHADER_FRAGMENT], &p->shaders[MESA_SHADER_FRAGMENT]); + } + if (p->stages & layout_stages) { + if (!layout || (layout->vk.create_flags & VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT)) + merge_layouts(&device->vk, pipeline, p->layout); + } + pipeline->stages |= p->stages; } } + result = vk_graphics_pipeline_state_fill(&device->vk, + &pipeline->graphics_state, + pCreateInfo, NULL, 0, NULL, NULL, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT, + &pipeline->state_data); + if (result != VK_SUCCESS) + return result; + + assert(pipeline->library || pipeline->stages & (VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT | + VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT | + VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT)); + + pipeline->device = device; for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { - VK_FROM_HANDLE(vk_shader_module, module, - pCreateInfo->pStages[i].module); - gl_shader_stage stage = lvp_shader_stage(pCreateInfo->pStages[i].stage); - lvp_shader_compile_to_ir(pipeline, module, - pCreateInfo->pStages[i].pName, - stage, - pCreateInfo->pStages[i].pSpecializationInfo); - if (!pipeline->pipeline_nir[stage]) - return VK_ERROR_FEATURE_NOT_PRESENT; - } + const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i]; + gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage); + if (stage == MESA_SHADER_FRAGMENT) { + if (!(pipeline->stages & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT)) + continue; + } else { + if (!(pipeline->stages & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT)) + 
continue; + } + result = lvp_shader_compile_to_ir(pipeline, sinfo); + if (result != VK_SUCCESS) + goto fail; - if (pipeline->pipeline_nir[MESA_SHADER_FRAGMENT]) { - if (pipeline->pipeline_nir[MESA_SHADER_FRAGMENT]->info.fs.uses_sample_qualifier || - BITSET_TEST(pipeline->pipeline_nir[MESA_SHADER_FRAGMENT]->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID) || - BITSET_TEST(pipeline->pipeline_nir[MESA_SHADER_FRAGMENT]->info.system_values_read, SYSTEM_VALUE_SAMPLE_POS)) - pipeline->force_min_sample = true; + switch (stage) { + case MESA_SHADER_FRAGMENT: + if (pipeline->shaders[MESA_SHADER_FRAGMENT].pipeline_nir->nir->info.fs.uses_sample_shading) + pipeline->force_min_sample = true; + break; + default: break; + } + } + if (pCreateInfo->stageCount && pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir) { + nir_lower_patch_vertices(pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir->nir, pipeline->shaders[MESA_SHADER_TESS_CTRL].pipeline_nir->nir->info.tess.tcs_vertices_out, NULL); + merge_tess_info(&pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir->nir->info, &pipeline->shaders[MESA_SHADER_TESS_CTRL].pipeline_nir->nir->info); + if (BITSET_TEST(pipeline->graphics_state.dynamic, + MESA_VK_DYNAMIC_TS_DOMAIN_ORIGIN)) { + pipeline->shaders[MESA_SHADER_TESS_EVAL].tess_ccw = lvp_create_pipeline_nir(nir_shader_clone(NULL, pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir->nir)); + pipeline->shaders[MESA_SHADER_TESS_EVAL].tess_ccw->nir->info.tess.ccw = !pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir->nir->info.tess.ccw; + } else if (pipeline->graphics_state.ts && + pipeline->graphics_state.ts->domain_origin == VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT) { + pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir->nir->info.tess.ccw = !pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir->nir->info.tess.ccw; + } } - if (pipeline->pipeline_nir[MESA_SHADER_TESS_CTRL]) { - nir_lower_patch_vertices(pipeline->pipeline_nir[MESA_SHADER_TESS_EVAL], pipeline->pipeline_nir[MESA_SHADER_TESS_CTRL]->info.tess.tcs_vertices_out, NULL); - merge_tess_info(&pipeline->pipeline_nir[MESA_SHADER_TESS_EVAL]->info, &pipeline->pipeline_nir[MESA_SHADER_TESS_CTRL]->info); - const VkPipelineTessellationDomainOriginStateCreateInfo *domain_origin_state = - vk_find_struct_const(pCreateInfo->pTessellationState, - PIPELINE_TESSELLATION_DOMAIN_ORIGIN_STATE_CREATE_INFO); - if (!domain_origin_state || domain_origin_state->domainOrigin == VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT) - pipeline->pipeline_nir[MESA_SHADER_TESS_EVAL]->info.tess.ccw = !pipeline->pipeline_nir[MESA_SHADER_TESS_EVAL]->info.tess.ccw; + if (libstate) { + for (unsigned i = 0; i < libstate->libraryCount; i++) { + LVP_FROM_HANDLE(lvp_pipeline, p, libstate->pLibraries[i]); + if (p->stages & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) { + if (p->shaders[MESA_SHADER_FRAGMENT].pipeline_nir) + lvp_pipeline_nir_ref(&pipeline->shaders[MESA_SHADER_FRAGMENT].pipeline_nir, p->shaders[MESA_SHADER_FRAGMENT].pipeline_nir); + } + if (p->stages & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) { + lvp_forall_gfx_stage(j) { + if (j == MESA_SHADER_FRAGMENT) + continue; + if (p->shaders[j].pipeline_nir) + lvp_pipeline_nir_ref(&pipeline->shaders[j].pipeline_nir, p->shaders[j].pipeline_nir); + } + if (p->shaders[MESA_SHADER_TESS_EVAL].tess_ccw) + lvp_pipeline_nir_ref(&pipeline->shaders[MESA_SHADER_TESS_EVAL].tess_ccw, p->shaders[MESA_SHADER_TESS_EVAL].tess_ccw); + } + } + } else if (pipeline->stages & 
VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) { + const struct vk_rasterization_state *rs = pipeline->graphics_state.rs; + if (rs) { + /* always draw bresenham if !smooth */ + pipeline->line_smooth = rs->line.mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_KHR; + pipeline->disable_multisample = rs->line.mode == VK_LINE_RASTERIZATION_MODE_BRESENHAM_KHR || + rs->line.mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_KHR; + pipeline->line_rectangular = rs->line.mode != VK_LINE_RASTERIZATION_MODE_BRESENHAM_KHR; + } else + pipeline->line_rectangular = true; + lvp_pipeline_xfb_init(pipeline); } + if (!libstate && !pipeline->library) + lvp_pipeline_shaders_compile(pipeline, false); - pipeline->gs_output_lines = pipeline->pipeline_nir[MESA_SHADER_GEOMETRY] && - pipeline->pipeline_nir[MESA_SHADER_GEOMETRY]->info.gs.output_primitive == GL_LINES; - + return VK_SUCCESS; - bool has_fragment_shader = false; - for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { - gl_shader_stage stage = lvp_shader_stage(pCreateInfo->pStages[i].stage); - lvp_pipeline_compile(pipeline, stage); - if (stage == MESA_SHADER_FRAGMENT) - has_fragment_shader = true; +fail: + for (unsigned i = 0; i < ARRAY_SIZE(pipeline->shaders); i++) { + lvp_pipeline_nir_ref(&pipeline->shaders[i].pipeline_nir, NULL); } + vk_free(&device->vk.alloc, pipeline->state_data); - if (has_fragment_shader == false) { - /* create a dummy fragment shader for this pipeline. */ - nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, - "dummy_frag"); + return result; +} - pipeline->pipeline_nir[MESA_SHADER_FRAGMENT] = b.shader; - struct pipe_shader_state shstate = {0}; - shstate.type = PIPE_SHADER_IR_NIR; - shstate.ir.nir = pipeline->pipeline_nir[MESA_SHADER_FRAGMENT]; - pipeline->shader_cso[PIPE_SHADER_FRAGMENT] = device->queue.ctx->create_fs_state(device->queue.ctx, &shstate); +void +lvp_pipeline_shaders_compile(struct lvp_pipeline *pipeline, bool locked) +{ + if (pipeline->compiled) + return; + for (uint32_t i = 0; i < ARRAY_SIZE(pipeline->shaders); i++) { + if (!pipeline->shaders[i].pipeline_nir) + continue; + + gl_shader_stage stage = i; + assert(stage == pipeline->shaders[i].pipeline_nir->nir->info.stage); + + if (!pipeline->shaders[stage].inlines.can_inline) { + pipeline->shaders[stage].shader_cso = lvp_shader_compile(pipeline->device, &pipeline->shaders[stage], + nir_shader_clone(NULL, pipeline->shaders[stage].pipeline_nir->nir), locked); + if (pipeline->shaders[MESA_SHADER_TESS_EVAL].tess_ccw) + pipeline->shaders[MESA_SHADER_TESS_EVAL].tess_ccw_cso = lvp_shader_compile(pipeline->device, &pipeline->shaders[stage], + nir_shader_clone(NULL, pipeline->shaders[MESA_SHADER_TESS_EVAL].tess_ccw->nir), locked); + } } - return VK_SUCCESS; + pipeline->compiled = true; } static VkResult @@ -905,8 +997,9 @@ lvp_graphics_pipeline_create( VkDevice _device, VkPipelineCache _cache, const VkGraphicsPipelineCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkPipeline *pPipeline) + VkPipelineCreateFlagBits2KHR flags, + VkPipeline *pPipeline, + bool group) { LVP_FROM_HANDLE(lvp_device, device, _device); LVP_FROM_HANDLE(lvp_pipeline_cache, cache, _cache); @@ -915,19 +1008,50 @@ lvp_graphics_pipeline_create( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO); - pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8, + size_t size = 0; + const VkGraphicsPipelineShaderGroupsCreateInfoNV *groupinfo = vk_find_struct_const(pCreateInfo, 
GRAPHICS_PIPELINE_SHADER_GROUPS_CREATE_INFO_NV); + if (!group && groupinfo) + size += (groupinfo->groupCount + groupinfo->pipelineCount) * sizeof(VkPipeline); + + pipeline = vk_zalloc(&device->vk.alloc, sizeof(*pipeline) + size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (pipeline == NULL) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); vk_object_base_init(&device->vk, &pipeline->base, VK_OBJECT_TYPE_PIPELINE); - result = lvp_graphics_pipeline_init(pipeline, device, cache, pCreateInfo, - pAllocator); + uint64_t t0 = os_time_get_nano(); + result = lvp_graphics_pipeline_init(pipeline, device, cache, pCreateInfo, flags); if (result != VK_SUCCESS) { - vk_free2(&device->vk.alloc, pAllocator, pipeline); + vk_free(&device->vk.alloc, pipeline); return result; } + if (!group && groupinfo) { + VkGraphicsPipelineCreateInfo pci = *pCreateInfo; + for (unsigned i = 0; i < groupinfo->groupCount; i++) { + const VkGraphicsShaderGroupCreateInfoNV *g = &groupinfo->pGroups[i]; + pci.pVertexInputState = g->pVertexInputState; + pci.pTessellationState = g->pTessellationState; + pci.pStages = g->pStages; + pci.stageCount = g->stageCount; + result = lvp_graphics_pipeline_create(_device, _cache, &pci, flags, &pipeline->groups[i], true); + if (result != VK_SUCCESS) { + lvp_pipeline_destroy(device, pipeline, false); + return result; + } + pipeline->num_groups++; + } + for (unsigned i = 0; i < groupinfo->pipelineCount; i++) + pipeline->groups[pipeline->num_groups + i] = groupinfo->pPipelines[i]; + pipeline->num_groups_total = groupinfo->groupCount + groupinfo->pipelineCount; + } + + VkPipelineCreationFeedbackCreateInfo *feedback = (void*)vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO); + if (feedback && !group) { + feedback->pPipelineCreationFeedback->duration = os_time_get_nano() - t0; + feedback->pPipelineCreationFeedback->flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT; + memset(feedback->pPipelineStageCreationFeedbacks, 0, sizeof(VkPipelineCreationFeedback) * feedback->pipelineStageCreationFeedbackCount); + } *pPipeline = lvp_pipeline_to_handle(pipeline); @@ -946,16 +1070,27 @@ VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateGraphicsPipelines( unsigned i = 0; for (; i < count; i++) { - VkResult r; - r = lvp_graphics_pipeline_create(_device, - pipelineCache, - &pCreateInfos[i], - pAllocator, &pPipelines[i]); + VkResult r = VK_PIPELINE_COMPILE_REQUIRED; + VkPipelineCreateFlagBits2KHR flags = vk_graphics_pipeline_create_flags(&pCreateInfos[i]); + + if (!(flags & VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR)) + r = lvp_graphics_pipeline_create(_device, + pipelineCache, + &pCreateInfos[i], + flags, + &pPipelines[i], + false); if (r != VK_SUCCESS) { result = r; pPipelines[i] = VK_NULL_HANDLE; + if (flags & VK_PIPELINE_CREATE_2_EARLY_RETURN_ON_FAILURE_BIT_KHR) + break; } } + if (result != VK_SUCCESS) { + for (; i < count; i++) + pPipelines[i] = VK_NULL_HANDLE; + } return result; } @@ -964,29 +1099,23 @@ static VkResult lvp_compute_pipeline_init(struct lvp_pipeline *pipeline, struct lvp_device *device, struct lvp_pipeline_cache *cache, - const VkComputePipelineCreateInfo *pCreateInfo, - const VkAllocationCallbacks *alloc) + const VkComputePipelineCreateInfo *pCreateInfo) { - VK_FROM_HANDLE(vk_shader_module, module, - pCreateInfo->stage.module); - if (alloc == NULL) - alloc = &device->vk.alloc; pipeline->device = device; pipeline->layout = lvp_pipeline_layout_from_handle(pCreateInfo->layout); + 
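/* the layout is now referenced directly instead of deep-copying the whole
+    * create info, so hold a reference for the pipeline's lifetime */
+   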
vk_pipeline_layout_ref(&pipeline->layout->vk); pipeline->force_min_sample = false; - pipeline->mem_ctx = ralloc_context(NULL); - deep_copy_compute_create_info(pipeline->mem_ctx, - &pipeline->compute_create_info, pCreateInfo); - pipeline->is_compute_pipeline = true; - - lvp_shader_compile_to_ir(pipeline, module, - pCreateInfo->stage.pName, - MESA_SHADER_COMPUTE, - pCreateInfo->stage.pSpecializationInfo); - if (!pipeline->pipeline_nir[MESA_SHADER_COMPUTE]) - return VK_ERROR_FEATURE_NOT_PRESENT; - lvp_pipeline_compile(pipeline, MESA_SHADER_COMPUTE); + pipeline->type = LVP_PIPELINE_COMPUTE; + + VkResult result = lvp_shader_compile_to_ir(pipeline, &pCreateInfo->stage); + if (result != VK_SUCCESS) + return result; + + struct lvp_shader *shader = &pipeline->shaders[MESA_SHADER_COMPUTE]; + if (!shader->inlines.can_inline) + shader->shader_cso = lvp_shader_compile(pipeline->device, shader, nir_shader_clone(NULL, shader->pipeline_nir->nir), false); + pipeline->compiled = true; return VK_SUCCESS; } @@ -995,7 +1124,7 @@ lvp_compute_pipeline_create( VkDevice _device, VkPipelineCache _cache, const VkComputePipelineCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, + VkPipelineCreateFlagBits2KHR flags, VkPipeline *pPipeline) { LVP_FROM_HANDLE(lvp_device, device, _device); @@ -1005,20 +1134,27 @@ lvp_compute_pipeline_create( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO); - pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8, + pipeline = vk_zalloc(&device->vk.alloc, sizeof(*pipeline), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (pipeline == NULL) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); vk_object_base_init(&device->vk, &pipeline->base, VK_OBJECT_TYPE_PIPELINE); - result = lvp_compute_pipeline_init(pipeline, device, cache, pCreateInfo, - pAllocator); + uint64_t t0 = os_time_get_nano(); + result = lvp_compute_pipeline_init(pipeline, device, cache, pCreateInfo); if (result != VK_SUCCESS) { - vk_free2(&device->vk.alloc, pAllocator, pipeline); + vk_free(&device->vk.alloc, pipeline); return result; } + const VkPipelineCreationFeedbackCreateInfo *feedback = (void*)vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO); + if (feedback) { + feedback->pPipelineCreationFeedback->duration = os_time_get_nano() - t0; + feedback->pPipelineCreationFeedback->flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT; + memset(feedback->pPipelineStageCreationFeedbacks, 0, sizeof(VkPipelineCreationFeedback) * feedback->pipelineStageCreationFeedbackCount); + } + *pPipeline = lvp_pipeline_to_handle(pipeline); return VK_SUCCESS; @@ -1036,16 +1172,360 @@ VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateComputePipelines( unsigned i = 0; for (; i < count; i++) { - VkResult r; - r = lvp_compute_pipeline_create(_device, - pipelineCache, - &pCreateInfos[i], - pAllocator, &pPipelines[i]); + VkResult r = VK_PIPELINE_COMPILE_REQUIRED; + VkPipelineCreateFlagBits2KHR flags = vk_compute_pipeline_create_flags(&pCreateInfos[i]); + + if (!(flags & VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR)) + r = lvp_compute_pipeline_create(_device, + pipelineCache, + &pCreateInfos[i], + flags, + &pPipelines[i]); + if (r != VK_SUCCESS) { + result = r; + pPipelines[i] = VK_NULL_HANDLE; + if (flags & VK_PIPELINE_CREATE_2_EARLY_RETURN_ON_FAILURE_BIT_KHR) + break; + } + } + if (result != VK_SUCCESS) { + for (; i < count; i++) + pPipelines[i] = VK_NULL_HANDLE; + } + + + return result; +} + 
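
The feedback handling mirrors the graphics path: the whole-pipeline feedback gets
the os_time_get_nano() delta and the per-stage entries are zeroed. A minimal
caller-side sketch of how this surfaces through the API, assuming a valid
VkDevice dev, a filled VkPipelineShaderStageCreateInfo stage_info and a
VkPipelineLayout layout (illustrative names, not part of the change):

   VkPipelineCreationFeedback whole = {0}, per_stage = {0};
   VkPipelineCreationFeedbackCreateInfo fb = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_CREATION_FEEDBACK_CREATE_INFO,
      .pPipelineCreationFeedback = &whole,
      .pipelineStageCreationFeedbackCount = 1,
      .pPipelineStageCreationFeedbacks = &per_stage,
   };
   VkComputePipelineCreateInfo ci = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .pNext = &fb,
      .stage = stage_info,
      .layout = layout,
   };
   VkPipeline pipeline;
   if (vkCreateComputePipelines(dev, VK_NULL_HANDLE, 1, &ci, NULL, &pipeline) == VK_SUCCESS &&
       (whole.flags & VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT)) {
      /* whole.duration is the compile wall time in nanoseconds; per_stage
       * stays zeroed (no VALID_BIT set) because of the memset above */
   }
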
+VKAPI_ATTR void VKAPI_CALL lvp_DestroyShaderEXT( + VkDevice _device, + VkShaderEXT _shader, + const VkAllocationCallbacks* pAllocator) +{ + LVP_FROM_HANDLE(lvp_device, device, _device); + LVP_FROM_HANDLE(lvp_shader, shader, _shader); + + if (!shader) + return; + shader_destroy(device, shader, false); + + vk_pipeline_layout_unref(&device->vk, &shader->layout->vk); + blob_finish(&shader->blob); + vk_object_base_finish(&shader->base); + vk_free2(&device->vk.alloc, pAllocator, shader); +} + +static VkShaderEXT +create_shader_object(struct lvp_device *device, const VkShaderCreateInfoEXT *pCreateInfo, const VkAllocationCallbacks *pAllocator) +{ + nir_shader *nir = NULL; + gl_shader_stage stage = vk_to_mesa_shader_stage(pCreateInfo->stage); + assert(stage <= LVP_SHADER_STAGES && stage != MESA_SHADER_NONE); + if (pCreateInfo->codeType == VK_SHADER_CODE_TYPE_SPIRV_EXT) { + VkShaderModuleCreateInfo minfo = { + VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, + NULL, + 0, + pCreateInfo->codeSize, + pCreateInfo->pCode, + }; + VkPipelineShaderStageCreateFlagBits flags = 0; + if (pCreateInfo->flags & VK_SHADER_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT) + flags |= VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT; + if (pCreateInfo->flags & VK_SHADER_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT) + flags |= VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT; + VkPipelineShaderStageCreateInfo sinfo = { + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + &minfo, + flags, + pCreateInfo->stage, + VK_NULL_HANDLE, + pCreateInfo->pName, + pCreateInfo->pSpecializationInfo, + }; + VkResult result = compile_spirv(device, &sinfo, &nir); + if (result != VK_SUCCESS) + goto fail; + nir->info.separate_shader = true; + } else { + assert(pCreateInfo->codeType == VK_SHADER_CODE_TYPE_BINARY_EXT); + if (pCreateInfo->codeSize < SHA1_DIGEST_LENGTH + VK_UUID_SIZE + 1) + return VK_NULL_HANDLE; + struct blob_reader blob; + const uint8_t *data = pCreateInfo->pCode; + uint8_t uuid[VK_UUID_SIZE]; + lvp_device_get_cache_uuid(uuid); + if (memcmp(uuid, data, VK_UUID_SIZE)) + return VK_NULL_HANDLE; + size_t size = pCreateInfo->codeSize - SHA1_DIGEST_LENGTH - VK_UUID_SIZE; + unsigned char sha1[20]; + + struct mesa_sha1 sctx; + _mesa_sha1_init(&sctx); + _mesa_sha1_update(&sctx, data + SHA1_DIGEST_LENGTH + VK_UUID_SIZE, size); + _mesa_sha1_final(&sctx, sha1); + if (memcmp(sha1, data + VK_UUID_SIZE, SHA1_DIGEST_LENGTH)) + return VK_NULL_HANDLE; + + blob_reader_init(&blob, data + SHA1_DIGEST_LENGTH + VK_UUID_SIZE, size); + nir = nir_deserialize(NULL, device->pscreen->get_compiler_options(device->pscreen, PIPE_SHADER_IR_NIR, stage), &blob); + if (!nir) + goto fail; + } + if (!nir_shader_get_entrypoint(nir)) + goto fail; + struct lvp_shader *shader = vk_object_zalloc(&device->vk, pAllocator, sizeof(struct lvp_shader), VK_OBJECT_TYPE_SHADER_EXT); + if (!shader) + goto fail; + blob_init(&shader->blob); + VkPipelineLayoutCreateInfo pci = { + VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + NULL, + 0, + pCreateInfo->setLayoutCount, + pCreateInfo->pSetLayouts, + pCreateInfo->pushConstantRangeCount, + pCreateInfo->pPushConstantRanges, + }; + shader->layout = lvp_pipeline_layout_create(device, &pci, pAllocator); + + if (pCreateInfo->codeType == VK_SHADER_CODE_TYPE_SPIRV_EXT) + lvp_shader_lower(device, NULL, nir, shader->layout); + + lvp_shader_init(shader, nir); + + lvp_shader_xfb_init(shader); + if (stage == MESA_SHADER_TESS_EVAL) { + /* spec requires that all tess modes are set in both shaders */ + 
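/* with a shader object there is no linked TCS at create time: the patch
+       * vertex count comes from this TES's own tess info (see the note above),
+       * and a winding-flipped clone is compiled so the ccw variant can be picked
+       * if the dynamic tessellation domain origin inverts the winding */
+      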
nir_lower_patch_vertices(shader->pipeline_nir->nir, shader->pipeline_nir->nir->info.tess.tcs_vertices_out, NULL); + shader->tess_ccw = lvp_create_pipeline_nir(nir_shader_clone(NULL, shader->pipeline_nir->nir)); + shader->tess_ccw->nir->info.tess.ccw = !shader->pipeline_nir->nir->info.tess.ccw; + shader->tess_ccw_cso = lvp_shader_compile(device, shader, nir_shader_clone(NULL, shader->tess_ccw->nir), false); + } else if (stage == MESA_SHADER_FRAGMENT && nir->info.fs.uses_fbfetch_output) { + /* this is (currently) illegal */ + assert(!nir->info.fs.uses_fbfetch_output); + shader_destroy(device, shader, false); + + vk_object_base_finish(&shader->base); + vk_free2(&device->vk.alloc, pAllocator, shader); + return VK_NULL_HANDLE; + } + nir_serialize(&shader->blob, nir, true); + shader->shader_cso = lvp_shader_compile(device, shader, nir_shader_clone(NULL, nir), false); + return lvp_shader_to_handle(shader); +fail: + ralloc_free(nir); + return VK_NULL_HANDLE; +} + +VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateShadersEXT( + VkDevice _device, + uint32_t createInfoCount, + const VkShaderCreateInfoEXT* pCreateInfos, + const VkAllocationCallbacks* pAllocator, + VkShaderEXT* pShaders) +{ + LVP_FROM_HANDLE(lvp_device, device, _device); + unsigned i; + for (i = 0; i < createInfoCount; i++) { + pShaders[i] = create_shader_object(device, &pCreateInfos[i], pAllocator); + if (!pShaders[i]) { + if (pCreateInfos[i].codeType == VK_SHADER_CODE_TYPE_BINARY_EXT) { + if (i < createInfoCount - 1) + memset(&pShaders[i + 1], 0, (createInfoCount - i - 1) * sizeof(VkShaderEXT)); + return vk_error(device, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT); + } + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + } + } + return VK_SUCCESS; +} + + +VKAPI_ATTR VkResult VKAPI_CALL lvp_GetShaderBinaryDataEXT( + VkDevice device, + VkShaderEXT _shader, + size_t* pDataSize, + void* pData) +{ + LVP_FROM_HANDLE(lvp_shader, shader, _shader); + VkResult ret = VK_SUCCESS; + if (pData) { + if (*pDataSize < shader->blob.size + SHA1_DIGEST_LENGTH + VK_UUID_SIZE) { + ret = VK_INCOMPLETE; + *pDataSize = 0; + } else { + *pDataSize = MIN2(*pDataSize, shader->blob.size + SHA1_DIGEST_LENGTH + VK_UUID_SIZE); + uint8_t *data = pData; + lvp_device_get_cache_uuid(data); + struct mesa_sha1 sctx; + _mesa_sha1_init(&sctx); + _mesa_sha1_update(&sctx, shader->blob.data, shader->blob.size); + _mesa_sha1_final(&sctx, data + VK_UUID_SIZE); + memcpy(data + SHA1_DIGEST_LENGTH + VK_UUID_SIZE, shader->blob.data, shader->blob.size); + } + } else { + *pDataSize = shader->blob.size + SHA1_DIGEST_LENGTH + VK_UUID_SIZE; + } + return ret; +} + +#ifdef VK_ENABLE_BETA_EXTENSIONS +static VkResult +lvp_exec_graph_pipeline_create(VkDevice _device, VkPipelineCache _cache, + const VkExecutionGraphPipelineCreateInfoAMDX *create_info, + VkPipelineCreateFlagBits2KHR flags, + VkPipeline *out_pipeline) +{ + LVP_FROM_HANDLE(lvp_device, device, _device); + struct lvp_pipeline *pipeline; + VkResult result; + + assert(create_info->sType == VK_STRUCTURE_TYPE_EXECUTION_GRAPH_PIPELINE_CREATE_INFO_AMDX); + + uint32_t stage_count = create_info->stageCount; + if (create_info->pLibraryInfo) { + for (uint32_t i = 0; i < create_info->pLibraryInfo->libraryCount; i++) { + VK_FROM_HANDLE(lvp_pipeline, library, create_info->pLibraryInfo->pLibraries[i]); + stage_count += library->num_groups; + } + } + + pipeline = vk_zalloc(&device->vk.alloc, sizeof(*pipeline) + stage_count * sizeof(VkPipeline), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!pipeline) + return vk_error(device, 
VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   vk_object_base_init(&device->vk, &pipeline->base,
+                       VK_OBJECT_TYPE_PIPELINE);
+
+   uint64_t t0 = os_time_get_nano();
+
+   pipeline->type = LVP_PIPELINE_EXEC_GRAPH;
+   pipeline->layout = lvp_pipeline_layout_from_handle(create_info->layout);
+
+   pipeline->exec_graph.scratch_size = 0;
+   pipeline->num_groups = stage_count;
+
+   uint32_t stage_index = 0;
+   for (uint32_t i = 0; i < create_info->stageCount; i++) {
+      const VkPipelineShaderStageNodeCreateInfoAMDX *node_info = vk_find_struct_const(
+         create_info->pStages[i].pNext, PIPELINE_SHADER_STAGE_NODE_CREATE_INFO_AMDX);
+
+      VkComputePipelineCreateInfo stage_create_info = {
+         .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+         .flags = create_info->flags,
+         .stage = create_info->pStages[i],
+         .layout = create_info->layout,
+      };
+
+      result = lvp_compute_pipeline_create(_device, _cache, &stage_create_info, flags, &pipeline->groups[i]);
+      if (result != VK_SUCCESS)
+         goto fail;
+
+      VK_FROM_HANDLE(lvp_pipeline, stage, pipeline->groups[i]);
+      nir_shader *nir = stage->shaders[MESA_SHADER_COMPUTE].pipeline_nir->nir;
+
+      if (node_info) {
+         stage->exec_graph.name = node_info->pName;
+         stage->exec_graph.index = node_info->index;
+      }
+
+      /* TODO: Add a shader info NIR pass to figure out how many payloads the shader creates. */
+      stage->exec_graph.scratch_size = nir->info.cs.node_payloads_size * 256;
+      pipeline->exec_graph.scratch_size = MAX2(pipeline->exec_graph.scratch_size, stage->exec_graph.scratch_size);
+
+      stage_index++;
+   }
+
+   if (create_info->pLibraryInfo) {
+      for (uint32_t i = 0; i < create_info->pLibraryInfo->libraryCount; i++) {
+         VK_FROM_HANDLE(lvp_pipeline, library, create_info->pLibraryInfo->pLibraries[i]);
+         for (uint32_t j = 0; j < library->num_groups; j++) {
+            /* TODO: Do we need reference counting? */
+            pipeline->groups[stage_index] = library->groups[j];
+            stage_index++;
+         }
+         pipeline->exec_graph.scratch_size = MAX2(pipeline->exec_graph.scratch_size, library->exec_graph.scratch_size);
+      }
+   }
+
+   const VkPipelineCreationFeedbackCreateInfo *feedback = (void*)vk_find_struct_const(create_info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
+   if (feedback) {
+      feedback->pPipelineCreationFeedback->duration = os_time_get_nano() - t0;
+      feedback->pPipelineCreationFeedback->flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
+      memset(feedback->pPipelineStageCreationFeedbacks, 0, sizeof(VkPipelineCreationFeedback) * feedback->pipelineStageCreationFeedbackCount);
+   }
+
+   *out_pipeline = lvp_pipeline_to_handle(pipeline);
+
+   return VK_SUCCESS;
+
+fail:
+   for (uint32_t i = 0; i < stage_count; i++)
+      lvp_DestroyPipeline(_device, pipeline->groups[i], NULL);
+
+   vk_free(&device->vk.alloc, pipeline);
+
+   return result;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+lvp_CreateExecutionGraphPipelinesAMDX(VkDevice device, VkPipelineCache pipelineCache,
+                                      uint32_t createInfoCount,
+                                      const VkExecutionGraphPipelineCreateInfoAMDX *pCreateInfos,
+                                      const VkAllocationCallbacks *pAllocator,
+                                      VkPipeline *pPipelines)
+{
+   VkResult result = VK_SUCCESS;
+   uint32_t i = 0;
+
+   for (; i < createInfoCount; i++) {
+      VkPipelineCreateFlagBits2KHR flags = vk_graph_pipeline_create_flags(&pCreateInfos[i]);
+
+      VkResult r = VK_PIPELINE_COMPILE_REQUIRED;
+      if (!(flags & VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR))
+         r = lvp_exec_graph_pipeline_create(device, pipelineCache, &pCreateInfos[i], flags, &pPipelines[i]);
       if (r != VK_SUCCESS) {
          result = r;
          pPipelines[i] = VK_NULL_HANDLE;
+         if (flags & VK_PIPELINE_CREATE_2_EARLY_RETURN_ON_FAILURE_BIT_KHR)
+            break;
       }
    }
+   if (result != VK_SUCCESS) {
+      for (; i < createInfoCount; i++)
+         pPipelines[i] = VK_NULL_HANDLE;
+   }
    return result;
 }
+
+VKAPI_ATTR VkResult VKAPI_CALL
+lvp_GetExecutionGraphPipelineScratchSizeAMDX(VkDevice device, VkPipeline executionGraph,
+                                             VkExecutionGraphPipelineScratchSizeAMDX *pSizeInfo)
+{
+   VK_FROM_HANDLE(lvp_pipeline, pipeline, executionGraph);
+   pSizeInfo->size = MAX2(pipeline->exec_graph.scratch_size * 32, 16);
+   return VK_SUCCESS;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+lvp_GetExecutionGraphPipelineNodeIndexAMDX(VkDevice device, VkPipeline executionGraph,
+                                           const VkPipelineShaderStageNodeCreateInfoAMDX *pNodeInfo,
+                                           uint32_t *pNodeIndex)
+{
+   VK_FROM_HANDLE(lvp_pipeline, pipeline, executionGraph);
+
+   for (uint32_t i = 0; i < pipeline->num_groups; i++) {
+      VK_FROM_HANDLE(lvp_pipeline, stage, pipeline->groups[i]);
+      if (stage->exec_graph.index == pNodeInfo->index &&
+          !strcmp(stage->exec_graph.name, pNodeInfo->pName)) {
+         *pNodeIndex = i;
+         return VK_SUCCESS;
+      }
+   }
+
+   return VK_ERROR_OUT_OF_HOST_MEMORY;
+}
+#endif
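
The binary code path defines the blob layout produced by lvp_GetShaderBinaryDataEXT:
the driver cache UUID, then a SHA-1 over the serialized payload, then the NIR
itself, and create_shader_object() re-validates the UUID and SHA-1 on import. A
sketch of the resulting save/restore round trip, assuming a valid VkDevice dev
and a compute-stage VkShaderEXT shader (illustrative names, <stdlib.h> assumed,
error handling elided):

   size_t size = 0;
   vkGetShaderBinaryDataEXT(dev, shader, &size, NULL);   /* query total size */
   void *data = malloc(size);
   vkGetShaderBinaryDataEXT(dev, shader, &size, data);   /* UUID + SHA-1 + NIR */

   VkShaderCreateInfoEXT info = {
      .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO_EXT,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,      /* must match the saved shader */
      .codeType = VK_SHADER_CODE_TYPE_BINARY_EXT,
      .codeSize = size,
      .pCode = data,
      .pName = "main",
   };
   VkShaderEXT clone;
   /* a stale UUID or corrupted payload fails the memcmp/SHA-1 checks and
    * surfaces as VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT */
   VkResult r = vkCreateShadersEXT(dev, 1, &info, NULL, &clone);
   free(data);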