diff options
author | Alejandro Piñeiro <apinheiro@igalia.com> | 2020-01-20 15:29:38 +0100 |
---|---|---|
committer | Marge Bot <eric+marge@anholt.net> | 2020-10-13 21:21:27 +0000 |
commit | 9afd24f89a6ad9be51ff9a943bb10af61408c4df (patch) | |
tree | 2b588e0b730c815a7e1b4ae87bcd29bdb8730e08 | |
parent | 7d6fbea536eb24be59008fe218864750a97905e9 (diff) |
v3dv: initial descriptor set support
Focused on getting the basic UBO and SSBO cases implemented. So no
dynamic offsets, push constants, samplers, and so on.
This includes an initial implementation of CreateDescriptorPool,
CreateDescriptorSetLayout, AllocateDescriptorSets,
UpdateDescriptorSets, CreatePipelineLayout, and CmdBindDescriptorSets.
Also introduces lowering of Vulkan intrinsics. For now, just
vulkan_resource_index.
We also introduce a descriptor_map, in this case for the UBOs and
SSBOs, used to assign an index to each set/binding combination. That
index is then used when filling in the details of the UBO or SSBO in
other places (like QUNIFORM_UBO_ADDR or QUNIFORM_SSBO_OFFSET).
Note that at this point we don't need a BO for the descriptor pool, so
descriptor sets are not getting a piece of it. That will likely
change as we start to support more descriptor set types.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6766>
-rw-r--r-- | src/broadcom/vulkan/v3dv_cl.h | 10 | ||||
-rw-r--r-- | src/broadcom/vulkan/v3dv_cmd_buffer.c | 32 | ||||
-rw-r--r-- | src/broadcom/vulkan/v3dv_descriptor_set.c | 443 | ||||
-rw-r--r-- | src/broadcom/vulkan/v3dv_device.c | 6 | ||||
-rw-r--r-- | src/broadcom/vulkan/v3dv_pipeline.c | 141 | ||||
-rw-r--r-- | src/broadcom/vulkan/v3dv_private.h | 95 | ||||
-rw-r--r-- | src/broadcom/vulkan/v3dv_uniforms.c | 56 |
7 files changed, 776 insertions, 7 deletions
diff --git a/src/broadcom/vulkan/v3dv_cl.h b/src/broadcom/vulkan/v3dv_cl.h index c0b0e380786..03d328c2f58 100644 --- a/src/broadcom/vulkan/v3dv_cl.h +++ b/src/broadcom/vulkan/v3dv_cl.h @@ -119,6 +119,16 @@ cl_aligned_f(struct v3dv_cl_out **cl, float f) cl_aligned_u32(cl, fui(f)); } +static inline void +cl_aligned_reloc(struct v3dv_cl *cl, + struct v3dv_cl_out **cl_out, + struct v3dv_bo *bo, + uint32_t offset) +{ + cl_aligned_u32(cl_out, bo->offset + offset); + v3dv_job_add_bo(cl->job, bo); +} + uint32_t v3dv_cl_ensure_space(struct v3dv_cl *cl, uint32_t space, uint32_t alignment); void v3dv_cl_ensure_space_with_branch(struct v3dv_cl *cl, uint32_t space); diff --git a/src/broadcom/vulkan/v3dv_cmd_buffer.c b/src/broadcom/vulkan/v3dv_cmd_buffer.c index b46733e937b..2353908135c 100644 --- a/src/broadcom/vulkan/v3dv_cmd_buffer.c +++ b/src/broadcom/vulkan/v3dv_cmd_buffer.c @@ -422,6 +422,7 @@ cmd_buffer_reset(struct v3dv_cmd_buffer *cmd_buffer) state->framebuffer = NULL; state->subpass_idx = 0; state->job = NULL; + state->descriptor_state.valid = 0; cmd_buffer->status = V3DV_CMD_BUFFER_STATUS_INITIALIZED; } @@ -1985,8 +1986,9 @@ cmd_buffer_draw(struct v3dv_cmd_buffer *cmd_buffer, uint32_t *dirty = &cmd_buffer->state.dirty; struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic; - /* vertex buffer state is emitted as part of the shader state record */ - if (*dirty & (V3DV_CMD_DIRTY_PIPELINE | V3DV_CMD_DIRTY_VERTEX_BUFFER)) { + if (*dirty & (V3DV_CMD_DIRTY_PIPELINE | + V3DV_CMD_DIRTY_VERTEX_BUFFER | + V3DV_CMD_DIRTY_DESCRIPTOR_SETS)) { emit_graphics_pipeline(cmd_buffer); } @@ -2118,3 +2120,29 @@ v3dv_CmdSetStencilReference(VkCommandBuffer commandBuffer, cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_STENCIL_REFERENCE; } + +void +v3dv_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, + VkPipelineBindPoint pipelineBindPoint, + VkPipelineLayout _layout, + uint32_t firstSet, + uint32_t descriptorSetCount, + const VkDescriptorSet *pDescriptorSets, + uint32_t 
dynamicOffsetCount, + const uint32_t *pDynamicOffsets) +{ + V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); + + assert(pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS); + assert(firstSet + descriptorSetCount <= MAX_SETS); + + for (uint32_t i = 0; i < descriptorSetCount; i++) { + V3DV_FROM_HANDLE(v3dv_descriptor_set, set, pDescriptorSets[i]); + uint32_t index = firstSet + i; + + cmd_buffer->state.descriptor_state.descriptors[index] = set; + cmd_buffer->state.descriptor_state.valid |= (1u << index); + } + + cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_DESCRIPTOR_SETS; +} diff --git a/src/broadcom/vulkan/v3dv_descriptor_set.c b/src/broadcom/vulkan/v3dv_descriptor_set.c index d640412a498..42eb505ff6d 100644 --- a/src/broadcom/vulkan/v3dv_descriptor_set.c +++ b/src/broadcom/vulkan/v3dv_descriptor_set.c @@ -25,13 +25,39 @@ #include "v3dv_private.h" +/* + * As anv and tu already points: + * + * "Pipeline layouts. These have nothing to do with the pipeline. They are + * just multiple descriptor set layouts pasted together." 
+ */ + VkResult v3dv_CreatePipelineLayout(VkDevice _device, const VkPipelineLayoutCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkPipelineLayout *pPipelineLayout) { - /* FIXME: stub*/ + V3DV_FROM_HANDLE(v3dv_device, device, _device); + struct v3dv_pipeline_layout *layout; + + assert(pCreateInfo->sType == + VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO); + + layout = vk_alloc2(&device->alloc, pAllocator, sizeof(*layout), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (layout == NULL) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + layout->num_sets = pCreateInfo->setLayoutCount; + + for (uint32_t set = 0; set < pCreateInfo->setLayoutCount; set++) { + V3DV_FROM_HANDLE(v3dv_descriptor_set_layout, set_layout, + pCreateInfo->pSetLayouts[set]); + layout->set[set].layout = set_layout; + } + + *pPipelineLayout = v3dv_pipeline_layout_to_handle(layout); return VK_SUCCESS; } @@ -41,5 +67,418 @@ v3dv_DestroyPipelineLayout(VkDevice _device, VkPipelineLayout _pipelineLayout, const VkAllocationCallbacks *pAllocator) { - /* FIXME: stub */ + V3DV_FROM_HANDLE(v3dv_device, device, _device); + V3DV_FROM_HANDLE(v3dv_pipeline_layout, pipeline_layout, _pipelineLayout); + + if (!pipeline_layout) + return; + vk_free2(&device->alloc, pAllocator, pipeline_layout); +} + +VkResult +v3dv_CreateDescriptorPool(VkDevice _device, + const VkDescriptorPoolCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkDescriptorPool *pDescriptorPool) +{ + V3DV_FROM_HANDLE(v3dv_device, device, _device); + struct v3dv_descriptor_pool *pool; + uint64_t size = sizeof(struct v3dv_descriptor_pool); + uint32_t descriptor_count = 0; + + for (unsigned i = 0; i < pCreateInfo->poolSizeCount; ++i) { + if (pCreateInfo->pPoolSizes[i].type != VK_DESCRIPTOR_TYPE_SAMPLER) + descriptor_count += pCreateInfo->pPoolSizes[i].descriptorCount; + + switch(pCreateInfo->pPoolSizes[i].type) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + 
break; + default: + unreachable("Unimplemented descriptor type"); + break; + } + } + + if (!(pCreateInfo->flags & VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT)) { + uint64_t host_size = + pCreateInfo->maxSets * sizeof(struct v3dv_descriptor_set); + host_size += sizeof(struct v3dv_descriptor) * descriptor_count; + size += host_size; + } else { + size += sizeof(struct v3dv_descriptor_pool_entry) * pCreateInfo->maxSets; + } + + pool = vk_alloc2(&device->alloc, pAllocator, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + + if (!pool) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + memset(pool, 0, sizeof(*pool)); + + if (!(pCreateInfo->flags & VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT)) { + pool->host_memory_base = (uint8_t*)pool + sizeof(struct v3dv_descriptor_pool); + pool->host_memory_ptr = pool->host_memory_base; + pool->host_memory_end = (uint8_t*)pool + size; + } + + pool->max_entry_count = pCreateInfo->maxSets; + + *pDescriptorPool = v3dv_descriptor_pool_to_handle(pool); + + return VK_SUCCESS; +} + +static void +descriptor_set_destroy(struct v3dv_device *device, + struct v3dv_descriptor_pool *pool, + struct v3dv_descriptor_set *set) +{ + assert(!pool->host_memory_base); + + for (uint32_t i = 0; i < pool->entry_count; i++) { + if (pool->entries[i].set == set) { + memmove(&pool->entries[i], &pool->entries[i+1], + sizeof(pool->entries[i]) * (pool->entry_count - i - 1)); + --pool->entry_count; + break; + } + } + vk_free2(&device->alloc, NULL, set); +} + +void +v3dv_DestroyDescriptorPool(VkDevice _device, + VkDescriptorPool _pool, + const VkAllocationCallbacks *pAllocator) +{ + V3DV_FROM_HANDLE(v3dv_device, device, _device); + V3DV_FROM_HANDLE(v3dv_descriptor_pool, pool, _pool); + + if (!pool) + return; + + if (!pool->host_memory_base) { + for(int i = 0; i < pool->entry_count; ++i) { + descriptor_set_destroy(device, pool, pool->entries[i].set); + } + } + + vk_free2(&device->alloc, pAllocator, pool); +} + +VkResult 
+v3dv_ResetDescriptorPool(VkDevice _device, + VkDescriptorPool descriptorPool, + VkDescriptorPoolResetFlags flags) +{ + V3DV_FROM_HANDLE(v3dv_device, device, _device); + V3DV_FROM_HANDLE(v3dv_descriptor_pool, pool, descriptorPool); + + if (!pool->host_memory_base) { + for(int i = 0; i < pool->entry_count; ++i) { + descriptor_set_destroy(device, pool, pool->entries[i].set); + } + } + + pool->entry_count = 0; + pool->host_memory_ptr = pool->host_memory_base; + + return VK_SUCCESS; +} + +static int +binding_compare(const void *av, const void *bv) +{ + const VkDescriptorSetLayoutBinding *a = + (const VkDescriptorSetLayoutBinding *) av; + const VkDescriptorSetLayoutBinding *b = + (const VkDescriptorSetLayoutBinding *) bv; + + return (a->binding < b->binding) ? -1 : (a->binding > b->binding) ? 1 : 0; +} + +static VkDescriptorSetLayoutBinding * +create_sorted_bindings(const VkDescriptorSetLayoutBinding *bindings, + unsigned count, + struct v3dv_device *device, + const VkAllocationCallbacks *pAllocator) +{ + VkDescriptorSetLayoutBinding *sorted_bindings = + vk_alloc2(&device->alloc, pAllocator, + count * sizeof(VkDescriptorSetLayoutBinding), + 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + + if (!sorted_bindings) + return NULL; + + memcpy(sorted_bindings, bindings, + count * sizeof(VkDescriptorSetLayoutBinding)); + + qsort(sorted_bindings, count, sizeof(VkDescriptorSetLayoutBinding), + binding_compare); + + return sorted_bindings; +} + +VkResult +v3dv_CreateDescriptorSetLayout(VkDevice _device, + const VkDescriptorSetLayoutCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkDescriptorSetLayout *pSetLayout) +{ + V3DV_FROM_HANDLE(v3dv_device, device, _device); + struct v3dv_descriptor_set_layout *set_layout; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO); + + uint32_t max_binding = 0; + for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) { + max_binding = MAX2(max_binding, pCreateInfo->pBindings[j].binding); + } + + 
uint32_t size = sizeof(struct v3dv_descriptor_set_layout) + + (max_binding + 1) * sizeof(set_layout->binding[0]); + + set_layout = vk_alloc2(&device->alloc, pAllocator, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + + if (!set_layout) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + VkDescriptorSetLayoutBinding *bindings = + create_sorted_bindings(pCreateInfo->pBindings, pCreateInfo->bindingCount, + device, pAllocator); + + if (!bindings) { + vk_free2(&device->alloc, pAllocator, set_layout); + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + } + + memset(set_layout->binding, 0, + size - sizeof(struct v3dv_descriptor_set_layout)); + + set_layout->binding_count = max_binding + 1; + set_layout->flags = pCreateInfo->flags; + set_layout->shader_stages = 0; + + uint32_t descriptor_count = 0; + + for (uint32_t i = 0; i < pCreateInfo->bindingCount; i++) { + const VkDescriptorSetLayoutBinding *binding = bindings + i; + uint32_t binding_number = binding->binding; + + switch (binding->descriptorType) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + break; + default: + unreachable("Unknown descriptor type\n"); + break; + } + + set_layout->binding[binding_number].type = binding->descriptorType; + set_layout->binding[binding_number].array_size = binding->descriptorCount; + set_layout->binding[binding_number].descriptor_index = descriptor_count; + + descriptor_count += binding->descriptorCount; + + /* FIXME: right now we don't use shader_stages. We could explore if we + * could use it to add another filter to upload or allocate the + * descriptor data. 
+ */ + set_layout->shader_stages |= binding->stageFlags; + } + + vk_free2(&device->alloc, pAllocator, bindings); + + set_layout->descriptor_count = descriptor_count; + + *pSetLayout = v3dv_descriptor_set_layout_to_handle(set_layout); + + return VK_SUCCESS; +} + +void +v3dv_DestroyDescriptorSetLayout(VkDevice _device, + VkDescriptorSetLayout _set_layout, + const VkAllocationCallbacks *pAllocator) +{ + V3DV_FROM_HANDLE(v3dv_device, device, _device); + V3DV_FROM_HANDLE(v3dv_descriptor_set_layout, set_layout, _set_layout); + + if (!set_layout) + return; + + vk_free2(&device->alloc, pAllocator, set_layout); +} + +static VkResult +descriptor_set_create(struct v3dv_device *device, + struct v3dv_descriptor_pool *pool, + const struct v3dv_descriptor_set_layout *layout, + struct v3dv_descriptor_set **out_set) +{ + struct v3dv_descriptor_set *set; + uint32_t descriptor_count = layout->descriptor_count; + unsigned range_offset = sizeof(struct v3dv_descriptor_set) + + sizeof(struct v3dv_descriptor) * descriptor_count; + unsigned mem_size = range_offset; + + if (pool->host_memory_base) { + if (pool->host_memory_end - pool->host_memory_ptr < mem_size) + return vk_error(device->instance, VK_ERROR_OUT_OF_POOL_MEMORY); + + set = (struct v3dv_descriptor_set*)pool->host_memory_ptr; + pool->host_memory_ptr += mem_size; + } else { + set = vk_alloc2(&device->alloc, NULL, mem_size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + + if (!set) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + } + + memset(set, 0, mem_size); + set->pool = pool; + + set->layout = layout; + + if (!pool->host_memory_base && pool->entry_count == pool->max_entry_count) { + vk_free2(&device->alloc, NULL, set); + return vk_error(device->instance, VK_ERROR_OUT_OF_POOL_MEMORY); + } + + if (!pool->host_memory_base) { + pool->entries[pool->entry_count].set = set; + pool->entry_count++; + } + + *out_set = set; + + return VK_SUCCESS; +} + +VkResult +v3dv_AllocateDescriptorSets(VkDevice _device, + const 
VkDescriptorSetAllocateInfo *pAllocateInfo, + VkDescriptorSet *pDescriptorSets) +{ + V3DV_FROM_HANDLE(v3dv_device, device, _device); + V3DV_FROM_HANDLE(v3dv_descriptor_pool, pool, pAllocateInfo->descriptorPool); + + VkResult result = VK_SUCCESS; + struct v3dv_descriptor_set *set = NULL; + uint32_t i = 0; + + for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) { + V3DV_FROM_HANDLE(v3dv_descriptor_set_layout, layout, + pAllocateInfo->pSetLayouts[i]); + + result = descriptor_set_create(device, pool, layout, &set); + if (result != VK_SUCCESS) + break; + + pDescriptorSets[i] = v3dv_descriptor_set_to_handle(set); + } + + if (result != VK_SUCCESS) { + v3dv_FreeDescriptorSets(_device, pAllocateInfo->descriptorPool, + i, pDescriptorSets); + for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) { + pDescriptorSets[i] = VK_NULL_HANDLE; + } + } + + return result; +} + +VkResult +v3dv_FreeDescriptorSets(VkDevice _device, + VkDescriptorPool descriptorPool, + uint32_t count, + const VkDescriptorSet *pDescriptorSets) +{ + V3DV_FROM_HANDLE(v3dv_device, device, _device); + V3DV_FROM_HANDLE(v3dv_descriptor_pool, pool, descriptorPool); + + for (uint32_t i = 0; i < count; i++) { + V3DV_FROM_HANDLE(v3dv_descriptor_set, set, pDescriptorSets[i]); + + if (set && !pool->host_memory_base) + descriptor_set_destroy(device, pool, set); + } + + return VK_SUCCESS; +} + +void +v3dv_UpdateDescriptorSets(VkDevice _device, + uint32_t descriptorWriteCount, + const VkWriteDescriptorSet *pDescriptorWrites, + uint32_t descriptorCopyCount, + const VkCopyDescriptorSet *pDescriptorCopies) +{ + for (uint32_t i = 0; i < descriptorWriteCount; i++) { + const VkWriteDescriptorSet *writeset = &pDescriptorWrites[i]; + V3DV_FROM_HANDLE(v3dv_descriptor_set, set, writeset->dstSet); + + const struct v3dv_descriptor_set_binding_layout *binding_layout = + set->layout->binding + writeset->dstBinding; + + struct v3dv_descriptor *descriptor = set->descriptors; + + descriptor += binding_layout->descriptor_index; + 
descriptor += writeset->dstArrayElement; + for (uint32_t j = 0; j < writeset->descriptorCount; ++j) { + + switch(writeset->descriptorType) { + + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: { + const VkDescriptorBufferInfo *buffer_info = writeset->pBufferInfo + j; + V3DV_FROM_HANDLE(v3dv_buffer, buffer, buffer_info->buffer); + + descriptor->bo = buffer->mem->bo; + descriptor->offset = buffer_info->offset; + break; + } + default: + unreachable("unimplemented descriptor type"); + break; + } + descriptor++; + } + } + + for (uint32_t i = 0; i < descriptorCopyCount; i++) { + const VkCopyDescriptorSet *copyset = &pDescriptorCopies[i]; + V3DV_FROM_HANDLE(v3dv_descriptor_set, src_set, + copyset->srcSet); + V3DV_FROM_HANDLE(v3dv_descriptor_set, dst_set, + copyset->dstSet); + + const struct v3dv_descriptor_set_binding_layout *src_binding_layout = + src_set->layout->binding + copyset->srcBinding; + const struct v3dv_descriptor_set_binding_layout *dst_binding_layout = + dst_set->layout->binding + copyset->dstBinding; + + assert(src_binding_layout->type == dst_binding_layout->type); + + struct v3dv_descriptor *src_descriptor = src_set->descriptors; + struct v3dv_descriptor *dst_descriptor = dst_set->descriptors; + + src_descriptor += src_binding_layout->descriptor_index; + src_descriptor += copyset->srcArrayElement; + + dst_descriptor += dst_binding_layout->descriptor_index; + dst_descriptor += copyset->dstArrayElement; + + for (uint32_t j = 0; j < copyset->descriptorCount; j++) { + *dst_descriptor = *src_descriptor; + dst_descriptor++; + src_descriptor++; + } + } } diff --git a/src/broadcom/vulkan/v3dv_device.c b/src/broadcom/vulkan/v3dv_device.c index 110ebc494fe..44a3a967727 100644 --- a/src/broadcom/vulkan/v3dv_device.c +++ b/src/broadcom/vulkan/v3dv_device.c @@ -559,8 +559,8 @@ v3dv_GetPhysicalDeviceFeatures(VkPhysicalDevice physicalDevice, .textureCompressionBC = false, .occlusionQueryPrecise = false, .pipelineStatisticsQuery = false, 
- .vertexPipelineStoresAndAtomics = false, - .fragmentStoresAndAtomics = false, + .vertexPipelineStoresAndAtomics = true, + .fragmentStoresAndAtomics = true, .shaderTessellationAndGeometryPointSize = false, .shaderImageGatherExtended = false, .shaderStorageImageExtendedFormats = false, @@ -649,7 +649,7 @@ v3dv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice, .maxSamplerAllocationCount = 64 * 1024, .bufferImageGranularity = 256, /* A cache line */ .sparseAddressSpaceSize = 0, - .maxBoundDescriptorSets = 16, + .maxBoundDescriptorSets = MAX_SETS, .maxPerStageDescriptorSamplers = max_samplers, .maxPerStageDescriptorUniformBuffers = max_uniform_buffers, .maxPerStageDescriptorStorageBuffers = max_storage_buffers, diff --git a/src/broadcom/vulkan/v3dv_pipeline.c b/src/broadcom/vulkan/v3dv_pipeline.c index ac0430cd576..1cf666f3889 100644 --- a/src/broadcom/vulkan/v3dv_pipeline.c +++ b/src/broadcom/vulkan/v3dv_pipeline.c @@ -30,6 +30,8 @@ #include "common/v3d_debug.h" +#include "compiler/nir/nir_builder.h" + #include "vulkan/util/vk_format.h" #include "broadcom/cle/v3dx_pack.h" @@ -261,6 +263,130 @@ type_size_vec4(const struct glsl_type *type, bool bindless) return glsl_count_attribute_slots(type, false); } +static unsigned +descriptor_map_add(struct v3dv_descriptor_map *map, + int set, + int binding, + int value, + int array_size) +{ + unsigned index = 0; + for (unsigned i = 0; i < map->num; i++) { + if (set == map->set[i] && binding == map->binding[i]) { + assert(value == map->value[i]); + assert(array_size == map->array_size[i]); + return index; + } + index += map->array_size[i]; + } + + assert(index == map->num_desc); + + map->set[map->num] = set; + map->binding[map->num] = binding; + map->value[map->num] = value; + map->array_size[map->num] = array_size; + map->num++; + map->num_desc += array_size; + + return index; +} + +/* Gathers info from the intrinsic (set and binding) and then lowers it so it + * could be used by the v3d_compiler */ +static bool 
+lower_vulkan_resource_index(nir_builder *b, + nir_intrinsic_instr *instr, + struct v3dv_pipeline *pipeline, + const struct v3dv_pipeline_layout *layout) +{ + if (instr->intrinsic != nir_intrinsic_vulkan_resource_index) + return false; + + nir_const_value *const_val = nir_src_as_const_value(instr->src[0]); + + unsigned set = nir_intrinsic_desc_set(instr); + unsigned binding = nir_intrinsic_binding(instr); + struct v3dv_descriptor_set_layout *set_layout = layout->set[set].layout; + struct v3dv_descriptor_set_binding_layout *binding_layout = + &set_layout->binding[binding]; + unsigned index = 0; + + switch (nir_intrinsic_desc_type(instr)) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: { + struct v3dv_descriptor_map *descriptor_map = + nir_intrinsic_desc_type(instr) == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ? + &pipeline->ubo_map : &pipeline->ssbo_map; + + if (!const_val) + unreachable("non-constant vulkan_resource_index array index"); + + /* Note: although for ubos we should skip index 0 which is used for push + * constants, that is already took into account when loading the ubo at + * nir_to_vir, so we don't need to do it here again. 
+ */ + index = descriptor_map_add(descriptor_map, set, binding, 0, + binding_layout->array_size); + index += const_val->u32; + break; + } + + default: + unreachable("unsupported desc_type for vulkan_resource_index"); + break; + } + + nir_ssa_def_rewrite_uses(&instr->dest.ssa, + nir_src_for_ssa(nir_imm_int(b, index))); + nir_instr_remove(&instr->instr); + + return true; +} + +static bool +lower_impl(nir_function_impl *impl, + struct v3dv_pipeline *pipeline, + const struct v3dv_pipeline_layout *layout) +{ + nir_builder b; + nir_builder_init(&b, impl); + bool progress = false; + + nir_foreach_block(block, impl) { + nir_foreach_instr_safe(instr, block) { + b.cursor = nir_before_instr(instr); + switch (instr->type) { + case nir_instr_type_intrinsic: + progress |= + lower_vulkan_resource_index(&b, nir_instr_as_intrinsic(instr), + pipeline, layout); + break; + default: + break; + } + } + } + + return progress; +} + +static bool +lower_pipeline_layout_info(nir_shader *shader, + struct v3dv_pipeline *pipeline, + const struct v3dv_pipeline_layout *layout) +{ + bool progress = false; + + nir_foreach_function(function, shader) { + if (function->impl) + progress |= lower_impl(function->impl, pipeline, layout); + } + + return progress; +} + + static void lower_fs_inputs(nir_shader *nir) { @@ -742,6 +868,17 @@ link_shaders(nir_shader *producer, nir_shader *consumer) } } +static void +pipeline_lower_nir(struct v3dv_pipeline *pipeline, + struct v3dv_pipeline_stage *p_stage, + struct v3dv_pipeline_layout *layout) +{ + nir_shader_gather_info(p_stage->nir, nir_shader_get_entrypoint(p_stage->nir)); + + /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */ + NIR_PASS_V(p_stage->nir, lower_pipeline_layout_info, pipeline, layout); +} + static VkResult pipeline_compile_graphics(struct v3dv_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pCreateInfo, @@ -801,6 +938,8 @@ pipeline_compile_graphics(struct v3dv_pipeline *pipeline, next_stage = stages[stage]; } + 
V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, pCreateInfo->layout); + /* Compiling to vir */ for (int stage = MESA_SHADER_STAGES - 1; stage >= 0; stage--) { if (stages[stage] == NULL || stages[stage]->entrypoint == NULL) @@ -808,6 +947,8 @@ pipeline_compile_graphics(struct v3dv_pipeline *pipeline, struct v3dv_pipeline_stage *p_stage = stages[stage]; + pipeline_lower_nir(pipeline, p_stage, layout); + switch(stage) { case MESA_SHADER_VERTEX: /* Right now we only support pipelines with both vertex and fragment diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h index d6cf671f6b8..677bf34d2f3 100644 --- a/src/broadcom/vulkan/v3dv_private.h +++ b/src/broadcom/vulkan/v3dv_private.h @@ -98,6 +98,10 @@ pack_emit_reloc(void *cl, const void *reloc) {} #define v3dv_assert(x) #endif +#define for_each_bit(b, dword) \ + for (uint32_t __dword = (dword); \ + (b) = __builtin_ffs(__dword) - 1, __dword; __dword &= ~(1 << (b))) + #define typed_memcpy(dest, src, count) ({ \ STATIC_ASSERT(sizeof(*src) == sizeof(*dest)); \ memcpy((dest), (src), (count) * sizeof(*(src))); \ @@ -115,6 +119,8 @@ pack_emit_reloc(void *cl, const void *reloc) {} #define MAX_VBS 16 #define MAX_VERTEX_ATTRIBS 16 +#define MAX_SETS 16 + struct v3dv_instance; #ifdef USE_V3D_SIMULATOR @@ -450,8 +456,10 @@ enum v3dv_cmd_dirty_bits { V3DV_CMD_DIRTY_STENCIL_REFERENCE = 1 << 4, V3DV_CMD_DIRTY_PIPELINE = 1 << 5, V3DV_CMD_DIRTY_VERTEX_BUFFER = 1 << 6, + V3DV_CMD_DIRTY_DESCRIPTOR_SETS = 1 << 7, }; + struct v3dv_dynamic_state { /** * Bitmask of (1 << VK_DYNAMIC_STATE_*). 
@@ -527,6 +535,11 @@ struct v3dv_vertex_binding { VkDeviceSize offset; }; +struct v3dv_descriptor_state { + struct v3dv_descriptor_set *descriptors[MAX_SETS]; + uint32_t valid; +}; + struct v3dv_cmd_buffer_state { const struct v3dv_render_pass *pass; const struct v3dv_framebuffer *framebuffer; @@ -538,6 +551,7 @@ struct v3dv_cmd_buffer_state { uint32_t subpass_idx; struct v3dv_pipeline *pipeline; + struct v3dv_descriptor_state descriptor_state; struct v3dv_dynamic_state dynamic; uint32_t dirty; @@ -669,6 +683,80 @@ struct vpm_config { uint32_t gs_width; }; +struct v3dv_descriptor_pool_entry +{ + struct v3dv_descriptor_set *set; +}; + +struct v3dv_descriptor_pool { + uint8_t *host_memory_base; + uint8_t *host_memory_ptr; + uint8_t *host_memory_end; + + uint32_t entry_count; + uint32_t max_entry_count; + struct v3dv_descriptor_pool_entry entries[0]; +}; + +struct v3dv_descriptor { + struct v3dv_bo *bo; + uint32_t offset; +}; + +struct v3dv_descriptor_set { + struct v3dv_descriptor_pool *pool; + + const struct v3dv_descriptor_set_layout *layout; + + /* The descriptors below can be indexed (set/binding) using the set_layout + */ + struct v3dv_descriptor descriptors[0]; +}; + +struct v3dv_descriptor_set_binding_layout { + VkDescriptorType type; + + /* Number of array elements in this binding */ + uint32_t array_size; + + uint32_t descriptor_index; +}; + +struct v3dv_descriptor_set_layout { + VkDescriptorSetLayoutCreateFlags flags; + + /* Number of bindings in this descriptor set */ + uint32_t binding_count; + + /* Shader stages affected by this descriptor set */ + uint16_t shader_stages; + + /* Number of descriptors in this descriptor set */ + uint32_t descriptor_count; + + /* Bindings in this descriptor set */ + struct v3dv_descriptor_set_binding_layout binding[0]; +}; + +struct v3dv_pipeline_layout { + struct { + struct v3dv_descriptor_set_layout *layout; + uint32_t dynamic_offset_start; + } set[MAX_SETS]; + + uint32_t num_sets; +}; + +struct v3dv_descriptor_map { + 
/* TODO: avoid fixed size array/justify the size */ + unsigned num; /* number of array entries */ + unsigned num_desc; /* Number of descriptors (sum of array_size[]) */ + int set[64]; + int binding[64]; + int value[64]; + int array_size[64]; +}; + struct v3dv_pipeline { struct v3dv_device *device; @@ -714,6 +802,9 @@ struct v3dv_pipeline { } va[MAX_VERTEX_ATTRIBS]; uint32_t va_count; + struct v3dv_descriptor_map ubo_map; + struct v3dv_descriptor_map ssbo_map; + /* FIXME: this bo is another candidate to data to be uploaded using a * resource manager, instead of a individual bo */ @@ -836,11 +927,15 @@ V3DV_DEFINE_HANDLE_CASTS(v3dv_queue, VkQueue) V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_cmd_pool, VkCommandPool) V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer, VkBuffer) V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_device_memory, VkDeviceMemory) +V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_pool, VkDescriptorPool) +V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_set, VkDescriptorSet) +V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_set_layout, VkDescriptorSetLayout) V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_fence, VkFence) V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_framebuffer, VkFramebuffer) V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image, VkImage) V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image_view, VkImageView) V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline, VkPipeline) +V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline_layout, VkPipelineLayout) V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_render_pass, VkRenderPass) V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_semaphore, VkSemaphore) V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_shader_module, VkShaderModule) diff --git a/src/broadcom/vulkan/v3dv_uniforms.c b/src/broadcom/vulkan/v3dv_uniforms.c index 0652753fb9b..c68849f6354 100644 --- a/src/broadcom/vulkan/v3dv_uniforms.c +++ b/src/broadcom/vulkan/v3dv_uniforms.c @@ -27,12 +27,38 @@ #include "v3dv_private.h" +static struct v3dv_descriptor * +get_descriptor(struct v3dv_descriptor_state *descriptor_state, + 
struct v3dv_descriptor_map *map, + uint32_t index) +{ + assert(index >= 0 && index < map->num ); + + uint32_t set_number = map->set[index]; + assert(descriptor_state->valid & 1 << set_number); + + struct v3dv_descriptor_set *set = + descriptor_state->descriptors[set_number]; + assert(set); + + uint32_t binding_number = map->binding[index]; + assert(binding_number < set->layout->binding_count); + + const struct v3dv_descriptor_set_binding_layout *binding_layout = + &set->layout->binding[binding_number]; + + return &set->descriptors[binding_layout->descriptor_index]; +} + struct v3dv_cl_reloc v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_pipeline_stage *p_stage) { struct v3d_uniform_list *uinfo = &p_stage->prog_data.base->uniforms; struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic; + struct v3dv_descriptor_state *descriptor_state = + &cmd_buffer->state.descriptor_state; + struct v3dv_pipeline *pipeline = p_stage->pipeline; struct v3dv_job *job = cmd_buffer->state.job; assert(job); @@ -75,6 +101,36 @@ v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer, cl_aligned_f(&uniforms, dynamic->viewport.scale[0][2]); break; + case QUNIFORM_SSBO_OFFSET: + case QUNIFORM_UBO_ADDR: { + struct v3dv_descriptor_map *map = + uinfo->contents[i] == QUNIFORM_UBO_ADDR ? + &pipeline->ubo_map : &pipeline->ssbo_map; + + uint32_t offset = + uinfo->contents[i] == QUNIFORM_UBO_ADDR ? + v3d_unit_data_get_offset(data) : + 0; /* FIXME */ + + /* UBO index is shift up by 1, to follow gallium (0 is gallium's + * constant buffer 0), that is what nir_to_vir expects. But for the + * ubo_map below, we start from 0. + */ + uint32_t index = + uinfo->contents[i] == QUNIFORM_UBO_ADDR ? 
+ v3d_unit_data_get_unit(data) - 1 : + data; + + struct v3dv_descriptor *descriptor = + get_descriptor(descriptor_state, map, index); + assert(descriptor); + + cl_aligned_reloc(&job->indirect, &uniforms, + descriptor->bo, + descriptor->offset + offset); + break; + } + default: unreachable("unsupported quniform_contents uniform type\n"); } |