From 0551f8ed62b106f3c1f1ac0c6f1a6171cde2c2cf Mon Sep 17 00:00:00 2001 From: Erik Faye-Lund Date: Wed, 6 Apr 2022 15:04:34 +0200 Subject: dzn: port code to plain c This does quite a lot in one go, simply because C and C++ are too different to cleanly move from one language to another. But hopefully this won't create too many rebase-issues. Reviewed-by: Boris Brezillon Part-of: --- src/microsoft/vulkan/dzn_cmd_buffer.c | 4281 +++++++++++++++++++++++++++ src/microsoft/vulkan/dzn_cmd_buffer.cpp | 4281 --------------------------- src/microsoft/vulkan/dzn_descriptor_set.c | 1818 ++++++++++++ src/microsoft/vulkan/dzn_descriptor_set.cpp | 1818 ------------ src/microsoft/vulkan/dzn_device.c | 2650 +++++++++++++++++ src/microsoft/vulkan/dzn_device.cpp | 2649 ----------------- src/microsoft/vulkan/dzn_image.c | 1240 ++++++++ src/microsoft/vulkan/dzn_image.cpp | 1240 -------- src/microsoft/vulkan/dzn_meta.c | 736 +++++ src/microsoft/vulkan/dzn_meta.cpp | 736 ----- src/microsoft/vulkan/dzn_pass.c | 223 ++ src/microsoft/vulkan/dzn_pass.cpp | 223 -- src/microsoft/vulkan/dzn_pipeline.c | 1195 ++++++++ src/microsoft/vulkan/dzn_pipeline.cpp | 1195 -------- src/microsoft/vulkan/dzn_pipeline_cache.c | 99 + src/microsoft/vulkan/dzn_pipeline_cache.cpp | 99 - src/microsoft/vulkan/dzn_private.h | 10 +- src/microsoft/vulkan/dzn_query.c | 345 +++ src/microsoft/vulkan/dzn_query.cpp | 345 --- src/microsoft/vulkan/dzn_sync.c | 210 ++ src/microsoft/vulkan/dzn_sync.cpp | 210 -- src/microsoft/vulkan/dzn_wsi.c | 64 + src/microsoft/vulkan/dzn_wsi.cpp | 64 - src/microsoft/vulkan/meson.build | 22 +- 24 files changed, 12881 insertions(+), 12872 deletions(-) create mode 100644 src/microsoft/vulkan/dzn_cmd_buffer.c delete mode 100644 src/microsoft/vulkan/dzn_cmd_buffer.cpp create mode 100644 src/microsoft/vulkan/dzn_descriptor_set.c delete mode 100644 src/microsoft/vulkan/dzn_descriptor_set.cpp create mode 100644 src/microsoft/vulkan/dzn_device.c delete mode 100644 src/microsoft/vulkan/dzn_device.cpp create mode 100644 src/microsoft/vulkan/dzn_image.c delete mode 100644 src/microsoft/vulkan/dzn_image.cpp create mode 100644 src/microsoft/vulkan/dzn_meta.c delete mode 100644 src/microsoft/vulkan/dzn_meta.cpp create mode 100644 src/microsoft/vulkan/dzn_pass.c delete mode 100644 src/microsoft/vulkan/dzn_pass.cpp create mode 100644 src/microsoft/vulkan/dzn_pipeline.c delete mode 100644 src/microsoft/vulkan/dzn_pipeline.cpp create mode 100644 src/microsoft/vulkan/dzn_pipeline_cache.c delete mode 100644 src/microsoft/vulkan/dzn_pipeline_cache.cpp create mode 100644 src/microsoft/vulkan/dzn_query.c delete mode 100644 src/microsoft/vulkan/dzn_query.cpp create mode 100644 src/microsoft/vulkan/dzn_sync.c delete mode 100644 src/microsoft/vulkan/dzn_sync.cpp create mode 100644 src/microsoft/vulkan/dzn_wsi.c delete mode 100644 src/microsoft/vulkan/dzn_wsi.cpp diff --git a/src/microsoft/vulkan/dzn_cmd_buffer.c b/src/microsoft/vulkan/dzn_cmd_buffer.c new file mode 100644 index 00000000000..d5172c38a45 --- /dev/null +++ b/src/microsoft/vulkan/dzn_cmd_buffer.c @@ -0,0 +1,4281 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject 
to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "dzn_private.h" + +#include "vk_alloc.h" +#include "vk_debug_report.h" +#include "vk_format.h" +#include "vk_util.h" + +static void +dzn_cmd_buffer_destroy(struct vk_command_buffer *cbuf) +{ + if (!cbuf) + return; + + struct dzn_cmd_buffer *cmdbuf = container_of(cbuf, struct dzn_cmd_buffer, vk); + struct dzn_device *device = container_of(cbuf->base.device, struct dzn_device, vk); + + if (cmdbuf->cmdlist) + ID3D12GraphicsCommandList1_Release(cmdbuf->cmdlist); + + if (cmdbuf->cmdalloc) + ID3D12CommandAllocator_Release(cmdbuf->cmdalloc); + + list_for_each_entry_safe(struct dzn_internal_resource, res, &cmdbuf->internal_bufs, link) { + list_del(&res->link); + ID3D12Resource_Release(res->res); + vk_free(&cbuf->pool->alloc, res); + } + + dzn_descriptor_heap_pool_finish(&cmdbuf->cbv_srv_uav_pool); + dzn_descriptor_heap_pool_finish(&cmdbuf->sampler_pool); + dzn_descriptor_heap_pool_finish(&cmdbuf->rtvs.pool); + dzn_descriptor_heap_pool_finish(&cmdbuf->dsvs.pool); + util_dynarray_fini(&cmdbuf->events.wait); + util_dynarray_fini(&cmdbuf->events.signal); + util_dynarray_fini(&cmdbuf->queries.reset); + util_dynarray_fini(&cmdbuf->queries.wait); + util_dynarray_fini(&cmdbuf->queries.signal); + + if (cmdbuf->rtvs.ht) { + hash_table_foreach(cmdbuf->rtvs.ht, he) + vk_free(&cbuf->pool->alloc, he->data); + _mesa_hash_table_destroy(cmdbuf->rtvs.ht, NULL); + } + + if (cmdbuf->dsvs.ht) { + hash_table_foreach(cmdbuf->dsvs.ht, he) + vk_free(&cbuf->pool->alloc, he->data); + _mesa_hash_table_destroy(cmdbuf->dsvs.ht, NULL); + } + + if (cmdbuf->events.ht) + _mesa_hash_table_destroy(cmdbuf->events.ht, NULL); + + if (cmdbuf->queries.ht) { + hash_table_foreach(cmdbuf->queries.ht, he) { + struct dzn_cmd_buffer_query_pool_state *qpstate = + (struct dzn_cmd_buffer_query_pool_state *)he->data; + util_dynarray_fini(&qpstate->reset); + util_dynarray_fini(&qpstate->collect); + util_dynarray_fini(&qpstate->wait); + util_dynarray_fini(&qpstate->signal); + vk_free(&cbuf->pool->alloc, he->data); + } + _mesa_hash_table_destroy(cmdbuf->queries.ht, NULL); + } + + vk_command_buffer_finish(&cmdbuf->vk); + vk_free(&cbuf->pool->alloc, cmdbuf); +} + +static uint32_t +dzn_cmd_buffer_rtv_key_hash_function(const void *key) +{ + return _mesa_hash_data(key, sizeof(struct dzn_cmd_buffer_rtv_key)); +} + +static bool +dzn_cmd_buffer_rtv_key_equals_function(const void *a, const void *b) +{ + return memcmp(a, b, sizeof(struct dzn_cmd_buffer_rtv_key)) == 0; +} + +static uint32_t +dzn_cmd_buffer_dsv_key_hash_function(const void *key) +{ + return _mesa_hash_data(key, sizeof(struct dzn_cmd_buffer_dsv_key)); +} + +static bool +dzn_cmd_buffer_dsv_key_equals_function(const void *a, const void *b) +{ + return memcmp(a, b, sizeof(struct dzn_cmd_buffer_dsv_key)) == 0; +} + +static VkResult +dzn_cmd_buffer_create(const VkCommandBufferAllocateInfo *info, + 
VkCommandBuffer *out) +{ + VK_FROM_HANDLE(vk_command_pool, pool, info->commandPool); + struct dzn_device *device = container_of(pool->base.device, struct dzn_device, vk); + struct dzn_physical_device *pdev = + container_of(device->vk.physical, struct dzn_physical_device, vk); + + assert(pool->queue_family_index < pdev->queue_family_count); + + D3D12_COMMAND_LIST_TYPE type = + pdev->queue_families[pool->queue_family_index].desc.Type; + + struct dzn_cmd_buffer *cmdbuf = (struct dzn_cmd_buffer *) + vk_zalloc(&pool->alloc, sizeof(*cmdbuf), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!cmdbuf) + return vk_error(pool->base.device, VK_ERROR_OUT_OF_HOST_MEMORY); + + VkResult result = + vk_command_buffer_init(&cmdbuf->vk, pool, info->level); + if (result != VK_SUCCESS) { + vk_free(&pool->alloc, cmdbuf); + return result; + } + + memset(&cmdbuf->state, 0, sizeof(cmdbuf->state)); + list_inithead(&cmdbuf->internal_bufs); + util_dynarray_init(&cmdbuf->events.wait, NULL); + util_dynarray_init(&cmdbuf->events.signal, NULL); + util_dynarray_init(&cmdbuf->queries.reset, NULL); + util_dynarray_init(&cmdbuf->queries.wait, NULL); + util_dynarray_init(&cmdbuf->queries.signal, NULL); + dzn_descriptor_heap_pool_init(&cmdbuf->rtvs.pool, device, + D3D12_DESCRIPTOR_HEAP_TYPE_RTV, + false, &pool->alloc); + dzn_descriptor_heap_pool_init(&cmdbuf->dsvs.pool, device, + D3D12_DESCRIPTOR_HEAP_TYPE_DSV, + false, &pool->alloc); + dzn_descriptor_heap_pool_init(&cmdbuf->cbv_srv_uav_pool, device, + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, + true, &pool->alloc); + dzn_descriptor_heap_pool_init(&cmdbuf->sampler_pool, device, + D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, + true, &pool->alloc); + + cmdbuf->events.ht = + _mesa_pointer_hash_table_create(NULL); + cmdbuf->queries.ht = + _mesa_pointer_hash_table_create(NULL); + cmdbuf->rtvs.ht = + _mesa_hash_table_create(NULL, + dzn_cmd_buffer_rtv_key_hash_function, + dzn_cmd_buffer_rtv_key_equals_function); + cmdbuf->dsvs.ht = + _mesa_hash_table_create(NULL, + dzn_cmd_buffer_dsv_key_hash_function, + dzn_cmd_buffer_dsv_key_equals_function); + if (!cmdbuf->events.ht || !cmdbuf->queries.ht || + !cmdbuf->rtvs.ht || !cmdbuf->dsvs.ht) { + result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + goto out; + } + + cmdbuf->vk.destroy = dzn_cmd_buffer_destroy; + + if (FAILED(ID3D12Device1_CreateCommandAllocator(device->dev, type, + &IID_ID3D12CommandAllocator, + &cmdbuf->cmdalloc))) { + result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + goto out; + } + + if (FAILED(ID3D12Device1_CreateCommandList(device->dev, 0, type, + cmdbuf->cmdalloc, NULL, + &IID_ID3D12GraphicsCommandList1, + &cmdbuf->cmdlist))) { + result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + goto out; + } + +out: + if (result != VK_SUCCESS) + dzn_cmd_buffer_destroy(&cmdbuf->vk); + else + *out = dzn_cmd_buffer_to_handle(cmdbuf); + + return result; +} + +static VkResult +dzn_cmd_buffer_reset(struct dzn_cmd_buffer *cmdbuf) +{ + struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); + const struct dzn_physical_device *pdev = + container_of(device->vk.physical, struct dzn_physical_device, vk); + const struct vk_command_pool *pool = cmdbuf->vk.pool; + + /* Reset the state */ + memset(&cmdbuf->state, 0, sizeof(cmdbuf->state)); + + /* TODO: Return resources to the pool */ + list_for_each_entry_safe(struct dzn_internal_resource, res, &cmdbuf->internal_bufs, link) { + list_del(&res->link); + ID3D12Resource_Release(res->res); + vk_free(&cmdbuf->vk.pool->alloc, res); + } + + cmdbuf->error = VK_SUCCESS; + 
util_dynarray_clear(&cmdbuf->events.wait);
+   util_dynarray_clear(&cmdbuf->events.signal);
+   util_dynarray_clear(&cmdbuf->queries.reset);
+   util_dynarray_clear(&cmdbuf->queries.wait);
+   util_dynarray_clear(&cmdbuf->queries.signal);
+   hash_table_foreach(cmdbuf->rtvs.ht, he)
+      vk_free(&cmdbuf->vk.pool->alloc, he->data);
+   _mesa_hash_table_clear(cmdbuf->rtvs.ht, NULL);
+   dzn_descriptor_heap_pool_reset(&cmdbuf->rtvs.pool);
+   hash_table_foreach(cmdbuf->dsvs.ht, he)
+      vk_free(&cmdbuf->vk.pool->alloc, he->data);
+   _mesa_hash_table_clear(cmdbuf->dsvs.ht, NULL);
+   hash_table_foreach(cmdbuf->queries.ht, he) {
+      struct dzn_cmd_buffer_query_pool_state *qpstate =
+         (struct dzn_cmd_buffer_query_pool_state *)he->data;
+      util_dynarray_fini(&qpstate->reset);
+      util_dynarray_fini(&qpstate->collect);
+      util_dynarray_fini(&qpstate->wait);
+      util_dynarray_fini(&qpstate->signal);
+      vk_free(&cmdbuf->vk.pool->alloc, he->data);
+   }
+   _mesa_hash_table_clear(cmdbuf->queries.ht, NULL);
+   _mesa_hash_table_clear(cmdbuf->events.ht, NULL);
+   dzn_descriptor_heap_pool_reset(&cmdbuf->dsvs.pool);
+   dzn_descriptor_heap_pool_reset(&cmdbuf->cbv_srv_uav_pool);
+   dzn_descriptor_heap_pool_reset(&cmdbuf->sampler_pool);
+   vk_command_buffer_reset(&cmdbuf->vk);
+
+   /* cmdlist->Reset() doesn't return the memory back to the command list
+    * allocator, and cmdalloc->Reset() can only be called if there's no live
+    * cmdlist allocated from the allocator, so we need to release and create
+    * a new command list.
+    */
+   ID3D12GraphicsCommandList1_Release(cmdbuf->cmdlist);
+   cmdbuf->cmdlist = NULL;
+   ID3D12CommandAllocator_Reset(cmdbuf->cmdalloc);
+   D3D12_COMMAND_LIST_TYPE type =
+      pdev->queue_families[pool->queue_family_index].desc.Type;
+   if (FAILED(ID3D12Device1_CreateCommandList(device->dev, 0,
+                                              type,
+                                              cmdbuf->cmdalloc, NULL,
+                                              &IID_ID3D12GraphicsCommandList1,
+                                              &cmdbuf->cmdlist))) {
+      cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+   }
+
+   return cmdbuf->error;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_AllocateCommandBuffers(VkDevice device,
+                           const VkCommandBufferAllocateInfo *pAllocateInfo,
+                           VkCommandBuffer *pCommandBuffers)
+{
+   VK_FROM_HANDLE(vk_command_pool, pool, pAllocateInfo->commandPool);
+   VK_FROM_HANDLE(dzn_device, dev, device);
+   VkResult result = VK_SUCCESS;
+   uint32_t i;
+
+   for (i = 0; i < pAllocateInfo->commandBufferCount; i++) {
+      result = dzn_cmd_buffer_create(pAllocateInfo,
+                                     &pCommandBuffers[i]);
+      if (result != VK_SUCCESS)
+         break;
+   }
+
+   if (result != VK_SUCCESS) {
+      dev->vk.dispatch_table.FreeCommandBuffers(device, pAllocateInfo->commandPool,
+                                                i, pCommandBuffers);
+      for (i = 0; i < pAllocateInfo->commandBufferCount; i++)
+         pCommandBuffers[i] = VK_NULL_HANDLE;
+   }
+
+   return result;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_ResetCommandBuffer(VkCommandBuffer commandBuffer,
+                       VkCommandBufferResetFlags flags)
+{
+   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+
+   return dzn_cmd_buffer_reset(cmdbuf);
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_BeginCommandBuffer(VkCommandBuffer commandBuffer,
+                       const VkCommandBufferBeginInfo *info)
+{
+   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+
+   /* If this is the first vkBeginCommandBuffer, we must *initialize* the
+    * command buffer's state. Otherwise, we must *reset* its state. In both
+    * cases we reset it.
+ * + * From the Vulkan 1.0 spec: + * + * If a command buffer is in the executable state and the command buffer + * was allocated from a command pool with the + * VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT flag set, then + * vkBeginCommandBuffer implicitly resets the command buffer, behaving + * as if vkResetCommandBuffer had been called with + * VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT not set. It then puts + * the command buffer in the recording state. + */ + return dzn_cmd_buffer_reset(cmdbuf); +} + +static void +dzn_cmd_buffer_gather_events(struct dzn_cmd_buffer *cmdbuf) +{ + struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); + + if (cmdbuf->error != VK_SUCCESS) + goto out; + + hash_table_foreach(cmdbuf->events.ht, he) { + enum dzn_event_state state = (enum dzn_event_state)(uintptr_t)he->data; + + if (state != DZN_EVENT_STATE_EXTERNAL_WAIT) { + struct dzn_cmd_event_signal signal = { (struct dzn_event *)he->key, state == DZN_EVENT_STATE_SET }; + struct dzn_cmd_event_signal *entry = (struct dzn_cmd_event_signal *) + util_dynarray_grow(&cmdbuf->events.signal, struct dzn_cmd_event_signal, 1); + + if (!entry) { + cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + break; + } + + *entry = signal; + } + } + +out: + _mesa_hash_table_clear(cmdbuf->events.ht, NULL); +} + +static VkResult +dzn_cmd_buffer_dynbitset_reserve(struct dzn_cmd_buffer *cmdbuf, struct util_dynarray *array, uint32_t bit) +{ + struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); + + if (bit < util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS) + return VK_SUCCESS; + + unsigned old_sz = array->size; + void *ptr = util_dynarray_grow(array, BITSET_WORD, (bit + BITSET_WORDBITS) / BITSET_WORDBITS); + if (!ptr) { + cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + return cmdbuf->error; + } + + memset(ptr, 0, array->size - old_sz); + return VK_SUCCESS; +} + +static bool +dzn_cmd_buffer_dynbitset_test(struct util_dynarray *array, uint32_t bit) +{ + uint32_t nbits = util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS; + + if (bit < nbits) + return BITSET_TEST(util_dynarray_element(array, BITSET_WORD, 0), bit); + + return false; +} + +static VkResult +dzn_cmd_buffer_dynbitset_set(struct dzn_cmd_buffer *cmdbuf, struct util_dynarray *array, uint32_t bit) +{ + struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); + + VkResult result = dzn_cmd_buffer_dynbitset_reserve(cmdbuf, array, bit); + if (result != VK_SUCCESS) + return result; + + BITSET_SET(util_dynarray_element(array, BITSET_WORD, 0), bit); + return VK_SUCCESS; +} + +static void +dzn_cmd_buffer_dynbitset_clear(struct dzn_cmd_buffer *cmdbuf, struct util_dynarray *array, uint32_t bit) +{ + struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); + + if (bit >= util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS) + return; + + BITSET_CLEAR(util_dynarray_element(array, BITSET_WORD, 0), bit); +} + +static VkResult +dzn_cmd_buffer_dynbitset_set_range(struct dzn_cmd_buffer *cmdbuf, + struct util_dynarray *array, + uint32_t bit, uint32_t count) +{ + struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); + + VkResult result = dzn_cmd_buffer_dynbitset_reserve(cmdbuf, array, bit + count - 1); + if (result != VK_SUCCESS) + return result; + + BITSET_SET_RANGE(util_dynarray_element(array, BITSET_WORD, 0), bit, bit + count - 1); + return 
VK_SUCCESS; +} + +static void +dzn_cmd_buffer_dynbitset_clear_range(struct dzn_cmd_buffer *cmdbuf, + struct util_dynarray *array, + uint32_t bit, uint32_t count) +{ + struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); + uint32_t nbits = util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS; + + if (!nbits) + return; + + uint32_t end = MIN2(bit + count, nbits) - 1; + + while (bit <= end) { + uint32_t subcount = MIN2(end + 1 - bit, 32 - (bit % 32)); + BITSET_CLEAR_RANGE(util_dynarray_element(array, BITSET_WORD, 0), bit, bit + subcount - 1); + bit += subcount; + } +} + +static struct dzn_cmd_buffer_query_pool_state * +dzn_cmd_buffer_create_query_pool_state(struct dzn_cmd_buffer *cmdbuf) +{ + struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); + struct dzn_cmd_buffer_query_pool_state *state = (struct dzn_cmd_buffer_query_pool_state *) + vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*state), + 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!state) { + cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + return NULL; + } + + util_dynarray_init(&state->reset, NULL); + util_dynarray_init(&state->collect, NULL); + util_dynarray_init(&state->wait, NULL); + util_dynarray_init(&state->signal, NULL); + return state; +} + +static void +dzn_cmd_buffer_destroy_query_pool_state(struct dzn_cmd_buffer *cmdbuf, + struct dzn_cmd_buffer_query_pool_state *state) +{ + util_dynarray_fini(&state->reset); + util_dynarray_fini(&state->collect); + util_dynarray_fini(&state->wait); + util_dynarray_fini(&state->signal); + vk_free(&cmdbuf->vk.pool->alloc, state); +} + +static struct dzn_cmd_buffer_query_pool_state * +dzn_cmd_buffer_get_query_pool_state(struct dzn_cmd_buffer *cmdbuf, + struct dzn_query_pool *qpool) +{ + struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); + struct dzn_cmd_buffer_query_pool_state *state = NULL; + struct hash_entry *he = + _mesa_hash_table_search(cmdbuf->queries.ht, qpool); + + if (!he) { + state = dzn_cmd_buffer_create_query_pool_state(cmdbuf); + if (!state) + return NULL; + + he = _mesa_hash_table_insert(cmdbuf->queries.ht, qpool, state); + if (!he) { + dzn_cmd_buffer_destroy_query_pool_state(cmdbuf, state); + cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + return NULL; + } + } else { + state = (struct dzn_cmd_buffer_query_pool_state *)he->data; + } + + return state; +} + +static VkResult +dzn_cmd_buffer_collect_queries(struct dzn_cmd_buffer *cmdbuf, + const struct dzn_query_pool *qpool, + struct dzn_cmd_buffer_query_pool_state *state, + uint32_t first_query, + uint32_t query_count) +{ + struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); + uint32_t nbits = util_dynarray_num_elements(&state->collect, BITSET_WORD) * BITSET_WORDBITS; + uint32_t start, end; + + query_count = MIN2(query_count, nbits - first_query); + nbits = MIN2(first_query + query_count, nbits); + + VkResult result = + dzn_cmd_buffer_dynbitset_reserve(cmdbuf, &state->signal, first_query + query_count - 1); + if (result != VK_SUCCESS) + return result; + + BITSET_WORD *collect = + util_dynarray_element(&state->collect, BITSET_WORD, 0); + for (start = first_query, end = first_query, + __bitset_next_range(&start, &end, collect, nbits); + start < nbits; + __bitset_next_range(&start, &end, collect, nbits)) { + ID3D12GraphicsCommandList1_ResolveQueryData(cmdbuf->cmdlist, + qpool->heap, + qpool->queries[start].type, + start, end - start, + 
qpool->resolve_buffer, + qpool->query_size * start); + } + + D3D12_RESOURCE_BARRIER barrier = { + .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, + .Transition = { + .pResource = qpool->resolve_buffer, + .StateBefore = D3D12_RESOURCE_STATE_COPY_DEST, + .StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE, + }, + }; + uint32_t offset = dzn_query_pool_get_result_offset(qpool, first_query); + uint32_t size = dzn_query_pool_get_result_size(qpool, query_count); + + ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier); + + ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, + qpool->collect_buffer, offset, + qpool->resolve_buffer, offset, + size); + + for (start = first_query, end = first_query, + __bitset_next_range(&start, &end, collect, nbits); + start < nbits; + __bitset_next_range(&start, &end, collect, nbits)) { + uint32_t step = DZN_QUERY_REFS_SECTION_SIZE / sizeof(uint64_t); + uint32_t count = end - start; + + for (unsigned i = 0; i < count; i+= step) { + uint32_t sub_count = MIN2(step, count - i); + + ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, + qpool->collect_buffer, + dzn_query_pool_get_availability_offset(qpool, start + i), + device->queries.refs, + DZN_QUERY_REFS_ALL_ONES_OFFSET, + sizeof(uint64_t) * sub_count); + } + + dzn_cmd_buffer_dynbitset_set_range(cmdbuf, &state->signal, start, count); + dzn_cmd_buffer_dynbitset_clear_range(cmdbuf, &state->collect, start, count); + } + + DZN_SWAP(D3D12_RESOURCE_STATES, barrier.Transition.StateBefore, barrier.Transition.StateAfter); + ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier); + return VK_SUCCESS; +} + +static VkResult +dzn_cmd_buffer_collect_query_ops(struct dzn_cmd_buffer *cmdbuf, + struct dzn_query_pool *qpool, + struct util_dynarray *bitset_array, + struct util_dynarray *ops_array) +{ + struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); + BITSET_WORD *bitset = util_dynarray_element(bitset_array, BITSET_WORD, 0); + uint32_t nbits = util_dynarray_num_elements(bitset_array, BITSET_WORD) * BITSET_WORDBITS; + uint32_t start, end; + + BITSET_FOREACH_RANGE(start, end, bitset, nbits) { + struct dzn_cmd_buffer_query_range range = { qpool, start, end - start }; + struct dzn_cmd_buffer_query_range *entry = (struct dzn_cmd_buffer_query_range *) + util_dynarray_grow(ops_array, struct dzn_cmd_buffer_query_range, 1); + + if (!entry) { + cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + return cmdbuf->error; + } + + *entry = range; + } + + return VK_SUCCESS; +} + +static VkResult +dzn_cmd_buffer_gather_queries(struct dzn_cmd_buffer *cmdbuf) +{ + hash_table_foreach(cmdbuf->queries.ht, he) { + struct dzn_query_pool *qpool = (struct dzn_query_pool *)he->key; + struct dzn_cmd_buffer_query_pool_state *state = + (struct dzn_cmd_buffer_query_pool_state *)he->data; + VkResult result = + dzn_cmd_buffer_collect_queries(cmdbuf, qpool, state, 0, qpool->query_count); + if (result != VK_SUCCESS) + return result; + + result = dzn_cmd_buffer_collect_query_ops(cmdbuf, qpool, &state->reset, &cmdbuf->queries.reset); + if (result != VK_SUCCESS) + return result; + + result = dzn_cmd_buffer_collect_query_ops(cmdbuf, qpool, &state->wait, &cmdbuf->queries.wait); + if (result != VK_SUCCESS) + return result; + + result = dzn_cmd_buffer_collect_query_ops(cmdbuf, qpool, &state->signal, &cmdbuf->queries.signal); + if (result != VK_SUCCESS) + return result; + } + + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult 
VKAPI_CALL +dzn_EndCommandBuffer(VkCommandBuffer commandBuffer) +{ + VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); + + if (cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) { + dzn_cmd_buffer_gather_events(cmdbuf); + dzn_cmd_buffer_gather_queries(cmdbuf); + HRESULT hres = ID3D12GraphicsCommandList1_Close(cmdbuf->cmdlist); + if (FAILED(hres)) + cmdbuf->error = vk_error(cmdbuf->vk.base.device, VK_ERROR_OUT_OF_HOST_MEMORY); + } else { + cmdbuf->error = cmdbuf->vk.cmd_queue.error; + } + + return cmdbuf->error; +} + +VKAPI_ATTR void VKAPI_CALL +dzn_CmdPipelineBarrier2(VkCommandBuffer commandBuffer, + const VkDependencyInfo *info) +{ + VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); + + bool execution_barrier = + !info->memoryBarrierCount && + !info->bufferMemoryBarrierCount && + !info->imageMemoryBarrierCount; + + if (execution_barrier) { + /* Execution barrier can be emulated with a NULL UAV barrier (AKA + * pipeline flush). That's the best we can do with the standard D3D12 + * barrier API. + */ + D3D12_RESOURCE_BARRIER barrier = { + .Type = D3D12_RESOURCE_BARRIER_TYPE_UAV, + .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, + .UAV = { .pResource = NULL }, + }; + + ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier); + } + + /* Global memory barriers can be emulated with NULL UAV/Aliasing barriers. + * Scopes are not taken into account, but that's inherent to the current + * D3D12 barrier API. + */ + if (info->memoryBarrierCount) { + D3D12_RESOURCE_BARRIER barriers[2] = { 0 }; + + barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; + barriers[0].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; + barriers[0].UAV.pResource = NULL; + barriers[1].Type = D3D12_RESOURCE_BARRIER_TYPE_ALIASING; + barriers[1].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; + barriers[1].Aliasing.pResourceBefore = NULL; + barriers[1].Aliasing.pResourceAfter = NULL; + ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 2, barriers); + } + + for (uint32_t i = 0; i < info->bufferMemoryBarrierCount; i++) { + VK_FROM_HANDLE(dzn_buffer, buf, info->pBufferMemoryBarriers[i].buffer); + D3D12_RESOURCE_BARRIER barrier = { 0 }; + + /* UAV are used only for storage buffers, skip all other buffers. */ + if (!(buf->usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT)) + continue; + + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; + barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; + barrier.UAV.pResource = buf->res; + ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier); + } + + for (uint32_t i = 0; i < info->imageMemoryBarrierCount; i++) { + const VkImageMemoryBarrier2 *ibarrier = &info->pImageMemoryBarriers[i]; + const VkImageSubresourceRange *range = &ibarrier->subresourceRange; + VK_FROM_HANDLE(dzn_image, image, ibarrier->image); + + /* We use placed resource's simple model, in which only one resource + * pointing to a given heap is active at a given time. To make the + * resource active we need to add an aliasing barrier. 
+ */ + D3D12_RESOURCE_BARRIER aliasing_barrier = { + .Type = D3D12_RESOURCE_BARRIER_TYPE_ALIASING, + .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, + .Aliasing = { + .pResourceBefore = NULL, + .pResourceAfter = image->res, + }, + }; + + ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &aliasing_barrier); + + dzn_foreach_aspect(aspect, range->aspectMask) { + D3D12_RESOURCE_BARRIER transition_barrier = { + .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, + .Transition = { + .pResource = image->res, + .StateAfter = dzn_image_layout_to_state(ibarrier->newLayout, aspect), + }, + }; + + if (ibarrier->oldLayout == VK_IMAGE_LAYOUT_UNDEFINED || + ibarrier->oldLayout == VK_IMAGE_LAYOUT_PREINITIALIZED) { + transition_barrier.Transition.StateBefore = image->mem->initial_state; + } else { + transition_barrier.Transition.StateBefore = + dzn_image_layout_to_state(ibarrier->oldLayout, aspect); + } + + if (transition_barrier.Transition.StateBefore == transition_barrier.Transition.StateAfter) + continue; + + /* some layouts map to the same states, and NOP-barriers are illegal */ + uint32_t layer_count = dzn_get_layer_count(image, range); + uint32_t level_count = dzn_get_level_count(image, range); + for (uint32_t layer = 0; layer < layer_count; layer++) { + for (uint32_t lvl = 0; lvl < level_count; lvl++) { + transition_barrier.Transition.Subresource = + dzn_image_range_get_subresource_index(image, range, aspect, lvl, layer); + ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &transition_barrier); + } + } + } + } +} + +static D3D12_CPU_DESCRIPTOR_HANDLE +dzn_cmd_buffer_get_dsv(struct dzn_cmd_buffer *cmdbuf, + const struct dzn_image *image, + const D3D12_DEPTH_STENCIL_VIEW_DESC *desc) +{ + struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); + struct dzn_cmd_buffer_dsv_key key = { image, *desc }; + struct hash_entry *he = _mesa_hash_table_search(cmdbuf->dsvs.ht, &key); + struct dzn_cmd_buffer_dsv_entry *dsve; + + if (!he) { + struct dzn_descriptor_heap *heap; + uint32_t slot; + + // TODO: error handling + dsve = (struct dzn_cmd_buffer_dsv_entry *) + vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*dsve), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + dsve->key = key; + dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->dsvs.pool, device, 1, &heap, &slot); + dsve->handle = dzn_descriptor_heap_get_cpu_handle(heap, slot); + ID3D12Device1_CreateDepthStencilView(device->dev, image->res, desc, dsve->handle); + _mesa_hash_table_insert(cmdbuf->dsvs.ht, &dsve->key, dsve); + } else { + dsve = (struct dzn_cmd_buffer_dsv_entry *)he->data; + } + + return dsve->handle; +} + +static D3D12_CPU_DESCRIPTOR_HANDLE +dzn_cmd_buffer_get_rtv(struct dzn_cmd_buffer *cmdbuf, + const struct dzn_image *image, + const D3D12_RENDER_TARGET_VIEW_DESC *desc) +{ + struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); + struct dzn_cmd_buffer_rtv_key key = { image, *desc }; + struct hash_entry *he = _mesa_hash_table_search(cmdbuf->rtvs.ht, &key); + struct dzn_cmd_buffer_rtv_entry *rtve; + + if (!he) { + struct dzn_descriptor_heap *heap; + uint32_t slot; + + // TODO: error handling + rtve = (struct dzn_cmd_buffer_rtv_entry *) + vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*rtve), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + rtve->key = key; + dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->rtvs.pool, device, 1, &heap, &slot); + rtve->handle = dzn_descriptor_heap_get_cpu_handle(heap, slot); + 
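/* Note (added for clarity, not in the original patch): the view created
+       * below is cached in cmdbuf->rtvs.ht, keyed on the {image, desc} pair
+       * looked up above, so later clears/blits hitting the same subresource
+       * reuse this descriptor slot instead of allocating a new one.
+       */
+      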
ID3D12Device1_CreateRenderTargetView(device->dev, image->res, desc, rtve->handle); + he = _mesa_hash_table_insert(cmdbuf->rtvs.ht, &rtve->key, rtve); + } else { + rtve = (struct dzn_cmd_buffer_rtv_entry *)he->data; + } + + return rtve->handle; +} + +static VkResult +dzn_cmd_buffer_alloc_internal_buf(struct dzn_cmd_buffer *cmdbuf, + uint32_t size, + D3D12_HEAP_TYPE heap_type, + D3D12_RESOURCE_STATES init_state, + ID3D12Resource **out) +{ + struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); + ID3D12Resource *res; + *out = NULL; + + /* Align size on 64k (the default alignment) */ + size = ALIGN_POT(size, 64 * 1024); + + D3D12_HEAP_PROPERTIES hprops; + ID3D12Device1_GetCustomHeapProperties(device->dev, &hprops, 0, heap_type); + D3D12_RESOURCE_DESC rdesc = { + .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER, + .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT, + .Width = size, + .Height = 1, + .DepthOrArraySize = 1, + .MipLevels = 1, + .Format = DXGI_FORMAT_UNKNOWN, + .SampleDesc = { .Count = 1, .Quality = 0 }, + .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + .Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS, + }; + + HRESULT hres = + ID3D12Device1_CreateCommittedResource(device->dev, &hprops, + D3D12_HEAP_FLAG_NONE, &rdesc, + init_state, NULL, + &IID_ID3D12Resource, + &res); + if (FAILED(hres)) { + cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); + return cmdbuf->error; + } + + struct dzn_internal_resource *entry = (struct dzn_internal_resource *) + vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*entry), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!entry) { + cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); + ID3D12Resource_Release(res); + return cmdbuf->error; + } + + entry->res = res; + list_addtail(&entry->link, &cmdbuf->internal_bufs); + *out = entry->res; + return VK_SUCCESS; +} + +static void +dzn_cmd_buffer_clear_rects_with_copy(struct dzn_cmd_buffer *cmdbuf, + const struct dzn_image *image, + VkImageLayout layout, + const VkClearColorValue *color, + const VkImageSubresourceRange *range, + uint32_t rect_count, D3D12_RECT *rects) +{ + enum pipe_format pfmt = vk_format_to_pipe_format(image->vk.format); + uint32_t blksize = util_format_get_blocksize(pfmt); + uint8_t buf[D3D12_TEXTURE_DATA_PITCH_ALIGNMENT * 3] = { 0 }; + uint32_t raw[4] = { 0 }; + + assert(blksize <= sizeof(raw)); + assert(!(sizeof(buf) % blksize)); + + util_format_write_4(pfmt, (void *)color, 0, (void *)raw, 0, 0, 0, 1, 1); + + uint32_t fill_step = D3D12_TEXTURE_DATA_PITCH_ALIGNMENT; + while (fill_step % blksize) + fill_step += D3D12_TEXTURE_DATA_PITCH_ALIGNMENT; + + uint32_t max_w = u_minify(image->vk.extent.width, range->baseMipLevel); + uint32_t max_h = u_minify(image->vk.extent.height, range->baseMipLevel); + uint32_t row_pitch = ALIGN_NPOT(max_w * blksize, fill_step); + uint32_t res_size = max_h * row_pitch; + + assert(fill_step <= sizeof(buf)); + + for (uint32_t i = 0; i < fill_step; i += blksize) + memcpy(&buf[i], raw, blksize); + + ID3D12Resource *src_res; + + VkResult result = + dzn_cmd_buffer_alloc_internal_buf(cmdbuf, res_size, + D3D12_HEAP_TYPE_UPLOAD, + D3D12_RESOURCE_STATE_GENERIC_READ, + &src_res); + if (result != VK_SUCCESS) + return; + + assert(!(res_size % fill_step)); + + uint8_t *cpu_ptr; + ID3D12Resource_Map(src_res, 0, NULL, (void **)&cpu_ptr); + for (uint32_t i = 0; i < res_size; i += fill_step) + memcpy(&cpu_ptr[i], buf, fill_step); + + ID3D12Resource_Unmap(src_res, 0, NULL); + + D3D12_TEXTURE_COPY_LOCATION src_loc = { + .pResource 
= src_res, + .Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, + .PlacedFootprint = { + .Offset = 0, + .Footprint = { + .Width = max_w, + .Height = max_h, + .Depth = 1, + .RowPitch = (UINT)ALIGN_NPOT(max_w * blksize, fill_step), + }, + }, + }; + + D3D12_RESOURCE_STATES dst_state = + dzn_image_layout_to_state(layout, VK_IMAGE_ASPECT_COLOR_BIT); + D3D12_RESOURCE_BARRIER barrier = { + .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, + .Transition = { + .pResource = src_res, + .StateBefore = D3D12_RESOURCE_STATE_GENERIC_READ, + .StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE, + }, + }; + + ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier); + + barrier.Transition.pResource = image->res; + + assert(dzn_get_level_count(image, range) == 1); + uint32_t layer_count = dzn_get_layer_count(image, range); + + dzn_foreach_aspect(aspect, range->aspectMask) { + VkImageSubresourceLayers subres = { + .aspectMask = (VkImageAspectFlags)aspect, + .mipLevel = range->baseMipLevel, + .baseArrayLayer = range->baseArrayLayer, + .layerCount = layer_count, + }; + + for (uint32_t layer = 0; layer < layer_count; layer++) { + if (dst_state != D3D12_RESOURCE_STATE_COPY_DEST) { + barrier.Transition.Subresource = + dzn_image_range_get_subresource_index(image, range, aspect, 0, layer); + barrier.Transition.StateBefore = dst_state; + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST; + ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier); + } + + D3D12_TEXTURE_COPY_LOCATION dst_loc = + dzn_image_get_copy_loc(image, &subres, aspect, layer); + + src_loc.PlacedFootprint.Footprint.Format = + dst_loc.Type == D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT ? + dst_loc.PlacedFootprint.Footprint.Format : + image->desc.Format; + + for (uint32_t r = 0; r < rect_count; r++) { + D3D12_BOX src_box = { + .left = 0, + .top = 0, + .front = 0, + .right = (UINT)(rects[r].right - rects[r].left), + .bottom = (UINT)(rects[r].bottom - rects[r].top), + .back = 1, + }; + + ID3D12GraphicsCommandList1_CopyTextureRegion(cmdbuf->cmdlist, + &dst_loc, + rects[r].left, + rects[r].top, 0, + &src_loc, + &src_box); + } + + if (dst_state != D3D12_RESOURCE_STATE_COPY_DEST) { + barrier.Transition.StateAfter = dst_state; + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; + ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier); + } + } + } +} + +static VkClearColorValue +adjust_clear_color(VkFormat format, const VkClearColorValue *col) +{ + VkClearColorValue out = *col; + + // D3D12 doesn't support bgra4, so we map it to rgba4 and swizzle things + // manually where it matters, like here, in the clear path. 
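+   // For example, the two swaps below turn a caller-provided clear color
+   // {c0, c1, c2, c3} into {c1, c0, c3, c2}, matching the component order
+   // the rgba4 alias expects for a bgra4 image.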
+ if (format == VK_FORMAT_B4G4R4A4_UNORM_PACK16) { + DZN_SWAP(float, out.float32[0], out.float32[1]); + DZN_SWAP(float, out.float32[2], out.float32[3]); + } + + return out; +} + +static void +dzn_cmd_buffer_clear_ranges_with_copy(struct dzn_cmd_buffer *cmdbuf, + const struct dzn_image *image, + VkImageLayout layout, + const VkClearColorValue *color, + uint32_t range_count, + const VkImageSubresourceRange *ranges) +{ + enum pipe_format pfmt = vk_format_to_pipe_format(image->vk.format); + uint32_t blksize = util_format_get_blocksize(pfmt); + uint8_t buf[D3D12_TEXTURE_DATA_PITCH_ALIGNMENT * 3] = { 0 }; + uint32_t raw[4] = { 0 }; + + assert(blksize <= sizeof(raw)); + assert(!(sizeof(buf) % blksize)); + + util_format_write_4(pfmt, (void *)color, 0, (void *)raw, 0, 0, 0, 1, 1); + + uint32_t fill_step = D3D12_TEXTURE_DATA_PITCH_ALIGNMENT; + while (fill_step % blksize) + fill_step += D3D12_TEXTURE_DATA_PITCH_ALIGNMENT; + + uint32_t res_size = 0; + for (uint32_t r = 0; r < range_count; r++) { + uint32_t w = u_minify(image->vk.extent.width, ranges[r].baseMipLevel); + uint32_t h = u_minify(image->vk.extent.height, ranges[r].baseMipLevel); + uint32_t d = u_minify(image->vk.extent.depth, ranges[r].baseMipLevel); + uint32_t row_pitch = ALIGN_NPOT(w * blksize, fill_step); + + res_size = MAX2(res_size, h * d * row_pitch); + } + + assert(fill_step <= sizeof(buf)); + + for (uint32_t i = 0; i < fill_step; i += blksize) + memcpy(&buf[i], raw, blksize); + + ID3D12Resource *src_res; + + VkResult result = + dzn_cmd_buffer_alloc_internal_buf(cmdbuf, res_size, + D3D12_HEAP_TYPE_UPLOAD, + D3D12_RESOURCE_STATE_GENERIC_READ, + &src_res); + if (result != VK_SUCCESS) + return; + + assert(!(res_size % fill_step)); + + uint8_t *cpu_ptr; + ID3D12Resource_Map(src_res, 0, NULL, (void **)&cpu_ptr); + for (uint32_t i = 0; i < res_size; i += fill_step) + memcpy(&cpu_ptr[i], buf, fill_step); + + ID3D12Resource_Unmap(src_res, 0, NULL); + + D3D12_TEXTURE_COPY_LOCATION src_loc = { + .pResource = src_res, + .Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, + .PlacedFootprint = { + .Offset = 0, + }, + }; + + D3D12_RESOURCE_STATES dst_state = + dzn_image_layout_to_state(layout, VK_IMAGE_ASPECT_COLOR_BIT); + D3D12_RESOURCE_BARRIER barrier = { + .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, + .Transition = { + .pResource = src_res, + .StateBefore = D3D12_RESOURCE_STATE_GENERIC_READ, + .StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE, + }, + }; + + ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier); + + barrier.Transition.pResource = image->res; + for (uint32_t r = 0; r < range_count; r++) { + uint32_t level_count = dzn_get_level_count(image, &ranges[r]); + uint32_t layer_count = dzn_get_layer_count(image, &ranges[r]); + + dzn_foreach_aspect(aspect, ranges[r].aspectMask) { + for (uint32_t lvl = 0; lvl < level_count; lvl++) { + uint32_t w = u_minify(image->vk.extent.width, ranges[r].baseMipLevel + lvl); + uint32_t h = u_minify(image->vk.extent.height, ranges[r].baseMipLevel + lvl); + uint32_t d = u_minify(image->vk.extent.depth, ranges[r].baseMipLevel + lvl); + VkImageSubresourceLayers subres = { + .aspectMask = (VkImageAspectFlags)aspect, + .mipLevel = ranges[r].baseMipLevel + lvl, + .baseArrayLayer = ranges[r].baseArrayLayer, + .layerCount = layer_count, + }; + + for (uint32_t layer = 0; layer < layer_count; layer++) { + if (dst_state != D3D12_RESOURCE_STATE_COPY_DEST) { + barrier.Transition.Subresource = + dzn_image_range_get_subresource_index(image, &ranges[r], 
aspect, lvl, layer); + barrier.Transition.StateBefore = dst_state; + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST; + ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier); + } + + D3D12_TEXTURE_COPY_LOCATION dst_loc = + dzn_image_get_copy_loc(image, &subres, aspect, layer); + + src_loc.PlacedFootprint.Footprint.Format = + dst_loc.Type == D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT ? + dst_loc.PlacedFootprint.Footprint.Format : + image->desc.Format; + src_loc.PlacedFootprint.Footprint.Width = w; + src_loc.PlacedFootprint.Footprint.Height = h; + src_loc.PlacedFootprint.Footprint.Depth = d; + src_loc.PlacedFootprint.Footprint.RowPitch = + ALIGN_NPOT(w * blksize, fill_step); + D3D12_BOX src_box = { + .left = 0, + .top = 0, + .front = 0, + .right = w, + .bottom = h, + .back = d, + }; + + ID3D12GraphicsCommandList1_CopyTextureRegion(cmdbuf->cmdlist, &dst_loc, 0, 0, 0, + &src_loc, &src_box); + + if (dst_state != D3D12_RESOURCE_STATE_COPY_DEST) { + barrier.Transition.StateAfter = dst_state; + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; + ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier); + } + } + } + } + } +} + +static void +dzn_cmd_buffer_clear_attachment(struct dzn_cmd_buffer *cmdbuf, + uint32_t idx, + const VkClearValue *value, + VkImageAspectFlags aspects, + uint32_t base_layer, + uint32_t layer_count, + uint32_t rect_count, + D3D12_RECT *rects) +{ + if (idx == VK_ATTACHMENT_UNUSED) + return; + + struct dzn_image_view *view = cmdbuf->state.framebuffer->attachments[idx]; + struct dzn_image *image = container_of(view->vk.image, struct dzn_image, vk); + + VkImageSubresourceRange range = { + .aspectMask = aspects, + .baseMipLevel = view->vk.base_mip_level, + .levelCount = 1, + .baseArrayLayer = view->vk.base_array_layer + base_layer, + .layerCount = layer_count, + }; + bool all_layers = + base_layer == 0 && + (layer_count == view->vk.layer_count || + layer_count == VK_REMAINING_ARRAY_LAYERS); + + if (vk_format_is_depth_or_stencil(view->vk.format)) { + D3D12_CLEAR_FLAGS flags = (D3D12_CLEAR_FLAGS)0; + + if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) + flags |= D3D12_CLEAR_FLAG_DEPTH; + if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) + flags |= D3D12_CLEAR_FLAG_STENCIL; + + if (flags != 0) { + D3D12_DEPTH_STENCIL_VIEW_DESC desc = dzn_image_get_dsv_desc(image, &range, 0); + D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_dsv(cmdbuf, image, &desc); + ID3D12GraphicsCommandList1_ClearDepthStencilView(cmdbuf->cmdlist, handle, flags, + value->depthStencil.depth, + value->depthStencil.stencil, + rect_count, rects); + } + } else if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) { + VkClearColorValue color = adjust_clear_color(view->vk.format, &value->color); + bool clear_with_cpy = false; + float vals[4]; + + if (vk_format_is_sint(view->vk.format)) { + for (uint32_t i = 0; i < 4; i++) { + vals[i] = color.int32[i]; + if (color.int32[i] != (int32_t)vals[i]) { + clear_with_cpy = true; + break; + } + } + } else if (vk_format_is_uint(view->vk.format)) { + for (uint32_t i = 0; i < 4; i++) { + vals[i] = color.uint32[i]; + if (color.uint32[i] != (uint32_t)vals[i]) { + clear_with_cpy = true; + break; + } + } + } else { + for (uint32_t i = 0; i < 4; i++) + vals[i] = color.float32[i]; + } + + if (clear_with_cpy) { + dzn_cmd_buffer_clear_rects_with_copy(cmdbuf, image, + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + &value->color, + &range, rect_count, rects); + } else { + D3D12_RENDER_TARGET_VIEW_DESC desc = dzn_image_get_rtv_desc(image, &range, 0); + 
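/* Clarifying note (not in the original patch): ClearRenderTargetView()
+       * takes FLOAT[4] values; the round-trip checks above guarantee the
+       * integer clear values are exactly representable as floats, otherwise
+       * the copy-based clear path was taken instead.
+       */
+      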
D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_rtv(cmdbuf, image, &desc); + ID3D12GraphicsCommandList1_ClearRenderTargetView(cmdbuf->cmdlist, handle, vals, rect_count, rects); + } + } +} + +static void +dzn_cmd_buffer_clear_color(struct dzn_cmd_buffer *cmdbuf, + const struct dzn_image *image, + VkImageLayout layout, + const VkClearColorValue *col, + uint32_t range_count, + const VkImageSubresourceRange *ranges) +{ + if (!(image->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)) { + dzn_cmd_buffer_clear_ranges_with_copy(cmdbuf, image, layout, col, range_count, ranges); + return; + } + + VkClearColorValue color = adjust_clear_color(image->vk.format, col); + float clear_vals[4]; + + enum pipe_format pfmt = vk_format_to_pipe_format(image->vk.format); + + if (util_format_is_pure_sint(pfmt)) { + for (uint32_t c = 0; c < ARRAY_SIZE(clear_vals); c++) { + clear_vals[c] = color.int32[c]; + if (color.int32[c] != (int32_t)clear_vals[c]) { + dzn_cmd_buffer_clear_ranges_with_copy(cmdbuf, image, layout, col, range_count, ranges); + return; + } + } + } else if (util_format_is_pure_uint(pfmt)) { + for (uint32_t c = 0; c < ARRAY_SIZE(clear_vals); c++) { + clear_vals[c] = color.uint32[c]; + if (color.uint32[c] != (uint32_t)clear_vals[c]) { + dzn_cmd_buffer_clear_ranges_with_copy(cmdbuf, image, layout, col, range_count, ranges); + return; + } + } + } else { + memcpy(clear_vals, color.float32, sizeof(clear_vals)); + } + + for (uint32_t r = 0; r < range_count; r++) { + const VkImageSubresourceRange *range = &ranges[r]; + uint32_t layer_count = dzn_get_layer_count(image, range); + uint32_t level_count = dzn_get_level_count(image, range); + + for (uint32_t lvl = 0; lvl < level_count; lvl++) { + D3D12_RESOURCE_BARRIER barrier = { + .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, + .Transition = { + .pResource = image->res, + .StateBefore = + dzn_image_layout_to_state(layout, VK_IMAGE_ASPECT_COLOR_BIT), + .StateAfter = D3D12_RESOURCE_STATE_RENDER_TARGET, + }, + }; + + if (barrier.Transition.StateBefore != barrier.Transition.StateAfter) { + for (uint32_t layer = 0; layer < layer_count; layer++) { + barrier.Transition.Subresource = + dzn_image_range_get_subresource_index(image, range, + VK_IMAGE_ASPECT_COLOR_BIT, + lvl, layer); + ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier); + } + } + + VkImageSubresourceRange view_range = *range; + + if (image->vk.image_type == VK_IMAGE_TYPE_3D) { + view_range.baseArrayLayer = 0; + view_range.layerCount = u_minify(image->vk.extent.depth, range->baseMipLevel + lvl); + } + + D3D12_RENDER_TARGET_VIEW_DESC desc = dzn_image_get_rtv_desc(image, &view_range, lvl); + D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_rtv(cmdbuf, image, &desc); + ID3D12GraphicsCommandList1_ClearRenderTargetView(cmdbuf->cmdlist, handle, clear_vals, 0, NULL); + + if (barrier.Transition.StateBefore != barrier.Transition.StateAfter) { + DZN_SWAP(D3D12_RESOURCE_STATES, barrier.Transition.StateBefore, barrier.Transition.StateAfter); + + for (uint32_t layer = 0; layer < layer_count; layer++) { + barrier.Transition.Subresource = + dzn_image_range_get_subresource_index(image, range, VK_IMAGE_ASPECT_COLOR_BIT, lvl, layer); + ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier); + } + } + } + } +} + +static void +dzn_cmd_buffer_clear_zs(struct dzn_cmd_buffer *cmdbuf, + const struct dzn_image *image, + VkImageLayout layout, + const VkClearDepthStencilValue *zs, + uint32_t range_count, + const 
VkImageSubresourceRange *ranges) +{ + assert(image->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL); + + for (uint32_t r = 0; r < range_count; r++) { + const VkImageSubresourceRange *range = &ranges[r]; + uint32_t layer_count = dzn_get_layer_count(image, range); + uint32_t level_count = dzn_get_level_count(image, range); + + D3D12_CLEAR_FLAGS flags = (D3D12_CLEAR_FLAGS)0; + + if (range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) + flags |= D3D12_CLEAR_FLAG_DEPTH; + if (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) + flags |= D3D12_CLEAR_FLAG_STENCIL; + + for (uint32_t lvl = 0; lvl < level_count; lvl++) { + uint32_t barrier_count = 0; + D3D12_RESOURCE_BARRIER barriers[2]; + VkImageAspectFlagBits barrier_aspects[2]; + + dzn_foreach_aspect(aspect, range->aspectMask) { + barrier_aspects[barrier_count] = aspect; + barriers[barrier_count] = (D3D12_RESOURCE_BARRIER) { + .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, + .Transition = { + .pResource = image->res, + .StateBefore = dzn_image_layout_to_state(layout, aspect), + .StateAfter = D3D12_RESOURCE_STATE_DEPTH_WRITE, + }, + }; + + if (barriers[barrier_count].Transition.StateBefore != barriers[barrier_count].Transition.StateAfter) + barrier_count++; + } + + if (barrier_count > 0) { + for (uint32_t layer = 0; layer < layer_count; layer++) { + for (uint32_t b = 0; b < barrier_count; b++) { + barriers[b].Transition.Subresource = + dzn_image_range_get_subresource_index(image, range, barrier_aspects[b], lvl, layer); + } + + ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, + barrier_count, + barriers); + } + } + + D3D12_DEPTH_STENCIL_VIEW_DESC desc = dzn_image_get_dsv_desc(image, range, lvl); + D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_dsv(cmdbuf, image, &desc); + ID3D12GraphicsCommandList1_ClearDepthStencilView(cmdbuf->cmdlist, + handle, flags, + zs->depth, + zs->stencil, + 0, NULL); + + if (barrier_count > 0) { + for (uint32_t b = 0; b < barrier_count; b++) + DZN_SWAP(D3D12_RESOURCE_STATES, barriers[b].Transition.StateBefore, barriers[b].Transition.StateAfter); + + for (uint32_t layer = 0; layer < layer_count; layer++) { + for (uint32_t b = 0; b < barrier_count; b++) { + barriers[b].Transition.Subresource = + dzn_image_range_get_subresource_index(image, range, barrier_aspects[b], lvl, layer); + } + + ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, + barrier_count, + barriers); + } + } + } + } +} + +static void +dzn_cmd_buffer_copy_buf2img_region(struct dzn_cmd_buffer *cmdbuf, + const VkCopyBufferToImageInfo2 *info, + uint32_t r, + VkImageAspectFlagBits aspect, + uint32_t l) +{ + struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); + VK_FROM_HANDLE(dzn_buffer, src_buffer, info->srcBuffer); + VK_FROM_HANDLE(dzn_image, dst_image, info->dstImage); + + ID3D12Device1 *dev = device->dev; + ID3D12GraphicsCommandList1 *cmdlist = cmdbuf->cmdlist; + + const VkBufferImageCopy2 *region = &info->pRegions[r]; + enum pipe_format pfmt = vk_format_to_pipe_format(dst_image->vk.format); + uint32_t blkh = util_format_get_blockheight(pfmt); + uint32_t blkd = util_format_get_blockdepth(pfmt); + + D3D12_TEXTURE_COPY_LOCATION dst_img_loc = + dzn_image_get_copy_loc(dst_image, ®ion->imageSubresource, aspect, l); + D3D12_TEXTURE_COPY_LOCATION src_buf_loc = + dzn_buffer_get_copy_loc(src_buffer, dst_image->vk.format, region, aspect, l); + + if (dzn_buffer_supports_region_copy(&src_buf_loc)) { + /* RowPitch and Offset are properly aligned, we can copy + * 
the whole thing in one call. + */ + D3D12_BOX src_box = { + .left = 0, + .top = 0, + .front = 0, + .right = region->imageExtent.width, + .bottom = region->imageExtent.height, + .back = region->imageExtent.depth, + }; + + ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, &dst_img_loc, + region->imageOffset.x, + region->imageOffset.y, + region->imageOffset.z, + &src_buf_loc, &src_box); + return; + } + + /* Copy line-by-line if things are not properly aligned. */ + D3D12_BOX src_box = { + .top = 0, + .front = 0, + .bottom = blkh, + .back = blkd, + }; + + for (uint32_t z = 0; z < region->imageExtent.depth; z += blkd) { + for (uint32_t y = 0; y < region->imageExtent.height; y += blkh) { + uint32_t src_x; + + D3D12_TEXTURE_COPY_LOCATION src_buf_line_loc = + dzn_buffer_get_line_copy_loc(src_buffer, dst_image->vk.format, + region, &src_buf_loc, + y, z, &src_x); + + src_box.left = src_x; + src_box.right = src_x + region->imageExtent.width; + ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, + &dst_img_loc, + region->imageOffset.x, + region->imageOffset.y + y, + region->imageOffset.z + z, + &src_buf_line_loc, + &src_box); + } + } +} + +static void +dzn_cmd_buffer_copy_img2buf_region(struct dzn_cmd_buffer *cmdbuf, + const VkCopyImageToBufferInfo2 *info, + uint32_t r, + VkImageAspectFlagBits aspect, + uint32_t l) +{ + struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); + VK_FROM_HANDLE(dzn_image, src_image, info->srcImage); + VK_FROM_HANDLE(dzn_buffer, dst_buffer, info->dstBuffer); + + ID3D12Device1 *dev = device->dev; + ID3D12GraphicsCommandList1 *cmdlist = cmdbuf->cmdlist; + + const VkBufferImageCopy2 *region = &info->pRegions[r]; + enum pipe_format pfmt = vk_format_to_pipe_format(src_image->vk.format); + uint32_t blkh = util_format_get_blockheight(pfmt); + uint32_t blkd = util_format_get_blockdepth(pfmt); + + D3D12_TEXTURE_COPY_LOCATION src_img_loc = + dzn_image_get_copy_loc(src_image, ®ion->imageSubresource, aspect, l); + D3D12_TEXTURE_COPY_LOCATION dst_buf_loc = + dzn_buffer_get_copy_loc(dst_buffer, src_image->vk.format, region, aspect, l); + + if (dzn_buffer_supports_region_copy(&dst_buf_loc)) { + /* RowPitch and Offset are properly aligned on 256 bytes, we can copy + * the whole thing in one call. + */ + D3D12_BOX src_box = { + .left = (UINT)region->imageOffset.x, + .top = (UINT)region->imageOffset.y, + .front = (UINT)region->imageOffset.z, + .right = (UINT)(region->imageOffset.x + region->imageExtent.width), + .bottom = (UINT)(region->imageOffset.y + region->imageExtent.height), + .back = (UINT)(region->imageOffset.z + region->imageExtent.depth), + }; + + ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, &dst_buf_loc, + 0, 0, 0, &src_img_loc, + &src_box); + return; + } + + D3D12_BOX src_box = { + .left = (UINT)region->imageOffset.x, + .right = (UINT)(region->imageOffset.x + region->imageExtent.width), + }; + + /* Copy line-by-line if things are not properly aligned. 
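
+    * (A single CopyTextureRegion() call needs the placed footprint's Offset
+    * aligned on D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT and its RowPitch on
+    * D3D12_TEXTURE_DATA_PITCH_ALIGNMENT; when the buffer layout doesn't
+    * guarantee that, we go through dzn_buffer_get_line_copy_loc(), which
+    * returns a footprint re-anchored for each row.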
*/ + for (uint32_t z = 0; z < region->imageExtent.depth; z += blkd) { + src_box.front = region->imageOffset.z + z; + src_box.back = src_box.front + blkd; + + for (uint32_t y = 0; y < region->imageExtent.height; y += blkh) { + uint32_t dst_x; + + D3D12_TEXTURE_COPY_LOCATION dst_buf_line_loc = + dzn_buffer_get_line_copy_loc(dst_buffer, src_image->vk.format, + region, &dst_buf_loc, + y, z, &dst_x); + + src_box.top = region->imageOffset.y + y; + src_box.bottom = src_box.top + blkh; + + ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, + &dst_buf_line_loc, + dst_x, 0, 0, + &src_img_loc, + &src_box); + } + } +} + +static void +dzn_cmd_buffer_copy_img_chunk(struct dzn_cmd_buffer *cmdbuf, + const VkCopyImageInfo2 *info, + D3D12_RESOURCE_DESC *tmp_desc, + D3D12_TEXTURE_COPY_LOCATION *tmp_loc, + uint32_t r, + VkImageAspectFlagBits aspect, + uint32_t l) +{ + struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); + VK_FROM_HANDLE(dzn_image, src, info->srcImage); + VK_FROM_HANDLE(dzn_image, dst, info->dstImage); + + ID3D12Device1 *dev = device->dev; + ID3D12GraphicsCommandList1 *cmdlist = cmdbuf->cmdlist; + + const VkImageCopy2 *region = &info->pRegions[r]; + const VkImageSubresourceLayers *src_subres = ®ion->srcSubresource; + const VkImageSubresourceLayers *dst_subres = ®ion->dstSubresource; + VkFormat src_format = + dzn_image_get_plane_format(src->vk.format, aspect); + VkFormat dst_format = + dzn_image_get_plane_format(dst->vk.format, aspect); + + enum pipe_format src_pfmt = vk_format_to_pipe_format(src_format); + uint32_t src_blkw = util_format_get_blockwidth(src_pfmt); + uint32_t src_blkh = util_format_get_blockheight(src_pfmt); + uint32_t src_blkd = util_format_get_blockdepth(src_pfmt); + enum pipe_format dst_pfmt = vk_format_to_pipe_format(dst_format); + uint32_t dst_blkw = util_format_get_blockwidth(dst_pfmt); + uint32_t dst_blkh = util_format_get_blockheight(dst_pfmt); + uint32_t dst_blkd = util_format_get_blockdepth(dst_pfmt); + uint32_t dst_z = region->dstOffset.z, src_z = region->srcOffset.z; + uint32_t depth = region->extent.depth; + uint32_t dst_l = l, src_l = l; + + assert(src_subres->aspectMask == dst_subres->aspectMask); + + if (src->vk.image_type == VK_IMAGE_TYPE_3D && + dst->vk.image_type == VK_IMAGE_TYPE_2D) { + assert(src_subres->layerCount == 1); + src_l = 0; + src_z += l; + depth = 1; + } else if (src->vk.image_type == VK_IMAGE_TYPE_2D && + dst->vk.image_type == VK_IMAGE_TYPE_3D) { + assert(dst_subres->layerCount == 1); + dst_l = 0; + dst_z += l; + depth = 1; + } else { + assert(src_subres->layerCount == dst_subres->layerCount); + } + + D3D12_TEXTURE_COPY_LOCATION dst_loc = dzn_image_get_copy_loc(dst, dst_subres, aspect, dst_l); + D3D12_TEXTURE_COPY_LOCATION src_loc = dzn_image_get_copy_loc(src, src_subres, aspect, src_l); + + D3D12_BOX src_box = { + .left = (UINT)MAX2(region->srcOffset.x, 0), + .top = (UINT)MAX2(region->srcOffset.y, 0), + .front = (UINT)MAX2(src_z, 0), + .right = (UINT)region->srcOffset.x + region->extent.width, + .bottom = (UINT)region->srcOffset.y + region->extent.height, + .back = (UINT)src_z + depth, + }; + + if (!tmp_loc->pResource) { + ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, &dst_loc, + region->dstOffset.x, + region->dstOffset.y, + dst_z, &src_loc, + &src_box); + return; + } + + tmp_desc->Format = + dzn_image_get_placed_footprint_format(src->vk.format, aspect); + tmp_desc->Width = region->extent.width; + tmp_desc->Height = region->extent.height; + + ID3D12Device1_GetCopyableFootprints(dev, tmp_desc, + 0, 
1, 0,
+ &tmp_loc->PlacedFootprint,
+ NULL, NULL, NULL);
+
+ tmp_loc->PlacedFootprint.Footprint.Depth = depth;
+
+ D3D12_RESOURCE_BARRIER barrier = {
+ .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
+ .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
+ .Transition = {
+ .pResource = tmp_loc->pResource,
+ .Subresource = 0,
+ .StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE,
+ .StateAfter = D3D12_RESOURCE_STATE_COPY_DEST,
+ },
+ };
+
+ if (r > 0 || l > 0)
+ ID3D12GraphicsCommandList1_ResourceBarrier(cmdlist, 1, &barrier);
+
+ ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, tmp_loc, 0, 0, 0, &src_loc, &src_box);
+
+ DZN_SWAP(D3D12_RESOURCE_STATES, barrier.Transition.StateBefore, barrier.Transition.StateAfter);
+ ID3D12GraphicsCommandList1_ResourceBarrier(cmdlist, 1, &barrier);
+
+ tmp_desc->Format =
+ dzn_image_get_placed_footprint_format(dst->vk.format, aspect);
+ if (src_blkw != dst_blkw)
+ tmp_desc->Width = DIV_ROUND_UP(region->extent.width, src_blkw) * dst_blkw;
+ if (src_blkh != dst_blkh)
+ tmp_desc->Height = DIV_ROUND_UP(region->extent.height, src_blkh) * dst_blkh;
+
+ ID3D12Device1_GetCopyableFootprints(device->dev, tmp_desc,
+ 0, 1, 0,
+ &tmp_loc->PlacedFootprint,
+ NULL, NULL, NULL);
+
+ if (src_blkd != dst_blkd) {
+ tmp_loc->PlacedFootprint.Footprint.Depth =
+ DIV_ROUND_UP(depth, src_blkd) * dst_blkd;
+ } else {
+ tmp_loc->PlacedFootprint.Footprint.Depth = region->extent.depth;
+ }
+
+ D3D12_BOX tmp_box = {
+ .left = 0,
+ .top = 0,
+ .front = 0,
+ .right = tmp_loc->PlacedFootprint.Footprint.Width,
+ .bottom = tmp_loc->PlacedFootprint.Footprint.Height,
+ .back = tmp_loc->PlacedFootprint.Footprint.Depth,
+ };
+
+ ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, &dst_loc,
+ region->dstOffset.x,
+ region->dstOffset.y,
+ dst_z,
+ tmp_loc, &tmp_box);
+}
+
+static void
+dzn_cmd_buffer_blit_prepare_src_view(struct dzn_cmd_buffer *cmdbuf,
+ VkImage image,
+ VkImageAspectFlagBits aspect,
+ const VkImageSubresourceLayers *subres,
+ struct dzn_descriptor_heap *heap,
+ uint32_t heap_slot)
+{
+ struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
+ VK_FROM_HANDLE(dzn_image, img, image);
+ VkImageViewCreateInfo iview_info = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = image,
+ .format = img->vk.format,
+ .subresourceRange = {
+ .aspectMask = (VkImageAspectFlags)aspect,
+ .baseMipLevel = subres->mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = subres->baseArrayLayer,
+ .layerCount = subres->layerCount,
+ },
+ };
+
+ if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT) {
+ iview_info.components.r = VK_COMPONENT_SWIZZLE_G;
+ iview_info.components.g = VK_COMPONENT_SWIZZLE_G;
+ iview_info.components.b = VK_COMPONENT_SWIZZLE_G;
+ iview_info.components.a = VK_COMPONENT_SWIZZLE_G;
+ } else if (aspect == VK_IMAGE_ASPECT_DEPTH_BIT) {
+ iview_info.components.r = VK_COMPONENT_SWIZZLE_R;
+ iview_info.components.g = VK_COMPONENT_SWIZZLE_R;
+ iview_info.components.b = VK_COMPONENT_SWIZZLE_R;
+ iview_info.components.a = VK_COMPONENT_SWIZZLE_R;
+ }
+
+ switch (img->vk.image_type) {
+ case VK_IMAGE_TYPE_1D:
+ iview_info.viewType = img->vk.array_layers > 1 ?
+ VK_IMAGE_VIEW_TYPE_1D_ARRAY : VK_IMAGE_VIEW_TYPE_1D;
+ break;
+ case VK_IMAGE_TYPE_2D:
+ iview_info.viewType = img->vk.array_layers > 1 ?
+ VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D; + break; + case VK_IMAGE_TYPE_3D: + iview_info.viewType = VK_IMAGE_VIEW_TYPE_3D; + break; + default: + unreachable("Invalid type"); + } + + struct dzn_image_view iview; + dzn_image_view_init(device, &iview, &iview_info); + dzn_descriptor_heap_write_image_view_desc(heap, heap_slot, false, false, &iview); + dzn_image_view_finish(&iview); + + D3D12_GPU_DESCRIPTOR_HANDLE handle = + dzn_descriptor_heap_get_gpu_handle(heap, heap_slot); + ID3D12GraphicsCommandList1_SetGraphicsRootDescriptorTable(cmdbuf->cmdlist, 0, handle); +} + +static void +dzn_cmd_buffer_blit_prepare_dst_view(struct dzn_cmd_buffer *cmdbuf, + struct dzn_image *img, + VkImageAspectFlagBits aspect, + uint32_t level, uint32_t layer) +{ + bool ds = aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT); + VkImageSubresourceRange range = { + .aspectMask = (VkImageAspectFlags)aspect, + .baseMipLevel = level, + .levelCount = 1, + .baseArrayLayer = layer, + .layerCount = 1, + }; + + if (ds) { + D3D12_DEPTH_STENCIL_VIEW_DESC desc = dzn_image_get_dsv_desc(img, &range, 0); + D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_dsv(cmdbuf, img, &desc); + ID3D12GraphicsCommandList1_OMSetRenderTargets(cmdbuf->cmdlist, 0, NULL, TRUE, &handle); + } else { + D3D12_RENDER_TARGET_VIEW_DESC desc = dzn_image_get_rtv_desc(img, &range, 0); + D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_rtv(cmdbuf, img, &desc); + ID3D12GraphicsCommandList1_OMSetRenderTargets(cmdbuf->cmdlist, 1, &handle, FALSE, NULL); + } +} + +static void +dzn_cmd_buffer_blit_set_pipeline(struct dzn_cmd_buffer *cmdbuf, + const struct dzn_image *src, + const struct dzn_image *dst, + VkImageAspectFlagBits aspect, + VkFilter filter, bool resolve) +{ + struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); + enum pipe_format pfmt = vk_format_to_pipe_format(dst->vk.format); + VkImageUsageFlags usage = + vk_format_is_depth_or_stencil(dst->vk.format) ? + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT : + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + struct dzn_meta_blit_key ctx_key = { + .out_format = dzn_image_get_dxgi_format(dst->vk.format, usage, aspect), + .samples = (uint32_t)src->vk.samples, + .loc = (uint32_t)(aspect == VK_IMAGE_ASPECT_DEPTH_BIT ? + FRAG_RESULT_DEPTH : + aspect == VK_IMAGE_ASPECT_STENCIL_BIT ? + FRAG_RESULT_STENCIL : + FRAG_RESULT_DATA0), + .out_type = (uint32_t)(util_format_is_pure_uint(pfmt) ? GLSL_TYPE_UINT : + util_format_is_pure_sint(pfmt) ? GLSL_TYPE_INT : + aspect == VK_IMAGE_ASPECT_STENCIL_BIT ? GLSL_TYPE_UINT : + GLSL_TYPE_FLOAT), + .sampler_dim = (uint32_t)(src->vk.image_type == VK_IMAGE_TYPE_1D ? GLSL_SAMPLER_DIM_1D : + src->vk.image_type == VK_IMAGE_TYPE_2D && src->vk.samples == 1 ? GLSL_SAMPLER_DIM_2D : + src->vk.image_type == VK_IMAGE_TYPE_2D && src->vk.samples > 1 ? 
GLSL_SAMPLER_DIM_MS : + GLSL_SAMPLER_DIM_3D), + .src_is_array = src->vk.array_layers > 1, + .resolve = resolve, + .linear_filter = filter == VK_FILTER_LINEAR, + .padding = 0, + }; + + const struct dzn_meta_blit *ctx = + dzn_meta_blits_get_context(device, &ctx_key); + assert(ctx); + + ID3D12GraphicsCommandList1_SetGraphicsRootSignature(cmdbuf->cmdlist, ctx->root_sig); + ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, ctx->pipeline_state); +} + +static void +dzn_cmd_buffer_blit_set_2d_region(struct dzn_cmd_buffer *cmdbuf, + const struct dzn_image *src, + const VkImageSubresourceLayers *src_subres, + const VkOffset3D *src_offsets, + const struct dzn_image *dst, + const VkImageSubresourceLayers *dst_subres, + const VkOffset3D *dst_offsets, + bool normalize_src_coords) +{ + uint32_t dst_w = u_minify(dst->vk.extent.width, dst_subres->mipLevel); + uint32_t dst_h = u_minify(dst->vk.extent.height, dst_subres->mipLevel); + uint32_t src_w = u_minify(src->vk.extent.width, src_subres->mipLevel); + uint32_t src_h = u_minify(src->vk.extent.height, src_subres->mipLevel); + + float dst_pos[4] = { + (2 * (float)dst_offsets[0].x / (float)dst_w) - 1.0f, -((2 * (float)dst_offsets[0].y / (float)dst_h) - 1.0f), + (2 * (float)dst_offsets[1].x / (float)dst_w) - 1.0f, -((2 * (float)dst_offsets[1].y / (float)dst_h) - 1.0f), + }; + + float src_pos[4] = { + (float)src_offsets[0].x, (float)src_offsets[0].y, + (float)src_offsets[1].x, (float)src_offsets[1].y, + }; + + if (normalize_src_coords) { + src_pos[0] /= src_w; + src_pos[1] /= src_h; + src_pos[2] /= src_w; + src_pos[3] /= src_h; + } + + float coords[] = { + dst_pos[0], dst_pos[1], src_pos[0], src_pos[1], + dst_pos[2], dst_pos[1], src_pos[2], src_pos[1], + dst_pos[0], dst_pos[3], src_pos[0], src_pos[3], + dst_pos[2], dst_pos[3], src_pos[2], src_pos[3], + }; + + ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, 1, ARRAY_SIZE(coords), coords, 0); + + D3D12_VIEWPORT vp = { + .TopLeftX = 0, + .TopLeftY = 0, + .Width = (float)dst_w, + .Height = (float)dst_h, + .MinDepth = 0, + .MaxDepth = 1, + }; + ID3D12GraphicsCommandList1_RSSetViewports(cmdbuf->cmdlist, 1, &vp); + + D3D12_RECT scissor = { + .left = MIN2(dst_offsets[0].x, dst_offsets[1].x), + .top = MIN2(dst_offsets[0].y, dst_offsets[1].y), + .right = MAX2(dst_offsets[0].x, dst_offsets[1].x), + .bottom = MAX2(dst_offsets[0].y, dst_offsets[1].y), + }; + ID3D12GraphicsCommandList1_RSSetScissorRects(cmdbuf->cmdlist, 1, &scissor); +} + +static void +dzn_cmd_buffer_blit_issue_barriers(struct dzn_cmd_buffer *cmdbuf, + struct dzn_image *src, VkImageLayout src_layout, + const VkImageSubresourceLayers *src_subres, + struct dzn_image *dst, VkImageLayout dst_layout, + const VkImageSubresourceLayers *dst_subres, + VkImageAspectFlagBits aspect, + bool post) +{ + bool ds = aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT); + D3D12_RESOURCE_BARRIER barriers[2] = { + { + .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, + .Transition = { + .pResource = src->res, + .StateBefore = dzn_image_layout_to_state(src_layout, aspect), + .StateAfter = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, + }, + }, + { + .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, + .Transition = { + .pResource = dst->res, + .StateBefore = dzn_image_layout_to_state(dst_layout, aspect), + .StateAfter = ds ? 
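+ /* depth/stencil blits are written through a DSV and color blits
+ * through an RTV, so pick the matching destination state */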
+ D3D12_RESOURCE_STATE_DEPTH_WRITE :
+ D3D12_RESOURCE_STATE_RENDER_TARGET,
+ },
+ },
+ };
+
+ if (post) {
+ DZN_SWAP(D3D12_RESOURCE_STATES, barriers[0].Transition.StateBefore, barriers[0].Transition.StateAfter);
+ DZN_SWAP(D3D12_RESOURCE_STATES, barriers[1].Transition.StateBefore, barriers[1].Transition.StateAfter);
+ }
+
+ uint32_t layer_count = dzn_get_layer_count(src, src_subres);
+ uint32_t src_level = src_subres->mipLevel;
+ uint32_t dst_level = dst_subres->mipLevel;
+
+ assert(dzn_get_layer_count(dst, dst_subres) == layer_count);
+ assert(src_level < src->vk.mip_levels);
+ assert(dst_level < dst->vk.mip_levels);
+
+ for (uint32_t layer = 0; layer < layer_count; layer++) {
+ barriers[0].Transition.Subresource =
+ dzn_image_layers_get_subresource_index(src, src_subres, aspect, layer);
+ barriers[1].Transition.Subresource =
+ dzn_image_layers_get_subresource_index(dst, dst_subres, aspect, layer);
+ ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, ARRAY_SIZE(barriers), barriers);
+ }
+}
+
+static void
+dzn_cmd_buffer_blit_region(struct dzn_cmd_buffer *cmdbuf,
+ const VkBlitImageInfo2 *info,
+ struct dzn_descriptor_heap *heap,
+ uint32_t *heap_slot,
+ uint32_t r)
+{
+ struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
+ VK_FROM_HANDLE(dzn_image, src, info->srcImage);
+ VK_FROM_HANDLE(dzn_image, dst, info->dstImage);
+
+ const VkImageBlit2 *region = &info->pRegions[r];
+ bool src_is_3d = src->vk.image_type == VK_IMAGE_TYPE_3D;
+ bool dst_is_3d = dst->vk.image_type == VK_IMAGE_TYPE_3D;
+
+ dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) {
+ dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, info->filter, false);
+ dzn_cmd_buffer_blit_issue_barriers(cmdbuf,
+ src, info->srcImageLayout, &region->srcSubresource,
+ dst, info->dstImageLayout, &region->dstSubresource,
+ aspect, false);
+ dzn_cmd_buffer_blit_prepare_src_view(cmdbuf, info->srcImage,
+ aspect, &region->srcSubresource,
+ heap, (*heap_slot)++);
+ dzn_cmd_buffer_blit_set_2d_region(cmdbuf,
+ src, &region->srcSubresource, region->srcOffsets,
+ dst, &region->dstSubresource, region->dstOffsets,
+ src->vk.samples == 1);
+
+ uint32_t dst_depth =
+ region->dstOffsets[1].z > region->dstOffsets[0].z ?
+ region->dstOffsets[1].z - region->dstOffsets[0].z :
+ region->dstOffsets[0].z - region->dstOffsets[1].z;
+ uint32_t src_depth =
+ region->srcOffsets[1].z > region->srcOffsets[0].z ?
+ region->srcOffsets[1].z - region->srcOffsets[0].z :
+ region->srcOffsets[0].z - region->srcOffsets[1].z;
+
+ uint32_t layer_count = dzn_get_layer_count(src, &region->srcSubresource);
+ uint32_t dst_level = region->dstSubresource.mipLevel;
+
+ float src_slice_step = src_is_3d ? (float)src_depth / dst_depth : 1;
+ if (region->srcOffsets[0].z > region->srcOffsets[1].z)
+ src_slice_step = -src_slice_step;
+ float src_z_coord =
+ src_is_3d ? (float)region->srcOffsets[0].z + (src_slice_step * 0.5f) : 0;
+ uint32_t slice_count = dst_is_3d ? dst_depth : layer_count;
+ uint32_t dst_z_coord =
+ dst_is_3d ? region->dstOffsets[0].z : region->dstSubresource.baseArrayLayer;
+ if (region->dstOffsets[0].z > region->dstOffsets[1].z)
+ dst_z_coord--;
+
+ uint32_t dst_slice_step = region->dstOffsets[0].z < region->dstOffsets[1].z ?
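+ /* step from dstOffsets[0] towards dstOffsets[1], one slice/layer per
+ * quad drawn in the loop below */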
+ 1 : -1;
+
+ /* Normalize the src coordinates/step */
+ if (src_is_3d) {
+ src_z_coord /= src->vk.extent.depth;
+ src_slice_step /= src->vk.extent.depth;
+ }
+
+ for (uint32_t slice = 0; slice < slice_count; slice++) {
+ dzn_cmd_buffer_blit_prepare_dst_view(cmdbuf, dst, aspect, dst_level, dst_z_coord);
+ ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, 1, 1, &src_z_coord, 16);
+ ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0);
+ src_z_coord += src_slice_step;
+ dst_z_coord += dst_slice_step;
+ }
+
+ dzn_cmd_buffer_blit_issue_barriers(cmdbuf,
+ src, info->srcImageLayout, &region->srcSubresource,
+ dst, info->dstImageLayout, &region->dstSubresource,
+ aspect, true);
+ }
+}
+
+static void
+dzn_cmd_buffer_resolve_region(struct dzn_cmd_buffer *cmdbuf,
+ const VkResolveImageInfo2 *info,
+ struct dzn_descriptor_heap *heap,
+ uint32_t *heap_slot,
+ uint32_t r)
+{
+ struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
+ VK_FROM_HANDLE(dzn_image, src, info->srcImage);
+ VK_FROM_HANDLE(dzn_image, dst, info->dstImage);
+
+ ID3D12Device1 *dev = device->dev;
+ const VkImageResolve2 *region = &info->pRegions[r];
+
+ dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) {
+ dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, VK_FILTER_NEAREST, true);
+ dzn_cmd_buffer_blit_issue_barriers(cmdbuf,
+ src, info->srcImageLayout, &region->srcSubresource,
+ dst, info->dstImageLayout, &region->dstSubresource,
+ aspect, false);
+ dzn_cmd_buffer_blit_prepare_src_view(cmdbuf, info->srcImage, aspect,
+ &region->srcSubresource,
+ heap, (*heap_slot)++);
+
+ VkOffset3D src_offset[2] = {
+ {
+ .x = region->srcOffset.x,
+ .y = region->srcOffset.y,
+ },
+ {
+ .x = (int32_t)(region->srcOffset.x + region->extent.width),
+ .y = (int32_t)(region->srcOffset.y + region->extent.height),
+ },
+ };
+ VkOffset3D dst_offset[2] = {
+ {
+ .x = region->dstOffset.x,
+ .y = region->dstOffset.y,
+ },
+ {
+ .x = (int32_t)(region->dstOffset.x + region->extent.width),
+ .y = (int32_t)(region->dstOffset.y + region->extent.height),
+ },
+ };
+
+ dzn_cmd_buffer_blit_set_2d_region(cmdbuf,
+ src, &region->srcSubresource, src_offset,
+ dst, &region->dstSubresource, dst_offset,
+ false);
+
+ uint32_t layer_count = dzn_get_layer_count(src, &region->srcSubresource);
+ for (uint32_t layer = 0; layer < layer_count; layer++) {
+ float src_z_coord = layer;
+
+ dzn_cmd_buffer_blit_prepare_dst_view(cmdbuf,
+ dst, aspect, region->dstSubresource.mipLevel,
+ region->dstSubresource.baseArrayLayer + layer);
+ ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, 1, 1, &src_z_coord, 16);
+ ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0);
+ }
+
+ dzn_cmd_buffer_blit_issue_barriers(cmdbuf,
+ src, info->srcImageLayout, &region->srcSubresource,
+ dst, info->dstImageLayout, &region->dstSubresource,
+ aspect, true);
+ }
+}
+
+static void
+dzn_cmd_buffer_clear_attachments(struct dzn_cmd_buffer *cmdbuf,
+ uint32_t attachment_count,
+ const VkClearAttachment *attachments,
+ uint32_t rect_count,
+ const VkClearRect *rects)
+{
+ struct dzn_render_pass *pass = cmdbuf->state.pass;
+ const struct dzn_subpass *subpass = &pass->subpasses[cmdbuf->state.subpass];
+
+ for (unsigned i = 0; i < attachment_count; i++) {
+ uint32_t idx;
+ if (attachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT)
+ idx = subpass->colors[attachments[i].colorAttachment].idx;
+ else
+ idx = subpass->zs.idx;
+
+ for (uint32_t j = 0; j < rect_count; j++) {
+ D3D12_RECT rect;
+
+ 
dzn_translate_rect(&rect, &rects[j].rect); + dzn_cmd_buffer_clear_attachment(cmdbuf, + idx, &attachments[i].clearValue, + attachments[i].aspectMask, + rects[j].baseArrayLayer, + rects[j].layerCount, + 1, &rect); + } + } +} + +static void +dzn_cmd_buffer_attachment_ref_transition(struct dzn_cmd_buffer *cmdbuf, + const struct dzn_attachment_ref *att) +{ + const struct dzn_image_view *iview = cmdbuf->state.framebuffer->attachments[att->idx]; + const struct dzn_image *image = container_of(iview->vk.image, struct dzn_image, vk); + + if (att->before == att->during) + return; + + VkImageSubresourceRange subres = { + .aspectMask = att->aspects, + .baseMipLevel = iview->vk.base_mip_level, + .levelCount = iview->vk.level_count, + .baseArrayLayer = iview->vk.base_array_layer, + .layerCount = iview->vk.layer_count, + }; + + dzn_foreach_aspect(aspect, att->aspects) { + for (uint32_t lvl = 0; lvl < iview->vk.level_count; lvl++) { + for (uint32_t layer = 0; layer < iview->vk.layer_count; layer++) { + D3D12_RESOURCE_BARRIER barrier = { + .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, + .Transition = { + .pResource = image->res, + .Subresource = + dzn_image_range_get_subresource_index(image, &subres, aspect, lvl, layer), + .StateBefore = (aspect & VK_IMAGE_ASPECT_STENCIL_BIT) ? att->stencil.before : att->before, + .StateAfter = (aspect & VK_IMAGE_ASPECT_STENCIL_BIT) ? att->stencil.during : att->during, + }, + }; + + if (barrier.Transition.StateBefore != barrier.Transition.StateAfter) + ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier); + } + } + } +} + +static void +dzn_cmd_buffer_attachment_transition(struct dzn_cmd_buffer *cmdbuf, + const struct dzn_attachment *att) +{ + const struct dzn_image_view *iview = cmdbuf->state.framebuffer->attachments[att->idx]; + const struct dzn_image *image = container_of(iview->vk.image, struct dzn_image, vk); + + if (att->last == att->after) + return; + + VkImageSubresourceRange subres = { + .aspectMask = att->aspects, + .baseMipLevel = iview->vk.base_mip_level, + .levelCount = iview->vk.level_count, + .baseArrayLayer = iview->vk.base_array_layer, + .layerCount = iview->vk.layer_count, + }; + + dzn_foreach_aspect(aspect, att->aspects) { + for (uint32_t lvl = 0; lvl < iview->vk.level_count; lvl++) { + for (uint32_t layer = 0; layer < iview->vk.layer_count; layer++) { + D3D12_RESOURCE_BARRIER barrier = { + .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, + .Transition = { + .pResource = image->res, + .Subresource = + dzn_image_range_get_subresource_index(image, &subres, aspect, lvl, layer), + .StateBefore = (aspect & VK_IMAGE_ASPECT_STENCIL_BIT) ? att->stencil.last : att->last, + .StateAfter = (aspect & VK_IMAGE_ASPECT_STENCIL_BIT) ? 
att->stencil.after : att->after, + }, + }; + + if (barrier.Transition.StateBefore != barrier.Transition.StateAfter) + ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier); + } + } + } +} + +static void +dzn_cmd_buffer_resolve_attachment(struct dzn_cmd_buffer *cmdbuf, uint32_t i) +{ + const struct dzn_subpass *subpass = + &cmdbuf->state.pass->subpasses[cmdbuf->state.subpass]; + + if (subpass->resolve[i].idx == VK_ATTACHMENT_UNUSED) + return; + + const struct dzn_framebuffer *framebuffer = cmdbuf->state.framebuffer; + struct dzn_image_view *src = framebuffer->attachments[subpass->colors[i].idx]; + struct dzn_image *src_img = container_of(src->vk.image, struct dzn_image, vk); + struct dzn_image_view *dst = framebuffer->attachments[subpass->resolve[i].idx]; + struct dzn_image *dst_img = container_of(dst->vk.image, struct dzn_image, vk); + D3D12_RESOURCE_BARRIER barriers[2]; + uint32_t barrier_count = 0; + + /* TODO: 2DArrays/3D */ + if (subpass->colors[i].during != D3D12_RESOURCE_STATE_RESOLVE_SOURCE) { + barriers[barrier_count++] = (D3D12_RESOURCE_BARRIER) { + .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, + .Transition = { + .pResource = src_img->res, + .Subresource = 0, + .StateBefore = subpass->colors[i].during, + .StateAfter = D3D12_RESOURCE_STATE_RESOLVE_SOURCE, + }, + }; + } + + if (subpass->resolve[i].during != D3D12_RESOURCE_STATE_RESOLVE_DEST) { + barriers[barrier_count++] = (D3D12_RESOURCE_BARRIER) { + .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, + .Transition = { + .pResource = dst_img->res, + .Subresource = 0, + .StateBefore = subpass->resolve[i].during, + .StateAfter = D3D12_RESOURCE_STATE_RESOLVE_DEST, + }, + }; + } + + if (barrier_count) + ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, barrier_count, barriers); + + ID3D12GraphicsCommandList1_ResolveSubresource(cmdbuf->cmdlist, dst_img->res, 0, + src_img->res, 0, + dst->srv_desc.Format); + + for (uint32_t b = 0; b < barrier_count; b++) + DZN_SWAP(D3D12_RESOURCE_STATES, barriers[b].Transition.StateBefore, barriers[b].Transition.StateAfter); + + if (barrier_count) + ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, barrier_count, barriers); +} + +static void +dzn_cmd_buffer_begin_subpass(struct dzn_cmd_buffer *cmdbuf) +{ + struct dzn_framebuffer *framebuffer = cmdbuf->state.framebuffer; + struct dzn_render_pass *pass = cmdbuf->state.pass; + const struct dzn_subpass *subpass = &pass->subpasses[cmdbuf->state.subpass]; + + D3D12_CPU_DESCRIPTOR_HANDLE rt_handles[MAX_RTS] = { 0 }; + D3D12_CPU_DESCRIPTOR_HANDLE zs_handle = { 0 }; + + for (uint32_t i = 0; i < subpass->color_count; i++) { + if (subpass->colors[i].idx == VK_ATTACHMENT_UNUSED) continue; + + struct dzn_image_view *iview = framebuffer->attachments[subpass->colors[i].idx]; + struct dzn_image *img = container_of(iview->vk.image, struct dzn_image, vk); + + rt_handles[i] = dzn_cmd_buffer_get_rtv(cmdbuf, img, &iview->rtv_desc); + } + + if (subpass->zs.idx != VK_ATTACHMENT_UNUSED) { + struct dzn_image_view *iview = framebuffer->attachments[subpass->zs.idx]; + struct dzn_image *img = container_of(iview->vk.image, struct dzn_image, vk); + + zs_handle = dzn_cmd_buffer_get_dsv(cmdbuf, img, &iview->dsv_desc); + } + + ID3D12GraphicsCommandList1_OMSetRenderTargets(cmdbuf->cmdlist, subpass->color_count, + subpass->color_count ? rt_handles : NULL, + FALSE, zs_handle.ptr ? 
&zs_handle : NULL); + + for (uint32_t i = 0; i < subpass->color_count; i++) + dzn_cmd_buffer_attachment_ref_transition(cmdbuf, &subpass->colors[i]); + for (uint32_t i = 0; i < subpass->input_count; i++) + dzn_cmd_buffer_attachment_ref_transition(cmdbuf, &subpass->inputs[i]); + + if (subpass->zs.idx != VK_ATTACHMENT_UNUSED) + dzn_cmd_buffer_attachment_ref_transition(cmdbuf, &subpass->zs); +} + +static void +dzn_cmd_buffer_end_subpass(struct dzn_cmd_buffer *cmdbuf) +{ + const struct dzn_subpass *subpass = &cmdbuf->state.pass->subpasses[cmdbuf->state.subpass]; + + for (uint32_t i = 0; i < subpass->color_count; i++) + dzn_cmd_buffer_resolve_attachment(cmdbuf, i); +} + +static void +dzn_cmd_buffer_update_pipeline(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint) +{ + const struct dzn_pipeline *pipeline = cmdbuf->state.bindpoint[bindpoint].pipeline; + + if (!pipeline) + return; + + if (cmdbuf->state.bindpoint[bindpoint].dirty & DZN_CMD_BINDPOINT_DIRTY_PIPELINE) { + if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS) { + const struct dzn_graphics_pipeline *gfx = + (const struct dzn_graphics_pipeline *)pipeline; + ID3D12GraphicsCommandList1_SetGraphicsRootSignature(cmdbuf->cmdlist, pipeline->root.sig); + ID3D12GraphicsCommandList1_IASetPrimitiveTopology(cmdbuf->cmdlist, gfx->ia.topology); + } else { + ID3D12GraphicsCommandList1_SetComputeRootSignature(cmdbuf->cmdlist, pipeline->root.sig); + } + } + + if (cmdbuf->state.pipeline != pipeline) { + ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, pipeline->state); + cmdbuf->state.pipeline = pipeline; + } +} + +static void +dzn_cmd_buffer_update_heaps(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint) +{ + struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); + struct dzn_descriptor_state *desc_state = + &cmdbuf->state.bindpoint[bindpoint].desc_state; + struct dzn_descriptor_heap *new_heaps[NUM_POOL_TYPES] = { + desc_state->heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV], + desc_state->heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER] + }; + uint32_t new_heap_offsets[NUM_POOL_TYPES] = { 0 }; + bool update_root_desc_table[NUM_POOL_TYPES] = { 0 }; + const struct dzn_pipeline *pipeline = + cmdbuf->state.bindpoint[bindpoint].pipeline; + + if (!(cmdbuf->state.bindpoint[bindpoint].dirty & DZN_CMD_BINDPOINT_DIRTY_HEAPS)) + goto set_heaps; + + dzn_foreach_pool_type (type) { + uint32_t desc_count = pipeline->desc_count[type]; + if (!desc_count) + continue; + + struct dzn_descriptor_heap_pool *pool = + type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV ? 
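+ /* D3D12 only lets us bind one CBV_SRV_UAV heap and one SAMPLER heap at
+ * a time, so the descriptors referenced by the bound sets get copied
+ * into GPU-visible heaps taken from the command buffer's pools */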
+ &cmdbuf->cbv_srv_uav_pool : &cmdbuf->sampler_pool; + uint32_t dst_offset = 0; + struct dzn_descriptor_heap *dst_heap = NULL; + uint32_t dst_heap_offset = 0; + + dzn_descriptor_heap_pool_alloc_slots(pool, device, desc_count, + &dst_heap, &dst_heap_offset); + new_heap_offsets[type] = dst_heap_offset; + update_root_desc_table[type] = true; + + for (uint32_t s = 0; s < MAX_SETS; s++) { + const struct dzn_descriptor_set *set = desc_state->sets[s].set; + if (!set) continue; + + uint32_t set_heap_offset = pipeline->sets[s].heap_offsets[type]; + uint32_t set_desc_count = pipeline->sets[s].range_desc_count[type]; + if (set_desc_count) { + mtx_lock(&set->pool->defragment_lock); + dzn_descriptor_heap_copy(dst_heap, dst_heap_offset + set_heap_offset, + &set->pool->heaps[type], set->heap_offsets[type], + set_desc_count); + mtx_unlock(&set->pool->defragment_lock); + } + + if (type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) { + uint32_t dynamic_buffer_count = pipeline->sets[s].dynamic_buffer_count; + for (uint32_t o = 0; o < dynamic_buffer_count; o++) { + uint32_t desc_heap_offset = + pipeline->sets[s].dynamic_buffer_heap_offsets[o].srv; + struct dzn_buffer_desc bdesc = set->dynamic_buffers[o]; + bdesc.offset += desc_state->sets[s].dynamic_offsets[o]; + + dzn_descriptor_heap_write_buffer_desc(dst_heap, + dst_heap_offset + set_heap_offset + desc_heap_offset, + false, &bdesc); + + if (pipeline->sets[s].dynamic_buffer_heap_offsets[o].uav != ~0) { + desc_heap_offset = pipeline->sets[s].dynamic_buffer_heap_offsets[o].uav; + dzn_descriptor_heap_write_buffer_desc(dst_heap, + dst_heap_offset + set_heap_offset + desc_heap_offset, + true, &bdesc); + } + } + } + } + + new_heaps[type] = dst_heap; + } + +set_heaps: + if (new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] || + new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER] != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER]) { + ID3D12DescriptorHeap *desc_heaps[2]; + uint32_t num_desc_heaps = 0; + if (new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV]) + desc_heaps[num_desc_heaps++] = new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV]->heap; + if (new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER]) + desc_heaps[num_desc_heaps++] = new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER]->heap; + ID3D12GraphicsCommandList1_SetDescriptorHeaps(cmdbuf->cmdlist, num_desc_heaps, desc_heaps); + + for (unsigned h = 0; h < ARRAY_SIZE(cmdbuf->state.heaps); h++) + cmdbuf->state.heaps[h] = new_heaps[h]; + } + + for (uint32_t r = 0; r < pipeline->root.sets_param_count; r++) { + D3D12_DESCRIPTOR_HEAP_TYPE type = pipeline->root.type[r]; + + if (!update_root_desc_table[type]) + continue; + + D3D12_GPU_DESCRIPTOR_HANDLE handle = + dzn_descriptor_heap_get_gpu_handle(new_heaps[type], new_heap_offsets[type]); + + if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS) + ID3D12GraphicsCommandList1_SetGraphicsRootDescriptorTable(cmdbuf->cmdlist, r, handle); + else + ID3D12GraphicsCommandList1_SetComputeRootDescriptorTable(cmdbuf->cmdlist, r, handle); + } +} + +static void +dzn_cmd_buffer_update_sysvals(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint) +{ + if (!(cmdbuf->state.bindpoint[bindpoint].dirty & DZN_CMD_BINDPOINT_DIRTY_SYSVALS)) + return; + + const struct dzn_pipeline *pipeline = cmdbuf->state.bindpoint[bindpoint].pipeline; + uint32_t sysval_cbv_param_idx = pipeline->root.sysval_cbv_param_idx; + + if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS) { + ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, 
sysval_cbv_param_idx, + sizeof(cmdbuf->state.sysvals.gfx) / 4, + &cmdbuf->state.sysvals.gfx, 0); + } else { + ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, sysval_cbv_param_idx, + sizeof(cmdbuf->state.sysvals.compute) / 4, + &cmdbuf->state.sysvals.compute, 0); + } +} + +static void +dzn_cmd_buffer_update_viewports(struct dzn_cmd_buffer *cmdbuf) +{ + const struct dzn_graphics_pipeline *pipeline = + (const struct dzn_graphics_pipeline *)cmdbuf->state.pipeline; + + if (!(cmdbuf->state.dirty & DZN_CMD_DIRTY_VIEWPORTS) || + !pipeline->vp.count) + return; + + ID3D12GraphicsCommandList1_RSSetViewports(cmdbuf->cmdlist, pipeline->vp.count, cmdbuf->state.viewports); +} + +static void +dzn_cmd_buffer_update_scissors(struct dzn_cmd_buffer *cmdbuf) +{ + const struct dzn_graphics_pipeline *pipeline = + (const struct dzn_graphics_pipeline *)cmdbuf->state.pipeline; + + if (!(cmdbuf->state.dirty & DZN_CMD_DIRTY_SCISSORS)) + return; + + if (!pipeline->scissor.count) { + /* Apply a scissor delimiting the render area. */ + ID3D12GraphicsCommandList1_RSSetScissorRects(cmdbuf->cmdlist, 1, &cmdbuf->state.render_area); + return; + } + + D3D12_RECT scissors[MAX_SCISSOR]; + uint32_t scissor_count = pipeline->scissor.count; + + memcpy(scissors, cmdbuf->state.scissors, sizeof(D3D12_RECT) * pipeline->scissor.count); + for (uint32_t i = 0; i < pipeline->scissor.count; i++) { + scissors[i].left = MAX2(scissors[i].left, cmdbuf->state.render_area.left); + scissors[i].top = MAX2(scissors[i].top, cmdbuf->state.render_area.top); + scissors[i].right = MIN2(scissors[i].right, cmdbuf->state.render_area.right); + scissors[i].bottom = MIN2(scissors[i].bottom, cmdbuf->state.render_area.bottom); + } + + ID3D12GraphicsCommandList1_RSSetScissorRects(cmdbuf->cmdlist, pipeline->scissor.count, scissors); +} + +static void +dzn_cmd_buffer_update_vbviews(struct dzn_cmd_buffer *cmdbuf) +{ + const struct dzn_graphics_pipeline *pipeline = + (const struct dzn_graphics_pipeline *)cmdbuf->state.pipeline; + unsigned start, end; + + BITSET_FOREACH_RANGE(start, end, cmdbuf->state.vb.dirty, MAX_VBS) + ID3D12GraphicsCommandList1_IASetVertexBuffers(cmdbuf->cmdlist, start, end - start, cmdbuf->state.vb.views); + + BITSET_CLEAR_RANGE(cmdbuf->state.vb.dirty, 0, MAX_VBS); +} + +static void +dzn_cmd_buffer_update_ibview(struct dzn_cmd_buffer *cmdbuf) +{ + if (!(cmdbuf->state.dirty & DZN_CMD_DIRTY_IB)) + return; + + ID3D12GraphicsCommandList1_IASetIndexBuffer(cmdbuf->cmdlist, &cmdbuf->state.ib.view); +} + +static void +dzn_cmd_buffer_update_push_constants(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint) +{ + struct dzn_cmd_buffer_push_constant_state *state = + bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS ? 
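+ /* push constants are shadowed on the CPU and flushed to D3D12 root
+ * constants here; graphics and compute keep separate shadow copies */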
+ &cmdbuf->state.push_constant.gfx : &cmdbuf->state.push_constant.compute; + + uint32_t offset = state->offset / 4; + uint32_t end = ALIGN(state->end, 4) / 4; + uint32_t count = end - offset; + + if (!count) + return; + + uint32_t slot = cmdbuf->state.pipeline->root.push_constant_cbv_param_idx; + uint32_t *vals = state->values + offset; + + if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS) + ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, slot, count, vals, offset); + else + ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, slot, count, vals, offset); + + state->offset = 0; + state->end = 0; +} + +static void +dzn_cmd_buffer_update_zsa(struct dzn_cmd_buffer *cmdbuf) +{ + if (cmdbuf->state.dirty & DZN_CMD_DIRTY_STENCIL_REF) { + const struct dzn_graphics_pipeline *gfx = (const struct dzn_graphics_pipeline *) + cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline; + uint32_t ref = + gfx->zsa.stencil_test.front.uses_ref ? + cmdbuf->state.zsa.stencil_test.front.ref : + cmdbuf->state.zsa.stencil_test.back.ref; + ID3D12GraphicsCommandList1_OMSetStencilRef(cmdbuf->cmdlist, ref); + } +} + +static void +dzn_cmd_buffer_update_blend_constants(struct dzn_cmd_buffer *cmdbuf) +{ + if (cmdbuf->state.dirty & DZN_CMD_DIRTY_BLEND_CONSTANTS) + ID3D12GraphicsCommandList1_OMSetBlendFactor(cmdbuf->cmdlist, + cmdbuf->state.blend.constants); +} + +static VkResult +dzn_cmd_buffer_triangle_fan_create_index(struct dzn_cmd_buffer *cmdbuf, uint32_t *vertex_count) +{ + struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); + uint8_t index_size = *vertex_count <= 0xffff ? 2 : 4; + uint32_t triangle_count = MAX2(*vertex_count, 2) - 2; + + *vertex_count = triangle_count * 3; + if (!*vertex_count) + return VK_SUCCESS; + + ID3D12Resource *index_buf; + VkResult result = + dzn_cmd_buffer_alloc_internal_buf(cmdbuf, *vertex_count * index_size, + D3D12_HEAP_TYPE_UPLOAD, + D3D12_RESOURCE_STATE_GENERIC_READ, + &index_buf); + if (result != VK_SUCCESS) + return result; + + void *cpu_ptr; + ID3D12Resource_Map(index_buf, 0, NULL, &cpu_ptr); + + /* TODO: VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT */ + if (index_size == 2) { + uint16_t *indices = (uint16_t *)cpu_ptr; + for (uint32_t t = 0; t < triangle_count; t++) { + indices[t * 3] = t + 1; + indices[(t * 3) + 1] = t + 2; + indices[(t * 3) + 2] = 0; + } + cmdbuf->state.ib.view.Format = DXGI_FORMAT_R16_UINT; + } else { + uint32_t *indices = (uint32_t *)cpu_ptr; + for (uint32_t t = 0; t < triangle_count; t++) { + indices[t * 3] = t + 1; + indices[(t * 3) + 1] = t + 2; + indices[(t * 3) + 2] = 0; + } + cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT; + } + + cmdbuf->state.ib.view.SizeInBytes = *vertex_count * index_size; + cmdbuf->state.ib.view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(index_buf); + cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB; + return VK_SUCCESS; +} + +static VkResult +dzn_cmd_buffer_triangle_fan_rewrite_index(struct dzn_cmd_buffer *cmdbuf, + uint32_t *index_count, + uint32_t *first_index) +{ + struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); + uint32_t triangle_count = MAX2(*index_count, 2) - 2; + + *index_count = triangle_count * 3; + if (!*index_count) + return VK_SUCCESS; + + /* New index is always 32bit to make the compute shader rewriting the + * index simpler */ + ID3D12Resource *new_index_buf; + VkResult result = + dzn_cmd_buffer_alloc_internal_buf(cmdbuf, *index_count * 4, + D3D12_HEAP_TYPE_DEFAULT, + 
D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
+ &new_index_buf);
+ if (result != VK_SUCCESS)
+ return result;
+
+ D3D12_GPU_VIRTUAL_ADDRESS old_index_buf_gpu =
+ cmdbuf->state.ib.view.BufferLocation;
+
+ enum dzn_index_type index_type =
+ dzn_index_type_from_dxgi_format(cmdbuf->state.ib.view.Format);
+ const struct dzn_meta_triangle_fan_rewrite_index *rewrite_index =
+ &device->triangle_fan[index_type];
+
+ const struct dzn_pipeline *compute_pipeline =
+ cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].pipeline;
+
+ struct dzn_triangle_fan_rewrite_index_params params = {
+ .first_index = *first_index,
+ };
+
+ ID3D12GraphicsCommandList1_SetComputeRootSignature(cmdbuf->cmdlist, rewrite_index->root_sig);
+ ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, rewrite_index->pipeline_state);
+ ID3D12GraphicsCommandList1_SetComputeRootUnorderedAccessView(cmdbuf->cmdlist, 0, ID3D12Resource_GetGPUVirtualAddress(new_index_buf));
+ ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, 1, sizeof(params) / 4,
+ &params, 0);
+ ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist, 2, old_index_buf_gpu);
+ ID3D12GraphicsCommandList1_Dispatch(cmdbuf->cmdlist, triangle_count, 1, 1);
+
+ D3D12_RESOURCE_BARRIER post_barriers[] = {
+ {
+ .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
+ .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
+ /* Transition the new index buffer to the index-buffer state so it
+ * can be consumed by the indexed draw that follows.
+ */
+ .Transition = {
+ .pResource = new_index_buf,
+ .Subresource = 0,
+ .StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
+ .StateAfter = D3D12_RESOURCE_STATE_INDEX_BUFFER,
+ },
+ },
+ };
+
+ ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, ARRAY_SIZE(post_barriers), post_barriers);
+
+ /* We don't mess with the driver state when executing our internal
+ * compute shader, but we still change the D3D12 state, so let's mark
+ * things dirty if needed.
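+ * (flagging DZN_CMD_BINDPOINT_DIRTY_PIPELINE makes the next dispatch
+ * re-emit the compute root signature and pipeline).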
+ */ + cmdbuf->state.pipeline = NULL; + if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].pipeline) { + cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |= + DZN_CMD_BINDPOINT_DIRTY_PIPELINE; + } + + cmdbuf->state.ib.view.SizeInBytes = *index_count * 4; + cmdbuf->state.ib.view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(new_index_buf); + cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT; + cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB; + *first_index = 0; + return VK_SUCCESS; +} + +static void +dzn_cmd_buffer_prepare_draw(struct dzn_cmd_buffer *cmdbuf, bool indexed) +{ + dzn_cmd_buffer_update_pipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS); + dzn_cmd_buffer_update_heaps(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS); + dzn_cmd_buffer_update_sysvals(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS); + dzn_cmd_buffer_update_viewports(cmdbuf); + dzn_cmd_buffer_update_scissors(cmdbuf); + dzn_cmd_buffer_update_vbviews(cmdbuf); + dzn_cmd_buffer_update_push_constants(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS); + dzn_cmd_buffer_update_zsa(cmdbuf); + dzn_cmd_buffer_update_blend_constants(cmdbuf); + + if (indexed) + dzn_cmd_buffer_update_ibview(cmdbuf); + + /* Reset the dirty states */ + cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty = 0; + cmdbuf->state.dirty = 0; +} + +static uint32_t +dzn_cmd_buffer_triangle_fan_get_max_index_buf_size(struct dzn_cmd_buffer *cmdbuf, bool indexed) +{ + struct dzn_graphics_pipeline *pipeline = (struct dzn_graphics_pipeline *) + cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline; + + if (!pipeline->ia.triangle_fan) + return 0; + + uint32_t max_triangles; + + if (indexed) { + uint32_t index_size = cmdbuf->state.ib.view.Format == DXGI_FORMAT_R32_UINT ? 4 : 2; + uint32_t max_indices = cmdbuf->state.ib.view.SizeInBytes / index_size; + + max_triangles = MAX2(max_indices, 2) - 2; + } else { + uint32_t max_vertex = 0; + for (uint32_t i = 0; i < pipeline->vb.count; i++) { + max_vertex = + MAX2(max_vertex, + cmdbuf->state.vb.views[i].SizeInBytes / cmdbuf->state.vb.views[i].StrideInBytes); + } + + max_triangles = MAX2(max_vertex, 2) - 2; + } + + return max_triangles * 3; +} + +static void +dzn_cmd_buffer_indirect_draw(struct dzn_cmd_buffer *cmdbuf, + struct dzn_buffer *draw_buf, + size_t draw_buf_offset, + uint32_t draw_count, + uint32_t draw_buf_stride, + bool indexed) +{ + struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); + struct dzn_graphics_pipeline *pipeline = (struct dzn_graphics_pipeline *) + cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline; + bool triangle_fan = pipeline->ia.triangle_fan; + uint32_t min_draw_buf_stride = + indexed ? + sizeof(struct dzn_indirect_indexed_draw_params) : + sizeof(struct dzn_indirect_draw_params); + + draw_buf_stride = draw_buf_stride ? 
draw_buf_stride : min_draw_buf_stride;
+ assert(draw_buf_stride >= min_draw_buf_stride);
+ assert((draw_buf_stride & 3) == 0);
+
+ uint32_t sysvals_stride = ALIGN_POT(sizeof(cmdbuf->state.sysvals.gfx), 256);
+ uint32_t exec_buf_stride = 32;
+ uint32_t triangle_fan_index_buf_stride =
+ dzn_cmd_buffer_triangle_fan_get_max_index_buf_size(cmdbuf, indexed) *
+ sizeof(uint32_t);
+ uint32_t triangle_fan_exec_buf_stride =
+ sizeof(struct dzn_indirect_triangle_fan_rewrite_index_exec_params);
+ ID3D12Resource *exec_buf;
+ VkResult result =
+ dzn_cmd_buffer_alloc_internal_buf(cmdbuf, draw_count * exec_buf_stride,
+ D3D12_HEAP_TYPE_DEFAULT,
+ D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
+ &exec_buf);
+ if (result != VK_SUCCESS)
+ return;
+
+ D3D12_GPU_VIRTUAL_ADDRESS draw_buf_gpu =
+ ID3D12Resource_GetGPUVirtualAddress(draw_buf->res) + draw_buf_offset;
+ ID3D12Resource *triangle_fan_index_buf = NULL;
+ ID3D12Resource *triangle_fan_exec_buf = NULL;
+
+ if (triangle_fan_index_buf_stride) {
+ result =
+ dzn_cmd_buffer_alloc_internal_buf(cmdbuf,
+ draw_count * triangle_fan_index_buf_stride,
+ D3D12_HEAP_TYPE_DEFAULT,
+ D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
+ &triangle_fan_index_buf);
+ if (result != VK_SUCCESS)
+ return;
+
+ result =
+ dzn_cmd_buffer_alloc_internal_buf(cmdbuf,
+ draw_count * triangle_fan_exec_buf_stride,
+ D3D12_HEAP_TYPE_DEFAULT,
+ D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
+ &triangle_fan_exec_buf);
+ if (result != VK_SUCCESS)
+ return;
+ }
+
+ struct dzn_indirect_draw_triangle_fan_rewrite_params params = {
+ .draw_buf_stride = draw_buf_stride,
+ .triangle_fan_index_buf_stride = triangle_fan_index_buf_stride,
+ .triangle_fan_index_buf_start =
+ triangle_fan_index_buf ?
+ ID3D12Resource_GetGPUVirtualAddress(triangle_fan_index_buf) : 0,
+ };
+ uint32_t params_size =
+ triangle_fan_index_buf_stride > 0 ?
+ sizeof(struct dzn_indirect_draw_triangle_fan_rewrite_params) :
+ sizeof(struct dzn_indirect_draw_rewrite_params);
+
+ enum dzn_indirect_draw_type draw_type;
+
+ if (indexed && triangle_fan_index_buf_stride > 0)
+ draw_type = DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN;
+ else if (!indexed && triangle_fan_index_buf_stride > 0)
+ draw_type = DZN_INDIRECT_DRAW_TRIANGLE_FAN;
+ else if (indexed)
+ draw_type = DZN_INDIRECT_INDEXED_DRAW;
+ else
+ draw_type = DZN_INDIRECT_DRAW;
+
+ struct dzn_meta_indirect_draw *indirect_draw = &device->indirect_draws[draw_type];
+
+ const struct dzn_pipeline *compute_pipeline =
+ cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].pipeline;
+
+ ID3D12GraphicsCommandList1_SetComputeRootSignature(cmdbuf->cmdlist, indirect_draw->root_sig);
+ ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, indirect_draw->pipeline_state);
+ ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, 0, params_size / 4, (const void *)&params, 0);
+ ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist, 1, draw_buf_gpu);
+ ID3D12GraphicsCommandList1_SetComputeRootUnorderedAccessView(cmdbuf->cmdlist, 2, ID3D12Resource_GetGPUVirtualAddress(exec_buf));
+ if (triangle_fan_exec_buf)
+ ID3D12GraphicsCommandList1_SetComputeRootUnorderedAccessView(cmdbuf->cmdlist, 3, ID3D12Resource_GetGPUVirtualAddress(triangle_fan_exec_buf));
+
+ ID3D12GraphicsCommandList1_Dispatch(cmdbuf->cmdlist, draw_count, 1, 1);
+
+ D3D12_RESOURCE_BARRIER post_barriers[] = {
+ {
+ .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
+ .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
+ /* Transition the exec buffer to indirect arg so it can be
+ * passed to ExecuteIndirect() as an argument buffer.
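+ * (the dispatch above patched the application-provided draw parameters
+ * into D3D12-style exec params stored in this buffer).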
+ */
+ .Transition = {
+ .pResource = exec_buf,
+ .Subresource = 0,
+ .StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
+ .StateAfter = D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT,
+ },
+ },
+ {
+ .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
+ .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
+ /* Transition the triangle-fan exec buffer to indirect arg so it can
+ * be passed to ExecuteIndirect() as an argument buffer.
+ */
+ .Transition = {
+ .pResource = triangle_fan_exec_buf,
+ .Subresource = 0,
+ .StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
+ .StateAfter = D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT,
+ },
+ },
+ };
+
+ uint32_t post_barrier_count = triangle_fan_exec_buf ? 2 : 1;
+
+ ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, post_barrier_count, post_barriers);
+
+ D3D12_INDEX_BUFFER_VIEW ib_view = { 0 };
+
+ if (triangle_fan_exec_buf) {
+ enum dzn_index_type index_type =
+ indexed ?
+ dzn_index_type_from_dxgi_format(cmdbuf->state.ib.view.Format) :
+ DZN_NO_INDEX;
+ struct dzn_meta_triangle_fan_rewrite_index *rewrite_index =
+ &device->triangle_fan[index_type];
+
+ struct dzn_triangle_fan_rewrite_index_params rewrite_index_params = { 0 };
+
+ assert(rewrite_index->root_sig);
+ assert(rewrite_index->pipeline_state);
+ assert(rewrite_index->cmd_sig);
+
+ ID3D12GraphicsCommandList1_SetComputeRootSignature(cmdbuf->cmdlist, rewrite_index->root_sig);
+ ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, rewrite_index->pipeline_state);
+ ID3D12GraphicsCommandList1_SetComputeRootUnorderedAccessView(cmdbuf->cmdlist, 0, ID3D12Resource_GetGPUVirtualAddress(triangle_fan_index_buf));
+ ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, 1, sizeof(rewrite_index_params) / 4,
+ (const void *)&rewrite_index_params, 0);
+
+ if (indexed)
+ ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist, 2, cmdbuf->state.ib.view.BufferLocation);
+
+ ID3D12GraphicsCommandList1_ExecuteIndirect(cmdbuf->cmdlist, rewrite_index->cmd_sig,
+ draw_count, triangle_fan_exec_buf,
+ 0, NULL, 0);
+
+ D3D12_RESOURCE_BARRIER index_buf_barriers[] = {
+ {
+ .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
+ .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
+ .Transition = {
+ .pResource = triangle_fan_index_buf,
+ .Subresource = 0,
+ .StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
+ .StateAfter = D3D12_RESOURCE_STATE_INDEX_BUFFER,
+ },
+ },
+ };
+
+ ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, ARRAY_SIZE(index_buf_barriers), index_buf_barriers);
+
+ /* After our triangle-fan lowering the draw is indexed */
+ indexed = true;
+ ib_view = cmdbuf->state.ib.view;
+ cmdbuf->state.ib.view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(triangle_fan_index_buf);
+ cmdbuf->state.ib.view.SizeInBytes = triangle_fan_index_buf_stride;
+ cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT;
+ cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
+ }
+
+ /* We don't mess with the driver state when executing our internal
+ * compute shader, but we still change the D3D12 state, so let's mark
+ * things dirty if needed.
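+ * (state.pipeline is NULLed so the next draw re-applies the PSO, and the
+ * compute bind point is flagged so its root signature gets re-emitted).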
+ */ + cmdbuf->state.pipeline = NULL; + if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].pipeline) { + cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |= + DZN_CMD_BINDPOINT_DIRTY_PIPELINE; + } + + cmdbuf->state.sysvals.gfx.first_vertex = 0; + cmdbuf->state.sysvals.gfx.base_instance = 0; + cmdbuf->state.sysvals.gfx.is_indexed_draw = indexed; + cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= + DZN_CMD_BINDPOINT_DIRTY_SYSVALS; + + dzn_cmd_buffer_prepare_draw(cmdbuf, indexed); + + /* Restore the old IB view if we modified it during the triangle fan lowering */ + if (ib_view.SizeInBytes) { + cmdbuf->state.ib.view = ib_view; + cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB; + } + + enum dzn_indirect_draw_cmd_sig_type cmd_sig_type = + triangle_fan_index_buf_stride > 0 ? + DZN_INDIRECT_DRAW_TRIANGLE_FAN_CMD_SIG : + indexed ? + DZN_INDIRECT_INDEXED_DRAW_CMD_SIG : + DZN_INDIRECT_DRAW_CMD_SIG; + ID3D12CommandSignature *cmdsig = + dzn_graphics_pipeline_get_indirect_cmd_sig(pipeline, cmd_sig_type); + + if (!cmdsig) { + cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); + return; + } + + ID3D12GraphicsCommandList1_ExecuteIndirect(cmdbuf->cmdlist, cmdsig, + draw_count, exec_buf, 0, NULL, 0); +} + +static void +dzn_cmd_buffer_prepare_dispatch(struct dzn_cmd_buffer *cmdbuf) +{ + dzn_cmd_buffer_update_pipeline(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE); + dzn_cmd_buffer_update_heaps(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE); + dzn_cmd_buffer_update_sysvals(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE); + dzn_cmd_buffer_update_push_constants(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE); + + /* Reset the dirty states */ + cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty = 0; +} + +VKAPI_ATTR void VKAPI_CALL +dzn_CmdCopyBuffer2(VkCommandBuffer commandBuffer, + const VkCopyBufferInfo2 *info) +{ + VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); + VK_FROM_HANDLE(dzn_buffer, src_buffer, info->srcBuffer); + VK_FROM_HANDLE(dzn_buffer, dst_buffer, info->dstBuffer); + + for (int i = 0; i < info->regionCount; i++) { + const VkBufferCopy2 *region = info->pRegions + i; + + ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, dst_buffer->res, region->dstOffset, + src_buffer->res, region->srcOffset, + region->size); + } +} + +VKAPI_ATTR void VKAPI_CALL +dzn_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer, + const VkCopyBufferToImageInfo2 *info) +{ + VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); + + for (int i = 0; i < info->regionCount; i++) { + const VkBufferImageCopy2 *region = info->pRegions + i; + + dzn_foreach_aspect(aspect, region->imageSubresource.aspectMask) { + for (uint32_t l = 0; l < region->imageSubresource.layerCount; l++) + dzn_cmd_buffer_copy_buf2img_region(cmdbuf, info, i, aspect, l); + } + } +} + +VKAPI_ATTR void VKAPI_CALL +dzn_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer, + const VkCopyImageToBufferInfo2 *info) +{ + VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); + + for (int i = 0; i < info->regionCount; i++) { + const VkBufferImageCopy2 *region = info->pRegions + i; + + dzn_foreach_aspect(aspect, region->imageSubresource.aspectMask) { + for (uint32_t l = 0; l < region->imageSubresource.layerCount; l++) + dzn_cmd_buffer_copy_img2buf_region(cmdbuf, info, i, aspect, l); + } + } +} + +VKAPI_ATTR void VKAPI_CALL +dzn_CmdCopyImage2(VkCommandBuffer commandBuffer, + const VkCopyImageInfo2 *info) +{ + VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); + struct dzn_device *device = container_of(cmdbuf->vk.base.device, 
struct dzn_device, vk);
+ VK_FROM_HANDLE(dzn_image, src, info->srcImage);
+ VK_FROM_HANDLE(dzn_image, dst, info->dstImage);
+
+ assert(src->vk.samples == dst->vk.samples);
+
+ bool requires_temp_res = src->vk.format != dst->vk.format &&
+ src->vk.tiling != VK_IMAGE_TILING_LINEAR &&
+ dst->vk.tiling != VK_IMAGE_TILING_LINEAR;
+ bool use_blit = false;
+ if (src->vk.samples > 1) {
+ use_blit = requires_temp_res;
+
+ for (uint32_t i = 0; i < info->regionCount; i++) {
+ const VkImageCopy2 *region = info->pRegions + i;
+ if (region->srcOffset.x != 0 || region->srcOffset.y != 0 ||
+ region->extent.width != u_minify(src->vk.extent.width, region->srcSubresource.mipLevel) ||
+ region->extent.height != u_minify(src->vk.extent.height, region->srcSubresource.mipLevel) ||
+ region->dstOffset.x != 0 || region->dstOffset.y != 0 ||
+ region->extent.width != u_minify(dst->vk.extent.width, region->dstSubresource.mipLevel) ||
+ region->extent.height != u_minify(dst->vk.extent.height, region->dstSubresource.mipLevel))
+ use_blit = true;
+ }
+ }
+
+ if (use_blit) {
+ /* This copy -> blit lowering doesn't work if vkCmdCopyImage[2]() is
+ * issued on a transfer queue, but we don't have any better option
+ * right now...
+ */
+ STACK_ARRAY(VkImageBlit2, blit_regions, info->regionCount);
+
+ VkBlitImageInfo2 blit_info = {
+ .sType = VK_STRUCTURE_TYPE_BLIT_IMAGE_INFO_2,
+ .srcImage = info->srcImage,
+ .srcImageLayout = info->srcImageLayout,
+ .dstImage = info->dstImage,
+ .dstImageLayout = info->dstImageLayout,
+ .regionCount = info->regionCount,
+ .pRegions = blit_regions,
+ .filter = VK_FILTER_NEAREST,
+ };
+
+ for (uint32_t r = 0; r < info->regionCount; r++) {
+ blit_regions[r] = (VkImageBlit2) {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2,
+ .srcSubresource = info->pRegions[r].srcSubresource,
+ .srcOffsets = {
+ info->pRegions[r].srcOffset,
+ info->pRegions[r].srcOffset,
+ },
+ .dstSubresource = info->pRegions[r].dstSubresource,
+ .dstOffsets = {
+ info->pRegions[r].dstOffset,
+ info->pRegions[r].dstOffset,
+ },
+ };
+
+ blit_regions[r].srcOffsets[1].x += info->pRegions[r].extent.width;
+ blit_regions[r].srcOffsets[1].y += info->pRegions[r].extent.height;
+ blit_regions[r].srcOffsets[1].z += info->pRegions[r].extent.depth;
+ blit_regions[r].dstOffsets[1].x += info->pRegions[r].extent.width;
+ blit_regions[r].dstOffsets[1].y += info->pRegions[r].extent.height;
+ blit_regions[r].dstOffsets[1].z += info->pRegions[r].extent.depth;
+ }
+
+ dzn_CmdBlitImage2(commandBuffer, &blit_info);
+
+ STACK_ARRAY_FINISH(blit_regions);
+ return;
+ }
+
+ D3D12_TEXTURE_COPY_LOCATION tmp_loc = { 0 };
+ D3D12_RESOURCE_DESC tmp_desc = {
+ .Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D,
+ .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT,
+ .DepthOrArraySize = 1,
+ .MipLevels = 1,
+ .Format = src->desc.Format,
+ .SampleDesc = { .Count = 1, .Quality = 0 },
+ .Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN,
+ .Flags = D3D12_RESOURCE_FLAG_NONE,
+ };
+
+ if (requires_temp_res) {
+ ID3D12Device1 *dev = device->dev;
+ VkImageAspectFlags aspect = 0;
+ uint64_t max_size = 0;
+
+ if (vk_format_has_depth(src->vk.format))
+ aspect = VK_IMAGE_ASPECT_DEPTH_BIT;
+ else if (vk_format_has_stencil(src->vk.format))
+ aspect = VK_IMAGE_ASPECT_STENCIL_BIT;
+ else
+ aspect = VK_IMAGE_ASPECT_COLOR_BIT;
+
+ for (uint32_t i = 0; i < info->regionCount; i++) {
+ const VkImageCopy2 *region = &info->pRegions[i];
+ uint64_t region_size = 0;
+
+ tmp_desc.Format =
+ dzn_image_get_dxgi_format(src->vk.format,
+ VK_IMAGE_USAGE_TRANSFER_DST_BIT,
+ aspect);
+ tmp_desc.Width =
region->extent.width;
+ tmp_desc.Height = region->extent.height;
+
+ ID3D12Device1_GetCopyableFootprints(dev, &src->desc,
+ 0, 1, 0,
+ NULL, NULL, NULL,
+ &region_size);
+ max_size = MAX2(max_size, region_size * region->extent.depth);
+ }
+
+ VkResult result =
+ dzn_cmd_buffer_alloc_internal_buf(cmdbuf, max_size,
+ D3D12_HEAP_TYPE_DEFAULT,
+ D3D12_RESOURCE_STATE_COPY_DEST,
+ &tmp_loc.pResource);
+ if (result != VK_SUCCESS)
+ return;
+
+ tmp_loc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
+ }
+
+ for (uint32_t i = 0; i < info->regionCount; i++) {
+ const VkImageCopy2 *region = &info->pRegions[i];
+
+ dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) {
+ for (uint32_t l = 0; l < region->srcSubresource.layerCount; l++)
+ dzn_cmd_buffer_copy_img_chunk(cmdbuf, info, &tmp_desc, &tmp_loc, i, aspect, l);
+ }
+ }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdBlitImage2(VkCommandBuffer commandBuffer,
+ const VkBlitImageInfo2 *info)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+ struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
+
+ if (info->regionCount == 0)
+ return;
+
+ uint32_t desc_count = 0;
+ for (uint32_t r = 0; r < info->regionCount; r++)
+ desc_count += util_bitcount(info->pRegions[r].srcSubresource.aspectMask);
+
+ struct dzn_descriptor_heap *heap;
+ uint32_t heap_slot;
+ VkResult result =
+ dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->cbv_srv_uav_pool, device,
+ desc_count, &heap, &heap_slot);
+
+ if (result != VK_SUCCESS) {
+ cmdbuf->error = result;
+ return;
+ }
+
+ if (heap != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV]) {
+ ID3D12DescriptorHeap * const heaps[] = { heap->heap };
+ cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = heap;
+ ID3D12GraphicsCommandList1_SetDescriptorHeaps(cmdbuf->cmdlist, ARRAY_SIZE(heaps), heaps);
+ }
+
+ ID3D12GraphicsCommandList1_IASetPrimitiveTopology(cmdbuf->cmdlist, D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
+
+ for (uint32_t r = 0; r < info->regionCount; r++)
+ dzn_cmd_buffer_blit_region(cmdbuf, info, heap, &heap_slot, r);
+
+ cmdbuf->state.pipeline = NULL;
+ cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS | DZN_CMD_DIRTY_SCISSORS;
+ if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline) {
+ cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
+ DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
+ }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdResolveImage2(VkCommandBuffer commandBuffer,
+ const VkResolveImageInfo2 *info)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+ struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
+
+ if (info->regionCount == 0)
+ return;
+
+ uint32_t desc_count = 0;
+ for (uint32_t r = 0; r < info->regionCount; r++)
+ desc_count += util_bitcount(info->pRegions[r].srcSubresource.aspectMask);
+
+ struct dzn_descriptor_heap *heap;
+ uint32_t heap_slot;
+ VkResult result =
+ dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->cbv_srv_uav_pool, device,
+ desc_count, &heap, &heap_slot);
+ if (result != VK_SUCCESS) {
+ cmdbuf->error = result;
+ return;
+ }
+
+ if (heap != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV]) {
+ ID3D12DescriptorHeap * const heaps[] = { heap->heap };
+ cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = heap;
+ ID3D12GraphicsCommandList1_SetDescriptorHeaps(cmdbuf->cmdlist, ARRAY_SIZE(heaps), heaps);
+ }
+
+ ID3D12GraphicsCommandList1_IASetPrimitiveTopology(cmdbuf->cmdlist, D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
+
+ for (uint32_t r = 0; r < info->regionCount; r++)
+ dzn_cmd_buffer_resolve_region(cmdbuf, info, heap, &heap_slot, r);
+
+ cmdbuf->state.pipeline = NULL;
+ cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS | DZN_CMD_DIRTY_SCISSORS;
+ if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline) {
+ cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
+ DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
+ }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdClearColorImage(VkCommandBuffer commandBuffer,
+ VkImage image,
+ VkImageLayout imageLayout,
+ const VkClearColorValue *pColor,
+ uint32_t rangeCount,
+ const VkImageSubresourceRange *pRanges)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+ VK_FROM_HANDLE(dzn_image, img, image);
+
+ dzn_cmd_buffer_clear_color(cmdbuf, img, imageLayout, pColor, rangeCount, pRanges);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
+ VkImage image,
+ VkImageLayout imageLayout,
+ const VkClearDepthStencilValue *pDepthStencil,
+ uint32_t rangeCount,
+ const VkImageSubresourceRange *pRanges)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+ VK_FROM_HANDLE(dzn_image, img, image);
+
+ dzn_cmd_buffer_clear_zs(cmdbuf, img, imageLayout, pDepthStencil, rangeCount, pRanges);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdDispatch(VkCommandBuffer commandBuffer,
+ uint32_t groupCountX,
+ uint32_t groupCountY,
+ uint32_t groupCountZ)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+
+ cmdbuf->state.sysvals.compute.group_count_x = groupCountX;
+ cmdbuf->state.sysvals.compute.group_count_y = groupCountY;
+ cmdbuf->state.sysvals.compute.group_count_z = groupCountZ;
+ cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |=
+ DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
+
+ dzn_cmd_buffer_prepare_dispatch(cmdbuf);
+ ID3D12GraphicsCommandList1_Dispatch(cmdbuf->cmdlist, groupCountX, groupCountY, groupCountZ);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdFillBuffer(VkCommandBuffer commandBuffer,
+ VkBuffer dstBuffer,
+ VkDeviceSize dstOffset,
+ VkDeviceSize size,
+ uint32_t data)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+ VK_FROM_HANDLE(dzn_buffer, buf, dstBuffer);
+
+ if (size == VK_WHOLE_SIZE)
+ size = buf->size - dstOffset;
+
+ size &= ~3ULL;
+
+ ID3D12Resource *src_res;
+ VkResult result =
+ dzn_cmd_buffer_alloc_internal_buf(cmdbuf, size,
+ D3D12_HEAP_TYPE_UPLOAD,
+ D3D12_RESOURCE_STATE_GENERIC_READ,
+ &src_res);
+ if (result != VK_SUCCESS)
+ return;
+
+ uint32_t *cpu_ptr;
+ ID3D12Resource_Map(src_res, 0, NULL, (void **)&cpu_ptr);
+ for (uint32_t i = 0; i < size / 4; i++)
+ cpu_ptr[i] = data;
+
+ ID3D12Resource_Unmap(src_res, 0, NULL);
+
+ ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset, src_res, 0, size);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
+ VkBuffer dstBuffer,
+ VkDeviceSize dstOffset,
+ VkDeviceSize size,
+ const void *data)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+ VK_FROM_HANDLE(dzn_buffer, buf, dstBuffer);
+
+ if (size == VK_WHOLE_SIZE)
+ size = buf->size - dstOffset;
+
+ /*
+ * The spec says:
+ * "... must be either a multiple of 4, or VK_WHOLE_SIZE to fill the
+ * range from offset to the end of the buffer. If VK_WHOLE_SIZE is used
+ * and the remaining size of the buffer is not a multiple of 4, then the
+ * nearest smaller multiple is used."
+ */
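+ /* Round the size down to a multiple of 4, as required above. */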
+ size &= ~3ULL;
+
+ ID3D12Resource *src_res;
+ VkResult result =
+ dzn_cmd_buffer_alloc_internal_buf(cmdbuf, size,
+ D3D12_HEAP_TYPE_UPLOAD,
+ D3D12_RESOURCE_STATE_GENERIC_READ,
+ &src_res);
+ if (result != VK_SUCCESS)
+ return;
+
+ void *cpu_ptr;
+ ID3D12Resource_Map(src_res, 0, NULL, &cpu_ptr);
+ memcpy(cpu_ptr, data, size);
+ ID3D12Resource_Unmap(src_res, 0, NULL);
+
+ ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset, src_res, 0, size);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdClearAttachments(VkCommandBuffer commandBuffer,
+ uint32_t attachmentCount,
+ const VkClearAttachment *pAttachments,
+ uint32_t rectCount,
+ const VkClearRect *pRects)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+
+ dzn_cmd_buffer_clear_attachments(cmdbuf, attachmentCount, pAttachments, rectCount, pRects);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdBeginRenderPass2(VkCommandBuffer commandBuffer,
+ const VkRenderPassBeginInfo *pRenderPassBeginInfo,
+ const VkSubpassBeginInfo *pSubpassBeginInfo)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+ VK_FROM_HANDLE(dzn_render_pass, pass, pRenderPassBeginInfo->renderPass);
+ VK_FROM_HANDLE(dzn_framebuffer, framebuffer, pRenderPassBeginInfo->framebuffer);
+
+ assert(pass->attachment_count == framebuffer->attachment_count);
+
+ cmdbuf->state.framebuffer = framebuffer;
+ cmdbuf->state.render_area = (D3D12_RECT) {
+ .left = pRenderPassBeginInfo->renderArea.offset.x,
+ .top = pRenderPassBeginInfo->renderArea.offset.y,
+ .right = (LONG)(pRenderPassBeginInfo->renderArea.offset.x + pRenderPassBeginInfo->renderArea.extent.width),
+ .bottom = (LONG)(pRenderPassBeginInfo->renderArea.offset.y + pRenderPassBeginInfo->renderArea.extent.height),
+ };
+
+ // The render area has an impact on the scissor state.
+ cmdbuf->state.dirty |= DZN_CMD_DIRTY_SCISSORS;
+ cmdbuf->state.pass = pass;
+ cmdbuf->state.subpass = 0;
+ dzn_cmd_buffer_begin_subpass(cmdbuf);
+
+ uint32_t clear_count =
+ MIN2(pRenderPassBeginInfo->clearValueCount, framebuffer->attachment_count);
+ for (uint32_t i = 0; i < clear_count; i++) {
+ VkImageAspectFlags aspectMask = 0;
+
+ if (vk_format_is_depth_or_stencil(pass->attachments[i].format)) {
+ if (pass->attachments[i].clear.depth)
+ aspectMask |= VK_IMAGE_ASPECT_DEPTH_BIT;
+ if (pass->attachments[i].clear.stencil)
+ aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT;
+ } else if (pass->attachments[i].clear.color) {
+ aspectMask |= VK_IMAGE_ASPECT_COLOR_BIT;
+ }
+
+ dzn_cmd_buffer_clear_attachment(cmdbuf, i, &pRenderPassBeginInfo->pClearValues[i],
+ aspectMask, 0, ~0, 1, &cmdbuf->state.render_area);
+ }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdEndRenderPass2(VkCommandBuffer commandBuffer,
+ const VkSubpassEndInfo *pSubpassEndInfo)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+
+ dzn_cmd_buffer_end_subpass(cmdbuf);
+
+ for (uint32_t i = 0; i < cmdbuf->state.pass->attachment_count; i++)
+ dzn_cmd_buffer_attachment_transition(cmdbuf, &cmdbuf->state.pass->attachments[i]);
+
+ cmdbuf->state.framebuffer = NULL;
+ cmdbuf->state.pass = NULL;
+ cmdbuf->state.subpass = 0;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdNextSubpass2(VkCommandBuffer commandBuffer,
+ const VkSubpassBeginInfo *pSubpassBeginInfo,
+ const VkSubpassEndInfo *pSubpassEndInfo)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+
+ dzn_cmd_buffer_end_subpass(cmdbuf);
+ assert(cmdbuf->state.subpass + 1 < cmdbuf->state.pass->subpass_count);
+ cmdbuf->state.subpass++;
+ dzn_cmd_buffer_begin_subpass(cmdbuf);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdBindPipeline(VkCommandBuffer commandBuffer,
+ VkPipelineBindPoint pipelineBindPoint,
+ VkPipeline pipe)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+ VK_FROM_HANDLE(dzn_pipeline, pipeline, pipe);
+
+ cmdbuf->state.bindpoint[pipelineBindPoint].pipeline = pipeline;
+ cmdbuf->state.bindpoint[pipelineBindPoint].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
+ if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
+ const struct dzn_graphics_pipeline *gfx = (const struct dzn_graphics_pipeline *)pipeline;
+
+ if (!gfx->vp.dynamic) {
+ memcpy(cmdbuf->state.viewports, gfx->vp.desc,
+ gfx->vp.count * sizeof(cmdbuf->state.viewports[0]));
+ cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS;
+ }
+
+ if (!gfx->scissor.dynamic) {
+ memcpy(cmdbuf->state.scissors, gfx->scissor.desc,
+ gfx->scissor.count * sizeof(cmdbuf->state.scissors[0]));
+ cmdbuf->state.dirty |= DZN_CMD_DIRTY_SCISSORS;
+ }
+
+ if (gfx->zsa.stencil_test.enable && !gfx->zsa.stencil_test.dynamic_ref) {
+ cmdbuf->state.zsa.stencil_test.front.ref = gfx->zsa.stencil_test.front.ref;
+ cmdbuf->state.zsa.stencil_test.back.ref = gfx->zsa.stencil_test.back.ref;
+ cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_REF;
+ }
+
+ if (!gfx->blend.dynamic_constants) {
+ memcpy(cmdbuf->state.blend.constants, gfx->blend.constants,
+ sizeof(cmdbuf->state.blend.constants));
+ cmdbuf->state.dirty |= DZN_CMD_DIRTY_BLEND_CONSTANTS;
+ }
+
+ for (uint32_t vb = 0; vb < gfx->vb.count; vb++)
+ cmdbuf->state.vb.views[vb].StrideInBytes = gfx->vb.strides[vb];
+
+ if (gfx->vb.count > 0)
+ BITSET_SET_RANGE(cmdbuf->state.vb.dirty, 0, gfx->vb.count - 1);
+ }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
+ VkPipelineBindPoint pipelineBindPoint,
+ VkPipelineLayout layout,
+ uint32_t firstSet,
+ uint32_t 
descriptorSetCount, + const VkDescriptorSet *pDescriptorSets, + uint32_t dynamicOffsetCount, + const uint32_t *pDynamicOffsets) +{ + VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); + VK_FROM_HANDLE(dzn_pipeline_layout, playout, layout); + + struct dzn_descriptor_state *desc_state = + &cmdbuf->state.bindpoint[pipelineBindPoint].desc_state; + uint32_t dirty = 0; + + for (uint32_t i = 0; i < descriptorSetCount; i++) { + uint32_t idx = firstSet + i; + VK_FROM_HANDLE(dzn_descriptor_set, set, pDescriptorSets[i]); + + if (desc_state->sets[idx].set != set) { + desc_state->sets[idx].set = set; + dirty |= DZN_CMD_BINDPOINT_DIRTY_HEAPS; + } + + uint32_t dynamic_buffer_count = playout->sets[idx].dynamic_buffer_count; + if (dynamic_buffer_count) { + assert(dynamicOffsetCount >= dynamic_buffer_count); + + for (uint32_t j = 0; j < dynamic_buffer_count; j++) + desc_state->sets[idx].dynamic_offsets[j] = pDynamicOffsets[j]; + + dynamicOffsetCount -= dynamic_buffer_count; + pDynamicOffsets += dynamic_buffer_count; + dirty |= DZN_CMD_BINDPOINT_DIRTY_HEAPS; + } + } + + cmdbuf->state.bindpoint[pipelineBindPoint].dirty |= dirty; +} + +VKAPI_ATTR void VKAPI_CALL +dzn_CmdSetViewport(VkCommandBuffer commandBuffer, + uint32_t firstViewport, + uint32_t viewportCount, + const VkViewport *pViewports) +{ + VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); + + STATIC_ASSERT(MAX_VP <= DXIL_SPIRV_MAX_VIEWPORT); + + for (uint32_t i = 0; i < viewportCount; i++) { + uint32_t vp = i + firstViewport; + + dzn_translate_viewport(&cmdbuf->state.viewports[vp], &pViewports[i]); + + if (pViewports[i].minDepth > pViewports[i].maxDepth) + cmdbuf->state.sysvals.gfx.yz_flip_mask |= BITFIELD_BIT(vp + DXIL_SPIRV_Z_FLIP_SHIFT); + else + cmdbuf->state.sysvals.gfx.yz_flip_mask &= ~BITFIELD_BIT(vp + DXIL_SPIRV_Z_FLIP_SHIFT); + + if (pViewports[i].height > 0) + cmdbuf->state.sysvals.gfx.yz_flip_mask |= BITFIELD_BIT(vp); + else + cmdbuf->state.sysvals.gfx.yz_flip_mask &= ~BITFIELD_BIT(vp); + } + + if (viewportCount) { + cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS; + cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= + DZN_CMD_BINDPOINT_DIRTY_SYSVALS; + } +} + +VKAPI_ATTR void VKAPI_CALL +dzn_CmdSetScissor(VkCommandBuffer commandBuffer, + uint32_t firstScissor, + uint32_t scissorCount, + const VkRect2D *pScissors) +{ + VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); + + for (uint32_t i = 0; i < scissorCount; i++) + dzn_translate_rect(&cmdbuf->state.scissors[i + firstScissor], &pScissors[i]); + + if (scissorCount) + cmdbuf->state.dirty |= DZN_CMD_DIRTY_SCISSORS; +} + +VKAPI_ATTR void VKAPI_CALL +dzn_CmdPushConstants(VkCommandBuffer commandBuffer, VkPipelineLayout layout, + VkShaderStageFlags stageFlags, uint32_t offset, uint32_t size, + const void *pValues) +{ + VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); + struct dzn_cmd_buffer_push_constant_state *states[2]; + uint32_t num_states = 0; + + if (stageFlags & VK_SHADER_STAGE_ALL_GRAPHICS) + states[num_states++] = &cmdbuf->state.push_constant.gfx; + + if (stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) + states[num_states++] = &cmdbuf->state.push_constant.compute; + + for (uint32_t i = 0; i < num_states; i++) { + memcpy(((char *)states[i]->values) + offset, pValues, size); + + uint32_t current_offset = states[i]->offset; + uint32_t current_end = states[i]->end; + uint32_t end = offset + size; + if (current_end != 0) { + offset = MIN2(current_offset, offset); + end = MAX2(current_end, end); + } + states[i]->offset = offset; + states[i]->end = end; + } +} + 
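+/* D3D12 has no triangle-fan primitive topology, so draws using
+ * VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN are lowered to indexed draws with a
+ * generated index buffer (the pipeline->ia.triangle_fan paths below).
+ */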
+VKAPI_ATTR void VKAPI_CALL +dzn_CmdDraw(VkCommandBuffer commandBuffer, + uint32_t vertexCount, + uint32_t instanceCount, + uint32_t firstVertex, + uint32_t firstInstance) +{ + VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); + + const struct dzn_graphics_pipeline *pipeline = (const struct dzn_graphics_pipeline *) + cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline; + + cmdbuf->state.sysvals.gfx.first_vertex = firstVertex; + cmdbuf->state.sysvals.gfx.base_instance = firstInstance; + cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= + DZN_CMD_BINDPOINT_DIRTY_SYSVALS; + + if (pipeline->ia.triangle_fan) { + D3D12_INDEX_BUFFER_VIEW ib_view = cmdbuf->state.ib.view; + + VkResult result = + dzn_cmd_buffer_triangle_fan_create_index(cmdbuf, &vertexCount); + if (result != VK_SUCCESS || !vertexCount) + return; + + cmdbuf->state.sysvals.gfx.is_indexed_draw = true; + dzn_cmd_buffer_prepare_draw(cmdbuf, true); + ID3D12GraphicsCommandList1_DrawIndexedInstanced(cmdbuf->cmdlist, vertexCount, instanceCount, 0, + firstVertex, firstInstance); + + /* Restore the IB view if we modified it when lowering triangle fans. */ + if (ib_view.SizeInBytes > 0) { + cmdbuf->state.ib.view = ib_view; + cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB; + } + } else { + cmdbuf->state.sysvals.gfx.is_indexed_draw = false; + dzn_cmd_buffer_prepare_draw(cmdbuf, false); + ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, vertexCount, instanceCount, + firstVertex, firstInstance); + } +} + +VKAPI_ATTR void VKAPI_CALL +dzn_CmdDrawIndexed(VkCommandBuffer commandBuffer, + uint32_t indexCount, + uint32_t instanceCount, + uint32_t firstIndex, + int32_t vertexOffset, + uint32_t firstInstance) +{ + VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); + + const struct dzn_graphics_pipeline *pipeline = (const struct dzn_graphics_pipeline *) + cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline; + + cmdbuf->state.sysvals.gfx.first_vertex = vertexOffset; + cmdbuf->state.sysvals.gfx.base_instance = firstInstance; + cmdbuf->state.sysvals.gfx.is_indexed_draw = true; + cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= + DZN_CMD_BINDPOINT_DIRTY_SYSVALS; + + D3D12_INDEX_BUFFER_VIEW ib_view = cmdbuf->state.ib.view; + + if (pipeline->ia.triangle_fan) { + VkResult result = + dzn_cmd_buffer_triangle_fan_rewrite_index(cmdbuf, &indexCount, &firstIndex); + if (result != VK_SUCCESS || !indexCount) + return; + } + + dzn_cmd_buffer_prepare_draw(cmdbuf, true); + ID3D12GraphicsCommandList1_DrawIndexedInstanced(cmdbuf->cmdlist, indexCount, instanceCount, firstIndex, + vertexOffset, firstInstance); + + /* Restore the IB view if we modified it when lowering triangle fans. 
*/
+ if (pipeline->ia.triangle_fan && ib_view.SizeInBytes) {
+ cmdbuf->state.ib.view = ib_view;
+ cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
+ }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdDrawIndirect(VkCommandBuffer commandBuffer,
+ VkBuffer buffer,
+ VkDeviceSize offset,
+ uint32_t drawCount,
+ uint32_t stride)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+ VK_FROM_HANDLE(dzn_buffer, buf, buffer);
+
+ dzn_cmd_buffer_indirect_draw(cmdbuf, buf, offset, drawCount, stride, false);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer,
+ VkBuffer buffer,
+ VkDeviceSize offset,
+ uint32_t drawCount,
+ uint32_t stride)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+ VK_FROM_HANDLE(dzn_buffer, buf, buffer);
+
+ dzn_cmd_buffer_indirect_draw(cmdbuf, buf, offset, drawCount, stride, true);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdBindVertexBuffers(VkCommandBuffer commandBuffer,
+ uint32_t firstBinding,
+ uint32_t bindingCount,
+ const VkBuffer *pBuffers,
+ const VkDeviceSize *pOffsets)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+
+ if (!bindingCount)
+ return;
+
+ D3D12_VERTEX_BUFFER_VIEW *vbviews = cmdbuf->state.vb.views;
+
+ for (uint32_t i = 0; i < bindingCount; i++) {
+ VK_FROM_HANDLE(dzn_buffer, buf, pBuffers[i]);
+
+ vbviews[firstBinding + i].BufferLocation = ID3D12Resource_GetGPUVirtualAddress(buf->res) + pOffsets[i];
+ vbviews[firstBinding + i].SizeInBytes = buf->size - pOffsets[i];
+ }
+
+ BITSET_SET_RANGE(cmdbuf->state.vb.dirty, firstBinding,
+ firstBinding + bindingCount - 1);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdBindIndexBuffer(VkCommandBuffer commandBuffer,
+ VkBuffer buffer,
+ VkDeviceSize offset,
+ VkIndexType indexType)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+ VK_FROM_HANDLE(dzn_buffer, buf, buffer);
+
+ cmdbuf->state.ib.view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(buf->res) + offset;
+ cmdbuf->state.ib.view.SizeInBytes = buf->size - offset;
+ switch (indexType) {
+ case VK_INDEX_TYPE_UINT16:
+ cmdbuf->state.ib.view.Format = DXGI_FORMAT_R16_UINT;
+ break;
+ case VK_INDEX_TYPE_UINT32:
+ cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT;
+ break;
+ default: unreachable("Invalid index type");
+ }
+
+ cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdResetEvent(VkCommandBuffer commandBuffer,
+ VkEvent event,
+ VkPipelineStageFlags stageMask)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+ struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
+ VK_FROM_HANDLE(dzn_event, evt, event);
+
+ if (!_mesa_hash_table_insert(cmdbuf->events.ht, evt, (void *)(uintptr_t)DZN_EVENT_STATE_RESET))
+ cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdSetEvent(VkCommandBuffer commandBuffer,
+ VkEvent event,
+ VkPipelineStageFlags stageMask)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+ struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
+ VK_FROM_HANDLE(dzn_event, evt, event);
+
+ if (!_mesa_hash_table_insert(cmdbuf->events.ht, evt, (void *)(uintptr_t)DZN_EVENT_STATE_SET))
+ cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdWaitEvents(VkCommandBuffer commandBuffer,
+ uint32_t eventCount,
+ const VkEvent *pEvents,
+ VkPipelineStageFlags srcStageMask,
+ VkPipelineStageFlags dstStageMask,
+ uint32_t memoryBarrierCount,
+ const VkMemoryBarrier *pMemoryBarriers,
+ uint32_t bufferMemoryBarrierCount,
+ const VkBufferMemoryBarrier *pBufferMemoryBarriers,
+ uint32_t imageMemoryBarrierCount,
+ const VkImageMemoryBarrier *pImageMemoryBarriers)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+ struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
+
+ /* Intra-command list waits are handled by this pipeline flush, which is
+ * overkill, but that's the best we can do with the standard D3D12 barrier
+ * API.
+ *
+ * Inter-command list synchronization is taken care of by the serialization
+ * done at the ExecuteCommandList() level:
+ * "Calling ExecuteCommandLists twice in succession (from the same thread,
+ * or different threads) guarantees that the first workload (A) finishes
+ * before the second workload (B)"
+ *
+ * HOST -> DEVICE signaling is ignored and we assume events are always
+ * signaled when we reach the vkCmdWaitEvents() point:
+ * "Command buffers in the submission can include vkCmdWaitEvents commands
+ * that wait on events that will not be signaled by earlier commands in the
+ * queue. Such events must be signaled by the application using vkSetEvent,
+ * and the vkCmdWaitEvents commands that wait upon them must not be inside
+ * a render pass instance.
+ * The event must be set before the vkCmdWaitEvents command is executed."
+ */
+ bool flush_pipeline = false;
+
+ for (uint32_t i = 0; i < eventCount; i++) {
+ VK_FROM_HANDLE(dzn_event, event, pEvents[i]);
+
+ struct hash_entry *he =
+ _mesa_hash_table_search(cmdbuf->events.ht, event);
+ if (he) {
+ enum dzn_event_state state = (enum dzn_event_state)(uintptr_t)he->data;
+ assert(state != DZN_EVENT_STATE_RESET);
+ flush_pipeline = state == DZN_EVENT_STATE_SET;
+ } else {
+ if (!_mesa_hash_table_insert(cmdbuf->events.ht, event,
+ (void *)(uintptr_t)DZN_EVENT_STATE_EXTERNAL_WAIT)) {
+ cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return;
+ }
+
+ struct dzn_event **entry = (struct dzn_event **)
+ util_dynarray_grow(&cmdbuf->events.wait, struct dzn_event *, 1);
+
+ if (!entry) {
+ cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return;
+ }
+
+ *entry = event;
+ }
+ }
+
+ if (flush_pipeline) {
+ D3D12_RESOURCE_BARRIER barrier = {
+ .Type = D3D12_RESOURCE_BARRIER_TYPE_UAV,
+ .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
+ .UAV = { .pResource = NULL },
+ };
+
+ ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier);
+ }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdBeginQuery(VkCommandBuffer commandBuffer,
+ VkQueryPool queryPool,
+ uint32_t query,
+ VkQueryControlFlags flags)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+ struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
+ VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
+
+ struct dzn_cmd_buffer_query_pool_state *state =
+ dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
+ if (!state)
+ return;
+
+ qpool->queries[query].type = dzn_query_pool_get_query_type(qpool, flags);
+ dzn_cmd_buffer_dynbitset_clear(cmdbuf, &state->collect, query);
+ ID3D12GraphicsCommandList1_BeginQuery(cmdbuf->cmdlist, qpool->heap, qpool->queries[query].type, query);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdEndQuery(VkCommandBuffer commandBuffer,
+ VkQueryPool queryPool,
+ uint32_t query)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+ VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
+
+ struct dzn_cmd_buffer_query_pool_state *state =
+ dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
+ if (!state)
+ return;
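+
+ /* Flag the query for collection: pending results get resolved into the
+ * pool's collect buffer when the command buffer is ended.
+ */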
+ dzn_cmd_buffer_dynbitset_set(cmdbuf, &state->collect, query);
+ ID3D12GraphicsCommandList1_EndQuery(cmdbuf->cmdlist, qpool->heap, qpool->queries[query].type, query);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdWriteTimestamp2(VkCommandBuffer commandBuffer,
+ VkPipelineStageFlags2 stage,
+ VkQueryPool queryPool,
+ uint32_t query)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+ VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
+
+ struct dzn_cmd_buffer_query_pool_state *state =
+ dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
+ if (!state)
+ return;
+
+ /* Execution barrier so the timestamp gets written after the pipeline flush. */
+ D3D12_RESOURCE_BARRIER barrier = {
+ .Type = D3D12_RESOURCE_BARRIER_TYPE_UAV,
+ .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
+ .UAV = { .pResource = NULL },
+ };
+
+ ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier);
+
+ qpool->queries[query].type = D3D12_QUERY_TYPE_TIMESTAMP;
+ dzn_cmd_buffer_dynbitset_set(cmdbuf, &state->collect, query);
+ ID3D12GraphicsCommandList1_EndQuery(cmdbuf->cmdlist, qpool->heap, qpool->queries[query].type, query);
+}
+
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdResetQueryPool(VkCommandBuffer commandBuffer,
+ VkQueryPool queryPool,
+ uint32_t firstQuery,
+ uint32_t queryCount)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+ struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
+ VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
+
+ struct dzn_cmd_buffer_query_pool_state *state =
+ dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
+
+ if (!state)
+ return;
+
+ uint32_t q_step = DZN_QUERY_REFS_SECTION_SIZE / sizeof(uint64_t);
+
+ for (uint32_t q = 0; q < queryCount; q += q_step) {
+ uint32_t q_count = MIN2(queryCount - q, q_step);
+
+ ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, qpool->collect_buffer,
+ dzn_query_pool_get_availability_offset(qpool, firstQuery + q),
+ device->queries.refs,
+ DZN_QUERY_REFS_ALL_ZEROS_OFFSET,
+ q_count * sizeof(uint64_t));
+ }
+
+ q_step = DZN_QUERY_REFS_SECTION_SIZE / qpool->query_size;
+
+ for (uint32_t q = 0; q < queryCount; q += q_step) {
+ uint32_t q_count = MIN2(queryCount - q, q_step);
+
+ ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, qpool->collect_buffer,
+ dzn_query_pool_get_result_offset(qpool, firstQuery + q),
+ device->queries.refs,
+ DZN_QUERY_REFS_ALL_ZEROS_OFFSET,
+ q_count * qpool->query_size);
+ }
+
+ dzn_cmd_buffer_dynbitset_set_range(cmdbuf, &state->reset, firstQuery, queryCount);
+ dzn_cmd_buffer_dynbitset_clear_range(cmdbuf, &state->collect, firstQuery, queryCount);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer,
+ VkQueryPool queryPool,
+ uint32_t firstQuery,
+ uint32_t queryCount,
+ VkBuffer dstBuffer,
+ VkDeviceSize dstOffset,
+ VkDeviceSize stride,
+ VkQueryResultFlags flags)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+ VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
+ VK_FROM_HANDLE(dzn_buffer, buf, dstBuffer);
+
+ struct dzn_cmd_buffer_query_pool_state *qpstate =
+ dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
+ if (!qpstate)
+ return;
+
+ if (flags & VK_QUERY_RESULT_WAIT_BIT) {
+ for (uint32_t i = 0; i < queryCount; i++) {
+ if (!dzn_cmd_buffer_dynbitset_test(&qpstate->collect, firstQuery + i) &&
+ !dzn_cmd_buffer_dynbitset_test(&qpstate->signal, firstQuery + i))
+ dzn_cmd_buffer_dynbitset_set(cmdbuf, &qpstate->wait, firstQuery + i);
+ }
+ }
+
+ VkResult result =
+ dzn_cmd_buffer_collect_queries(cmdbuf, qpool, qpstate, firstQuery, queryCount);
+ if (result 
!= VK_SUCCESS) + return; + + bool raw_copy = (flags & VK_QUERY_RESULT_64_BIT) && + stride == qpool->query_size && + !(flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT); +#define ALL_STATS \ + (VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT | \ + VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT | \ + VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT | \ + VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT | \ + VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT | \ + VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT | \ + VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT | \ + VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT | \ + VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT | \ + VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT | \ + VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT) + if (qpool->heap_type == D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS && + qpool->pipeline_statistics != ALL_STATS) + raw_copy = false; +#undef ALL_STATS + + D3D12_RESOURCE_BARRIER barrier = { + .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, + .Transition = { + .pResource = qpool->collect_buffer, + .StateBefore = D3D12_RESOURCE_STATE_COPY_DEST, + .StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE, + }, + }; + + ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier); + + if (raw_copy) { + ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset, + qpool->collect_buffer, + dzn_query_pool_get_result_offset(qpool, firstQuery), + dzn_query_pool_get_result_size(qpool, queryCount)); + } else { + uint32_t step = flags & VK_QUERY_RESULT_64_BIT ? sizeof(uint64_t) : sizeof(uint32_t); + + for (uint32_t q = 0; q < queryCount; q++) { + uint32_t res_offset = dzn_query_pool_get_result_offset(qpool, firstQuery + q); + uint32_t dst_counter_offset = 0; + + if (qpool->heap_type == D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS) { + for (uint32_t c = 0; c < sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS) / sizeof(uint64_t); c++) { + if (!(BITFIELD_BIT(c) & qpool->pipeline_statistics)) + continue; + + ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset + dst_counter_offset, + qpool->collect_buffer, + res_offset + (c * sizeof(uint64_t)), + step); + dst_counter_offset += step; + } + } else { + ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset, + qpool->collect_buffer, + res_offset, step); + dst_counter_offset += step; + } + + if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { + ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset + dst_counter_offset, + qpool->collect_buffer, + dzn_query_pool_get_availability_offset(qpool, firstQuery + q), + step); + } + + dstOffset += stride; + } + } + + DZN_SWAP(D3D12_RESOURCE_STATES, barrier.Transition.StateBefore, barrier.Transition.StateAfter); + ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_CmdDispatchIndirect(VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset) +{ + VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); + struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); + VK_FROM_HANDLE(dzn_buffer, buf, buffer); + + cmdbuf->state.sysvals.compute.group_count_x = 0; + cmdbuf->state.sysvals.compute.group_count_y = 0; + cmdbuf->state.sysvals.compute.group_count_z = 0; + 
cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |= + DZN_CMD_BINDPOINT_DIRTY_SYSVALS; + + dzn_cmd_buffer_prepare_dispatch(cmdbuf); + + struct dzn_compute_pipeline *pipeline = (struct dzn_compute_pipeline *) + cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].pipeline; + ID3D12CommandSignature *cmdsig = + dzn_compute_pipeline_get_indirect_cmd_sig(pipeline); + + if (!cmdsig) { + cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + return; + } + + ID3D12Resource *exec_buf; + VkResult result = + dzn_cmd_buffer_alloc_internal_buf(cmdbuf, sizeof(D3D12_DISPATCH_ARGUMENTS) * 2, + D3D12_HEAP_TYPE_DEFAULT, + D3D12_RESOURCE_STATE_COPY_DEST, + &exec_buf); + if (result != VK_SUCCESS) + return; + + ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, exec_buf, 0, + buf->res, + offset, + sizeof(D3D12_DISPATCH_ARGUMENTS)); + ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, exec_buf, sizeof(D3D12_DISPATCH_ARGUMENTS), + buf->res, + offset, + sizeof(D3D12_DISPATCH_ARGUMENTS)); + D3D12_RESOURCE_BARRIER barriers[] = { + { + .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, + /* Transition the exec buffer to indirect arg so it can be + * passed to ExecuteIndirect() as an argument buffer. + */ + .Transition = { + .pResource = exec_buf, + .Subresource = 0, + .StateBefore = D3D12_RESOURCE_STATE_COPY_DEST, + .StateAfter = D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, + }, + }, + }; + + ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, ARRAY_SIZE(barriers), barriers); + + ID3D12GraphicsCommandList1_ExecuteIndirect(cmdbuf->cmdlist, cmdsig, 1, exec_buf, 0, NULL, 0); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_CmdSetLineWidth(VkCommandBuffer commandBuffer, + float lineWidth) +{ + assert(lineWidth == 1.0f); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_CmdSetDepthBias(VkCommandBuffer commandBuffer, + float depthBiasConstantFactor, + float depthBiasClamp, + float depthBiasSlopeFactor) +{ + dzn_stub(); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_CmdSetBlendConstants(VkCommandBuffer commandBuffer, + const float blendConstants[4]) +{ + VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); + + memcpy(cmdbuf->state.blend.constants, blendConstants, + sizeof(cmdbuf->state.blend.constants)); + cmdbuf->state.dirty |= DZN_CMD_DIRTY_BLEND_CONSTANTS; +} + +VKAPI_ATTR void VKAPI_CALL +dzn_CmdSetDepthBounds(VkCommandBuffer commandBuffer, + float minDepthBounds, + float maxDepthBounds) +{ + VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); + + ID3D12GraphicsCommandList1_OMSetDepthBounds(cmdbuf->cmdlist, minDepthBounds, maxDepthBounds); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer, + VkStencilFaceFlags faceMask, + uint32_t compareMask) +{ + VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); + + if (faceMask & VK_STENCIL_FACE_FRONT_BIT) + cmdbuf->state.zsa.stencil_test.front.compare_mask = compareMask; + + if (faceMask & VK_STENCIL_FACE_BACK_BIT) + cmdbuf->state.zsa.stencil_test.back.compare_mask = compareMask; + + cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_COMPARE_MASK; +} + +VKAPI_ATTR void VKAPI_CALL +dzn_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer, + VkStencilFaceFlags faceMask, + uint32_t writeMask) +{ + VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); + + if (faceMask & VK_STENCIL_FACE_FRONT_BIT) + cmdbuf->state.zsa.stencil_test.front.write_mask = writeMask; + + if (faceMask & VK_STENCIL_FACE_BACK_BIT) + cmdbuf->state.zsa.stencil_test.back.write_mask = writeMask; + + 
cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_WRITE_MASK; +} + +VKAPI_ATTR void VKAPI_CALL +dzn_CmdSetStencilReference(VkCommandBuffer commandBuffer, + VkStencilFaceFlags faceMask, + uint32_t reference) +{ + VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); + + if (faceMask & VK_STENCIL_FACE_FRONT_BIT) + cmdbuf->state.zsa.stencil_test.front.ref = reference; + + if (faceMask & VK_STENCIL_FACE_BACK_BIT) + cmdbuf->state.zsa.stencil_test.back.ref = reference; + + cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_REF; +} diff --git a/src/microsoft/vulkan/dzn_cmd_buffer.cpp b/src/microsoft/vulkan/dzn_cmd_buffer.cpp deleted file mode 100644 index 495b0468f7e..00000000000 --- a/src/microsoft/vulkan/dzn_cmd_buffer.cpp +++ /dev/null @@ -1,4281 +0,0 @@ -/* - * Copyright © Microsoft Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include "dzn_private.h" - -#include "vk_alloc.h" -#include "vk_debug_report.h" -#include "vk_format.h" -#include "vk_util.h" - -static void -dzn_cmd_buffer_destroy(struct vk_command_buffer *cbuf) -{ - if (!cbuf) - return; - - struct dzn_cmd_buffer *cmdbuf = container_of(cbuf, struct dzn_cmd_buffer, vk); - struct dzn_device *device = container_of(cbuf->base.device, struct dzn_device, vk); - - if (cmdbuf->cmdlist) - ID3D12GraphicsCommandList1_Release(cmdbuf->cmdlist); - - if (cmdbuf->cmdalloc) - ID3D12CommandAllocator_Release(cmdbuf->cmdalloc); - - list_for_each_entry_safe(struct dzn_internal_resource, res, &cmdbuf->internal_bufs, link) { - list_del(&res->link); - ID3D12Resource_Release(res->res); - vk_free(&cbuf->pool->alloc, res); - } - - dzn_descriptor_heap_pool_finish(&cmdbuf->cbv_srv_uav_pool); - dzn_descriptor_heap_pool_finish(&cmdbuf->sampler_pool); - dzn_descriptor_heap_pool_finish(&cmdbuf->rtvs.pool); - dzn_descriptor_heap_pool_finish(&cmdbuf->dsvs.pool); - util_dynarray_fini(&cmdbuf->events.wait); - util_dynarray_fini(&cmdbuf->events.signal); - util_dynarray_fini(&cmdbuf->queries.reset); - util_dynarray_fini(&cmdbuf->queries.wait); - util_dynarray_fini(&cmdbuf->queries.signal); - - if (cmdbuf->rtvs.ht) { - hash_table_foreach(cmdbuf->rtvs.ht, he) - vk_free(&cbuf->pool->alloc, he->data); - _mesa_hash_table_destroy(cmdbuf->rtvs.ht, NULL); - } - - if (cmdbuf->dsvs.ht) { - hash_table_foreach(cmdbuf->dsvs.ht, he) - vk_free(&cbuf->pool->alloc, he->data); - _mesa_hash_table_destroy(cmdbuf->dsvs.ht, NULL); - } - - if (cmdbuf->events.ht) - _mesa_hash_table_destroy(cmdbuf->events.ht, NULL); - - if (cmdbuf->queries.ht) { - hash_table_foreach(cmdbuf->queries.ht, he) { - struct dzn_cmd_buffer_query_pool_state *qpstate = - (struct dzn_cmd_buffer_query_pool_state *)he->data; - util_dynarray_fini(&qpstate->reset); - util_dynarray_fini(&qpstate->collect); - util_dynarray_fini(&qpstate->wait); - util_dynarray_fini(&qpstate->signal); - vk_free(&cbuf->pool->alloc, he->data); - } - _mesa_hash_table_destroy(cmdbuf->queries.ht, NULL); - } - - vk_command_buffer_finish(&cmdbuf->vk); - vk_free(&cbuf->pool->alloc, cmdbuf); -} - -static uint32_t -dzn_cmd_buffer_rtv_key_hash_function(const void *key) -{ - return _mesa_hash_data(key, sizeof(struct dzn_cmd_buffer_rtv_key)); -} - -static bool -dzn_cmd_buffer_rtv_key_equals_function(const void *a, const void *b) -{ - return memcmp(a, b, sizeof(struct dzn_cmd_buffer_rtv_key)) == 0; -} - -static uint32_t -dzn_cmd_buffer_dsv_key_hash_function(const void *key) -{ - return _mesa_hash_data(key, sizeof(struct dzn_cmd_buffer_dsv_key)); -} - -static bool -dzn_cmd_buffer_dsv_key_equals_function(const void *a, const void *b) -{ - return memcmp(a, b, sizeof(struct dzn_cmd_buffer_dsv_key)) == 0; -} - -static VkResult -dzn_cmd_buffer_create(const VkCommandBufferAllocateInfo *info, - VkCommandBuffer *out) -{ - VK_FROM_HANDLE(vk_command_pool, pool, info->commandPool); - struct dzn_device *device = container_of(pool->base.device, struct dzn_device, vk); - struct dzn_physical_device *pdev = - container_of(device->vk.physical, struct dzn_physical_device, vk); - - assert(pool->queue_family_index < pdev->queue_family_count); - - D3D12_COMMAND_LIST_TYPE type = - pdev->queue_families[pool->queue_family_index].desc.Type; - - struct dzn_cmd_buffer *cmdbuf = (struct dzn_cmd_buffer *) - vk_zalloc(&pool->alloc, sizeof(*cmdbuf), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!cmdbuf) - return vk_error(pool->base.device, VK_ERROR_OUT_OF_HOST_MEMORY); - - VkResult result = - 
vk_command_buffer_init(&cmdbuf->vk, pool, info->level); - if (result != VK_SUCCESS) { - vk_free(&pool->alloc, cmdbuf); - return result; - } - - memset(&cmdbuf->state, 0, sizeof(cmdbuf->state)); - list_inithead(&cmdbuf->internal_bufs); - util_dynarray_init(&cmdbuf->events.wait, NULL); - util_dynarray_init(&cmdbuf->events.signal, NULL); - util_dynarray_init(&cmdbuf->queries.reset, NULL); - util_dynarray_init(&cmdbuf->queries.wait, NULL); - util_dynarray_init(&cmdbuf->queries.signal, NULL); - dzn_descriptor_heap_pool_init(&cmdbuf->rtvs.pool, device, - D3D12_DESCRIPTOR_HEAP_TYPE_RTV, - false, &pool->alloc); - dzn_descriptor_heap_pool_init(&cmdbuf->dsvs.pool, device, - D3D12_DESCRIPTOR_HEAP_TYPE_DSV, - false, &pool->alloc); - dzn_descriptor_heap_pool_init(&cmdbuf->cbv_srv_uav_pool, device, - D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, - true, &pool->alloc); - dzn_descriptor_heap_pool_init(&cmdbuf->sampler_pool, device, - D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, - true, &pool->alloc); - - cmdbuf->events.ht = - _mesa_pointer_hash_table_create(NULL); - cmdbuf->queries.ht = - _mesa_pointer_hash_table_create(NULL); - cmdbuf->rtvs.ht = - _mesa_hash_table_create(NULL, - dzn_cmd_buffer_rtv_key_hash_function, - dzn_cmd_buffer_rtv_key_equals_function); - cmdbuf->dsvs.ht = - _mesa_hash_table_create(NULL, - dzn_cmd_buffer_dsv_key_hash_function, - dzn_cmd_buffer_dsv_key_equals_function); - if (!cmdbuf->events.ht || !cmdbuf->queries.ht || - !cmdbuf->rtvs.ht || !cmdbuf->dsvs.ht) { - result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - goto out; - } - - cmdbuf->vk.destroy = dzn_cmd_buffer_destroy; - - if (FAILED(ID3D12Device1_CreateCommandAllocator(device->dev, type, - IID_ID3D12CommandAllocator, - (void **)&cmdbuf->cmdalloc))) { - result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - goto out; - } - - if (FAILED(ID3D12Device1_CreateCommandList(device->dev, 0, type, - cmdbuf->cmdalloc, NULL, - IID_ID3D12GraphicsCommandList1, - (void **)&cmdbuf->cmdlist))) { - result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - goto out; - } - -out: - if (result != VK_SUCCESS) - dzn_cmd_buffer_destroy(&cmdbuf->vk); - else - *out = dzn_cmd_buffer_to_handle(cmdbuf); - - return result; -} - -static VkResult -dzn_cmd_buffer_reset(struct dzn_cmd_buffer *cmdbuf) -{ - struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); - const struct dzn_physical_device *pdev = - container_of(device->vk.physical, struct dzn_physical_device, vk); - const struct vk_command_pool *pool = cmdbuf->vk.pool; - - /* Reset the state */ - memset(&cmdbuf->state, 0, sizeof(cmdbuf->state)); - - /* TODO: Return resources to the pool */ - list_for_each_entry_safe(struct dzn_internal_resource, res, &cmdbuf->internal_bufs, link) { - list_del(&res->link); - ID3D12Resource_Release(res->res); - vk_free(&cmdbuf->vk.pool->alloc, res); - } - - cmdbuf->error = VK_SUCCESS; - util_dynarray_clear(&cmdbuf->events.wait); - util_dynarray_clear(&cmdbuf->events.signal); - util_dynarray_clear(&cmdbuf->queries.reset); - util_dynarray_clear(&cmdbuf->queries.wait); - util_dynarray_clear(&cmdbuf->queries.signal); - hash_table_foreach(cmdbuf->rtvs.ht, he) - vk_free(&cmdbuf->vk.pool->alloc, he->data); - _mesa_hash_table_clear(cmdbuf->rtvs.ht, NULL); - dzn_descriptor_heap_pool_reset(&cmdbuf->rtvs.pool); - hash_table_foreach(cmdbuf->dsvs.ht, he) - vk_free(&cmdbuf->vk.pool->alloc, he->data); - _mesa_hash_table_clear(cmdbuf->dsvs.ht, NULL); - hash_table_foreach(cmdbuf->queries.ht, he) { - struct dzn_cmd_buffer_query_pool_state *qpstate = - (struct 
dzn_cmd_buffer_query_pool_state *)he->data; - util_dynarray_fini(&qpstate->reset); - util_dynarray_fini(&qpstate->collect); - util_dynarray_fini(&qpstate->wait); - util_dynarray_fini(&qpstate->signal); - vk_free(&cmdbuf->vk.pool->alloc, he->data); - } - _mesa_hash_table_clear(cmdbuf->queries.ht, NULL); - _mesa_hash_table_clear(cmdbuf->events.ht, NULL); - dzn_descriptor_heap_pool_reset(&cmdbuf->dsvs.pool); - dzn_descriptor_heap_pool_reset(&cmdbuf->cbv_srv_uav_pool); - dzn_descriptor_heap_pool_reset(&cmdbuf->sampler_pool); - vk_command_buffer_reset(&cmdbuf->vk); - - /* cmdlist->Reset() doesn't return the memory back the the command list - * allocator, and cmdalloc->Reset() can only be called if there's no live - * cmdlist allocated from the allocator, so we need to release and create - * a new command list. - */ - ID3D12GraphicsCommandList1_Release(cmdbuf->cmdlist); - cmdbuf->cmdlist = NULL; - ID3D12CommandAllocator_Reset(cmdbuf->cmdalloc); - D3D12_COMMAND_LIST_TYPE type = - pdev->queue_families[pool->queue_family_index].desc.Type; - if (FAILED(ID3D12Device1_CreateCommandList(device->dev, 0, - type, - cmdbuf->cmdalloc, NULL, - IID_ID3D12GraphicsCommandList1, - (void **)&cmdbuf->cmdlist))) { - cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - } - - return cmdbuf->error; -} - -VKAPI_ATTR VkResult VKAPI_CALL -dzn_AllocateCommandBuffers(VkDevice device, - const VkCommandBufferAllocateInfo *pAllocateInfo, - VkCommandBuffer *pCommandBuffers) -{ - VK_FROM_HANDLE(vk_command_pool, pool, pAllocateInfo->commandPool); - VK_FROM_HANDLE(dzn_device, dev, device); - VkResult result = VK_SUCCESS; - uint32_t i; - - for (i = 0; i < pAllocateInfo->commandBufferCount; i++) { - result = dzn_cmd_buffer_create(pAllocateInfo, - &pCommandBuffers[i]); - if (result != VK_SUCCESS) - break; - } - - if (result != VK_SUCCESS) { - dev->vk.dispatch_table.FreeCommandBuffers(device, pAllocateInfo->commandPool, - i, pCommandBuffers); - for (i = 0; i < pAllocateInfo->commandBufferCount; i++) - pCommandBuffers[i] = VK_NULL_HANDLE; - } - - return result; -} - -VKAPI_ATTR VkResult VKAPI_CALL -dzn_ResetCommandBuffer(VkCommandBuffer commandBuffer, - VkCommandBufferResetFlags flags) -{ - VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); - - return dzn_cmd_buffer_reset(cmdbuf); -} - -VkResult -dzn_BeginCommandBuffer(VkCommandBuffer commandBuffer, - const VkCommandBufferBeginInfo *info) -{ - VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); - - /* If this is the first vkBeginCommandBuffer, we must *initialize* the - * command buffer's state. Otherwise, we must *reset* its state. In both - * cases we reset it. - * - * From the Vulkan 1.0 spec: - * - * If a command buffer is in the executable state and the command buffer - * was allocated from a command pool with the - * VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT flag set, then - * vkBeginCommandBuffer implicitly resets the command buffer, behaving - * as if vkResetCommandBuffer had been called with - * VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT not set. It then puts - * the command buffer in the recording state. 
- */ - return dzn_cmd_buffer_reset(cmdbuf); -} - -static void -dzn_cmd_buffer_gather_events(struct dzn_cmd_buffer *cmdbuf) -{ - struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); - - if (cmdbuf->error != VK_SUCCESS) - goto out; - - hash_table_foreach(cmdbuf->events.ht, he) { - enum dzn_event_state state = (enum dzn_event_state)(uintptr_t)he->data; - - if (state != DZN_EVENT_STATE_EXTERNAL_WAIT) { - struct dzn_cmd_event_signal signal = { (struct dzn_event *)he->key, state == DZN_EVENT_STATE_SET }; - struct dzn_cmd_event_signal *entry = (struct dzn_cmd_event_signal *) - util_dynarray_grow(&cmdbuf->events.signal, struct dzn_cmd_event_signal, 1); - - if (!entry) { - cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - break; - } - - *entry = signal; - } - } - -out: - _mesa_hash_table_clear(cmdbuf->events.ht, NULL); -} - -static VkResult -dzn_cmd_buffer_dynbitset_reserve(struct dzn_cmd_buffer *cmdbuf, struct util_dynarray *array, uint32_t bit) -{ - struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); - - if (bit < util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS) - return VK_SUCCESS; - - unsigned old_sz = array->size; - void *ptr = util_dynarray_grow(array, BITSET_WORD, (bit + BITSET_WORDBITS) / BITSET_WORDBITS); - if (!ptr) { - cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - return cmdbuf->error; - } - - memset(ptr, 0, array->size - old_sz); - return VK_SUCCESS; -} - -static bool -dzn_cmd_buffer_dynbitset_test(struct util_dynarray *array, uint32_t bit) -{ - uint32_t nbits = util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS; - - if (bit < nbits) - return BITSET_TEST(util_dynarray_element(array, BITSET_WORD, 0), bit); - - return false; -} - -static VkResult -dzn_cmd_buffer_dynbitset_set(struct dzn_cmd_buffer *cmdbuf, struct util_dynarray *array, uint32_t bit) -{ - struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); - - VkResult result = dzn_cmd_buffer_dynbitset_reserve(cmdbuf, array, bit); - if (result != VK_SUCCESS) - return result; - - BITSET_SET(util_dynarray_element(array, BITSET_WORD, 0), bit); - return VK_SUCCESS; -} - -static void -dzn_cmd_buffer_dynbitset_clear(struct dzn_cmd_buffer *cmdbuf, struct util_dynarray *array, uint32_t bit) -{ - struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); - - if (bit >= util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS) - return; - - BITSET_CLEAR(util_dynarray_element(array, BITSET_WORD, 0), bit); -} - -static VkResult -dzn_cmd_buffer_dynbitset_set_range(struct dzn_cmd_buffer *cmdbuf, - struct util_dynarray *array, - uint32_t bit, uint32_t count) -{ - struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); - - VkResult result = dzn_cmd_buffer_dynbitset_reserve(cmdbuf, array, bit + count - 1); - if (result != VK_SUCCESS) - return result; - - BITSET_SET_RANGE(util_dynarray_element(array, BITSET_WORD, 0), bit, bit + count - 1); - return VK_SUCCESS; -} - -static void -dzn_cmd_buffer_dynbitset_clear_range(struct dzn_cmd_buffer *cmdbuf, - struct util_dynarray *array, - uint32_t bit, uint32_t count) -{ - struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); - uint32_t nbits = util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS; - - if (!nbits) - return; - - uint32_t end = MIN2(bit + count, nbits) - 1; - - while (bit <= end) { - uint32_t subcount = MIN2(end + 1 - 
bit, 32 - (bit % 32)); - BITSET_CLEAR_RANGE(util_dynarray_element(array, BITSET_WORD, 0), bit, bit + subcount - 1); - bit += subcount; - } -} - -static struct dzn_cmd_buffer_query_pool_state * -dzn_cmd_buffer_create_query_pool_state(struct dzn_cmd_buffer *cmdbuf) -{ - struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); - struct dzn_cmd_buffer_query_pool_state *state = (struct dzn_cmd_buffer_query_pool_state *) - vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*state), - 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!state) { - cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - return NULL; - } - - util_dynarray_init(&state->reset, NULL); - util_dynarray_init(&state->collect, NULL); - util_dynarray_init(&state->wait, NULL); - util_dynarray_init(&state->signal, NULL); - return state; -} - -static void -dzn_cmd_buffer_destroy_query_pool_state(struct dzn_cmd_buffer *cmdbuf, - struct dzn_cmd_buffer_query_pool_state *state) -{ - util_dynarray_fini(&state->reset); - util_dynarray_fini(&state->collect); - util_dynarray_fini(&state->wait); - util_dynarray_fini(&state->signal); - vk_free(&cmdbuf->vk.pool->alloc, state); -} - -static struct dzn_cmd_buffer_query_pool_state * -dzn_cmd_buffer_get_query_pool_state(struct dzn_cmd_buffer *cmdbuf, - struct dzn_query_pool *qpool) -{ - struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); - struct dzn_cmd_buffer_query_pool_state *state = NULL; - struct hash_entry *he = - _mesa_hash_table_search(cmdbuf->queries.ht, qpool); - - if (!he) { - state = dzn_cmd_buffer_create_query_pool_state(cmdbuf); - if (!state) - return NULL; - - he = _mesa_hash_table_insert(cmdbuf->queries.ht, qpool, state); - if (!he) { - dzn_cmd_buffer_destroy_query_pool_state(cmdbuf, state); - cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - return NULL; - } - } else { - state = (struct dzn_cmd_buffer_query_pool_state *)he->data; - } - - return state; -} - -static VkResult -dzn_cmd_buffer_collect_queries(struct dzn_cmd_buffer *cmdbuf, - const struct dzn_query_pool *qpool, - struct dzn_cmd_buffer_query_pool_state *state, - uint32_t first_query, - uint32_t query_count) -{ - struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); - uint32_t nbits = util_dynarray_num_elements(&state->collect, BITSET_WORD) * BITSET_WORDBITS; - uint32_t start, end; - - query_count = MIN2(query_count, nbits - first_query); - nbits = MIN2(first_query + query_count, nbits); - - VkResult result = - dzn_cmd_buffer_dynbitset_reserve(cmdbuf, &state->signal, first_query + query_count - 1); - if (result != VK_SUCCESS) - return result; - - BITSET_WORD *collect = - util_dynarray_element(&state->collect, BITSET_WORD, 0); - for (start = first_query, end = first_query, - __bitset_next_range(&start, &end, collect, nbits); - start < nbits; - __bitset_next_range(&start, &end, collect, nbits)) { - ID3D12GraphicsCommandList1_ResolveQueryData(cmdbuf->cmdlist, - qpool->heap, - qpool->queries[start].type, - start, end - start, - qpool->resolve_buffer, - qpool->query_size * start); - } - - D3D12_RESOURCE_BARRIER barrier = { - .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, - .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, - .Transition = { - .pResource = qpool->resolve_buffer, - .StateBefore = D3D12_RESOURCE_STATE_COPY_DEST, - .StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE, - }, - }; - uint32_t offset = dzn_query_pool_get_result_offset(qpool, first_query); - uint32_t size = dzn_query_pool_get_result_size(qpool, 
query_count); - - ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier); - - ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, - qpool->collect_buffer, offset, - qpool->resolve_buffer, offset, - size); - - for (start = first_query, end = first_query, - __bitset_next_range(&start, &end, collect, nbits); - start < nbits; - __bitset_next_range(&start, &end, collect, nbits)) { - uint32_t step = DZN_QUERY_REFS_SECTION_SIZE / sizeof(uint64_t); - uint32_t count = end - start; - - for (unsigned i = 0; i < count; i+= step) { - uint32_t sub_count = MIN2(step, count - i); - - ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, - qpool->collect_buffer, - dzn_query_pool_get_availability_offset(qpool, start + i), - device->queries.refs, - DZN_QUERY_REFS_ALL_ONES_OFFSET, - sizeof(uint64_t) * sub_count); - } - - dzn_cmd_buffer_dynbitset_set_range(cmdbuf, &state->signal, start, count); - dzn_cmd_buffer_dynbitset_clear_range(cmdbuf, &state->collect, start, count); - } - - DZN_SWAP(D3D12_RESOURCE_STATES, barrier.Transition.StateBefore, barrier.Transition.StateAfter); - ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier); - return VK_SUCCESS; -} - -static VkResult -dzn_cmd_buffer_collect_query_ops(struct dzn_cmd_buffer *cmdbuf, - struct dzn_query_pool *qpool, - struct util_dynarray *bitset_array, - struct util_dynarray *ops_array) -{ - struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); - BITSET_WORD *bitset = util_dynarray_element(bitset_array, BITSET_WORD, 0); - uint32_t nbits = util_dynarray_num_elements(bitset_array, BITSET_WORD) * BITSET_WORDBITS; - uint32_t start, end; - - BITSET_FOREACH_RANGE(start, end, bitset, nbits) { - struct dzn_cmd_buffer_query_range range = { qpool, start, end - start }; - struct dzn_cmd_buffer_query_range *entry = (struct dzn_cmd_buffer_query_range *) - util_dynarray_grow(ops_array, struct dzn_cmd_buffer_query_range, 1); - - if (!entry) { - cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - return cmdbuf->error; - } - - *entry = range; - } - - return VK_SUCCESS; -} - -static VkResult -dzn_cmd_buffer_gather_queries(struct dzn_cmd_buffer *cmdbuf) -{ - hash_table_foreach(cmdbuf->queries.ht, he) { - struct dzn_query_pool *qpool = (struct dzn_query_pool *)he->key; - struct dzn_cmd_buffer_query_pool_state *state = - (struct dzn_cmd_buffer_query_pool_state *)he->data; - VkResult result = - dzn_cmd_buffer_collect_queries(cmdbuf, qpool, state, 0, qpool->query_count); - if (result != VK_SUCCESS) - return result; - - result = dzn_cmd_buffer_collect_query_ops(cmdbuf, qpool, &state->reset, &cmdbuf->queries.reset); - if (result != VK_SUCCESS) - return result; - - result = dzn_cmd_buffer_collect_query_ops(cmdbuf, qpool, &state->wait, &cmdbuf->queries.wait); - if (result != VK_SUCCESS) - return result; - - result = dzn_cmd_buffer_collect_query_ops(cmdbuf, qpool, &state->signal, &cmdbuf->queries.signal); - if (result != VK_SUCCESS) - return result; - } - - return VK_SUCCESS; -} - -VKAPI_ATTR VkResult VKAPI_CALL -dzn_EndCommandBuffer(VkCommandBuffer commandBuffer) -{ - VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); - - if (cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) { - dzn_cmd_buffer_gather_events(cmdbuf); - dzn_cmd_buffer_gather_queries(cmdbuf); - HRESULT hres = ID3D12GraphicsCommandList1_Close(cmdbuf->cmdlist); - if (FAILED(hres)) - cmdbuf->error = vk_error(cmdbuf->vk.base.device, VK_ERROR_OUT_OF_HOST_MEMORY); - } else { - cmdbuf->error = cmdbuf->vk.cmd_queue.error; 
- } - - return cmdbuf->error; -} - -VKAPI_ATTR void VKAPI_CALL -dzn_CmdPipelineBarrier2(VkCommandBuffer commandBuffer, - const VkDependencyInfo *info) -{ - VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); - - bool execution_barrier = - !info->memoryBarrierCount && - !info->bufferMemoryBarrierCount && - !info->imageMemoryBarrierCount; - - if (execution_barrier) { - /* Execution barrier can be emulated with a NULL UAV barrier (AKA - * pipeline flush). That's the best we can do with the standard D3D12 - * barrier API. - */ - D3D12_RESOURCE_BARRIER barrier = { - .Type = D3D12_RESOURCE_BARRIER_TYPE_UAV, - .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, - .UAV = { .pResource = NULL }, - }; - - ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier); - } - - /* Global memory barriers can be emulated with NULL UAV/Aliasing barriers. - * Scopes are not taken into account, but that's inherent to the current - * D3D12 barrier API. - */ - if (info->memoryBarrierCount) { - D3D12_RESOURCE_BARRIER barriers[2] = {}; - - barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; - barriers[0].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; - barriers[0].UAV.pResource = NULL; - barriers[1].Type = D3D12_RESOURCE_BARRIER_TYPE_ALIASING; - barriers[1].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; - barriers[1].Aliasing.pResourceBefore = NULL; - barriers[1].Aliasing.pResourceAfter = NULL; - ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 2, barriers); - } - - for (uint32_t i = 0; i < info->bufferMemoryBarrierCount; i++) { - VK_FROM_HANDLE(dzn_buffer, buf, info->pBufferMemoryBarriers[i].buffer); - D3D12_RESOURCE_BARRIER barrier = {}; - - /* UAV are used only for storage buffers, skip all other buffers. */ - if (!(buf->usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT)) - continue; - - barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; - barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; - barrier.UAV.pResource = buf->res; - ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier); - } - - for (uint32_t i = 0; i < info->imageMemoryBarrierCount; i++) { - const VkImageMemoryBarrier2 *ibarrier = &info->pImageMemoryBarriers[i]; - const VkImageSubresourceRange *range = &ibarrier->subresourceRange; - VK_FROM_HANDLE(dzn_image, image, ibarrier->image); - - /* We use placed resource's simple model, in which only one resource - * pointing to a given heap is active at a given time. To make the - * resource active we need to add an aliasing barrier. 
-       */
-      D3D12_RESOURCE_BARRIER aliasing_barrier = {
-         .Type = D3D12_RESOURCE_BARRIER_TYPE_ALIASING,
-         .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
-         .Aliasing = {
-            .pResourceBefore = NULL,
-            .pResourceAfter = image->res,
-         },
-      };
-
-      ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &aliasing_barrier);
-
-      dzn_foreach_aspect(aspect, range->aspectMask) {
-         D3D12_RESOURCE_BARRIER transition_barrier = {
-            .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
-            .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
-            .Transition = {
-               .pResource = image->res,
-               .StateAfter = dzn_image_layout_to_state(ibarrier->newLayout, aspect),
-            },
-         };
-
-         if (ibarrier->oldLayout == VK_IMAGE_LAYOUT_UNDEFINED ||
-             ibarrier->oldLayout == VK_IMAGE_LAYOUT_PREINITIALIZED) {
-            transition_barrier.Transition.StateBefore = image->mem->initial_state;
-         } else {
-            transition_barrier.Transition.StateBefore =
-               dzn_image_layout_to_state(ibarrier->oldLayout, aspect);
-         }
-
-         /* Some layouts map to the same states, and NOP-barriers are illegal. */
-         if (transition_barrier.Transition.StateBefore == transition_barrier.Transition.StateAfter)
-            continue;
-
-         uint32_t layer_count = dzn_get_layer_count(image, range);
-         uint32_t level_count = dzn_get_level_count(image, range);
-         for (uint32_t layer = 0; layer < layer_count; layer++) {
-            for (uint32_t lvl = 0; lvl < level_count; lvl++) {
-               transition_barrier.Transition.Subresource =
-                  dzn_image_range_get_subresource_index(image, range, aspect, lvl, layer);
-               ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &transition_barrier);
-            }
-         }
-      }
-   }
-}
-
-static D3D12_CPU_DESCRIPTOR_HANDLE
-dzn_cmd_buffer_get_dsv(struct dzn_cmd_buffer *cmdbuf,
-                       const struct dzn_image *image,
-                       const D3D12_DEPTH_STENCIL_VIEW_DESC *desc)
-{
-   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
-   struct dzn_cmd_buffer_dsv_key key = { image, *desc };
-   struct hash_entry *he = _mesa_hash_table_search(cmdbuf->dsvs.ht, &key);
-   struct dzn_cmd_buffer_dsv_entry *dsve;
-
-   if (!he) {
-      struct dzn_descriptor_heap *heap;
-      uint32_t slot;
-
-      // TODO: error handling
-      dsve = (struct dzn_cmd_buffer_dsv_entry *)
-         vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*dsve), 8,
-                  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
-      dsve->key = key;
-      dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->dsvs.pool, device, 1, &heap, &slot);
-      dsve->handle = dzn_descriptor_heap_get_cpu_handle(heap, slot);
-      ID3D12Device1_CreateDepthStencilView(device->dev, image->res, desc, dsve->handle);
-      _mesa_hash_table_insert(cmdbuf->dsvs.ht, &dsve->key, dsve);
-   } else {
-      dsve = (struct dzn_cmd_buffer_dsv_entry *)he->data;
-   }
-
-   return dsve->handle;
-}
-
-static D3D12_CPU_DESCRIPTOR_HANDLE
-dzn_cmd_buffer_get_rtv(struct dzn_cmd_buffer *cmdbuf,
-                       const struct dzn_image *image,
-                       const D3D12_RENDER_TARGET_VIEW_DESC *desc)
-{
-   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
-   struct dzn_cmd_buffer_rtv_key key = { image, *desc };
-   struct hash_entry *he = _mesa_hash_table_search(cmdbuf->rtvs.ht, &key);
-   struct dzn_cmd_buffer_rtv_entry *rtve;
-
-   if (!he) {
-      struct dzn_descriptor_heap *heap;
-      uint32_t slot;
-
-      // TODO: error handling
-      rtve = (struct dzn_cmd_buffer_rtv_entry *)
-         vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*rtve), 8,
-                  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
-      rtve->key = key;
-      dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->rtvs.pool, device, 1, &heap, &slot);
-      rtve->handle = dzn_descriptor_heap_get_cpu_handle(heap, slot);
-
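-      /* Like the DSV path above, the view is created straight into the
-       * cached slot: the cache is keyed on (image, view-desc), so later
-       * clears/blits hitting the same subresource reuse this descriptor
-       * instead of allocating a new one. */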
ID3D12Device1_CreateRenderTargetView(device->dev, image->res, desc, rtve->handle); - he = _mesa_hash_table_insert(cmdbuf->rtvs.ht, &rtve->key, rtve); - } else { - rtve = (struct dzn_cmd_buffer_rtv_entry *)he->data; - } - - return rtve->handle; -} - -static VkResult -dzn_cmd_buffer_alloc_internal_buf(struct dzn_cmd_buffer *cmdbuf, - uint32_t size, - D3D12_HEAP_TYPE heap_type, - D3D12_RESOURCE_STATES init_state, - ID3D12Resource **out) -{ - struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); - ID3D12Resource *res; - *out = NULL; - - /* Align size on 64k (the default alignment) */ - size = ALIGN_POT(size, 64 * 1024); - - D3D12_HEAP_PROPERTIES hprops; - ID3D12Device1_GetCustomHeapProperties(device->dev, &hprops, 0, heap_type); - D3D12_RESOURCE_DESC rdesc = { - .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER, - .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT, - .Width = size, - .Height = 1, - .DepthOrArraySize = 1, - .MipLevels = 1, - .Format = DXGI_FORMAT_UNKNOWN, - .SampleDesc = { .Count = 1, .Quality = 0 }, - .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR, - .Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS, - }; - - HRESULT hres = - ID3D12Device1_CreateCommittedResource(device->dev, &hprops, - D3D12_HEAP_FLAG_NONE, &rdesc, - init_state, NULL, - IID_ID3D12Resource, - (void **)&res); - if (FAILED(hres)) { - cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); - return cmdbuf->error; - } - - struct dzn_internal_resource *entry = (struct dzn_internal_resource *) - vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*entry), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!entry) { - cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); - ID3D12Resource_Release(res); - return cmdbuf->error; - } - - entry->res = res; - list_addtail(&entry->link, &cmdbuf->internal_bufs); - *out = entry->res; - return VK_SUCCESS; -} - -static void -dzn_cmd_buffer_clear_rects_with_copy(struct dzn_cmd_buffer *cmdbuf, - const struct dzn_image *image, - VkImageLayout layout, - const VkClearColorValue *color, - const VkImageSubresourceRange *range, - uint32_t rect_count, D3D12_RECT *rects) -{ - enum pipe_format pfmt = vk_format_to_pipe_format(image->vk.format); - uint32_t blksize = util_format_get_blocksize(pfmt); - uint8_t buf[D3D12_TEXTURE_DATA_PITCH_ALIGNMENT * 3] = {}; - uint32_t raw[4] = {}; - - assert(blksize <= sizeof(raw)); - assert(!(sizeof(buf) % blksize)); - - util_format_write_4(pfmt, (void *)color, 0, (void *)raw, 0, 0, 0, 1, 1); - - uint32_t fill_step = D3D12_TEXTURE_DATA_PITCH_ALIGNMENT; - while (fill_step % blksize) - fill_step += D3D12_TEXTURE_DATA_PITCH_ALIGNMENT; - - uint32_t max_w = u_minify(image->vk.extent.width, range->baseMipLevel); - uint32_t max_h = u_minify(image->vk.extent.height, range->baseMipLevel); - uint32_t row_pitch = ALIGN_NPOT(max_w * blksize, fill_step); - uint32_t res_size = max_h * row_pitch; - - assert(fill_step <= sizeof(buf)); - - for (uint32_t i = 0; i < fill_step; i += blksize) - memcpy(&buf[i], raw, blksize); - - ID3D12Resource *src_res; - - VkResult result = - dzn_cmd_buffer_alloc_internal_buf(cmdbuf, res_size, - D3D12_HEAP_TYPE_UPLOAD, - D3D12_RESOURCE_STATE_GENERIC_READ, - &src_res); - if (result != VK_SUCCESS) - return; - - assert(!(res_size % fill_step)); - - uint8_t *cpu_ptr; - ID3D12Resource_Map(src_res, 0, NULL, (void **)&cpu_ptr); - for (uint32_t i = 0; i < res_size; i += fill_step) - memcpy(&cpu_ptr[i], buf, fill_step); - - ID3D12Resource_Unmap(src_res, 0, NULL); - - D3D12_TEXTURE_COPY_LOCATION src_loc = { - 
.pResource = src_res, - .Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, - .PlacedFootprint = { - .Offset = 0, - .Footprint = { - .Width = max_w, - .Height = max_h, - .Depth = 1, - .RowPitch = (UINT)ALIGN_NPOT(max_w * blksize, fill_step), - }, - }, - }; - - D3D12_RESOURCE_STATES dst_state = - dzn_image_layout_to_state(layout, VK_IMAGE_ASPECT_COLOR_BIT); - D3D12_RESOURCE_BARRIER barrier = { - .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, - .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, - .Transition = { - .pResource = src_res, - .StateBefore = D3D12_RESOURCE_STATE_GENERIC_READ, - .StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE, - }, - }; - - ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier); - - barrier.Transition.pResource = image->res; - - assert(dzn_get_level_count(image, range) == 1); - uint32_t layer_count = dzn_get_layer_count(image, range); - - dzn_foreach_aspect(aspect, range->aspectMask) { - VkImageSubresourceLayers subres = { - .aspectMask = (VkImageAspectFlags)aspect, - .mipLevel = range->baseMipLevel, - .baseArrayLayer = range->baseArrayLayer, - .layerCount = layer_count, - }; - - for (uint32_t layer = 0; layer < layer_count; layer++) { - if (dst_state != D3D12_RESOURCE_STATE_COPY_DEST) { - barrier.Transition.Subresource = - dzn_image_range_get_subresource_index(image, range, aspect, 0, layer); - barrier.Transition.StateBefore = dst_state; - barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST; - ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier); - } - - D3D12_TEXTURE_COPY_LOCATION dst_loc = - dzn_image_get_copy_loc(image, &subres, aspect, layer); - - src_loc.PlacedFootprint.Footprint.Format = - dst_loc.Type == D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT ? - dst_loc.PlacedFootprint.Footprint.Format : - image->desc.Format; - - for (uint32_t r = 0; r < rect_count; r++) { - D3D12_BOX src_box = { - .left = 0, - .top = 0, - .front = 0, - .right = (UINT)(rects[r].right - rects[r].left), - .bottom = (UINT)(rects[r].bottom - rects[r].top), - .back = 1, - }; - - ID3D12GraphicsCommandList1_CopyTextureRegion(cmdbuf->cmdlist, - &dst_loc, - rects[r].left, - rects[r].top, 0, - &src_loc, - &src_box); - } - - if (dst_state != D3D12_RESOURCE_STATE_COPY_DEST) { - barrier.Transition.StateAfter = dst_state; - barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; - ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier); - } - } - } -} - -static VkClearColorValue -adjust_clear_color(VkFormat format, const VkClearColorValue *col) -{ - VkClearColorValue out = *col; - - // D3D12 doesn't support bgra4, so we map it to rgba4 and swizzle things - // manually where it matters, like here, in the clear path. 
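-   // For instance, a VkClearColorValue of { 0.1, 0.2, 0.3, 0.4 } comes out
-   // as { 0.2, 0.1, 0.4, 0.3 }: components 0/1 and 2/3 are swapped below.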
- if (format == VK_FORMAT_B4G4R4A4_UNORM_PACK16) { - DZN_SWAP(float, out.float32[0], out.float32[1]); - DZN_SWAP(float, out.float32[2], out.float32[3]); - } - - return out; -} - -static void -dzn_cmd_buffer_clear_ranges_with_copy(struct dzn_cmd_buffer *cmdbuf, - const struct dzn_image *image, - VkImageLayout layout, - const VkClearColorValue *color, - uint32_t range_count, - const VkImageSubresourceRange *ranges) -{ - enum pipe_format pfmt = vk_format_to_pipe_format(image->vk.format); - uint32_t blksize = util_format_get_blocksize(pfmt); - uint8_t buf[D3D12_TEXTURE_DATA_PITCH_ALIGNMENT * 3] = {}; - uint32_t raw[4] = {}; - - assert(blksize <= sizeof(raw)); - assert(!(sizeof(buf) % blksize)); - - util_format_write_4(pfmt, (void *)color, 0, (void *)raw, 0, 0, 0, 1, 1); - - uint32_t fill_step = D3D12_TEXTURE_DATA_PITCH_ALIGNMENT; - while (fill_step % blksize) - fill_step += D3D12_TEXTURE_DATA_PITCH_ALIGNMENT; - - uint32_t res_size = 0; - for (uint32_t r = 0; r < range_count; r++) { - uint32_t w = u_minify(image->vk.extent.width, ranges[r].baseMipLevel); - uint32_t h = u_minify(image->vk.extent.height, ranges[r].baseMipLevel); - uint32_t d = u_minify(image->vk.extent.depth, ranges[r].baseMipLevel); - uint32_t row_pitch = ALIGN_NPOT(w * blksize, fill_step); - - res_size = MAX2(res_size, h * d * row_pitch); - } - - assert(fill_step <= sizeof(buf)); - - for (uint32_t i = 0; i < fill_step; i += blksize) - memcpy(&buf[i], raw, blksize); - - ID3D12Resource *src_res; - - VkResult result = - dzn_cmd_buffer_alloc_internal_buf(cmdbuf, res_size, - D3D12_HEAP_TYPE_UPLOAD, - D3D12_RESOURCE_STATE_GENERIC_READ, - &src_res); - if (result != VK_SUCCESS) - return; - - assert(!(res_size % fill_step)); - - uint8_t *cpu_ptr; - ID3D12Resource_Map(src_res, 0, NULL, (void **)&cpu_ptr); - for (uint32_t i = 0; i < res_size; i += fill_step) - memcpy(&cpu_ptr[i], buf, fill_step); - - ID3D12Resource_Unmap(src_res, 0, NULL); - - D3D12_TEXTURE_COPY_LOCATION src_loc = { - .pResource = src_res, - .Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, - .PlacedFootprint = { - .Offset = 0, - }, - }; - - D3D12_RESOURCE_STATES dst_state = - dzn_image_layout_to_state(layout, VK_IMAGE_ASPECT_COLOR_BIT); - D3D12_RESOURCE_BARRIER barrier = { - .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, - .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, - .Transition = { - .pResource = src_res, - .StateBefore = D3D12_RESOURCE_STATE_GENERIC_READ, - .StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE, - }, - }; - - ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier); - - barrier.Transition.pResource = image->res; - for (uint32_t r = 0; r < range_count; r++) { - uint32_t level_count = dzn_get_level_count(image, &ranges[r]); - uint32_t layer_count = dzn_get_layer_count(image, &ranges[r]); - - dzn_foreach_aspect(aspect, ranges[r].aspectMask) { - for (uint32_t lvl = 0; lvl < level_count; lvl++) { - uint32_t w = u_minify(image->vk.extent.width, ranges[r].baseMipLevel + lvl); - uint32_t h = u_minify(image->vk.extent.height, ranges[r].baseMipLevel + lvl); - uint32_t d = u_minify(image->vk.extent.depth, ranges[r].baseMipLevel + lvl); - VkImageSubresourceLayers subres = { - .aspectMask = (VkImageAspectFlags)aspect, - .mipLevel = ranges[r].baseMipLevel + lvl, - .baseArrayLayer = ranges[r].baseArrayLayer, - .layerCount = layer_count, - }; - - for (uint32_t layer = 0; layer < layer_count; layer++) { - if (dst_state != D3D12_RESOURCE_STATE_COPY_DEST) { - barrier.Transition.Subresource = - dzn_image_range_get_subresource_index(image, &ranges[r], aspect, lvl, 
layer); - barrier.Transition.StateBefore = dst_state; - barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST; - ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier); - } - - D3D12_TEXTURE_COPY_LOCATION dst_loc = - dzn_image_get_copy_loc(image, &subres, aspect, layer); - - src_loc.PlacedFootprint.Footprint.Format = - dst_loc.Type == D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT ? - dst_loc.PlacedFootprint.Footprint.Format : - image->desc.Format; - src_loc.PlacedFootprint.Footprint.Width = w; - src_loc.PlacedFootprint.Footprint.Height = h; - src_loc.PlacedFootprint.Footprint.Depth = d; - src_loc.PlacedFootprint.Footprint.RowPitch = - ALIGN_NPOT(w * blksize, fill_step); - D3D12_BOX src_box = { - .left = 0, - .top = 0, - .front = 0, - .right = w, - .bottom = h, - .back = d, - }; - - ID3D12GraphicsCommandList1_CopyTextureRegion(cmdbuf->cmdlist, &dst_loc, 0, 0, 0, - &src_loc, &src_box); - - if (dst_state != D3D12_RESOURCE_STATE_COPY_DEST) { - barrier.Transition.StateAfter = dst_state; - barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; - ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier); - } - } - } - } - } -} - -static void -dzn_cmd_buffer_clear_attachment(struct dzn_cmd_buffer *cmdbuf, - uint32_t idx, - const VkClearValue *value, - VkImageAspectFlags aspects, - uint32_t base_layer, - uint32_t layer_count, - uint32_t rect_count, - D3D12_RECT *rects) -{ - if (idx == VK_ATTACHMENT_UNUSED) - return; - - struct dzn_image_view *view = cmdbuf->state.framebuffer->attachments[idx]; - struct dzn_image *image = container_of(view->vk.image, struct dzn_image, vk); - - VkImageSubresourceRange range = { - .aspectMask = aspects, - .baseMipLevel = view->vk.base_mip_level, - .levelCount = 1, - .baseArrayLayer = view->vk.base_array_layer + base_layer, - .layerCount = layer_count, - }; - bool all_layers = - base_layer == 0 && - (layer_count == view->vk.layer_count || - layer_count == VK_REMAINING_ARRAY_LAYERS); - - if (vk_format_is_depth_or_stencil(view->vk.format)) { - D3D12_CLEAR_FLAGS flags = (D3D12_CLEAR_FLAGS)0; - - if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) - flags |= D3D12_CLEAR_FLAG_DEPTH; - if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) - flags |= D3D12_CLEAR_FLAG_STENCIL; - - if (flags != 0) { - D3D12_DEPTH_STENCIL_VIEW_DESC desc = dzn_image_get_dsv_desc(image, &range, 0); - D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_dsv(cmdbuf, image, &desc); - ID3D12GraphicsCommandList1_ClearDepthStencilView(cmdbuf->cmdlist, handle, flags, - value->depthStencil.depth, - value->depthStencil.stencil, - rect_count, rects); - } - } else if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) { - VkClearColorValue color = adjust_clear_color(view->vk.format, &value->color); - bool clear_with_cpy = false; - float vals[4]; - - if (vk_format_is_sint(view->vk.format)) { - for (uint32_t i = 0; i < 4; i++) { - vals[i] = color.int32[i]; - if (color.int32[i] != (int32_t)vals[i]) { - clear_with_cpy = true; - break; - } - } - } else if (vk_format_is_uint(view->vk.format)) { - for (uint32_t i = 0; i < 4; i++) { - vals[i] = color.uint32[i]; - if (color.uint32[i] != (uint32_t)vals[i]) { - clear_with_cpy = true; - break; - } - } - } else { - for (uint32_t i = 0; i < 4; i++) - vals[i] = color.float32[i]; - } - - if (clear_with_cpy) { - dzn_cmd_buffer_clear_rects_with_copy(cmdbuf, image, - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, - &value->color, - &range, rect_count, rects); - } else { - D3D12_RENDER_TARGET_VIEW_DESC desc = dzn_image_get_rtv_desc(image, &range, 0); - 
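-         /* For integer formats we only get here when every component
-          * survived the round-trip through float: e.g. a uint32 clear value
-          * of 16777217 (2^24 + 1) rounds to 16777216.0f, so it has to take
-          * the copy path above instead of ClearRenderTargetView(), which
-          * only accepts FLOAT[4]. */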
D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_rtv(cmdbuf, image, &desc); - ID3D12GraphicsCommandList1_ClearRenderTargetView(cmdbuf->cmdlist, handle, vals, rect_count, rects); - } - } -} - -static void -dzn_cmd_buffer_clear_color(struct dzn_cmd_buffer *cmdbuf, - const struct dzn_image *image, - VkImageLayout layout, - const VkClearColorValue *col, - uint32_t range_count, - const VkImageSubresourceRange *ranges) -{ - if (!(image->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)) { - dzn_cmd_buffer_clear_ranges_with_copy(cmdbuf, image, layout, col, range_count, ranges); - return; - } - - VkClearColorValue color = adjust_clear_color(image->vk.format, col); - float clear_vals[4]; - - enum pipe_format pfmt = vk_format_to_pipe_format(image->vk.format); - - if (util_format_is_pure_sint(pfmt)) { - for (uint32_t c = 0; c < ARRAY_SIZE(clear_vals); c++) { - clear_vals[c] = color.int32[c]; - if (color.int32[c] != (int32_t)clear_vals[c]) { - dzn_cmd_buffer_clear_ranges_with_copy(cmdbuf, image, layout, col, range_count, ranges); - return; - } - } - } else if (util_format_is_pure_uint(pfmt)) { - for (uint32_t c = 0; c < ARRAY_SIZE(clear_vals); c++) { - clear_vals[c] = color.uint32[c]; - if (color.uint32[c] != (uint32_t)clear_vals[c]) { - dzn_cmd_buffer_clear_ranges_with_copy(cmdbuf, image, layout, col, range_count, ranges); - return; - } - } - } else { - memcpy(clear_vals, color.float32, sizeof(clear_vals)); - } - - for (uint32_t r = 0; r < range_count; r++) { - const VkImageSubresourceRange *range = &ranges[r]; - uint32_t layer_count = dzn_get_layer_count(image, range); - uint32_t level_count = dzn_get_level_count(image, range); - - for (uint32_t lvl = 0; lvl < level_count; lvl++) { - D3D12_RESOURCE_BARRIER barrier = { - .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, - .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, - .Transition = { - .pResource = image->res, - .StateBefore = - dzn_image_layout_to_state(layout, VK_IMAGE_ASPECT_COLOR_BIT), - .StateAfter = D3D12_RESOURCE_STATE_RENDER_TARGET, - }, - }; - - if (barrier.Transition.StateBefore != barrier.Transition.StateAfter) { - for (uint32_t layer = 0; layer < layer_count; layer++) { - barrier.Transition.Subresource = - dzn_image_range_get_subresource_index(image, range, - VK_IMAGE_ASPECT_COLOR_BIT, - lvl, layer); - ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier); - } - } - - VkImageSubresourceRange view_range = *range; - - if (image->vk.image_type == VK_IMAGE_TYPE_3D) { - view_range.baseArrayLayer = 0; - view_range.layerCount = u_minify(image->vk.extent.depth, range->baseMipLevel + lvl); - } - - D3D12_RENDER_TARGET_VIEW_DESC desc = dzn_image_get_rtv_desc(image, &view_range, lvl); - D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_rtv(cmdbuf, image, &desc); - ID3D12GraphicsCommandList1_ClearRenderTargetView(cmdbuf->cmdlist, handle, clear_vals, 0, NULL); - - if (barrier.Transition.StateBefore != barrier.Transition.StateAfter) { - DZN_SWAP(D3D12_RESOURCE_STATES, barrier.Transition.StateBefore, barrier.Transition.StateAfter); - - for (uint32_t layer = 0; layer < layer_count; layer++) { - barrier.Transition.Subresource = - dzn_image_range_get_subresource_index(image, range, VK_IMAGE_ASPECT_COLOR_BIT, lvl, layer); - ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier); - } - } - } - } -} - -static void -dzn_cmd_buffer_clear_zs(struct dzn_cmd_buffer *cmdbuf, - const struct dzn_image *image, - VkImageLayout layout, - const VkClearDepthStencilValue *zs, - uint32_t range_count, - const 
VkImageSubresourceRange *ranges)
-{
-   assert(image->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL);
-
-   for (uint32_t r = 0; r < range_count; r++) {
-      const VkImageSubresourceRange *range = &ranges[r];
-      uint32_t layer_count = dzn_get_layer_count(image, range);
-      uint32_t level_count = dzn_get_level_count(image, range);
-
-      D3D12_CLEAR_FLAGS flags = (D3D12_CLEAR_FLAGS)0;
-
-      if (range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
-         flags |= D3D12_CLEAR_FLAG_DEPTH;
-      if (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)
-         flags |= D3D12_CLEAR_FLAG_STENCIL;
-
-      for (uint32_t lvl = 0; lvl < level_count; lvl++) {
-         uint32_t barrier_count = 0;
-         D3D12_RESOURCE_BARRIER barriers[2];
-         VkImageAspectFlagBits barrier_aspects[2];
-
-         dzn_foreach_aspect(aspect, range->aspectMask) {
-            barrier_aspects[barrier_count] = aspect;
-            barriers[barrier_count] = (D3D12_RESOURCE_BARRIER) {
-               .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
-               .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
-               .Transition = {
-                  .pResource = image->res,
-                  .StateBefore = dzn_image_layout_to_state(layout, aspect),
-                  .StateAfter = D3D12_RESOURCE_STATE_DEPTH_WRITE,
-               },
-            };
-
-            if (barriers[barrier_count].Transition.StateBefore != barriers[barrier_count].Transition.StateAfter)
-               barrier_count++;
-         }
-
-         if (barrier_count > 0) {
-            for (uint32_t layer = 0; layer < layer_count; layer++) {
-               for (uint32_t b = 0; b < barrier_count; b++) {
-                  barriers[b].Transition.Subresource =
-                     dzn_image_range_get_subresource_index(image, range, barrier_aspects[b], lvl, layer);
-               }
-
-               ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist,
-                                                          barrier_count,
-                                                          barriers);
-            }
-         }
-
-         D3D12_DEPTH_STENCIL_VIEW_DESC desc = dzn_image_get_dsv_desc(image, range, lvl);
-         D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_dsv(cmdbuf, image, &desc);
-         ID3D12GraphicsCommandList1_ClearDepthStencilView(cmdbuf->cmdlist,
-                                                          handle, flags,
-                                                          zs->depth,
-                                                          zs->stencil,
-                                                          0, NULL);
-
-         if (barrier_count > 0) {
-            for (uint32_t b = 0; b < barrier_count; b++)
-               DZN_SWAP(D3D12_RESOURCE_STATES, barriers[b].Transition.StateBefore, barriers[b].Transition.StateAfter);
-
-            for (uint32_t layer = 0; layer < layer_count; layer++) {
-               for (uint32_t b = 0; b < barrier_count; b++) {
-                  barriers[b].Transition.Subresource =
-                     dzn_image_range_get_subresource_index(image, range, barrier_aspects[b], lvl, layer);
-               }
-
-               ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist,
-                                                          barrier_count,
-                                                          barriers);
-            }
-         }
-      }
-   }
-}
-
-static void
-dzn_cmd_buffer_copy_buf2img_region(struct dzn_cmd_buffer *cmdbuf,
-                                   const VkCopyBufferToImageInfo2 *info,
-                                   uint32_t r,
-                                   VkImageAspectFlagBits aspect,
-                                   uint32_t l)
-{
-   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
-   VK_FROM_HANDLE(dzn_buffer, src_buffer, info->srcBuffer);
-   VK_FROM_HANDLE(dzn_image, dst_image, info->dstImage);
-
-   ID3D12Device1 *dev = device->dev;
-   ID3D12GraphicsCommandList1 *cmdlist = cmdbuf->cmdlist;
-
-   const VkBufferImageCopy2 *region = &info->pRegions[r];
-   enum pipe_format pfmt = vk_format_to_pipe_format(dst_image->vk.format);
-   uint32_t blkh = util_format_get_blockheight(pfmt);
-   uint32_t blkd = util_format_get_blockdepth(pfmt);
-
-   D3D12_TEXTURE_COPY_LOCATION dst_img_loc =
-      dzn_image_get_copy_loc(dst_image, &region->imageSubresource, aspect, l);
-   D3D12_TEXTURE_COPY_LOCATION src_buf_loc =
-      dzn_buffer_get_copy_loc(src_buffer, dst_image->vk.format, region, aspect, l);
-
-   if (dzn_buffer_supports_region_copy(&src_buf_loc)) {
-      /* RowPitch and Offset are properly aligned, we can copy
-       * the
whole thing in one call. - */ - D3D12_BOX src_box = { - .left = 0, - .top = 0, - .front = 0, - .right = region->imageExtent.width, - .bottom = region->imageExtent.height, - .back = region->imageExtent.depth, - }; - - ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, &dst_img_loc, - region->imageOffset.x, - region->imageOffset.y, - region->imageOffset.z, - &src_buf_loc, &src_box); - return; - } - - /* Copy line-by-line if things are not properly aligned. */ - D3D12_BOX src_box = { - .top = 0, - .front = 0, - .bottom = blkh, - .back = blkd, - }; - - for (uint32_t z = 0; z < region->imageExtent.depth; z += blkd) { - for (uint32_t y = 0; y < region->imageExtent.height; y += blkh) { - uint32_t src_x; - - D3D12_TEXTURE_COPY_LOCATION src_buf_line_loc = - dzn_buffer_get_line_copy_loc(src_buffer, dst_image->vk.format, - region, &src_buf_loc, - y, z, &src_x); - - src_box.left = src_x; - src_box.right = src_x + region->imageExtent.width; - ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, - &dst_img_loc, - region->imageOffset.x, - region->imageOffset.y + y, - region->imageOffset.z + z, - &src_buf_line_loc, - &src_box); - } - } -} - -static void -dzn_cmd_buffer_copy_img2buf_region(struct dzn_cmd_buffer *cmdbuf, - const VkCopyImageToBufferInfo2 *info, - uint32_t r, - VkImageAspectFlagBits aspect, - uint32_t l) -{ - struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); - VK_FROM_HANDLE(dzn_image, src_image, info->srcImage); - VK_FROM_HANDLE(dzn_buffer, dst_buffer, info->dstBuffer); - - ID3D12Device1 *dev = device->dev; - ID3D12GraphicsCommandList1 *cmdlist = cmdbuf->cmdlist; - - const VkBufferImageCopy2 *region = &info->pRegions[r]; - enum pipe_format pfmt = vk_format_to_pipe_format(src_image->vk.format); - uint32_t blkh = util_format_get_blockheight(pfmt); - uint32_t blkd = util_format_get_blockdepth(pfmt); - - D3D12_TEXTURE_COPY_LOCATION src_img_loc = - dzn_image_get_copy_loc(src_image, ®ion->imageSubresource, aspect, l); - D3D12_TEXTURE_COPY_LOCATION dst_buf_loc = - dzn_buffer_get_copy_loc(dst_buffer, src_image->vk.format, region, aspect, l); - - if (dzn_buffer_supports_region_copy(&dst_buf_loc)) { - /* RowPitch and Offset are properly aligned on 256 bytes, we can copy - * the whole thing in one call. - */ - D3D12_BOX src_box = { - .left = (UINT)region->imageOffset.x, - .top = (UINT)region->imageOffset.y, - .front = (UINT)region->imageOffset.z, - .right = (UINT)(region->imageOffset.x + region->imageExtent.width), - .bottom = (UINT)(region->imageOffset.y + region->imageExtent.height), - .back = (UINT)(region->imageOffset.z + region->imageExtent.depth), - }; - - ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, &dst_buf_loc, - 0, 0, 0, &src_img_loc, - &src_box); - return; - } - - D3D12_BOX src_box = { - .left = (UINT)region->imageOffset.x, - .right = (UINT)(region->imageOffset.x + region->imageExtent.width), - }; - - /* Copy line-by-line if things are not properly aligned. 
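-    * D3D12 requires PlacedFootprint.Offset to be aligned on
-    * D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT (512) and RowPitch on
-    * D3D12_TEXTURE_DATA_PITCH_ALIGNMENT (256), which arbitrary Vulkan
-    * bufferOffset/bufferRowLength combinations can't always satisfy, so
-    * each line gets its own footprint re-anchored at an aligned offset.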
*/ - for (uint32_t z = 0; z < region->imageExtent.depth; z += blkd) { - src_box.front = region->imageOffset.z + z; - src_box.back = src_box.front + blkd; - - for (uint32_t y = 0; y < region->imageExtent.height; y += blkh) { - uint32_t dst_x; - - D3D12_TEXTURE_COPY_LOCATION dst_buf_line_loc = - dzn_buffer_get_line_copy_loc(dst_buffer, src_image->vk.format, - region, &dst_buf_loc, - y, z, &dst_x); - - src_box.top = region->imageOffset.y + y; - src_box.bottom = src_box.top + blkh; - - ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, - &dst_buf_line_loc, - dst_x, 0, 0, - &src_img_loc, - &src_box); - } - } -} - -static void -dzn_cmd_buffer_copy_img_chunk(struct dzn_cmd_buffer *cmdbuf, - const VkCopyImageInfo2 *info, - D3D12_RESOURCE_DESC *tmp_desc, - D3D12_TEXTURE_COPY_LOCATION *tmp_loc, - uint32_t r, - VkImageAspectFlagBits aspect, - uint32_t l) -{ - struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); - VK_FROM_HANDLE(dzn_image, src, info->srcImage); - VK_FROM_HANDLE(dzn_image, dst, info->dstImage); - - ID3D12Device1 *dev = device->dev; - ID3D12GraphicsCommandList1 *cmdlist = cmdbuf->cmdlist; - - const VkImageCopy2 *region = &info->pRegions[r]; - const VkImageSubresourceLayers *src_subres = ®ion->srcSubresource; - const VkImageSubresourceLayers *dst_subres = ®ion->dstSubresource; - VkFormat src_format = - dzn_image_get_plane_format(src->vk.format, aspect); - VkFormat dst_format = - dzn_image_get_plane_format(dst->vk.format, aspect); - - enum pipe_format src_pfmt = vk_format_to_pipe_format(src_format); - uint32_t src_blkw = util_format_get_blockwidth(src_pfmt); - uint32_t src_blkh = util_format_get_blockheight(src_pfmt); - uint32_t src_blkd = util_format_get_blockdepth(src_pfmt); - enum pipe_format dst_pfmt = vk_format_to_pipe_format(dst_format); - uint32_t dst_blkw = util_format_get_blockwidth(dst_pfmt); - uint32_t dst_blkh = util_format_get_blockheight(dst_pfmt); - uint32_t dst_blkd = util_format_get_blockdepth(dst_pfmt); - uint32_t dst_z = region->dstOffset.z, src_z = region->srcOffset.z; - uint32_t depth = region->extent.depth; - uint32_t dst_l = l, src_l = l; - - assert(src_subres->aspectMask == dst_subres->aspectMask); - - if (src->vk.image_type == VK_IMAGE_TYPE_3D && - dst->vk.image_type == VK_IMAGE_TYPE_2D) { - assert(src_subres->layerCount == 1); - src_l = 0; - src_z += l; - depth = 1; - } else if (src->vk.image_type == VK_IMAGE_TYPE_2D && - dst->vk.image_type == VK_IMAGE_TYPE_3D) { - assert(dst_subres->layerCount == 1); - dst_l = 0; - dst_z += l; - depth = 1; - } else { - assert(src_subres->layerCount == dst_subres->layerCount); - } - - D3D12_TEXTURE_COPY_LOCATION dst_loc = dzn_image_get_copy_loc(dst, dst_subres, aspect, dst_l); - D3D12_TEXTURE_COPY_LOCATION src_loc = dzn_image_get_copy_loc(src, src_subres, aspect, src_l); - - D3D12_BOX src_box = { - .left = (UINT)MAX2(region->srcOffset.x, 0), - .top = (UINT)MAX2(region->srcOffset.y, 0), - .front = (UINT)MAX2(src_z, 0), - .right = (UINT)region->srcOffset.x + region->extent.width, - .bottom = (UINT)region->srcOffset.y + region->extent.height, - .back = (UINT)src_z + depth, - }; - - if (!tmp_loc->pResource) { - ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, &dst_loc, - region->dstOffset.x, - region->dstOffset.y, - dst_z, &src_loc, - &src_box); - return; - } - - tmp_desc->Format = - dzn_image_get_placed_footprint_format(src->vk.format, aspect); - tmp_desc->Width = region->extent.width; - tmp_desc->Height = region->extent.height; - - ID3D12Device1_GetCopyableFootprints(dev, tmp_desc, - 0, 
1, 0,
-                                       &tmp_loc->PlacedFootprint,
-                                       NULL, NULL, NULL);
-
-   tmp_loc->PlacedFootprint.Footprint.Depth = depth;
-
-   D3D12_RESOURCE_BARRIER barrier = {
-      .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
-      .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
-      .Transition = {
-         .pResource = tmp_loc->pResource,
-         .Subresource = 0,
-         .StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE,
-         .StateAfter = D3D12_RESOURCE_STATE_COPY_DEST,
-      },
-   };
-
-   if (r > 0 || l > 0)
-      ID3D12GraphicsCommandList1_ResourceBarrier(cmdlist, 1, &barrier);
-
-   ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, tmp_loc, 0, 0, 0, &src_loc, &src_box);
-
-   DZN_SWAP(D3D12_RESOURCE_STATES, barrier.Transition.StateBefore, barrier.Transition.StateAfter);
-   ID3D12GraphicsCommandList1_ResourceBarrier(cmdlist, 1, &barrier);
-
-   tmp_desc->Format =
-      dzn_image_get_placed_footprint_format(dst->vk.format, aspect);
-   if (src_blkw != dst_blkw)
-      tmp_desc->Width = DIV_ROUND_UP(region->extent.width, src_blkw) * dst_blkw;
-   if (src_blkh != dst_blkh)
-      tmp_desc->Height = DIV_ROUND_UP(region->extent.height, src_blkh) * dst_blkh;
-
-   ID3D12Device1_GetCopyableFootprints(device->dev, tmp_desc,
-                                       0, 1, 0,
-                                       &tmp_loc->PlacedFootprint,
-                                       NULL, NULL, NULL);
-
-   if (src_blkd != dst_blkd) {
-      tmp_loc->PlacedFootprint.Footprint.Depth =
-         DIV_ROUND_UP(depth, src_blkd) * dst_blkd;
-   } else {
-      tmp_loc->PlacedFootprint.Footprint.Depth = region->extent.depth;
-   }
-
-   D3D12_BOX tmp_box = {
-      .left = 0,
-      .top = 0,
-      .front = 0,
-      .right = tmp_loc->PlacedFootprint.Footprint.Width,
-      .bottom = tmp_loc->PlacedFootprint.Footprint.Height,
-      .back = tmp_loc->PlacedFootprint.Footprint.Depth,
-   };
-
-   ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, &dst_loc,
-                                                region->dstOffset.x,
-                                                region->dstOffset.y,
-                                                dst_z,
-                                                tmp_loc, &tmp_box);
-}
-
-static void
-dzn_cmd_buffer_blit_prepare_src_view(struct dzn_cmd_buffer *cmdbuf,
-                                     VkImage image,
-                                     VkImageAspectFlagBits aspect,
-                                     const VkImageSubresourceLayers *subres,
-                                     struct dzn_descriptor_heap *heap,
-                                     uint32_t heap_slot)
-{
-   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
-   VK_FROM_HANDLE(dzn_image, img, image);
-   VkImageViewCreateInfo iview_info = {
-      .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
-      .image = image,
-      .format = img->vk.format,
-      .subresourceRange = {
-         .aspectMask = (VkImageAspectFlags)aspect,
-         .baseMipLevel = subres->mipLevel,
-         .levelCount = 1,
-         .baseArrayLayer = subres->baseArrayLayer,
-         .layerCount = subres->layerCount,
-      },
-   };
-
-   if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT) {
-      iview_info.components.r = VK_COMPONENT_SWIZZLE_G;
-      iview_info.components.g = VK_COMPONENT_SWIZZLE_G;
-      iview_info.components.b = VK_COMPONENT_SWIZZLE_G;
-      iview_info.components.a = VK_COMPONENT_SWIZZLE_G;
-   } else if (aspect == VK_IMAGE_ASPECT_DEPTH_BIT) {
-      iview_info.components.r = VK_COMPONENT_SWIZZLE_R;
-      iview_info.components.g = VK_COMPONENT_SWIZZLE_R;
-      iview_info.components.b = VK_COMPONENT_SWIZZLE_R;
-      iview_info.components.a = VK_COMPONENT_SWIZZLE_R;
-   }
-
-   switch (img->vk.image_type) {
-   case VK_IMAGE_TYPE_1D:
-      iview_info.viewType = img->vk.array_layers > 1 ?
-                            VK_IMAGE_VIEW_TYPE_1D_ARRAY : VK_IMAGE_VIEW_TYPE_1D;
-      break;
-   case VK_IMAGE_TYPE_2D:
-      iview_info.viewType = img->vk.array_layers > 1 ?
- VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D; - break; - case VK_IMAGE_TYPE_3D: - iview_info.viewType = VK_IMAGE_VIEW_TYPE_3D; - break; - default: - unreachable("Invalid type"); - } - - struct dzn_image_view iview; - dzn_image_view_init(device, &iview, &iview_info); - dzn_descriptor_heap_write_image_view_desc(heap, heap_slot, false, false, &iview); - dzn_image_view_finish(&iview); - - D3D12_GPU_DESCRIPTOR_HANDLE handle = - dzn_descriptor_heap_get_gpu_handle(heap, heap_slot); - ID3D12GraphicsCommandList1_SetGraphicsRootDescriptorTable(cmdbuf->cmdlist, 0, handle); -} - -static void -dzn_cmd_buffer_blit_prepare_dst_view(struct dzn_cmd_buffer *cmdbuf, - struct dzn_image *img, - VkImageAspectFlagBits aspect, - uint32_t level, uint32_t layer) -{ - bool ds = aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT); - VkImageSubresourceRange range = { - .aspectMask = (VkImageAspectFlags)aspect, - .baseMipLevel = level, - .levelCount = 1, - .baseArrayLayer = layer, - .layerCount = 1, - }; - - if (ds) { - D3D12_DEPTH_STENCIL_VIEW_DESC desc = dzn_image_get_dsv_desc(img, &range, 0); - D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_dsv(cmdbuf, img, &desc); - ID3D12GraphicsCommandList1_OMSetRenderTargets(cmdbuf->cmdlist, 0, NULL, TRUE, &handle); - } else { - D3D12_RENDER_TARGET_VIEW_DESC desc = dzn_image_get_rtv_desc(img, &range, 0); - D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_rtv(cmdbuf, img, &desc); - ID3D12GraphicsCommandList1_OMSetRenderTargets(cmdbuf->cmdlist, 1, &handle, FALSE, NULL); - } -} - -static void -dzn_cmd_buffer_blit_set_pipeline(struct dzn_cmd_buffer *cmdbuf, - const struct dzn_image *src, - const struct dzn_image *dst, - VkImageAspectFlagBits aspect, - VkFilter filter, bool resolve) -{ - struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); - enum pipe_format pfmt = vk_format_to_pipe_format(dst->vk.format); - VkImageUsageFlags usage = - vk_format_is_depth_or_stencil(dst->vk.format) ? - VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT : - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; - struct dzn_meta_blit_key ctx_key = { - .out_format = dzn_image_get_dxgi_format(dst->vk.format, usage, aspect), - .samples = (uint32_t)src->vk.samples, - .loc = (uint32_t)(aspect == VK_IMAGE_ASPECT_DEPTH_BIT ? - FRAG_RESULT_DEPTH : - aspect == VK_IMAGE_ASPECT_STENCIL_BIT ? - FRAG_RESULT_STENCIL : - FRAG_RESULT_DATA0), - .out_type = (uint32_t)(util_format_is_pure_uint(pfmt) ? GLSL_TYPE_UINT : - util_format_is_pure_sint(pfmt) ? GLSL_TYPE_INT : - aspect == VK_IMAGE_ASPECT_STENCIL_BIT ? GLSL_TYPE_UINT : - GLSL_TYPE_FLOAT), - .sampler_dim = (uint32_t)(src->vk.image_type == VK_IMAGE_TYPE_1D ? GLSL_SAMPLER_DIM_1D : - src->vk.image_type == VK_IMAGE_TYPE_2D && src->vk.samples == 1 ? GLSL_SAMPLER_DIM_2D : - src->vk.image_type == VK_IMAGE_TYPE_2D && src->vk.samples > 1 ? 
GLSL_SAMPLER_DIM_MS : - GLSL_SAMPLER_DIM_3D), - .src_is_array = src->vk.array_layers > 1, - .resolve = resolve, - .linear_filter = filter == VK_FILTER_LINEAR, - .padding = 0, - }; - - const struct dzn_meta_blit *ctx = - dzn_meta_blits_get_context(device, &ctx_key); - assert(ctx); - - ID3D12GraphicsCommandList1_SetGraphicsRootSignature(cmdbuf->cmdlist, ctx->root_sig); - ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, ctx->pipeline_state); -} - -static void -dzn_cmd_buffer_blit_set_2d_region(struct dzn_cmd_buffer *cmdbuf, - const struct dzn_image *src, - const VkImageSubresourceLayers *src_subres, - const VkOffset3D *src_offsets, - const struct dzn_image *dst, - const VkImageSubresourceLayers *dst_subres, - const VkOffset3D *dst_offsets, - bool normalize_src_coords) -{ - uint32_t dst_w = u_minify(dst->vk.extent.width, dst_subres->mipLevel); - uint32_t dst_h = u_minify(dst->vk.extent.height, dst_subres->mipLevel); - uint32_t src_w = u_minify(src->vk.extent.width, src_subres->mipLevel); - uint32_t src_h = u_minify(src->vk.extent.height, src_subres->mipLevel); - - float dst_pos[4] = { - (2 * (float)dst_offsets[0].x / (float)dst_w) - 1.0f, -((2 * (float)dst_offsets[0].y / (float)dst_h) - 1.0f), - (2 * (float)dst_offsets[1].x / (float)dst_w) - 1.0f, -((2 * (float)dst_offsets[1].y / (float)dst_h) - 1.0f), - }; - - float src_pos[4] = { - (float)src_offsets[0].x, (float)src_offsets[0].y, - (float)src_offsets[1].x, (float)src_offsets[1].y, - }; - - if (normalize_src_coords) { - src_pos[0] /= src_w; - src_pos[1] /= src_h; - src_pos[2] /= src_w; - src_pos[3] /= src_h; - } - - float coords[] = { - dst_pos[0], dst_pos[1], src_pos[0], src_pos[1], - dst_pos[2], dst_pos[1], src_pos[2], src_pos[1], - dst_pos[0], dst_pos[3], src_pos[0], src_pos[3], - dst_pos[2], dst_pos[3], src_pos[2], src_pos[3], - }; - - ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, 1, ARRAY_SIZE(coords), coords, 0); - - D3D12_VIEWPORT vp = { - .TopLeftX = 0, - .TopLeftY = 0, - .Width = (float)dst_w, - .Height = (float)dst_h, - .MinDepth = 0, - .MaxDepth = 1, - }; - ID3D12GraphicsCommandList1_RSSetViewports(cmdbuf->cmdlist, 1, &vp); - - D3D12_RECT scissor = { - .left = MIN2(dst_offsets[0].x, dst_offsets[1].x), - .top = MIN2(dst_offsets[0].y, dst_offsets[1].y), - .right = MAX2(dst_offsets[0].x, dst_offsets[1].x), - .bottom = MAX2(dst_offsets[0].y, dst_offsets[1].y), - }; - ID3D12GraphicsCommandList1_RSSetScissorRects(cmdbuf->cmdlist, 1, &scissor); -} - -static void -dzn_cmd_buffer_blit_issue_barriers(struct dzn_cmd_buffer *cmdbuf, - struct dzn_image *src, VkImageLayout src_layout, - const VkImageSubresourceLayers *src_subres, - struct dzn_image *dst, VkImageLayout dst_layout, - const VkImageSubresourceLayers *dst_subres, - VkImageAspectFlagBits aspect, - bool post) -{ - bool ds = aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT); - D3D12_RESOURCE_BARRIER barriers[2] = { - { - .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, - .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, - .Transition = { - .pResource = src->res, - .StateBefore = dzn_image_layout_to_state(src_layout, aspect), - .StateAfter = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, - }, - }, - { - .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, - .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, - .Transition = { - .pResource = dst->res, - .StateBefore = dzn_image_layout_to_state(dst_layout, aspect), - .StateAfter = ds ? 
- D3D12_RESOURCE_STATE_DEPTH_WRITE : - D3D12_RESOURCE_STATE_RENDER_TARGET, - }, - }, - }; - - if (post) { - DZN_SWAP(D3D12_RESOURCE_STATES, barriers[0].Transition.StateBefore, barriers[0].Transition.StateAfter); - DZN_SWAP(D3D12_RESOURCE_STATES, barriers[1].Transition.StateBefore, barriers[1].Transition.StateAfter); - } - - uint32_t layer_count = dzn_get_layer_count(src, src_subres); - uint32_t src_level = src_subres->mipLevel; - uint32_t dst_level = dst_subres->mipLevel; - - assert(dzn_get_layer_count(dst, dst_subres) == layer_count); - assert(src_level < src->vk.mip_levels); - assert(dst_level < dst->vk.mip_levels); - - for (uint32_t layer = 0; layer < layer_count; layer++) { - barriers[0].Transition.Subresource = - dzn_image_layers_get_subresource_index(src, src_subres, aspect, layer); - barriers[1].Transition.Subresource = - dzn_image_layers_get_subresource_index(dst, dst_subres, aspect, layer); - ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, ARRAY_SIZE(barriers), barriers); - } -} - -static void -dzn_cmd_buffer_blit_region(struct dzn_cmd_buffer *cmdbuf, - const VkBlitImageInfo2 *info, - struct dzn_descriptor_heap *heap, - uint32_t *heap_slot, - uint32_t r) -{ - struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); - VK_FROM_HANDLE(dzn_image, src, info->srcImage); - VK_FROM_HANDLE(dzn_image, dst, info->dstImage); - - const VkImageBlit2 *region = &info->pRegions[r]; - bool src_is_3d = src->vk.image_type == VK_IMAGE_TYPE_3D; - bool dst_is_3d = dst->vk.image_type == VK_IMAGE_TYPE_3D; - - dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) { - dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, info->filter, false); - dzn_cmd_buffer_blit_issue_barriers(cmdbuf, - src, info->srcImageLayout, ®ion->srcSubresource, - dst, info->dstImageLayout, ®ion->dstSubresource, - aspect, false); - dzn_cmd_buffer_blit_prepare_src_view(cmdbuf, info->srcImage, - aspect, ®ion->srcSubresource, - heap, (*heap_slot)++); - dzn_cmd_buffer_blit_set_2d_region(cmdbuf, - src, ®ion->srcSubresource, region->srcOffsets, - dst, ®ion->dstSubresource, region->dstOffsets, - src->vk.samples == 1); - - uint32_t dst_depth = - region->dstOffsets[1].z > region->dstOffsets[0].z ? - region->dstOffsets[1].z - region->dstOffsets[0].z : - region->dstOffsets[0].z - region->dstOffsets[1].z; - uint32_t src_depth = - region->srcOffsets[1].z > region->srcOffsets[0].z ? - region->srcOffsets[1].z - region->srcOffsets[0].z : - region->srcOffsets[0].z - region->srcOffsets[1].z; - - uint32_t layer_count = dzn_get_layer_count(src, ®ion->srcSubresource); - uint32_t dst_level = region->dstSubresource.mipLevel; - - float src_slice_step = src_is_3d ? (float)src_depth / dst_depth : 1; - if (region->srcOffsets[0].z > region->srcOffsets[1].z) - src_slice_step = -src_slice_step; - float src_z_coord = - src_is_3d ? (float)region->srcOffsets[0].z + (src_slice_step * 0.5f) : 0; - uint32_t slice_count = dst_is_3d ? dst_depth : layer_count; - uint32_t dst_z_coord = - dst_is_3d ? region->dstOffsets[0].z : region->dstSubresource.baseArrayLayer; - if (region->dstOffsets[0].z > region->dstOffsets[1].z) - dst_z_coord--; - - uint32_t dst_slice_step = region->dstOffsets[0].z < region->dstOffsets[1].z ? 
- 1 : -1; - - /* Normalize the src coordinates/step */ - if (src_is_3d) { - src_z_coord /= src->vk.extent.depth; - src_slice_step /= src->vk.extent.depth; - } - - for (uint32_t slice = 0; slice < slice_count; slice++) { - dzn_cmd_buffer_blit_prepare_dst_view(cmdbuf, dst, aspect, dst_level, dst_z_coord); - ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, 1, 1, &src_z_coord, 16); - ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0); - src_z_coord += src_slice_step; - dst_z_coord += dst_slice_step; - } - - dzn_cmd_buffer_blit_issue_barriers(cmdbuf, - src, info->srcImageLayout, ®ion->srcSubresource, - dst, info->dstImageLayout, ®ion->dstSubresource, - aspect, true); - } -} - -static void -dzn_cmd_buffer_resolve_region(struct dzn_cmd_buffer *cmdbuf, - const VkResolveImageInfo2 *info, - struct dzn_descriptor_heap *heap, - uint32_t *heap_slot, - uint32_t r) -{ - struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); - VK_FROM_HANDLE(dzn_image, src, info->srcImage); - VK_FROM_HANDLE(dzn_image, dst, info->dstImage); - - ID3D12Device1 *dev = device->dev; - const VkImageResolve2 *region = &info->pRegions[r]; - - dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) { - dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, VK_FILTER_NEAREST, true); - dzn_cmd_buffer_blit_issue_barriers(cmdbuf, - src, info->srcImageLayout, ®ion->srcSubresource, - dst, info->dstImageLayout, ®ion->dstSubresource, - aspect, false); - dzn_cmd_buffer_blit_prepare_src_view(cmdbuf, info->srcImage, aspect, - ®ion->srcSubresource, - heap, (*heap_slot)++); - - VkOffset3D src_offset[2] = { - { - .x = region->srcOffset.x, - .y = region->srcOffset.y, - }, - { - .x = (int32_t)(region->srcOffset.x + region->extent.width), - .y = (int32_t)(region->srcOffset.y + region->extent.height), - }, - }; - VkOffset3D dst_offset[2] = { - { - .x = region->dstOffset.x, - .y = region->dstOffset.y, - }, - { - .x = (int32_t)(region->dstOffset.x + region->extent.width), - .y = (int32_t)(region->dstOffset.y + region->extent.height), - }, - }; - - dzn_cmd_buffer_blit_set_2d_region(cmdbuf, - src, ®ion->srcSubresource, src_offset, - dst, ®ion->dstSubresource, dst_offset, - false); - - uint32_t layer_count = dzn_get_layer_count(src, ®ion->srcSubresource); - for (uint32_t layer = 0; layer < layer_count; layer++) { - float src_z_coord = layer; - - dzn_cmd_buffer_blit_prepare_dst_view(cmdbuf, - dst, aspect, region->dstSubresource.mipLevel, - region->dstSubresource.baseArrayLayer + layer); - ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, 1, 1, &src_z_coord, 16); - ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0); - } - - dzn_cmd_buffer_blit_issue_barriers(cmdbuf, - src, info->srcImageLayout, ®ion->srcSubresource, - dst, info->dstImageLayout, ®ion->dstSubresource, - aspect, true); - } -} - -static void -dzn_cmd_buffer_clear_attachments(struct dzn_cmd_buffer *cmdbuf, - uint32_t attachment_count, - const VkClearAttachment *attachments, - uint32_t rect_count, - const VkClearRect *rects) -{ - struct dzn_render_pass *pass = cmdbuf->state.pass; - const struct dzn_subpass *subpass = &pass->subpasses[cmdbuf->state.subpass]; - - for (unsigned i = 0; i < attachment_count; i++) { - uint32_t idx; - if (attachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) - idx = subpass->colors[attachments[i].colorAttachment].idx; - else - idx = subpass->zs.idx; - - for (uint32_t j = 0; j < rect_count; j++) { - D3D12_RECT rect; - - 
dzn_translate_rect(&rect, &rects[j].rect); - dzn_cmd_buffer_clear_attachment(cmdbuf, - idx, &attachments[i].clearValue, - attachments[i].aspectMask, - rects[j].baseArrayLayer, - rects[j].layerCount, - 1, &rect); - } - } -} - -static void -dzn_cmd_buffer_attachment_ref_transition(struct dzn_cmd_buffer *cmdbuf, - const struct dzn_attachment_ref *att) -{ - const struct dzn_image_view *iview = cmdbuf->state.framebuffer->attachments[att->idx]; - const struct dzn_image *image = container_of(iview->vk.image, struct dzn_image, vk); - - if (att->before == att->during) - return; - - VkImageSubresourceRange subres = { - .aspectMask = att->aspects, - .baseMipLevel = iview->vk.base_mip_level, - .levelCount = iview->vk.level_count, - .baseArrayLayer = iview->vk.base_array_layer, - .layerCount = iview->vk.layer_count, - }; - - dzn_foreach_aspect(aspect, att->aspects) { - for (uint32_t lvl = 0; lvl < iview->vk.level_count; lvl++) { - for (uint32_t layer = 0; layer < iview->vk.layer_count; layer++) { - D3D12_RESOURCE_BARRIER barrier = { - .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, - .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, - .Transition = { - .pResource = image->res, - .Subresource = - dzn_image_range_get_subresource_index(image, &subres, aspect, lvl, layer), - .StateBefore = (aspect & VK_IMAGE_ASPECT_STENCIL_BIT) ? att->stencil.before : att->before, - .StateAfter = (aspect & VK_IMAGE_ASPECT_STENCIL_BIT) ? att->stencil.during : att->during, - }, - }; - - if (barrier.Transition.StateBefore != barrier.Transition.StateAfter) - ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier); - } - } - } -} - -static void -dzn_cmd_buffer_attachment_transition(struct dzn_cmd_buffer *cmdbuf, - const struct dzn_attachment *att) -{ - const struct dzn_image_view *iview = cmdbuf->state.framebuffer->attachments[att->idx]; - const struct dzn_image *image = container_of(iview->vk.image, struct dzn_image, vk); - - if (att->last == att->after) - return; - - VkImageSubresourceRange subres = { - .aspectMask = att->aspects, - .baseMipLevel = iview->vk.base_mip_level, - .levelCount = iview->vk.level_count, - .baseArrayLayer = iview->vk.base_array_layer, - .layerCount = iview->vk.layer_count, - }; - - dzn_foreach_aspect(aspect, att->aspects) { - for (uint32_t lvl = 0; lvl < iview->vk.level_count; lvl++) { - for (uint32_t layer = 0; layer < iview->vk.layer_count; layer++) { - D3D12_RESOURCE_BARRIER barrier = { - .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, - .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, - .Transition = { - .pResource = image->res, - .Subresource = - dzn_image_range_get_subresource_index(image, &subres, aspect, lvl, layer), - .StateBefore = (aspect & VK_IMAGE_ASPECT_STENCIL_BIT) ? att->stencil.last : att->last, - .StateAfter = (aspect & VK_IMAGE_ASPECT_STENCIL_BIT) ? 
att->stencil.after : att->after,
-               },
-            };
-
-            if (barrier.Transition.StateBefore != barrier.Transition.StateAfter)
-               ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier);
-         }
-      }
-   }
-}
-
-static void
-dzn_cmd_buffer_resolve_attachment(struct dzn_cmd_buffer *cmdbuf, uint32_t i)
-{
-   const struct dzn_subpass *subpass =
-      &cmdbuf->state.pass->subpasses[cmdbuf->state.subpass];
-
-   if (subpass->resolve[i].idx == VK_ATTACHMENT_UNUSED)
-      return;
-
-   const struct dzn_framebuffer *framebuffer = cmdbuf->state.framebuffer;
-   struct dzn_image_view *src = framebuffer->attachments[subpass->colors[i].idx];
-   struct dzn_image *src_img = container_of(src->vk.image, struct dzn_image, vk);
-   struct dzn_image_view *dst = framebuffer->attachments[subpass->resolve[i].idx];
-   struct dzn_image *dst_img = container_of(dst->vk.image, struct dzn_image, vk);
-   D3D12_RESOURCE_BARRIER barriers[2];
-   uint32_t barrier_count = 0;
-
-   /* TODO: 2DArrays/3D */
-   if (subpass->colors[i].during != D3D12_RESOURCE_STATE_RESOLVE_SOURCE) {
-      barriers[barrier_count++] = (D3D12_RESOURCE_BARRIER) {
-         .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
-         .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
-         .Transition = {
-            .pResource = src_img->res,
-            .Subresource = 0,
-            .StateBefore = subpass->colors[i].during,
-            .StateAfter = D3D12_RESOURCE_STATE_RESOLVE_SOURCE,
-         },
-      };
-   }
-
-   if (subpass->resolve[i].during != D3D12_RESOURCE_STATE_RESOLVE_DEST) {
-      barriers[barrier_count++] = (D3D12_RESOURCE_BARRIER) {
-         .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
-         .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
-         .Transition = {
-            .pResource = dst_img->res,
-            .Subresource = 0,
-            .StateBefore = subpass->resolve[i].during,
-            .StateAfter = D3D12_RESOURCE_STATE_RESOLVE_DEST,
-         },
-      };
-   }
-
-   if (barrier_count)
-      ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, barrier_count, barriers);
-
-   ID3D12GraphicsCommandList1_ResolveSubresource(cmdbuf->cmdlist, dst_img->res, 0,
-                                                 src_img->res, 0,
-                                                 dst->srv_desc.Format);
-
-   for (uint32_t b = 0; b < barrier_count; b++)
-      DZN_SWAP(D3D12_RESOURCE_STATES, barriers[b].Transition.StateBefore, barriers[b].Transition.StateAfter);
-
-   if (barrier_count)
-      ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, barrier_count, barriers);
-}
-
-static void
-dzn_cmd_buffer_begin_subpass(struct dzn_cmd_buffer *cmdbuf)
-{
-   struct dzn_framebuffer *framebuffer = cmdbuf->state.framebuffer;
-   struct dzn_render_pass *pass = cmdbuf->state.pass;
-   const struct dzn_subpass *subpass = &pass->subpasses[cmdbuf->state.subpass];
-
-   D3D12_CPU_DESCRIPTOR_HANDLE rt_handles[MAX_RTS] = { 0 };
-   D3D12_CPU_DESCRIPTOR_HANDLE zs_handle = { 0 };
-
-   for (uint32_t i = 0; i < subpass->color_count; i++) {
-      if (subpass->colors[i].idx == VK_ATTACHMENT_UNUSED) continue;
-
-      struct dzn_image_view *iview = framebuffer->attachments[subpass->colors[i].idx];
-      struct dzn_image *img = container_of(iview->vk.image, struct dzn_image, vk);
-
-      rt_handles[i] = dzn_cmd_buffer_get_rtv(cmdbuf, img, &iview->rtv_desc);
-   }
-
-   if (subpass->zs.idx != VK_ATTACHMENT_UNUSED) {
-      struct dzn_image_view *iview = framebuffer->attachments[subpass->zs.idx];
-      struct dzn_image *img = container_of(iview->vk.image, struct dzn_image, vk);
-
-      zs_handle = dzn_cmd_buffer_get_dsv(cmdbuf, img, &iview->dsv_desc);
-   }
-
-   ID3D12GraphicsCommandList1_OMSetRenderTargets(cmdbuf->cmdlist, subpass->color_count,
-                                                 subpass->color_count ? rt_handles : NULL,
-                                                 FALSE, zs_handle.ptr ?
&zs_handle : NULL); - - for (uint32_t i = 0; i < subpass->color_count; i++) - dzn_cmd_buffer_attachment_ref_transition(cmdbuf, &subpass->colors[i]); - for (uint32_t i = 0; i < subpass->input_count; i++) - dzn_cmd_buffer_attachment_ref_transition(cmdbuf, &subpass->inputs[i]); - - if (subpass->zs.idx != VK_ATTACHMENT_UNUSED) - dzn_cmd_buffer_attachment_ref_transition(cmdbuf, &subpass->zs); -} - -static void -dzn_cmd_buffer_end_subpass(struct dzn_cmd_buffer *cmdbuf) -{ - const struct dzn_subpass *subpass = &cmdbuf->state.pass->subpasses[cmdbuf->state.subpass]; - - for (uint32_t i = 0; i < subpass->color_count; i++) - dzn_cmd_buffer_resolve_attachment(cmdbuf, i); -} - -static void -dzn_cmd_buffer_update_pipeline(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint) -{ - const struct dzn_pipeline *pipeline = cmdbuf->state.bindpoint[bindpoint].pipeline; - - if (!pipeline) - return; - - if (cmdbuf->state.bindpoint[bindpoint].dirty & DZN_CMD_BINDPOINT_DIRTY_PIPELINE) { - if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS) { - const struct dzn_graphics_pipeline *gfx = - (const struct dzn_graphics_pipeline *)pipeline; - ID3D12GraphicsCommandList1_SetGraphicsRootSignature(cmdbuf->cmdlist, pipeline->root.sig); - ID3D12GraphicsCommandList1_IASetPrimitiveTopology(cmdbuf->cmdlist, gfx->ia.topology); - } else { - ID3D12GraphicsCommandList1_SetComputeRootSignature(cmdbuf->cmdlist, pipeline->root.sig); - } - } - - if (cmdbuf->state.pipeline != pipeline) { - ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, pipeline->state); - cmdbuf->state.pipeline = pipeline; - } -} - -static void -dzn_cmd_buffer_update_heaps(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint) -{ - struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); - struct dzn_descriptor_state *desc_state = - &cmdbuf->state.bindpoint[bindpoint].desc_state; - struct dzn_descriptor_heap *new_heaps[NUM_POOL_TYPES] = { - desc_state->heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV], - desc_state->heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER] - }; - uint32_t new_heap_offsets[NUM_POOL_TYPES] = {}; - bool update_root_desc_table[NUM_POOL_TYPES] = {}; - const struct dzn_pipeline *pipeline = - cmdbuf->state.bindpoint[bindpoint].pipeline; - - if (!(cmdbuf->state.bindpoint[bindpoint].dirty & DZN_CMD_BINDPOINT_DIRTY_HEAPS)) - goto set_heaps; - - dzn_foreach_pool_type (type) { - uint32_t desc_count = pipeline->desc_count[type]; - if (!desc_count) - continue; - - struct dzn_descriptor_heap_pool *pool = - type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV ? 
- &cmdbuf->cbv_srv_uav_pool : &cmdbuf->sampler_pool; - uint32_t dst_offset = 0; - struct dzn_descriptor_heap *dst_heap = NULL; - uint32_t dst_heap_offset = 0; - - dzn_descriptor_heap_pool_alloc_slots(pool, device, desc_count, - &dst_heap, &dst_heap_offset); - new_heap_offsets[type] = dst_heap_offset; - update_root_desc_table[type] = true; - - for (uint32_t s = 0; s < MAX_SETS; s++) { - const struct dzn_descriptor_set *set = desc_state->sets[s].set; - if (!set) continue; - - uint32_t set_heap_offset = pipeline->sets[s].heap_offsets[type]; - uint32_t set_desc_count = pipeline->sets[s].range_desc_count[type]; - if (set_desc_count) { - mtx_lock(&set->pool->defragment_lock); - dzn_descriptor_heap_copy(dst_heap, dst_heap_offset + set_heap_offset, - &set->pool->heaps[type], set->heap_offsets[type], - set_desc_count); - mtx_unlock(&set->pool->defragment_lock); - } - - if (type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) { - uint32_t dynamic_buffer_count = pipeline->sets[s].dynamic_buffer_count; - for (uint32_t o = 0; o < dynamic_buffer_count; o++) { - uint32_t desc_heap_offset = - pipeline->sets[s].dynamic_buffer_heap_offsets[o].srv; - struct dzn_buffer_desc bdesc = set->dynamic_buffers[o]; - bdesc.offset += desc_state->sets[s].dynamic_offsets[o]; - - dzn_descriptor_heap_write_buffer_desc(dst_heap, - dst_heap_offset + set_heap_offset + desc_heap_offset, - false, &bdesc); - - if (pipeline->sets[s].dynamic_buffer_heap_offsets[o].uav != ~0) { - desc_heap_offset = pipeline->sets[s].dynamic_buffer_heap_offsets[o].uav; - dzn_descriptor_heap_write_buffer_desc(dst_heap, - dst_heap_offset + set_heap_offset + desc_heap_offset, - true, &bdesc); - } - } - } - } - - new_heaps[type] = dst_heap; - } - -set_heaps: - if (new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] || - new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER] != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER]) { - ID3D12DescriptorHeap *desc_heaps[2]; - uint32_t num_desc_heaps = 0; - if (new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV]) - desc_heaps[num_desc_heaps++] = new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV]->heap; - if (new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER]) - desc_heaps[num_desc_heaps++] = new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER]->heap; - ID3D12GraphicsCommandList1_SetDescriptorHeaps(cmdbuf->cmdlist, num_desc_heaps, desc_heaps); - - for (unsigned h = 0; h < ARRAY_SIZE(cmdbuf->state.heaps); h++) - cmdbuf->state.heaps[h] = new_heaps[h]; - } - - for (uint32_t r = 0; r < pipeline->root.sets_param_count; r++) { - D3D12_DESCRIPTOR_HEAP_TYPE type = pipeline->root.type[r]; - - if (!update_root_desc_table[type]) - continue; - - D3D12_GPU_DESCRIPTOR_HANDLE handle = - dzn_descriptor_heap_get_gpu_handle(new_heaps[type], new_heap_offsets[type]); - - if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS) - ID3D12GraphicsCommandList1_SetGraphicsRootDescriptorTable(cmdbuf->cmdlist, r, handle); - else - ID3D12GraphicsCommandList1_SetComputeRootDescriptorTable(cmdbuf->cmdlist, r, handle); - } -} - -static void -dzn_cmd_buffer_update_sysvals(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint) -{ - if (!(cmdbuf->state.bindpoint[bindpoint].dirty & DZN_CMD_BINDPOINT_DIRTY_SYSVALS)) - return; - - const struct dzn_pipeline *pipeline = cmdbuf->state.bindpoint[bindpoint].pipeline; - uint32_t sysval_cbv_param_idx = pipeline->root.sysval_cbv_param_idx; - - if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS) { - ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, 
sysval_cbv_param_idx, - sizeof(cmdbuf->state.sysvals.gfx) / 4, - &cmdbuf->state.sysvals.gfx, 0); - } else { - ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, sysval_cbv_param_idx, - sizeof(cmdbuf->state.sysvals.compute) / 4, - &cmdbuf->state.sysvals.compute, 0); - } -} - -static void -dzn_cmd_buffer_update_viewports(struct dzn_cmd_buffer *cmdbuf) -{ - const struct dzn_graphics_pipeline *pipeline = - (const struct dzn_graphics_pipeline *)cmdbuf->state.pipeline; - - if (!(cmdbuf->state.dirty & DZN_CMD_DIRTY_VIEWPORTS) || - !pipeline->vp.count) - return; - - ID3D12GraphicsCommandList1_RSSetViewports(cmdbuf->cmdlist, pipeline->vp.count, cmdbuf->state.viewports); -} - -static void -dzn_cmd_buffer_update_scissors(struct dzn_cmd_buffer *cmdbuf) -{ - const struct dzn_graphics_pipeline *pipeline = - (const struct dzn_graphics_pipeline *)cmdbuf->state.pipeline; - - if (!(cmdbuf->state.dirty & DZN_CMD_DIRTY_SCISSORS)) - return; - - if (!pipeline->scissor.count) { - /* Apply a scissor delimiting the render area. */ - ID3D12GraphicsCommandList1_RSSetScissorRects(cmdbuf->cmdlist, 1, &cmdbuf->state.render_area); - return; - } - - D3D12_RECT scissors[MAX_SCISSOR]; - uint32_t scissor_count = pipeline->scissor.count; - - memcpy(scissors, cmdbuf->state.scissors, sizeof(D3D12_RECT) * pipeline->scissor.count); - for (uint32_t i = 0; i < pipeline->scissor.count; i++) { - scissors[i].left = MAX2(scissors[i].left, cmdbuf->state.render_area.left); - scissors[i].top = MAX2(scissors[i].top, cmdbuf->state.render_area.top); - scissors[i].right = MIN2(scissors[i].right, cmdbuf->state.render_area.right); - scissors[i].bottom = MIN2(scissors[i].bottom, cmdbuf->state.render_area.bottom); - } - - ID3D12GraphicsCommandList1_RSSetScissorRects(cmdbuf->cmdlist, pipeline->scissor.count, scissors); -} - -static void -dzn_cmd_buffer_update_vbviews(struct dzn_cmd_buffer *cmdbuf) -{ - const struct dzn_graphics_pipeline *pipeline = - (const struct dzn_graphics_pipeline *)cmdbuf->state.pipeline; - unsigned start, end; - - BITSET_FOREACH_RANGE(start, end, cmdbuf->state.vb.dirty, MAX_VBS) - ID3D12GraphicsCommandList1_IASetVertexBuffers(cmdbuf->cmdlist, start, end - start, cmdbuf->state.vb.views); - - BITSET_CLEAR_RANGE(cmdbuf->state.vb.dirty, 0, MAX_VBS); -} - -static void -dzn_cmd_buffer_update_ibview(struct dzn_cmd_buffer *cmdbuf) -{ - if (!(cmdbuf->state.dirty & DZN_CMD_DIRTY_IB)) - return; - - ID3D12GraphicsCommandList1_IASetIndexBuffer(cmdbuf->cmdlist, &cmdbuf->state.ib.view); -} - -static void -dzn_cmd_buffer_update_push_constants(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint) -{ - struct dzn_cmd_buffer_push_constant_state *state = - bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS ? 
- &cmdbuf->state.push_constant.gfx : &cmdbuf->state.push_constant.compute; - - uint32_t offset = state->offset / 4; - uint32_t end = ALIGN(state->end, 4) / 4; - uint32_t count = end - offset; - - if (!count) - return; - - uint32_t slot = cmdbuf->state.pipeline->root.push_constant_cbv_param_idx; - uint32_t *vals = state->values + offset; - - if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS) - ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, slot, count, vals, offset); - else - ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, slot, count, vals, offset); - - state->offset = 0; - state->end = 0; -} - -static void -dzn_cmd_buffer_update_zsa(struct dzn_cmd_buffer *cmdbuf) -{ - if (cmdbuf->state.dirty & DZN_CMD_DIRTY_STENCIL_REF) { - const struct dzn_graphics_pipeline *gfx = (const struct dzn_graphics_pipeline *) - cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline; - uint32_t ref = - gfx->zsa.stencil_test.front.uses_ref ? - cmdbuf->state.zsa.stencil_test.front.ref : - cmdbuf->state.zsa.stencil_test.back.ref; - ID3D12GraphicsCommandList1_OMSetStencilRef(cmdbuf->cmdlist, ref); - } -} - -static void -dzn_cmd_buffer_update_blend_constants(struct dzn_cmd_buffer *cmdbuf) -{ - if (cmdbuf->state.dirty & DZN_CMD_DIRTY_BLEND_CONSTANTS) - ID3D12GraphicsCommandList1_OMSetBlendFactor(cmdbuf->cmdlist, - cmdbuf->state.blend.constants); -} - -static VkResult -dzn_cmd_buffer_triangle_fan_create_index(struct dzn_cmd_buffer *cmdbuf, uint32_t *vertex_count) -{ - struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); - uint8_t index_size = *vertex_count <= 0xffff ? 2 : 4; - uint32_t triangle_count = MAX2(*vertex_count, 2) - 2; - - *vertex_count = triangle_count * 3; - if (!*vertex_count) - return VK_SUCCESS; - - ID3D12Resource *index_buf; - VkResult result = - dzn_cmd_buffer_alloc_internal_buf(cmdbuf, *vertex_count * index_size, - D3D12_HEAP_TYPE_UPLOAD, - D3D12_RESOURCE_STATE_GENERIC_READ, - &index_buf); - if (result != VK_SUCCESS) - return result; - - void *cpu_ptr; - ID3D12Resource_Map(index_buf, 0, NULL, &cpu_ptr); - - /* TODO: VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT */ - if (index_size == 2) { - uint16_t *indices = (uint16_t *)cpu_ptr; - for (uint32_t t = 0; t < triangle_count; t++) { - indices[t * 3] = t + 1; - indices[(t * 3) + 1] = t + 2; - indices[(t * 3) + 2] = 0; - } - cmdbuf->state.ib.view.Format = DXGI_FORMAT_R16_UINT; - } else { - uint32_t *indices = (uint32_t *)cpu_ptr; - for (uint32_t t = 0; t < triangle_count; t++) { - indices[t * 3] = t + 1; - indices[(t * 3) + 1] = t + 2; - indices[(t * 3) + 2] = 0; - } - cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT; - } - - cmdbuf->state.ib.view.SizeInBytes = *vertex_count * index_size; - cmdbuf->state.ib.view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(index_buf); - cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB; - return VK_SUCCESS; -} - -static VkResult -dzn_cmd_buffer_triangle_fan_rewrite_index(struct dzn_cmd_buffer *cmdbuf, - uint32_t *index_count, - uint32_t *first_index) -{ - struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); - uint32_t triangle_count = MAX2(*index_count, 2) - 2; - - *index_count = triangle_count * 3; - if (!*index_count) - return VK_SUCCESS; - - /* New index is always 32bit to make the compute shader rewriting the - * index simpler */ - ID3D12Resource *new_index_buf; - VkResult result = - dzn_cmd_buffer_alloc_internal_buf(cmdbuf, *index_count * 4, - D3D12_HEAP_TYPE_DEFAULT, - 
D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
-                                        &new_index_buf);
-   if (result != VK_SUCCESS)
-      return result;
-
-   D3D12_GPU_VIRTUAL_ADDRESS old_index_buf_gpu =
-      cmdbuf->state.ib.view.BufferLocation;
-
-   enum dzn_index_type index_type =
-      dzn_index_type_from_dxgi_format(cmdbuf->state.ib.view.Format);
-   const struct dzn_meta_triangle_fan_rewrite_index *rewrite_index =
-      &device->triangle_fan[index_type];
-
-   const struct dzn_pipeline *compute_pipeline =
-      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].pipeline;
-
-   struct dzn_triangle_fan_rewrite_index_params params = {
-      .first_index = *first_index,
-   };
-
-   ID3D12GraphicsCommandList1_SetComputeRootSignature(cmdbuf->cmdlist, rewrite_index->root_sig);
-   ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, rewrite_index->pipeline_state);
-   ID3D12GraphicsCommandList1_SetComputeRootUnorderedAccessView(cmdbuf->cmdlist, 0, ID3D12Resource_GetGPUVirtualAddress(new_index_buf));
-   ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, 1, sizeof(params) / 4,
-                                                           &params, 0);
-   ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist, 2, old_index_buf_gpu);
-   ID3D12GraphicsCommandList1_Dispatch(cmdbuf->cmdlist, triangle_count, 1, 1);
-
-   D3D12_RESOURCE_BARRIER post_barriers[] = {
-      {
-         .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
-         .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
-         /* Transition the new index buffer to the index-buffer state so it
-          * can be bound with IASetIndexBuffer().
-          */
-         .Transition = {
-            .pResource = new_index_buf,
-            .Subresource = 0,
-            .StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
-            .StateAfter = D3D12_RESOURCE_STATE_INDEX_BUFFER,
-         },
-      },
-   };
-
-   ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, ARRAY_SIZE(post_barriers), post_barriers);
-
-   /* We don't mess with the driver state when executing our internal
-    * compute shader, but we still change the D3D12 state, so let's mark
-    * things dirty if needed.
- */ - cmdbuf->state.pipeline = NULL; - if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].pipeline) { - cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |= - DZN_CMD_BINDPOINT_DIRTY_PIPELINE; - } - - cmdbuf->state.ib.view.SizeInBytes = *index_count * 4; - cmdbuf->state.ib.view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(new_index_buf); - cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT; - cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB; - *first_index = 0; - return VK_SUCCESS; -} - -static void -dzn_cmd_buffer_prepare_draw(struct dzn_cmd_buffer *cmdbuf, bool indexed) -{ - dzn_cmd_buffer_update_pipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS); - dzn_cmd_buffer_update_heaps(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS); - dzn_cmd_buffer_update_sysvals(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS); - dzn_cmd_buffer_update_viewports(cmdbuf); - dzn_cmd_buffer_update_scissors(cmdbuf); - dzn_cmd_buffer_update_vbviews(cmdbuf); - dzn_cmd_buffer_update_push_constants(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS); - dzn_cmd_buffer_update_zsa(cmdbuf); - dzn_cmd_buffer_update_blend_constants(cmdbuf); - - if (indexed) - dzn_cmd_buffer_update_ibview(cmdbuf); - - /* Reset the dirty states */ - cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty = 0; - cmdbuf->state.dirty = 0; -} - -static uint32_t -dzn_cmd_buffer_triangle_fan_get_max_index_buf_size(struct dzn_cmd_buffer *cmdbuf, bool indexed) -{ - struct dzn_graphics_pipeline *pipeline = (struct dzn_graphics_pipeline *) - cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline; - - if (!pipeline->ia.triangle_fan) - return 0; - - uint32_t max_triangles; - - if (indexed) { - uint32_t index_size = cmdbuf->state.ib.view.Format == DXGI_FORMAT_R32_UINT ? 4 : 2; - uint32_t max_indices = cmdbuf->state.ib.view.SizeInBytes / index_size; - - max_triangles = MAX2(max_indices, 2) - 2; - } else { - uint32_t max_vertex = 0; - for (uint32_t i = 0; i < pipeline->vb.count; i++) { - max_vertex = - MAX2(max_vertex, - cmdbuf->state.vb.views[i].SizeInBytes / cmdbuf->state.vb.views[i].StrideInBytes); - } - - max_triangles = MAX2(max_vertex, 2) - 2; - } - - return max_triangles * 3; -} - -static void -dzn_cmd_buffer_indirect_draw(struct dzn_cmd_buffer *cmdbuf, - struct dzn_buffer *draw_buf, - size_t draw_buf_offset, - uint32_t draw_count, - uint32_t draw_buf_stride, - bool indexed) -{ - struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); - struct dzn_graphics_pipeline *pipeline = (struct dzn_graphics_pipeline *) - cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline; - bool triangle_fan = pipeline->ia.triangle_fan; - uint32_t min_draw_buf_stride = - indexed ? - sizeof(struct dzn_indirect_indexed_draw_params) : - sizeof(struct dzn_indirect_draw_params); - - draw_buf_stride = draw_buf_stride ? 
draw_buf_stride : min_draw_buf_stride;
-   assert(draw_buf_stride >= min_draw_buf_stride);
-   assert((draw_buf_stride & 3) == 0);
-
-   uint32_t sysvals_stride = ALIGN_POT(sizeof(cmdbuf->state.sysvals.gfx), 256);
-   uint32_t exec_buf_stride = 32;
-   uint32_t triangle_fan_index_buf_stride =
-      dzn_cmd_buffer_triangle_fan_get_max_index_buf_size(cmdbuf, indexed) *
-      sizeof(uint32_t);
-   uint32_t triangle_fan_exec_buf_stride =
-      sizeof(struct dzn_indirect_triangle_fan_rewrite_index_exec_params);
-   ID3D12Resource *exec_buf;
-   VkResult result =
-      dzn_cmd_buffer_alloc_internal_buf(cmdbuf, draw_count * exec_buf_stride,
-                                        D3D12_HEAP_TYPE_DEFAULT,
-                                        D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
-                                        &exec_buf);
-   if (result != VK_SUCCESS)
-      return;
-
-   D3D12_GPU_VIRTUAL_ADDRESS draw_buf_gpu =
-      ID3D12Resource_GetGPUVirtualAddress(draw_buf->res) + draw_buf_offset;
-   ID3D12Resource *triangle_fan_index_buf = NULL;
-   ID3D12Resource *triangle_fan_exec_buf = NULL;
-
-   if (triangle_fan_index_buf_stride) {
-      result =
-         dzn_cmd_buffer_alloc_internal_buf(cmdbuf,
-                                           draw_count * triangle_fan_index_buf_stride,
-                                           D3D12_HEAP_TYPE_DEFAULT,
-                                           D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
-                                           &triangle_fan_index_buf);
-      if (result != VK_SUCCESS)
-         return;
-
-      result =
-         dzn_cmd_buffer_alloc_internal_buf(cmdbuf,
-                                           draw_count * triangle_fan_exec_buf_stride,
-                                           D3D12_HEAP_TYPE_DEFAULT,
-                                           D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
-                                           &triangle_fan_exec_buf);
-      if (result != VK_SUCCESS)
-         return;
-   }
-
-   struct dzn_indirect_draw_triangle_fan_rewrite_params params = {
-      .draw_buf_stride = draw_buf_stride,
-      .triangle_fan_index_buf_stride = triangle_fan_index_buf_stride,
-      .triangle_fan_index_buf_start =
-         triangle_fan_index_buf ?
-         ID3D12Resource_GetGPUVirtualAddress(triangle_fan_index_buf) : 0,
-   };
-   uint32_t params_size =
-      triangle_fan_index_buf_stride > 0 ?
-      sizeof(struct dzn_indirect_draw_triangle_fan_rewrite_params) :
-      sizeof(struct dzn_indirect_draw_rewrite_params);
-
-   enum dzn_indirect_draw_type draw_type;
-
-   if (indexed && triangle_fan_index_buf_stride > 0)
-      draw_type = DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN;
-   else if (!indexed && triangle_fan_index_buf_stride > 0)
-      draw_type = DZN_INDIRECT_DRAW_TRIANGLE_FAN;
-   else if (indexed)
-      draw_type = DZN_INDIRECT_INDEXED_DRAW;
-   else
-      draw_type = DZN_INDIRECT_DRAW;
-
-   struct dzn_meta_indirect_draw *indirect_draw = &device->indirect_draws[draw_type];
-
-   const struct dzn_pipeline *compute_pipeline =
-      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].pipeline;
-
-   ID3D12GraphicsCommandList1_SetComputeRootSignature(cmdbuf->cmdlist, indirect_draw->root_sig);
-   ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, indirect_draw->pipeline_state);
-   ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, 0, params_size / 4, (const void *)&params, 0);
-   ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist, 1, draw_buf_gpu);
-   ID3D12GraphicsCommandList1_SetComputeRootUnorderedAccessView(cmdbuf->cmdlist, 2, ID3D12Resource_GetGPUVirtualAddress(exec_buf));
-   if (triangle_fan_exec_buf)
-      ID3D12GraphicsCommandList1_SetComputeRootUnorderedAccessView(cmdbuf->cmdlist, 3, ID3D12Resource_GetGPUVirtualAddress(triangle_fan_exec_buf));
-
-   ID3D12GraphicsCommandList1_Dispatch(cmdbuf->cmdlist, draw_count, 1, 1);
-
-   D3D12_RESOURCE_BARRIER post_barriers[] = {
-      {
-         .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
-         .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
-         /* Transition the exec buffer to indirect arg so it can be
-          * passed to ExecuteIndirect() as an argument buffer.
-          */
-         .Transition = {
-            .pResource = exec_buf,
-            .Subresource = 0,
-            .StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
-            .StateAfter = D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT,
-         },
-      },
-      {
-         .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
-         .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
-         /* Transition the triangle-fan exec buffer to indirect arg so it can
-          * be passed to ExecuteIndirect() as an argument buffer.
-          */
-         .Transition = {
-            .pResource = triangle_fan_exec_buf,
-            .Subresource = 0,
-            .StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
-            .StateAfter = D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT,
-         },
-      },
-   };
-
-   uint32_t post_barrier_count = triangle_fan_exec_buf ? 2 : 1;
-
-   ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, post_barrier_count, post_barriers);
-
-   D3D12_INDEX_BUFFER_VIEW ib_view = {};
-
-   if (triangle_fan_exec_buf) {
-      enum dzn_index_type index_type =
-         indexed ?
-         dzn_index_type_from_dxgi_format(cmdbuf->state.ib.view.Format) :
-         DZN_NO_INDEX;
-      struct dzn_meta_triangle_fan_rewrite_index *rewrite_index =
-         &device->triangle_fan[index_type];
-
-      struct dzn_triangle_fan_rewrite_index_params rewrite_index_params = {};
-
-      assert(rewrite_index->root_sig);
-      assert(rewrite_index->pipeline_state);
-      assert(rewrite_index->cmd_sig);
-
-      ID3D12GraphicsCommandList1_SetComputeRootSignature(cmdbuf->cmdlist, rewrite_index->root_sig);
-      ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, rewrite_index->pipeline_state);
-      ID3D12GraphicsCommandList1_SetComputeRootUnorderedAccessView(cmdbuf->cmdlist, 0, ID3D12Resource_GetGPUVirtualAddress(triangle_fan_index_buf));
-      ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, 1, sizeof(rewrite_index_params) / 4,
-                                                              (const void *)&rewrite_index_params, 0);
-
-      if (indexed)
-         ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist, 2, cmdbuf->state.ib.view.BufferLocation);
-
-      ID3D12GraphicsCommandList1_ExecuteIndirect(cmdbuf->cmdlist, rewrite_index->cmd_sig,
-                                                 draw_count, triangle_fan_exec_buf,
-                                                 0, NULL, 0);
-
-      D3D12_RESOURCE_BARRIER index_buf_barriers[] = {
-         {
-            .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
-            .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
-            .Transition = {
-               .pResource = triangle_fan_index_buf,
-               .Subresource = 0,
-               .StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
-               .StateAfter = D3D12_RESOURCE_STATE_INDEX_BUFFER,
-            },
-         },
-      };
-
-      ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, ARRAY_SIZE(index_buf_barriers), index_buf_barriers);
-
-      /* After our triangle-fan lowering the draw is indexed */
-      indexed = true;
-      ib_view = cmdbuf->state.ib.view;
-      cmdbuf->state.ib.view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(triangle_fan_index_buf);
-      cmdbuf->state.ib.view.SizeInBytes = triangle_fan_index_buf_stride;
-      cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT;
-      cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
-   }
-
-   /* We don't mess with the driver state when executing our internal
-    * compute shader, but we still change the D3D12 state, so let's mark
-    * things dirty if needed.
- */ - cmdbuf->state.pipeline = NULL; - if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].pipeline) { - cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |= - DZN_CMD_BINDPOINT_DIRTY_PIPELINE; - } - - cmdbuf->state.sysvals.gfx.first_vertex = 0; - cmdbuf->state.sysvals.gfx.base_instance = 0; - cmdbuf->state.sysvals.gfx.is_indexed_draw = indexed; - cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= - DZN_CMD_BINDPOINT_DIRTY_SYSVALS; - - dzn_cmd_buffer_prepare_draw(cmdbuf, indexed); - - /* Restore the old IB view if we modified it during the triangle fan lowering */ - if (ib_view.SizeInBytes) { - cmdbuf->state.ib.view = ib_view; - cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB; - } - - enum dzn_indirect_draw_cmd_sig_type cmd_sig_type = - triangle_fan_index_buf_stride > 0 ? - DZN_INDIRECT_DRAW_TRIANGLE_FAN_CMD_SIG : - indexed ? - DZN_INDIRECT_INDEXED_DRAW_CMD_SIG : - DZN_INDIRECT_DRAW_CMD_SIG; - ID3D12CommandSignature *cmdsig = - dzn_graphics_pipeline_get_indirect_cmd_sig(pipeline, cmd_sig_type); - - if (!cmdsig) { - cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); - return; - } - - ID3D12GraphicsCommandList1_ExecuteIndirect(cmdbuf->cmdlist, cmdsig, - draw_count, exec_buf, 0, NULL, 0); -} - -static void -dzn_cmd_buffer_prepare_dispatch(struct dzn_cmd_buffer *cmdbuf) -{ - dzn_cmd_buffer_update_pipeline(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE); - dzn_cmd_buffer_update_heaps(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE); - dzn_cmd_buffer_update_sysvals(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE); - dzn_cmd_buffer_update_push_constants(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE); - - /* Reset the dirty states */ - cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty = 0; -} - -VKAPI_ATTR void VKAPI_CALL -dzn_CmdCopyBuffer2(VkCommandBuffer commandBuffer, - const VkCopyBufferInfo2 *info) -{ - VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); - VK_FROM_HANDLE(dzn_buffer, src_buffer, info->srcBuffer); - VK_FROM_HANDLE(dzn_buffer, dst_buffer, info->dstBuffer); - - for (int i = 0; i < info->regionCount; i++) { - const VkBufferCopy2 *region = info->pRegions + i; - - ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, dst_buffer->res, region->dstOffset, - src_buffer->res, region->srcOffset, - region->size); - } -} - -VKAPI_ATTR void VKAPI_CALL -dzn_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer, - const VkCopyBufferToImageInfo2 *info) -{ - VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); - - for (int i = 0; i < info->regionCount; i++) { - const VkBufferImageCopy2 *region = info->pRegions + i; - - dzn_foreach_aspect(aspect, region->imageSubresource.aspectMask) { - for (uint32_t l = 0; l < region->imageSubresource.layerCount; l++) - dzn_cmd_buffer_copy_buf2img_region(cmdbuf, info, i, aspect, l); - } - } -} - -VKAPI_ATTR void VKAPI_CALL -dzn_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer, - const VkCopyImageToBufferInfo2 *info) -{ - VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); - - for (int i = 0; i < info->regionCount; i++) { - const VkBufferImageCopy2 *region = info->pRegions + i; - - dzn_foreach_aspect(aspect, region->imageSubresource.aspectMask) { - for (uint32_t l = 0; l < region->imageSubresource.layerCount; l++) - dzn_cmd_buffer_copy_img2buf_region(cmdbuf, info, i, aspect, l); - } - } -} - -VKAPI_ATTR void VKAPI_CALL -dzn_CmdCopyImage2(VkCommandBuffer commandBuffer, - const VkCopyImageInfo2 *info) -{ - VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); - struct dzn_device *device = container_of(cmdbuf->vk.base.device, 
struct dzn_device, vk);
-   VK_FROM_HANDLE(dzn_image, src, info->srcImage);
-   VK_FROM_HANDLE(dzn_image, dst, info->dstImage);
-
-   assert(src->vk.samples == dst->vk.samples);
-
-   bool requires_temp_res = src->vk.format != dst->vk.format &&
-                            src->vk.tiling != VK_IMAGE_TILING_LINEAR &&
-                            dst->vk.tiling != VK_IMAGE_TILING_LINEAR;
-   bool use_blit = false;
-   if (src->vk.samples > 1) {
-      use_blit = requires_temp_res;
-
-      for (int i = 0; i < info->regionCount; i++) {
-         const VkImageCopy2 *region = info->pRegions + i;
-         if (region->srcOffset.x != 0 || region->srcOffset.y != 0 ||
-             region->extent.width != u_minify(src->vk.extent.width, region->srcSubresource.mipLevel) ||
-             region->extent.height != u_minify(src->vk.extent.height, region->srcSubresource.mipLevel) ||
-             region->dstOffset.x != 0 || region->dstOffset.y != 0 ||
-             region->extent.width != u_minify(dst->vk.extent.width, region->dstSubresource.mipLevel) ||
-             region->extent.height != u_minify(dst->vk.extent.height, region->dstSubresource.mipLevel))
-            use_blit = true;
-      }
-   }
-
-   if (use_blit) {
-      /* This copy -> blit lowering doesn't work if the vkCmdCopyImage[2]()
-       * is issued on a transfer queue, but we don't have any better option
-       * right now...
-       */
-      STACK_ARRAY(VkImageBlit2, blit_regions, info->regionCount);
-
-      VkBlitImageInfo2 blit_info = {
-         .sType = VK_STRUCTURE_TYPE_BLIT_IMAGE_INFO_2,
-         .srcImage = info->srcImage,
-         .srcImageLayout = info->srcImageLayout,
-         .dstImage = info->dstImage,
-         .dstImageLayout = info->dstImageLayout,
-         .regionCount = info->regionCount,
-         .pRegions = blit_regions,
-         .filter = VK_FILTER_NEAREST,
-      };
-
-      for (uint32_t r = 0; r < info->regionCount; r++) {
-         blit_regions[r] = VkImageBlit2 {
-            .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2,
-            .srcSubresource = info->pRegions[r].srcSubresource,
-            .srcOffsets = {
-               info->pRegions[r].srcOffset,
-               info->pRegions[r].srcOffset,
-            },
-            .dstSubresource = info->pRegions[r].dstSubresource,
-            .dstOffsets = {
-               info->pRegions[r].dstOffset,
-               info->pRegions[r].dstOffset,
-            },
-         };
-
-         blit_regions[r].srcOffsets[1].x += info->pRegions[r].extent.width;
-         blit_regions[r].srcOffsets[1].y += info->pRegions[r].extent.height;
-         blit_regions[r].srcOffsets[1].z += info->pRegions[r].extent.depth;
-         blit_regions[r].dstOffsets[1].x += info->pRegions[r].extent.width;
-         blit_regions[r].dstOffsets[1].y += info->pRegions[r].extent.height;
-         blit_regions[r].dstOffsets[1].z += info->pRegions[r].extent.depth;
-      }
-
-      dzn_CmdBlitImage2(commandBuffer, &blit_info);
-
-      STACK_ARRAY_FINISH(blit_regions);
-      return;
-   }
-
-   D3D12_TEXTURE_COPY_LOCATION tmp_loc = {};
-   D3D12_RESOURCE_DESC tmp_desc = {
-      .Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D,
-      .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT,
-      .DepthOrArraySize = 1,
-      .MipLevels = 1,
-      .Format = src->desc.Format,
-      .SampleDesc = { .Count = 1, .Quality = 0 },
-      .Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN,
-      .Flags = D3D12_RESOURCE_FLAG_NONE,
-   };
-
-   if (requires_temp_res) {
-      ID3D12Device1 *dev = device->dev;
-      VkImageAspectFlags aspect = 0;
-      uint64_t max_size = 0;
-
-      if (vk_format_has_depth(src->vk.format))
-         aspect = VK_IMAGE_ASPECT_DEPTH_BIT;
-      else if (vk_format_has_stencil(src->vk.format))
-         aspect = VK_IMAGE_ASPECT_STENCIL_BIT;
-      else
-         aspect = VK_IMAGE_ASPECT_COLOR_BIT;
-
-      for (uint32_t i = 0; i < info->regionCount; i++) {
-         const VkImageCopy2 *region = &info->pRegions[i];
-         uint64_t region_size = 0;
-
-         tmp_desc.Format =
-            dzn_image_get_dxgi_format(src->vk.format,
-                                      VK_IMAGE_USAGE_TRANSFER_DST_BIT,
-                                      aspect);
-         tmp_desc.Width = region->extent.width;
-         tmp_desc.Height = region->extent.height;
-
-         ID3D12Device1_GetCopyableFootprints(dev, &tmp_desc,
-                                             0, 1, 0,
-                                             NULL, NULL, NULL,
-                                             &region_size);
-         max_size = MAX2(max_size, region_size * region->extent.depth);
-      }
-
-      VkResult result =
-         dzn_cmd_buffer_alloc_internal_buf(cmdbuf, max_size,
-                                           D3D12_HEAP_TYPE_DEFAULT,
-                                           D3D12_RESOURCE_STATE_COPY_DEST,
-                                           &tmp_loc.pResource);
-      if (result != VK_SUCCESS)
-         return;
-
-      tmp_loc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
-   }
-
-   for (int i = 0; i < info->regionCount; i++) {
-      const VkImageCopy2 *region = &info->pRegions[i];
-
-      dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) {
-         for (uint32_t l = 0; l < region->srcSubresource.layerCount; l++)
-            dzn_cmd_buffer_copy_img_chunk(cmdbuf, info, &tmp_desc, &tmp_loc, i, aspect, l);
-      }
-   }
-}
-
-VKAPI_ATTR void VKAPI_CALL
-dzn_CmdBlitImage2(VkCommandBuffer commandBuffer,
-                  const VkBlitImageInfo2 *info)
-{
-   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
-   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
-
-   if (info->regionCount == 0)
-      return;
-
-   uint32_t desc_count = 0;
-   for (uint32_t r = 0; r < info->regionCount; r++)
-      desc_count += util_bitcount(info->pRegions[r].srcSubresource.aspectMask);
-
-   struct dzn_descriptor_heap *heap;
-   uint32_t heap_slot;
-   VkResult result =
-      dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->cbv_srv_uav_pool, device,
-                                           desc_count, &heap, &heap_slot);
-
-   if (result != VK_SUCCESS) {
-      cmdbuf->error = result;
-      return;
-   }
-
-   if (heap != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV]) {
-      ID3D12DescriptorHeap * const heaps[] = { heap->heap };
-      cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = heap;
-      ID3D12GraphicsCommandList1_SetDescriptorHeaps(cmdbuf->cmdlist, ARRAY_SIZE(heaps), heaps);
-   }
-
-   ID3D12GraphicsCommandList1_IASetPrimitiveTopology(cmdbuf->cmdlist, D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
-
-   uint32_t heap_offset = 0;
-   for (uint32_t r = 0; r < info->regionCount; r++)
-      dzn_cmd_buffer_blit_region(cmdbuf, info, heap, &heap_slot, r);
-
-   cmdbuf->state.pipeline = NULL;
-   cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS | DZN_CMD_DIRTY_SCISSORS;
-   if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline) {
-      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
-         DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
-   }
-}
-
-VKAPI_ATTR void VKAPI_CALL
-dzn_CmdResolveImage2(VkCommandBuffer commandBuffer,
-                     const VkResolveImageInfo2 *info)
-{
-   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
-   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
-
-   if (info->regionCount == 0)
-      return;
-
-   uint32_t desc_count = 0;
-   for (uint32_t r = 0; r < info->regionCount; r++)
-      desc_count += util_bitcount(info->pRegions[r].srcSubresource.aspectMask);
-
-   struct dzn_descriptor_heap *heap;
-   uint32_t heap_slot;
-   VkResult result =
-      dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->cbv_srv_uav_pool, device,
-                                           desc_count, &heap, &heap_slot);
-   if (result != VK_SUCCESS) {
-      cmdbuf->error = result;
-      return;
-   }
-
-   if (heap != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV]) {
-      ID3D12DescriptorHeap * const heaps[] = { heap->heap };
-      cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = heap;
-      ID3D12GraphicsCommandList1_SetDescriptorHeaps(cmdbuf->cmdlist, ARRAY_SIZE(heaps), heaps);
-   }
-
-   ID3D12GraphicsCommandList1_IASetPrimitiveTopology(cmdbuf->cmdlist, D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
-
-   uint32_t heap_offset = 0;
-   for (uint32_t r = 0; r < info->regionCount; r++)
-      dzn_cmd_buffer_resolve_region(cmdbuf, info, heap, &heap_offset, r);
-
-   cmdbuf->state.pipeline = NULL;
-   cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS | DZN_CMD_DIRTY_SCISSORS;
-   if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline) {
-      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
-         DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
-   }
-}
-
-VKAPI_ATTR void VKAPI_CALL
-dzn_CmdClearColorImage(VkCommandBuffer commandBuffer,
-                       VkImage image,
-                       VkImageLayout imageLayout,
-                       const VkClearColorValue *pColor,
-                       uint32_t rangeCount,
-                       const VkImageSubresourceRange *pRanges)
-{
-   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
-   VK_FROM_HANDLE(dzn_image, img, image);
-
-   dzn_cmd_buffer_clear_color(cmdbuf, img, imageLayout, pColor, rangeCount, pRanges);
-}
-
-VKAPI_ATTR void VKAPI_CALL
-dzn_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
-                              VkImage image,
-                              VkImageLayout imageLayout,
-                              const VkClearDepthStencilValue *pDepthStencil,
-                              uint32_t rangeCount,
-                              const VkImageSubresourceRange *pRanges)
-{
-   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
-   VK_FROM_HANDLE(dzn_image, img, image);
-
-   dzn_cmd_buffer_clear_zs(cmdbuf, img, imageLayout, pDepthStencil, rangeCount, pRanges);
-}
-
-VKAPI_ATTR void VKAPI_CALL
-dzn_CmdDispatch(VkCommandBuffer commandBuffer,
-                uint32_t groupCountX,
-                uint32_t groupCountY,
-                uint32_t groupCountZ)
-{
-   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
-
-   cmdbuf->state.sysvals.compute.group_count_x = groupCountX;
-   cmdbuf->state.sysvals.compute.group_count_y = groupCountY;
-   cmdbuf->state.sysvals.compute.group_count_z = groupCountZ;
-   cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |=
-      DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
-
-   dzn_cmd_buffer_prepare_dispatch(cmdbuf);
-   ID3D12GraphicsCommandList1_Dispatch(cmdbuf->cmdlist, groupCountX, groupCountY, groupCountZ);
-}
-
-VKAPI_ATTR void VKAPI_CALL
-dzn_CmdFillBuffer(VkCommandBuffer commandBuffer,
-                  VkBuffer dstBuffer,
-                  VkDeviceSize dstOffset,
-                  VkDeviceSize size,
-                  uint32_t data)
-{
-   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
-   VK_FROM_HANDLE(dzn_buffer, buf, dstBuffer);
-
-   if (size == VK_WHOLE_SIZE)
-      size = buf->size - dstOffset;
-
-   size &= ~3ULL;
-
-   ID3D12Resource *src_res;
-   VkResult result =
-      dzn_cmd_buffer_alloc_internal_buf(cmdbuf, size,
-                                        D3D12_HEAP_TYPE_UPLOAD,
-                                        D3D12_RESOURCE_STATE_GENERIC_READ,
-                                        &src_res);
-   if (result != VK_SUCCESS)
-      return;
-
-   uint32_t *cpu_ptr;
-   ID3D12Resource_Map(src_res, 0, NULL, (void **)&cpu_ptr);
-   for (uint32_t i = 0; i < size / 4; i++)
-      cpu_ptr[i] = data;
-
-   ID3D12Resource_Unmap(src_res, 0, NULL);
-
-   ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset, src_res, 0, size);
-}
-
-VKAPI_ATTR void VKAPI_CALL
-dzn_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
-                    VkBuffer dstBuffer,
-                    VkDeviceSize dstOffset,
-                    VkDeviceSize size,
-                    const void *data)
-{
-   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
-   VK_FROM_HANDLE(dzn_buffer, buf, dstBuffer);
-
-   if (size == VK_WHOLE_SIZE)
-      size = buf->size - dstOffset;
-
-   /*
-    * The spec says:
-    *   "size is the number of bytes to fill, and must be either a multiple of
-    *   4, or VK_WHOLE_SIZE to fill the range from offset to the end of the
-    *   buffer. If VK_WHOLE_SIZE is used and the remaining size of the buffer
-    *   is not a multiple of 4, then the nearest smaller multiple is used."
-    */
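A quick worked example of the rounding this comment describes (editorial sketch, not part of the original change): a tail that is not a multiple of 4 is truncated to the nearest smaller multiple.

   VkDeviceSize size = 10;   /* e.g. VK_WHOLE_SIZE leaving a 10-byte tail */
   size &= ~3ULL;            /* size == 8: nearest smaller multiple of 4  */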
-   size &= ~3ULL;
-
-   ID3D12Resource *src_res;
-   VkResult result =
-      dzn_cmd_buffer_alloc_internal_buf(cmdbuf, size,
-                                        D3D12_HEAP_TYPE_UPLOAD,
-                                        D3D12_RESOURCE_STATE_GENERIC_READ,
-                                        &src_res);
-   if (result != VK_SUCCESS)
-      return;
-
-   void *cpu_ptr;
-   ID3D12Resource_Map(src_res, 0, NULL, &cpu_ptr);
-   memcpy(cpu_ptr, data, size);
-   ID3D12Resource_Unmap(src_res, 0, NULL);
-
-   ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset, src_res, 0, size);
-}
-
-VKAPI_ATTR void VKAPI_CALL
-dzn_CmdClearAttachments(VkCommandBuffer commandBuffer,
-                        uint32_t attachmentCount,
-                        const VkClearAttachment *pAttachments,
-                        uint32_t rectCount,
-                        const VkClearRect *pRects)
-{
-   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
-
-   dzn_cmd_buffer_clear_attachments(cmdbuf, attachmentCount, pAttachments, rectCount, pRects);
-}
-
-VKAPI_ATTR void VKAPI_CALL
-dzn_CmdBeginRenderPass2(VkCommandBuffer commandBuffer,
-                        const VkRenderPassBeginInfo *pRenderPassBeginInfo,
-                        const VkSubpassBeginInfo *pSubpassBeginInfo)
-{
-   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
-   VK_FROM_HANDLE(dzn_render_pass, pass, pRenderPassBeginInfo->renderPass);
-   VK_FROM_HANDLE(dzn_framebuffer, framebuffer, pRenderPassBeginInfo->framebuffer);
-
-   assert(pass->attachment_count == framebuffer->attachment_count);
-
-   cmdbuf->state.framebuffer = framebuffer;
-   cmdbuf->state.render_area = D3D12_RECT {
-      .left = pRenderPassBeginInfo->renderArea.offset.x,
-      .top = pRenderPassBeginInfo->renderArea.offset.y,
-      .right = (LONG)(pRenderPassBeginInfo->renderArea.offset.x + pRenderPassBeginInfo->renderArea.extent.width),
-      .bottom = (LONG)(pRenderPassBeginInfo->renderArea.offset.y + pRenderPassBeginInfo->renderArea.extent.height),
-   };
-
-   // The render area has an impact on the scissor state.
- cmdbuf->state.dirty |= DZN_CMD_DIRTY_SCISSORS; - cmdbuf->state.pass = pass; - cmdbuf->state.subpass = 0; - dzn_cmd_buffer_begin_subpass(cmdbuf); - - uint32_t clear_count = - MIN2(pRenderPassBeginInfo->clearValueCount, framebuffer->attachment_count); - for (int i = 0; i < clear_count; ++i) { - VkImageAspectFlags aspectMask = 0; - - if (vk_format_is_depth_or_stencil(pass->attachments[i].format)) { - if (pass->attachments[i].clear.depth) - aspectMask |= VK_IMAGE_ASPECT_DEPTH_BIT; - if (pass->attachments[i].clear.stencil) - aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT; - } else if (pass->attachments[i].clear.color) { - aspectMask |= VK_IMAGE_ASPECT_COLOR_BIT; - } - - dzn_cmd_buffer_clear_attachment(cmdbuf, i, &pRenderPassBeginInfo->pClearValues[i], - aspectMask, 0, ~0, 1, &cmdbuf->state.render_area); - } -} - -VKAPI_ATTR void VKAPI_CALL -dzn_CmdEndRenderPass2(VkCommandBuffer commandBuffer, - const VkSubpassEndInfo *pSubpassEndInfo) -{ - VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); - - dzn_cmd_buffer_end_subpass(cmdbuf); - - for (uint32_t i = 0; i < cmdbuf->state.pass->attachment_count; i++) - dzn_cmd_buffer_attachment_transition(cmdbuf, &cmdbuf->state.pass->attachments[i]); - - cmdbuf->state.framebuffer = NULL; - cmdbuf->state.pass = NULL; - cmdbuf->state.subpass = 0; -} - -VKAPI_ATTR void VKAPI_CALL -dzn_CmdNextSubpass2(VkCommandBuffer commandBuffer, - const VkSubpassBeginInfo *pSubpassBeginInfo, - const VkSubpassEndInfo *pSubpassEndInfo) -{ - VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); - - dzn_cmd_buffer_end_subpass(cmdbuf); - assert(cmdbuf->state.subpass + 1 < cmdbuf->state.pass->subpass_count); - cmdbuf->state.subpass++; - dzn_cmd_buffer_begin_subpass(cmdbuf); -} - -VKAPI_ATTR void VKAPI_CALL -dzn_CmdBindPipeline(VkCommandBuffer commandBuffer, - VkPipelineBindPoint pipelineBindPoint, - VkPipeline pipe) -{ - VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); - VK_FROM_HANDLE(dzn_pipeline, pipeline, pipe); - - cmdbuf->state.bindpoint[pipelineBindPoint].pipeline = pipeline; - cmdbuf->state.bindpoint[pipelineBindPoint].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE; - if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) { - const struct dzn_graphics_pipeline *gfx = (const struct dzn_graphics_pipeline *)pipeline; - - if (!gfx->vp.dynamic) { - memcpy(cmdbuf->state.viewports, gfx->vp.desc, - gfx->vp.count * sizeof(cmdbuf->state.viewports[0])); - cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS; - } - - if (!gfx->scissor.dynamic) { - memcpy(cmdbuf->state.scissors, gfx->scissor.desc, - gfx->scissor.count * sizeof(cmdbuf->state.scissors[0])); - cmdbuf->state.dirty |= DZN_CMD_DIRTY_SCISSORS; - } - - if (gfx->zsa.stencil_test.enable && !gfx->zsa.stencil_test.dynamic_ref) { - cmdbuf->state.zsa.stencil_test.front.ref = gfx->zsa.stencil_test.front.ref; - cmdbuf->state.zsa.stencil_test.back.ref = gfx->zsa.stencil_test.back.ref; - cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_REF; - } - - if (!gfx->blend.dynamic_constants) { - memcpy(cmdbuf->state.blend.constants, gfx->blend.constants, - sizeof(cmdbuf->state.blend.constants)); - cmdbuf->state.dirty |= DZN_CMD_DIRTY_BLEND_CONSTANTS; - } - - for (uint32_t vb = 0; vb < gfx->vb.count; vb++) - cmdbuf->state.vb.views[vb].StrideInBytes = gfx->vb.strides[vb]; - - if (gfx->vb.count > 0) - BITSET_SET_RANGE(cmdbuf->state.vb.dirty, 0, gfx->vb.count - 1); - } -} - -VKAPI_ATTR void VKAPI_CALL -dzn_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, - VkPipelineBindPoint pipelineBindPoint, - VkPipelineLayout layout, - uint32_t firstSet, - uint32_t 
descriptorSetCount, - const VkDescriptorSet *pDescriptorSets, - uint32_t dynamicOffsetCount, - const uint32_t *pDynamicOffsets) -{ - VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); - VK_FROM_HANDLE(dzn_pipeline_layout, playout, layout); - - struct dzn_descriptor_state *desc_state = - &cmdbuf->state.bindpoint[pipelineBindPoint].desc_state; - uint32_t dirty = 0; - - for (uint32_t i = 0; i < descriptorSetCount; i++) { - uint32_t idx = firstSet + i; - VK_FROM_HANDLE(dzn_descriptor_set, set, pDescriptorSets[i]); - - if (desc_state->sets[idx].set != set) { - desc_state->sets[idx].set = set; - dirty |= DZN_CMD_BINDPOINT_DIRTY_HEAPS; - } - - uint32_t dynamic_buffer_count = playout->sets[idx].dynamic_buffer_count; - if (dynamic_buffer_count) { - assert(dynamicOffsetCount >= dynamic_buffer_count); - - for (uint32_t j = 0; j < dynamic_buffer_count; j++) - desc_state->sets[idx].dynamic_offsets[j] = pDynamicOffsets[j]; - - dynamicOffsetCount -= dynamic_buffer_count; - pDynamicOffsets += dynamic_buffer_count; - dirty |= DZN_CMD_BINDPOINT_DIRTY_HEAPS; - } - } - - cmdbuf->state.bindpoint[pipelineBindPoint].dirty |= dirty; -} - -VKAPI_ATTR void VKAPI_CALL -dzn_CmdSetViewport(VkCommandBuffer commandBuffer, - uint32_t firstViewport, - uint32_t viewportCount, - const VkViewport *pViewports) -{ - VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); - - STATIC_ASSERT(MAX_VP <= DXIL_SPIRV_MAX_VIEWPORT); - - for (uint32_t i = 0; i < viewportCount; i++) { - uint32_t vp = i + firstViewport; - - dzn_translate_viewport(&cmdbuf->state.viewports[vp], &pViewports[i]); - - if (pViewports[i].minDepth > pViewports[i].maxDepth) - cmdbuf->state.sysvals.gfx.yz_flip_mask |= BITFIELD_BIT(vp + DXIL_SPIRV_Z_FLIP_SHIFT); - else - cmdbuf->state.sysvals.gfx.yz_flip_mask &= ~BITFIELD_BIT(vp + DXIL_SPIRV_Z_FLIP_SHIFT); - - if (pViewports[i].height > 0) - cmdbuf->state.sysvals.gfx.yz_flip_mask |= BITFIELD_BIT(vp); - else - cmdbuf->state.sysvals.gfx.yz_flip_mask &= ~BITFIELD_BIT(vp); - } - - if (viewportCount) { - cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS; - cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= - DZN_CMD_BINDPOINT_DIRTY_SYSVALS; - } -} - -VKAPI_ATTR void VKAPI_CALL -dzn_CmdSetScissor(VkCommandBuffer commandBuffer, - uint32_t firstScissor, - uint32_t scissorCount, - const VkRect2D *pScissors) -{ - VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); - - for (uint32_t i = 0; i < scissorCount; i++) - dzn_translate_rect(&cmdbuf->state.scissors[i + firstScissor], &pScissors[i]); - - if (scissorCount) - cmdbuf->state.dirty |= DZN_CMD_DIRTY_SCISSORS; -} - -VKAPI_ATTR void VKAPI_CALL -dzn_CmdPushConstants(VkCommandBuffer commandBuffer, VkPipelineLayout layout, - VkShaderStageFlags stageFlags, uint32_t offset, uint32_t size, - const void *pValues) -{ - VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); - struct dzn_cmd_buffer_push_constant_state *states[2]; - uint32_t num_states = 0; - - if (stageFlags & VK_SHADER_STAGE_ALL_GRAPHICS) - states[num_states++] = &cmdbuf->state.push_constant.gfx; - - if (stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) - states[num_states++] = &cmdbuf->state.push_constant.compute; - - for (uint32_t i = 0; i < num_states; i++) { - memcpy(((char *)states[i]->values) + offset, pValues, size); - - uint32_t current_offset = states[i]->offset; - uint32_t current_end = states[i]->end; - uint32_t end = offset + size; - if (current_end != 0) { - offset = MIN2(current_offset, offset); - end = MAX2(current_end, end); - } - states[i]->offset = offset; - states[i]->end = end; - } -} - 
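For clarity (editorial sketch, not part of the original change): the byte range tracked above in states[i]->offset/end is flushed in 32-bit units by dzn_cmd_buffer_update_push_constants() earlier in this file, since D3D12 root constants are dwords. The names state_offset/state_end below are stand-ins for that tracked range:

   /* vkCmdPushConstants(offset=4, size=8) marks bytes [4,12) dirty; the
    * flush turns that into SetGraphicsRoot32BitConstants(cmdlist, slot,
    * count=2, values + 1, dest_offset=1), i.e.:
    */
   uint32_t first_dword = state_offset / 4;                      /* 4 / 4 == 1      */
   uint32_t dword_count = ALIGN(state_end, 4) / 4 - first_dword; /* 12 / 4 - 1 == 2 */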
-VKAPI_ATTR void VKAPI_CALL -dzn_CmdDraw(VkCommandBuffer commandBuffer, - uint32_t vertexCount, - uint32_t instanceCount, - uint32_t firstVertex, - uint32_t firstInstance) -{ - VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); - - const struct dzn_graphics_pipeline *pipeline = (const struct dzn_graphics_pipeline *) - cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline; - - cmdbuf->state.sysvals.gfx.first_vertex = firstVertex; - cmdbuf->state.sysvals.gfx.base_instance = firstInstance; - cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= - DZN_CMD_BINDPOINT_DIRTY_SYSVALS; - - if (pipeline->ia.triangle_fan) { - D3D12_INDEX_BUFFER_VIEW ib_view = cmdbuf->state.ib.view; - - VkResult result = - dzn_cmd_buffer_triangle_fan_create_index(cmdbuf, &vertexCount); - if (result != VK_SUCCESS || !vertexCount) - return; - - cmdbuf->state.sysvals.gfx.is_indexed_draw = true; - dzn_cmd_buffer_prepare_draw(cmdbuf, true); - ID3D12GraphicsCommandList1_DrawIndexedInstanced(cmdbuf->cmdlist, vertexCount, instanceCount, 0, - firstVertex, firstInstance); - - /* Restore the IB view if we modified it when lowering triangle fans. */ - if (ib_view.SizeInBytes > 0) { - cmdbuf->state.ib.view = ib_view; - cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB; - } - } else { - cmdbuf->state.sysvals.gfx.is_indexed_draw = false; - dzn_cmd_buffer_prepare_draw(cmdbuf, false); - ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, vertexCount, instanceCount, - firstVertex, firstInstance); - } -} - -VKAPI_ATTR void VKAPI_CALL -dzn_CmdDrawIndexed(VkCommandBuffer commandBuffer, - uint32_t indexCount, - uint32_t instanceCount, - uint32_t firstIndex, - int32_t vertexOffset, - uint32_t firstInstance) -{ - VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); - - const struct dzn_graphics_pipeline *pipeline = (const struct dzn_graphics_pipeline *) - cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline; - - cmdbuf->state.sysvals.gfx.first_vertex = vertexOffset; - cmdbuf->state.sysvals.gfx.base_instance = firstInstance; - cmdbuf->state.sysvals.gfx.is_indexed_draw = true; - cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= - DZN_CMD_BINDPOINT_DIRTY_SYSVALS; - - D3D12_INDEX_BUFFER_VIEW ib_view = cmdbuf->state.ib.view; - - if (pipeline->ia.triangle_fan) { - VkResult result = - dzn_cmd_buffer_triangle_fan_rewrite_index(cmdbuf, &indexCount, &firstIndex); - if (result != VK_SUCCESS || !indexCount) - return; - } - - dzn_cmd_buffer_prepare_draw(cmdbuf, true); - ID3D12GraphicsCommandList1_DrawIndexedInstanced(cmdbuf->cmdlist, indexCount, instanceCount, firstIndex, - vertexOffset, firstInstance); - - /* Restore the IB view if we modified it when lowering triangle fans. 
*/ - if (pipeline->ia.triangle_fan && ib_view.SizeInBytes) { - cmdbuf->state.ib.view = ib_view; - cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB; - } -} - -VKAPI_ATTR void VKAPI_CALL -dzn_CmdDrawIndirect(VkCommandBuffer commandBuffer, - VkBuffer buffer, - VkDeviceSize offset, - uint32_t drawCount, - uint32_t stride) -{ - VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); - VK_FROM_HANDLE(dzn_buffer, buf, buffer); - - dzn_cmd_buffer_indirect_draw(cmdbuf, buf, offset, drawCount, stride, false); -} - -VKAPI_ATTR void VKAPI_CALL -dzn_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, - VkBuffer buffer, - VkDeviceSize offset, - uint32_t drawCount, - uint32_t stride) -{ - VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); - VK_FROM_HANDLE(dzn_buffer, buf, buffer); - - dzn_cmd_buffer_indirect_draw(cmdbuf, buf, offset, drawCount, stride, true); -} - -VKAPI_ATTR void VKAPI_CALL -dzn_CmdBindVertexBuffers(VkCommandBuffer commandBuffer, - uint32_t firstBinding, - uint32_t bindingCount, - const VkBuffer *pBuffers, - const VkDeviceSize *pOffsets) -{ - VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); - - if (!bindingCount) - return; - - D3D12_VERTEX_BUFFER_VIEW *vbviews = cmdbuf->state.vb.views; - - for (uint32_t i = 0; i < bindingCount; i++) { - VK_FROM_HANDLE(dzn_buffer, buf, pBuffers[i]); - - vbviews[firstBinding + i].BufferLocation = ID3D12Resource_GetGPUVirtualAddress(buf->res) + pOffsets[i]; - vbviews[firstBinding + i].SizeInBytes = buf->size - pOffsets[i]; - } - - BITSET_SET_RANGE(cmdbuf->state.vb.dirty, firstBinding, - firstBinding + bindingCount - 1); -} - -VKAPI_ATTR void VKAPI_CALL -dzn_CmdBindIndexBuffer(VkCommandBuffer commandBuffer, - VkBuffer buffer, - VkDeviceSize offset, - VkIndexType indexType) -{ - VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); - VK_FROM_HANDLE(dzn_buffer, buf, buffer); - - cmdbuf->state.ib.view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(buf->res) + offset; - cmdbuf->state.ib.view.SizeInBytes = buf->size - offset; - switch (indexType) { - case VK_INDEX_TYPE_UINT16: - cmdbuf->state.ib.view.Format = DXGI_FORMAT_R16_UINT; - break; - case VK_INDEX_TYPE_UINT32: - cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT; - break; - default: unreachable("Invalid index type"); - } - - cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB; -} - -VKAPI_ATTR void VKAPI_CALL -dzn_CmdResetEvent(VkCommandBuffer commandBuffer, - VkEvent event, - VkPipelineStageFlags stageMask) -{ - VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); - struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); - VK_FROM_HANDLE(dzn_event, evt, event); - - if (!_mesa_hash_table_insert(cmdbuf->events.ht, event, (void *)(uintptr_t)DZN_EVENT_STATE_RESET)) - cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); -} - -VKAPI_ATTR void VKAPI_CALL -dzn_CmdSetEvent(VkCommandBuffer commandBuffer, - VkEvent event, - VkPipelineStageFlags stageMask) -{ - VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); - struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); - VK_FROM_HANDLE(dzn_event, evt, event); - - if (!_mesa_hash_table_insert(cmdbuf->events.ht, event, (void *)(uintptr_t)DZN_EVENT_STATE_SET)) - cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); -} - -VKAPI_ATTR void VKAPI_CALL -dzn_CmdWaitEvents(VkCommandBuffer commandBuffer, - uint32_t eventCount, - const VkEvent *pEvents, - VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - uint32_t memoryBarrierCount, - const VkMemoryBarrier 
*pMemoryBarriers,
-                  uint32_t bufferMemoryBarrierCount,
-                  const VkBufferMemoryBarrier *pBufferMemoryBarriers,
-                  uint32_t imageMemoryBarrierCount,
-                  const VkImageMemoryBarrier *pImageMemoryBarriers)
-{
-   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
-   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
-
-   /* Intra-command-list waits are handled by this pipeline flush, which is
-    * overkill, but that's the best we can do with the standard D3D12 barrier
-    * API.
-    *
-    * Inter-command-list waits are taken care of by the serialization done at
-    * the ExecuteCommandList() level:
-    * "Calling ExecuteCommandLists twice in succession (from the same thread,
-    * or different threads) guarantees that the first workload (A) finishes
-    * before the second workload (B)"
-    *
-    * HOST -> DEVICE signaling is ignored and we assume events are always
-    * signaled when we reach the vkCmdWaitEvents() point:
-    * "Command buffers in the submission can include vkCmdWaitEvents commands
-    * that wait on events that will not be signaled by earlier commands in the
-    * queue. Such events must be signaled by the application using vkSetEvent,
-    * and the vkCmdWaitEvents commands that wait upon them must not be inside
-    * a render pass instance.
-    * The event must be set before the vkCmdWaitEvents command is executed."
-    */
-   bool flush_pipeline = false;
-
-   for (uint32_t i = 0; i < eventCount; i++) {
-      VK_FROM_HANDLE(dzn_event, event, pEvents[i]);
-
-      struct hash_entry *he =
-         _mesa_hash_table_search(cmdbuf->events.ht, event);
-      if (he) {
-         enum dzn_event_state state = (enum dzn_event_state)(uintptr_t)he->data;
-         assert(state != DZN_EVENT_STATE_RESET);
-         flush_pipeline = state == DZN_EVENT_STATE_SET;
-      } else {
-         if (!_mesa_hash_table_insert(cmdbuf->events.ht, event,
-                                      (void *)(uintptr_t)DZN_EVENT_STATE_EXTERNAL_WAIT)) {
-            cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
-            return;
-         }
-
-         struct dzn_event **entry = (struct dzn_event **)
-            util_dynarray_grow(&cmdbuf->events.wait, struct dzn_event *, 1);
-
-         if (!entry) {
-            cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
-            return;
-         }
-
-         *entry = event;
-      }
-   }
-
-   if (flush_pipeline) {
-      D3D12_RESOURCE_BARRIER barrier = {
-         .Type = D3D12_RESOURCE_BARRIER_TYPE_UAV,
-         .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
-         .UAV = { .pResource = NULL },
-      };
-
-      ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier);
-   }
-}
-
-VKAPI_ATTR void VKAPI_CALL
-dzn_CmdBeginQuery(VkCommandBuffer commandBuffer,
-                  VkQueryPool queryPool,
-                  uint32_t query,
-                  VkQueryControlFlags flags)
-{
-   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
-   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
-   VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
-
-   struct dzn_cmd_buffer_query_pool_state *state =
-      dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
-   if (!state)
-      return;
-
-   qpool->queries[query].type = dzn_query_pool_get_query_type(qpool, flags);
-   dzn_cmd_buffer_dynbitset_clear(cmdbuf, &state->collect, query);
-   ID3D12GraphicsCommandList1_BeginQuery(cmdbuf->cmdlist, qpool->heap, qpool->queries[query].type, query);
-}
-
-VKAPI_ATTR void VKAPI_CALL
-dzn_CmdEndQuery(VkCommandBuffer commandBuffer,
-                VkQueryPool queryPool,
-                uint32_t query)
-{
-   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
-   VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
-
-   struct dzn_cmd_buffer_query_pool_state *state =
-      dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
-   if (!state)
-      return;
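A note on the collect bitset used below (inferred from how the bitset is used in this file, not from separate documentation): a set bit marks a query whose value is still sitting in the D3D12 query heap; dzn_cmd_buffer_collect_queries(), invoked from dzn_CmdCopyQueryPoolResults() further down, resolves flagged queries into the pool's collect buffer before results are copied out.

   /* Sketch of the lifecycle, under the assumption above:
    *   vkCmdBeginQuery()           -> collect bit cleared (query in flight)
    *   vkCmdEndQuery()             -> collect bit set (result pending in heap)
    *   vkCmdCopyQueryPoolResults() -> pending results resolved, then copied
    */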
- - dzn_cmd_buffer_dynbitset_set(cmdbuf, &state->collect, query); - ID3D12GraphicsCommandList1_EndQuery(cmdbuf->cmdlist, qpool->heap, qpool->queries[query].type, query); -} - -VKAPI_ATTR void VKAPI_CALL -dzn_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, - VkPipelineStageFlags2 stage, - VkQueryPool queryPool, - uint32_t query) -{ - VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); - VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool); - - struct dzn_cmd_buffer_query_pool_state *state = - dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool); - if (!state) - return; - - /* Execution barrier so the timestamp gets written after the pipeline flush. */ - D3D12_RESOURCE_BARRIER barrier = { - .Type = D3D12_RESOURCE_BARRIER_TYPE_UAV, - .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, - .UAV = { .pResource = NULL }, - }; - - ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier); - - qpool->queries[query].type = D3D12_QUERY_TYPE_TIMESTAMP; - dzn_cmd_buffer_dynbitset_set(cmdbuf, &state->collect, query); - ID3D12GraphicsCommandList1_EndQuery(cmdbuf->cmdlist, qpool->heap, qpool->queries[query].type, query); -} - - -VKAPI_ATTR void VKAPI_CALL -dzn_CmdResetQueryPool(VkCommandBuffer commandBuffer, - VkQueryPool queryPool, - uint32_t firstQuery, - uint32_t queryCount) -{ - VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); - struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); - VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool); - - struct dzn_cmd_buffer_query_pool_state *state = - dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool); - - if (!state) - return; - - uint32_t q_step = DZN_QUERY_REFS_SECTION_SIZE / sizeof(uint64_t); - - for (uint32_t q = 0; q < queryCount; q += q_step) { - uint32_t q_count = MIN2(queryCount - q, q_step); - - ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, qpool->collect_buffer, - dzn_query_pool_get_availability_offset(qpool, firstQuery + q), - device->queries.refs, - DZN_QUERY_REFS_ALL_ZEROS_OFFSET, - q_count * sizeof(uint64_t)); - } - - q_step = DZN_QUERY_REFS_SECTION_SIZE / qpool->query_size; - - for (uint32_t q = 0; q < queryCount; q += q_step) { - ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, qpool->collect_buffer, - dzn_query_pool_get_result_offset(qpool, firstQuery + q), - device->queries.refs, - DZN_QUERY_REFS_ALL_ZEROS_OFFSET, - qpool->query_size); - } - - dzn_cmd_buffer_dynbitset_set_range(cmdbuf, &state->reset, firstQuery, queryCount); - dzn_cmd_buffer_dynbitset_clear_range(cmdbuf, &state->collect, firstQuery, queryCount); -} - -VKAPI_ATTR void VKAPI_CALL -dzn_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, - VkQueryPool queryPool, - uint32_t firstQuery, - uint32_t queryCount, - VkBuffer dstBuffer, - VkDeviceSize dstOffset, - VkDeviceSize stride, - VkQueryResultFlags flags) -{ - VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); - VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool); - VK_FROM_HANDLE(dzn_buffer, buf, dstBuffer); - - struct dzn_cmd_buffer_query_pool_state *qpstate = - dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool); - if (!qpstate) - return; - - if (flags & VK_QUERY_RESULT_WAIT_BIT) { - for (uint32_t i = 0; i < queryCount; i++) { - if (!dzn_cmd_buffer_dynbitset_test(&qpstate->collect, firstQuery + i) && - !dzn_cmd_buffer_dynbitset_test(&qpstate->signal, firstQuery + i)) - dzn_cmd_buffer_dynbitset_set(cmdbuf, &qpstate->wait, firstQuery + i); - } - } - - VkResult result = - dzn_cmd_buffer_collect_queries(cmdbuf, qpool, qpstate, firstQuery, queryCount); - if (result 
!= VK_SUCCESS) - return; - - bool raw_copy = (flags & VK_QUERY_RESULT_64_BIT) && - stride == qpool->query_size && - !(flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT); -#define ALL_STATS \ - (VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT | \ - VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT | \ - VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT | \ - VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT | \ - VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT | \ - VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT | \ - VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT | \ - VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT | \ - VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT | \ - VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT | \ - VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT) - if (qpool->heap_type == D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS && - qpool->pipeline_statistics != ALL_STATS) - raw_copy = false; -#undef ALL_STATS - - D3D12_RESOURCE_BARRIER barrier = { - .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, - .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, - .Transition = { - .pResource = qpool->collect_buffer, - .StateBefore = D3D12_RESOURCE_STATE_COPY_DEST, - .StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE, - }, - }; - - ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier); - - if (raw_copy) { - ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset, - qpool->collect_buffer, - dzn_query_pool_get_result_offset(qpool, firstQuery), - dzn_query_pool_get_result_size(qpool, queryCount)); - } else { - uint32_t step = flags & VK_QUERY_RESULT_64_BIT ? sizeof(uint64_t) : sizeof(uint32_t); - - for (uint32_t q = 0; q < queryCount; q++) { - uint32_t res_offset = dzn_query_pool_get_result_offset(qpool, firstQuery + q); - uint32_t dst_counter_offset = 0; - - if (qpool->heap_type == D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS) { - for (uint32_t c = 0; c < sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS) / sizeof(uint64_t); c++) { - if (!(BITFIELD_BIT(c) & qpool->pipeline_statistics)) - continue; - - ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset + dst_counter_offset, - qpool->collect_buffer, - res_offset + (c * sizeof(uint64_t)), - step); - dst_counter_offset += step; - } - } else { - ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset, - qpool->collect_buffer, - res_offset, step); - dst_counter_offset += step; - } - - if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { - ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset + dst_counter_offset, - qpool->collect_buffer, - dzn_query_pool_get_availability_offset(qpool, firstQuery + q), - step); - } - - dstOffset += stride; - } - } - - DZN_SWAP(D3D12_RESOURCE_STATES, barrier.Transition.StateBefore, barrier.Transition.StateAfter); - ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier); -} - -VKAPI_ATTR void VKAPI_CALL -dzn_CmdDispatchIndirect(VkCommandBuffer commandBuffer, - VkBuffer buffer, - VkDeviceSize offset) -{ - VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); - struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); - VK_FROM_HANDLE(dzn_buffer, buf, buffer); - - cmdbuf->state.sysvals.compute.group_count_x = 0; - cmdbuf->state.sysvals.compute.group_count_y = 0; - cmdbuf->state.sysvals.compute.group_count_z = 0; - 
cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |= - DZN_CMD_BINDPOINT_DIRTY_SYSVALS; - - dzn_cmd_buffer_prepare_dispatch(cmdbuf); - - struct dzn_compute_pipeline *pipeline = (struct dzn_compute_pipeline *) - cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].pipeline; - ID3D12CommandSignature *cmdsig = - dzn_compute_pipeline_get_indirect_cmd_sig(pipeline); - - if (!cmdsig) { - cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - return; - } - - ID3D12Resource *exec_buf; - VkResult result = - dzn_cmd_buffer_alloc_internal_buf(cmdbuf, sizeof(D3D12_DISPATCH_ARGUMENTS) * 2, - D3D12_HEAP_TYPE_DEFAULT, - D3D12_RESOURCE_STATE_COPY_DEST, - &exec_buf); - if (result != VK_SUCCESS) - return; - - ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, exec_buf, 0, - buf->res, - offset, - sizeof(D3D12_DISPATCH_ARGUMENTS)); - ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, exec_buf, sizeof(D3D12_DISPATCH_ARGUMENTS), - buf->res, - offset, - sizeof(D3D12_DISPATCH_ARGUMENTS)); - D3D12_RESOURCE_BARRIER barriers[] = { - { - .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, - .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, - /* Transition the exec buffer to indirect arg so it can be - * passed to ExecuteIndirect() as an argument buffer. - */ - .Transition = { - .pResource = exec_buf, - .Subresource = 0, - .StateBefore = D3D12_RESOURCE_STATE_COPY_DEST, - .StateAfter = D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, - }, - }, - }; - - ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, ARRAY_SIZE(barriers), barriers); - - ID3D12GraphicsCommandList1_ExecuteIndirect(cmdbuf->cmdlist, cmdsig, 1, exec_buf, 0, NULL, 0); -} - -VKAPI_ATTR void VKAPI_CALL -dzn_CmdSetLineWidth(VkCommandBuffer commandBuffer, - float lineWidth) -{ - assert(lineWidth == 1.0f); -} - -VKAPI_ATTR void VKAPI_CALL -dzn_CmdSetDepthBias(VkCommandBuffer commandBuffer, - float depthBiasConstantFactor, - float depthBiasClamp, - float depthBiasSlopeFactor) -{ - dzn_stub(); -} - -VKAPI_ATTR void VKAPI_CALL -dzn_CmdSetBlendConstants(VkCommandBuffer commandBuffer, - const float blendConstants[4]) -{ - VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); - - memcpy(cmdbuf->state.blend.constants, blendConstants, - sizeof(cmdbuf->state.blend.constants)); - cmdbuf->state.dirty |= DZN_CMD_DIRTY_BLEND_CONSTANTS; -} - -VKAPI_ATTR void VKAPI_CALL -dzn_CmdSetDepthBounds(VkCommandBuffer commandBuffer, - float minDepthBounds, - float maxDepthBounds) -{ - VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); - - ID3D12GraphicsCommandList1_OMSetDepthBounds(cmdbuf->cmdlist, minDepthBounds, maxDepthBounds); -} - -VKAPI_ATTR void VKAPI_CALL -dzn_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer, - VkStencilFaceFlags faceMask, - uint32_t compareMask) -{ - VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); - - if (faceMask & VK_STENCIL_FACE_FRONT_BIT) - cmdbuf->state.zsa.stencil_test.front.compare_mask = compareMask; - - if (faceMask & VK_STENCIL_FACE_BACK_BIT) - cmdbuf->state.zsa.stencil_test.back.compare_mask = compareMask; - - cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_COMPARE_MASK; -} - -VKAPI_ATTR void VKAPI_CALL -dzn_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer, - VkStencilFaceFlags faceMask, - uint32_t writeMask) -{ - VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); - - if (faceMask & VK_STENCIL_FACE_FRONT_BIT) - cmdbuf->state.zsa.stencil_test.front.write_mask = writeMask; - - if (faceMask & VK_STENCIL_FACE_BACK_BIT) - cmdbuf->state.zsa.stencil_test.back.write_mask = writeMask; - - 
cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_WRITE_MASK; -} - -VKAPI_ATTR void VKAPI_CALL -dzn_CmdSetStencilReference(VkCommandBuffer commandBuffer, - VkStencilFaceFlags faceMask, - uint32_t reference) -{ - VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); - - if (faceMask & VK_STENCIL_FACE_FRONT_BIT) - cmdbuf->state.zsa.stencil_test.front.ref = reference; - - if (faceMask & VK_STENCIL_FACE_BACK_BIT) - cmdbuf->state.zsa.stencil_test.back.ref = reference; - - cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_REF; -} diff --git a/src/microsoft/vulkan/dzn_descriptor_set.c b/src/microsoft/vulkan/dzn_descriptor_set.c new file mode 100644 index 00000000000..b19da113a33 --- /dev/null +++ b/src/microsoft/vulkan/dzn_descriptor_set.c @@ -0,0 +1,1818 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "dzn_private.h" + +#include "vk_alloc.h" +#include "vk_descriptors.h" +#include "vk_util.h" + +static D3D12_SHADER_VISIBILITY +translate_desc_visibility(VkShaderStageFlags in) +{ + switch (in) { + case VK_SHADER_STAGE_VERTEX_BIT: return D3D12_SHADER_VISIBILITY_VERTEX; + case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: return D3D12_SHADER_VISIBILITY_HULL; + case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: return D3D12_SHADER_VISIBILITY_DOMAIN; + case VK_SHADER_STAGE_GEOMETRY_BIT: return D3D12_SHADER_VISIBILITY_GEOMETRY; + case VK_SHADER_STAGE_FRAGMENT_BIT: return D3D12_SHADER_VISIBILITY_PIXEL; + default: return D3D12_SHADER_VISIBILITY_ALL; + } +} + +static D3D12_DESCRIPTOR_RANGE_TYPE +desc_type_to_range_type(VkDescriptorType in, bool writeable) +{ + switch (in) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + return D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; + + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + return D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + return D3D12_DESCRIPTOR_RANGE_TYPE_CBV; + + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + return writeable ? 
D3D12_DESCRIPTOR_RANGE_TYPE_UAV : D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + default: + unreachable("Unsupported desc type"); + } +} + +static bool +is_dynamic_desc_type(VkDescriptorType desc_type) +{ + return (desc_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC || + desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC); +} + +static bool +dzn_descriptor_type_depends_on_shader_usage(VkDescriptorType type) +{ + return type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER || + type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE || + type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER || + type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC; +} + +static uint32_t +num_descs_for_type(VkDescriptorType type, bool static_sampler) +{ + unsigned num_descs = 1; + + /* Some types map to an SRV or UAV depending on how the shader is using the + * resource (NONWRITEABLE flag set or not); in that case we need to reserve + * slots for both the UAV and SRV descs. + */ + if (dzn_descriptor_type_depends_on_shader_usage(type)) + num_descs++; + + /* There's no combined SRV+SAMPLER type in d3d12, so we need a separate + * descriptor for the sampler. + */ + if (type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) + num_descs++; + + /* Don't count immutable samplers, they have their own descriptor. */ + if (static_sampler && + (type == VK_DESCRIPTOR_TYPE_SAMPLER || + type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)) + num_descs--; + + return num_descs; +} + +static void +dzn_descriptor_set_layout_destroy(struct dzn_descriptor_set_layout *set_layout, + const VkAllocationCallbacks *pAllocator) +{ + if (!set_layout) + return; + + struct dzn_device *device = container_of(set_layout->base.device, struct dzn_device, vk); + + vk_object_base_finish(&set_layout->base); + vk_free2(&device->vk.alloc, pAllocator, set_layout); +} + +static VkResult +dzn_descriptor_set_layout_create(struct dzn_device *device, + const VkDescriptorSetLayoutCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkDescriptorSetLayout *out) +{ + const VkDescriptorSetLayoutBinding *bindings = pCreateInfo->pBindings; + uint32_t binding_count = 0, static_sampler_count = 0, total_ranges = 0; + uint32_t dynamic_ranges_offset = 0, immutable_sampler_count = 0; + uint32_t range_count[MAX_SHADER_VISIBILITIES][NUM_POOL_TYPES] = { 0 }; + + for (uint32_t i = 0; i < pCreateInfo->bindingCount; i++) { + binding_count = MAX2(binding_count, bindings[i].binding + 1); + + if (!bindings[i].descriptorCount) + continue; + + D3D12_SHADER_VISIBILITY visibility = + translate_desc_visibility(bindings[i].stageFlags); + VkDescriptorType desc_type = bindings[i].descriptorType; + bool has_sampler = + desc_type == VK_DESCRIPTOR_TYPE_SAMPLER || + desc_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + + /* From the Vulkan 1.1.97 spec for VkDescriptorSetLayoutBinding: + * + * "If descriptorType specifies a VK_DESCRIPTOR_TYPE_SAMPLER or + * VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER type descriptor, then + * pImmutableSamplers can be used to initialize a set of immutable + * samplers. [...] If descriptorType is not one of these descriptor + * types, then pImmutableSamplers is ignored." + * + * We need to be careful here and only parse pImmutableSamplers if we + * have one of the right descriptor types.
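+ * + * Note that an immutable-sampler binding with descriptorCount == 1 whose + * sampler maps to one of the D3D12 static border colors is promoted to a + * static sampler embedded in the root signature rather than taking a + * sampler-heap slot (see num_descs_for_type()).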
+ */ + bool immutable_samplers = + has_sampler && + bindings[i].pImmutableSamplers != NULL; + bool static_sampler = false; + + if (immutable_samplers && bindings[i].descriptorCount == 1) { + VK_FROM_HANDLE(dzn_sampler, sampler, bindings[i].pImmutableSamplers[0]); + + if (sampler->static_border_color != -1) + static_sampler = true; + } + + if (static_sampler) { + static_sampler_count += bindings[i].descriptorCount; + } else if (has_sampler) { + range_count[visibility][D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER]++; + total_ranges++; + + if (immutable_samplers) + immutable_sampler_count += bindings[i].descriptorCount; + } + + if (desc_type != VK_DESCRIPTOR_TYPE_SAMPLER) { + range_count[visibility][D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV]++; + total_ranges++; + + if (dzn_descriptor_type_depends_on_shader_usage(desc_type)) { + range_count[visibility][D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV]++; + total_ranges++; + } + + if (!is_dynamic_desc_type(desc_type)) { + uint32_t factor = + dzn_descriptor_type_depends_on_shader_usage(desc_type) ? 2 : 1; + dynamic_ranges_offset += bindings[i].descriptorCount * factor; + } + } + } + + /* We need to allocate descriptor set layouts off the device allocator + * with DEVICE scope because they are reference counted and may not be + * destroyed when vkDestroyDescriptorSetLayout is called. + */ + VK_MULTIALLOC(ma); + VK_MULTIALLOC_DECL(&ma, struct dzn_descriptor_set_layout, set_layout, 1); + VK_MULTIALLOC_DECL(&ma, D3D12_DESCRIPTOR_RANGE1, + ranges, total_ranges); + VK_MULTIALLOC_DECL(&ma, D3D12_STATIC_SAMPLER_DESC, static_samplers, + static_sampler_count); + VK_MULTIALLOC_DECL(&ma, const struct dzn_sampler *, immutable_samplers, + immutable_sampler_count); + VK_MULTIALLOC_DECL(&ma, struct dzn_descriptor_set_layout_binding, binfos, + binding_count); + + if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, pAllocator, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT)) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + vk_object_base_init(&device->vk, &set_layout->base, VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT); + set_layout->static_samplers = static_samplers; + set_layout->static_sampler_count = static_sampler_count; + set_layout->immutable_samplers = immutable_samplers; + set_layout->immutable_sampler_count = immutable_sampler_count; + set_layout->bindings = binfos; + set_layout->binding_count = binding_count; + set_layout->dynamic_buffers.range_offset = dynamic_ranges_offset; + + for (uint32_t i = 0; i < MAX_SHADER_VISIBILITIES; i++) { + dzn_foreach_pool_type (type) { + if (range_count[i][type]) { + set_layout->ranges[i][type] = ranges; + set_layout->range_count[i][type] = range_count[i][type]; + ranges += range_count[i][type]; + } + } + } + + VkDescriptorSetLayoutBinding *ordered_bindings; + VkResult ret = + vk_create_sorted_bindings(pCreateInfo->pBindings, + pCreateInfo->bindingCount, + &ordered_bindings); + if (ret != VK_SUCCESS) + return ret; + + assert(binding_count == + (pCreateInfo->bindingCount ?
+ (ordered_bindings[pCreateInfo->bindingCount - 1].binding + 1) : 0)); + + uint32_t range_idx[MAX_SHADER_VISIBILITIES][NUM_POOL_TYPES] = { 0 }; + uint32_t static_sampler_idx = 0, immutable_sampler_idx = 0; + uint32_t dynamic_buffer_idx = 0; + uint32_t base_register = 0; + + for (uint32_t i = 0; i < binding_count; i++) { + binfos[i].static_sampler_idx = ~0; + binfos[i].immutable_sampler_idx = ~0; + binfos[i].dynamic_buffer_idx = ~0; + dzn_foreach_pool_type (type) + binfos[i].range_idx[type] = ~0; + } + + for (uint32_t i = 0; i < pCreateInfo->bindingCount; i++) { + VkDescriptorType desc_type = ordered_bindings[i].descriptorType; + uint32_t binding = ordered_bindings[i].binding; + uint32_t desc_count = ordered_bindings[i].descriptorCount; + bool has_sampler = + desc_type == VK_DESCRIPTOR_TYPE_SAMPLER || + desc_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + bool has_immutable_samplers = + has_sampler && + ordered_bindings[i].pImmutableSamplers != NULL; + bool has_static_sampler = has_immutable_samplers && desc_count == 1; + bool is_dynamic = is_dynamic_desc_type(desc_type); + + D3D12_SHADER_VISIBILITY visibility = + translate_desc_visibility(ordered_bindings[i].stageFlags); + binfos[binding].type = desc_type; + binfos[binding].visibility = visibility; + binfos[binding].base_shader_register = base_register; + assert(base_register + desc_count >= base_register); + base_register += desc_count; + + if (has_static_sampler) { + VK_FROM_HANDLE(dzn_sampler, sampler, ordered_bindings[i].pImmutableSamplers[0]); + + /* Not all border colors are supported. */ + if (sampler->static_border_color != -1) { + binfos[binding].static_sampler_idx = static_sampler_idx; + D3D12_STATIC_SAMPLER_DESC *desc = (D3D12_STATIC_SAMPLER_DESC *) + &static_samplers[static_sampler_idx]; + + desc->Filter = sampler->desc.Filter; + desc->AddressU = sampler->desc.AddressU; + desc->AddressV = sampler->desc.AddressV; + desc->AddressW = sampler->desc.AddressW; + desc->MipLODBias = sampler->desc.MipLODBias; + desc->MaxAnisotropy = sampler->desc.MaxAnisotropy; + desc->ComparisonFunc = sampler->desc.ComparisonFunc; + desc->BorderColor = sampler->static_border_color; + desc->MinLOD = sampler->desc.MinLOD; + desc->MaxLOD = sampler->desc.MaxLOD; + desc->ShaderRegister = binfos[binding].base_shader_register; + desc->ShaderVisibility = translate_desc_visibility(ordered_bindings[i].stageFlags); + static_sampler_idx++; + } else { + has_static_sampler = false; + } + } + + if (has_immutable_samplers && !has_static_sampler) { + binfos[binding].immutable_sampler_idx = immutable_sampler_idx; + for (uint32_t s = 0; s < desc_count; s++) { + VK_FROM_HANDLE(dzn_sampler, sampler, ordered_bindings[i].pImmutableSamplers[s]); + + immutable_samplers[immutable_sampler_idx++] = sampler; + } + } + + if (is_dynamic) { + binfos[binding].dynamic_buffer_idx = dynamic_buffer_idx; + for (uint32_t d = 0; d < desc_count; d++) + set_layout->dynamic_buffers.bindings[dynamic_buffer_idx + d] = binding; + dynamic_buffer_idx += desc_count; + assert(dynamic_buffer_idx <= MAX_DYNAMIC_BUFFERS); + } + + if (!ordered_bindings[i].descriptorCount) + continue; + + unsigned num_descs = + num_descs_for_type(desc_type, has_static_sampler); + if (!num_descs) continue; + + assert(visibility < ARRAY_SIZE(set_layout->ranges)); + + bool has_range[NUM_POOL_TYPES] = { 0 }; + has_range[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER] = + has_sampler && !has_static_sampler; + has_range[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = + desc_type != VK_DESCRIPTOR_TYPE_SAMPLER; + + dzn_foreach_pool_type (type) 
{ + if (!has_range[type]) continue; + + uint32_t idx = range_idx[visibility][type]++; + assert(idx < range_count[visibility][type]); + + binfos[binding].range_idx[type] = idx; + D3D12_DESCRIPTOR_RANGE1 *range = (D3D12_DESCRIPTOR_RANGE1 *) + &set_layout->ranges[visibility][type][idx]; + VkDescriptorType range_type = desc_type; + if (desc_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) { + range_type = type == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER ? + VK_DESCRIPTOR_TYPE_SAMPLER : + VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + } + range->RangeType = desc_type_to_range_type(range_type, false); + range->NumDescriptors = desc_count; + range->BaseShaderRegister = binfos[binding].base_shader_register; + range->Flags = type == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER ? + D3D12_DESCRIPTOR_RANGE_FLAG_NONE : + D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_STATIC_KEEPING_BUFFER_BOUNDS_CHECKS; + if (is_dynamic) { + range->OffsetInDescriptorsFromTableStart = + set_layout->dynamic_buffers.range_offset + + set_layout->dynamic_buffers.desc_count; + set_layout->dynamic_buffers.count += range->NumDescriptors; + set_layout->dynamic_buffers.desc_count += range->NumDescriptors; + } else { + range->OffsetInDescriptorsFromTableStart = set_layout->range_desc_count[type]; + set_layout->range_desc_count[type] += range->NumDescriptors; + } + + if (!dzn_descriptor_type_depends_on_shader_usage(desc_type)) + continue; + + assert(idx + 1 < range_count[visibility][type]); + range_idx[visibility][type]++; + range[1] = range[0]; + range++; + range->RangeType = desc_type_to_range_type(range_type, true); + if (is_dynamic) { + range->OffsetInDescriptorsFromTableStart = + set_layout->dynamic_buffers.range_offset + + set_layout->dynamic_buffers.desc_count; + set_layout->dynamic_buffers.desc_count += range->NumDescriptors; + } else { + range->OffsetInDescriptorsFromTableStart = set_layout->range_desc_count[type]; + set_layout->range_desc_count[type] += range->NumDescriptors; + } + } + } + + free(ordered_bindings); + + *out = dzn_descriptor_set_layout_to_handle(set_layout); + return VK_SUCCESS; +} + +static uint32_t +dzn_descriptor_set_layout_get_heap_offset(const struct dzn_descriptor_set_layout *layout, + uint32_t b, + D3D12_DESCRIPTOR_HEAP_TYPE type, + bool writeable) +{ + assert(b < layout->binding_count); + D3D12_SHADER_VISIBILITY visibility = layout->bindings[b].visibility; + assert(visibility < ARRAY_SIZE(layout->ranges)); + assert(type < NUM_POOL_TYPES); + + uint32_t range_idx = layout->bindings[b].range_idx[type]; + + if (range_idx == ~0) + return ~0; + + if (writeable && + !dzn_descriptor_type_depends_on_shader_usage(layout->bindings[b].type)) + return ~0; + + if (writeable) + range_idx++; + + assert(range_idx < layout->range_count[visibility][type]); + return layout->ranges[visibility][type][range_idx].OffsetInDescriptorsFromTableStart; +} + +static uint32_t +dzn_descriptor_set_layout_get_desc_count(const struct dzn_descriptor_set_layout *layout, + uint32_t b) +{ + D3D12_SHADER_VISIBILITY visibility = layout->bindings[b].visibility; + assert(visibility < ARRAY_SIZE(layout->ranges)); + + dzn_foreach_pool_type (type) { + uint32_t range_idx = layout->bindings[b].range_idx[type]; + assert(range_idx == ~0 || range_idx < layout->range_count[visibility][type]); + + if (range_idx != ~0) + return layout->ranges[visibility][type][range_idx].NumDescriptors; + } + + return 0; +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_CreateDescriptorSetLayout(VkDevice device, + const VkDescriptorSetLayoutCreateInfo *pCreateInfo, + const VkAllocationCallbacks 
*pAllocator, + VkDescriptorSetLayout *pSetLayout) +{ + return dzn_descriptor_set_layout_create(dzn_device_from_handle(device), + pCreateInfo, pAllocator, pSetLayout); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_DestroyDescriptorSetLayout(VkDevice device, + VkDescriptorSetLayout descriptorSetLayout, + const VkAllocationCallbacks *pAllocator) +{ + dzn_descriptor_set_layout_destroy(dzn_descriptor_set_layout_from_handle(descriptorSetLayout), + pAllocator); +} + +static void +dzn_pipeline_layout_destroy(struct dzn_pipeline_layout *layout) +{ + struct dzn_device *device = container_of(layout->base.device, struct dzn_device, vk); + + if (layout->root.sig) + ID3D12RootSignature_Release(layout->root.sig); + + vk_free(&device->vk.alloc, layout); +} + +// Reserve two root parameters for the push constants and sysvals CBVs. +#define MAX_INTERNAL_ROOT_PARAMS 2 + +// One root parameter for samplers and the other one for views, multiplied by +// the number of visibility combinations, plus the internal root parameters. +#define MAX_ROOT_PARAMS ((MAX_SHADER_VISIBILITIES * 2) + MAX_INTERNAL_ROOT_PARAMS) + +// Maximum number of DWORDS (32-bit words) that can be used for a root signature +#define MAX_ROOT_DWORDS 64 + +static VkResult +dzn_pipeline_layout_create(struct dzn_device *device, + const VkPipelineLayoutCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkPipelineLayout *out) +{ + uint32_t binding_count = 0; + + for (uint32_t s = 0; s < pCreateInfo->setLayoutCount; s++) { + VK_FROM_HANDLE(dzn_descriptor_set_layout, set_layout, pCreateInfo->pSetLayouts[s]); + + if (!set_layout) + continue; + + binding_count += set_layout->binding_count; + } + + VK_MULTIALLOC(ma); + VK_MULTIALLOC_DECL(&ma, struct dzn_pipeline_layout, layout, 1); + VK_MULTIALLOC_DECL(&ma, struct dxil_spirv_vulkan_binding, + bindings, binding_count); + + if (!vk_multialloc_zalloc(&ma, &device->vk.alloc, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE)) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + vk_object_base_init(&device->vk, &layout->base, VK_OBJECT_TYPE_PIPELINE_LAYOUT); + + for (uint32_t s = 0; s < pCreateInfo->setLayoutCount; s++) { + VK_FROM_HANDLE(dzn_descriptor_set_layout, set_layout, pCreateInfo->pSetLayouts[s]); + + if (!set_layout || !set_layout->binding_count) + continue; + + layout->binding_translation[s].bindings = bindings; + bindings += set_layout->binding_count; + } + + uint32_t range_count = 0, static_sampler_count = 0; + + p_atomic_set(&layout->refcount, 1); + + layout->root.param_count = 0; + dzn_foreach_pool_type (type) + layout->desc_count[type] = 0; + + layout->set_count = pCreateInfo->setLayoutCount; + for (uint32_t j = 0; j < layout->set_count; j++) { + VK_FROM_HANDLE(dzn_descriptor_set_layout, set_layout, pCreateInfo->pSetLayouts[j]); + struct dxil_spirv_vulkan_binding *bindings = layout->binding_translation[j].bindings; + + layout->sets[j].dynamic_buffer_count = set_layout->dynamic_buffers.count; + memcpy(layout->sets[j].range_desc_count, set_layout->range_desc_count, + sizeof(layout->sets[j].range_desc_count)); + layout->binding_translation[j].binding_count = set_layout->binding_count; + for (uint32_t b = 0; b < set_layout->binding_count; b++) + bindings[b].base_register = set_layout->bindings[b].base_shader_register; + + static_sampler_count += set_layout->static_sampler_count; + dzn_foreach_pool_type (type) { + layout->sets[j].heap_offsets[type] = layout->desc_count[type]; + layout->desc_count[type] += set_layout->range_desc_count[type]; + for (uint32_t i = 0; i < MAX_SHADER_VISIBILITIES; 
i++) + range_count += set_layout->range_count[i][type]; + } + + layout->desc_count[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] += + set_layout->dynamic_buffers.desc_count; + for (uint32_t o = 0, elem = 0; o < set_layout->dynamic_buffers.count; o++, elem++) { + uint32_t b = set_layout->dynamic_buffers.bindings[o]; + + if (o > 0 && set_layout->dynamic_buffers.bindings[o - 1] != b) + elem = 0; + + uint32_t srv = + dzn_descriptor_set_layout_get_heap_offset(set_layout, b, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, false); + uint32_t uav = + dzn_descriptor_set_layout_get_heap_offset(set_layout, b, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, true); + + layout->sets[j].dynamic_buffer_heap_offsets[o].srv = srv != ~0 ? srv + elem : ~0; + layout->sets[j].dynamic_buffer_heap_offsets[o].uav = uav != ~0 ? uav + elem : ~0; + } + } + + D3D12_DESCRIPTOR_RANGE1 *ranges = (D3D12_DESCRIPTOR_RANGE1 *) + vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*ranges) * range_count, 8, + VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); + if (range_count && !ranges) { + dzn_pipeline_layout_destroy(layout); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + } + + D3D12_STATIC_SAMPLER_DESC *static_sampler_descs = (D3D12_STATIC_SAMPLER_DESC *) + vk_alloc2(&device->vk.alloc, pAllocator, + sizeof(*static_sampler_descs) * static_sampler_count, 8, + VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); + if (static_sampler_count && !static_sampler_descs) { + vk_free2(&device->vk.alloc, pAllocator, ranges); + dzn_pipeline_layout_destroy(layout); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + } + + + D3D12_ROOT_PARAMETER1 root_params[MAX_ROOT_PARAMS] = { 0 }; + D3D12_DESCRIPTOR_RANGE1 *range_ptr = ranges; + D3D12_ROOT_PARAMETER1 *root_param; + uint32_t root_dwords = 0; + + for (uint32_t i = 0; i < MAX_SHADER_VISIBILITIES; i++) { + dzn_foreach_pool_type (type) { + root_param = &root_params[layout->root.param_count]; + root_param->ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + root_param->DescriptorTable.pDescriptorRanges = range_ptr; + root_param->DescriptorTable.NumDescriptorRanges = 0; + root_param->ShaderVisibility = (D3D12_SHADER_VISIBILITY)i; + + for (uint32_t j = 0; j < pCreateInfo->setLayoutCount; j++) { + VK_FROM_HANDLE(dzn_descriptor_set_layout, set_layout, pCreateInfo->pSetLayouts[j]); + uint32_t range_count = set_layout->range_count[i][type]; + + memcpy(range_ptr, set_layout->ranges[i][type], + range_count * sizeof(D3D12_DESCRIPTOR_RANGE1)); + for (uint32_t k = 0; k < range_count; k++) { + range_ptr[k].RegisterSpace = j; + range_ptr[k].OffsetInDescriptorsFromTableStart += + layout->sets[j].heap_offsets[type]; + } + root_param->DescriptorTable.NumDescriptorRanges += range_count; + range_ptr += range_count; + } + + if (root_param->DescriptorTable.NumDescriptorRanges) { + layout->root.type[layout->root.param_count++] = (D3D12_DESCRIPTOR_HEAP_TYPE)type; + root_dwords++; + } + } + } + + layout->root.sets_param_count = layout->root.param_count; + + /* Add our sysval CBV, and make it visible to all shaders */ + layout->root.sysval_cbv_param_idx = layout->root.param_count; + root_param = &root_params[layout->root.param_count++]; + root_param->ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; + root_param->Descriptor.RegisterSpace = DZN_REGISTER_SPACE_SYSVALS; + root_param->Constants.ShaderRegister = 0; + root_param->Constants.Num32BitValues = + DIV_ROUND_UP(MAX2(sizeof(struct dxil_spirv_vertex_runtime_data), + sizeof(struct dxil_spirv_compute_runtime_data)), + 4); + root_param->ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; 
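+ /* Descriptor tables cost one DWORD each, and 32-bit root constants cost + * one DWORD per value, out of the MAX_ROOT_DWORDS (64) root-signature + * budget tracked in root_dwords. + */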
+ root_dwords += root_param->Constants.Num32BitValues; + + D3D12_STATIC_SAMPLER_DESC *static_sampler_ptr = static_sampler_descs; + for (uint32_t j = 0; j < pCreateInfo->setLayoutCount; j++) { + VK_FROM_HANDLE(dzn_descriptor_set_layout, set_layout, pCreateInfo->pSetLayouts[j]); + + memcpy(static_sampler_ptr, set_layout->static_samplers, + set_layout->static_sampler_count * sizeof(*set_layout->static_samplers)); + if (j > 0) { + for (uint32_t k = 0; k < set_layout->static_sampler_count; k++) + static_sampler_ptr[k].RegisterSpace = j; + } + static_sampler_ptr += set_layout->static_sampler_count; + } + + uint32_t push_constant_size = 0; + uint32_t push_constant_flags = 0; + for (uint32_t j = 0; j < pCreateInfo->pushConstantRangeCount; j++) { + const VkPushConstantRange* range = pCreateInfo->pPushConstantRanges + j; + push_constant_size = MAX2(push_constant_size, range->offset + range->size); + push_constant_flags |= range->stageFlags; + } + + if (push_constant_size > 0) { + layout->root.push_constant_cbv_param_idx = layout->root.param_count; + D3D12_ROOT_PARAMETER1 *root_param = &root_params[layout->root.param_count++]; + + root_param->ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; + root_param->Constants.ShaderRegister = 0; + root_param->Constants.Num32BitValues = ALIGN(push_constant_size, 4) / 4; + root_param->Constants.RegisterSpace = DZN_REGISTER_SPACE_PUSH_CONSTANT; + root_param->ShaderVisibility = translate_desc_visibility(push_constant_flags); + root_dwords += root_param->Constants.Num32BitValues; + } + + assert(layout->root.param_count <= ARRAY_SIZE(root_params)); + assert(root_dwords <= MAX_ROOT_DWORDS); + + D3D12_VERSIONED_ROOT_SIGNATURE_DESC root_sig_desc = { + .Version = D3D_ROOT_SIGNATURE_VERSION_1_1, + .Desc_1_1 = { + .NumParameters = layout->root.param_count, + .pParameters = layout->root.param_count ? 
root_params : NULL, + .NumStaticSamplers =static_sampler_count, + .pStaticSamplers = static_sampler_descs, + /* TODO Only enable this flag when needed (optimization) */ + .Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT, + }, + }; + + layout->root.sig = dzn_device_create_root_sig(device, &root_sig_desc); + vk_free2(&device->vk.alloc, pAllocator, ranges); + vk_free2(&device->vk.alloc, pAllocator, static_sampler_descs); + + if (!layout->root.sig) { + dzn_pipeline_layout_destroy(layout); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + } + + *out = dzn_pipeline_layout_to_handle(layout); + return VK_SUCCESS; +} + +struct dzn_pipeline_layout * +dzn_pipeline_layout_ref(struct dzn_pipeline_layout *layout) +{ + if (layout) + p_atomic_inc(&layout->refcount); + + return layout; +} + +void +dzn_pipeline_layout_unref(struct dzn_pipeline_layout *layout) +{ + if (layout) { + if (p_atomic_dec_zero(&layout->refcount)) + dzn_pipeline_layout_destroy(layout); + } +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_CreatePipelineLayout(VkDevice device, + const VkPipelineLayoutCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkPipelineLayout *pPipelineLayout) +{ + return dzn_pipeline_layout_create(dzn_device_from_handle(device), + pCreateInfo, pAllocator, pPipelineLayout); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_DestroyPipelineLayout(VkDevice device, + VkPipelineLayout layout, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(dzn_pipeline_layout, playout, layout); + + dzn_pipeline_layout_unref(playout); +} + +static D3D12_DESCRIPTOR_HEAP_TYPE +desc_type_to_heap_type(VkDescriptorType in) +{ + switch (in) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + return D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER; + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + return D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + default: + unreachable("Unsupported desc type"); + } +} + +static void +dzn_descriptor_heap_finish(struct dzn_descriptor_heap *heap) +{ + if (heap->heap) + ID3D12DescriptorHeap_Release(heap->heap); + + if (heap->dev) + ID3D12Device_Release(heap->dev); +} + +static VkResult +dzn_descriptor_heap_init(struct dzn_descriptor_heap *heap, + struct dzn_device *device, + D3D12_DESCRIPTOR_HEAP_TYPE type, + uint32_t desc_count, + bool shader_visible) +{ + heap->desc_count = desc_count; + heap->type = type; + heap->dev = device->dev; + ID3D12Device1_AddRef(heap->dev); + heap->desc_sz = ID3D12Device1_GetDescriptorHandleIncrementSize(device->dev, type); + + D3D12_DESCRIPTOR_HEAP_DESC desc = { + .Type = type, + .NumDescriptors = desc_count, + .Flags = shader_visible ? + D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE : + D3D12_DESCRIPTOR_HEAP_FLAG_NONE, + }; + + if (FAILED(ID3D12Device1_CreateDescriptorHeap(device->dev, &desc, + &IID_ID3D12DescriptorHeap, + &heap->heap))) { + return vk_error(device, + shader_visible ? 
+ VK_ERROR_OUT_OF_DEVICE_MEMORY : VK_ERROR_OUT_OF_HOST_MEMORY); + } + + D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle; + ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(heap->heap, &cpu_handle); + heap->cpu_base = cpu_handle.ptr; + if (shader_visible) { + D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle; + ID3D12DescriptorHeap_GetGPUDescriptorHandleForHeapStart(heap->heap, &gpu_handle); + heap->gpu_base = gpu_handle.ptr; + } + + return VK_SUCCESS; +} + +D3D12_CPU_DESCRIPTOR_HANDLE +dzn_descriptor_heap_get_cpu_handle(const struct dzn_descriptor_heap *heap, uint32_t desc_offset) +{ + return (D3D12_CPU_DESCRIPTOR_HANDLE) { + .ptr = heap->cpu_base + (desc_offset * heap->desc_sz), + }; +} + +D3D12_GPU_DESCRIPTOR_HANDLE +dzn_descriptor_heap_get_gpu_handle(const struct dzn_descriptor_heap *heap, uint32_t desc_offset) +{ + return (D3D12_GPU_DESCRIPTOR_HANDLE) { + .ptr = heap->gpu_base ? heap->gpu_base + (desc_offset * heap->desc_sz) : 0, + }; +} + +static void +dzn_descriptor_heap_write_sampler_desc(struct dzn_descriptor_heap *heap, + uint32_t desc_offset, + const struct dzn_sampler *sampler) +{ + ID3D12Device1_CreateSampler(heap->dev, &sampler->desc, + dzn_descriptor_heap_get_cpu_handle(heap, desc_offset)); +} + +void +dzn_descriptor_heap_write_image_view_desc(struct dzn_descriptor_heap *heap, + uint32_t desc_offset, + bool writeable, bool cube_as_2darray, + const struct dzn_image_view *iview) +{ + D3D12_CPU_DESCRIPTOR_HANDLE view_handle = + dzn_descriptor_heap_get_cpu_handle(heap, desc_offset); + struct dzn_image *image = container_of(iview->vk.image, struct dzn_image, vk); + + if (writeable) { + ID3D12Device1_CreateUnorderedAccessView(heap->dev, image->res, NULL, &iview->uav_desc, view_handle); + } else if (cube_as_2darray && + (iview->srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURECUBEARRAY || + iview->srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURECUBE)) { + D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = iview->srv_desc; + srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY; + srv_desc.Texture2DArray.PlaneSlice = 0; + if (iview->srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURECUBEARRAY) { + srv_desc.Texture2DArray.MostDetailedMip = + iview->srv_desc.TextureCubeArray.MostDetailedMip; + srv_desc.Texture2DArray.MipLevels = + iview->srv_desc.TextureCubeArray.MipLevels; + srv_desc.Texture2DArray.FirstArraySlice = + iview->srv_desc.TextureCubeArray.First2DArrayFace; + srv_desc.Texture2DArray.ArraySize = + iview->srv_desc.TextureCubeArray.NumCubes * 6; + } else { + srv_desc.Texture2DArray.MostDetailedMip = + iview->srv_desc.TextureCube.MostDetailedMip; + srv_desc.Texture2DArray.MipLevels = + iview->srv_desc.TextureCube.MipLevels; + srv_desc.Texture2DArray.FirstArraySlice = 0; + srv_desc.Texture2DArray.ArraySize = 6; + } + + ID3D12Device1_CreateShaderResourceView(heap->dev, image->res, &srv_desc, view_handle); + } else { + ID3D12Device1_CreateShaderResourceView(heap->dev, image->res, &iview->srv_desc, view_handle); + } +} + +static void +dzn_descriptor_heap_write_buffer_view_desc(struct dzn_descriptor_heap *heap, + uint32_t desc_offset, + bool writeable, + const struct dzn_buffer_view *bview) +{ + D3D12_CPU_DESCRIPTOR_HANDLE view_handle = + dzn_descriptor_heap_get_cpu_handle(heap, desc_offset); + + if (writeable) + ID3D12Device1_CreateUnorderedAccessView(heap->dev, bview->buffer->res, NULL, &bview->uav_desc, view_handle); + else + ID3D12Device1_CreateShaderResourceView(heap->dev, bview->buffer->res, &bview->srv_desc, view_handle); +} + +void +dzn_descriptor_heap_write_buffer_desc(struct 
dzn_descriptor_heap *heap, + uint32_t desc_offset, + bool writeable, + const struct dzn_buffer_desc *info) +{ + D3D12_CPU_DESCRIPTOR_HANDLE view_handle = + dzn_descriptor_heap_get_cpu_handle(heap, desc_offset); + + VkDeviceSize size = + info->range == VK_WHOLE_SIZE ? + info->buffer->size - info->offset : + info->range; + + if (info->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER || + info->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) { + assert(!writeable); + D3D12_CONSTANT_BUFFER_VIEW_DESC cbv_desc = { + .BufferLocation = ID3D12Resource_GetGPUVirtualAddress(info->buffer->res) + info->offset, + .SizeInBytes = ALIGN_POT(size, 256), + }; + ID3D12Device1_CreateConstantBufferView(heap->dev, &cbv_desc, view_handle); + } else if (writeable) { + D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = { + .Format = DXGI_FORMAT_R32_TYPELESS, + .ViewDimension = D3D12_UAV_DIMENSION_BUFFER, + .Buffer = { + .FirstElement = info->offset / sizeof(uint32_t), + .NumElements = (UINT)size / sizeof(uint32_t), + .Flags = D3D12_BUFFER_UAV_FLAG_RAW, + }, + }; + ID3D12Device1_CreateUnorderedAccessView(heap->dev, info->buffer->res, NULL, &uav_desc, view_handle); + } else { + D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = { + .Format = DXGI_FORMAT_R32_TYPELESS, + .ViewDimension = D3D12_SRV_DIMENSION_BUFFER, + .Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, + .Buffer = { + .FirstElement = info->offset / sizeof(uint32_t), + .NumElements = (UINT)size / sizeof(uint32_t), + .Flags = D3D12_BUFFER_SRV_FLAG_RAW, + }, + }; + ID3D12Device1_CreateShaderResourceView(heap->dev, info->buffer->res, &srv_desc, view_handle); + } +} + +void +dzn_descriptor_heap_copy(struct dzn_descriptor_heap *dst_heap, + uint32_t dst_offset, + const struct dzn_descriptor_heap *src_heap, + uint32_t src_offset, + uint32_t desc_count) +{ + D3D12_CPU_DESCRIPTOR_HANDLE dst_handle = + dzn_descriptor_heap_get_cpu_handle(dst_heap, dst_offset); + D3D12_CPU_DESCRIPTOR_HANDLE src_handle = + dzn_descriptor_heap_get_cpu_handle(src_heap, src_offset); + + ID3D12Device1_CopyDescriptorsSimple(dst_heap->dev, desc_count, + dst_handle, + src_handle, + dst_heap->type); +} + +struct dzn_descriptor_set_ptr { + uint32_t binding, elem; +}; + +static void +dzn_descriptor_set_ptr_validate(const struct dzn_descriptor_set *set, + struct dzn_descriptor_set_ptr *ptr) +{ + + if (ptr->binding >= set->layout->binding_count) { + ptr->binding = ~0; + ptr->elem = ~0; + return; + } + + uint32_t desc_count = + dzn_descriptor_set_layout_get_desc_count(set->layout, ptr->binding); + if (ptr->elem >= desc_count) { + ptr->binding = ~0; + ptr->elem = ~0; + } +} + +static void +dzn_descriptor_set_ptr_init(const struct dzn_descriptor_set *set, + struct dzn_descriptor_set_ptr *ptr, + uint32_t binding, uint32_t elem) +{ + ptr->binding = binding; + ptr->elem = elem; + dzn_descriptor_set_ptr_validate(set, ptr); +} + +static void +dzn_descriptor_set_ptr_move(const struct dzn_descriptor_set *set, + struct dzn_descriptor_set_ptr *ptr, + uint32_t count) +{ + if (ptr->binding == ~0) + return; + + while (count) { + uint32_t desc_count = + dzn_descriptor_set_layout_get_desc_count(set->layout, ptr->binding); + + if (count >= desc_count - ptr->elem) { + count -= desc_count - ptr->elem; + ptr->binding++; + ptr->elem = 0; + } else { + ptr->elem += count; + count = 0; + } + } + + dzn_descriptor_set_ptr_validate(set, ptr); +} + +static bool +dzn_descriptor_set_ptr_is_valid(const struct dzn_descriptor_set_ptr *ptr) +{ + return ptr->binding != ~0 && ptr->elem != ~0; +} + +static uint32_t 
+dzn_descriptor_set_remaining_descs_in_binding(const struct dzn_descriptor_set *set, + const struct dzn_descriptor_set_ptr *ptr) +{ + if (ptr->binding >= set->layout->binding_count) + return 0; + + uint32_t desc_count = + dzn_descriptor_set_layout_get_desc_count(set->layout, ptr->binding); + + return desc_count >= ptr->elem ? desc_count - ptr->elem : 0; +} + + +static uint32_t +dzn_descriptor_set_get_heap_offset(const struct dzn_descriptor_set *set, + D3D12_DESCRIPTOR_HEAP_TYPE type, + const struct dzn_descriptor_set_ptr *ptr, + bool writeable) +{ + if (ptr->binding == ~0) + return ~0; + + uint32_t base = + dzn_descriptor_set_layout_get_heap_offset(set->layout, ptr->binding, type, writeable); + if (base == ~0) + return ~0; + + return base + ptr->elem; +} + +static void +dzn_descriptor_set_write_sampler_desc(struct dzn_descriptor_set *set, + const struct dzn_descriptor_set_ptr *ptr, + const struct dzn_sampler *sampler) +{ + D3D12_DESCRIPTOR_HEAP_TYPE type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER; + uint32_t heap_offset = + dzn_descriptor_set_get_heap_offset(set, type, ptr, false); + + if (heap_offset != ~0) { + mtx_lock(&set->pool->defragment_lock); + dzn_descriptor_heap_write_sampler_desc(&set->pool->heaps[type], + set->heap_offsets[type] + heap_offset, + sampler); + mtx_unlock(&set->pool->defragment_lock); + } +} + +static uint32_t +dzn_descriptor_set_get_dynamic_buffer_idx(const struct dzn_descriptor_set *set, + const struct dzn_descriptor_set_ptr *ptr) +{ + if (ptr->binding == ~0) + return ~0; + + uint32_t base = set->layout->bindings[ptr->binding].dynamic_buffer_idx; + + if (base == ~0) + return ~0; + + return base + ptr->elem; +} + +static void +dzn_descriptor_set_write_dynamic_buffer_desc(struct dzn_descriptor_set *set, + const struct dzn_descriptor_set_ptr *ptr, + const struct dzn_buffer_desc *info) +{ + uint32_t dynamic_buffer_idx = + dzn_descriptor_set_get_dynamic_buffer_idx(set, ptr); + if (dynamic_buffer_idx == ~0) + return; + + assert(dynamic_buffer_idx < set->layout->dynamic_buffers.count); + set->dynamic_buffers[dynamic_buffer_idx] = *info; +} + +static VkDescriptorType +dzn_descriptor_set_get_desc_vk_type(const struct dzn_descriptor_set *set, + const struct dzn_descriptor_set_ptr *ptr) +{ + if (ptr->binding >= set->layout->binding_count) + return (VkDescriptorType)~0; + + return set->layout->bindings[ptr->binding].type; +} + +static void +dzn_descriptor_set_write_image_view_desc(struct dzn_descriptor_set *set, + const struct dzn_descriptor_set_ptr *ptr, + bool cube_as_2darray, + const struct dzn_image_view *iview) +{ + D3D12_DESCRIPTOR_HEAP_TYPE type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + uint32_t heap_offset = + dzn_descriptor_set_get_heap_offset(set, type, ptr, false); + if (heap_offset == ~0) + return; + + mtx_lock(&set->pool->defragment_lock); + dzn_descriptor_heap_write_image_view_desc(&set->pool->heaps[type], + set->heap_offsets[type] + heap_offset, + false, cube_as_2darray, + iview); + + VkDescriptorType vk_type = dzn_descriptor_set_get_desc_vk_type(set, ptr); + if (dzn_descriptor_type_depends_on_shader_usage(vk_type)) { + heap_offset = + dzn_descriptor_set_get_heap_offset(set, type, ptr, true); + assert(heap_offset != ~0); + dzn_descriptor_heap_write_image_view_desc(&set->pool->heaps[type], + set->heap_offsets[type] + heap_offset, + true, cube_as_2darray, + iview); + } + mtx_unlock(&set->pool->defragment_lock); +} + +static void +dzn_descriptor_set_write_buffer_view_desc(struct dzn_descriptor_set *set, + const struct dzn_descriptor_set_ptr *ptr, + const struct 
dzn_buffer_view *bview) +{ + D3D12_DESCRIPTOR_HEAP_TYPE type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + uint32_t heap_offset = + dzn_descriptor_set_get_heap_offset(set, type, ptr, false); + if (heap_offset == ~0) + return; + + mtx_lock(&set->pool->defragment_lock); + dzn_descriptor_heap_write_buffer_view_desc(&set->pool->heaps[type], + set->heap_offsets[type] + heap_offset, + false, bview); + + VkDescriptorType vk_type = dzn_descriptor_set_get_desc_vk_type(set, ptr); + if (dzn_descriptor_type_depends_on_shader_usage(vk_type)) { + heap_offset = + dzn_descriptor_set_get_heap_offset(set, type, ptr, true); + assert(heap_offset != ~0); + dzn_descriptor_heap_write_buffer_view_desc(&set->pool->heaps[type], + set->heap_offsets[type] + heap_offset, + true, bview); + } + mtx_unlock(&set->pool->defragment_lock); +} + +static void +dzn_descriptor_set_write_buffer_desc(struct dzn_descriptor_set *set, + const struct dzn_descriptor_set_ptr *ptr, + const struct dzn_buffer_desc *bdesc) +{ + D3D12_DESCRIPTOR_HEAP_TYPE type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + uint32_t heap_offset = + dzn_descriptor_set_get_heap_offset(set, type, ptr, false); + if (heap_offset == ~0) + return; + + mtx_lock(&set->pool->defragment_lock); + dzn_descriptor_heap_write_buffer_desc(&set->pool->heaps[type], + set->heap_offsets[type] + heap_offset, + false, bdesc); + + VkDescriptorType vk_type = dzn_descriptor_set_get_desc_vk_type(set, ptr); + if (dzn_descriptor_type_depends_on_shader_usage(vk_type)) { + heap_offset = + dzn_descriptor_set_get_heap_offset(set, type, ptr, true); + assert(heap_offset != ~0); + dzn_descriptor_heap_write_buffer_desc(&set->pool->heaps[type], + set->heap_offsets[type] + heap_offset, + true, bdesc); + } + mtx_unlock(&set->pool->defragment_lock); +} + +static void +dzn_descriptor_set_init(struct dzn_descriptor_set *set, + struct dzn_device *device, + struct dzn_descriptor_pool *pool, + struct dzn_descriptor_set_layout *layout) +{ + vk_object_base_init(&device->vk, &set->base, VK_OBJECT_TYPE_DESCRIPTOR_SET); + + set->pool = pool; + set->layout = layout; + + mtx_lock(&pool->defragment_lock); + dzn_foreach_pool_type(type) { + set->heap_offsets[type] = pool->free_offset[type]; + set->heap_sizes[type] = layout->range_desc_count[type]; + set->pool->free_offset[type] += layout->range_desc_count[type]; + } + mtx_unlock(&pool->defragment_lock); + + /* Pre-fill the immutable samplers */ + if (layout->immutable_sampler_count) { + for (uint32_t b = 0; b < layout->binding_count; b++) { + bool has_samplers = + layout->bindings[b].type == VK_DESCRIPTOR_TYPE_SAMPLER || + layout->bindings[b].type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + + if (!has_samplers || layout->bindings[b].immutable_sampler_idx == ~0) + continue; + + struct dzn_descriptor_set_ptr ptr; + const struct dzn_sampler **sampler = + &layout->immutable_samplers[layout->bindings[b].immutable_sampler_idx]; + for (dzn_descriptor_set_ptr_init(set, &ptr, b, 0); + dzn_descriptor_set_ptr_is_valid(&ptr); + dzn_descriptor_set_ptr_move(set, &ptr, 1)) { + dzn_descriptor_set_write_sampler_desc(set, &ptr, *sampler); + sampler++; + } + } + } +} + +static void +dzn_descriptor_set_finish(struct dzn_descriptor_set *set) +{ + vk_object_base_finish(&set->base); + set->pool = NULL; + set->layout = NULL; +} + +static void +dzn_descriptor_pool_destroy(struct dzn_descriptor_pool *pool, + const VkAllocationCallbacks *pAllocator) +{ + if (!pool) + return; + + struct dzn_device *device = container_of(pool->base.device, struct dzn_device, vk); + + dzn_foreach_pool_type 
(type) { + if (pool->desc_count[type]) + dzn_descriptor_heap_finish(&pool->heaps[type]); + } + + vk_object_base_finish(&pool->base); + vk_free2(&device->vk.alloc, pAllocator, pool); +} + +static VkResult +dzn_descriptor_pool_create(struct dzn_device *device, + const VkDescriptorPoolCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkDescriptorPool *out) +{ + VK_MULTIALLOC(ma); + VK_MULTIALLOC_DECL(&ma, struct dzn_descriptor_pool, pool, 1); + VK_MULTIALLOC_DECL(&ma, struct dzn_descriptor_set, sets, pCreateInfo->maxSets); + + if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, pAllocator, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT)) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + pool->alloc = pAllocator ? *pAllocator : device->vk.alloc; + pool->sets = sets; + pool->set_count = pCreateInfo->maxSets; + mtx_init(&pool->defragment_lock, mtx_plain); + + vk_object_base_init(&device->vk, &pool->base, VK_OBJECT_TYPE_DESCRIPTOR_POOL); + + for (uint32_t p = 0; p < pCreateInfo->poolSizeCount; p++) { + VkDescriptorType type = pCreateInfo->pPoolSizes[p].type; + uint32_t num_desc = pCreateInfo->pPoolSizes[p].descriptorCount; + + switch (type) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + pool->desc_count[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER] += num_desc; + break; + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + pool->desc_count[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] += num_desc; + pool->desc_count[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER] += num_desc; + break; + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + pool->desc_count[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] += num_desc; + break; + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + /* Reserve one UAV and one SRV slot for those. 
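Whether the shader will access them as an SRV (readonly) or a UAV (writeable) is only known at pipeline compile time, so both flavors are pre-allocated.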
*/ + pool->desc_count[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] += num_desc * 2; + break; + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + break; + default: + unreachable("Unsupported desc type"); + } + } + + dzn_foreach_pool_type (type) { + if (!pool->desc_count[type]) + continue; + + VkResult result = + dzn_descriptor_heap_init(&pool->heaps[type], device, type, pool->desc_count[type], false); + if (result != VK_SUCCESS) { + dzn_descriptor_pool_destroy(pool, pAllocator); + return result; + } + } + + *out = dzn_descriptor_pool_to_handle(pool); + return VK_SUCCESS; +} + +static VkResult +dzn_descriptor_pool_defragment_heap(struct dzn_descriptor_pool *pool, + D3D12_DESCRIPTOR_HEAP_TYPE type) +{ + struct dzn_device *device = container_of(pool->base.device, struct dzn_device, vk); + struct dzn_descriptor_heap new_heap; + + VkResult result = + dzn_descriptor_heap_init(&new_heap, device, type, + pool->heaps[type].desc_count, + false); + if (result != VK_SUCCESS) + return result; + + mtx_lock(&pool->defragment_lock); + uint32_t heap_offset = 0; + for (uint32_t s = 0; s < pool->set_count; s++) { + if (!pool->sets[s].layout) + continue; + + dzn_descriptor_heap_copy(&new_heap, heap_offset, + &pool->heaps[type], + pool->sets[s].heap_offsets[type], + pool->sets[s].heap_sizes[type]); + pool->sets[s].heap_offsets[type] = heap_offset; + heap_offset += pool->sets[s].heap_sizes[type]; + } + mtx_unlock(&pool->defragment_lock); + + dzn_descriptor_heap_finish(&pool->heaps[type]); + pool->heaps[type] = new_heap; + + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_CreateDescriptorPool(VkDevice device, + const VkDescriptorPoolCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkDescriptorPool *pDescriptorPool) +{ + return dzn_descriptor_pool_create(dzn_device_from_handle(device), + pCreateInfo, pAllocator, pDescriptorPool); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_DestroyDescriptorPool(VkDevice device, + VkDescriptorPool descriptorPool, + const VkAllocationCallbacks *pAllocator) +{ + dzn_descriptor_pool_destroy(dzn_descriptor_pool_from_handle(descriptorPool), + pAllocator); +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_ResetDescriptorPool(VkDevice device, + VkDescriptorPool descriptorPool, + VkDescriptorPoolResetFlags flags) +{ + VK_FROM_HANDLE(dzn_descriptor_pool, pool, descriptorPool); + + for (uint32_t s = 0; s < pool->set_count; s++) + dzn_descriptor_set_finish(&pool->sets[s]); + + dzn_foreach_pool_type(type) + pool->free_offset[type] = 0; + + return VK_SUCCESS; +} + +void +dzn_descriptor_heap_pool_finish(struct dzn_descriptor_heap_pool *pool) +{ + list_splicetail(&pool->active_heaps, &pool->free_heaps); + list_for_each_entry_safe(struct dzn_descriptor_heap_pool_entry, entry, &pool->free_heaps, link) { + list_del(&entry->link); + dzn_descriptor_heap_finish(&entry->heap); + vk_free(pool->alloc, entry); + } +} + +void +dzn_descriptor_heap_pool_init(struct dzn_descriptor_heap_pool *pool, + struct dzn_device *device, + D3D12_DESCRIPTOR_HEAP_TYPE type, + bool shader_visible, + const VkAllocationCallbacks *alloc) +{ + assert(!shader_visible || + type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV || + type == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); + + pool->alloc = alloc; + pool->type = type; + pool->shader_visible = shader_visible; + list_inithead(&pool->active_heaps); + list_inithead(&pool->free_heaps); + pool->offset = 0; + pool->desc_sz = ID3D12Device1_GetDescriptorHandleIncrementSize(device->dev, type); +} + +VkResult 
+dzn_descriptor_heap_pool_alloc_slots(struct dzn_descriptor_heap_pool *pool, + struct dzn_device *device, uint32_t desc_count, + struct dzn_descriptor_heap **heap, + uint32_t *first_slot) +{ + struct dzn_descriptor_heap *last_heap = + list_is_empty(&pool->active_heaps) ? + NULL : + &(list_last_entry(&pool->active_heaps, struct dzn_descriptor_heap_pool_entry, link)->heap); + uint32_t last_heap_desc_count = + last_heap ? last_heap->desc_count : 0; + + if (pool->offset + desc_count > last_heap_desc_count) { + uint32_t granularity = + (pool->type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV || + pool->type == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER) ? + 64 * 1024 : 4 * 1024; + uint32_t alloc_step = ALIGN_POT(desc_count * pool->desc_sz, granularity); + uint32_t heap_desc_count = MAX2(alloc_step / pool->desc_sz, 16); + + /* Maximum of 2048 samplers per heap when shader_visible is true. */ + if (pool->shader_visible && + pool->type == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER) { + assert(desc_count <= 2048); + heap_desc_count = MIN2(heap_desc_count, 2048); + } + + struct dzn_descriptor_heap_pool_entry *new_heap = NULL; + + list_for_each_entry_safe(struct dzn_descriptor_heap_pool_entry, entry, &pool->free_heaps, link) { + if (entry->heap.desc_count >= heap_desc_count) { + new_heap = entry; + list_del(&entry->link); + break; + } + } + + if (!new_heap) { + new_heap = (struct dzn_descriptor_heap_pool_entry *) + vk_zalloc(pool->alloc, sizeof(*new_heap), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!new_heap) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + VkResult result = + dzn_descriptor_heap_init(&new_heap->heap, device, pool->type, + heap_desc_count, pool->shader_visible); + if (result != VK_SUCCESS) { + vk_free(&device->vk.alloc, new_heap); + return result; + } + } + + list_addtail(&new_heap->link, &pool->active_heaps); + pool->offset = 0; + last_heap = &new_heap->heap; + } + + *heap = last_heap; + *first_slot = pool->offset; + pool->offset += desc_count; + return VK_SUCCESS; +} + +void +dzn_descriptor_heap_pool_reset(struct dzn_descriptor_heap_pool *pool) +{ + pool->offset = 0; + list_splicetail(&pool->active_heaps, &pool->free_heaps); + list_inithead(&pool->free_heaps); +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_AllocateDescriptorSets(VkDevice dev, + const VkDescriptorSetAllocateInfo *pAllocateInfo, + VkDescriptorSet *pDescriptorSets) +{ + VK_FROM_HANDLE(dzn_descriptor_pool, pool, pAllocateInfo->descriptorPool); + VK_FROM_HANDLE(dzn_device, device, dev); + VkResult result; + unsigned i; + + if (pAllocateInfo->descriptorSetCount > (pool->set_count - pool->used_set_count)) + return VK_ERROR_OUT_OF_POOL_MEMORY; + + uint32_t set_idx = 0; + for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) { + VK_FROM_HANDLE(dzn_descriptor_set_layout, layout, pAllocateInfo->pSetLayouts[i]); + + dzn_foreach_pool_type(type) { + if (pool->used_desc_count[type] + layout->range_desc_count[type] > pool->desc_count[type]) { + dzn_FreeDescriptorSets(dev, pAllocateInfo->descriptorPool, i, pDescriptorSets); + return vk_error(device, VK_ERROR_OUT_OF_POOL_MEMORY); + } + + if (pool->free_offset[type] + layout->range_desc_count[type] > pool->desc_count[type]) { + result = dzn_descriptor_pool_defragment_heap(pool, type); + if (result != VK_SUCCESS) { + dzn_FreeDescriptorSets(dev, pAllocateInfo->descriptorPool, i, pDescriptorSets); + return vk_error(device, VK_ERROR_FRAGMENTED_POOL); + } + } + } + + struct dzn_descriptor_set *set = NULL; + for (; set_idx < pool->set_count; set_idx++) { + if (!pool->sets[set_idx].layout) { + set 
= &pool->sets[set_idx]; + break; + } + } + + dzn_descriptor_set_init(set, device, pool, layout); + pDescriptorSets[i] = dzn_descriptor_set_to_handle(set); + } + + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_FreeDescriptorSets(VkDevice dev, + VkDescriptorPool descriptorPool, + uint32_t count, + const VkDescriptorSet *pDescriptorSets) +{ + VK_FROM_HANDLE(dzn_descriptor_pool, pool, descriptorPool); + VK_FROM_HANDLE(dzn_device, device, dev); + + for (uint32_t s = 0; s < count; s++) { + VK_FROM_HANDLE(dzn_descriptor_set, set, pDescriptorSets[s]); + + if (!set) + continue; + + assert(set->pool == pool); + + dzn_descriptor_set_finish(set); + } + + mtx_lock(&pool->defragment_lock); + dzn_foreach_pool_type(type) + pool->free_offset[type] = 0; + + for (uint32_t s = 0; s < pool->set_count; s++) { + const struct dzn_descriptor_set *set = &pool->sets[s]; + + if (set->layout) { + dzn_foreach_pool_type (type) { + pool->free_offset[type] = + MAX2(pool->free_offset[type], + set->heap_offsets[type] + + set->layout->range_desc_count[type]); + } + } + } + mtx_unlock(&pool->defragment_lock); + + return VK_SUCCESS; +} + +static void +dzn_descriptor_set_write(const VkWriteDescriptorSet *pDescriptorWrite) +{ + VK_FROM_HANDLE(dzn_descriptor_set, set, pDescriptorWrite->dstSet); + + struct dzn_descriptor_set_ptr ptr; + + dzn_descriptor_set_ptr_init(set, &ptr, + pDescriptorWrite->dstBinding, + pDescriptorWrite->dstArrayElement); + uint32_t desc_count = pDescriptorWrite->descriptorCount; + + uint32_t d = 0; + bool cube_as_2darray = + pDescriptorWrite->descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + + switch (pDescriptorWrite->descriptorType) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + for (; dzn_descriptor_set_ptr_is_valid(&ptr) && d < desc_count; + dzn_descriptor_set_ptr_move(set, &ptr, 1)) { + assert(dzn_descriptor_set_get_desc_vk_type(set, &ptr) == pDescriptorWrite->descriptorType); + const VkDescriptorImageInfo *pImageInfo = pDescriptorWrite->pImageInfo + d; + VK_FROM_HANDLE(dzn_sampler, sampler, pImageInfo->sampler); + + if (sampler) + dzn_descriptor_set_write_sampler_desc(set, &ptr, sampler); + + d++; + } + break; + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + for (; dzn_descriptor_set_ptr_is_valid(&ptr) && d < desc_count; + dzn_descriptor_set_ptr_move(set, &ptr, 1)) { + assert(dzn_descriptor_set_get_desc_vk_type(set, &ptr) == pDescriptorWrite->descriptorType); + const VkDescriptorImageInfo *pImageInfo = pDescriptorWrite->pImageInfo + d; + VK_FROM_HANDLE(dzn_sampler, sampler, pImageInfo->sampler); + VK_FROM_HANDLE(dzn_image_view, iview, pImageInfo->imageView); + + if (sampler) + dzn_descriptor_set_write_sampler_desc(set, &ptr, sampler); + + if (iview) + dzn_descriptor_set_write_image_view_desc(set, &ptr, cube_as_2darray, iview); + + d++; + } + break; + + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + for (; dzn_descriptor_set_ptr_is_valid(&ptr) && d < desc_count; + dzn_descriptor_set_ptr_move(set, &ptr, 1)) { + assert(dzn_descriptor_set_get_desc_vk_type(set, &ptr) == pDescriptorWrite->descriptorType); + const VkDescriptorImageInfo *pImageInfo = pDescriptorWrite->pImageInfo + d; + VK_FROM_HANDLE(dzn_image_view, iview, pImageInfo->imageView); + + if (iview) + dzn_descriptor_set_write_image_view_desc(set, &ptr, cube_as_2darray, iview); + + d++; + } + break; + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + for (; dzn_descriptor_set_ptr_is_valid(&ptr) && d < desc_count; + 
dzn_descriptor_set_ptr_move(set, &ptr, 1)) { + assert(dzn_descriptor_set_get_desc_vk_type(set, &ptr) == pDescriptorWrite->descriptorType); + const VkDescriptorBufferInfo *binfo = &pDescriptorWrite->pBufferInfo[d]; + struct dzn_buffer_desc desc = { + pDescriptorWrite->descriptorType, + dzn_buffer_from_handle(binfo->buffer), + binfo->range, binfo->offset + }; + + if (desc.buffer) + dzn_descriptor_set_write_buffer_desc(set, &ptr, &desc); + + d++; + } + break; + + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + for (; dzn_descriptor_set_ptr_is_valid(&ptr) && d < desc_count; + dzn_descriptor_set_ptr_move(set, &ptr, 1)) { + assert(dzn_descriptor_set_get_desc_vk_type(set, &ptr) == pDescriptorWrite->descriptorType); + const VkDescriptorBufferInfo *binfo = &pDescriptorWrite->pBufferInfo[d]; + struct dzn_buffer_desc desc = { + pDescriptorWrite->descriptorType, + dzn_buffer_from_handle(binfo->buffer), + binfo->range, binfo->offset + }; + + if (desc.buffer) + dzn_descriptor_set_write_dynamic_buffer_desc(set, &ptr, &desc); + + d++; + } + break; + + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + for (; dzn_descriptor_set_ptr_is_valid(&ptr) && d < desc_count; + dzn_descriptor_set_ptr_move(set, &ptr, 1)) { + assert(dzn_descriptor_set_get_desc_vk_type(set, &ptr) == pDescriptorWrite->descriptorType); + VK_FROM_HANDLE(dzn_buffer_view, bview, pDescriptorWrite->pTexelBufferView[d]); + + if (bview) + dzn_descriptor_set_write_buffer_view_desc(set, &ptr, bview); + + d++; + } + break; + + default: + unreachable("invalid descriptor type"); + break; + } + + assert(d == pDescriptorWrite->descriptorCount); +} + +static void +dzn_descriptor_set_copy(const VkCopyDescriptorSet *pDescriptorCopy) +{ + VK_FROM_HANDLE(dzn_descriptor_set, src_set, pDescriptorCopy->srcSet); + VK_FROM_HANDLE(dzn_descriptor_set, dst_set, pDescriptorCopy->dstSet); + struct dzn_descriptor_set_ptr src_ptr, dst_ptr; + + dzn_descriptor_set_ptr_init(src_set, &src_ptr, + pDescriptorCopy->srcBinding, + pDescriptorCopy->srcArrayElement); + dzn_descriptor_set_ptr_init(dst_set, &dst_ptr, + pDescriptorCopy->dstBinding, + pDescriptorCopy->dstArrayElement); + + uint32_t copied_count = 0; + + while (dzn_descriptor_set_ptr_is_valid(&src_ptr) && + dzn_descriptor_set_ptr_is_valid(&dst_ptr) && + copied_count < pDescriptorCopy->descriptorCount) { + VkDescriptorType src_type = + dzn_descriptor_set_get_desc_vk_type(src_set, &src_ptr); + VkDescriptorType dst_type = + dzn_descriptor_set_get_desc_vk_type(dst_set, &dst_ptr); + + assert(src_type == dst_type); + uint32_t count = + MIN2(dzn_descriptor_set_remaining_descs_in_binding(src_set, &src_ptr), + dzn_descriptor_set_remaining_descs_in_binding(dst_set, &dst_ptr)); + + if (src_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC || + src_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) { + uint32_t src_idx = + dzn_descriptor_set_get_dynamic_buffer_idx(src_set, &src_ptr); + uint32_t dst_idx = + dzn_descriptor_set_get_dynamic_buffer_idx(dst_set, &dst_ptr); + + memcpy(&dst_set->dynamic_buffers[dst_idx], + &src_set->dynamic_buffers[src_idx], + sizeof(*dst_set->dynamic_buffers) * count); + } else { + dzn_foreach_pool_type(type) { + uint32_t src_heap_offset = + dzn_descriptor_set_get_heap_offset(src_set, type, &src_ptr, false); + uint32_t dst_heap_offset = + dzn_descriptor_set_get_heap_offset(dst_set, type, &dst_ptr, false); + + if (src_heap_offset == ~0) { + assert(dst_heap_offset == ~0); + continue; + } + + 
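/* Take both pools' defragment locks so a concurrent defragmentation can't
 * move either heap mid-copy. Types whose D3D12 range depends on shader usage
 * have both an SRV and a UAV slot, hence the second copy below. */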
mtx_lock(&src_set->pool->defragment_lock); + mtx_lock(&dst_set->pool->defragment_lock); + dzn_descriptor_heap_copy(&dst_set->pool->heaps[type], + dst_set->heap_offsets[type] + dst_heap_offset, + &src_set->pool->heaps[type], + src_set->heap_offsets[type] + src_heap_offset, + count); + + if (dzn_descriptor_type_depends_on_shader_usage(src_type)) { + src_heap_offset = + dzn_descriptor_set_get_heap_offset(src_set, type, &src_ptr, true); + dst_heap_offset = + dzn_descriptor_set_get_heap_offset(dst_set, type, &dst_ptr, true); + assert(src_heap_offset != ~0); + assert(dst_heap_offset != ~0); + dzn_descriptor_heap_copy(&dst_set->pool->heaps[type], + dst_set->heap_offsets[type] + dst_heap_offset, + &src_set->pool->heaps[type], + src_set->heap_offsets[type] + src_heap_offset, + count); + } + mtx_unlock(&dst_set->pool->defragment_lock); + mtx_unlock(&src_set->pool->defragment_lock); + } + } + + dzn_descriptor_set_ptr_move(src_set, &src_ptr, count); + dzn_descriptor_set_ptr_move(dst_set, &dst_ptr, count); + copied_count += count; + } + + assert(copied_count == pDescriptorCopy->descriptorCount); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_UpdateDescriptorSets(VkDevice _device, + uint32_t descriptorWriteCount, + const VkWriteDescriptorSet *pDescriptorWrites, + uint32_t descriptorCopyCount, + const VkCopyDescriptorSet *pDescriptorCopies) +{ + VK_FROM_HANDLE(dzn_device, dev, _device); + + for (unsigned i = 0; i < descriptorWriteCount; i++) + dzn_descriptor_set_write(&pDescriptorWrites[i]); + + for (unsigned i = 0; i < descriptorCopyCount; i++) + dzn_descriptor_set_copy(&pDescriptorCopies[i]); +} diff --git a/src/microsoft/vulkan/dzn_descriptor_set.cpp b/src/microsoft/vulkan/dzn_descriptor_set.cpp deleted file mode 100644 index d15b618e3e3..00000000000 --- a/src/microsoft/vulkan/dzn_descriptor_set.cpp +++ /dev/null @@ -1,1818 +0,0 @@ -/* - * Copyright © Microsoft Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include "dzn_private.h" - -#include "vk_alloc.h" -#include "vk_descriptors.h" -#include "vk_util.h" - -static D3D12_SHADER_VISIBILITY -translate_desc_visibility(VkShaderStageFlags in) -{ - switch (in) { - case VK_SHADER_STAGE_VERTEX_BIT: return D3D12_SHADER_VISIBILITY_VERTEX; - case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: return D3D12_SHADER_VISIBILITY_HULL; - case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: return D3D12_SHADER_VISIBILITY_DOMAIN; - case VK_SHADER_STAGE_GEOMETRY_BIT: return D3D12_SHADER_VISIBILITY_GEOMETRY; - case VK_SHADER_STAGE_FRAGMENT_BIT: return D3D12_SHADER_VISIBILITY_PIXEL; - default: return D3D12_SHADER_VISIBILITY_ALL; - } -} - -static D3D12_DESCRIPTOR_RANGE_TYPE -desc_type_to_range_type(VkDescriptorType in, bool writeable) -{ - switch (in) { - case VK_DESCRIPTOR_TYPE_SAMPLER: - return D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; - - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - return D3D12_DESCRIPTOR_RANGE_TYPE_SRV; - - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - return D3D12_DESCRIPTOR_RANGE_TYPE_CBV; - - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - return writeable ? D3D12_DESCRIPTOR_RANGE_TYPE_UAV : D3D12_DESCRIPTOR_RANGE_TYPE_SRV; - default: - unreachable("Unsupported desc type"); - } -} - -static bool -is_dynamic_desc_type(VkDescriptorType desc_type) -{ - return (desc_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC || - desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC); -} - -static bool -dzn_descriptor_type_depends_on_shader_usage(VkDescriptorType type) -{ - return type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER || - type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE || - type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER || - type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC; -} - -static uint32_t -num_descs_for_type(VkDescriptorType type, bool static_sampler) -{ - unsigned num_descs = 1; - - /* Some type map to an SRV or UAV depending on how the shaders is using the - * resource (NONWRITEABLE flag set or not), in that case we need to reserve - * slots for both the UAV and SRV descs. - */ - if (dzn_descriptor_type_depends_on_shader_usage(type)) - num_descs++; - - /* There's no combined SRV+SAMPLER type in d3d12, we need an descriptor - * for the sampler. - */ - if (type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) - num_descs++; - - /* Don't count immutable samplers, they have their own descriptor. 
*/ - if (static_sampler && - (type == VK_DESCRIPTOR_TYPE_SAMPLER || - type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)) - num_descs--; - - return num_descs; -} - -static void -dzn_descriptor_set_layout_destroy(struct dzn_descriptor_set_layout *set_layout, - const VkAllocationCallbacks *pAllocator) -{ - if (!set_layout) - return; - - struct dzn_device *device = container_of(set_layout->base.device, struct dzn_device, vk); - - vk_object_base_finish(&set_layout->base); - vk_free2(&device->vk.alloc, pAllocator, set_layout); -} - -static VkResult -dzn_descriptor_set_layout_create(struct dzn_device *device, - const VkDescriptorSetLayoutCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkDescriptorSetLayout *out) -{ - const VkDescriptorSetLayoutBinding *bindings = pCreateInfo->pBindings; - uint32_t binding_count = 0, static_sampler_count = 0, total_ranges = 0; - uint32_t dynamic_ranges_offset = 0, immutable_sampler_count = 0; - uint32_t range_count[MAX_SHADER_VISIBILITIES][NUM_POOL_TYPES] = {}; - - for (uint32_t i = 0; i < pCreateInfo->bindingCount; i++) { - binding_count = MAX2(binding_count, bindings[i].binding + 1); - - if (!bindings[i].descriptorCount) - continue; - - D3D12_SHADER_VISIBILITY visibility = - translate_desc_visibility(bindings[i].stageFlags); - VkDescriptorType desc_type = bindings[i].descriptorType; - bool has_sampler = - desc_type == VK_DESCRIPTOR_TYPE_SAMPLER || - desc_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - - /* From the Vulkan 1.1.97 spec for VkDescriptorSetLayoutBinding: - * - * "If descriptorType specifies a VK_DESCRIPTOR_TYPE_SAMPLER or - * VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER type descriptor, then - * pImmutableSamplers can be used to initialize a set of immutable - * samplers. [...] If descriptorType is not one of these descriptor - * types, then pImmutableSamplers is ignored. - * - * We need to be careful here and only parse pImmutableSamplers if we - * have one of the right descriptor types. - */ - bool immutable_samplers = - has_sampler && - bindings[i].pImmutableSamplers != NULL; - bool static_sampler = false; - - if (immutable_samplers && bindings[i].descriptorCount == 1) { - VK_FROM_HANDLE(dzn_sampler, sampler, bindings[i].pImmutableSamplers[0]); - - if (sampler->static_border_color != -1) - static_sampler = true; - } - - if (static_sampler) { - static_sampler_count += bindings[i].descriptorCount; - } else if (has_sampler) { - range_count[visibility][D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER]++; - total_ranges++; - - if (immutable_samplers) - immutable_sampler_count += bindings[i].descriptorCount; - } - - if (desc_type != VK_DESCRIPTOR_TYPE_SAMPLER) { - range_count[visibility][D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV]++; - total_ranges++; - - if (dzn_descriptor_type_depends_on_shader_usage(desc_type)) { - range_count[visibility][D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV]++; - total_ranges++; - } - - if (!is_dynamic_desc_type(desc_type)) { - uint32_t factor = - dzn_descriptor_type_depends_on_shader_usage(desc_type) ? 2 : 1; - dynamic_ranges_offset += bindings[i].descriptorCount * factor; - } - } - } - - /* We need to allocate decriptor set layouts off the device allocator - * with DEVICE scope because they are reference counted and may not be - * destroyed when vkDestroyDescriptorSetLayout is called. 
- */ - VK_MULTIALLOC(ma); - VK_MULTIALLOC_DECL(&ma, struct dzn_descriptor_set_layout, set_layout, 1); - VK_MULTIALLOC_DECL(&ma, D3D12_DESCRIPTOR_RANGE1, - ranges, total_ranges); - VK_MULTIALLOC_DECL(&ma, D3D12_STATIC_SAMPLER_DESC, static_samplers, - static_sampler_count); - VK_MULTIALLOC_DECL(&ma, const struct dzn_sampler *, immutable_samplers, - immutable_sampler_count); - VK_MULTIALLOC_DECL(&ma, struct dzn_descriptor_set_layout_binding, binfos, - binding_count); - - if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, pAllocator, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT)) - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - - vk_object_base_init(&device->vk, &set_layout->base, VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT); - set_layout->static_samplers = static_samplers; - set_layout->static_sampler_count = static_sampler_count; - set_layout->immutable_samplers = immutable_samplers; - set_layout->immutable_sampler_count = immutable_sampler_count; - set_layout->bindings = binfos; - set_layout->binding_count = binding_count; - set_layout->dynamic_buffers.range_offset = dynamic_ranges_offset; - - for (uint32_t i = 0; i < MAX_SHADER_VISIBILITIES; i++) { - dzn_foreach_pool_type (type) { - if (range_count[i][type]) { - set_layout->ranges[i][type] = ranges; - set_layout->range_count[i][type] = range_count[i][type]; - ranges += range_count[i][type]; - } - } - } - - VkDescriptorSetLayoutBinding *ordered_bindings; - VkResult ret = - vk_create_sorted_bindings(pCreateInfo->pBindings, - pCreateInfo->bindingCount, - &ordered_bindings); - if (ret != VK_SUCCESS) - return ret; - - assert(binding_count == - (pCreateInfo->bindingCount ? - (ordered_bindings[pCreateInfo->bindingCount - 1].binding + 1) : 0)); - - uint32_t range_idx[MAX_SHADER_VISIBILITIES][NUM_POOL_TYPES] = {}; - uint32_t static_sampler_idx = 0, immutable_sampler_idx = 0; - uint32_t dynamic_buffer_idx = 0; - uint32_t base_register = 0; - - for (uint32_t i = 0; i < binding_count; i++) { - binfos[i].static_sampler_idx = ~0; - binfos[i].immutable_sampler_idx = ~0; - binfos[i].dynamic_buffer_idx = ~0; - dzn_foreach_pool_type (type) - binfos[i].range_idx[type] = ~0; - } - - for (uint32_t i = 0; i < pCreateInfo->bindingCount; i++) { - VkDescriptorType desc_type = ordered_bindings[i].descriptorType; - uint32_t binding = ordered_bindings[i].binding; - uint32_t desc_count = ordered_bindings[i].descriptorCount; - bool has_sampler = - desc_type == VK_DESCRIPTOR_TYPE_SAMPLER || - desc_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - bool has_immutable_samplers = - has_sampler && - ordered_bindings[i].pImmutableSamplers != NULL; - bool has_static_sampler = has_immutable_samplers && desc_count == 1; - bool is_dynamic = is_dynamic_desc_type(desc_type); - - D3D12_SHADER_VISIBILITY visibility = - translate_desc_visibility(ordered_bindings[i].stageFlags); - binfos[binding].type = desc_type; - binfos[binding].visibility = visibility; - binfos[binding].base_shader_register = base_register; - assert(base_register + desc_count >= base_register); - base_register += desc_count; - - if (has_static_sampler) { - VK_FROM_HANDLE(dzn_sampler, sampler, ordered_bindings[i].pImmutableSamplers[0]); - - /* Not all border colors are supported. 
*/ - if (sampler->static_border_color != -1) { - binfos[binding].static_sampler_idx = static_sampler_idx; - D3D12_STATIC_SAMPLER_DESC *desc = (D3D12_STATIC_SAMPLER_DESC *) - &static_samplers[static_sampler_idx]; - - desc->Filter = sampler->desc.Filter; - desc->AddressU = sampler->desc.AddressU; - desc->AddressV = sampler->desc.AddressV; - desc->AddressW = sampler->desc.AddressW; - desc->MipLODBias = sampler->desc.MipLODBias; - desc->MaxAnisotropy = sampler->desc.MaxAnisotropy; - desc->ComparisonFunc = sampler->desc.ComparisonFunc; - desc->BorderColor = sampler->static_border_color; - desc->MinLOD = sampler->desc.MinLOD; - desc->MaxLOD = sampler->desc.MaxLOD; - desc->ShaderRegister = binfos[binding].base_shader_register; - desc->ShaderVisibility = translate_desc_visibility(ordered_bindings[i].stageFlags); - static_sampler_idx++; - } else { - has_static_sampler = false; - } - } - - if (has_immutable_samplers && !has_static_sampler) { - binfos[binding].immutable_sampler_idx = immutable_sampler_idx; - for (uint32_t s = 0; s < desc_count; s++) { - VK_FROM_HANDLE(dzn_sampler, sampler, ordered_bindings[i].pImmutableSamplers[s]); - - immutable_samplers[immutable_sampler_idx++] = sampler; - } - } - - if (is_dynamic) { - binfos[binding].dynamic_buffer_idx = dynamic_buffer_idx; - for (uint32_t d = 0; d < desc_count; d++) - set_layout->dynamic_buffers.bindings[dynamic_buffer_idx + d] = binding; - dynamic_buffer_idx += desc_count; - assert(dynamic_buffer_idx <= MAX_DYNAMIC_BUFFERS); - } - - if (!ordered_bindings[i].descriptorCount) - continue; - - unsigned num_descs = - num_descs_for_type(desc_type, has_static_sampler); - if (!num_descs) continue; - - assert(visibility < ARRAY_SIZE(set_layout->ranges)); - - bool has_range[NUM_POOL_TYPES] = {}; - has_range[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER] = - has_sampler && !has_static_sampler; - has_range[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = - desc_type != VK_DESCRIPTOR_TYPE_SAMPLER; - - dzn_foreach_pool_type (type) { - if (!has_range[type]) continue; - - uint32_t idx = range_idx[visibility][type]++; - assert(idx < range_count[visibility][type]); - - binfos[binding].range_idx[type] = idx; - D3D12_DESCRIPTOR_RANGE1 *range = (D3D12_DESCRIPTOR_RANGE1 *) - &set_layout->ranges[visibility][type][idx]; - VkDescriptorType range_type = desc_type; - if (desc_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) { - range_type = type == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER ? - VK_DESCRIPTOR_TYPE_SAMPLER : - VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - } - range->RangeType = desc_type_to_range_type(range_type, false); - range->NumDescriptors = desc_count; - range->BaseShaderRegister = binfos[binding].base_shader_register; - range->Flags = type == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER ? 
- D3D12_DESCRIPTOR_RANGE_FLAG_NONE : - D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_STATIC_KEEPING_BUFFER_BOUNDS_CHECKS; - if (is_dynamic) { - range->OffsetInDescriptorsFromTableStart = - set_layout->dynamic_buffers.range_offset + - set_layout->dynamic_buffers.desc_count; - set_layout->dynamic_buffers.count += range->NumDescriptors; - set_layout->dynamic_buffers.desc_count += range->NumDescriptors; - } else { - range->OffsetInDescriptorsFromTableStart = set_layout->range_desc_count[type]; - set_layout->range_desc_count[type] += range->NumDescriptors; - } - - if (!dzn_descriptor_type_depends_on_shader_usage(desc_type)) - continue; - - assert(idx + 1 < range_count[visibility][type]); - range_idx[visibility][type]++; - range[1] = range[0]; - range++; - range->RangeType = desc_type_to_range_type(range_type, true); - if (is_dynamic) { - range->OffsetInDescriptorsFromTableStart = - set_layout->dynamic_buffers.range_offset + - set_layout->dynamic_buffers.desc_count; - set_layout->dynamic_buffers.desc_count += range->NumDescriptors; - } else { - range->OffsetInDescriptorsFromTableStart = set_layout->range_desc_count[type]; - set_layout->range_desc_count[type] += range->NumDescriptors; - } - } - } - - free(ordered_bindings); - - *out = dzn_descriptor_set_layout_to_handle(set_layout); - return VK_SUCCESS; -} - -static uint32_t -dzn_descriptor_set_layout_get_heap_offset(const struct dzn_descriptor_set_layout *layout, - uint32_t b, - D3D12_DESCRIPTOR_HEAP_TYPE type, - bool writeable) -{ - assert(b < layout->binding_count); - D3D12_SHADER_VISIBILITY visibility = layout->bindings[b].visibility; - assert(visibility < ARRAY_SIZE(layout->ranges)); - assert(type < NUM_POOL_TYPES); - - uint32_t range_idx = layout->bindings[b].range_idx[type]; - - if (range_idx == ~0) - return ~0; - - if (writeable && - !dzn_descriptor_type_depends_on_shader_usage(layout->bindings[b].type)) - return ~0; - - if (writeable) - range_idx++; - - assert(range_idx < layout->range_count[visibility][type]); - return layout->ranges[visibility][type][range_idx].OffsetInDescriptorsFromTableStart; -} - -static uint32_t -dzn_descriptor_set_layout_get_desc_count(const struct dzn_descriptor_set_layout *layout, - uint32_t b) -{ - D3D12_SHADER_VISIBILITY visibility = layout->bindings[b].visibility; - assert(visibility < ARRAY_SIZE(layout->ranges)); - - dzn_foreach_pool_type (type) { - uint32_t range_idx = layout->bindings[b].range_idx[type]; - assert(range_idx == ~0 || range_idx < layout->range_count[visibility][type]); - - if (range_idx != ~0) - return layout->ranges[visibility][type][range_idx].NumDescriptors; - } - - return 0; -} - -VKAPI_ATTR VkResult VKAPI_CALL -dzn_CreateDescriptorSetLayout(VkDevice device, - const VkDescriptorSetLayoutCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkDescriptorSetLayout *pSetLayout) -{ - return dzn_descriptor_set_layout_create(dzn_device_from_handle(device), - pCreateInfo, pAllocator, pSetLayout); -} - -VKAPI_ATTR void VKAPI_CALL -dzn_DestroyDescriptorSetLayout(VkDevice device, - VkDescriptorSetLayout descriptorSetLayout, - const VkAllocationCallbacks *pAllocator) -{ - dzn_descriptor_set_layout_destroy(dzn_descriptor_set_layout_from_handle(descriptorSetLayout), - pAllocator); -} - -static void -dzn_pipeline_layout_destroy(struct dzn_pipeline_layout *layout) -{ - struct dzn_device *device = container_of(layout->base.device, struct dzn_device, vk); - - if (layout->root.sig) - ID3D12RootSignature_Release(layout->root.sig); - - vk_free(&device->vk.alloc, layout); -} - -// Reserve two root 
parameters for the push constants and sysvals CBVs. -#define MAX_INTERNAL_ROOT_PARAMS 2 - -// One root parameter for samplers and the other one for views, multiplied by -// the number of visibility combinations, plus the internal root parameters. -#define MAX_ROOT_PARAMS ((MAX_SHADER_VISIBILITIES * 2) + MAX_INTERNAL_ROOT_PARAMS) - -// Maximum number of DWORDS (32-bit words) that can be used for a root signature -#define MAX_ROOT_DWORDS 64 - -static VkResult -dzn_pipeline_layout_create(struct dzn_device *device, - const VkPipelineLayoutCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkPipelineLayout *out) -{ - uint32_t binding_count = 0; - - for (uint32_t s = 0; s < pCreateInfo->setLayoutCount; s++) { - VK_FROM_HANDLE(dzn_descriptor_set_layout, set_layout, pCreateInfo->pSetLayouts[s]); - - if (!set_layout) - continue; - - binding_count += set_layout->binding_count; - } - - VK_MULTIALLOC(ma); - VK_MULTIALLOC_DECL(&ma, struct dzn_pipeline_layout, layout, 1); - VK_MULTIALLOC_DECL(&ma, struct dxil_spirv_vulkan_binding, - bindings, binding_count); - - if (!vk_multialloc_zalloc(&ma, &device->vk.alloc, - VK_SYSTEM_ALLOCATION_SCOPE_DEVICE)) - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - - vk_object_base_init(&device->vk, &layout->base, VK_OBJECT_TYPE_PIPELINE_LAYOUT); - - for (uint32_t s = 0; s < pCreateInfo->setLayoutCount; s++) { - VK_FROM_HANDLE(dzn_descriptor_set_layout, set_layout, pCreateInfo->pSetLayouts[s]); - - if (!set_layout || !set_layout->binding_count) - continue; - - layout->binding_translation[s].bindings = bindings; - bindings += set_layout->binding_count; - } - - uint32_t range_count = 0, static_sampler_count = 0; - - p_atomic_set(&layout->refcount, 1); - - layout->root.param_count = 0; - dzn_foreach_pool_type (type) - layout->desc_count[type] = 0; - - layout->set_count = pCreateInfo->setLayoutCount; - for (uint32_t j = 0; j < layout->set_count; j++) { - VK_FROM_HANDLE(dzn_descriptor_set_layout, set_layout, pCreateInfo->pSetLayouts[j]); - struct dxil_spirv_vulkan_binding *bindings = layout->binding_translation[j].bindings; - - layout->sets[j].dynamic_buffer_count = set_layout->dynamic_buffers.count; - memcpy(layout->sets[j].range_desc_count, set_layout->range_desc_count, - sizeof(layout->sets[j].range_desc_count)); - layout->binding_translation[j].binding_count = set_layout->binding_count; - for (uint32_t b = 0; b < set_layout->binding_count; b++) - bindings[b].base_register = set_layout->bindings[b].base_shader_register; - - static_sampler_count += set_layout->static_sampler_count; - dzn_foreach_pool_type (type) { - layout->sets[j].heap_offsets[type] = layout->desc_count[type]; - layout->desc_count[type] += set_layout->range_desc_count[type]; - for (uint32_t i = 0; i < MAX_SHADER_VISIBILITIES; i++) - range_count += set_layout->range_count[i][type]; - } - - layout->desc_count[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] += - set_layout->dynamic_buffers.desc_count; - for (uint32_t o = 0, elem = 0; o < set_layout->dynamic_buffers.count; o++, elem++) { - uint32_t b = set_layout->dynamic_buffers.bindings[o]; - - if (o > 0 && set_layout->dynamic_buffers.bindings[o - 1] != b) - elem = 0; - - uint32_t srv = - dzn_descriptor_set_layout_get_heap_offset(set_layout, b, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, false); - uint32_t uav = - dzn_descriptor_set_layout_get_heap_offset(set_layout, b, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, true); - - layout->sets[j].dynamic_buffer_heap_offsets[o].srv = srv != ~0 ? 
srv + elem : ~0; - layout->sets[j].dynamic_buffer_heap_offsets[o].uav = uav != ~0 ? uav + elem : ~0; - } - } - - D3D12_DESCRIPTOR_RANGE1 *ranges = (D3D12_DESCRIPTOR_RANGE1 *) - vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*ranges) * range_count, 8, - VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); - if (range_count && !ranges) { - dzn_pipeline_layout_destroy(layout); - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - } - - D3D12_STATIC_SAMPLER_DESC *static_sampler_descs = (D3D12_STATIC_SAMPLER_DESC *) - vk_alloc2(&device->vk.alloc, pAllocator, - sizeof(*static_sampler_descs) * static_sampler_count, 8, - VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); - if (static_sampler_count && !static_sampler_descs) { - vk_free2(&device->vk.alloc, pAllocator, ranges); - dzn_pipeline_layout_destroy(layout); - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - } - - - D3D12_ROOT_PARAMETER1 root_params[MAX_ROOT_PARAMS] = {}; - D3D12_DESCRIPTOR_RANGE1 *range_ptr = ranges; - D3D12_ROOT_PARAMETER1 *root_param; - uint32_t root_dwords = 0; - - for (uint32_t i = 0; i < MAX_SHADER_VISIBILITIES; i++) { - dzn_foreach_pool_type (type) { - root_param = &root_params[layout->root.param_count]; - root_param->ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - root_param->DescriptorTable.pDescriptorRanges = range_ptr; - root_param->DescriptorTable.NumDescriptorRanges = 0; - root_param->ShaderVisibility = (D3D12_SHADER_VISIBILITY)i; - - for (uint32_t j = 0; j < pCreateInfo->setLayoutCount; j++) { - VK_FROM_HANDLE(dzn_descriptor_set_layout, set_layout, pCreateInfo->pSetLayouts[j]); - uint32_t range_count = set_layout->range_count[i][type]; - - memcpy(range_ptr, set_layout->ranges[i][type], - range_count * sizeof(D3D12_DESCRIPTOR_RANGE1)); - for (uint32_t k = 0; k < range_count; k++) { - range_ptr[k].RegisterSpace = j; - range_ptr[k].OffsetInDescriptorsFromTableStart += - layout->sets[j].heap_offsets[type]; - } - root_param->DescriptorTable.NumDescriptorRanges += range_count; - range_ptr += range_count; - } - - if (root_param->DescriptorTable.NumDescriptorRanges) { - layout->root.type[layout->root.param_count++] = (D3D12_DESCRIPTOR_HEAP_TYPE)type; - root_dwords++; - } - } - } - - layout->root.sets_param_count = layout->root.param_count; - - /* Add our sysval CBV, and make it visible to all shaders */ - layout->root.sysval_cbv_param_idx = layout->root.param_count; - root_param = &root_params[layout->root.param_count++]; - root_param->ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; - root_param->Descriptor.RegisterSpace = DZN_REGISTER_SPACE_SYSVALS; - root_param->Constants.ShaderRegister = 0; - root_param->Constants.Num32BitValues = - DIV_ROUND_UP(MAX2(sizeof(struct dxil_spirv_vertex_runtime_data), - sizeof(struct dxil_spirv_compute_runtime_data)), - 4); - root_param->ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - root_dwords += root_param->Constants.Num32BitValues; - - D3D12_STATIC_SAMPLER_DESC *static_sampler_ptr = static_sampler_descs; - for (uint32_t j = 0; j < pCreateInfo->setLayoutCount; j++) { - VK_FROM_HANDLE(dzn_descriptor_set_layout, set_layout, pCreateInfo->pSetLayouts[j]); - - memcpy(static_sampler_ptr, set_layout->static_samplers, - set_layout->static_sampler_count * sizeof(*set_layout->static_samplers)); - if (j > 0) { - for (uint32_t k = 0; k < set_layout->static_sampler_count; k++) - static_sampler_ptr[k].RegisterSpace = j; - } - static_sampler_ptr += set_layout->static_sampler_count; - } - - uint32_t push_constant_size = 0; - uint32_t push_constant_flags = 0; - for (uint32_t j = 0; j < 
pCreateInfo->pushConstantRangeCount; j++) { - const VkPushConstantRange* range = pCreateInfo->pPushConstantRanges + j; - push_constant_size = MAX2(push_constant_size, range->offset + range->size); - push_constant_flags |= range->stageFlags; - } - - if (push_constant_size > 0) { - layout->root.push_constant_cbv_param_idx = layout->root.param_count; - D3D12_ROOT_PARAMETER1 *root_param = &root_params[layout->root.param_count++]; - - root_param->ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; - root_param->Constants.ShaderRegister = 0; - root_param->Constants.Num32BitValues = ALIGN(push_constant_size, 4) / 4; - root_param->Constants.RegisterSpace = DZN_REGISTER_SPACE_PUSH_CONSTANT; - root_param->ShaderVisibility = translate_desc_visibility(push_constant_flags); - root_dwords += root_param->Constants.Num32BitValues; - } - - assert(layout->root.param_count <= ARRAY_SIZE(root_params)); - assert(root_dwords <= MAX_ROOT_DWORDS); - - D3D12_VERSIONED_ROOT_SIGNATURE_DESC root_sig_desc = { - .Version = D3D_ROOT_SIGNATURE_VERSION_1_1, - .Desc_1_1 = { - .NumParameters = layout->root.param_count, - .pParameters = layout->root.param_count ? root_params : NULL, - .NumStaticSamplers =static_sampler_count, - .pStaticSamplers = static_sampler_descs, - /* TODO Only enable this flag when needed (optimization) */ - .Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT, - }, - }; - - layout->root.sig = dzn_device_create_root_sig(device, &root_sig_desc); - vk_free2(&device->vk.alloc, pAllocator, ranges); - vk_free2(&device->vk.alloc, pAllocator, static_sampler_descs); - - if (!layout->root.sig) { - dzn_pipeline_layout_destroy(layout); - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - } - - *out = dzn_pipeline_layout_to_handle(layout); - return VK_SUCCESS; -} - -struct dzn_pipeline_layout * -dzn_pipeline_layout_ref(struct dzn_pipeline_layout *layout) -{ - if (layout) - p_atomic_inc(&layout->refcount); - - return layout; -} - -void -dzn_pipeline_layout_unref(struct dzn_pipeline_layout *layout) -{ - if (layout) { - if (p_atomic_dec_zero(&layout->refcount)) - dzn_pipeline_layout_destroy(layout); - } -} - -VKAPI_ATTR VkResult VKAPI_CALL -dzn_CreatePipelineLayout(VkDevice device, - const VkPipelineLayoutCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkPipelineLayout *pPipelineLayout) -{ - return dzn_pipeline_layout_create(dzn_device_from_handle(device), - pCreateInfo, pAllocator, pPipelineLayout); -} - -VKAPI_ATTR void VKAPI_CALL -dzn_DestroyPipelineLayout(VkDevice device, - VkPipelineLayout layout, - const VkAllocationCallbacks *pAllocator) -{ - VK_FROM_HANDLE(dzn_pipeline_layout, playout, layout); - - dzn_pipeline_layout_unref(playout); -} - -static D3D12_DESCRIPTOR_HEAP_TYPE -desc_type_to_heap_type(VkDescriptorType in) -{ - switch (in) { - case VK_DESCRIPTOR_TYPE_SAMPLER: - return D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER; - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - return D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - default: - unreachable("Unsupported desc type"); - } -} - -static void -dzn_descriptor_heap_finish(struct dzn_descriptor_heap *heap) -{ - if 
(heap->heap) - ID3D12DescriptorHeap_Release(heap->heap); - - if (heap->dev) - ID3D12Device_Release(heap->dev); -} - -static VkResult -dzn_descriptor_heap_init(struct dzn_descriptor_heap *heap, - struct dzn_device *device, - D3D12_DESCRIPTOR_HEAP_TYPE type, - uint32_t desc_count, - bool shader_visible) -{ - heap->desc_count = desc_count; - heap->type = type; - heap->dev = device->dev; - ID3D12Device1_AddRef(heap->dev); - heap->desc_sz = ID3D12Device1_GetDescriptorHandleIncrementSize(device->dev, type); - - D3D12_DESCRIPTOR_HEAP_DESC desc = { - .Type = type, - .NumDescriptors = desc_count, - .Flags = shader_visible ? - D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE : - D3D12_DESCRIPTOR_HEAP_FLAG_NONE, - }; - - if (FAILED(ID3D12Device1_CreateDescriptorHeap(device->dev, &desc, - IID_ID3D12DescriptorHeap, - (void **)&heap->heap))) { - return vk_error(device, - shader_visible ? - VK_ERROR_OUT_OF_DEVICE_MEMORY : VK_ERROR_OUT_OF_HOST_MEMORY); - } - - D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle; - ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(heap->heap, &cpu_handle); - heap->cpu_base = cpu_handle.ptr; - if (shader_visible) { - D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle; - ID3D12DescriptorHeap_GetGPUDescriptorHandleForHeapStart(heap->heap, &gpu_handle); - heap->gpu_base = gpu_handle.ptr; - } - - return VK_SUCCESS; -} - -D3D12_CPU_DESCRIPTOR_HANDLE -dzn_descriptor_heap_get_cpu_handle(const struct dzn_descriptor_heap *heap, uint32_t desc_offset) -{ - return D3D12_CPU_DESCRIPTOR_HANDLE { - .ptr = heap->cpu_base + (desc_offset * heap->desc_sz), - }; -} - -D3D12_GPU_DESCRIPTOR_HANDLE -dzn_descriptor_heap_get_gpu_handle(const struct dzn_descriptor_heap *heap, uint32_t desc_offset) -{ - return D3D12_GPU_DESCRIPTOR_HANDLE { - .ptr = heap->gpu_base ? heap->gpu_base + (desc_offset * heap->desc_sz) : 0, - }; -} - -static void -dzn_descriptor_heap_write_sampler_desc(struct dzn_descriptor_heap *heap, - uint32_t desc_offset, - const struct dzn_sampler *sampler) -{ - ID3D12Device1_CreateSampler(heap->dev, &sampler->desc, - dzn_descriptor_heap_get_cpu_handle(heap, desc_offset)); -} - -void -dzn_descriptor_heap_write_image_view_desc(struct dzn_descriptor_heap *heap, - uint32_t desc_offset, - bool writeable, bool cube_as_2darray, - const struct dzn_image_view *iview) -{ - D3D12_CPU_DESCRIPTOR_HANDLE view_handle = - dzn_descriptor_heap_get_cpu_handle(heap, desc_offset); - struct dzn_image *image = container_of(iview->vk.image, struct dzn_image, vk); - - if (writeable) { - ID3D12Device1_CreateUnorderedAccessView(heap->dev, image->res, NULL, &iview->uav_desc, view_handle); - } else if (cube_as_2darray && - (iview->srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURECUBEARRAY || - iview->srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURECUBE)) { - D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = iview->srv_desc; - srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY; - srv_desc.Texture2DArray.PlaneSlice = 0; - if (iview->srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURECUBEARRAY) { - srv_desc.Texture2DArray.MostDetailedMip = - iview->srv_desc.TextureCubeArray.MostDetailedMip; - srv_desc.Texture2DArray.MipLevels = - iview->srv_desc.TextureCubeArray.MipLevels; - srv_desc.Texture2DArray.FirstArraySlice = - iview->srv_desc.TextureCubeArray.First2DArrayFace; - srv_desc.Texture2DArray.ArraySize = - iview->srv_desc.TextureCubeArray.NumCubes * 6; - } else { - srv_desc.Texture2DArray.MostDetailedMip = - iview->srv_desc.TextureCube.MostDetailedMip; - srv_desc.Texture2DArray.MipLevels = - 
iview->srv_desc.TextureCube.MipLevels; - srv_desc.Texture2DArray.FirstArraySlice = 0; - srv_desc.Texture2DArray.ArraySize = 6; - } - - ID3D12Device1_CreateShaderResourceView(heap->dev, image->res, &srv_desc, view_handle); - } else { - ID3D12Device1_CreateShaderResourceView(heap->dev, image->res, &iview->srv_desc, view_handle); - } -} - -static void -dzn_descriptor_heap_write_buffer_view_desc(struct dzn_descriptor_heap *heap, - uint32_t desc_offset, - bool writeable, - const struct dzn_buffer_view *bview) -{ - D3D12_CPU_DESCRIPTOR_HANDLE view_handle = - dzn_descriptor_heap_get_cpu_handle(heap, desc_offset); - - if (writeable) - ID3D12Device1_CreateUnorderedAccessView(heap->dev, bview->buffer->res, NULL, &bview->uav_desc, view_handle); - else - ID3D12Device1_CreateShaderResourceView(heap->dev, bview->buffer->res, &bview->srv_desc, view_handle); -} - -void -dzn_descriptor_heap_write_buffer_desc(struct dzn_descriptor_heap *heap, - uint32_t desc_offset, - bool writeable, - const struct dzn_buffer_desc *info) -{ - D3D12_CPU_DESCRIPTOR_HANDLE view_handle = - dzn_descriptor_heap_get_cpu_handle(heap, desc_offset); - - VkDeviceSize size = - info->range == VK_WHOLE_SIZE ? - info->buffer->size - info->offset : - info->range; - - if (info->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER || - info->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) { - assert(!writeable); - D3D12_CONSTANT_BUFFER_VIEW_DESC cbv_desc = { - .BufferLocation = ID3D12Resource_GetGPUVirtualAddress(info->buffer->res) + info->offset, - .SizeInBytes = ALIGN_POT(size, 256), - }; - ID3D12Device1_CreateConstantBufferView(heap->dev, &cbv_desc, view_handle); - } else if (writeable) { - D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = { - .Format = DXGI_FORMAT_R32_TYPELESS, - .ViewDimension = D3D12_UAV_DIMENSION_BUFFER, - .Buffer = { - .FirstElement = info->offset / sizeof(uint32_t), - .NumElements = (UINT)size / sizeof(uint32_t), - .Flags = D3D12_BUFFER_UAV_FLAG_RAW, - }, - }; - ID3D12Device1_CreateUnorderedAccessView(heap->dev, info->buffer->res, NULL, &uav_desc, view_handle); - } else { - D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = { - .Format = DXGI_FORMAT_R32_TYPELESS, - .ViewDimension = D3D12_SRV_DIMENSION_BUFFER, - .Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, - .Buffer = { - .FirstElement = info->offset / sizeof(uint32_t), - .NumElements = (UINT)size / sizeof(uint32_t), - .Flags = D3D12_BUFFER_SRV_FLAG_RAW, - }, - }; - ID3D12Device1_CreateShaderResourceView(heap->dev, info->buffer->res, &srv_desc, view_handle); - } -} - -void -dzn_descriptor_heap_copy(struct dzn_descriptor_heap *dst_heap, - uint32_t dst_offset, - const struct dzn_descriptor_heap *src_heap, - uint32_t src_offset, - uint32_t desc_count) -{ - D3D12_CPU_DESCRIPTOR_HANDLE dst_handle = - dzn_descriptor_heap_get_cpu_handle(dst_heap, dst_offset); - D3D12_CPU_DESCRIPTOR_HANDLE src_handle = - dzn_descriptor_heap_get_cpu_handle(src_heap, src_offset); - - ID3D12Device1_CopyDescriptorsSimple(dst_heap->dev, desc_count, - dst_handle, - src_handle, - dst_heap->type); -} - -struct dzn_descriptor_set_ptr { - uint32_t binding, elem; -}; - -static void -dzn_descriptor_set_ptr_validate(const struct dzn_descriptor_set *set, - struct dzn_descriptor_set_ptr *ptr) -{ - - if (ptr->binding >= set->layout->binding_count) { - ptr->binding = ~0; - ptr->elem = ~0; - return; - } - - uint32_t desc_count = - dzn_descriptor_set_layout_get_desc_count(set->layout, ptr->binding); - if (ptr->elem >= desc_count) { - ptr->binding = ~0; - ptr->elem = ~0; - } -} - -static void 
-dzn_descriptor_set_ptr_init(const struct dzn_descriptor_set *set, - struct dzn_descriptor_set_ptr *ptr, - uint32_t binding, uint32_t elem) -{ - ptr->binding = binding; - ptr->elem = elem; - dzn_descriptor_set_ptr_validate(set, ptr); -} - -static void -dzn_descriptor_set_ptr_move(const struct dzn_descriptor_set *set, - struct dzn_descriptor_set_ptr *ptr, - uint32_t count) -{ - if (ptr->binding == ~0) - return; - - while (count) { - uint32_t desc_count = - dzn_descriptor_set_layout_get_desc_count(set->layout, ptr->binding); - - if (count >= desc_count - ptr->elem) { - count -= desc_count - ptr->elem; - ptr->binding++; - ptr->elem = 0; - } else { - ptr->elem += count; - count = 0; - } - } - - dzn_descriptor_set_ptr_validate(set, ptr); -} - -static bool -dzn_descriptor_set_ptr_is_valid(const struct dzn_descriptor_set_ptr *ptr) -{ - return ptr->binding != ~0 && ptr->elem != ~0; -} - -static uint32_t -dzn_descriptor_set_remaining_descs_in_binding(const struct dzn_descriptor_set *set, - const struct dzn_descriptor_set_ptr *ptr) -{ - if (ptr->binding >= set->layout->binding_count) - return 0; - - uint32_t desc_count = - dzn_descriptor_set_layout_get_desc_count(set->layout, ptr->binding); - - return desc_count >= ptr->elem ? desc_count - ptr->elem : 0; -} - - -static uint32_t -dzn_descriptor_set_get_heap_offset(const struct dzn_descriptor_set *set, - D3D12_DESCRIPTOR_HEAP_TYPE type, - const struct dzn_descriptor_set_ptr *ptr, - bool writeable) -{ - if (ptr->binding == ~0) - return ~0; - - uint32_t base = - dzn_descriptor_set_layout_get_heap_offset(set->layout, ptr->binding, type, writeable); - if (base == ~0) - return ~0; - - return base + ptr->elem; -} - -static void -dzn_descriptor_set_write_sampler_desc(struct dzn_descriptor_set *set, - const struct dzn_descriptor_set_ptr *ptr, - const struct dzn_sampler *sampler) -{ - D3D12_DESCRIPTOR_HEAP_TYPE type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER; - uint32_t heap_offset = - dzn_descriptor_set_get_heap_offset(set, type, ptr, false); - - if (heap_offset != ~0) { - mtx_lock(&set->pool->defragment_lock); - dzn_descriptor_heap_write_sampler_desc(&set->pool->heaps[type], - set->heap_offsets[type] + heap_offset, - sampler); - mtx_unlock(&set->pool->defragment_lock); - } -} - -static uint32_t -dzn_descriptor_set_get_dynamic_buffer_idx(const struct dzn_descriptor_set *set, - const struct dzn_descriptor_set_ptr *ptr) -{ - if (ptr->binding == ~0) - return ~0; - - uint32_t base = set->layout->bindings[ptr->binding].dynamic_buffer_idx; - - if (base == ~0) - return ~0; - - return base + ptr->elem; -} - -static void -dzn_descriptor_set_write_dynamic_buffer_desc(struct dzn_descriptor_set *set, - const struct dzn_descriptor_set_ptr *ptr, - const struct dzn_buffer_desc *info) -{ - uint32_t dynamic_buffer_idx = - dzn_descriptor_set_get_dynamic_buffer_idx(set, ptr); - if (dynamic_buffer_idx == ~0) - return; - - assert(dynamic_buffer_idx < set->layout->dynamic_buffers.count); - set->dynamic_buffers[dynamic_buffer_idx] = *info; -} - -static VkDescriptorType -dzn_descriptor_set_get_desc_vk_type(const struct dzn_descriptor_set *set, - const struct dzn_descriptor_set_ptr *ptr) -{ - if (ptr->binding >= set->layout->binding_count) - return (VkDescriptorType)~0; - - return set->layout->bindings[ptr->binding].type; -} - -static void -dzn_descriptor_set_write_image_view_desc(struct dzn_descriptor_set *set, - const struct dzn_descriptor_set_ptr *ptr, - bool cube_as_2darray, - const struct dzn_image_view *iview) -{ - D3D12_DESCRIPTOR_HEAP_TYPE type = 
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; - uint32_t heap_offset = - dzn_descriptor_set_get_heap_offset(set, type, ptr, false); - if (heap_offset == ~0) - return; - - mtx_lock(&set->pool->defragment_lock); - dzn_descriptor_heap_write_image_view_desc(&set->pool->heaps[type], - set->heap_offsets[type] + heap_offset, - false, cube_as_2darray, - iview); - - VkDescriptorType vk_type = dzn_descriptor_set_get_desc_vk_type(set, ptr); - if (dzn_descriptor_type_depends_on_shader_usage(vk_type)) { - heap_offset = - dzn_descriptor_set_get_heap_offset(set, type, ptr, true); - assert(heap_offset != ~0); - dzn_descriptor_heap_write_image_view_desc(&set->pool->heaps[type], - set->heap_offsets[type] + heap_offset, - true, cube_as_2darray, - iview); - } - mtx_unlock(&set->pool->defragment_lock); -} - -static void -dzn_descriptor_set_write_buffer_view_desc(struct dzn_descriptor_set *set, - const struct dzn_descriptor_set_ptr *ptr, - const struct dzn_buffer_view *bview) -{ - D3D12_DESCRIPTOR_HEAP_TYPE type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; - uint32_t heap_offset = - dzn_descriptor_set_get_heap_offset(set, type, ptr, false); - if (heap_offset == ~0) - return; - - mtx_lock(&set->pool->defragment_lock); - dzn_descriptor_heap_write_buffer_view_desc(&set->pool->heaps[type], - set->heap_offsets[type] + heap_offset, - false, bview); - - VkDescriptorType vk_type = dzn_descriptor_set_get_desc_vk_type(set, ptr); - if (dzn_descriptor_type_depends_on_shader_usage(vk_type)) { - heap_offset = - dzn_descriptor_set_get_heap_offset(set, type, ptr, true); - assert(heap_offset != ~0); - dzn_descriptor_heap_write_buffer_view_desc(&set->pool->heaps[type], - set->heap_offsets[type] + heap_offset, - true, bview); - } - mtx_unlock(&set->pool->defragment_lock); -} - -static void -dzn_descriptor_set_write_buffer_desc(struct dzn_descriptor_set *set, - const struct dzn_descriptor_set_ptr *ptr, - const struct dzn_buffer_desc *bdesc) -{ - D3D12_DESCRIPTOR_HEAP_TYPE type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; - uint32_t heap_offset = - dzn_descriptor_set_get_heap_offset(set, type, ptr, false); - if (heap_offset == ~0) - return; - - mtx_lock(&set->pool->defragment_lock); - dzn_descriptor_heap_write_buffer_desc(&set->pool->heaps[type], - set->heap_offsets[type] + heap_offset, - false, bdesc); - - VkDescriptorType vk_type = dzn_descriptor_set_get_desc_vk_type(set, ptr); - if (dzn_descriptor_type_depends_on_shader_usage(vk_type)) { - heap_offset = - dzn_descriptor_set_get_heap_offset(set, type, ptr, true); - assert(heap_offset != ~0); - dzn_descriptor_heap_write_buffer_desc(&set->pool->heaps[type], - set->heap_offsets[type] + heap_offset, - true, bdesc); - } - mtx_unlock(&set->pool->defragment_lock); -} - -static void -dzn_descriptor_set_init(struct dzn_descriptor_set *set, - struct dzn_device *device, - struct dzn_descriptor_pool *pool, - struct dzn_descriptor_set_layout *layout) -{ - vk_object_base_init(&device->vk, &set->base, VK_OBJECT_TYPE_DESCRIPTOR_SET); - - set->pool = pool; - set->layout = layout; - - mtx_lock(&pool->defragment_lock); - dzn_foreach_pool_type(type) { - set->heap_offsets[type] = pool->free_offset[type]; - set->heap_sizes[type] = layout->range_desc_count[type]; - set->pool->free_offset[type] += layout->range_desc_count[type]; - } - mtx_unlock(&pool->defragment_lock); - - /* Pre-fill the immutable samplers */ - if (layout->immutable_sampler_count) { - for (uint32_t b = 0; b < layout->binding_count; b++) { - bool has_samplers = - layout->bindings[b].type == VK_DESCRIPTOR_TYPE_SAMPLER || - layout->bindings[b].type 
== VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - - if (!has_samplers || layout->bindings[b].immutable_sampler_idx == ~0) - continue; - - struct dzn_descriptor_set_ptr ptr; - const struct dzn_sampler **sampler = - &layout->immutable_samplers[layout->bindings[b].immutable_sampler_idx]; - for (dzn_descriptor_set_ptr_init(set, &ptr, b, 0); - dzn_descriptor_set_ptr_is_valid(&ptr); - dzn_descriptor_set_ptr_move(set, &ptr, 1)) { - dzn_descriptor_set_write_sampler_desc(set, &ptr, *sampler); - sampler++; - } - } - } -} - -static void -dzn_descriptor_set_finish(struct dzn_descriptor_set *set) -{ - vk_object_base_finish(&set->base); - set->pool = NULL; - set->layout = NULL; -} - -static void -dzn_descriptor_pool_destroy(struct dzn_descriptor_pool *pool, - const VkAllocationCallbacks *pAllocator) -{ - if (!pool) - return; - - struct dzn_device *device = container_of(pool->base.device, struct dzn_device, vk); - - dzn_foreach_pool_type (type) { - if (pool->desc_count[type]) - dzn_descriptor_heap_finish(&pool->heaps[type]); - } - - vk_object_base_finish(&pool->base); - vk_free2(&device->vk.alloc, pAllocator, pool); -} - -static VkResult -dzn_descriptor_pool_create(struct dzn_device *device, - const VkDescriptorPoolCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkDescriptorPool *out) -{ - VK_MULTIALLOC(ma); - VK_MULTIALLOC_DECL(&ma, struct dzn_descriptor_pool, pool, 1); - VK_MULTIALLOC_DECL(&ma, struct dzn_descriptor_set, sets, pCreateInfo->maxSets); - - if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, pAllocator, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT)) - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - - pool->alloc = pAllocator ? *pAllocator : device->vk.alloc; - pool->sets = sets; - pool->set_count = pCreateInfo->maxSets; - mtx_init(&pool->defragment_lock, mtx_plain); - - vk_object_base_init(&device->vk, &pool->base, VK_OBJECT_TYPE_DESCRIPTOR_POOL); - - for (uint32_t p = 0; p < pCreateInfo->poolSizeCount; p++) { - VkDescriptorType type = pCreateInfo->pPoolSizes[p].type; - uint32_t num_desc = pCreateInfo->pPoolSizes[p].descriptorCount; - - switch (type) { - case VK_DESCRIPTOR_TYPE_SAMPLER: - pool->desc_count[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER] += num_desc; - break; - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - pool->desc_count[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] += num_desc; - pool->desc_count[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER] += num_desc; - break; - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - pool->desc_count[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] += num_desc; - break; - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - /* Reserve one UAV and one SRV slot for those. 
*/ - pool->desc_count[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] += num_desc * 2; - break; - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - break; - default: - unreachable("Unsupported desc type"); - } - } - - dzn_foreach_pool_type (type) { - if (!pool->desc_count[type]) - continue; - - VkResult result = - dzn_descriptor_heap_init(&pool->heaps[type], device, type, pool->desc_count[type], false); - if (result != VK_SUCCESS) { - dzn_descriptor_pool_destroy(pool, pAllocator); - return result; - } - } - - *out = dzn_descriptor_pool_to_handle(pool); - return VK_SUCCESS; -} - -static VkResult -dzn_descriptor_pool_defragment_heap(struct dzn_descriptor_pool *pool, - D3D12_DESCRIPTOR_HEAP_TYPE type) -{ - struct dzn_device *device = container_of(pool->base.device, struct dzn_device, vk); - struct dzn_descriptor_heap new_heap; - - VkResult result = - dzn_descriptor_heap_init(&new_heap, device, type, - pool->heaps[type].desc_count, - false); - if (result != VK_SUCCESS) - return result; - - mtx_lock(&pool->defragment_lock); - uint32_t heap_offset = 0; - for (uint32_t s = 0; s < pool->set_count; s++) { - if (!pool->sets[s].layout) - continue; - - dzn_descriptor_heap_copy(&new_heap, heap_offset, - &pool->heaps[type], - pool->sets[s].heap_offsets[type], - pool->sets[s].heap_sizes[type]); - pool->sets[s].heap_offsets[type] = heap_offset; - heap_offset += pool->sets[s].heap_sizes[type]; - } - mtx_unlock(&pool->defragment_lock); - - dzn_descriptor_heap_finish(&pool->heaps[type]); - pool->heaps[type] = new_heap; - - return VK_SUCCESS; -} - -VKAPI_ATTR VkResult VKAPI_CALL -dzn_CreateDescriptorPool(VkDevice device, - const VkDescriptorPoolCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkDescriptorPool *pDescriptorPool) -{ - return dzn_descriptor_pool_create(dzn_device_from_handle(device), - pCreateInfo, pAllocator, pDescriptorPool); -} - -VKAPI_ATTR void VKAPI_CALL -dzn_DestroyDescriptorPool(VkDevice device, - VkDescriptorPool descriptorPool, - const VkAllocationCallbacks *pAllocator) -{ - dzn_descriptor_pool_destroy(dzn_descriptor_pool_from_handle(descriptorPool), - pAllocator); -} - -VKAPI_ATTR VkResult VKAPI_CALL -dzn_ResetDescriptorPool(VkDevice device, - VkDescriptorPool descriptorPool, - VkDescriptorPoolResetFlags flags) -{ - VK_FROM_HANDLE(dzn_descriptor_pool, pool, descriptorPool); - - for (uint32_t s = 0; s < pool->set_count; s++) - dzn_descriptor_set_finish(&pool->sets[s]); - - dzn_foreach_pool_type(type) - pool->free_offset[type] = 0; - - return VK_SUCCESS; -} - -void -dzn_descriptor_heap_pool_finish(struct dzn_descriptor_heap_pool *pool) -{ - list_splicetail(&pool->active_heaps, &pool->free_heaps); - list_for_each_entry_safe(struct dzn_descriptor_heap_pool_entry, entry, &pool->free_heaps, link) { - list_del(&entry->link); - dzn_descriptor_heap_finish(&entry->heap); - vk_free(pool->alloc, entry); - } -} - -void -dzn_descriptor_heap_pool_init(struct dzn_descriptor_heap_pool *pool, - struct dzn_device *device, - D3D12_DESCRIPTOR_HEAP_TYPE type, - bool shader_visible, - const VkAllocationCallbacks *alloc) -{ - assert(!shader_visible || - type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV || - type == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); - - pool->alloc = alloc; - pool->type = type; - pool->shader_visible = shader_visible; - list_inithead(&pool->active_heaps); - list_inithead(&pool->free_heaps); - pool->offset = 0; - pool->desc_sz = ID3D12Device1_GetDescriptorHandleIncrementSize(device->dev, type); -} - -VkResult 
-dzn_descriptor_heap_pool_alloc_slots(struct dzn_descriptor_heap_pool *pool, - struct dzn_device *device, uint32_t desc_count, - struct dzn_descriptor_heap **heap, - uint32_t *first_slot) -{ - struct dzn_descriptor_heap *last_heap = - list_is_empty(&pool->active_heaps) ? - NULL : - &(list_last_entry(&pool->active_heaps, struct dzn_descriptor_heap_pool_entry, link)->heap); - uint32_t last_heap_desc_count = - last_heap ? last_heap->desc_count : 0; - - if (pool->offset + desc_count > last_heap_desc_count) { - uint32_t granularity = - (pool->type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV || - pool->type == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER) ? - 64 * 1024 : 4 * 1024; - uint32_t alloc_step = ALIGN_POT(desc_count * pool->desc_sz, granularity); - uint32_t heap_desc_count = MAX2(alloc_step / pool->desc_sz, 16); - - /* Maximum of 2048 samplers per heap when shader_visible is true. */ - if (pool->shader_visible && - pool->type == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER) { - assert(desc_count <= 2048); - heap_desc_count = MIN2(heap_desc_count, 2048); - } - - struct dzn_descriptor_heap_pool_entry *new_heap = NULL; - - list_for_each_entry_safe(struct dzn_descriptor_heap_pool_entry, entry, &pool->free_heaps, link) { - if (entry->heap.desc_count >= heap_desc_count) { - new_heap = entry; - list_del(&entry->link); - break; - } - } - - if (!new_heap) { - new_heap = (struct dzn_descriptor_heap_pool_entry *) - vk_zalloc(pool->alloc, sizeof(*new_heap), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!new_heap) - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - - VkResult result = - dzn_descriptor_heap_init(&new_heap->heap, device, pool->type, - heap_desc_count, pool->shader_visible); - if (result != VK_SUCCESS) { - vk_free(&device->vk.alloc, new_heap); - return result; - } - } - - list_addtail(&new_heap->link, &pool->active_heaps); - pool->offset = 0; - last_heap = &new_heap->heap; - } - - *heap = last_heap; - *first_slot = pool->offset; - pool->offset += desc_count; - return VK_SUCCESS; -} - -void -dzn_descriptor_heap_pool_reset(struct dzn_descriptor_heap_pool *pool) -{ - pool->offset = 0; - list_splicetail(&pool->active_heaps, &pool->free_heaps); - list_inithead(&pool->free_heaps); -} - -VKAPI_ATTR VkResult VKAPI_CALL -dzn_AllocateDescriptorSets(VkDevice dev, - const VkDescriptorSetAllocateInfo *pAllocateInfo, - VkDescriptorSet *pDescriptorSets) -{ - VK_FROM_HANDLE(dzn_descriptor_pool, pool, pAllocateInfo->descriptorPool); - VK_FROM_HANDLE(dzn_device, device, dev); - VkResult result; - unsigned i; - - if (pAllocateInfo->descriptorSetCount > (pool->set_count - pool->used_set_count)) - return VK_ERROR_OUT_OF_POOL_MEMORY; - - uint32_t set_idx = 0; - for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) { - VK_FROM_HANDLE(dzn_descriptor_set_layout, layout, pAllocateInfo->pSetLayouts[i]); - - dzn_foreach_pool_type(type) { - if (pool->used_desc_count[type] + layout->range_desc_count[type] > pool->desc_count[type]) { - dzn_FreeDescriptorSets(dev, pAllocateInfo->descriptorPool, i, pDescriptorSets); - return vk_error(device, VK_ERROR_OUT_OF_POOL_MEMORY); - } - - if (pool->free_offset[type] + layout->range_desc_count[type] > pool->desc_count[type]) { - result = dzn_descriptor_pool_defragment_heap(pool, type); - if (result != VK_SUCCESS) { - dzn_FreeDescriptorSets(dev, pAllocateInfo->descriptorPool, i, pDescriptorSets); - return vk_error(device, VK_ERROR_FRAGMENTED_POOL); - } - } - } - - struct dzn_descriptor_set *set = NULL; - for (; set_idx < pool->set_count; set_idx++) { - if (!pool->sets[set_idx].layout) { - set 
= &pool->sets[set_idx]; - break; - } - } - - dzn_descriptor_set_init(set, device, pool, layout); - pDescriptorSets[i] = dzn_descriptor_set_to_handle(set); - } - - return VK_SUCCESS; -} - -VKAPI_ATTR VkResult VKAPI_CALL -dzn_FreeDescriptorSets(VkDevice dev, - VkDescriptorPool descriptorPool, - uint32_t count, - const VkDescriptorSet *pDescriptorSets) -{ - VK_FROM_HANDLE(dzn_descriptor_pool, pool, descriptorPool); - VK_FROM_HANDLE(dzn_device, device, dev); - - for (uint32_t s = 0; s < count; s++) { - VK_FROM_HANDLE(dzn_descriptor_set, set, pDescriptorSets[s]); - - if (!set) - continue; - - assert(set->pool == pool); - - dzn_descriptor_set_finish(set); - } - - mtx_lock(&pool->defragment_lock); - dzn_foreach_pool_type(type) - pool->free_offset[type] = 0; - - for (uint32_t s = 0; s < pool->set_count; s++) { - const struct dzn_descriptor_set *set = &pool->sets[s]; - - if (set->layout) { - dzn_foreach_pool_type (type) { - pool->free_offset[type] = - MAX2(pool->free_offset[type], - set->heap_offsets[type] + - set->layout->range_desc_count[type]); - } - } - } - mtx_unlock(&pool->defragment_lock); - - return VK_SUCCESS; -} - -static void -dzn_descriptor_set_write(const VkWriteDescriptorSet *pDescriptorWrite) -{ - VK_FROM_HANDLE(dzn_descriptor_set, set, pDescriptorWrite->dstSet); - - struct dzn_descriptor_set_ptr ptr; - - dzn_descriptor_set_ptr_init(set, &ptr, - pDescriptorWrite->dstBinding, - pDescriptorWrite->dstArrayElement); - uint32_t desc_count = pDescriptorWrite->descriptorCount; - - uint32_t d = 0; - bool cube_as_2darray = - pDescriptorWrite->descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - - switch (pDescriptorWrite->descriptorType) { - case VK_DESCRIPTOR_TYPE_SAMPLER: - for (; dzn_descriptor_set_ptr_is_valid(&ptr) && d < desc_count; - dzn_descriptor_set_ptr_move(set, &ptr, 1)) { - assert(dzn_descriptor_set_get_desc_vk_type(set, &ptr) == pDescriptorWrite->descriptorType); - const VkDescriptorImageInfo *pImageInfo = pDescriptorWrite->pImageInfo + d; - VK_FROM_HANDLE(dzn_sampler, sampler, pImageInfo->sampler); - - if (sampler) - dzn_descriptor_set_write_sampler_desc(set, &ptr, sampler); - - d++; - } - break; - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - for (; dzn_descriptor_set_ptr_is_valid(&ptr) && d < desc_count; - dzn_descriptor_set_ptr_move(set, &ptr, 1)) { - assert(dzn_descriptor_set_get_desc_vk_type(set, &ptr) == pDescriptorWrite->descriptorType); - const VkDescriptorImageInfo *pImageInfo = pDescriptorWrite->pImageInfo + d; - VK_FROM_HANDLE(dzn_sampler, sampler, pImageInfo->sampler); - VK_FROM_HANDLE(dzn_image_view, iview, pImageInfo->imageView); - - if (sampler) - dzn_descriptor_set_write_sampler_desc(set, &ptr, sampler); - - if (iview) - dzn_descriptor_set_write_image_view_desc(set, &ptr, cube_as_2darray, iview); - - d++; - } - break; - - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - for (; dzn_descriptor_set_ptr_is_valid(&ptr) && d < desc_count; - dzn_descriptor_set_ptr_move(set, &ptr, 1)) { - assert(dzn_descriptor_set_get_desc_vk_type(set, &ptr) == pDescriptorWrite->descriptorType); - const VkDescriptorImageInfo *pImageInfo = pDescriptorWrite->pImageInfo + d; - VK_FROM_HANDLE(dzn_image_view, iview, pImageInfo->imageView); - - if (iview) - dzn_descriptor_set_write_image_view_desc(set, &ptr, cube_as_2darray, iview); - - d++; - } - break; - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - for (; dzn_descriptor_set_ptr_is_valid(&ptr) && d < desc_count; - 
dzn_descriptor_set_ptr_move(set, &ptr, 1)) { - assert(dzn_descriptor_set_get_desc_vk_type(set, &ptr) == pDescriptorWrite->descriptorType); - const VkDescriptorBufferInfo *binfo = &pDescriptorWrite->pBufferInfo[d]; - struct dzn_buffer_desc desc = { - pDescriptorWrite->descriptorType, - dzn_buffer_from_handle(binfo->buffer), - binfo->range, binfo->offset - }; - - if (desc.buffer) - dzn_descriptor_set_write_buffer_desc(set, &ptr, &desc); - - d++; - } - break; - - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - for (; dzn_descriptor_set_ptr_is_valid(&ptr) && d < desc_count; - dzn_descriptor_set_ptr_move(set, &ptr, 1)) { - assert(dzn_descriptor_set_get_desc_vk_type(set, &ptr) == pDescriptorWrite->descriptorType); - const VkDescriptorBufferInfo *binfo = &pDescriptorWrite->pBufferInfo[d]; - struct dzn_buffer_desc desc = { - pDescriptorWrite->descriptorType, - dzn_buffer_from_handle(binfo->buffer), - binfo->range, binfo->offset - }; - - if (desc.buffer) - dzn_descriptor_set_write_dynamic_buffer_desc(set, &ptr, &desc); - - d++; - } - break; - - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - for (; dzn_descriptor_set_ptr_is_valid(&ptr) && d < desc_count; - dzn_descriptor_set_ptr_move(set, &ptr, 1)) { - assert(dzn_descriptor_set_get_desc_vk_type(set, &ptr) == pDescriptorWrite->descriptorType); - VK_FROM_HANDLE(dzn_buffer_view, bview, pDescriptorWrite->pTexelBufferView[d]); - - if (bview) - dzn_descriptor_set_write_buffer_view_desc(set, &ptr, bview); - - d++; - } - break; - - default: - unreachable("invalid descriptor type"); - break; - } - - assert(d == pDescriptorWrite->descriptorCount); -} - -static void -dzn_descriptor_set_copy(const VkCopyDescriptorSet *pDescriptorCopy) -{ - VK_FROM_HANDLE(dzn_descriptor_set, src_set, pDescriptorCopy->srcSet); - VK_FROM_HANDLE(dzn_descriptor_set, dst_set, pDescriptorCopy->dstSet); - struct dzn_descriptor_set_ptr src_ptr, dst_ptr; - - dzn_descriptor_set_ptr_init(src_set, &src_ptr, - pDescriptorCopy->srcBinding, - pDescriptorCopy->srcArrayElement); - dzn_descriptor_set_ptr_init(dst_set, &dst_ptr, - pDescriptorCopy->dstBinding, - pDescriptorCopy->dstArrayElement); - - uint32_t copied_count = 0; - - while (dzn_descriptor_set_ptr_is_valid(&src_ptr) && - dzn_descriptor_set_ptr_is_valid(&dst_ptr) && - copied_count < pDescriptorCopy->descriptorCount) { - VkDescriptorType src_type = - dzn_descriptor_set_get_desc_vk_type(src_set, &src_ptr); - VkDescriptorType dst_type = - dzn_descriptor_set_get_desc_vk_type(dst_set, &dst_ptr); - - assert(src_type == dst_type); - uint32_t count = - MIN2(dzn_descriptor_set_remaining_descs_in_binding(src_set, &src_ptr), - dzn_descriptor_set_remaining_descs_in_binding(dst_set, &dst_ptr)); - - if (src_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC || - src_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) { - uint32_t src_idx = - dzn_descriptor_set_get_dynamic_buffer_idx(src_set, &src_ptr); - uint32_t dst_idx = - dzn_descriptor_set_get_dynamic_buffer_idx(dst_set, &dst_ptr); - - memcpy(&dst_set->dynamic_buffers[dst_idx], - &src_set->dynamic_buffers[src_idx], - sizeof(*dst_set->dynamic_buffers) * count); - } else { - dzn_foreach_pool_type(type) { - uint32_t src_heap_offset = - dzn_descriptor_set_get_heap_offset(src_set, type, &src_ptr, false); - uint32_t dst_heap_offset = - dzn_descriptor_set_get_heap_offset(dst_set, type, &dst_ptr, false); - - if (src_heap_offset == ~0) { - assert(dst_heap_offset == ~0); - continue; - } - - 
mtx_lock(&src_set->pool->defragment_lock);
-            mtx_lock(&dst_set->pool->defragment_lock);
-            dzn_descriptor_heap_copy(&dst_set->pool->heaps[type],
-                                     dst_set->heap_offsets[type] + dst_heap_offset,
-                                     &src_set->pool->heaps[type],
-                                     src_set->heap_offsets[type] + src_heap_offset,
-                                     count);
-
-            if (dzn_descriptor_type_depends_on_shader_usage(src_type)) {
-               src_heap_offset =
-                  dzn_descriptor_set_get_heap_offset(src_set, type, &src_ptr, true);
-               dst_heap_offset =
-                  dzn_descriptor_set_get_heap_offset(dst_set, type, &dst_ptr, true);
-               assert(src_heap_offset != ~0);
-               assert(dst_heap_offset != ~0);
-               dzn_descriptor_heap_copy(&dst_set->pool->heaps[type],
-                                        dst_set->heap_offsets[type] + dst_heap_offset,
-                                        &src_set->pool->heaps[type],
-                                        src_set->heap_offsets[type] + src_heap_offset,
-                                        count);
-            }
-            mtx_unlock(&dst_set->pool->defragment_lock);
-            mtx_unlock(&src_set->pool->defragment_lock);
-         }
-      }
-
-      dzn_descriptor_set_ptr_move(src_set, &src_ptr, count);
-      dzn_descriptor_set_ptr_move(dst_set, &dst_ptr, count);
-      copied_count += count;
-   }
-
-   assert(copied_count == pDescriptorCopy->descriptorCount);
-}
-
-VKAPI_ATTR void VKAPI_CALL
-dzn_UpdateDescriptorSets(VkDevice _device,
-                         uint32_t descriptorWriteCount,
-                         const VkWriteDescriptorSet *pDescriptorWrites,
-                         uint32_t descriptorCopyCount,
-                         const VkCopyDescriptorSet *pDescriptorCopies)
-{
-   VK_FROM_HANDLE(dzn_device, dev, _device);
-
-   for (unsigned i = 0; i < descriptorWriteCount; i++)
-      dzn_descriptor_set_write(&pDescriptorWrites[i]);
-
-   for (unsigned i = 0; i < descriptorCopyCount; i++)
-      dzn_descriptor_set_copy(&pDescriptorCopies[i]);
-}
diff --git a/src/microsoft/vulkan/dzn_device.c b/src/microsoft/vulkan/dzn_device.c
new file mode 100644
index 00000000000..480ae263f82
--- /dev/null
+++ b/src/microsoft/vulkan/dzn_device.c
@@ -0,0 +1,2650 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */ + +#include "dzn_private.h" + +#include "vk_alloc.h" +#include "vk_common_entrypoints.h" +#include "vk_cmd_enqueue_entrypoints.h" +#include "vk_debug_report.h" +#include "vk_format.h" +#include "vk_sync_dummy.h" +#include "vk_util.h" + +#include "util/debug.h" +#include "util/macros.h" + +#include "glsl_types.h" + +#include "dxil_validator.h" + +#include +#include +#include + +#include + +#define CINTERFACE +#include +#undef CINTERFACE + +#if defined(VK_USE_PLATFORM_WIN32_KHR) || \ + defined(VK_USE_PLATFORM_DISPLAY_KHR) +#define DZN_USE_WSI_PLATFORM +#endif + +#define DZN_API_VERSION VK_MAKE_VERSION(1, 0, VK_HEADER_VERSION) + +#define MAX_TIER2_MEMORY_TYPES 3 + +static const struct vk_instance_extension_table instance_extensions = { + .KHR_get_physical_device_properties2 = true, +#ifdef DZN_USE_WSI_PLATFORM + .KHR_surface = true, +#endif +#ifdef VK_USE_PLATFORM_WIN32_KHR + .KHR_win32_surface = true, +#endif +#ifdef VK_USE_PLATFORM_DISPLAY_KHR + .KHR_display = true, + .KHR_get_display_properties2 = true, + .EXT_direct_mode_display = true, + .EXT_display_surface_counter = true, +#endif + .EXT_debug_report = true, + .EXT_debug_utils = true, +}; + +static void +dzn_physical_device_get_extensions(struct dzn_physical_device *pdev) +{ + pdev->vk.supported_extensions = (struct vk_device_extension_table) { +#ifdef DZN_USE_WSI_PLATFORM + .KHR_swapchain = true, +#endif + }; +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_EnumerateInstanceExtensionProperties(const char *pLayerName, + uint32_t *pPropertyCount, + VkExtensionProperties *pProperties) +{ + /* We don't support any layers */ + if (pLayerName) + return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT); + + return vk_enumerate_instance_extension_properties( + &instance_extensions, pPropertyCount, pProperties); +} + +static const struct debug_control dzn_debug_options[] = { + { "sync", DZN_DEBUG_SYNC }, + { "nir", DZN_DEBUG_NIR }, + { "dxil", DZN_DEBUG_DXIL }, + { "warp", DZN_DEBUG_WARP }, + { "internal", DZN_DEBUG_INTERNAL }, + { "signature", DZN_DEBUG_SIG }, + { "gbv", DZN_DEBUG_GBV }, + { "d3d12", DZN_DEBUG_D3D12 }, + { NULL, 0 } +}; + +static void +dzn_physical_device_destroy(struct dzn_physical_device *pdev) +{ + struct dzn_instance *instance = container_of(pdev->vk.instance, struct dzn_instance, vk); + + list_del(&pdev->link); + + if (pdev->dev) + ID3D12Device1_Release(pdev->dev); + + if (pdev->adapter) + IDXGIAdapter1_Release(pdev->adapter); + + dzn_wsi_finish(pdev); + vk_physical_device_finish(&pdev->vk); + vk_free(&instance->vk.alloc, pdev); +} + +static void +dzn_instance_destroy(struct dzn_instance *instance, const VkAllocationCallbacks *alloc) +{ + if (!instance) + return; + + if (instance->dxil_validator) + dxil_destroy_validator(instance->dxil_validator); + + list_for_each_entry_safe(struct dzn_physical_device, pdev, + &instance->physical_devices, link) { + dzn_physical_device_destroy(pdev); + } + + vk_instance_finish(&instance->vk); + vk_free2(vk_default_allocator(), alloc, instance); +} + +static VkResult +dzn_instance_create(const VkInstanceCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkInstance *out) +{ + struct dzn_instance *instance = (struct dzn_instance *) + vk_zalloc2(vk_default_allocator(), pAllocator, sizeof(*instance), 8, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (!instance) + return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY); + + struct vk_instance_dispatch_table dispatch_table; + vk_instance_dispatch_table_from_entrypoints(&dispatch_table, + &dzn_instance_entrypoints, + true); + + VkResult result 
= + vk_instance_init(&instance->vk, &instance_extensions, + &dispatch_table, pCreateInfo, + pAllocator ? pAllocator : vk_default_allocator()); + if (result != VK_SUCCESS) { + vk_free2(vk_default_allocator(), pAllocator, instance); + return result; + } + + list_inithead(&instance->physical_devices); + instance->physical_devices_enumerated = false; + instance->debug_flags = + parse_debug_string(getenv("DZN_DEBUG"), dzn_debug_options); + + instance->dxil_validator = dxil_create_validator(NULL); + instance->d3d12.serialize_root_sig = d3d12_get_serialize_root_sig(); + + if (!instance->dxil_validator || + !instance->d3d12.serialize_root_sig) { + dzn_instance_destroy(instance, pAllocator); + return vk_error(NULL, VK_ERROR_INITIALIZATION_FAILED); + } + + if (instance->debug_flags & DZN_DEBUG_D3D12) + d3d12_enable_debug_layer(); + if (instance->debug_flags & DZN_DEBUG_GBV) + d3d12_enable_gpu_validation(); + + *out = dzn_instance_to_handle(instance); + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_CreateInstance(const VkInstanceCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkInstance *pInstance) +{ + return dzn_instance_create(pCreateInfo, pAllocator, pInstance); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_DestroyInstance(VkInstance instance, + const VkAllocationCallbacks *pAllocator) +{ + dzn_instance_destroy(dzn_instance_from_handle(instance), pAllocator); +} + +static VkResult +dzn_physical_device_create(struct dzn_instance *instance, + IDXGIAdapter1 *adapter, + const DXGI_ADAPTER_DESC1 *adapter_desc) +{ + struct dzn_physical_device *pdev = (struct dzn_physical_device *) + vk_zalloc(&instance->vk.alloc, sizeof(*pdev), 8, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + + if (!pdev) + return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + struct vk_physical_device_dispatch_table dispatch_table; + vk_physical_device_dispatch_table_from_entrypoints(&dispatch_table, + &dzn_physical_device_entrypoints, + true); + vk_physical_device_dispatch_table_from_entrypoints(&dispatch_table, + &wsi_physical_device_entrypoints, + false); + + VkResult result = + vk_physical_device_init(&pdev->vk, &instance->vk, + NULL, /* We set up extensions later */ + &dispatch_table); + if (result != VK_SUCCESS) { + vk_free(&instance->vk.alloc, pdev); + return result; + } + + mtx_init(&pdev->dev_lock, mtx_plain); + pdev->adapter_desc = *adapter_desc; + pdev->adapter = adapter; + IDXGIAdapter1_AddRef(adapter); + list_addtail(&pdev->link, &instance->physical_devices); + + vk_warn_non_conformant_implementation("dzn"); + + /* TODO: correct UUIDs */ + memset(pdev->pipeline_cache_uuid, 0, VK_UUID_SIZE); + memset(pdev->driver_uuid, 0, VK_UUID_SIZE); + memset(pdev->device_uuid, 0, VK_UUID_SIZE); + + /* TODO: something something queue families */ + + result = dzn_wsi_init(pdev); + if (result != VK_SUCCESS) { + dzn_physical_device_destroy(pdev); + return result; + } + + dzn_physical_device_get_extensions(pdev); + + uint32_t num_sync_types = 0; + pdev->sync_types[num_sync_types++] = &dzn_sync_type; + pdev->sync_types[num_sync_types++] = &vk_sync_dummy_type; + pdev->sync_types[num_sync_types] = NULL; + assert(num_sync_types <= MAX_SYNC_TYPES); + pdev->vk.supported_sync_types = pdev->sync_types; + + return VK_SUCCESS; +} + +static void +dzn_physical_device_cache_caps(struct dzn_physical_device *pdev) +{ + D3D_FEATURE_LEVEL checklist[] = { + D3D_FEATURE_LEVEL_11_0, + D3D_FEATURE_LEVEL_11_1, + D3D_FEATURE_LEVEL_12_0, + D3D_FEATURE_LEVEL_12_1, + D3D_FEATURE_LEVEL_12_2, + }; + + D3D12_FEATURE_DATA_FEATURE_LEVELS 
levels = { + .NumFeatureLevels = ARRAY_SIZE(checklist), + .pFeatureLevelsRequested = checklist, + }; + + ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_FEATURE_LEVELS, &levels, sizeof(levels)); + pdev->feature_level = levels.MaxSupportedFeatureLevel; + + ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_ARCHITECTURE1, &pdev->architecture, sizeof(pdev->architecture)); + ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_D3D12_OPTIONS, &pdev->options, sizeof(pdev->options)); + + pdev->queue_families[pdev->queue_family_count++] = (struct dzn_queue_family) { + .props = { + .queueFlags = VK_QUEUE_GRAPHICS_BIT | + VK_QUEUE_COMPUTE_BIT | + VK_QUEUE_TRANSFER_BIT, + .queueCount = 1, + .timestampValidBits = 64, + .minImageTransferGranularity = { 0, 0, 0 }, + }, + .desc = { + .Type = D3D12_COMMAND_LIST_TYPE_DIRECT, + }, + }; + + pdev->queue_families[pdev->queue_family_count++] = (struct dzn_queue_family) { + .props = { + .queueFlags = VK_QUEUE_COMPUTE_BIT | + VK_QUEUE_TRANSFER_BIT, + .queueCount = 8, + .timestampValidBits = 64, + .minImageTransferGranularity = { 0, 0, 0 }, + }, + .desc = { + .Type = D3D12_COMMAND_LIST_TYPE_COMPUTE, + }, + }; + + pdev->queue_families[pdev->queue_family_count++] = (struct dzn_queue_family) { + .props = { + .queueFlags = VK_QUEUE_TRANSFER_BIT, + .queueCount = 1, + .timestampValidBits = 0, + .minImageTransferGranularity = { 0, 0, 0 }, + }, + .desc = { + .Type = D3D12_COMMAND_LIST_TYPE_COPY, + }, + }; + + assert(pdev->queue_family_count <= ARRAY_SIZE(pdev->queue_families)); + + D3D12_COMMAND_QUEUE_DESC queue_desc = { + .Type = D3D12_COMMAND_LIST_TYPE_DIRECT, + .Priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, + .Flags = D3D12_COMMAND_QUEUE_FLAG_NONE, + .NodeMask = 0, + }; + + ID3D12CommandQueue *cmdqueue; + ID3D12Device1_CreateCommandQueue(pdev->dev, &queue_desc, + &IID_ID3D12CommandQueue, + &cmdqueue); + + uint64_t ts_freq; + ID3D12CommandQueue_GetTimestampFrequency(cmdqueue, &ts_freq); + pdev->timestamp_period = 1000000000.0f / ts_freq; + ID3D12CommandQueue_Release(cmdqueue); +} + +static void +dzn_physical_device_init_memory(struct dzn_physical_device *pdev) +{ + VkPhysicalDeviceMemoryProperties *mem = &pdev->memory; + const DXGI_ADAPTER_DESC1 *desc = &pdev->adapter_desc; + + mem->memoryHeapCount = 1; + mem->memoryHeaps[0] = (VkMemoryHeap) { + .size = desc->SharedSystemMemory, + .flags = 0, + }; + + mem->memoryTypes[mem->memoryTypeCount++] = (VkMemoryType) { + .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + .heapIndex = 0, + }; + mem->memoryTypes[mem->memoryTypeCount++] = (VkMemoryType) { + .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_CACHED_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + .heapIndex = 0, + }; + + if (!pdev->architecture.UMA) { + mem->memoryHeaps[mem->memoryHeapCount++] = (VkMemoryHeap) { + .size = desc->DedicatedVideoMemory, + .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, + }; + mem->memoryTypes[mem->memoryTypeCount++] = (VkMemoryType) { + .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + .heapIndex = mem->memoryHeapCount - 1, + }; + } else { + mem->memoryHeaps[0].flags |= VK_MEMORY_HEAP_DEVICE_LOCAL_BIT; + mem->memoryTypes[0].propertyFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + mem->memoryTypes[1].propertyFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + } + + assert(mem->memoryTypeCount <= MAX_TIER2_MEMORY_TYPES); + + if (pdev->options.ResourceHeapTier == D3D12_RESOURCE_HEAP_TIER_1) { + unsigned oldMemoryTypeCount = 
mem->memoryTypeCount; + VkMemoryType oldMemoryTypes[MAX_TIER2_MEMORY_TYPES]; + + memcpy(oldMemoryTypes, mem->memoryTypes, oldMemoryTypeCount * sizeof(VkMemoryType)); + + mem->memoryTypeCount = 0; + for (unsigned oldMemoryTypeIdx = 0; oldMemoryTypeIdx < oldMemoryTypeCount; ++oldMemoryTypeIdx) { + D3D12_HEAP_FLAGS flags[] = { + D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS, + D3D12_HEAP_FLAG_ALLOW_ONLY_RT_DS_TEXTURES, + /* Note: Vulkan requires *all* images to come from the same memory type as long as + * the tiling property (and a few other misc properties) are the same. So, this + * non-RT/DS texture flag will only be used for TILING_LINEAR textures, which + * can't be render targets. + */ + D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES + }; + for (int i = 0; i < ARRAY_SIZE(flags); ++i) { + D3D12_HEAP_FLAGS flag = flags[i]; + pdev->heap_flags_for_mem_type[mem->memoryTypeCount] = flag; + mem->memoryTypes[mem->memoryTypeCount] = oldMemoryTypes[oldMemoryTypeIdx]; + mem->memoryTypeCount++; + } + } + } +} + +static D3D12_HEAP_FLAGS +dzn_physical_device_get_heap_flags_for_mem_type(const struct dzn_physical_device *pdev, + uint32_t mem_type) +{ + return pdev->heap_flags_for_mem_type[mem_type]; +} + +uint32_t +dzn_physical_device_get_mem_type_mask_for_resource(const struct dzn_physical_device *pdev, + const D3D12_RESOURCE_DESC *desc) +{ + if (pdev->options.ResourceHeapTier > D3D12_RESOURCE_HEAP_TIER_1) + return (1u << pdev->memory.memoryTypeCount) - 1; + + D3D12_HEAP_FLAGS deny_flag; + if (desc->Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) + deny_flag = D3D12_HEAP_FLAG_DENY_BUFFERS; + else if (desc->Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) + deny_flag = D3D12_HEAP_FLAG_DENY_RT_DS_TEXTURES; + else + deny_flag = D3D12_HEAP_FLAG_DENY_NON_RT_DS_TEXTURES; + + uint32_t mask = 0; + for (unsigned i = 0; i < pdev->memory.memoryTypeCount; ++i) { + if ((pdev->heap_flags_for_mem_type[i] & deny_flag) == D3D12_HEAP_FLAG_NONE) + mask |= (1 << i); + } + return mask; +} + +static uint32_t +dzn_physical_device_get_max_mip_level(bool is_3d) +{ + return is_3d ? 11 : 14; +} + +static uint32_t +dzn_physical_device_get_max_extent(bool is_3d) +{ + uint32_t max_mip = dzn_physical_device_get_max_mip_level(is_3d); + + return 1 << max_mip; +} + +static uint32_t +dzn_physical_device_get_max_array_layers() +{ + return dzn_physical_device_get_max_extent(false); +} + +static ID3D12Device1 * +dzn_physical_device_get_d3d12_dev(struct dzn_physical_device *pdev) +{ + struct dzn_instance *instance = container_of(pdev->vk.instance, struct dzn_instance, vk); + + mtx_lock(&pdev->dev_lock); + if (!pdev->dev) { + pdev->dev = d3d12_create_device(pdev->adapter, !instance->dxil_validator); + + dzn_physical_device_cache_caps(pdev); + dzn_physical_device_init_memory(pdev); + } + mtx_unlock(&pdev->dev_lock); + + return pdev->dev; +} + +D3D12_FEATURE_DATA_FORMAT_SUPPORT +dzn_physical_device_get_format_support(struct dzn_physical_device *pdev, + VkFormat format) +{ + VkImageUsageFlags usage = + vk_format_is_depth_or_stencil(format) ? 
+      VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT : 0;
+   VkImageAspectFlags aspects = 0;
+
+   if (vk_format_has_depth(format))
+      aspects = VK_IMAGE_ASPECT_DEPTH_BIT;
+   if (vk_format_has_stencil(format))
+      aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
+
+   D3D12_FEATURE_DATA_FORMAT_SUPPORT dfmt_info = {
+     .Format = dzn_image_get_dxgi_format(format, usage, aspects),
+   };
+
+   ID3D12Device1 *dev = dzn_physical_device_get_d3d12_dev(pdev);
+   HRESULT hres =
+      ID3D12Device1_CheckFeatureSupport(dev, D3D12_FEATURE_FORMAT_SUPPORT,
+                                        &dfmt_info, sizeof(dfmt_info));
+   assert(!FAILED(hres));
+
+   if (usage != VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)
+      return dfmt_info;
+
+   /* Depth/stencil resources have a different format when they're accessed
+    * as textures, so query the capabilities for that format too.
+    */
+   dzn_foreach_aspect(aspect, aspects) {
+      D3D12_FEATURE_DATA_FORMAT_SUPPORT dfmt_info2 = {
+        .Format = dzn_image_get_dxgi_format(format, 0, aspect),
+      };
+
+      hres = ID3D12Device1_CheckFeatureSupport(dev, D3D12_FEATURE_FORMAT_SUPPORT,
+                                               &dfmt_info2, sizeof(dfmt_info2));
+      assert(!FAILED(hres));
+
+#define DS_SRV_FORMAT_SUPPORT1_MASK \
+        (D3D12_FORMAT_SUPPORT1_SHADER_LOAD | \
+         D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE | \
+         D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE_COMPARISON | \
+         D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE_MONO_TEXT | \
+         D3D12_FORMAT_SUPPORT1_MULTISAMPLE_RESOLVE | \
+         D3D12_FORMAT_SUPPORT1_MULTISAMPLE_LOAD | \
+         D3D12_FORMAT_SUPPORT1_SHADER_GATHER | \
+         D3D12_FORMAT_SUPPORT1_TYPED_UNORDERED_ACCESS_VIEW | \
+         D3D12_FORMAT_SUPPORT1_SHADER_GATHER_COMPARISON)
+
+      dfmt_info.Support1 |= dfmt_info2.Support1 & DS_SRV_FORMAT_SUPPORT1_MASK;
+      dfmt_info.Support2 |= dfmt_info2.Support2;
+   }
+
+   return dfmt_info;
+}
+
+static void
+dzn_physical_device_get_format_properties(struct dzn_physical_device *pdev,
+                                          VkFormat format,
+                                          VkFormatProperties2 *properties)
+{
+   D3D12_FEATURE_DATA_FORMAT_SUPPORT dfmt_info =
+      dzn_physical_device_get_format_support(pdev, format);
+   VkFormatProperties *base_props = &properties->formatProperties;
+
+   vk_foreach_struct(ext, properties->pNext) {
+      dzn_debug_ignored_stype(ext->sType);
+   }
+
+   if (dfmt_info.Format == DXGI_FORMAT_UNKNOWN) {
+      *base_props = (VkFormatProperties) { 0 };
+      return;
+   }
+
+   ID3D12Device1 *dev = dzn_physical_device_get_d3d12_dev(pdev);
+
+   *base_props = (VkFormatProperties) {
+      .linearTilingFeatures = VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT,
+      .optimalTilingFeatures = VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT,
+      .bufferFeatures = VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT,
+   };
+
+   if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_IA_VERTEX_BUFFER)
+      base_props->bufferFeatures |= VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT;
+
+#define TEX_FLAGS (D3D12_FORMAT_SUPPORT1_TEXTURE1D | \
+                   D3D12_FORMAT_SUPPORT1_TEXTURE2D | \
+                   D3D12_FORMAT_SUPPORT1_TEXTURE3D | \
+                   D3D12_FORMAT_SUPPORT1_TEXTURECUBE)
+   if (dfmt_info.Support1 & TEX_FLAGS) {
+      base_props->optimalTilingFeatures |=
+         VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_BLIT_SRC_BIT;
+   }
+
+   if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE) {
+      base_props->optimalTilingFeatures |=
+         VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
+   }
+
+   if ((dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_SHADER_LOAD) &&
+       (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_TYPED_UNORDERED_ACCESS_VIEW)) {
+      base_props->optimalTilingFeatures |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT;
+      base_props->bufferFeatures |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT;
+   }
+
+#define ATOMIC_FLAGS (D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_ADD | \
+                      D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_BITWISE_OPS | \
+                      D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_COMPARE_STORE_OR_COMPARE_EXCHANGE | \
+                      D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_EXCHANGE | \
+                      D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_SIGNED_MIN_OR_MAX | \
+                      D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_UNSIGNED_MIN_OR_MAX)
+   if ((dfmt_info.Support2 & ATOMIC_FLAGS) == ATOMIC_FLAGS) {
+      base_props->optimalTilingFeatures |= VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT;
+      base_props->bufferFeatures |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT;
+   }
+
+   if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_SHADER_LOAD)
+      base_props->bufferFeatures |= VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT;
+
+   /* The color/depth/stencil attachment caps imply the input attachment cap,
+    * and input attachment loads are lowered to texture loads in dozen, hence
+    * the requirement to have shader-load support.
+    */
+   if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_SHADER_LOAD) {
+      if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_RENDER_TARGET) {
+         base_props->optimalTilingFeatures |=
+            VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
+      }
+
+      if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_BLENDABLE)
+         base_props->optimalTilingFeatures |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT;
+
+      if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL) {
+         base_props->optimalTilingFeatures |=
+            VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
+      }
+   }
+
+   /* B4G4R4A4 support is required, but d3d12 doesn't support it. We map this
+    * format to R4G4B4A4 and adjust the SRV component-mapping to fake
+    * B4G4R4A4, but that forces us to limit the usage to sampling, which,
+    * luckily, is exactly what we need to support the required features.
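+    *
+    * As a rough illustration of the kind of SRV swizzle involved (the
+    * actual component mapping is set up where the SRVs are created, and
+    * the value below is illustrative rather than necessarily the one
+    * dozen uses), swapping the red and blue channels would look like:
+    *
+    *   D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(2, 1, 0, 3)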
+ */ + if (format == VK_FORMAT_B4G4R4A4_UNORM_PACK16) { + VkFormatFeatureFlags bgra4_req_features = + VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | + VK_FORMAT_FEATURE_TRANSFER_DST_BIT | + VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | + VK_FORMAT_FEATURE_BLIT_SRC_BIT | + VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT; + base_props->optimalTilingFeatures &= bgra4_req_features; + base_props->bufferFeatures = + VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT; + } + + /* depth/stencil format shouldn't advertise buffer features */ + if (vk_format_is_depth_or_stencil(format)) + base_props->bufferFeatures = 0; +} + +static VkResult +dzn_physical_device_get_image_format_properties(struct dzn_physical_device *pdev, + const VkPhysicalDeviceImageFormatInfo2 *info, + VkImageFormatProperties2 *properties) +{ + const VkPhysicalDeviceExternalImageFormatInfo *external_info = NULL; + VkExternalImageFormatProperties *external_props = NULL; + + *properties = (VkImageFormatProperties2) { + .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2, + }; + + /* Extract input structs */ + vk_foreach_struct_const(s, info->pNext) { + switch (s->sType) { + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO: + external_info = (const VkPhysicalDeviceExternalImageFormatInfo *)s; + break; + default: + dzn_debug_ignored_stype(s->sType); + break; + } + } + + assert(info->tiling == VK_IMAGE_TILING_OPTIMAL || info->tiling == VK_IMAGE_TILING_LINEAR); + + /* Extract output structs */ + vk_foreach_struct(s, properties->pNext) { + switch (s->sType) { + case VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES: + external_props = (VkExternalImageFormatProperties *)s; + break; + default: + dzn_debug_ignored_stype(s->sType); + break; + } + } + + assert((external_props != NULL) == (external_info != NULL)); + + /* TODO: support image import */ + if (external_info && external_info->handleType != 0) + return VK_ERROR_FORMAT_NOT_SUPPORTED; + + if (info->tiling != VK_IMAGE_TILING_OPTIMAL && + (info->usage & ~(VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT))) + return VK_ERROR_FORMAT_NOT_SUPPORTED; + + if (info->tiling != VK_IMAGE_TILING_OPTIMAL && + vk_format_is_depth_or_stencil(info->format)) + return VK_ERROR_FORMAT_NOT_SUPPORTED; + + D3D12_FEATURE_DATA_FORMAT_SUPPORT dfmt_info = + dzn_physical_device_get_format_support(pdev, info->format); + if (dfmt_info.Format == DXGI_FORMAT_UNKNOWN) + return VK_ERROR_FORMAT_NOT_SUPPORTED; + + bool is_bgra4 = info->format == VK_FORMAT_B4G4R4A4_UNORM_PACK16; + ID3D12Device1 *dev = dzn_physical_device_get_d3d12_dev(pdev); + + if ((info->type == VK_IMAGE_TYPE_1D && !(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_TEXTURE1D)) || + (info->type == VK_IMAGE_TYPE_2D && !(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_TEXTURE2D)) || + (info->type == VK_IMAGE_TYPE_3D && !(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_TEXTURE3D)) || + ((info->flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) && + !(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_TEXTURECUBE))) + return VK_ERROR_FORMAT_NOT_SUPPORTED; + + if ((info->usage & VK_IMAGE_USAGE_SAMPLED_BIT) && + !(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE)) + return VK_ERROR_FORMAT_NOT_SUPPORTED; + + if ((info->usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) && + (!(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_SHADER_LOAD) || is_bgra4)) + return VK_ERROR_FORMAT_NOT_SUPPORTED; + + if ((info->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) && + (!(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_RENDER_TARGET) || is_bgra4)) + return 
VK_ERROR_FORMAT_NOT_SUPPORTED; + + if ((info->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) && + (!(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL) || is_bgra4)) + return VK_ERROR_FORMAT_NOT_SUPPORTED; + + if ((info->usage & VK_IMAGE_USAGE_STORAGE_BIT) && + (!(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_TYPED_UNORDERED_ACCESS_VIEW) || is_bgra4)) + return VK_ERROR_FORMAT_NOT_SUPPORTED; + + if (info->type == VK_IMAGE_TYPE_3D && info->tiling != VK_IMAGE_TILING_OPTIMAL) + return VK_ERROR_FORMAT_NOT_SUPPORTED; + + bool is_3d = info->type == VK_IMAGE_TYPE_3D; + uint32_t max_extent = dzn_physical_device_get_max_extent(is_3d); + + if (info->tiling == VK_IMAGE_TILING_OPTIMAL && + dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_MIP) + properties->imageFormatProperties.maxMipLevels = dzn_physical_device_get_max_mip_level(is_3d) + 1; + else + properties->imageFormatProperties.maxMipLevels = 1; + + if (info->tiling == VK_IMAGE_TILING_OPTIMAL && info->type != VK_IMAGE_TYPE_3D) + properties->imageFormatProperties.maxArrayLayers = dzn_physical_device_get_max_array_layers(); + else + properties->imageFormatProperties.maxArrayLayers = 1; + + switch (info->type) { + case VK_IMAGE_TYPE_1D: + properties->imageFormatProperties.maxExtent.width = max_extent; + properties->imageFormatProperties.maxExtent.height = 1; + properties->imageFormatProperties.maxExtent.depth = 1; + break; + case VK_IMAGE_TYPE_2D: + properties->imageFormatProperties.maxExtent.width = max_extent; + properties->imageFormatProperties.maxExtent.height = max_extent; + properties->imageFormatProperties.maxExtent.depth = 1; + break; + case VK_IMAGE_TYPE_3D: + properties->imageFormatProperties.maxExtent.width = max_extent; + properties->imageFormatProperties.maxExtent.height = max_extent; + properties->imageFormatProperties.maxExtent.depth = max_extent; + break; + default: + unreachable("bad VkImageType"); + } + + /* From the Vulkan 1.0 spec, section 34.1.1. Supported Sample Counts: + * + * sampleCounts will be set to VK_SAMPLE_COUNT_1_BIT if at least one of the + * following conditions is true: + * + * - tiling is VK_IMAGE_TILING_LINEAR + * - type is not VK_IMAGE_TYPE_2D + * - flags contains VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT + * - neither the VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT flag nor the + * VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT flag in + * VkFormatProperties::optimalTilingFeatures returned by + * vkGetPhysicalDeviceFormatProperties is set. 
+ * + * D3D12 has a few more constraints: + * - no UAVs on multisample resources + */ + bool rt_or_ds_cap = + dfmt_info.Support1 & + (D3D12_FORMAT_SUPPORT1_RENDER_TARGET | D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL); + + properties->imageFormatProperties.sampleCounts = VK_SAMPLE_COUNT_1_BIT; + if (info->tiling != VK_IMAGE_TILING_LINEAR && + info->type == VK_IMAGE_TYPE_2D && + !(info->flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) && + rt_or_ds_cap && !is_bgra4 && + !(info->usage & VK_IMAGE_USAGE_STORAGE_BIT)) { + for (uint32_t s = VK_SAMPLE_COUNT_2_BIT; s < VK_SAMPLE_COUNT_64_BIT; s <<= 1) { + D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS ms_info = { + .Format = dfmt_info.Format, + .SampleCount = s, + }; + + HRESULT hres = + ID3D12Device1_CheckFeatureSupport(dev, D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS, + &ms_info, sizeof(ms_info)); + if (!FAILED(hres) && ms_info.NumQualityLevels > 0) + properties->imageFormatProperties.sampleCounts |= s; + } + } + + /* TODO: set correct value here */ + properties->imageFormatProperties.maxResourceSize = UINT32_MAX; + + return VK_SUCCESS; +} + +VKAPI_ATTR void VKAPI_CALL +dzn_GetPhysicalDeviceFormatProperties2(VkPhysicalDevice physicalDevice, + VkFormat format, + VkFormatProperties2 *pFormatProperties) +{ + VK_FROM_HANDLE(dzn_physical_device, pdev, physicalDevice); + + dzn_physical_device_get_format_properties(pdev, format, pFormatProperties); +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_GetPhysicalDeviceImageFormatProperties2(VkPhysicalDevice physicalDevice, + const VkPhysicalDeviceImageFormatInfo2 *info, + VkImageFormatProperties2 *props) +{ + VK_FROM_HANDLE(dzn_physical_device, pdev, physicalDevice); + + return dzn_physical_device_get_image_format_properties(pdev, info, props); +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_GetPhysicalDeviceImageFormatProperties(VkPhysicalDevice physicalDevice, + VkFormat format, + VkImageType type, + VkImageTiling tiling, + VkImageUsageFlags usage, + VkImageCreateFlags createFlags, + VkImageFormatProperties *pImageFormatProperties) +{ + const VkPhysicalDeviceImageFormatInfo2 info = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2, + .format = format, + .type = type, + .tiling = tiling, + .usage = usage, + .flags = createFlags, + }; + + VkImageFormatProperties2 props = { 0 }; + + VkResult result = + dzn_GetPhysicalDeviceImageFormatProperties2(physicalDevice, &info, &props); + *pImageFormatProperties = props.imageFormatProperties; + + return result; +} + +VKAPI_ATTR void VKAPI_CALL +dzn_GetPhysicalDeviceSparseImageFormatProperties(VkPhysicalDevice physicalDevice, + VkFormat format, + VkImageType type, + VkSampleCountFlagBits samples, + VkImageUsageFlags usage, + VkImageTiling tiling, + uint32_t *pPropertyCount, + VkSparseImageFormatProperties *pProperties) +{ + *pPropertyCount = 0; +} + +VKAPI_ATTR void VKAPI_CALL +dzn_GetPhysicalDeviceSparseImageFormatProperties2(VkPhysicalDevice physicalDevice, + const VkPhysicalDeviceSparseImageFormatInfo2 *pFormatInfo, + uint32_t *pPropertyCount, + VkSparseImageFormatProperties2 *pProperties) +{ + *pPropertyCount = 0; +} + +VKAPI_ATTR void VKAPI_CALL +dzn_GetPhysicalDeviceExternalBufferProperties(VkPhysicalDevice physicalDevice, + const VkPhysicalDeviceExternalBufferInfo *pExternalBufferInfo, + VkExternalBufferProperties *pExternalBufferProperties) +{ + pExternalBufferProperties->externalMemoryProperties = + (VkExternalMemoryProperties) { + .compatibleHandleTypes = (VkExternalMemoryHandleTypeFlags)pExternalBufferInfo->handleType, + }; +} + +VKAPI_ATTR VkResult VKAPI_CALL 
+dzn_EnumeratePhysicalDevices(VkInstance inst, + uint32_t *pPhysicalDeviceCount, + VkPhysicalDevice *pPhysicalDevices) +{ + VK_FROM_HANDLE(dzn_instance, instance, inst); + + if (!instance->physical_devices_enumerated) { + IDXGIFactory4 *factory = dxgi_get_factory(false); + IDXGIAdapter1 *adapter = NULL; + for (UINT i = 0; SUCCEEDED(IDXGIFactory4_EnumAdapters1(factory, i, &adapter)); ++i) { + DXGI_ADAPTER_DESC1 desc; + IDXGIAdapter1_GetDesc1(adapter, &desc); + if (instance->debug_flags & DZN_DEBUG_WARP) { + if ((desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) == 0) { + IDXGIAdapter1_Release(adapter); + continue; + } + } + + VkResult result = + dzn_physical_device_create(instance, adapter, &desc); + + IDXGIAdapter1_Release(adapter); + if (result != VK_SUCCESS) { + IDXGIFactory4_Release(factory); + return result; + } + } + IDXGIFactory4_Release(factory); + } + + VK_OUTARRAY_MAKE_TYPED(VkPhysicalDevice, out, pPhysicalDevices, + pPhysicalDeviceCount); + + list_for_each_entry(struct dzn_physical_device, pdev, &instance->physical_devices, link) { + vk_outarray_append_typed(VkPhysicalDevice, &out, i) + *i = dzn_physical_device_to_handle(pdev); + } + + instance->physical_devices_enumerated = true; + return vk_outarray_status(&out); +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_EnumerateInstanceVersion(uint32_t *pApiVersion) +{ + *pApiVersion = DZN_API_VERSION; + return VK_SUCCESS; +} + +static bool +dzn_physical_device_supports_compressed_format(struct dzn_physical_device *pdev, + const VkFormat *formats, + uint32_t format_count) +{ +#define REQUIRED_COMPRESSED_CAPS \ + (VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | \ + VK_FORMAT_FEATURE_BLIT_SRC_BIT | \ + VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT) + for (uint32_t i = 0; i < format_count; i++) { + VkFormatProperties2 props = { 0 }; + dzn_physical_device_get_format_properties(pdev, formats[i], &props); + if ((props.formatProperties.optimalTilingFeatures & REQUIRED_COMPRESSED_CAPS) != REQUIRED_COMPRESSED_CAPS) + return false; + } + + return true; +} + +static bool +dzn_physical_device_supports_bc(struct dzn_physical_device *pdev) +{ + static const VkFormat formats[] = { + VK_FORMAT_BC1_RGB_UNORM_BLOCK, + VK_FORMAT_BC1_RGB_SRGB_BLOCK, + VK_FORMAT_BC1_RGBA_UNORM_BLOCK, + VK_FORMAT_BC1_RGBA_SRGB_BLOCK, + VK_FORMAT_BC2_UNORM_BLOCK, + VK_FORMAT_BC2_SRGB_BLOCK, + VK_FORMAT_BC3_UNORM_BLOCK, + VK_FORMAT_BC3_SRGB_BLOCK, + VK_FORMAT_BC4_UNORM_BLOCK, + VK_FORMAT_BC4_SNORM_BLOCK, + VK_FORMAT_BC5_UNORM_BLOCK, + VK_FORMAT_BC5_SNORM_BLOCK, + VK_FORMAT_BC6H_UFLOAT_BLOCK, + VK_FORMAT_BC6H_SFLOAT_BLOCK, + VK_FORMAT_BC7_UNORM_BLOCK, + VK_FORMAT_BC7_SRGB_BLOCK, + }; + + return dzn_physical_device_supports_compressed_format(pdev, formats, ARRAY_SIZE(formats)); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice, + VkPhysicalDeviceFeatures2 *pFeatures) +{ + VK_FROM_HANDLE(dzn_physical_device, pdev, physicalDevice); + + pFeatures->features = (VkPhysicalDeviceFeatures) { + .robustBufferAccess = true, /* This feature is mandatory */ + .fullDrawIndexUint32 = false, + .imageCubeArray = true, + .independentBlend = false, + .geometryShader = false, + .tessellationShader = false, + .sampleRateShading = true, + .dualSrcBlend = false, + .logicOp = false, + .multiDrawIndirect = false, + .drawIndirectFirstInstance = false, + .depthClamp = false, + .depthBiasClamp = false, + .fillModeNonSolid = false, + .depthBounds = false, + .wideLines = false, + .largePoints = false, + .alphaToOne = false, + .multiViewport = false, + .samplerAnisotropy = false, + 
.textureCompressionETC2 = false, + .textureCompressionASTC_LDR = false, + .textureCompressionBC = dzn_physical_device_supports_bc(pdev), + .occlusionQueryPrecise = true, + .pipelineStatisticsQuery = true, + .vertexPipelineStoresAndAtomics = true, + .fragmentStoresAndAtomics = true, + .shaderTessellationAndGeometryPointSize = false, + .shaderImageGatherExtended = false, + .shaderStorageImageExtendedFormats = false, + .shaderStorageImageMultisample = false, + .shaderStorageImageReadWithoutFormat = false, + .shaderStorageImageWriteWithoutFormat = false, + .shaderUniformBufferArrayDynamicIndexing = false, + .shaderSampledImageArrayDynamicIndexing = false, + .shaderStorageBufferArrayDynamicIndexing = false, + .shaderStorageImageArrayDynamicIndexing = false, + .shaderClipDistance = false, + .shaderCullDistance = false, + .shaderFloat64 = false, + .shaderInt64 = false, + .shaderInt16 = false, + .shaderResourceResidency = false, + .shaderResourceMinLod = false, + .sparseBinding = false, + .sparseResidencyBuffer = false, + .sparseResidencyImage2D = false, + .sparseResidencyImage3D = false, + .sparseResidency2Samples = false, + .sparseResidency4Samples = false, + .sparseResidency8Samples = false, + .sparseResidency16Samples = false, + .sparseResidencyAliased = false, + .variableMultisampleRate = false, + .inheritedQueries = false, + }; + + + vk_foreach_struct(ext, pFeatures->pNext) { + dzn_debug_ignored_stype(ext->sType); + } +} + + +VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL +dzn_GetInstanceProcAddr(VkInstance _instance, + const char *pName) +{ + VK_FROM_HANDLE(dzn_instance, instance, _instance); + return vk_instance_get_proc_addr(&instance->vk, + &dzn_instance_entrypoints, + pName); +} + +/* Windows will use a dll definition file to avoid build errors. */ +#ifdef _WIN32 +#undef PUBLIC +#define PUBLIC +#endif + +/* With version 1+ of the loader interface the ICD should expose + * vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in apps. + */ +PUBLIC VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL +vk_icdGetInstanceProcAddr(VkInstance instance, + const char *pName); + +PUBLIC VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL +vk_icdGetInstanceProcAddr(VkInstance instance, + const char *pName) +{ + return dzn_GetInstanceProcAddr(instance, pName); +} + +/* With version 4+ of the loader interface the ICD should expose + * vk_icdGetPhysicalDeviceProcAddr() + */ +PUBLIC VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL +vk_icdGetPhysicalDeviceProcAddr(VkInstance _instance, + const char* pName); + +VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL +vk_icdGetPhysicalDeviceProcAddr(VkInstance _instance, + const char* pName) +{ + VK_FROM_HANDLE(dzn_instance, instance, _instance); + return vk_instance_get_physical_device_proc_addr(&instance->vk, pName); +} + +/* vk_icd.h does not declare this function, so we declare it here to + * suppress Wmissing-prototypes. + */ +PUBLIC VKAPI_ATTR VkResult VKAPI_CALL +vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t* pSupportedVersion); + +PUBLIC VKAPI_ATTR VkResult VKAPI_CALL +vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t* pSupportedVersion) +{ + /* For the full details on loader interface versioning, see + * . + * What follows is a condensed summary, to help you navigate the large and + * confusing official doc. + * + * - Loader interface v0 is incompatible with later versions. We don't + * support it. + * + * - In loader interface v1: + * - The first ICD entrypoint called by the loader is + * vk_icdGetInstanceProcAddr(). The ICD must statically expose this + * entrypoint. 
+ *    - The ICD must statically expose no other Vulkan symbol unless it is
+ *      linked with -Bsymbolic.
+ *    - Each dispatchable Vulkan handle created by the ICD must be
+ *      a pointer to a struct whose first member is VK_LOADER_DATA. The
+ *      ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
+ *    - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
+ *      vkDestroySurfaceKHR(). The ICD must be capable of working with
+ *      such loader-managed surfaces.
+ *
+ *    - Loader interface v2 differs from v1 in:
+ *        - The first ICD entrypoint called by the loader is
+ *          vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
+ *          statically expose this entrypoint.
+ *
+ *    - Loader interface v3 differs from v2 in:
+ *        - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
+ *          vkDestroySurfaceKHR(), and other API which uses VkSurfaceKHR,
+ *          because the loader no longer does so.
+ *
+ *    - Loader interface v4 differs from v3 in:
+ *        - The ICD must implement vk_icdGetPhysicalDeviceProcAddr().
+ */
+   *pSupportedVersion = MIN2(*pSupportedVersion, 4u);
+   return VK_SUCCESS;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
+                                 VkPhysicalDeviceProperties2 *pProperties)
+{
+   VK_FROM_HANDLE(dzn_physical_device, pdevice, physicalDevice);
+
+   /* minimum from the spec */
+   const VkSampleCountFlags supported_sample_counts =
+      VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_4_BIT;
+
+   /* FIXME: this is mostly bunk for now */
+   VkPhysicalDeviceLimits limits = {
+
+      /* TODO: support older feature levels */
+      .maxImageDimension1D = (1 << 14),
+      .maxImageDimension2D = (1 << 14),
+      .maxImageDimension3D = (1 << 11),
+      .maxImageDimensionCube = (1 << 14),
+      .maxImageArrayLayers = (1 << 11),
+
+      /* from here on, we simply use the minimum values from the spec for now */
+      .maxTexelBufferElements = 65536,
+      .maxUniformBufferRange = 16384,
+      .maxStorageBufferRange = (1ul << 27),
+      .maxPushConstantsSize = 128,
+      .maxMemoryAllocationCount = 4096,
+      .maxSamplerAllocationCount = 4000,
+      .bufferImageGranularity = 131072,
+      .sparseAddressSpaceSize = 0,
+      .maxBoundDescriptorSets = MAX_SETS,
+      .maxPerStageDescriptorSamplers = 16,
+      .maxPerStageDescriptorUniformBuffers = 12,
+      .maxPerStageDescriptorStorageBuffers = 4,
+      .maxPerStageDescriptorSampledImages = 16,
+      .maxPerStageDescriptorStorageImages = 4,
+      .maxPerStageDescriptorInputAttachments = 4,
+      .maxPerStageResources = 128,
+      .maxDescriptorSetSamplers = 96,
+      .maxDescriptorSetUniformBuffers = 72,
+      .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
+      .maxDescriptorSetStorageBuffers = 24,
+      .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
+      .maxDescriptorSetSampledImages = 96,
+      .maxDescriptorSetStorageImages = 24,
+      .maxDescriptorSetInputAttachments = 4,
+      .maxVertexInputAttributes = 16,
+      .maxVertexInputBindings = 16,
+      .maxVertexInputAttributeOffset = 2047,
+      .maxVertexInputBindingStride = 2048,
+      .maxVertexOutputComponents = 64,
+      .maxTessellationGenerationLevel = 0,
+      .maxTessellationPatchSize = 0,
+      .maxTessellationControlPerVertexInputComponents = 0,
+      .maxTessellationControlPerVertexOutputComponents = 0,
+      .maxTessellationControlPerPatchOutputComponents = 0,
+      .maxTessellationControlTotalOutputComponents = 0,
+      .maxTessellationEvaluationInputComponents = 0,
+      .maxTessellationEvaluationOutputComponents = 0,
+      .maxGeometryShaderInvocations = 0,
+      .maxGeometryInputComponents = 0,
+      .maxGeometryOutputComponents = 0,
+      .maxGeometryOutputVertices = 0,
+      .maxGeometryTotalOutputComponents = 0,
+      .maxFragmentInputComponents = 64,
+      .maxFragmentOutputAttachments = 4,
+      .maxFragmentDualSrcAttachments = 0,
+      .maxFragmentCombinedOutputResources = 4,
+      .maxComputeSharedMemorySize = 16384,
+      .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
+      .maxComputeWorkGroupInvocations = 128,
+      .maxComputeWorkGroupSize = { 128, 128, 64 },
+      .subPixelPrecisionBits = 4,
+      .subTexelPrecisionBits = 4,
+      .mipmapPrecisionBits = 4,
+      .maxDrawIndexedIndexValue = 0x00ffffff,
+      .maxDrawIndirectCount = 1,
+      .maxSamplerLodBias = 2.0f,
+      .maxSamplerAnisotropy = 1.0f,
+      .maxViewports = 1,
+      .maxViewportDimensions = { 4096, 4096 },
+      .viewportBoundsRange = { -8192, 8191 },
+      .viewportSubPixelBits = 0,
+      .minMemoryMapAlignment = 64,
+      .minTexelBufferOffsetAlignment = 256,
+      .minUniformBufferOffsetAlignment = 256,
+      .minStorageBufferOffsetAlignment = 256,
+      .minTexelOffset = -8,
+      .maxTexelOffset = 7,
+      .minTexelGatherOffset = 0,
+      .maxTexelGatherOffset = 0,
+      .minInterpolationOffset = -0.5f,
+      .maxInterpolationOffset = 0.5f,
+      .subPixelInterpolationOffsetBits = 4,
+      .maxFramebufferWidth = 4096,
+      .maxFramebufferHeight = 4096,
+      .maxFramebufferLayers = 256,
+      .framebufferColorSampleCounts = supported_sample_counts,
+      .framebufferDepthSampleCounts = supported_sample_counts,
+      .framebufferStencilSampleCounts = supported_sample_counts,
+      .framebufferNoAttachmentsSampleCounts = supported_sample_counts,
+      .maxColorAttachments = 4,
+      .sampledImageColorSampleCounts = supported_sample_counts,
+      .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
+      .sampledImageDepthSampleCounts = supported_sample_counts,
+      .sampledImageStencilSampleCounts = supported_sample_counts,
+      .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
+      .maxSampleMaskWords = 1,
+      .timestampComputeAndGraphics = true,
+      .timestampPeriod = pdevice->timestamp_period,
+      .maxClipDistances = 8,
+      .maxCullDistances = 8,
+      .maxCombinedClipAndCullDistances = 8,
+      .discreteQueuePriorities = 2,
+      .pointSizeRange = { 1.0f, 1.0f },
+      .lineWidthRange = { 1.0f, 1.0f },
+      .pointSizeGranularity = 0.0f,
+      .lineWidthGranularity = 0.0f,
+      .strictLines = 0,
+      .standardSampleLocations = false,
+      .optimalBufferCopyOffsetAlignment = 1,
+      .optimalBufferCopyRowPitchAlignment = 1,
+      .nonCoherentAtomSize = 256,
+   };
+
+   const DXGI_ADAPTER_DESC1 *desc = &pdevice->adapter_desc;
+
+   VkPhysicalDeviceType devtype = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
+   if (desc->Flags == DXGI_ADAPTER_FLAG_SOFTWARE)
+      devtype = VK_PHYSICAL_DEVICE_TYPE_CPU;
+   else if (false) { // TODO: detect discrete GPUs
+      /* This is a tad tricky to get right, because we need to have the
+       * actual ID3D12Device before we can query the
+       * D3D12_FEATURE_DATA_ARCHITECTURE structure... So for now, let's
+       * just pretend everything is integrated, because...
well, that's + * what I have at hand right now ;) + */ + devtype = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU; + } + + pProperties->properties = (VkPhysicalDeviceProperties) { + .apiVersion = DZN_API_VERSION, + .driverVersion = vk_get_driver_version(), + + .vendorID = desc->VendorId, + .deviceID = desc->DeviceId, + .deviceType = devtype, + + .limits = limits, + .sparseProperties = { 0 }, + }; + + snprintf(pProperties->properties.deviceName, + sizeof(pProperties->properties.deviceName), + "Microsoft Direct3D12 (%S)", desc->Description); + + memcpy(pProperties->properties.pipelineCacheUUID, + pdevice->pipeline_cache_uuid, VK_UUID_SIZE); + + vk_foreach_struct(ext, pProperties->pNext) { + switch (ext->sType) { + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES: { + VkPhysicalDeviceIDProperties *id_props = + (VkPhysicalDeviceIDProperties *)ext; + memcpy(id_props->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE); + memcpy(id_props->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE); + /* The LUID is for Windows. */ + id_props->deviceLUIDValid = false; + break; + } + default: + dzn_debug_ignored_stype(ext->sType); + break; + } + } +} + +VKAPI_ATTR void VKAPI_CALL +dzn_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice, + uint32_t *pQueueFamilyPropertyCount, + VkQueueFamilyProperties2 *pQueueFamilyProperties) +{ + VK_FROM_HANDLE(dzn_physical_device, pdev, physicalDevice); + VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out, + pQueueFamilyProperties, pQueueFamilyPropertyCount); + + (void)dzn_physical_device_get_d3d12_dev(pdev); + + for (uint32_t i = 0; i < pdev->queue_family_count; i++) { + vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p) { + p->queueFamilyProperties = pdev->queue_families[i].props; + + vk_foreach_struct(ext, pQueueFamilyProperties->pNext) { + dzn_debug_ignored_stype(ext->sType); + } + } + } +} + +VKAPI_ATTR void VKAPI_CALL +dzn_GetPhysicalDeviceMemoryProperties(VkPhysicalDevice physicalDevice, + VkPhysicalDeviceMemoryProperties *pMemoryProperties) +{ + VK_FROM_HANDLE(dzn_physical_device, pdev, physicalDevice); + + // Ensure memory caps are up-to-date + (void)dzn_physical_device_get_d3d12_dev(pdev); + *pMemoryProperties = pdev->memory; +} + +VKAPI_ATTR void VKAPI_CALL +dzn_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice physicalDevice, + VkPhysicalDeviceMemoryProperties2 *pMemoryProperties) +{ + dzn_GetPhysicalDeviceMemoryProperties(physicalDevice, + &pMemoryProperties->memoryProperties); + + vk_foreach_struct(ext, pMemoryProperties->pNext) { + dzn_debug_ignored_stype(ext->sType); + } +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount, + VkLayerProperties *pProperties) +{ + if (pProperties == NULL) { + *pPropertyCount = 0; + return VK_SUCCESS; + } + + return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT); +} + +static VkResult +dzn_queue_sync_wait(struct dzn_queue *queue, const struct vk_sync_wait *wait) +{ + if (wait->sync->type == &vk_sync_dummy_type) + return VK_SUCCESS; + + struct dzn_device *device = container_of(queue->vk.base.device, struct dzn_device, vk); + assert(wait->sync->type == &dzn_sync_type); + struct dzn_sync *sync = container_of(wait->sync, struct dzn_sync, vk); + uint64_t value = + (sync->vk.flags & VK_SYNC_IS_TIMELINE) ? 
wait->wait_value : 1;
+
+   assert(sync->fence != NULL);
+
+   if (value > 0 && FAILED(ID3D12CommandQueue_Wait(queue->cmdqueue, sync->fence, value)))
+      return vk_error(device, VK_ERROR_UNKNOWN);
+
+   return VK_SUCCESS;
+}
+
+static VkResult
+dzn_queue_sync_signal(struct dzn_queue *queue, const struct vk_sync_signal *signal)
+{
+   if (signal->sync->type == &vk_sync_dummy_type)
+      return VK_SUCCESS;
+
+   struct dzn_device *device = container_of(queue->vk.base.device, struct dzn_device, vk);
+   assert(signal->sync->type == &dzn_sync_type);
+   struct dzn_sync *sync = container_of(signal->sync, struct dzn_sync, vk);
+   uint64_t value =
+      (sync->vk.flags & VK_SYNC_IS_TIMELINE) ? signal->signal_value : 1;
+   assert(value > 0);
+
+   assert(sync->fence != NULL);
+
+   if (FAILED(ID3D12CommandQueue_Signal(queue->cmdqueue, sync->fence, value)))
+      return vk_error(device, VK_ERROR_UNKNOWN);
+
+   return VK_SUCCESS;
+}
+
+static VkResult
+dzn_queue_submit(struct vk_queue *q,
+                 struct vk_queue_submit *info)
+{
+   struct dzn_queue *queue = container_of(q, struct dzn_queue, vk);
+   struct dzn_device *device = container_of(q->base.device, struct dzn_device, vk);
+   VkResult result = VK_SUCCESS;
+
+   for (uint32_t i = 0; i < info->wait_count; i++) {
+      result = dzn_queue_sync_wait(queue, &info->waits[i]);
+      if (result != VK_SUCCESS)
+         return result;
+   }
+
+   for (uint32_t i = 0; i < info->command_buffer_count; i++) {
+      struct dzn_cmd_buffer *cmd_buffer =
+         container_of(info->command_buffers[i], struct dzn_cmd_buffer, vk);
+
+      ID3D12CommandList *cmdlists[] = { (ID3D12CommandList *)cmd_buffer->cmdlist };
+
+      util_dynarray_foreach(&cmd_buffer->events.wait, struct dzn_event *, evt) {
+         if (FAILED(ID3D12CommandQueue_Wait(queue->cmdqueue, (*evt)->fence, 1)))
+            return vk_error(device, VK_ERROR_UNKNOWN);
+      }
+
+      util_dynarray_foreach(&cmd_buffer->queries.wait, struct dzn_cmd_buffer_query_range, range) {
+         mtx_lock(&range->qpool->queries_lock);
+         for (uint32_t q = range->start; q < range->start + range->count; q++) {
+            struct dzn_query *query = &range->qpool->queries[q];
+
+            if (query->fence &&
+                FAILED(ID3D12CommandQueue_Wait(queue->cmdqueue, query->fence, query->fence_value))) {
+               /* Drop the lock before bailing out. */
+               mtx_unlock(&range->qpool->queries_lock);
+               return vk_error(device, VK_ERROR_UNKNOWN);
+            }
+         }
+         mtx_unlock(&range->qpool->queries_lock);
+      }
+
+      util_dynarray_foreach(&cmd_buffer->queries.reset, struct dzn_cmd_buffer_query_range, range) {
+         mtx_lock(&range->qpool->queries_lock);
+         for (uint32_t q = range->start; q < range->start + range->count; q++) {
+            struct dzn_query *query = &range->qpool->queries[q];
+            if (query->fence) {
+               ID3D12Fence_Release(query->fence);
+               query->fence = NULL;
+            }
+            query->fence_value = 0;
+         }
+         mtx_unlock(&range->qpool->queries_lock);
+      }
+
+      ID3D12CommandQueue_ExecuteCommandLists(queue->cmdqueue, 1, cmdlists);
+
+      util_dynarray_foreach(&cmd_buffer->events.signal, struct dzn_cmd_event_signal, evt) {
+         if (FAILED(ID3D12CommandQueue_Signal(queue->cmdqueue, evt->event->fence, evt->value ?
1 : 0))) + return vk_error(device, VK_ERROR_UNKNOWN); + } + + util_dynarray_foreach(&cmd_buffer->queries.signal, struct dzn_cmd_buffer_query_range, range) { + mtx_lock(&range->qpool->queries_lock); + for (uint32_t q = range->start; q < range->start + range->count; q++) { + struct dzn_query *query = &range->qpool->queries[q]; + query->fence_value = queue->fence_point + 1; + query->fence = queue->fence; + ID3D12Fence_AddRef(query->fence); + } + mtx_unlock(&range->qpool->queries_lock); + } + } + + for (uint32_t i = 0; i < info->signal_count; i++) { + result = dzn_queue_sync_signal(queue, &info->signals[i]); + if (result != VK_SUCCESS) + return vk_error(device, VK_ERROR_UNKNOWN); + } + + if (FAILED(ID3D12CommandQueue_Signal(queue->cmdqueue, queue->fence, ++queue->fence_point))) + return vk_error(device, VK_ERROR_UNKNOWN); + + return VK_SUCCESS; +} + +static void +dzn_queue_finish(struct dzn_queue *queue) +{ + if (queue->cmdqueue) + ID3D12CommandQueue_Release(queue->cmdqueue); + + if (queue->fence) + ID3D12Fence_Release(queue->fence); + + vk_queue_finish(&queue->vk); +} + +static VkResult +dzn_queue_init(struct dzn_queue *queue, + struct dzn_device *device, + const VkDeviceQueueCreateInfo *pCreateInfo, + uint32_t index_in_family) +{ + struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk); + + VkResult result = vk_queue_init(&queue->vk, &device->vk, pCreateInfo, index_in_family); + if (result != VK_SUCCESS) + return result; + + queue->vk.driver_submit = dzn_queue_submit; + + assert(pCreateInfo->queueFamilyIndex < pdev->queue_family_count); + + D3D12_COMMAND_QUEUE_DESC queue_desc = + pdev->queue_families[pCreateInfo->queueFamilyIndex].desc; + + queue_desc.Priority = + (INT)(pCreateInfo->pQueuePriorities[index_in_family] * (float)D3D12_COMMAND_QUEUE_PRIORITY_HIGH); + queue_desc.NodeMask = 0; + + if (FAILED(ID3D12Device1_CreateCommandQueue(device->dev, &queue_desc, + &IID_ID3D12CommandQueue, + &queue->cmdqueue))) { + dzn_queue_finish(queue); + return vk_error(device->vk.physical->instance, VK_ERROR_INITIALIZATION_FAILED); + } + + if (FAILED(ID3D12Device1_CreateFence(device->dev, 0, D3D12_FENCE_FLAG_NONE, + &IID_ID3D12Fence, + &queue->fence))) { + dzn_queue_finish(queue); + return vk_error(device->vk.physical->instance, VK_ERROR_INITIALIZATION_FAILED); + } + + return VK_SUCCESS; +} + +static VkResult +check_physical_device_features(VkPhysicalDevice physicalDevice, + const VkPhysicalDeviceFeatures *features) +{ + VK_FROM_HANDLE(dzn_physical_device, pdev, physicalDevice); + + VkPhysicalDeviceFeatures supported_features; + + pdev->vk.dispatch_table.GetPhysicalDeviceFeatures(physicalDevice, &supported_features); + + VkBool32 *supported_feature = (VkBool32 *)&supported_features; + VkBool32 *enabled_feature = (VkBool32 *)features; + unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32); + for (uint32_t i = 0; i < num_features; i++) { + if (enabled_feature[i] && !supported_feature[i]) + return VK_ERROR_FEATURE_NOT_PRESENT; + } + + return VK_SUCCESS; +} + +static VkResult +dzn_device_create_sync_for_memory(struct vk_device *device, + VkDeviceMemory memory, + bool signal_memory, + struct vk_sync **sync_out) +{ + return vk_sync_create(device, &vk_sync_dummy_type, + (enum vk_sync_flags)0, 1, sync_out); +} + +static void +dzn_device_ref_pipeline_layout(struct vk_device *dev, VkPipelineLayout layout) +{ + VK_FROM_HANDLE(dzn_pipeline_layout, playout, layout); + + dzn_pipeline_layout_ref(playout); +} + +static void 
+dzn_device_unref_pipeline_layout(struct vk_device *dev, VkPipelineLayout layout) +{ + VK_FROM_HANDLE(dzn_pipeline_layout, playout, layout); + + dzn_pipeline_layout_unref(playout); +} + +static VkResult +dzn_device_query_init(struct dzn_device *device) +{ + /* FIXME: create the resource in the default heap */ + D3D12_HEAP_PROPERTIES hprops; + ID3D12Device1_GetCustomHeapProperties(device->dev, &hprops, 0, D3D12_HEAP_TYPE_UPLOAD); + D3D12_RESOURCE_DESC rdesc = { + .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER, + .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT, + .Width = DZN_QUERY_REFS_RES_SIZE, + .Height = 1, + .DepthOrArraySize = 1, + .MipLevels = 1, + .Format = DXGI_FORMAT_UNKNOWN, + .SampleDesc = { .Count = 1, .Quality = 0 }, + .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + .Flags = D3D12_RESOURCE_FLAG_NONE, + }; + + if (FAILED(ID3D12Device1_CreateCommittedResource(device->dev, &hprops, + D3D12_HEAP_FLAG_NONE, + &rdesc, + D3D12_RESOURCE_STATE_GENERIC_READ, + NULL, + &IID_ID3D12Resource, + &device->queries.refs))) + return vk_error(device->vk.physical, VK_ERROR_OUT_OF_DEVICE_MEMORY); + + uint8_t *queries_ref; + if (FAILED(ID3D12Resource_Map(device->queries.refs, 0, NULL, &queries_ref))) + return vk_error(device->vk.physical, VK_ERROR_OUT_OF_HOST_MEMORY); + + memset(queries_ref + DZN_QUERY_REFS_ALL_ONES_OFFSET, 0xff, DZN_QUERY_REFS_SECTION_SIZE); + memset(queries_ref + DZN_QUERY_REFS_ALL_ZEROS_OFFSET, 0x0, DZN_QUERY_REFS_SECTION_SIZE); + ID3D12Resource_Unmap(device->queries.refs, 0, NULL); + + return VK_SUCCESS; +} + +static void +dzn_device_query_finish(struct dzn_device *device) +{ + if (device->queries.refs) + ID3D12Resource_Release(device->queries.refs); +} + +static void +dzn_device_destroy(struct dzn_device *device, const VkAllocationCallbacks *pAllocator) +{ + if (!device) + return; + + struct dzn_instance *instance = + container_of(device->vk.physical->instance, struct dzn_instance, vk); + + vk_foreach_queue_safe(q, &device->vk) { + struct dzn_queue *queue = container_of(q, struct dzn_queue, vk); + + dzn_queue_finish(queue); + } + + dzn_device_query_finish(device); + dzn_meta_finish(device); + + if (device->dev) + ID3D12Device1_Release(device->dev); + + vk_device_finish(&device->vk); + vk_free2(&instance->vk.alloc, pAllocator, device); +} + +static VkResult +dzn_device_create(struct dzn_physical_device *pdev, + const VkDeviceCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkDevice *out) +{ + struct dzn_instance *instance = container_of(pdev->vk.instance, struct dzn_instance, vk); + + uint32_t queue_count = 0; + for (uint32_t qf = 0; qf < pCreateInfo->queueCreateInfoCount; qf++) { + const VkDeviceQueueCreateInfo *qinfo = &pCreateInfo->pQueueCreateInfos[qf]; + queue_count += qinfo->queueCount; + } + + VK_MULTIALLOC(ma); + VK_MULTIALLOC_DECL(&ma, struct dzn_device, device, 1); + VK_MULTIALLOC_DECL(&ma, struct dzn_queue, queues, queue_count); + + if (!vk_multialloc_zalloc2(&ma, &instance->vk.alloc, pAllocator, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE)) + return vk_error(pdev, VK_ERROR_OUT_OF_HOST_MEMORY); + + struct vk_device_dispatch_table dispatch_table; + + /* For secondary command buffer support, overwrite any command entrypoints + * in the main device-level dispatch table with + * vk_cmd_enqueue_unless_primary_Cmd*. 
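+    * For secondary command buffers, those entrypoints record the command
+    * into the vk_cmd_queue attached to the command buffer instead of
+    * calling into the driver, and the recorded commands are replayed
+    * against the cmd_dispatch table populated below when the secondary is
+    * executed from a primary.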
+ */ + vk_device_dispatch_table_from_entrypoints(&dispatch_table, + &vk_cmd_enqueue_unless_primary_device_entrypoints, true); + vk_device_dispatch_table_from_entrypoints(&dispatch_table, + &dzn_device_entrypoints, false); + vk_device_dispatch_table_from_entrypoints(&dispatch_table, + &wsi_device_entrypoints, false); + + /* Populate our primary cmd_dispatch table. */ + vk_device_dispatch_table_from_entrypoints(&device->cmd_dispatch, + &dzn_device_entrypoints, true); + vk_device_dispatch_table_from_entrypoints(&device->cmd_dispatch, + &vk_common_device_entrypoints, + false); + + VkResult result = + vk_device_init(&device->vk, &pdev->vk, &dispatch_table, pCreateInfo, pAllocator); + if (result != VK_SUCCESS) { + vk_free2(&device->vk.alloc, pAllocator, device); + return result; + } + + /* Must be done after vk_device_init() because this function memset(0) the + * whole struct. + */ + device->vk.command_dispatch_table = &device->cmd_dispatch; + device->vk.ref_pipeline_layout = dzn_device_ref_pipeline_layout; + device->vk.unref_pipeline_layout = dzn_device_unref_pipeline_layout; + device->vk.create_sync_for_memory = dzn_device_create_sync_for_memory; + + device->dev = dzn_physical_device_get_d3d12_dev(pdev); + if (!device->dev) { + dzn_device_destroy(device, pAllocator); + return vk_error(pdev, VK_ERROR_INITIALIZATION_FAILED); + } + + ID3D12Device1_AddRef(device->dev); + + ID3D12InfoQueue *info_queue; + if (SUCCEEDED(ID3D12Device1_QueryInterface(device->dev, + &IID_ID3D12InfoQueue, + &info_queue))) { + D3D12_MESSAGE_SEVERITY severities[] = { + D3D12_MESSAGE_SEVERITY_INFO, + D3D12_MESSAGE_SEVERITY_WARNING, + }; + + D3D12_MESSAGE_ID msg_ids[] = { + D3D12_MESSAGE_ID_CLEARRENDERTARGETVIEW_MISMATCHINGCLEARVALUE, + }; + + D3D12_INFO_QUEUE_FILTER NewFilter = { 0 }; + NewFilter.DenyList.NumSeverities = ARRAY_SIZE(severities); + NewFilter.DenyList.pSeverityList = severities; + NewFilter.DenyList.NumIDs = ARRAY_SIZE(msg_ids); + NewFilter.DenyList.pIDList = msg_ids; + + ID3D12InfoQueue_PushStorageFilter(info_queue, &NewFilter); + } + + result = dzn_meta_init(device); + if (result != VK_SUCCESS) { + dzn_device_destroy(device, pAllocator); + return result; + } + + result = dzn_device_query_init(device); + if (result != VK_SUCCESS) { + dzn_device_destroy(device, pAllocator); + return result; + } + + uint32_t qindex = 0; + for (uint32_t qf = 0; qf < pCreateInfo->queueCreateInfoCount; qf++) { + const VkDeviceQueueCreateInfo *qinfo = &pCreateInfo->pQueueCreateInfos[qf]; + + for (uint32_t q = 0; q < qinfo->queueCount; q++) { + result = + dzn_queue_init(&queues[qindex++], device, qinfo, q); + if (result != VK_SUCCESS) { + dzn_device_destroy(device, pAllocator); + return result; + } + } + } + + assert(queue_count == qindex); + *out = dzn_device_to_handle(device); + return VK_SUCCESS; +} + +ID3D12RootSignature * +dzn_device_create_root_sig(struct dzn_device *device, + const D3D12_VERSIONED_ROOT_SIGNATURE_DESC *desc) +{ + struct dzn_instance *instance = + container_of(device->vk.physical->instance, struct dzn_instance, vk); + ID3D10Blob *sig, *error; + + if (FAILED(instance->d3d12.serialize_root_sig(desc, + &sig, &error))) { + if (instance->debug_flags & DZN_DEBUG_SIG) { + const char* error_msg = (const char*)ID3D10Blob_GetBufferPointer(error); + fprintf(stderr, + "== SERIALIZE ROOT SIG ERROR =============================================\n" + "%s\n" + "== END ==========================================================\n", + error_msg); + } + + ID3D10Blob_Release(error); + return NULL; + } + + ID3D12RootSignature 
*root_sig; + if (FAILED(ID3D12Device1_CreateRootSignature(device->dev, 0, + ID3D10Blob_GetBufferPointer(sig), + ID3D10Blob_GetBufferSize(sig), + &IID_ID3D12RootSignature, + &root_sig))) { + ID3D10Blob_Release(sig); + return NULL; + } + + ID3D10Blob_Release(sig); + return root_sig; +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_CreateDevice(VkPhysicalDevice physicalDevice, + const VkDeviceCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkDevice *pDevice) +{ + VK_FROM_HANDLE(dzn_physical_device, physical_device, physicalDevice); + struct dzn_instance *instance = + container_of(physical_device->vk.instance, struct dzn_instance, vk); + VkResult result; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO); + + /* Check enabled features */ + if (pCreateInfo->pEnabledFeatures) { + result = check_physical_device_features(physicalDevice, + pCreateInfo->pEnabledFeatures); + if (result != VK_SUCCESS) + return vk_error(physical_device, result); + } + + /* Check requested queues and fail if we are requested to create any + * queues with flags we don't support. + */ + assert(pCreateInfo->queueCreateInfoCount > 0); + for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) { + if (pCreateInfo->pQueueCreateInfos[i].flags != 0) + return vk_error(physical_device, VK_ERROR_INITIALIZATION_FAILED); + } + + return dzn_device_create(physical_device, pCreateInfo, pAllocator, pDevice); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_DestroyDevice(VkDevice dev, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(dzn_device, device, dev); + + device->vk.dispatch_table.DeviceWaitIdle(dev); + + dzn_device_destroy(device, pAllocator); +} + +static void +dzn_device_memory_destroy(struct dzn_device_memory *mem, + const VkAllocationCallbacks *pAllocator) +{ + if (!mem) + return; + + struct dzn_device *device = container_of(mem->base.device, struct dzn_device, vk); + + if (mem->map) + ID3D12Resource_Unmap(mem->map_res, 0, NULL); + + if (mem->map_res) + ID3D12Resource_Release(mem->map_res); + + if (mem->heap) + ID3D12Heap_Release(mem->heap); + + vk_object_base_finish(&mem->base); + vk_free2(&device->vk.alloc, pAllocator, mem); +} + +static VkResult +dzn_device_memory_create(struct dzn_device *device, + const VkMemoryAllocateInfo *pAllocateInfo, + const VkAllocationCallbacks *pAllocator, + VkDeviceMemory *out) +{ + struct dzn_physical_device *pdevice = + container_of(device->vk.physical, struct dzn_physical_device, vk); + + struct dzn_device_memory *mem = (struct dzn_device_memory *) + vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*mem), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!mem) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + vk_object_base_init(&device->vk, &mem->base, VK_OBJECT_TYPE_DEVICE_MEMORY); + + /* The Vulkan 1.0.33 spec says "allocationSize must be greater than 0". */ + assert(pAllocateInfo->allocationSize > 0); + + mem->size = pAllocateInfo->allocationSize; + +#if 0 + const VkExportMemoryAllocateInfo *export_info = NULL; + VkMemoryAllocateFlags vk_flags = 0; +#endif + + vk_foreach_struct_const(ext, pAllocateInfo->pNext) { + dzn_debug_ignored_stype(ext->sType); + } + + const VkMemoryType *mem_type = + &pdevice->memory.memoryTypes[pAllocateInfo->memoryTypeIndex]; + + D3D12_HEAP_DESC heap_desc = { 0 }; + // TODO: fix all of these: + heap_desc.SizeInBytes = pAllocateInfo->allocationSize; + heap_desc.Alignment = + heap_desc.SizeInBytes >= D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT ? 
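+   /* Allocations big enough to hold an MSAA resource get the 4MB MSAA
+    * placement alignment, presumably because the heap may end up backing
+    * such a resource; everything else gets the default 64KB placement
+    * alignment.
+    */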
+ D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT : + D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; + heap_desc.Flags = + dzn_physical_device_get_heap_flags_for_mem_type(pdevice, + pAllocateInfo->memoryTypeIndex); + + /* TODO: Unsure about this logic??? */ + mem->initial_state = D3D12_RESOURCE_STATE_COMMON; + heap_desc.Properties.Type = D3D12_HEAP_TYPE_CUSTOM; + heap_desc.Properties.MemoryPoolPreference = + ((mem_type->propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) && + !pdevice->architecture.UMA) ? + D3D12_MEMORY_POOL_L1 : D3D12_MEMORY_POOL_L0; + if (mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) { + heap_desc.Properties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_BACK; + } else if (mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) { + heap_desc.Properties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE; + } else { + heap_desc.Properties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE; + } + + if (FAILED(ID3D12Device1_CreateHeap(device->dev, &heap_desc, + &IID_ID3D12Heap, + &mem->heap))) { + dzn_device_memory_destroy(mem, pAllocator); + return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); + } + + if ((mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) && + !(heap_desc.Flags & D3D12_HEAP_FLAG_DENY_BUFFERS)){ + D3D12_RESOURCE_DESC res_desc = { 0 }; + res_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + res_desc.Format = DXGI_FORMAT_UNKNOWN; + res_desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; + res_desc.Width = heap_desc.SizeInBytes; + res_desc.Height = 1; + res_desc.DepthOrArraySize = 1; + res_desc.MipLevels = 1; + res_desc.SampleDesc.Count = 1; + res_desc.SampleDesc.Quality = 0; + res_desc.Flags = D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE; + res_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + HRESULT hr = ID3D12Device1_CreatePlacedResource(device->dev, mem->heap, 0, &res_desc, + mem->initial_state, + NULL, + &IID_ID3D12Resource, + &mem->map_res); + if (FAILED(hr)) { + dzn_device_memory_destroy(mem, pAllocator); + return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); + } + } + + *out = dzn_device_memory_to_handle(mem); + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_AllocateMemory(VkDevice device, + const VkMemoryAllocateInfo *pAllocateInfo, + const VkAllocationCallbacks *pAllocator, + VkDeviceMemory *pMem) +{ + return dzn_device_memory_create(dzn_device_from_handle(device), + pAllocateInfo, pAllocator, pMem); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_FreeMemory(VkDevice device, + VkDeviceMemory mem, + const VkAllocationCallbacks *pAllocator) +{ + dzn_device_memory_destroy(dzn_device_memory_from_handle(mem), pAllocator); +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_MapMemory(VkDevice _device, + VkDeviceMemory _memory, + VkDeviceSize offset, + VkDeviceSize size, + VkMemoryMapFlags flags, + void **ppData) +{ + VK_FROM_HANDLE(dzn_device, device, _device); + VK_FROM_HANDLE(dzn_device_memory, mem, _memory); + + if (mem == NULL) { + *ppData = NULL; + return VK_SUCCESS; + } + + if (size == VK_WHOLE_SIZE) + size = mem->size - offset; + + /* From the Vulkan spec version 1.0.32 docs for MapMemory: + * + * * If size is not equal to VK_WHOLE_SIZE, size must be greater than 0 + * assert(size != 0); + * * If size is not equal to VK_WHOLE_SIZE, size must be less than or + * equal to the size of the memory minus offset + */ + assert(size > 0); + assert(offset + size <= mem->size); + + assert(mem->map_res); + D3D12_RANGE range = { 0 }; + range.Begin = offset; + range.End = offset + size; + void *map = NULL; + if 
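+   /* Note: ID3D12Resource::Map() returns a pointer to the start of the
+    * subresource regardless of the read range passed in, hence the
+    * explicit offset added back when computing *ppData below.
+    */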
(FAILED(ID3D12Resource_Map(mem->map_res, 0, &range, &map))) + return vk_error(device, VK_ERROR_MEMORY_MAP_FAILED); + + mem->map = map; + mem->map_size = size; + + *ppData = ((uint8_t*) map) + offset; + + return VK_SUCCESS; +} + +VKAPI_ATTR void VKAPI_CALL +dzn_UnmapMemory(VkDevice _device, + VkDeviceMemory _memory) +{ + VK_FROM_HANDLE(dzn_device, device, _device); + VK_FROM_HANDLE(dzn_device_memory, mem, _memory); + + if (mem == NULL) + return; + + assert(mem->map_res); + ID3D12Resource_Unmap(mem->map_res, 0, NULL); + + mem->map = NULL; + mem->map_size = 0; +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_FlushMappedMemoryRanges(VkDevice _device, + uint32_t memoryRangeCount, + const VkMappedMemoryRange *pMemoryRanges) +{ + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_InvalidateMappedMemoryRanges(VkDevice _device, + uint32_t memoryRangeCount, + const VkMappedMemoryRange *pMemoryRanges) +{ + return VK_SUCCESS; +} + +static void +dzn_buffer_destroy(struct dzn_buffer *buf, const VkAllocationCallbacks *pAllocator) +{ + if (!buf) + return; + + struct dzn_device *device = container_of(buf->base.device, struct dzn_device, vk); + + if (buf->res) + ID3D12Resource_Release(buf->res); + + vk_object_base_finish(&buf->base); + vk_free2(&device->vk.alloc, pAllocator, buf); +} + +static VkResult +dzn_buffer_create(struct dzn_device *device, + const VkBufferCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkBuffer *out) +{ + struct dzn_buffer *buf = (struct dzn_buffer *) + vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*buf), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!buf) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + vk_object_base_init(&device->vk, &buf->base, VK_OBJECT_TYPE_BUFFER); + buf->create_flags = pCreateInfo->flags; + buf->size = pCreateInfo->size; + buf->usage = pCreateInfo->usage; + + if (buf->usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT) + buf->size = ALIGN_POT(buf->size, 256); + + buf->desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + buf->desc.Format = DXGI_FORMAT_UNKNOWN; + buf->desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; + buf->desc.Width = buf->size; + buf->desc.Height = 1; + buf->desc.DepthOrArraySize = 1; + buf->desc.MipLevels = 1; + buf->desc.SampleDesc.Count = 1; + buf->desc.SampleDesc.Quality = 0; + buf->desc.Flags = D3D12_RESOURCE_FLAG_NONE; + buf->desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + + if (buf->usage & + (VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT)) + buf->desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + + *out = dzn_buffer_to_handle(buf); + return VK_SUCCESS; +} + +DXGI_FORMAT +dzn_buffer_get_dxgi_format(VkFormat format) +{ + enum pipe_format pfmt = vk_format_to_pipe_format(format); + + return dzn_pipe_to_dxgi_format(pfmt); +} + +D3D12_TEXTURE_COPY_LOCATION +dzn_buffer_get_copy_loc(const struct dzn_buffer *buf, + VkFormat format, + const VkBufferImageCopy2KHR *region, + VkImageAspectFlagBits aspect, + uint32_t layer) +{ + const uint32_t buffer_row_length = + region->bufferRowLength ? region->bufferRowLength : region->imageExtent.width; + const uint32_t buffer_image_height = + region->bufferImageHeight ? 
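+   /* Per the Vulkan spec, a bufferRowLength/bufferImageHeight of zero
+    * means the buffer memory is tightly packed according to imageExtent.
+    */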
region->bufferImageHeight : region->imageExtent.height; + + VkFormat plane_format = dzn_image_get_plane_format(format, aspect); + + enum pipe_format pfmt = vk_format_to_pipe_format(plane_format); + uint32_t blksz = util_format_get_blocksize(pfmt); + uint32_t blkw = util_format_get_blockwidth(pfmt); + uint32_t blkh = util_format_get_blockheight(pfmt); + + D3D12_TEXTURE_COPY_LOCATION loc = { + .pResource = buf->res, + .Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, + .PlacedFootprint = { + .Footprint = { + .Format = + dzn_image_get_placed_footprint_format(format, aspect), + .Width = region->imageExtent.width, + .Height = region->imageExtent.height, + .Depth = region->imageExtent.depth, + .RowPitch = blksz * DIV_ROUND_UP(buffer_row_length, blkw), + }, + }, + }; + + uint32_t buffer_layer_stride = + loc.PlacedFootprint.Footprint.RowPitch * + DIV_ROUND_UP(loc.PlacedFootprint.Footprint.Height, blkh); + + loc.PlacedFootprint.Offset = + region->bufferOffset + (layer * buffer_layer_stride); + + return loc; +} + +D3D12_TEXTURE_COPY_LOCATION +dzn_buffer_get_line_copy_loc(const struct dzn_buffer *buf, VkFormat format, + const VkBufferImageCopy2KHR *region, + const D3D12_TEXTURE_COPY_LOCATION *loc, + uint32_t y, uint32_t z, uint32_t *start_x) +{ + uint32_t buffer_row_length = + region->bufferRowLength ? region->bufferRowLength : region->imageExtent.width; + uint32_t buffer_image_height = + region->bufferImageHeight ? region->bufferImageHeight : region->imageExtent.height; + + format = dzn_image_get_plane_format(format, region->imageSubresource.aspectMask); + + enum pipe_format pfmt = vk_format_to_pipe_format(format); + uint32_t blksz = util_format_get_blocksize(pfmt); + uint32_t blkw = util_format_get_blockwidth(pfmt); + uint32_t blkh = util_format_get_blockheight(pfmt); + uint32_t blkd = util_format_get_blockdepth(pfmt); + D3D12_TEXTURE_COPY_LOCATION new_loc = *loc; + uint32_t buffer_row_stride = + DIV_ROUND_UP(buffer_row_length, blkw) * blksz; + uint32_t buffer_layer_stride = + buffer_row_stride * + DIV_ROUND_UP(buffer_image_height, blkh); + + uint64_t tex_offset = + ((y / blkh) * buffer_row_stride) + + ((z / blkd) * buffer_layer_stride); + uint64_t offset = loc->PlacedFootprint.Offset + tex_offset; + uint32_t offset_alignment = D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT; + + while (offset_alignment % blksz) + offset_alignment += D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT; + + new_loc.PlacedFootprint.Footprint.Height = blkh; + new_loc.PlacedFootprint.Footprint.Depth = 1; + new_loc.PlacedFootprint.Offset = (offset / offset_alignment) * offset_alignment; + *start_x = ((offset % offset_alignment) / blksz) * blkw; + new_loc.PlacedFootprint.Footprint.Width = *start_x + region->imageExtent.width; + new_loc.PlacedFootprint.Footprint.RowPitch = + ALIGN_POT(DIV_ROUND_UP(new_loc.PlacedFootprint.Footprint.Width, blkw) * blksz, + D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + return new_loc; +} + +bool +dzn_buffer_supports_region_copy(const D3D12_TEXTURE_COPY_LOCATION *loc) +{ + return !(loc->PlacedFootprint.Offset & (D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT - 1)) && + !(loc->PlacedFootprint.Footprint.RowPitch & (D3D12_TEXTURE_DATA_PITCH_ALIGNMENT - 1)); +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_CreateBuffer(VkDevice device, + const VkBufferCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkBuffer *pBuffer) +{ + return dzn_buffer_create(dzn_device_from_handle(device), + pCreateInfo, pAllocator, pBuffer); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_DestroyBuffer(VkDevice device, + VkBuffer buffer, + const 
VkAllocationCallbacks *pAllocator) +{ + dzn_buffer_destroy(dzn_buffer_from_handle(buffer), pAllocator); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_GetBufferMemoryRequirements2(VkDevice dev, + const VkBufferMemoryRequirementsInfo2 *pInfo, + VkMemoryRequirements2 *pMemoryRequirements) +{ + VK_FROM_HANDLE(dzn_device, device, dev); + VK_FROM_HANDLE(dzn_buffer, buffer, pInfo->buffer); + struct dzn_physical_device *pdev = + container_of(device->vk.physical, struct dzn_physical_device, vk); + + /* uh, this is grossly over-estimating things */ + uint32_t alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; + VkDeviceSize size = buffer->size; + + if (buffer->usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT) { + alignment = MAX2(alignment, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); + size = ALIGN_POT(size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); + } + + pMemoryRequirements->memoryRequirements.size = size; + pMemoryRequirements->memoryRequirements.alignment = alignment; + pMemoryRequirements->memoryRequirements.memoryTypeBits = + dzn_physical_device_get_mem_type_mask_for_resource(pdev, &buffer->desc); + + vk_foreach_struct(ext, pMemoryRequirements->pNext) { + switch (ext->sType) { + case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: { + VkMemoryDedicatedRequirements *requirements = + (VkMemoryDedicatedRequirements *)ext; + /* TODO: figure out dedicated allocations */ + requirements->prefersDedicatedAllocation = false; + requirements->requiresDedicatedAllocation = false; + break; + } + + default: + dzn_debug_ignored_stype(ext->sType); + break; + } + } + +#if 0 + D3D12_RESOURCE_ALLOCATION_INFO GetResourceAllocationInfo( + UINT visibleMask, + UINT numResourceDescs, + const D3D12_RESOURCE_DESC *pResourceDescs); +#endif +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_BindBufferMemory2(VkDevice _device, + uint32_t bindInfoCount, + const VkBindBufferMemoryInfo *pBindInfos) +{ + VK_FROM_HANDLE(dzn_device, device, _device); + + for (uint32_t i = 0; i < bindInfoCount; i++) { + assert(pBindInfos[i].sType == VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO); + + VK_FROM_HANDLE(dzn_device_memory, mem, pBindInfos[i].memory); + VK_FROM_HANDLE(dzn_buffer, buffer, pBindInfos[i].buffer); + + if (FAILED(ID3D12Device1_CreatePlacedResource(device->dev, mem->heap, + pBindInfos[i].memoryOffset, + &buffer->desc, + mem->initial_state, + NULL, + &IID_ID3D12Resource, + &buffer->res))) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + } + + return VK_SUCCESS; +} + +static VkResult +dzn_framebuffer_create(struct dzn_device *device, + const VkFramebufferCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkFramebuffer *out) +{ + VK_MULTIALLOC(ma); + VK_MULTIALLOC_DECL(&ma, struct dzn_framebuffer, framebuffer, 1); + VK_MULTIALLOC_DECL(&ma, struct dzn_image_view *, attachments, pCreateInfo->attachmentCount); + + if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, pAllocator, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT)) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + framebuffer->width = pCreateInfo->width; + framebuffer->height = pCreateInfo->height; + framebuffer->layers = pCreateInfo->layers; + + framebuffer->attachments = attachments; + framebuffer->attachment_count = pCreateInfo->attachmentCount; + for (uint32_t i = 0; i < framebuffer->attachment_count; i++) { + VK_FROM_HANDLE(dzn_image_view, iview, pCreateInfo->pAttachments[i]); + framebuffer->attachments[i] = iview; + } + + vk_object_base_init(&device->vk, &framebuffer->base, VK_OBJECT_TYPE_FRAMEBUFFER); + *out = 
dzn_framebuffer_to_handle(framebuffer); + return VK_SUCCESS; +} + +static void +dzn_framebuffer_destroy(struct dzn_framebuffer *framebuffer, + const VkAllocationCallbacks *pAllocator) +{ + if (!framebuffer) + return; + + struct dzn_device *device = + container_of(framebuffer->base.device, struct dzn_device, vk); + + vk_object_base_finish(&framebuffer->base); + vk_free2(&device->vk.alloc, pAllocator, framebuffer); +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_CreateFramebuffer(VkDevice device, + const VkFramebufferCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkFramebuffer *pFramebuffer) +{ + return dzn_framebuffer_create(dzn_device_from_handle(device), + pCreateInfo, pAllocator, pFramebuffer); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_DestroyFramebuffer(VkDevice device, + VkFramebuffer fb, + const VkAllocationCallbacks *pAllocator) +{ + dzn_framebuffer_destroy(dzn_framebuffer_from_handle(fb), pAllocator); +} + +static void +dzn_event_destroy(struct dzn_event *event, + const VkAllocationCallbacks *pAllocator) +{ + if (!event) + return; + + struct dzn_device *device = + container_of(event->base.device, struct dzn_device, vk); + + if (event->fence) + ID3D12Fence_Release(event->fence); + + vk_object_base_finish(&event->base); + vk_free2(&device->vk.alloc, pAllocator, event); +} + +static VkResult +dzn_event_create(struct dzn_device *device, + const VkEventCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkEvent *out) +{ + struct dzn_event *event = (struct dzn_event *) + vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*event), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!event) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + vk_object_base_init(&device->vk, &event->base, VK_OBJECT_TYPE_EVENT); + + if (FAILED(ID3D12Device1_CreateFence(device->dev, 0, D3D12_FENCE_FLAG_NONE, + &IID_ID3D12Fence, + &event->fence))) { + dzn_event_destroy(event, pAllocator); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + } + + *out = dzn_event_to_handle(event); + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_CreateEvent(VkDevice device, + const VkEventCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkEvent *pEvent) +{ + return dzn_event_create(dzn_device_from_handle(device), + pCreateInfo, pAllocator, pEvent); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_DestroyEvent(VkDevice device, + VkEvent event, + const VkAllocationCallbacks *pAllocator) +{ + dzn_event_destroy(dzn_event_from_handle(event), pAllocator); +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_ResetEvent(VkDevice dev, + VkEvent evt) +{ + VK_FROM_HANDLE(dzn_device, device, dev); + VK_FROM_HANDLE(dzn_event, event, evt); + + if (FAILED(ID3D12Fence_Signal(event->fence, 0))) + return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); + + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_SetEvent(VkDevice dev, + VkEvent evt) +{ + VK_FROM_HANDLE(dzn_device, device, dev); + VK_FROM_HANDLE(dzn_event, event, evt); + + if (FAILED(ID3D12Fence_Signal(event->fence, 1))) + return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); + + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_GetEventStatus(VkDevice device, + VkEvent evt) +{ + VK_FROM_HANDLE(dzn_event, event, evt); + + return ID3D12Fence_GetCompletedValue(event->fence) == 0 ? 
+ VK_EVENT_RESET : VK_EVENT_SET; +} + +VKAPI_ATTR void VKAPI_CALL +dzn_GetDeviceMemoryCommitment(VkDevice device, + VkDeviceMemory memory, + VkDeviceSize *pCommittedMemoryInBytes) +{ + VK_FROM_HANDLE(dzn_device_memory, mem, memory); + + // TODO: find if there's a way to query/track actual heap residency + *pCommittedMemoryInBytes = mem->size; +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_QueueBindSparse(VkQueue queue, + uint32_t bindInfoCount, + const VkBindSparseInfo *pBindInfo, + VkFence fence) +{ + // FIXME: add proper implem + dzn_stub(); + return VK_SUCCESS; +} + +static D3D12_TEXTURE_ADDRESS_MODE +dzn_sampler_translate_addr_mode(VkSamplerAddressMode in) +{ + switch (in) { + case VK_SAMPLER_ADDRESS_MODE_REPEAT: return D3D12_TEXTURE_ADDRESS_MODE_WRAP; + case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT: return D3D12_TEXTURE_ADDRESS_MODE_MIRROR; + case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE: return D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER: return D3D12_TEXTURE_ADDRESS_MODE_BORDER; + default: unreachable("Invalid address mode"); + } +} + +static void +dzn_sampler_destroy(struct dzn_sampler *sampler, + const VkAllocationCallbacks *pAllocator) +{ + if (!sampler) + return; + + struct dzn_device *device = + container_of(sampler->base.device, struct dzn_device, vk); + + vk_object_base_finish(&sampler->base); + vk_free2(&device->vk.alloc, pAllocator, sampler); +} + +static VkResult +dzn_sampler_create(struct dzn_device *device, + const VkSamplerCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkSampler *out) +{ + struct dzn_sampler *sampler = (struct dzn_sampler *) + vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*sampler), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!sampler) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + vk_object_base_init(&device->vk, &sampler->base, VK_OBJECT_TYPE_SAMPLER); + + const VkSamplerCustomBorderColorCreateInfoEXT *pBorderColor = (const VkSamplerCustomBorderColorCreateInfoEXT *) + vk_find_struct_const(pCreateInfo->pNext, SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT); + + /* TODO: have a sampler pool to allocate shader-invisible descs which we + * can copy to the desc_set when UpdateDescriptorSets() is called. + */ + sampler->desc.Filter = dzn_translate_sampler_filter(pCreateInfo); + sampler->desc.AddressU = dzn_sampler_translate_addr_mode(pCreateInfo->addressModeU); + sampler->desc.AddressV = dzn_sampler_translate_addr_mode(pCreateInfo->addressModeV); + sampler->desc.AddressW = dzn_sampler_translate_addr_mode(pCreateInfo->addressModeW); + sampler->desc.MipLODBias = pCreateInfo->mipLodBias; + sampler->desc.MaxAnisotropy = pCreateInfo->maxAnisotropy; + sampler->desc.MinLOD = pCreateInfo->minLod; + sampler->desc.MaxLOD = pCreateInfo->maxLod; + + if (pCreateInfo->compareEnable) + sampler->desc.ComparisonFunc = dzn_translate_compare_op(pCreateInfo->compareOp); + + bool reads_border_color = + pCreateInfo->addressModeU == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER || + pCreateInfo->addressModeV == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER || + pCreateInfo->addressModeW == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; + + if (reads_border_color) { + switch (pCreateInfo->borderColor) { + case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK: + case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK: + sampler->desc.BorderColor[0] = 0.0f; + sampler->desc.BorderColor[1] = 0.0f; + sampler->desc.BorderColor[2] = 0.0f; + sampler->desc.BorderColor[3] = + pCreateInfo->borderColor == VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK ? 
0.0f : 1.0f; + sampler->static_border_color = + pCreateInfo->borderColor == VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK ? + D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK : + D3D12_STATIC_BORDER_COLOR_OPAQUE_BLACK; + break; + case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE: + sampler->desc.BorderColor[0] = sampler->desc.BorderColor[1] = 1.0f; + sampler->desc.BorderColor[2] = sampler->desc.BorderColor[3] = 1.0f; + sampler->static_border_color = D3D12_STATIC_BORDER_COLOR_OPAQUE_WHITE; + break; + case VK_BORDER_COLOR_FLOAT_CUSTOM_EXT: + sampler->static_border_color = (D3D12_STATIC_BORDER_COLOR)-1; + for (unsigned i = 0; i < ARRAY_SIZE(sampler->desc.BorderColor); i++) + sampler->desc.BorderColor[i] = pBorderColor->customBorderColor.float32[i]; + break; + case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK: + case VK_BORDER_COLOR_INT_OPAQUE_BLACK: + case VK_BORDER_COLOR_INT_OPAQUE_WHITE: + case VK_BORDER_COLOR_INT_CUSTOM_EXT: + /* FIXME: sampling from integer textures is not supported yet. */ + sampler->static_border_color = (D3D12_STATIC_BORDER_COLOR)-1; + break; + default: + unreachable("Unsupported border color"); + } + } + + *out = dzn_sampler_to_handle(sampler); + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_CreateSampler(VkDevice device, + const VkSamplerCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkSampler *pSampler) +{ + return dzn_sampler_create(dzn_device_from_handle(device), + pCreateInfo, pAllocator, pSampler); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_DestroySampler(VkDevice device, + VkSampler sampler, + const VkAllocationCallbacks *pAllocator) +{ + dzn_sampler_destroy(dzn_sampler_from_handle(sampler), pAllocator); +} diff --git a/src/microsoft/vulkan/dzn_device.cpp b/src/microsoft/vulkan/dzn_device.cpp deleted file mode 100644 index f18413dcf0e..00000000000 --- a/src/microsoft/vulkan/dzn_device.cpp +++ /dev/null @@ -1,2649 +0,0 @@ -/* - * Copyright © Microsoft Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include "dzn_private.h" - -#include "vk_alloc.h" -#include "vk_common_entrypoints.h" -#include "vk_cmd_enqueue_entrypoints.h" -#include "vk_debug_report.h" -#include "vk_format.h" -#include "vk_sync_dummy.h" -#include "vk_util.h" - -#include "util/debug.h" -#include "util/macros.h" - -#include "glsl_types.h" - -#include "dxil_validator.h" - -#include -#include -#include - -#include - -#define CINTERFACE -#include -#undef CINTERFACE - -#if defined(VK_USE_PLATFORM_WIN32_KHR) || \ - defined(VK_USE_PLATFORM_DISPLAY_KHR) -#define DZN_USE_WSI_PLATFORM -#endif - -#define DZN_API_VERSION VK_MAKE_VERSION(1, 0, VK_HEADER_VERSION) - -#define MAX_TIER2_MEMORY_TYPES 3 - -static const struct vk_instance_extension_table instance_extensions = { - .KHR_get_physical_device_properties2 = true, -#ifdef DZN_USE_WSI_PLATFORM - .KHR_surface = true, -#endif -#ifdef VK_USE_PLATFORM_WIN32_KHR - .KHR_win32_surface = true, -#endif -#ifdef VK_USE_PLATFORM_DISPLAY_KHR - .KHR_display = true, - .KHR_get_display_properties2 = true, - .EXT_direct_mode_display = true, - .EXT_display_surface_counter = true, -#endif - .EXT_debug_report = true, - .EXT_debug_utils = true, -}; - -static void -dzn_physical_device_get_extensions(struct dzn_physical_device *pdev) -{ - pdev->vk.supported_extensions = vk_device_extension_table { -#ifdef DZN_USE_WSI_PLATFORM - .KHR_swapchain = true, -#endif - }; -} - -VKAPI_ATTR VkResult VKAPI_CALL -dzn_EnumerateInstanceExtensionProperties(const char *pLayerName, - uint32_t *pPropertyCount, - VkExtensionProperties *pProperties) -{ - /* We don't support any layers */ - if (pLayerName) - return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT); - - return vk_enumerate_instance_extension_properties( - &instance_extensions, pPropertyCount, pProperties); -} - -static const struct debug_control dzn_debug_options[] = { - { "sync", DZN_DEBUG_SYNC }, - { "nir", DZN_DEBUG_NIR }, - { "dxil", DZN_DEBUG_DXIL }, - { "warp", DZN_DEBUG_WARP }, - { "internal", DZN_DEBUG_INTERNAL }, - { "signature", DZN_DEBUG_SIG }, - { "gbv", DZN_DEBUG_GBV }, - { "d3d12", DZN_DEBUG_D3D12 }, - { NULL, 0 } -}; - -static void -dzn_physical_device_destroy(struct dzn_physical_device *pdev) -{ - struct dzn_instance *instance = container_of(pdev->vk.instance, struct dzn_instance, vk); - - list_del(&pdev->link); - - if (pdev->dev) - ID3D12Device1_Release(pdev->dev); - - if (pdev->adapter) - IDXGIAdapter1_Release(pdev->adapter); - - dzn_wsi_finish(pdev); - vk_physical_device_finish(&pdev->vk); - vk_free(&instance->vk.alloc, pdev); -} - -static void -dzn_instance_destroy(struct dzn_instance *instance, const VkAllocationCallbacks *alloc) -{ - if (!instance) - return; - - if (instance->dxil_validator) - dxil_destroy_validator(instance->dxil_validator); - - list_for_each_entry_safe(struct dzn_physical_device, pdev, - &instance->physical_devices, link) { - dzn_physical_device_destroy(pdev); - } - - vk_instance_finish(&instance->vk); - vk_free2(vk_default_allocator(), alloc, instance); -} - -static VkResult -dzn_instance_create(const VkInstanceCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkInstance *out) -{ - struct dzn_instance *instance = (struct dzn_instance *) - vk_zalloc2(vk_default_allocator(), pAllocator, sizeof(*instance), 8, - VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); - if (!instance) - return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY); - - struct vk_instance_dispatch_table dispatch_table; - vk_instance_dispatch_table_from_entrypoints(&dispatch_table, - &dzn_instance_entrypoints, - true); - - VkResult result = - 
vk_instance_init(&instance->vk, &instance_extensions, - &dispatch_table, pCreateInfo, - pAllocator ? pAllocator : vk_default_allocator()); - if (result != VK_SUCCESS) { - vk_free2(vk_default_allocator(), pAllocator, instance); - return result; - } - - list_inithead(&instance->physical_devices); - instance->physical_devices_enumerated = false; - instance->debug_flags = - parse_debug_string(getenv("DZN_DEBUG"), dzn_debug_options); - - instance->dxil_validator = dxil_create_validator(NULL); - instance->d3d12.serialize_root_sig = d3d12_get_serialize_root_sig(); - - if (!instance->dxil_validator || - !instance->d3d12.serialize_root_sig) { - dzn_instance_destroy(instance, pAllocator); - return vk_error(NULL, VK_ERROR_INITIALIZATION_FAILED); - } - - if (instance->debug_flags & DZN_DEBUG_D3D12) - d3d12_enable_debug_layer(); - if (instance->debug_flags & DZN_DEBUG_GBV) - d3d12_enable_gpu_validation(); - - *out = dzn_instance_to_handle(instance); - return VK_SUCCESS; -} - -VKAPI_ATTR VkResult VKAPI_CALL -dzn_CreateInstance(const VkInstanceCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkInstance *pInstance) -{ - return dzn_instance_create(pCreateInfo, pAllocator, pInstance); -} - -VKAPI_ATTR void VKAPI_CALL -dzn_DestroyInstance(VkInstance instance, - const VkAllocationCallbacks *pAllocator) -{ - dzn_instance_destroy(dzn_instance_from_handle(instance), pAllocator); -} - -static VkResult -dzn_physical_device_create(struct dzn_instance *instance, - IDXGIAdapter1 *adapter, - const DXGI_ADAPTER_DESC1 *adapter_desc) -{ - struct dzn_physical_device *pdev = (struct dzn_physical_device *) - vk_zalloc(&instance->vk.alloc, sizeof(*pdev), 8, - VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); - - if (!pdev) - return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY); - - struct vk_physical_device_dispatch_table dispatch_table; - vk_physical_device_dispatch_table_from_entrypoints(&dispatch_table, - &dzn_physical_device_entrypoints, - true); - vk_physical_device_dispatch_table_from_entrypoints(&dispatch_table, - &wsi_physical_device_entrypoints, - false); - - VkResult result = - vk_physical_device_init(&pdev->vk, &instance->vk, - NULL, /* We set up extensions later */ - &dispatch_table); - if (result != VK_SUCCESS) { - vk_free(&instance->vk.alloc, pdev); - return result; - } - - mtx_init(&pdev->dev_lock, mtx_plain); - pdev->adapter_desc = *adapter_desc; - pdev->adapter = adapter; - IDXGIAdapter1_AddRef(adapter); - list_addtail(&pdev->link, &instance->physical_devices); - - vk_warn_non_conformant_implementation("dzn"); - - /* TODO: correct UUIDs */ - memset(pdev->pipeline_cache_uuid, 0, VK_UUID_SIZE); - memset(pdev->driver_uuid, 0, VK_UUID_SIZE); - memset(pdev->device_uuid, 0, VK_UUID_SIZE); - - /* TODO: something something queue families */ - - result = dzn_wsi_init(pdev); - if (result != VK_SUCCESS) { - dzn_physical_device_destroy(pdev); - return result; - } - - dzn_physical_device_get_extensions(pdev); - - uint32_t num_sync_types = 0; - pdev->sync_types[num_sync_types++] = &dzn_sync_type; - pdev->sync_types[num_sync_types++] = &vk_sync_dummy_type; - pdev->sync_types[num_sync_types] = NULL; - assert(num_sync_types <= MAX_SYNC_TYPES); - pdev->vk.supported_sync_types = pdev->sync_types; - - return VK_SUCCESS; -} - -static void -dzn_physical_device_cache_caps(struct dzn_physical_device *pdev) -{ - D3D_FEATURE_LEVEL checklist[] = { - D3D_FEATURE_LEVEL_11_0, - D3D_FEATURE_LEVEL_11_1, - D3D_FEATURE_LEVEL_12_0, - D3D_FEATURE_LEVEL_12_1, - D3D_FEATURE_LEVEL_12_2, - }; - - D3D12_FEATURE_DATA_FEATURE_LEVELS levels 
= { - .NumFeatureLevels = ARRAY_SIZE(checklist), - .pFeatureLevelsRequested = checklist, - }; - - ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_FEATURE_LEVELS, &levels, sizeof(levels)); - pdev->feature_level = levels.MaxSupportedFeatureLevel; - - ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_ARCHITECTURE1, &pdev->architecture, sizeof(pdev->architecture)); - ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_D3D12_OPTIONS, &pdev->options, sizeof(pdev->options)); - - pdev->queue_families[pdev->queue_family_count++] = { - .props = { - .queueFlags = VK_QUEUE_GRAPHICS_BIT | - VK_QUEUE_COMPUTE_BIT | - VK_QUEUE_TRANSFER_BIT, - .queueCount = 1, - .timestampValidBits = 64, - .minImageTransferGranularity = { 0, 0, 0 }, - }, - .desc = { - .Type = D3D12_COMMAND_LIST_TYPE_DIRECT, - }, - }; - - pdev->queue_families[pdev->queue_family_count++] = { - .props = { - .queueFlags = VK_QUEUE_COMPUTE_BIT | - VK_QUEUE_TRANSFER_BIT, - .queueCount = 8, - .timestampValidBits = 64, - .minImageTransferGranularity = { 0, 0, 0 }, - }, - .desc = { - .Type = D3D12_COMMAND_LIST_TYPE_COMPUTE, - }, - }; - - pdev->queue_families[pdev->queue_family_count++] = { - .props = { - .queueFlags = VK_QUEUE_TRANSFER_BIT, - .queueCount = 1, - .timestampValidBits = 0, - .minImageTransferGranularity = { 0, 0, 0 }, - }, - .desc = { - .Type = D3D12_COMMAND_LIST_TYPE_COPY, - }, - }; - - assert(pdev->queue_family_count <= ARRAY_SIZE(pdev->queue_families)); - - D3D12_COMMAND_QUEUE_DESC queue_desc = { - .Type = D3D12_COMMAND_LIST_TYPE_DIRECT, - .Priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, - .Flags = D3D12_COMMAND_QUEUE_FLAG_NONE, - .NodeMask = 0, - }; - - ID3D12CommandQueue *cmdqueue; - ID3D12Device1_CreateCommandQueue(pdev->dev, &queue_desc, - IID_ID3D12CommandQueue, - (void **)&cmdqueue); - - uint64_t ts_freq; - ID3D12CommandQueue_GetTimestampFrequency(cmdqueue, &ts_freq); - pdev->timestamp_period = 1000000000.0f / ts_freq; - ID3D12CommandQueue_Release(cmdqueue); -} - -static void -dzn_physical_device_init_memory(struct dzn_physical_device *pdev) -{ - VkPhysicalDeviceMemoryProperties *mem = &pdev->memory; - const DXGI_ADAPTER_DESC1 *desc = &pdev->adapter_desc; - - mem->memoryHeapCount = 1; - mem->memoryHeaps[0] = VkMemoryHeap { - .size = desc->SharedSystemMemory, - .flags = 0, - }; - - mem->memoryTypes[mem->memoryTypeCount++] = VkMemoryType { - .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, - .heapIndex = 0, - }; - mem->memoryTypes[mem->memoryTypeCount++] = VkMemoryType { - .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_CACHED_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, - .heapIndex = 0, - }; - - if (!pdev->architecture.UMA) { - mem->memoryHeaps[mem->memoryHeapCount++] = VkMemoryHeap { - .size = desc->DedicatedVideoMemory, - .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, - }; - mem->memoryTypes[mem->memoryTypeCount++] = VkMemoryType { - .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, - .heapIndex = mem->memoryHeapCount - 1, - }; - } else { - mem->memoryHeaps[0].flags |= VK_MEMORY_HEAP_DEVICE_LOCAL_BIT; - mem->memoryTypes[0].propertyFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; - mem->memoryTypes[1].propertyFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; - } - - assert(mem->memoryTypeCount <= MAX_TIER2_MEMORY_TYPES); - - if (pdev->options.ResourceHeapTier == D3D12_RESOURCE_HEAP_TIER_1) { - unsigned oldMemoryTypeCount = mem->memoryTypeCount; - VkMemoryType oldMemoryTypes[MAX_TIER2_MEMORY_TYPES]; - - memcpy(oldMemoryTypes, 
mem->memoryTypes, oldMemoryTypeCount * sizeof(VkMemoryType)); - - mem->memoryTypeCount = 0; - for (unsigned oldMemoryTypeIdx = 0; oldMemoryTypeIdx < oldMemoryTypeCount; ++oldMemoryTypeIdx) { - D3D12_HEAP_FLAGS flags[] = { - D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS, - D3D12_HEAP_FLAG_ALLOW_ONLY_RT_DS_TEXTURES, - /* Note: Vulkan requires *all* images to come from the same memory type as long as - * the tiling property (and a few other misc properties) are the same. So, this - * non-RT/DS texture flag will only be used for TILING_LINEAR textures, which - * can't be render targets. - */ - D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES - }; - for (int i = 0; i < ARRAY_SIZE(flags); ++i) { - D3D12_HEAP_FLAGS flag = flags[i]; - pdev->heap_flags_for_mem_type[mem->memoryTypeCount] = flag; - mem->memoryTypes[mem->memoryTypeCount] = oldMemoryTypes[oldMemoryTypeIdx]; - mem->memoryTypeCount++; - } - } - } -} - -static D3D12_HEAP_FLAGS -dzn_physical_device_get_heap_flags_for_mem_type(const struct dzn_physical_device *pdev, - uint32_t mem_type) -{ - return pdev->heap_flags_for_mem_type[mem_type]; -} - -uint32_t -dzn_physical_device_get_mem_type_mask_for_resource(const struct dzn_physical_device *pdev, - const D3D12_RESOURCE_DESC *desc) -{ - if (pdev->options.ResourceHeapTier > D3D12_RESOURCE_HEAP_TIER_1) - return (1u << pdev->memory.memoryTypeCount) - 1; - - D3D12_HEAP_FLAGS deny_flag; - if (desc->Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) - deny_flag = D3D12_HEAP_FLAG_DENY_BUFFERS; - else if (desc->Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) - deny_flag = D3D12_HEAP_FLAG_DENY_RT_DS_TEXTURES; - else - deny_flag = D3D12_HEAP_FLAG_DENY_NON_RT_DS_TEXTURES; - - uint32_t mask = 0; - for (unsigned i = 0; i < pdev->memory.memoryTypeCount; ++i) { - if ((pdev->heap_flags_for_mem_type[i] & deny_flag) == D3D12_HEAP_FLAG_NONE) - mask |= (1 << i); - } - return mask; -} - -static uint32_t -dzn_physical_device_get_max_mip_level(bool is_3d) -{ - return is_3d ? 11 : 14; -} - -static uint32_t -dzn_physical_device_get_max_extent(bool is_3d) -{ - uint32_t max_mip = dzn_physical_device_get_max_mip_level(is_3d); - - return 1 << max_mip; -} - -static uint32_t -dzn_physical_device_get_max_array_layers() -{ - return dzn_physical_device_get_max_extent(false); -} - -static ID3D12Device1 * -dzn_physical_device_get_d3d12_dev(struct dzn_physical_device *pdev) -{ - struct dzn_instance *instance = container_of(pdev->vk.instance, struct dzn_instance, vk); - - mtx_lock(&pdev->dev_lock); - if (!pdev->dev) { - pdev->dev = d3d12_create_device(pdev->adapter, !instance->dxil_validator); - - dzn_physical_device_cache_caps(pdev); - dzn_physical_device_init_memory(pdev); - } - mtx_unlock(&pdev->dev_lock); - - return pdev->dev; -} - -D3D12_FEATURE_DATA_FORMAT_SUPPORT -dzn_physical_device_get_format_support(struct dzn_physical_device *pdev, - VkFormat format) -{ - VkImageUsageFlags usage = - vk_format_is_depth_or_stencil(format) ? 
- VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT : 0; - VkImageAspectFlags aspects = 0; - - if (vk_format_has_depth(format)) - aspects = VK_IMAGE_ASPECT_DEPTH_BIT; - if (vk_format_has_stencil(format)) - aspects = VK_IMAGE_ASPECT_STENCIL_BIT; - - D3D12_FEATURE_DATA_FORMAT_SUPPORT dfmt_info = { - .Format = dzn_image_get_dxgi_format(format, usage, aspects), - }; - - ID3D12Device1 *dev = dzn_physical_device_get_d3d12_dev(pdev); - HRESULT hres = - ID3D12Device1_CheckFeatureSupport(dev, D3D12_FEATURE_FORMAT_SUPPORT, - &dfmt_info, sizeof(dfmt_info)); - assert(!FAILED(hres)); - - if (usage != VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) - return dfmt_info; - - /* Depth/stencil resources have different format when they're accessed - * as textures, query the capabilities for this format too. - */ - dzn_foreach_aspect(aspect, aspects) { - D3D12_FEATURE_DATA_FORMAT_SUPPORT dfmt_info2 = { - .Format = dzn_image_get_dxgi_format(format, 0, aspect), - }; - - hres = ID3D12Device1_CheckFeatureSupport(dev, D3D12_FEATURE_FORMAT_SUPPORT, - &dfmt_info2, sizeof(dfmt_info2)); - assert(!FAILED(hres)); - -#define DS_SRV_FORMAT_SUPPORT1_MASK \ - (D3D12_FORMAT_SUPPORT1_SHADER_LOAD | \ - D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE | \ - D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE_COMPARISON | \ - D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE_MONO_TEXT | \ - D3D12_FORMAT_SUPPORT1_MULTISAMPLE_RESOLVE | \ - D3D12_FORMAT_SUPPORT1_MULTISAMPLE_LOAD | \ - D3D12_FORMAT_SUPPORT1_SHADER_GATHER | \ - D3D12_FORMAT_SUPPORT1_TYPED_UNORDERED_ACCESS_VIEW | \ - D3D12_FORMAT_SUPPORT1_SHADER_GATHER_COMPARISON) - - dfmt_info.Support1 |= dfmt_info2.Support1 & DS_SRV_FORMAT_SUPPORT1_MASK; - dfmt_info.Support2 |= dfmt_info2.Support2; - } - - return dfmt_info; -} - -static void -dzn_physical_device_get_format_properties(struct dzn_physical_device *pdev, - VkFormat format, - VkFormatProperties2 *properties) -{ - D3D12_FEATURE_DATA_FORMAT_SUPPORT dfmt_info = - dzn_physical_device_get_format_support(pdev, format); - VkFormatProperties *base_props = &properties->formatProperties; - - vk_foreach_struct(ext, properties->pNext) { - dzn_debug_ignored_stype(ext->sType); - } - - if (dfmt_info.Format == DXGI_FORMAT_UNKNOWN) { - *base_props = VkFormatProperties { }; - return; - } - - ID3D12Device1 *dev = dzn_physical_device_get_d3d12_dev(pdev); - - *base_props = VkFormatProperties { - .linearTilingFeatures = VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT, - .optimalTilingFeatures = VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT, - .bufferFeatures = VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT, - }; - - if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_IA_VERTEX_BUFFER) - base_props->bufferFeatures |= VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT; - -#define TEX_FLAGS (D3D12_FORMAT_SUPPORT1_TEXTURE1D | \ - D3D12_FORMAT_SUPPORT1_TEXTURE2D | \ - D3D12_FORMAT_SUPPORT1_TEXTURE3D | \ - D3D12_FORMAT_SUPPORT1_TEXTURECUBE) - if (dfmt_info.Support1 & TEX_FLAGS) { - base_props->optimalTilingFeatures |= - VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_BLIT_SRC_BIT; - } - - if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE) { - base_props->optimalTilingFeatures |= - VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT; - } - - if ((dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_SHADER_LOAD) && - (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_TYPED_UNORDERED_ACCESS_VIEW)) { - base_props->optimalTilingFeatures |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT; - base_props->bufferFeatures |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT; 
- } - -#define ATOMIC_FLAGS (D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_ADD | \ - D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_BITWISE_OPS | \ - D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_COMPARE_STORE_OR_COMPARE_EXCHANGE | \ - D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_EXCHANGE | \ - D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_SIGNED_MIN_OR_MAX | \ - D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_UNSIGNED_MIN_OR_MAX) - if ((dfmt_info.Support2 & ATOMIC_FLAGS) == ATOMIC_FLAGS) { - base_props->optimalTilingFeatures |= VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT; - base_props->bufferFeatures |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT; - } - - if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_SHADER_LOAD) - base_props->bufferFeatures |= VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT; - - /* Color/depth/stencil attachment cap implies input attachement cap, and input - * attachment loads are lowered to texture loads in dozen, hence the requirement - * to have shader-load support. - */ - if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_SHADER_LOAD) { - if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_RENDER_TARGET) { - base_props->optimalTilingFeatures |= - VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT; - } - - if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_BLENDABLE) - base_props->optimalTilingFeatures |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT; - - if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL) { - base_props->optimalTilingFeatures |= - VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT; - } - } - - /* B4G4R4A4 support is required, but d3d12 doesn't support it. We map this - * format to R4G4B4A4 and adjust the SRV component-mapping to fake - * B4G4R4A4, but that forces us to limit the usage to sampling, which, - * luckily, is exactly what we need to support the required features. 
- */ - if (format == VK_FORMAT_B4G4R4A4_UNORM_PACK16) { - VkFormatFeatureFlags bgra4_req_features = - VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | - VK_FORMAT_FEATURE_TRANSFER_DST_BIT | - VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | - VK_FORMAT_FEATURE_BLIT_SRC_BIT | - VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT; - base_props->optimalTilingFeatures &= bgra4_req_features; - base_props->bufferFeatures = - VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT; - } - - /* depth/stencil format shouldn't advertise buffer features */ - if (vk_format_is_depth_or_stencil(format)) - base_props->bufferFeatures = 0; -} - -static VkResult -dzn_physical_device_get_image_format_properties(struct dzn_physical_device *pdev, - const VkPhysicalDeviceImageFormatInfo2 *info, - VkImageFormatProperties2 *properties) -{ - const VkPhysicalDeviceExternalImageFormatInfo *external_info = NULL; - VkExternalImageFormatProperties *external_props = NULL; - - *properties = VkImageFormatProperties2 { - .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2, - }; - - /* Extract input structs */ - vk_foreach_struct_const(s, info->pNext) { - switch (s->sType) { - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO: - external_info = (const VkPhysicalDeviceExternalImageFormatInfo *)s; - break; - default: - dzn_debug_ignored_stype(s->sType); - break; - } - } - - assert(info->tiling == VK_IMAGE_TILING_OPTIMAL || info->tiling == VK_IMAGE_TILING_LINEAR); - - /* Extract output structs */ - vk_foreach_struct(s, properties->pNext) { - switch (s->sType) { - case VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES: - external_props = (VkExternalImageFormatProperties *)s; - break; - default: - dzn_debug_ignored_stype(s->sType); - break; - } - } - - assert((external_props != NULL) == (external_info != NULL)); - - /* TODO: support image import */ - if (external_info && external_info->handleType != 0) - return VK_ERROR_FORMAT_NOT_SUPPORTED; - - if (info->tiling != VK_IMAGE_TILING_OPTIMAL && - (info->usage & ~(VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT))) - return VK_ERROR_FORMAT_NOT_SUPPORTED; - - if (info->tiling != VK_IMAGE_TILING_OPTIMAL && - vk_format_is_depth_or_stencil(info->format)) - return VK_ERROR_FORMAT_NOT_SUPPORTED; - - D3D12_FEATURE_DATA_FORMAT_SUPPORT dfmt_info = - dzn_physical_device_get_format_support(pdev, info->format); - if (dfmt_info.Format == DXGI_FORMAT_UNKNOWN) - return VK_ERROR_FORMAT_NOT_SUPPORTED; - - bool is_bgra4 = info->format == VK_FORMAT_B4G4R4A4_UNORM_PACK16; - ID3D12Device1 *dev = dzn_physical_device_get_d3d12_dev(pdev); - - if ((info->type == VK_IMAGE_TYPE_1D && !(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_TEXTURE1D)) || - (info->type == VK_IMAGE_TYPE_2D && !(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_TEXTURE2D)) || - (info->type == VK_IMAGE_TYPE_3D && !(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_TEXTURE3D)) || - ((info->flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) && - !(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_TEXTURECUBE))) - return VK_ERROR_FORMAT_NOT_SUPPORTED; - - if ((info->usage & VK_IMAGE_USAGE_SAMPLED_BIT) && - !(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE)) - return VK_ERROR_FORMAT_NOT_SUPPORTED; - - if ((info->usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) && - (!(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_SHADER_LOAD) || is_bgra4)) - return VK_ERROR_FORMAT_NOT_SUPPORTED; - - if ((info->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) && - (!(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_RENDER_TARGET) || is_bgra4)) - return 
VK_ERROR_FORMAT_NOT_SUPPORTED; - - if ((info->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) && - (!(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL) || is_bgra4)) - return VK_ERROR_FORMAT_NOT_SUPPORTED; - - if ((info->usage & VK_IMAGE_USAGE_STORAGE_BIT) && - (!(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_TYPED_UNORDERED_ACCESS_VIEW) || is_bgra4)) - return VK_ERROR_FORMAT_NOT_SUPPORTED; - - if (info->type == VK_IMAGE_TYPE_3D && info->tiling != VK_IMAGE_TILING_OPTIMAL) - return VK_ERROR_FORMAT_NOT_SUPPORTED; - - bool is_3d = info->type == VK_IMAGE_TYPE_3D; - uint32_t max_extent = dzn_physical_device_get_max_extent(is_3d); - - if (info->tiling == VK_IMAGE_TILING_OPTIMAL && - dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_MIP) - properties->imageFormatProperties.maxMipLevels = dzn_physical_device_get_max_mip_level(is_3d) + 1; - else - properties->imageFormatProperties.maxMipLevels = 1; - - if (info->tiling == VK_IMAGE_TILING_OPTIMAL && info->type != VK_IMAGE_TYPE_3D) - properties->imageFormatProperties.maxArrayLayers = dzn_physical_device_get_max_array_layers(); - else - properties->imageFormatProperties.maxArrayLayers = 1; - - switch (info->type) { - case VK_IMAGE_TYPE_1D: - properties->imageFormatProperties.maxExtent.width = max_extent; - properties->imageFormatProperties.maxExtent.height = 1; - properties->imageFormatProperties.maxExtent.depth = 1; - break; - case VK_IMAGE_TYPE_2D: - properties->imageFormatProperties.maxExtent.width = max_extent; - properties->imageFormatProperties.maxExtent.height = max_extent; - properties->imageFormatProperties.maxExtent.depth = 1; - break; - case VK_IMAGE_TYPE_3D: - properties->imageFormatProperties.maxExtent.width = max_extent; - properties->imageFormatProperties.maxExtent.height = max_extent; - properties->imageFormatProperties.maxExtent.depth = max_extent; - break; - default: - unreachable("bad VkImageType"); - } - - /* From the Vulkan 1.0 spec, section 34.1.1. Supported Sample Counts: - * - * sampleCounts will be set to VK_SAMPLE_COUNT_1_BIT if at least one of the - * following conditions is true: - * - * - tiling is VK_IMAGE_TILING_LINEAR - * - type is not VK_IMAGE_TYPE_2D - * - flags contains VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT - * - neither the VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT flag nor the - * VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT flag in - * VkFormatProperties::optimalTilingFeatures returned by - * vkGetPhysicalDeviceFormatProperties is set. 
- * - * D3D12 has a few more constraints: - * - no UAVs on multisample resources - */ - bool rt_or_ds_cap = - dfmt_info.Support1 & - (D3D12_FORMAT_SUPPORT1_RENDER_TARGET | D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL); - - properties->imageFormatProperties.sampleCounts = VK_SAMPLE_COUNT_1_BIT; - if (info->tiling != VK_IMAGE_TILING_LINEAR && - info->type == VK_IMAGE_TYPE_2D && - !(info->flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) && - rt_or_ds_cap && !is_bgra4 && - !(info->usage & VK_IMAGE_USAGE_STORAGE_BIT)) { - for (uint32_t s = VK_SAMPLE_COUNT_2_BIT; s < VK_SAMPLE_COUNT_64_BIT; s <<= 1) { - D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS ms_info = { - .Format = dfmt_info.Format, - .SampleCount = s, - }; - - HRESULT hres = - ID3D12Device1_CheckFeatureSupport(dev, D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS, - &ms_info, sizeof(ms_info)); - if (!FAILED(hres) && ms_info.NumQualityLevels > 0) - properties->imageFormatProperties.sampleCounts |= s; - } - } - - /* TODO: set correct value here */ - properties->imageFormatProperties.maxResourceSize = UINT32_MAX; - - return VK_SUCCESS; -} - -VKAPI_ATTR void VKAPI_CALL -dzn_GetPhysicalDeviceFormatProperties2(VkPhysicalDevice physicalDevice, - VkFormat format, - VkFormatProperties2 *pFormatProperties) -{ - VK_FROM_HANDLE(dzn_physical_device, pdev, physicalDevice); - - dzn_physical_device_get_format_properties(pdev, format, pFormatProperties); -} - -VKAPI_ATTR VkResult VKAPI_CALL -dzn_GetPhysicalDeviceImageFormatProperties2(VkPhysicalDevice physicalDevice, - const VkPhysicalDeviceImageFormatInfo2 *info, - VkImageFormatProperties2 *props) -{ - VK_FROM_HANDLE(dzn_physical_device, pdev, physicalDevice); - - return dzn_physical_device_get_image_format_properties(pdev, info, props); -} - -VKAPI_ATTR VkResult VKAPI_CALL -dzn_GetPhysicalDeviceImageFormatProperties(VkPhysicalDevice physicalDevice, - VkFormat format, - VkImageType type, - VkImageTiling tiling, - VkImageUsageFlags usage, - VkImageCreateFlags createFlags, - VkImageFormatProperties *pImageFormatProperties) -{ - const VkPhysicalDeviceImageFormatInfo2 info = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2, - .format = format, - .type = type, - .tiling = tiling, - .usage = usage, - .flags = createFlags, - }; - - VkImageFormatProperties2 props = {}; - - VkResult result = - dzn_GetPhysicalDeviceImageFormatProperties2(physicalDevice, &info, &props); - *pImageFormatProperties = props.imageFormatProperties; - - return result; -} - -VKAPI_ATTR void VKAPI_CALL -dzn_GetPhysicalDeviceSparseImageFormatProperties(VkPhysicalDevice physicalDevice, - VkFormat format, - VkImageType type, - VkSampleCountFlagBits samples, - VkImageUsageFlags usage, - VkImageTiling tiling, - uint32_t *pPropertyCount, - VkSparseImageFormatProperties *pProperties) -{ - *pPropertyCount = 0; -} - -VKAPI_ATTR void VKAPI_CALL -dzn_GetPhysicalDeviceSparseImageFormatProperties2(VkPhysicalDevice physicalDevice, - const VkPhysicalDeviceSparseImageFormatInfo2 *pFormatInfo, - uint32_t *pPropertyCount, - VkSparseImageFormatProperties2 *pProperties) -{ - *pPropertyCount = 0; -} - -VKAPI_ATTR void VKAPI_CALL -dzn_GetPhysicalDeviceExternalBufferProperties(VkPhysicalDevice physicalDevice, - const VkPhysicalDeviceExternalBufferInfo *pExternalBufferInfo, - VkExternalBufferProperties *pExternalBufferProperties) -{ - pExternalBufferProperties->externalMemoryProperties = - VkExternalMemoryProperties { - .compatibleHandleTypes = (VkExternalMemoryHandleTypeFlags)pExternalBufferInfo->handleType, - }; -} - -VKAPI_ATTR VkResult VKAPI_CALL 
-dzn_EnumeratePhysicalDevices(VkInstance inst, - uint32_t *pPhysicalDeviceCount, - VkPhysicalDevice *pPhysicalDevices) -{ - VK_FROM_HANDLE(dzn_instance, instance, inst); - - if (!instance->physical_devices_enumerated) { - IDXGIFactory4 *factory = dxgi_get_factory(false); - IDXGIAdapter1 *adapter = NULL; - for (UINT i = 0; SUCCEEDED(IDXGIFactory4_EnumAdapters1(factory, i, &adapter)); ++i) { - DXGI_ADAPTER_DESC1 desc; - IDXGIAdapter1_GetDesc1(adapter, &desc); - if (instance->debug_flags & DZN_DEBUG_WARP) { - if ((desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) == 0) { - IDXGIAdapter1_Release(adapter); - continue; - } - } - - VkResult result = - dzn_physical_device_create(instance, adapter, &desc); - - IDXGIAdapter1_Release(adapter); - if (result != VK_SUCCESS) { - IDXGIFactory4_Release(factory); - return result; - } - } - IDXGIFactory4_Release(factory); - } - - VK_OUTARRAY_MAKE_TYPED(VkPhysicalDevice, out, pPhysicalDevices, - pPhysicalDeviceCount); - - list_for_each_entry(struct dzn_physical_device, pdev, &instance->physical_devices, link) { - vk_outarray_append_typed(VkPhysicalDevice, &out, i) - *i = dzn_physical_device_to_handle(pdev); - } - - instance->physical_devices_enumerated = true; - return vk_outarray_status(&out); -} - -VKAPI_ATTR VkResult VKAPI_CALL -dzn_EnumerateInstanceVersion(uint32_t *pApiVersion) -{ - *pApiVersion = DZN_API_VERSION; - return VK_SUCCESS; -} - -static bool -dzn_physical_device_supports_compressed_format(struct dzn_physical_device *pdev, - const VkFormat *formats, - uint32_t format_count) -{ -#define REQUIRED_COMPRESSED_CAPS \ - (VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | \ - VK_FORMAT_FEATURE_BLIT_SRC_BIT | \ - VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT) - for (uint32_t i = 0; i < format_count; i++) { - VkFormatProperties2 props = {}; - dzn_physical_device_get_format_properties(pdev, formats[i], &props); - if ((props.formatProperties.optimalTilingFeatures & REQUIRED_COMPRESSED_CAPS) != REQUIRED_COMPRESSED_CAPS) - return false; - } - - return true; -} - -static bool -dzn_physical_device_supports_bc(struct dzn_physical_device *pdev) -{ - static const VkFormat formats[] = { - VK_FORMAT_BC1_RGB_UNORM_BLOCK, - VK_FORMAT_BC1_RGB_SRGB_BLOCK, - VK_FORMAT_BC1_RGBA_UNORM_BLOCK, - VK_FORMAT_BC1_RGBA_SRGB_BLOCK, - VK_FORMAT_BC2_UNORM_BLOCK, - VK_FORMAT_BC2_SRGB_BLOCK, - VK_FORMAT_BC3_UNORM_BLOCK, - VK_FORMAT_BC3_SRGB_BLOCK, - VK_FORMAT_BC4_UNORM_BLOCK, - VK_FORMAT_BC4_SNORM_BLOCK, - VK_FORMAT_BC5_UNORM_BLOCK, - VK_FORMAT_BC5_SNORM_BLOCK, - VK_FORMAT_BC6H_UFLOAT_BLOCK, - VK_FORMAT_BC6H_SFLOAT_BLOCK, - VK_FORMAT_BC7_UNORM_BLOCK, - VK_FORMAT_BC7_SRGB_BLOCK, - }; - - return dzn_physical_device_supports_compressed_format(pdev, formats, ARRAY_SIZE(formats)); -} - -VKAPI_ATTR void VKAPI_CALL -dzn_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice, - VkPhysicalDeviceFeatures2 *pFeatures) -{ - VK_FROM_HANDLE(dzn_physical_device, pdev, physicalDevice); - - pFeatures->features = VkPhysicalDeviceFeatures { - .robustBufferAccess = true, /* This feature is mandatory */ - .fullDrawIndexUint32 = false, - .imageCubeArray = true, - .independentBlend = false, - .geometryShader = false, - .tessellationShader = false, - .sampleRateShading = true, - .dualSrcBlend = false, - .logicOp = false, - .multiDrawIndirect = false, - .drawIndirectFirstInstance = false, - .depthClamp = false, - .depthBiasClamp = false, - .fillModeNonSolid = false, - .depthBounds = false, - .wideLines = false, - .largePoints = false, - .alphaToOne = false, - .multiViewport = false, - .samplerAnisotropy = false, - 
.textureCompressionETC2 = false, - .textureCompressionASTC_LDR = false, - .textureCompressionBC = dzn_physical_device_supports_bc(pdev), - .occlusionQueryPrecise = true, - .pipelineStatisticsQuery = true, - .vertexPipelineStoresAndAtomics = true, - .fragmentStoresAndAtomics = true, - .shaderTessellationAndGeometryPointSize = false, - .shaderImageGatherExtended = false, - .shaderStorageImageExtendedFormats = false, - .shaderStorageImageMultisample = false, - .shaderStorageImageReadWithoutFormat = false, - .shaderStorageImageWriteWithoutFormat = false, - .shaderUniformBufferArrayDynamicIndexing = false, - .shaderSampledImageArrayDynamicIndexing = false, - .shaderStorageBufferArrayDynamicIndexing = false, - .shaderStorageImageArrayDynamicIndexing = false, - .shaderClipDistance = false, - .shaderCullDistance = false, - .shaderFloat64 = false, - .shaderInt64 = false, - .shaderInt16 = false, - .shaderResourceResidency = false, - .shaderResourceMinLod = false, - .sparseBinding = false, - .sparseResidencyBuffer = false, - .sparseResidencyImage2D = false, - .sparseResidencyImage3D = false, - .sparseResidency2Samples = false, - .sparseResidency4Samples = false, - .sparseResidency8Samples = false, - .sparseResidency16Samples = false, - .sparseResidencyAliased = false, - .variableMultisampleRate = false, - .inheritedQueries = false, - }; - - - vk_foreach_struct(ext, pFeatures->pNext) { - dzn_debug_ignored_stype(ext->sType); - } -} - - -VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL -dzn_GetInstanceProcAddr(VkInstance _instance, - const char *pName) -{ - VK_FROM_HANDLE(dzn_instance, instance, _instance); - return vk_instance_get_proc_addr(&instance->vk, - &dzn_instance_entrypoints, - pName); -} - -/* Windows will use a dll definition file to avoid build errors. */ -#ifdef _WIN32 -#undef PUBLIC -#define PUBLIC -#endif - -/* With version 1+ of the loader interface the ICD should expose - * vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in apps. - */ -PUBLIC VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL -vk_icdGetInstanceProcAddr(VkInstance instance, - const char *pName); - -PUBLIC VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL -vk_icdGetInstanceProcAddr(VkInstance instance, - const char *pName) -{ - return dzn_GetInstanceProcAddr(instance, pName); -} - -/* With version 4+ of the loader interface the ICD should expose - * vk_icdGetPhysicalDeviceProcAddr() - */ -PUBLIC VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL -vk_icdGetPhysicalDeviceProcAddr(VkInstance _instance, - const char* pName); - -VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL -vk_icdGetPhysicalDeviceProcAddr(VkInstance _instance, - const char* pName) -{ - VK_FROM_HANDLE(dzn_instance, instance, _instance); - return vk_instance_get_physical_device_proc_addr(&instance->vk, pName); -} - -/* vk_icd.h does not declare this function, so we declare it here to - * suppress Wmissing-prototypes. - */ -PUBLIC VKAPI_ATTR VkResult VKAPI_CALL -vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t* pSupportedVersion); - -PUBLIC VKAPI_ATTR VkResult VKAPI_CALL -vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t* pSupportedVersion) -{ - /* For the full details on loader interface versioning, see - * . - * What follows is a condensed summary, to help you navigate the large and - * confusing official doc. - * - * - Loader interface v0 is incompatible with later versions. We don't - * support it. - * - * - In loader interface v1: - * - The first ICD entrypoint called by the loader is - * vk_icdGetInstanceProcAddr(). The ICD must statically expose this - * entrypoint. 
- * - The ICD must statically expose no other Vulkan symbol unless it is - * linked with -Bsymbolic. - * - Each dispatchable Vulkan handle created by the ICD must be - * a pointer to a struct whose first member is VK_LOADER_DATA. The - * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC. - * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and - * vkDestroySurfaceKHR(). The ICD must be capable of working with - * such loader-managed surfaces. - * - * - Loader interface v2 differs from v1 in: - * - The first ICD entrypoint called by the loader is - * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must - * statically expose this entrypoint. - * - * - Loader interface v3 differs from v2 in: - * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(), - * vkDestroySurfaceKHR(), and other APIs which use VkSurfaceKHR, - * because the loader no longer does so. - * - * - Loader interface v4 differs from v3 in: - * - The ICD must implement vk_icdGetPhysicalDeviceProcAddr(). - */ - *pSupportedVersion = MIN2(*pSupportedVersion, 4u); - return VK_SUCCESS; -} - -VKAPI_ATTR void VKAPI_CALL -dzn_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice, - VkPhysicalDeviceProperties2 *pProperties) -{ - VK_FROM_HANDLE(dzn_physical_device, pdevice, physicalDevice); - - /* minimum from the spec */ - const VkSampleCountFlags supported_sample_counts = - VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_4_BIT; - - /* FIXME: this is mostly bunk for now */ - VkPhysicalDeviceLimits limits = { - - /* TODO: support older feature levels */ - .maxImageDimension1D = (1 << 14), - .maxImageDimension2D = (1 << 14), - .maxImageDimension3D = (1 << 11), - .maxImageDimensionCube = (1 << 14), - .maxImageArrayLayers = (1 << 11), - - /* from here on, we simply use the minimum values from the spec for now */ - .maxTexelBufferElements = 65536, - .maxUniformBufferRange = 16384, - .maxStorageBufferRange = (1ul << 27), - .maxPushConstantsSize = 128, - .maxMemoryAllocationCount = 4096, - .maxSamplerAllocationCount = 4000, - .bufferImageGranularity = 131072, - .sparseAddressSpaceSize = 0, - .maxBoundDescriptorSets = MAX_SETS, - .maxPerStageDescriptorSamplers = 16, - .maxPerStageDescriptorUniformBuffers = 12, - .maxPerStageDescriptorStorageBuffers = 4, - .maxPerStageDescriptorSampledImages = 16, - .maxPerStageDescriptorStorageImages = 4, - .maxPerStageDescriptorInputAttachments = 4, - .maxPerStageResources = 128, - .maxDescriptorSetSamplers = 96, - .maxDescriptorSetUniformBuffers = 72, - .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS, - .maxDescriptorSetStorageBuffers = 24, - .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS, - .maxDescriptorSetSampledImages = 96, - .maxDescriptorSetStorageImages = 24, - .maxDescriptorSetInputAttachments = 4, - .maxVertexInputAttributes = 16, - .maxVertexInputBindings = 16, - .maxVertexInputAttributeOffset = 2047, - .maxVertexInputBindingStride = 2048, - .maxVertexOutputComponents = 64, - .maxTessellationGenerationLevel = 0, - .maxTessellationPatchSize = 0, - .maxTessellationControlPerVertexInputComponents = 0, - .maxTessellationControlPerVertexOutputComponents = 0, - .maxTessellationControlPerPatchOutputComponents = 0, - .maxTessellationControlTotalOutputComponents = 0, - .maxTessellationEvaluationInputComponents = 0, - .maxTessellationEvaluationOutputComponents = 0, - .maxGeometryShaderInvocations = 0, - .maxGeometryInputComponents = 0, - .maxGeometryOutputComponents = 0, - .maxGeometryOutputVertices = 0, - .maxGeometryTotalOutputComponents = 
0, - .maxFragmentInputComponents = 64, - .maxFragmentOutputAttachments = 4, - .maxFragmentDualSrcAttachments = 0, - .maxFragmentCombinedOutputResources = 4, - .maxComputeSharedMemorySize = 16384, - .maxComputeWorkGroupCount = { 65535, 65535, 65535 }, - .maxComputeWorkGroupInvocations = 128, - .maxComputeWorkGroupSize = { 128, 128, 64 }, - .subPixelPrecisionBits = 4, - .subTexelPrecisionBits = 4, - .mipmapPrecisionBits = 4, - .maxDrawIndexedIndexValue = 0x00ffffff, - .maxDrawIndirectCount = 1, - .maxSamplerLodBias = 2.0f, - .maxSamplerAnisotropy = 1.0f, - .maxViewports = 1, - .maxViewportDimensions = { 4096, 4096 }, - .viewportBoundsRange = { -8192, 8191 }, - .viewportSubPixelBits = 0, - .minMemoryMapAlignment = 64, - .minTexelBufferOffsetAlignment = 256, - .minUniformBufferOffsetAlignment = 256, - .minStorageBufferOffsetAlignment = 256, - .minTexelOffset = -8, - .maxTexelOffset = 7, - .minTexelGatherOffset = 0, - .maxTexelGatherOffset = 0, - .minInterpolationOffset = -0.5f, - .maxInterpolationOffset = 0.5f, - .subPixelInterpolationOffsetBits = 4, - .maxFramebufferWidth = 4096, - .maxFramebufferHeight = 4096, - .maxFramebufferLayers = 256, - .framebufferColorSampleCounts = supported_sample_counts, - .framebufferDepthSampleCounts = supported_sample_counts, - .framebufferStencilSampleCounts = supported_sample_counts, - .framebufferNoAttachmentsSampleCounts = supported_sample_counts, - .maxColorAttachments = 4, - .sampledImageColorSampleCounts = supported_sample_counts, - .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT, - .sampledImageDepthSampleCounts = supported_sample_counts, - .sampledImageStencilSampleCounts = supported_sample_counts, - .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT, - .maxSampleMaskWords = 1, - .timestampComputeAndGraphics = true, - .timestampPeriod = pdevice->timestamp_period, - .maxClipDistances = 8, - .maxCullDistances = 8, - .maxCombinedClipAndCullDistances = 8, - .discreteQueuePriorities = 2, - .pointSizeRange = { 1.0f, 1.0f }, - .lineWidthRange = { 1.0f, 1.0f }, - .pointSizeGranularity = 0.0f, - .lineWidthGranularity = 0.0f, - .strictLines = 0, - .standardSampleLocations = false, - .optimalBufferCopyOffsetAlignment = 1, - .optimalBufferCopyRowPitchAlignment = 1, - .nonCoherentAtomSize = 256, - }; - - const DXGI_ADAPTER_DESC1 *desc = &pdevice->adapter_desc; - - VkPhysicalDeviceType devtype = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU; - if (desc->Flags == DXGI_ADAPTER_FLAG_SOFTWARE) - devtype = VK_PHYSICAL_DEVICE_TYPE_CPU; - else if (false) { // TODO: detect discrete GPUs - /* This is a tad tricky to get right, because we need to have the - * actual ID3D12Device before we can query the - * D3D12_FEATURE_DATA_ARCHITECTURE structure... So for now, let's - * just pretend everything is integrated, because... 
well, that's - * what I have at hand right now ;) - */ - devtype = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU; - } - - pProperties->properties = VkPhysicalDeviceProperties { - .apiVersion = DZN_API_VERSION, - .driverVersion = vk_get_driver_version(), - - .vendorID = desc->VendorId, - .deviceID = desc->DeviceId, - .deviceType = devtype, - - .limits = limits, - .sparseProperties = { 0 }, - }; - - snprintf(pProperties->properties.deviceName, - sizeof(pProperties->properties.deviceName), - "Microsoft Direct3D12 (%S)", desc->Description); - - memcpy(pProperties->properties.pipelineCacheUUID, - pdevice->pipeline_cache_uuid, VK_UUID_SIZE); - - vk_foreach_struct(ext, pProperties->pNext) { - switch (ext->sType) { - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES: { - VkPhysicalDeviceIDProperties *id_props = - (VkPhysicalDeviceIDProperties *)ext; - memcpy(id_props->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE); - memcpy(id_props->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE); - /* The LUID is for Windows. */ - id_props->deviceLUIDValid = false; - break; - } - default: - dzn_debug_ignored_stype(ext->sType); - break; - } - } -} - -VKAPI_ATTR void VKAPI_CALL -dzn_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice, - uint32_t *pQueueFamilyPropertyCount, - VkQueueFamilyProperties2 *pQueueFamilyProperties) -{ - VK_FROM_HANDLE(dzn_physical_device, pdev, physicalDevice); - VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out, - pQueueFamilyProperties, pQueueFamilyPropertyCount); - - (void)dzn_physical_device_get_d3d12_dev(pdev); - - for (uint32_t i = 0; i < pdev->queue_family_count; i++) { - vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p) { - p->queueFamilyProperties = pdev->queue_families[i].props; - - vk_foreach_struct(ext, pQueueFamilyProperties->pNext) { - dzn_debug_ignored_stype(ext->sType); - } - } - } -} - -VKAPI_ATTR void VKAPI_CALL -dzn_GetPhysicalDeviceMemoryProperties(VkPhysicalDevice physicalDevice, - VkPhysicalDeviceMemoryProperties *pMemoryProperties) -{ - VK_FROM_HANDLE(dzn_physical_device, pdev, physicalDevice); - - // Ensure memory caps are up-to-date - (void)dzn_physical_device_get_d3d12_dev(pdev); - *pMemoryProperties = pdev->memory; -} - -VKAPI_ATTR void VKAPI_CALL -dzn_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice physicalDevice, - VkPhysicalDeviceMemoryProperties2 *pMemoryProperties) -{ - dzn_GetPhysicalDeviceMemoryProperties(physicalDevice, - &pMemoryProperties->memoryProperties); - - vk_foreach_struct(ext, pMemoryProperties->pNext) { - dzn_debug_ignored_stype(ext->sType); - } -} - -VKAPI_ATTR VkResult VKAPI_CALL -dzn_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount, - VkLayerProperties *pProperties) -{ - if (pProperties == NULL) { - *pPropertyCount = 0; - return VK_SUCCESS; - } - - return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT); -} - -static VkResult -dzn_queue_sync_wait(struct dzn_queue *queue, const struct vk_sync_wait *wait) -{ - if (wait->sync->type == &vk_sync_dummy_type) - return VK_SUCCESS; - - struct dzn_device *device = container_of(queue->vk.base.device, struct dzn_device, vk); - assert(wait->sync->type == &dzn_sync_type); - struct dzn_sync *sync = container_of(wait->sync, struct dzn_sync, vk); - uint64_t value = - (sync->vk.flags & VK_SYNC_IS_TIMELINE) ? 
wait->wait_value : 1; - - assert(sync->fence != NULL); - - if (value > 0 && FAILED(ID3D12CommandQueue_Wait(queue->cmdqueue, sync->fence, value))) - return vk_error(device, VK_ERROR_UNKNOWN); - - return VK_SUCCESS; -} - -static VkResult -dzn_queue_sync_signal(struct dzn_queue *queue, const struct vk_sync_signal *signal) -{ - if (signal->sync->type == &vk_sync_dummy_type) - return VK_SUCCESS; - - struct dzn_device *device = container_of(queue->vk.base.device, struct dzn_device, vk); - assert(signal->sync->type == &dzn_sync_type); - struct dzn_sync *sync = container_of(signal->sync, struct dzn_sync, vk); - uint64_t value = - (sync->vk.flags & VK_SYNC_IS_TIMELINE) ? signal->signal_value : 1; - assert(value > 0); - - assert(sync->fence != NULL); - - if (FAILED(ID3D12CommandQueue_Signal(queue->cmdqueue, sync->fence, value))) - return vk_error(device, VK_ERROR_UNKNOWN); - - return VK_SUCCESS; -} - -static VkResult -dzn_queue_submit(struct vk_queue *q, - struct vk_queue_submit *info) -{ - struct dzn_queue *queue = container_of(q, struct dzn_queue, vk); - struct dzn_device *device = container_of(q->base.device, struct dzn_device, vk); - VkResult result = VK_SUCCESS; - - for (uint32_t i = 0; i < info->wait_count; i++) { - result = dzn_queue_sync_wait(queue, &info->waits[i]); - if (result != VK_SUCCESS) - return result; - } - - for (uint32_t i = 0; i < info->command_buffer_count; i++) { - struct dzn_cmd_buffer *cmd_buffer = - container_of(info->command_buffers[i], struct dzn_cmd_buffer, vk); - - ID3D12CommandList *cmdlists[] = { (ID3D12CommandList *)cmd_buffer->cmdlist }; - - util_dynarray_foreach(&cmd_buffer->events.wait, struct dzn_event *, evt) { - if (FAILED(ID3D12CommandQueue_Wait(queue->cmdqueue, (*evt)->fence, 1))) - return vk_error(device, VK_ERROR_UNKNOWN); - } - - util_dynarray_foreach(&cmd_buffer->queries.wait, struct dzn_cmd_buffer_query_range, range) { - mtx_lock(&range->qpool->queries_lock); - for (uint32_t q = range->start; q < range->start + range->count; q++) { - struct dzn_query *query = &range->qpool->queries[q]; - - if (query->fence && - FAILED(ID3D12CommandQueue_Wait(queue->cmdqueue, query->fence, query->fence_value))) - return vk_error(device, VK_ERROR_UNKNOWN); - } - mtx_unlock(&range->qpool->queries_lock); - } - - util_dynarray_foreach(&cmd_buffer->queries.reset, struct dzn_cmd_buffer_query_range, range) { - mtx_lock(&range->qpool->queries_lock); - for (uint32_t q = range->start; q < range->start + range->count; q++) { - struct dzn_query *query = &range->qpool->queries[q]; - if (query->fence) { - ID3D12Fence_Release(query->fence); - query->fence = NULL; - } - query->fence_value = 0; - } - mtx_unlock(&range->qpool->queries_lock); - } - - ID3D12CommandQueue_ExecuteCommandLists(queue->cmdqueue, 1, cmdlists); - - util_dynarray_foreach(&cmd_buffer->events.signal, struct dzn_cmd_event_signal, evt) { - if (FAILED(ID3D12CommandQueue_Signal(queue->cmdqueue, evt->event->fence, evt->value ? 
1 : 0))) - return vk_error(device, VK_ERROR_UNKNOWN); - } - - util_dynarray_foreach(&cmd_buffer->queries.signal, struct dzn_cmd_buffer_query_range, range) { - mtx_lock(&range->qpool->queries_lock); - for (uint32_t q = range->start; q < range->start + range->count; q++) { - struct dzn_query *query = &range->qpool->queries[q]; - query->fence_value = queue->fence_point + 1; - query->fence = queue->fence; - ID3D12Fence_AddRef(query->fence); - } - mtx_unlock(&range->qpool->queries_lock); - } - } - - for (uint32_t i = 0; i < info->signal_count; i++) { - result = dzn_queue_sync_signal(queue, &info->signals[i]); - if (result != VK_SUCCESS) - return vk_error(device, VK_ERROR_UNKNOWN); - } - - if (FAILED(ID3D12CommandQueue_Signal(queue->cmdqueue, queue->fence, ++queue->fence_point))) - return vk_error(device, VK_ERROR_UNKNOWN); - - return VK_SUCCESS; -} - -static void -dzn_queue_finish(struct dzn_queue *queue) -{ - if (queue->cmdqueue) - ID3D12CommandQueue_Release(queue->cmdqueue); - - if (queue->fence) - ID3D12Fence_Release(queue->fence); - - vk_queue_finish(&queue->vk); -} - -static VkResult -dzn_queue_init(struct dzn_queue *queue, - struct dzn_device *device, - const VkDeviceQueueCreateInfo *pCreateInfo, - uint32_t index_in_family) -{ - struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk); - - VkResult result = vk_queue_init(&queue->vk, &device->vk, pCreateInfo, index_in_family); - if (result != VK_SUCCESS) - return result; - - queue->vk.driver_submit = dzn_queue_submit; - - assert(pCreateInfo->queueFamilyIndex < pdev->queue_family_count); - - D3D12_COMMAND_QUEUE_DESC queue_desc = - pdev->queue_families[pCreateInfo->queueFamilyIndex].desc; - - queue_desc.Priority = - (INT)(pCreateInfo->pQueuePriorities[index_in_family] * (float)D3D12_COMMAND_QUEUE_PRIORITY_HIGH); - queue_desc.NodeMask = 0; - - if (FAILED(ID3D12Device1_CreateCommandQueue(device->dev, &queue_desc, - IID_ID3D12CommandQueue, - (void **)&queue->cmdqueue))) { - dzn_queue_finish(queue); - return vk_error(device->vk.physical->instance, VK_ERROR_INITIALIZATION_FAILED); - } - - if (FAILED(ID3D12Device1_CreateFence(device->dev, 0, D3D12_FENCE_FLAG_NONE, - IID_ID3D12Fence, - (void **)&queue->fence))) { - dzn_queue_finish(queue); - return vk_error(device->vk.physical->instance, VK_ERROR_INITIALIZATION_FAILED); - } - - return VK_SUCCESS; -} - -static VkResult -check_physical_device_features(VkPhysicalDevice physicalDevice, - const VkPhysicalDeviceFeatures *features) -{ - VK_FROM_HANDLE(dzn_physical_device, pdev, physicalDevice); - - VkPhysicalDeviceFeatures supported_features; - - pdev->vk.dispatch_table.GetPhysicalDeviceFeatures(physicalDevice, &supported_features); - - VkBool32 *supported_feature = (VkBool32 *)&supported_features; - VkBool32 *enabled_feature = (VkBool32 *)features; - unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32); - for (uint32_t i = 0; i < num_features; i++) { - if (enabled_feature[i] && !supported_feature[i]) - return VK_ERROR_FEATURE_NOT_PRESENT; - } - - return VK_SUCCESS; -} - -static VkResult -dzn_device_create_sync_for_memory(struct vk_device *device, - VkDeviceMemory memory, - bool signal_memory, - struct vk_sync **sync_out) -{ - return vk_sync_create(device, &vk_sync_dummy_type, - (enum vk_sync_flags)0, 1, sync_out); -} - -static void -dzn_device_ref_pipeline_layout(struct vk_device *dev, VkPipelineLayout layout) -{ - VK_FROM_HANDLE(dzn_pipeline_layout, playout, layout); - - dzn_pipeline_layout_ref(playout); -} - -static void 
-dzn_device_unref_pipeline_layout(struct vk_device *dev, VkPipelineLayout layout) -{ - VK_FROM_HANDLE(dzn_pipeline_layout, playout, layout); - - dzn_pipeline_layout_unref(playout); -} - -static VkResult -dzn_device_query_init(struct dzn_device *device) -{ - /* FIXME: create the resource in the default heap */ - D3D12_HEAP_PROPERTIES hprops; - ID3D12Device1_GetCustomHeapProperties(device->dev, &hprops, 0, D3D12_HEAP_TYPE_UPLOAD); - D3D12_RESOURCE_DESC rdesc = { - .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER, - .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT, - .Width = DZN_QUERY_REFS_RES_SIZE, - .Height = 1, - .DepthOrArraySize = 1, - .MipLevels = 1, - .Format = DXGI_FORMAT_UNKNOWN, - .SampleDesc = { .Count = 1, .Quality = 0 }, - .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR, - .Flags = D3D12_RESOURCE_FLAG_NONE, - }; - - if (FAILED(ID3D12Device1_CreateCommittedResource(device->dev, &hprops, - D3D12_HEAP_FLAG_NONE, - &rdesc, - D3D12_RESOURCE_STATE_GENERIC_READ, - NULL, - IID_ID3D12Resource, - (void **)&device->queries.refs))) - return vk_error(device->vk.physical, VK_ERROR_OUT_OF_DEVICE_MEMORY); - - uint8_t *queries_ref; - if (FAILED(ID3D12Resource_Map(device->queries.refs, 0, NULL, (void **)&queries_ref))) - return vk_error(device->vk.physical, VK_ERROR_OUT_OF_HOST_MEMORY); - - memset(queries_ref + DZN_QUERY_REFS_ALL_ONES_OFFSET, 0xff, DZN_QUERY_REFS_SECTION_SIZE); - memset(queries_ref + DZN_QUERY_REFS_ALL_ZEROS_OFFSET, 0x0, DZN_QUERY_REFS_SECTION_SIZE); - ID3D12Resource_Unmap(device->queries.refs, 0, NULL); - - return VK_SUCCESS; -} - -static void -dzn_device_query_finish(struct dzn_device *device) -{ - if (device->queries.refs) - ID3D12Resource_Release(device->queries.refs); -} - -static void -dzn_device_destroy(struct dzn_device *device, const VkAllocationCallbacks *pAllocator) -{ - if (!device) - return; - - struct dzn_instance *instance = - container_of(device->vk.physical->instance, struct dzn_instance, vk); - - vk_foreach_queue_safe(q, &device->vk) { - struct dzn_queue *queue = container_of(q, struct dzn_queue, vk); - - dzn_queue_finish(queue); - } - - dzn_device_query_finish(device); - dzn_meta_finish(device); - - if (device->dev) - ID3D12Device1_Release(device->dev); - - vk_device_finish(&device->vk); - vk_free2(&instance->vk.alloc, pAllocator, device); -} - -static VkResult -dzn_device_create(struct dzn_physical_device *pdev, - const VkDeviceCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkDevice *out) -{ - struct dzn_instance *instance = container_of(pdev->vk.instance, struct dzn_instance, vk); - - uint32_t queue_count = 0; - for (uint32_t qf = 0; qf < pCreateInfo->queueCreateInfoCount; qf++) { - const VkDeviceQueueCreateInfo *qinfo = &pCreateInfo->pQueueCreateInfos[qf]; - queue_count += qinfo->queueCount; - } - - VK_MULTIALLOC(ma); - VK_MULTIALLOC_DECL(&ma, struct dzn_device, device, 1); - VK_MULTIALLOC_DECL(&ma, struct dzn_queue, queues, queue_count); - - if (!vk_multialloc_zalloc2(&ma, &instance->vk.alloc, pAllocator, - VK_SYSTEM_ALLOCATION_SCOPE_DEVICE)) - return vk_error(pdev, VK_ERROR_OUT_OF_HOST_MEMORY); - - struct vk_device_dispatch_table dispatch_table; - - /* For secondary command buffer support, overwrite any command entrypoints - * in the main device-level dispatch table with - * vk_cmd_enqueue_unless_primary_Cmd*. 
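- * Commands recorded on a secondary command buffer are thus enqueued in its - * vk_cmd_queue, so they can be replayed on the primary command buffer when - * vkCmdExecuteCommands() is called.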
- */ - vk_device_dispatch_table_from_entrypoints(&dispatch_table, - &vk_cmd_enqueue_unless_primary_device_entrypoints, true); - vk_device_dispatch_table_from_entrypoints(&dispatch_table, - &dzn_device_entrypoints, false); - vk_device_dispatch_table_from_entrypoints(&dispatch_table, - &wsi_device_entrypoints, false); - - /* Populate our primary cmd_dispatch table. */ - vk_device_dispatch_table_from_entrypoints(&device->cmd_dispatch, - &dzn_device_entrypoints, true); - vk_device_dispatch_table_from_entrypoints(&device->cmd_dispatch, - &vk_common_device_entrypoints, - false); - - VkResult result = - vk_device_init(&device->vk, &pdev->vk, &dispatch_table, pCreateInfo, pAllocator); - if (result != VK_SUCCESS) { - vk_free2(&device->vk.alloc, pAllocator, device); - return result; - } - - /* Must be done after vk_device_init() because that function memsets the - * whole struct to 0. - */ - device->vk.command_dispatch_table = &device->cmd_dispatch; - device->vk.ref_pipeline_layout = dzn_device_ref_pipeline_layout; - device->vk.unref_pipeline_layout = dzn_device_unref_pipeline_layout; - device->vk.create_sync_for_memory = dzn_device_create_sync_for_memory; - - device->dev = dzn_physical_device_get_d3d12_dev(pdev); - if (!device->dev) { - dzn_device_destroy(device, pAllocator); - return vk_error(pdev, VK_ERROR_INITIALIZATION_FAILED); - } - - ID3D12Device1_AddRef(device->dev); - - ID3D12InfoQueue *info_queue; - if (SUCCEEDED(ID3D12Device1_QueryInterface(device->dev, IID_ID3D12InfoQueue, - (void **)&info_queue))) { - D3D12_MESSAGE_SEVERITY severities[] = { - D3D12_MESSAGE_SEVERITY_INFO, - D3D12_MESSAGE_SEVERITY_WARNING, - }; - - D3D12_MESSAGE_ID msg_ids[] = { - D3D12_MESSAGE_ID_CLEARRENDERTARGETVIEW_MISMATCHINGCLEARVALUE, - }; - - D3D12_INFO_QUEUE_FILTER NewFilter = {}; - NewFilter.DenyList.NumSeverities = ARRAY_SIZE(severities); - NewFilter.DenyList.pSeverityList = severities; - NewFilter.DenyList.NumIDs = ARRAY_SIZE(msg_ids); - NewFilter.DenyList.pIDList = msg_ids; - - ID3D12InfoQueue_PushStorageFilter(info_queue, &NewFilter); - } - - result = dzn_meta_init(device); - if (result != VK_SUCCESS) { - dzn_device_destroy(device, pAllocator); - return result; - } - - result = dzn_device_query_init(device); - if (result != VK_SUCCESS) { - dzn_device_destroy(device, pAllocator); - return result; - } - - uint32_t qindex = 0; - for (uint32_t qf = 0; qf < pCreateInfo->queueCreateInfoCount; qf++) { - const VkDeviceQueueCreateInfo *qinfo = &pCreateInfo->pQueueCreateInfos[qf]; - - for (uint32_t q = 0; q < qinfo->queueCount; q++) { - result = - dzn_queue_init(&queues[qindex++], device, qinfo, q); - if (result != VK_SUCCESS) { - dzn_device_destroy(device, pAllocator); - return result; - } - } - } - - assert(queue_count == qindex); - *out = dzn_device_to_handle(device); - return VK_SUCCESS; -} - -ID3D12RootSignature * -dzn_device_create_root_sig(struct dzn_device *device, - const D3D12_VERSIONED_ROOT_SIGNATURE_DESC *desc) -{ - struct dzn_instance *instance = - container_of(device->vk.physical->instance, struct dzn_instance, vk); - ID3D10Blob *sig, *error; - - if (FAILED(instance->d3d12.serialize_root_sig(desc, - &sig, &error))) { - if (instance->debug_flags & DZN_DEBUG_SIG) { - const char* error_msg = (const char*)ID3D10Blob_GetBufferPointer(error); - fprintf(stderr, - "== SERIALIZE ROOT SIG ERROR =============================================\n" - "%s\n" - "== END ==========================================================\n", - error_msg); - } - - ID3D10Blob_Release(error); - return NULL; - } - - ID3D12RootSignature 
*root_sig; - if (FAILED(ID3D12Device1_CreateRootSignature(device->dev, 0, - ID3D10Blob_GetBufferPointer(sig), - ID3D10Blob_GetBufferSize(sig), - IID_ID3D12RootSignature, - (void **)&root_sig))) { - ID3D10Blob_Release(sig); - return NULL; - } - - ID3D10Blob_Release(sig); - return root_sig; -} - -VKAPI_ATTR VkResult VKAPI_CALL -dzn_CreateDevice(VkPhysicalDevice physicalDevice, - const VkDeviceCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkDevice *pDevice) -{ - VK_FROM_HANDLE(dzn_physical_device, physical_device, physicalDevice); - struct dzn_instance *instance = - container_of(physical_device->vk.instance, struct dzn_instance, vk); - VkResult result; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO); - - /* Check enabled features */ - if (pCreateInfo->pEnabledFeatures) { - result = check_physical_device_features(physicalDevice, - pCreateInfo->pEnabledFeatures); - if (result != VK_SUCCESS) - return vk_error(physical_device, result); - } - - /* Check requested queues and fail if we are requested to create any - * queues with flags we don't support. - */ - assert(pCreateInfo->queueCreateInfoCount > 0); - for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) { - if (pCreateInfo->pQueueCreateInfos[i].flags != 0) - return vk_error(physical_device, VK_ERROR_INITIALIZATION_FAILED); - } - - return dzn_device_create(physical_device, pCreateInfo, pAllocator, pDevice); -} - -VKAPI_ATTR void VKAPI_CALL -dzn_DestroyDevice(VkDevice dev, - const VkAllocationCallbacks *pAllocator) -{ - VK_FROM_HANDLE(dzn_device, device, dev); - - device->vk.dispatch_table.DeviceWaitIdle(dev); - - dzn_device_destroy(device, pAllocator); -} - -static void -dzn_device_memory_destroy(struct dzn_device_memory *mem, - const VkAllocationCallbacks *pAllocator) -{ - if (!mem) - return; - - struct dzn_device *device = container_of(mem->base.device, struct dzn_device, vk); - - if (mem->map) - ID3D12Resource_Unmap(mem->map_res, 0, NULL); - - if (mem->map_res) - ID3D12Resource_Release(mem->map_res); - - if (mem->heap) - ID3D12Heap_Release(mem->heap); - - vk_object_base_finish(&mem->base); - vk_free2(&device->vk.alloc, pAllocator, mem); -} - -static VkResult -dzn_device_memory_create(struct dzn_device *device, - const VkMemoryAllocateInfo *pAllocateInfo, - const VkAllocationCallbacks *pAllocator, - VkDeviceMemory *out) -{ - struct dzn_physical_device *pdevice = - container_of(device->vk.physical, struct dzn_physical_device, vk); - - struct dzn_device_memory *mem = (struct dzn_device_memory *) - vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*mem), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!mem) - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - - vk_object_base_init(&device->vk, &mem->base, VK_OBJECT_TYPE_DEVICE_MEMORY); - - /* The Vulkan 1.0.33 spec says "allocationSize must be greater than 0". */ - assert(pAllocateInfo->allocationSize > 0); - - mem->size = pAllocateInfo->allocationSize; - -#if 0 - const VkExportMemoryAllocateInfo *export_info = NULL; - VkMemoryAllocateFlags vk_flags = 0; -#endif - - vk_foreach_struct_const(ext, pAllocateInfo->pNext) { - dzn_debug_ignored_stype(ext->sType); - } - - const VkMemoryType *mem_type = - &pdevice->memory.memoryTypes[pAllocateInfo->memoryTypeIndex]; - - D3D12_HEAP_DESC heap_desc = {}; - // TODO: fix all of these: - heap_desc.SizeInBytes = pAllocateInfo->allocationSize; - heap_desc.Alignment = - heap_desc.SizeInBytes >= D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT ? 
- D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT : - D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; - heap_desc.Flags = - dzn_physical_device_get_heap_flags_for_mem_type(pdevice, - pAllocateInfo->memoryTypeIndex); - - /* TODO: Unsure about this logic??? */ - mem->initial_state = D3D12_RESOURCE_STATE_COMMON; - heap_desc.Properties.Type = D3D12_HEAP_TYPE_CUSTOM; - heap_desc.Properties.MemoryPoolPreference = - ((mem_type->propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) && - !pdevice->architecture.UMA) ? - D3D12_MEMORY_POOL_L1 : D3D12_MEMORY_POOL_L0; - if (mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) { - heap_desc.Properties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_BACK; - } else if (mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) { - heap_desc.Properties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE; - } else { - heap_desc.Properties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE; - } - - if (FAILED(ID3D12Device1_CreateHeap(device->dev, &heap_desc, - IID_ID3D12Heap, - (void **)&mem->heap))) { - dzn_device_memory_destroy(mem, pAllocator); - return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); - } - - if ((mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) && - !(heap_desc.Flags & D3D12_HEAP_FLAG_DENY_BUFFERS)){ - D3D12_RESOURCE_DESC res_desc = {}; - res_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; - res_desc.Format = DXGI_FORMAT_UNKNOWN; - res_desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; - res_desc.Width = heap_desc.SizeInBytes; - res_desc.Height = 1; - res_desc.DepthOrArraySize = 1; - res_desc.MipLevels = 1; - res_desc.SampleDesc.Count = 1; - res_desc.SampleDesc.Quality = 0; - res_desc.Flags = D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE; - res_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; - HRESULT hr = ID3D12Device1_CreatePlacedResource(device->dev, mem->heap, 0, &res_desc, - mem->initial_state, - NULL, - IID_ID3D12Resource, - (void **)&mem->map_res); - if (FAILED(hr)) { - dzn_device_memory_destroy(mem, pAllocator); - return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); - } - } - - *out = dzn_device_memory_to_handle(mem); - return VK_SUCCESS; -} - -VKAPI_ATTR VkResult VKAPI_CALL -dzn_AllocateMemory(VkDevice device, - const VkMemoryAllocateInfo *pAllocateInfo, - const VkAllocationCallbacks *pAllocator, - VkDeviceMemory *pMem) -{ - return dzn_device_memory_create(dzn_device_from_handle(device), - pAllocateInfo, pAllocator, pMem); -} - -VKAPI_ATTR void VKAPI_CALL -dzn_FreeMemory(VkDevice device, - VkDeviceMemory mem, - const VkAllocationCallbacks *pAllocator) -{ - dzn_device_memory_destroy(dzn_device_memory_from_handle(mem), pAllocator); -} - -VKAPI_ATTR VkResult VKAPI_CALL -dzn_MapMemory(VkDevice _device, - VkDeviceMemory _memory, - VkDeviceSize offset, - VkDeviceSize size, - VkMemoryMapFlags flags, - void **ppData) -{ - VK_FROM_HANDLE(dzn_device, device, _device); - VK_FROM_HANDLE(dzn_device_memory, mem, _memory); - - if (mem == NULL) { - *ppData = NULL; - return VK_SUCCESS; - } - - if (size == VK_WHOLE_SIZE) - size = mem->size - offset; - - /* From the Vulkan spec version 1.0.32 docs for MapMemory: - * - * * If size is not equal to VK_WHOLE_SIZE, size must be greater than 0 - * assert(size != 0); - * * If size is not equal to VK_WHOLE_SIZE, size must be less than or - * equal to the size of the memory minus offset - */ - assert(size > 0); - assert(offset + size <= mem->size); - - assert(mem->map_res); - D3D12_RANGE range = {}; - range.Begin = offset; - range.End = offset + size; - void *map = 
NULL; - if (FAILED(ID3D12Resource_Map(mem->map_res, 0, &range, &map))) - return vk_error(device, VK_ERROR_MEMORY_MAP_FAILED); - - mem->map = map; - mem->map_size = size; - - *ppData = ((uint8_t*) map) + offset; - - return VK_SUCCESS; -} - -VKAPI_ATTR void VKAPI_CALL -dzn_UnmapMemory(VkDevice _device, - VkDeviceMemory _memory) -{ - VK_FROM_HANDLE(dzn_device, device, _device); - VK_FROM_HANDLE(dzn_device_memory, mem, _memory); - - if (mem == NULL) - return; - - assert(mem->map_res); - ID3D12Resource_Unmap(mem->map_res, 0, NULL); - - mem->map = NULL; - mem->map_size = 0; -} - -VKAPI_ATTR VkResult VKAPI_CALL -dzn_FlushMappedMemoryRanges(VkDevice _device, - uint32_t memoryRangeCount, - const VkMappedMemoryRange *pMemoryRanges) -{ - return VK_SUCCESS; -} - -VKAPI_ATTR VkResult VKAPI_CALL -dzn_InvalidateMappedMemoryRanges(VkDevice _device, - uint32_t memoryRangeCount, - const VkMappedMemoryRange *pMemoryRanges) -{ - return VK_SUCCESS; -} - -static void -dzn_buffer_destroy(struct dzn_buffer *buf, const VkAllocationCallbacks *pAllocator) -{ - if (!buf) - return; - - struct dzn_device *device = container_of(buf->base.device, struct dzn_device, vk); - - if (buf->res) - ID3D12Resource_Release(buf->res); - - vk_object_base_finish(&buf->base); - vk_free2(&device->vk.alloc, pAllocator, buf); -} - -static VkResult -dzn_buffer_create(struct dzn_device *device, - const VkBufferCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkBuffer *out) -{ - struct dzn_buffer *buf = (struct dzn_buffer *) - vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*buf), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!buf) - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - - vk_object_base_init(&device->vk, &buf->base, VK_OBJECT_TYPE_BUFFER); - buf->create_flags = pCreateInfo->flags; - buf->size = pCreateInfo->size; - buf->usage = pCreateInfo->usage; - - if (buf->usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT) - buf->size = ALIGN_POT(buf->size, 256); - - buf->desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; - buf->desc.Format = DXGI_FORMAT_UNKNOWN; - buf->desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; - buf->desc.Width = buf->size; - buf->desc.Height = 1; - buf->desc.DepthOrArraySize = 1; - buf->desc.MipLevels = 1; - buf->desc.SampleDesc.Count = 1; - buf->desc.SampleDesc.Quality = 0; - buf->desc.Flags = D3D12_RESOURCE_FLAG_NONE; - buf->desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; - - if (buf->usage & - (VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | - VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT)) - buf->desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; - - *out = dzn_buffer_to_handle(buf); - return VK_SUCCESS; -} - -DXGI_FORMAT -dzn_buffer_get_dxgi_format(VkFormat format) -{ - enum pipe_format pfmt = vk_format_to_pipe_format(format); - - return dzn_pipe_to_dxgi_format(pfmt); -} - -D3D12_TEXTURE_COPY_LOCATION -dzn_buffer_get_copy_loc(const struct dzn_buffer *buf, - VkFormat format, - const VkBufferImageCopy2KHR *region, - VkImageAspectFlagBits aspect, - uint32_t layer) -{ - const uint32_t buffer_row_length = - region->bufferRowLength ? region->bufferRowLength : region->imageExtent.width; - const uint32_t buffer_image_height = - region->bufferImageHeight ? 
region->bufferImageHeight : region->imageExtent.height; - - VkFormat plane_format = dzn_image_get_plane_format(format, aspect); - - enum pipe_format pfmt = vk_format_to_pipe_format(plane_format); - uint32_t blksz = util_format_get_blocksize(pfmt); - uint32_t blkw = util_format_get_blockwidth(pfmt); - uint32_t blkh = util_format_get_blockheight(pfmt); - - D3D12_TEXTURE_COPY_LOCATION loc = { - .pResource = buf->res, - .Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, - .PlacedFootprint = { - .Footprint = { - .Format = - dzn_image_get_placed_footprint_format(format, aspect), - .Width = region->imageExtent.width, - .Height = region->imageExtent.height, - .Depth = region->imageExtent.depth, - .RowPitch = blksz * DIV_ROUND_UP(buffer_row_length, blkw), - }, - }, - }; - - uint32_t buffer_layer_stride = - loc.PlacedFootprint.Footprint.RowPitch * - DIV_ROUND_UP(loc.PlacedFootprint.Footprint.Height, blkh); - - loc.PlacedFootprint.Offset = - region->bufferOffset + (layer * buffer_layer_stride); - - return loc; -} - -D3D12_TEXTURE_COPY_LOCATION -dzn_buffer_get_line_copy_loc(const struct dzn_buffer *buf, VkFormat format, - const VkBufferImageCopy2KHR *region, - const D3D12_TEXTURE_COPY_LOCATION *loc, - uint32_t y, uint32_t z, uint32_t *start_x) -{ - uint32_t buffer_row_length = - region->bufferRowLength ? region->bufferRowLength : region->imageExtent.width; - uint32_t buffer_image_height = - region->bufferImageHeight ? region->bufferImageHeight : region->imageExtent.height; - - format = dzn_image_get_plane_format(format, region->imageSubresource.aspectMask); - - enum pipe_format pfmt = vk_format_to_pipe_format(format); - uint32_t blksz = util_format_get_blocksize(pfmt); - uint32_t blkw = util_format_get_blockwidth(pfmt); - uint32_t blkh = util_format_get_blockheight(pfmt); - uint32_t blkd = util_format_get_blockdepth(pfmt); - D3D12_TEXTURE_COPY_LOCATION new_loc = *loc; - uint32_t buffer_row_stride = - DIV_ROUND_UP(buffer_row_length, blkw) * blksz; - uint32_t buffer_layer_stride = - buffer_row_stride * - DIV_ROUND_UP(buffer_image_height, blkh); - - uint64_t tex_offset = - ((y / blkh) * buffer_row_stride) + - ((z / blkd) * buffer_layer_stride); - uint64_t offset = loc->PlacedFootprint.Offset + tex_offset; - uint32_t offset_alignment = D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT; - - while (offset_alignment % blksz) - offset_alignment += D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT; - - new_loc.PlacedFootprint.Footprint.Height = blkh; - new_loc.PlacedFootprint.Footprint.Depth = 1; - new_loc.PlacedFootprint.Offset = (offset / offset_alignment) * offset_alignment; - *start_x = ((offset % offset_alignment) / blksz) * blkw; - new_loc.PlacedFootprint.Footprint.Width = *start_x + region->imageExtent.width; - new_loc.PlacedFootprint.Footprint.RowPitch = - ALIGN_POT(DIV_ROUND_UP(new_loc.PlacedFootprint.Footprint.Width, blkw) * blksz, - D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); - return new_loc; -} - -bool -dzn_buffer_supports_region_copy(const D3D12_TEXTURE_COPY_LOCATION *loc) -{ - return !(loc->PlacedFootprint.Offset & (D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT - 1)) && - !(loc->PlacedFootprint.Footprint.RowPitch & (D3D12_TEXTURE_DATA_PITCH_ALIGNMENT - 1)); -} - -VKAPI_ATTR VkResult VKAPI_CALL -dzn_CreateBuffer(VkDevice device, - const VkBufferCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkBuffer *pBuffer) -{ - return dzn_buffer_create(dzn_device_from_handle(device), - pCreateInfo, pAllocator, pBuffer); -} - -VKAPI_ATTR void VKAPI_CALL -dzn_DestroyBuffer(VkDevice device, - VkBuffer buffer, - const 
VkAllocationCallbacks *pAllocator) -{ - dzn_buffer_destroy(dzn_buffer_from_handle(buffer), pAllocator); -} - -VKAPI_ATTR void VKAPI_CALL -dzn_GetBufferMemoryRequirements2(VkDevice dev, - const VkBufferMemoryRequirementsInfo2 *pInfo, - VkMemoryRequirements2 *pMemoryRequirements) -{ - VK_FROM_HANDLE(dzn_device, device, dev); - VK_FROM_HANDLE(dzn_buffer, buffer, pInfo->buffer); - struct dzn_physical_device *pdev = - container_of(device->vk.physical, struct dzn_physical_device, vk); - - /* uh, this is grossly over-estimating things */ - uint32_t alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; - VkDeviceSize size = buffer->size; - - if (buffer->usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT) { - alignment = MAX2(alignment, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); - size = ALIGN_POT(size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); - } - - pMemoryRequirements->memoryRequirements.size = size; - pMemoryRequirements->memoryRequirements.alignment = alignment; - pMemoryRequirements->memoryRequirements.memoryTypeBits = - dzn_physical_device_get_mem_type_mask_for_resource(pdev, &buffer->desc); - - vk_foreach_struct(ext, pMemoryRequirements->pNext) { - switch (ext->sType) { - case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: { - VkMemoryDedicatedRequirements *requirements = - (VkMemoryDedicatedRequirements *)ext; - /* TODO: figure out dedicated allocations */ - requirements->prefersDedicatedAllocation = false; - requirements->requiresDedicatedAllocation = false; - break; - } - - default: - dzn_debug_ignored_stype(ext->sType); - break; - } - } - -#if 0 - D3D12_RESOURCE_ALLOCATION_INFO GetResourceAllocationInfo( - UINT visibleMask, - UINT numResourceDescs, - const D3D12_RESOURCE_DESC *pResourceDescs); -#endif -} - -VKAPI_ATTR VkResult VKAPI_CALL -dzn_BindBufferMemory2(VkDevice _device, - uint32_t bindInfoCount, - const VkBindBufferMemoryInfo *pBindInfos) -{ - VK_FROM_HANDLE(dzn_device, device, _device); - - for (uint32_t i = 0; i < bindInfoCount; i++) { - assert(pBindInfos[i].sType == VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO); - - VK_FROM_HANDLE(dzn_device_memory, mem, pBindInfos[i].memory); - VK_FROM_HANDLE(dzn_buffer, buffer, pBindInfos[i].buffer); - - if (FAILED(ID3D12Device1_CreatePlacedResource(device->dev, mem->heap, - pBindInfos[i].memoryOffset, - &buffer->desc, - mem->initial_state, - NULL, - IID_ID3D12Resource, - (void **)&buffer->res))) - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - } - - return VK_SUCCESS; -} - -static VkResult -dzn_framebuffer_create(struct dzn_device *device, - const VkFramebufferCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkFramebuffer *out) -{ - VK_MULTIALLOC(ma); - VK_MULTIALLOC_DECL(&ma, struct dzn_framebuffer, framebuffer, 1); - VK_MULTIALLOC_DECL(&ma, struct dzn_image_view *, attachments, pCreateInfo->attachmentCount); - - if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, pAllocator, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT)) - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - - framebuffer->width = pCreateInfo->width; - framebuffer->height = pCreateInfo->height; - framebuffer->layers = pCreateInfo->layers; - - framebuffer->attachments = attachments; - framebuffer->attachment_count = pCreateInfo->attachmentCount; - for (uint32_t i = 0; i < framebuffer->attachment_count; i++) { - VK_FROM_HANDLE(dzn_image_view, iview, pCreateInfo->pAttachments[i]); - framebuffer->attachments[i] = iview; - } - - vk_object_base_init(&device->vk, &framebuffer->base, VK_OBJECT_TYPE_FRAMEBUFFER); - *out = 
dzn_framebuffer_to_handle(framebuffer); - return VK_SUCCESS; -} - -static void -dzn_framebuffer_destroy(struct dzn_framebuffer *framebuffer, - const VkAllocationCallbacks *pAllocator) -{ - if (!framebuffer) - return; - - struct dzn_device *device = - container_of(framebuffer->base.device, struct dzn_device, vk); - - vk_object_base_finish(&framebuffer->base); - vk_free2(&device->vk.alloc, pAllocator, framebuffer); -} - -VKAPI_ATTR VkResult VKAPI_CALL -dzn_CreateFramebuffer(VkDevice device, - const VkFramebufferCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkFramebuffer *pFramebuffer) -{ - return dzn_framebuffer_create(dzn_device_from_handle(device), - pCreateInfo, pAllocator, pFramebuffer); -} - -VKAPI_ATTR void VKAPI_CALL -dzn_DestroyFramebuffer(VkDevice device, - VkFramebuffer fb, - const VkAllocationCallbacks *pAllocator) -{ - dzn_framebuffer_destroy(dzn_framebuffer_from_handle(fb), pAllocator); -} - -static void -dzn_event_destroy(struct dzn_event *event, - const VkAllocationCallbacks *pAllocator) -{ - if (!event) - return; - - struct dzn_device *device = - container_of(event->base.device, struct dzn_device, vk); - - if (event->fence) - ID3D12Fence_Release(event->fence); - - vk_object_base_finish(&event->base); - vk_free2(&device->vk.alloc, pAllocator, event); -} - -static VkResult -dzn_event_create(struct dzn_device *device, - const VkEventCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkEvent *out) -{ - struct dzn_event *event = (struct dzn_event *) - vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*event), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!event) - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - - vk_object_base_init(&device->vk, &event->base, VK_OBJECT_TYPE_EVENT); - - if (FAILED(ID3D12Device1_CreateFence(device->dev, 0, D3D12_FENCE_FLAG_NONE, - IID_ID3D12Fence, - (void **)&event->fence))) { - dzn_event_destroy(event, pAllocator); - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - } - - *out = dzn_event_to_handle(event); - return VK_SUCCESS; -} - -VKAPI_ATTR VkResult VKAPI_CALL -dzn_CreateEvent(VkDevice device, - const VkEventCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkEvent *pEvent) -{ - return dzn_event_create(dzn_device_from_handle(device), - pCreateInfo, pAllocator, pEvent); -} - -VKAPI_ATTR void VKAPI_CALL -dzn_DestroyEvent(VkDevice device, - VkEvent event, - const VkAllocationCallbacks *pAllocator) -{ - dzn_event_destroy(dzn_event_from_handle(event), pAllocator); -} - -VKAPI_ATTR VkResult VKAPI_CALL -dzn_ResetEvent(VkDevice dev, - VkEvent evt) -{ - VK_FROM_HANDLE(dzn_device, device, dev); - VK_FROM_HANDLE(dzn_event, event, evt); - - if (FAILED(ID3D12Fence_Signal(event->fence, 0))) - return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); - - return VK_SUCCESS; -} - -VKAPI_ATTR VkResult VKAPI_CALL -dzn_SetEvent(VkDevice dev, - VkEvent evt) -{ - VK_FROM_HANDLE(dzn_device, device, dev); - VK_FROM_HANDLE(dzn_event, event, evt); - - if (FAILED(ID3D12Fence_Signal(event->fence, 1))) - return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); - - return VK_SUCCESS; -} - -VKAPI_ATTR VkResult VKAPI_CALL -dzn_GetEventStatus(VkDevice device, - VkEvent evt) -{ - VK_FROM_HANDLE(dzn_event, event, evt); - - return ID3D12Fence_GetCompletedValue(event->fence) == 0 ? 
- VK_EVENT_RESET : VK_EVENT_SET; -} - -VKAPI_ATTR void VKAPI_CALL -dzn_GetDeviceMemoryCommitment(VkDevice device, - VkDeviceMemory memory, - VkDeviceSize *pCommittedMemoryInBytes) -{ - VK_FROM_HANDLE(dzn_device_memory, mem, memory); - - // TODO: find if there's a way to query/track actual heap residency - *pCommittedMemoryInBytes = mem->size; -} - -VKAPI_ATTR VkResult VKAPI_CALL -dzn_QueueBindSparse(VkQueue queue, - uint32_t bindInfoCount, - const VkBindSparseInfo *pBindInfo, - VkFence fence) -{ - // FIXME: add proper implem - dzn_stub(); - return VK_SUCCESS; -} - -static D3D12_TEXTURE_ADDRESS_MODE -dzn_sampler_translate_addr_mode(VkSamplerAddressMode in) -{ - switch (in) { - case VK_SAMPLER_ADDRESS_MODE_REPEAT: return D3D12_TEXTURE_ADDRESS_MODE_WRAP; - case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT: return D3D12_TEXTURE_ADDRESS_MODE_MIRROR; - case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE: return D3D12_TEXTURE_ADDRESS_MODE_CLAMP; - case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER: return D3D12_TEXTURE_ADDRESS_MODE_BORDER; - default: unreachable("Invalid address mode"); - } -} - -static void -dzn_sampler_destroy(struct dzn_sampler *sampler, - const VkAllocationCallbacks *pAllocator) -{ - if (!sampler) - return; - - struct dzn_device *device = - container_of(sampler->base.device, struct dzn_device, vk); - - vk_object_base_finish(&sampler->base); - vk_free2(&device->vk.alloc, pAllocator, sampler); -} - -static VkResult -dzn_sampler_create(struct dzn_device *device, - const VkSamplerCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkSampler *out) -{ - struct dzn_sampler *sampler = (struct dzn_sampler *) - vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*sampler), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!sampler) - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - - vk_object_base_init(&device->vk, &sampler->base, VK_OBJECT_TYPE_SAMPLER); - - const VkSamplerCustomBorderColorCreateInfoEXT *pBorderColor = (const VkSamplerCustomBorderColorCreateInfoEXT *) - vk_find_struct_const(pCreateInfo->pNext, SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT); - - /* TODO: have a sampler pool to allocate shader-invisible descs which we - * can copy to the desc_set when UpdateDescriptorSets() is called. - */ - sampler->desc.Filter = dzn_translate_sampler_filter(pCreateInfo); - sampler->desc.AddressU = dzn_sampler_translate_addr_mode(pCreateInfo->addressModeU); - sampler->desc.AddressV = dzn_sampler_translate_addr_mode(pCreateInfo->addressModeV); - sampler->desc.AddressW = dzn_sampler_translate_addr_mode(pCreateInfo->addressModeW); - sampler->desc.MipLODBias = pCreateInfo->mipLodBias; - sampler->desc.MaxAnisotropy = pCreateInfo->maxAnisotropy; - sampler->desc.MinLOD = pCreateInfo->minLod; - sampler->desc.MaxLOD = pCreateInfo->maxLod; - - if (pCreateInfo->compareEnable) - sampler->desc.ComparisonFunc = dzn_translate_compare_op(pCreateInfo->compareOp); - - bool reads_border_color = - pCreateInfo->addressModeU == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER || - pCreateInfo->addressModeV == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER || - pCreateInfo->addressModeW == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; - - if (reads_border_color) { - switch (pCreateInfo->borderColor) { - case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK: - case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK: - sampler->desc.BorderColor[0] = 0.0f; - sampler->desc.BorderColor[1] = 0.0f; - sampler->desc.BorderColor[2] = 0.0f; - sampler->desc.BorderColor[3] = - pCreateInfo->borderColor == VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK ? 
0.0f : 1.0f; - sampler->static_border_color = - pCreateInfo->borderColor == VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK ? - D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK : - D3D12_STATIC_BORDER_COLOR_OPAQUE_BLACK; - break; - case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE: - sampler->desc.BorderColor[0] = sampler->desc.BorderColor[1] = 1.0f; - sampler->desc.BorderColor[2] = sampler->desc.BorderColor[3] = 1.0f; - sampler->static_border_color = D3D12_STATIC_BORDER_COLOR_OPAQUE_WHITE; - break; - case VK_BORDER_COLOR_FLOAT_CUSTOM_EXT: - sampler->static_border_color = (D3D12_STATIC_BORDER_COLOR)-1; - for (unsigned i = 0; i < ARRAY_SIZE(sampler->desc.BorderColor); i++) - sampler->desc.BorderColor[i] = pBorderColor->customBorderColor.float32[i]; - break; - case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK: - case VK_BORDER_COLOR_INT_OPAQUE_BLACK: - case VK_BORDER_COLOR_INT_OPAQUE_WHITE: - case VK_BORDER_COLOR_INT_CUSTOM_EXT: - /* FIXME: sampling from integer textures is not supported yet. */ - sampler->static_border_color = (D3D12_STATIC_BORDER_COLOR)-1; - break; - default: - unreachable("Unsupported border color"); - } - } - - *out = dzn_sampler_to_handle(sampler); - return VK_SUCCESS; -} - -VKAPI_ATTR VkResult VKAPI_CALL -dzn_CreateSampler(VkDevice device, - const VkSamplerCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkSampler *pSampler) -{ - return dzn_sampler_create(dzn_device_from_handle(device), - pCreateInfo, pAllocator, pSampler); -} - -VKAPI_ATTR void VKAPI_CALL -dzn_DestroySampler(VkDevice device, - VkSampler sampler, - const VkAllocationCallbacks *pAllocator) -{ - dzn_sampler_destroy(dzn_sampler_from_handle(sampler), pAllocator); -} diff --git a/src/microsoft/vulkan/dzn_image.c b/src/microsoft/vulkan/dzn_image.c new file mode 100644 index 00000000000..2d01dfdecaf --- /dev/null +++ b/src/microsoft/vulkan/dzn_image.c @@ -0,0 +1,1240 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */
+
+#include "dzn_private.h"
+
+#include "vk_alloc.h"
+#include "vk_debug_report.h"
+#include "vk_format.h"
+#include "vk_util.h"
+
+static void
+dzn_image_destroy(struct dzn_image *image,
+                  const VkAllocationCallbacks *pAllocator)
+{
+   if (!image)
+      return;
+
+   struct dzn_device *device = container_of(image->vk.base.device, struct dzn_device, vk);
+
+   if (image->res)
+      ID3D12Resource_Release(image->res);
+
+   vk_image_finish(&image->vk);
+   vk_free2(&device->vk.alloc, pAllocator, image);
+}
+
+static VkResult
+dzn_image_create(struct dzn_device *device,
+                 const VkImageCreateInfo *pCreateInfo,
+                 const VkAllocationCallbacks *pAllocator,
+                 VkImage *out)
+{
+   struct dzn_image *image = (struct dzn_image *)
+      vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*image), 8,
+                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   struct dzn_physical_device *pdev =
+      container_of(device->vk.physical, struct dzn_physical_device, vk);
+
+   if (!image)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   const VkExternalMemoryImageCreateInfo *create_info =
+      (const VkExternalMemoryImageCreateInfo *)
+      vk_find_struct_const(pCreateInfo->pNext, EXTERNAL_MEMORY_IMAGE_CREATE_INFO);
+
+#if 0
+   VkExternalMemoryHandleTypeFlags supported =
+      VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT |
+      VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT |
+      VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_TEXTURE_BIT |
+      VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_TEXTURE_KMT_BIT |
+      VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP_BIT |
+      VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE_BIT;
+
+   if (create_info && (create_info->handleTypes & supported))
+      return dzn_image_from_external(device, pCreateInfo, create_info,
+                                     pAllocator, pImage);
+#endif
+
+#if 0
+   const VkImageSwapchainCreateInfoKHR *swapchain_info = (const VkImageSwapchainCreateInfoKHR *)
+      vk_find_struct_const(pCreateInfo->pNext, IMAGE_SWAPCHAIN_CREATE_INFO_KHR);
+   if (swapchain_info && swapchain_info->swapchain != VK_NULL_HANDLE)
+      return dzn_image_from_swapchain(device, pCreateInfo, swapchain_info,
+                                      pAllocator, pImage);
+#endif
+
+   vk_image_init(&device->vk, &image->vk, pCreateInfo);
+   enum pipe_format pfmt = vk_format_to_pipe_format(image->vk.format);
+
+   if (image->vk.tiling == VK_IMAGE_TILING_LINEAR) {
+      /* Treat linear images as buffers: they should only be used as copy
+       * src/dest, and CopyTextureRegion() can manipulate buffers.
+       * We only support linear tiling on things strictly required by the spec:
+       * "Images created with tiling equal to VK_IMAGE_TILING_LINEAR have
+       * further restrictions on their limits and capabilities compared to
+       * images created with tiling equal to VK_IMAGE_TILING_OPTIMAL. Creation
+       * of images with tiling VK_IMAGE_TILING_LINEAR may not be supported
+       * unless other parameters meet all of the constraints:
+       * - imageType is VK_IMAGE_TYPE_2D
+       * - format is not a depth/stencil format
+       * - mipLevels is 1
+       * - arrayLayers is 1
+       * - samples is VK_SAMPLE_COUNT_1_BIT
+       * - usage only includes VK_IMAGE_USAGE_TRANSFER_SRC_BIT and/or VK_IMAGE_USAGE_TRANSFER_DST_BIT
+       * "
+       */
+      assert(!vk_format_is_depth_or_stencil(pCreateInfo->format));
+      assert(pCreateInfo->mipLevels == 1);
+      assert(pCreateInfo->arrayLayers == 1);
+      assert(pCreateInfo->samples == 1);
+      assert(pCreateInfo->imageType != VK_IMAGE_TYPE_3D);
+      assert(!(pCreateInfo->usage & ~(VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT)));
+      D3D12_RESOURCE_DESC tmp_desc = {
+         .Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D,
+         .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT,
+         .Width = ALIGN(image->vk.extent.width, util_format_get_blockwidth(pfmt)),
+         .Height = (UINT)ALIGN(image->vk.extent.height, util_format_get_blockheight(pfmt)),
+         .DepthOrArraySize = 1,
+         .MipLevels = 1,
+         .Format =
+            dzn_image_get_dxgi_format(pCreateInfo->format, pCreateInfo->usage, 0),
+         .SampleDesc = { .Count = 1, .Quality = 0 },
+         .Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN,
+         .Flags = D3D12_RESOURCE_FLAG_NONE
+      };
+      D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint;
+      uint64_t size = 0;
+      ID3D12Device1_GetCopyableFootprints(device->dev, &tmp_desc, 0, 1, 0, &footprint, NULL, NULL, &size);
+
+      image->linear.row_stride = footprint.Footprint.RowPitch;
+      image->linear.size = size;
+      size *= pCreateInfo->arrayLayers;
+      image->desc.Format = DXGI_FORMAT_UNKNOWN;
+      image->desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
+      image->desc.Width = size;
+      image->desc.Height = 1;
+      image->desc.DepthOrArraySize = 1;
+      image->desc.MipLevels = 1;
+      image->desc.SampleDesc.Count = 1;
+      image->desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
+   } else {
+      image->desc.Format =
+         dzn_image_get_dxgi_format(pCreateInfo->format,
+                                   pCreateInfo->usage & ~VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
+                                   0);
+      image->desc.Dimension = (D3D12_RESOURCE_DIMENSION)(D3D12_RESOURCE_DIMENSION_TEXTURE1D + pCreateInfo->imageType);
+      image->desc.Width = image->vk.extent.width;
+      image->desc.Height = image->vk.extent.height;
+      image->desc.DepthOrArraySize = pCreateInfo->imageType == VK_IMAGE_TYPE_3D ?
+                                     image->vk.extent.depth :
+                                     pCreateInfo->arrayLayers;
+      image->desc.MipLevels = pCreateInfo->mipLevels;
+      image->desc.SampleDesc.Count = pCreateInfo->samples;
+      image->desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
+   }
+
+   if (image->desc.SampleDesc.Count > 1)
+      image->desc.Alignment = D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT;
+   else
+      image->desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
+
+   image->desc.SampleDesc.Quality = 0;
+
+   image->desc.Flags = D3D12_RESOURCE_FLAG_NONE;
+
+   if (image->vk.usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)
+      image->desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
+
+   if (image->vk.usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
+      image->desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL;
+
+      if (!(image->vk.usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
+                               VK_IMAGE_USAGE_STORAGE_BIT |
+                               VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT |
+                               VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
+         image->desc.Flags |= D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE;
+   }
+
+   /* Images with TRANSFER_DST can be cleared or passed as a blit/resolve
+    * destination. Both operations require the RT or DS cap flags.
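+    *
+    * As an illustration (the format here is a hypothetical example, not
+    * taken from this patch): a VK_FORMAT_R8G8B8A8_UNORM image created
+    * with nothing but TRANSFER_DST usage reports
+    * D3D12_FORMAT_SUPPORT1_RENDER_TARGET, so it picks up
+    * D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET below, which is what lets
+    * vkCmdClearColorImage() be implemented with an RTV-based clear,
+    * e.g. ClearRenderTargetView().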
+ */ + if ((image->vk.usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) && + image->vk.tiling == VK_IMAGE_TILING_OPTIMAL) { + + D3D12_FEATURE_DATA_FORMAT_SUPPORT dfmt_info = + dzn_physical_device_get_format_support(pdev, pCreateInfo->format); + if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_RENDER_TARGET) { + image->desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; + } else if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL) { + image->desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; + } else if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_TYPED_UNORDERED_ACCESS_VIEW) { + image->desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + } + } + + if (image->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) + image->desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + + *out = dzn_image_to_handle(image); + return VK_SUCCESS; +} + +DXGI_FORMAT +dzn_image_get_dxgi_format(VkFormat format, + VkImageUsageFlags usage, + VkImageAspectFlags aspects) +{ + enum pipe_format pfmt = vk_format_to_pipe_format(format); + + if (!vk_format_is_depth_or_stencil(format)) + return dzn_pipe_to_dxgi_format(pfmt); + + switch (pfmt) { + case PIPE_FORMAT_Z16_UNORM: + return usage == VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT ? + DXGI_FORMAT_D16_UNORM : DXGI_FORMAT_R16_UNORM; + + case PIPE_FORMAT_Z32_FLOAT: + return usage == VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT ? + DXGI_FORMAT_D32_FLOAT : DXGI_FORMAT_R32_FLOAT; + + case PIPE_FORMAT_Z24X8_UNORM: + return usage == VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT ? + DXGI_FORMAT_D24_UNORM_S8_UINT : DXGI_FORMAT_R24_UNORM_X8_TYPELESS; + + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + if (usage == VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) + return DXGI_FORMAT_D24_UNORM_S8_UINT; + + if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) + return DXGI_FORMAT_R24_UNORM_X8_TYPELESS; + else + return DXGI_FORMAT_X24_TYPELESS_G8_UINT; + + case PIPE_FORMAT_X24S8_UINT: + return usage == VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT ? + DXGI_FORMAT_D24_UNORM_S8_UINT : DXGI_FORMAT_X24_TYPELESS_G8_UINT; + + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + if (usage == VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) + return DXGI_FORMAT_D32_FLOAT_S8X24_UINT; + + if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) + return DXGI_FORMAT_X32_TYPELESS_G8X24_UINT; + else if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) + return DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS; + else + return DXGI_FORMAT_R32G8X24_TYPELESS; + + default: + return dzn_pipe_to_dxgi_format(pfmt); + } +} + +DXGI_FORMAT +dzn_image_get_placed_footprint_format(VkFormat format, + VkImageAspectFlags aspect) +{ + DXGI_FORMAT out = + dzn_image_get_dxgi_format(format, + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + VK_IMAGE_USAGE_TRANSFER_DST_BIT, + aspect); + + switch (out) { + case DXGI_FORMAT_R24_UNORM_X8_TYPELESS: + case DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS: + return DXGI_FORMAT_R32_TYPELESS; + case DXGI_FORMAT_X24_TYPELESS_G8_UINT: + case DXGI_FORMAT_X32_TYPELESS_G8X24_UINT: + return DXGI_FORMAT_R8_TYPELESS; + default: + return out; + } +} + +VkFormat +dzn_image_get_plane_format(VkFormat format, + VkImageAspectFlags aspectMask) +{ + if (aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) + return vk_format_stencil_only(format); + else if (aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT) + return vk_format_depth_only(format); + else + return format; +} + +uint32_t +dzn_image_layers_get_subresource_index(const struct dzn_image *image, + const VkImageSubresourceLayers *subres, + VkImageAspectFlagBits aspect, + uint32_t layer) +{ + int planeSlice = + aspect == VK_IMAGE_ASPECT_STENCIL_BIT ? 
1 : 0; + + return subres->mipLevel + + ((subres->baseArrayLayer + layer) * image->desc.MipLevels) + + (planeSlice * image->desc.MipLevels * image->desc.DepthOrArraySize); +} + +uint32_t +dzn_image_range_get_subresource_index(const struct dzn_image *image, + const VkImageSubresourceRange *subres, + VkImageAspectFlagBits aspect, + uint32_t level, uint32_t layer) +{ + int planeSlice = + aspect == VK_IMAGE_ASPECT_STENCIL_BIT ? 1 : 0; + + return subres->baseMipLevel + level + + ((subres->baseArrayLayer + layer) * image->desc.MipLevels) + + (planeSlice * image->desc.MipLevels * image->desc.DepthOrArraySize); +} + +static uint32_t +dzn_image_get_subresource_index(const struct dzn_image *image, + const VkImageSubresource *subres, + VkImageAspectFlagBits aspect) +{ + int planeSlice = + aspect == VK_IMAGE_ASPECT_STENCIL_BIT ? 1 : 0; + + return subres->mipLevel + + (subres->arrayLayer * image->desc.MipLevels) + + (planeSlice * image->desc.MipLevels * image->desc.DepthOrArraySize); +} + +D3D12_TEXTURE_COPY_LOCATION +dzn_image_get_copy_loc(const struct dzn_image *image, + const VkImageSubresourceLayers *subres, + VkImageAspectFlagBits aspect, + uint32_t layer) +{ + D3D12_TEXTURE_COPY_LOCATION loc = { + .pResource = image->res, + }; + + assert((subres->aspectMask & aspect) != 0); + VkFormat format = dzn_image_get_plane_format(image->vk.format, aspect); + + if (image->desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) { + VkImageUsageFlags usage = + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT; + + assert((subres->baseArrayLayer + layer) == 0); + assert(subres->mipLevel == 0); + loc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + loc.PlacedFootprint.Offset = 0; + loc.PlacedFootprint.Footprint.Format = + dzn_image_get_placed_footprint_format(image->vk.format, aspect); + loc.PlacedFootprint.Footprint.Width = image->vk.extent.width; + loc.PlacedFootprint.Footprint.Height = image->vk.extent.height; + loc.PlacedFootprint.Footprint.Depth = image->vk.extent.depth; + loc.PlacedFootprint.Footprint.RowPitch = image->linear.row_stride; + } else { + loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + loc.SubresourceIndex = dzn_image_layers_get_subresource_index(image, subres, aspect, layer); + } + + return loc; +} + +D3D12_DEPTH_STENCIL_VIEW_DESC +dzn_image_get_dsv_desc(const struct dzn_image *image, + const VkImageSubresourceRange *range, + uint32_t level) +{ + uint32_t layer_count = dzn_get_layer_count(image, range); + D3D12_DEPTH_STENCIL_VIEW_DESC dsv_desc = { + .Format = + dzn_image_get_dxgi_format(image->vk.format, + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, + range->aspectMask), + }; + + switch (image->vk.image_type) { + case VK_IMAGE_TYPE_1D: + dsv_desc.ViewDimension = + image->vk.array_layers > 1 ? + D3D12_DSV_DIMENSION_TEXTURE1DARRAY : + D3D12_DSV_DIMENSION_TEXTURE1D; + break; + case VK_IMAGE_TYPE_2D: + if (image->vk.array_layers > 1) { + dsv_desc.ViewDimension = + image->vk.samples > 1 ? + D3D12_DSV_DIMENSION_TEXTURE2DMSARRAY : + D3D12_DSV_DIMENSION_TEXTURE2DARRAY; + } else { + dsv_desc.ViewDimension = + image->vk.samples > 1 ? 
+ D3D12_DSV_DIMENSION_TEXTURE2DMS : + D3D12_DSV_DIMENSION_TEXTURE2D; + } + break; + default: + unreachable("Invalid image type"); + } + + switch (dsv_desc.ViewDimension) { + case D3D12_DSV_DIMENSION_TEXTURE1D: + dsv_desc.Texture1D.MipSlice = range->baseMipLevel + level; + break; + case D3D12_DSV_DIMENSION_TEXTURE1DARRAY: + dsv_desc.Texture1DArray.MipSlice = range->baseMipLevel + level; + dsv_desc.Texture1DArray.FirstArraySlice = range->baseArrayLayer; + dsv_desc.Texture1DArray.ArraySize = layer_count; + break; + case D3D12_DSV_DIMENSION_TEXTURE2D: + dsv_desc.Texture2D.MipSlice = range->baseMipLevel + level; + break; + case D3D12_DSV_DIMENSION_TEXTURE2DMS: + break; + case D3D12_DSV_DIMENSION_TEXTURE2DARRAY: + dsv_desc.Texture2DArray.MipSlice = range->baseMipLevel + level; + dsv_desc.Texture2DArray.FirstArraySlice = range->baseArrayLayer; + dsv_desc.Texture2DArray.ArraySize = layer_count; + break; + } + + return dsv_desc; +} + +D3D12_RENDER_TARGET_VIEW_DESC +dzn_image_get_rtv_desc(const struct dzn_image *image, + const VkImageSubresourceRange *range, + uint32_t level) +{ + uint32_t layer_count = dzn_get_layer_count(image, range); + D3D12_RENDER_TARGET_VIEW_DESC rtv_desc = { + .Format = + dzn_image_get_dxgi_format(image->vk.format, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + VK_IMAGE_ASPECT_COLOR_BIT), + }; + + switch (image->vk.image_type) { + case VK_IMAGE_TYPE_1D: + rtv_desc.ViewDimension = + image->vk.array_layers > 1 ? + D3D12_RTV_DIMENSION_TEXTURE1DARRAY : D3D12_RTV_DIMENSION_TEXTURE1D; + break; + case VK_IMAGE_TYPE_2D: + if (image->vk.array_layers > 1) { + rtv_desc.ViewDimension = + image->vk.samples > 1 ? + D3D12_RTV_DIMENSION_TEXTURE2DMSARRAY : + D3D12_RTV_DIMENSION_TEXTURE2DARRAY; + } else { + rtv_desc.ViewDimension = + image->vk.samples > 1 ? + D3D12_RTV_DIMENSION_TEXTURE2DMS : + D3D12_RTV_DIMENSION_TEXTURE2D; + } + break; + case VK_IMAGE_TYPE_3D: + rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE3D; + break; + default: unreachable("Invalid image type\n"); + } + + switch (rtv_desc.ViewDimension) { + case D3D12_RTV_DIMENSION_TEXTURE1D: + rtv_desc.Texture1D.MipSlice = range->baseMipLevel + level; + break; + case D3D12_RTV_DIMENSION_TEXTURE1DARRAY: + rtv_desc.Texture1DArray.MipSlice = range->baseMipLevel + level; + rtv_desc.Texture1DArray.FirstArraySlice = range->baseArrayLayer; + rtv_desc.Texture1DArray.ArraySize = layer_count; + break; + case D3D12_RTV_DIMENSION_TEXTURE2D: + rtv_desc.Texture2D.MipSlice = range->baseMipLevel + level; + if (range->aspectMask & VK_IMAGE_ASPECT_PLANE_1_BIT) + rtv_desc.Texture2D.PlaneSlice = 1; + else if (range->aspectMask & VK_IMAGE_ASPECT_PLANE_2_BIT) + rtv_desc.Texture2D.PlaneSlice = 2; + else + rtv_desc.Texture2D.PlaneSlice = 0; + break; + case D3D12_RTV_DIMENSION_TEXTURE2DMS: + break; + case D3D12_RTV_DIMENSION_TEXTURE2DARRAY: + rtv_desc.Texture2DArray.MipSlice = range->baseMipLevel + level; + rtv_desc.Texture2DArray.FirstArraySlice = range->baseArrayLayer; + rtv_desc.Texture2DArray.ArraySize = layer_count; + if (range->aspectMask & VK_IMAGE_ASPECT_PLANE_1_BIT) + rtv_desc.Texture2DArray.PlaneSlice = 1; + else if (range->aspectMask & VK_IMAGE_ASPECT_PLANE_2_BIT) + rtv_desc.Texture2DArray.PlaneSlice = 2; + else + rtv_desc.Texture2DArray.PlaneSlice = 0; + break; + case D3D12_RTV_DIMENSION_TEXTURE2DMSARRAY: + rtv_desc.Texture2DMSArray.FirstArraySlice = range->baseArrayLayer; + rtv_desc.Texture2DMSArray.ArraySize = layer_count; + break; + case D3D12_RTV_DIMENSION_TEXTURE3D: + rtv_desc.Texture3D.MipSlice = range->baseMipLevel + level; + 
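/* 3D render targets have no array layers in D3D12: the Vulkan layer
+       * range maps to a range of W (depth) slices instead, hence
+       * FirstWSlice/WSize below rather than FirstArraySlice/ArraySize.
+       */
+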
rtv_desc.Texture3D.FirstWSlice = range->baseArrayLayer;
+      rtv_desc.Texture3D.WSize =
+         range->layerCount == VK_REMAINING_ARRAY_LAYERS ? -1 : layer_count;
+      break;
+   }
+
+   return rtv_desc;
+}
+
+D3D12_RESOURCE_STATES
+dzn_image_layout_to_state(VkImageLayout layout, VkImageAspectFlagBits aspect)
+{
+   switch (layout) {
+   case VK_IMAGE_LAYOUT_PREINITIALIZED:
+   case VK_IMAGE_LAYOUT_UNDEFINED:
+   case VK_IMAGE_LAYOUT_GENERAL:
+      /* GENERAL allows any kind of access, so the best we can do is
+       * COMMON and rely on D3D12's implicit state promotion/decay.
+       */
+   case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR:
+      return D3D12_RESOURCE_STATE_COMMON;
+
+   case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
+      return D3D12_RESOURCE_STATE_COPY_DEST;
+
+   case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
+      return D3D12_RESOURCE_STATE_COPY_SOURCE;
+
+   case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
+      return D3D12_RESOURCE_STATE_RENDER_TARGET;
+
+   case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
+   case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL:
+      return D3D12_RESOURCE_STATE_DEPTH_WRITE;
+
+   case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
+   case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL:
+      return D3D12_RESOURCE_STATE_DEPTH_READ;
+
+   case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL:
+      return aspect == VK_IMAGE_ASPECT_STENCIL_BIT ?
+             D3D12_RESOURCE_STATE_DEPTH_WRITE :
+             D3D12_RESOURCE_STATE_DEPTH_READ;
+
+   case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL:
+      return aspect == VK_IMAGE_ASPECT_STENCIL_BIT ?
+             D3D12_RESOURCE_STATE_DEPTH_READ :
+             D3D12_RESOURCE_STATE_DEPTH_WRITE;
+
+   case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
+      return D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE;
+
+   default:
+      unreachable("not implemented");
+   }
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_CreateImage(VkDevice device,
+                const VkImageCreateInfo *pCreateInfo,
+                const VkAllocationCallbacks *pAllocator,
+                VkImage *pImage)
+{
+   return dzn_image_create(dzn_device_from_handle(device),
+                           pCreateInfo, pAllocator, pImage);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_DestroyImage(VkDevice device, VkImage image,
+                 const VkAllocationCallbacks *pAllocator)
+{
+   dzn_image_destroy(dzn_image_from_handle(image), pAllocator);
+}
+
+static struct dzn_image *
+dzn_swapchain_get_image(struct dzn_device *device,
+                        VkSwapchainKHR swapchain,
+                        uint32_t index)
+{
+   uint32_t n_images = index + 1;
+   STACK_ARRAY(VkImage, images, n_images);
+   struct dzn_image *image = NULL;
+
+   VkResult result = wsi_common_get_images(swapchain, &n_images, images);
+
+   if (result == VK_SUCCESS || result == VK_INCOMPLETE)
+      image = dzn_image_from_handle(images[index]);
+
+   STACK_ARRAY_FINISH(images);
+   return image;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_BindImageMemory2(VkDevice dev,
+                     uint32_t bindInfoCount,
+                     const VkBindImageMemoryInfo *pBindInfos)
+{
+   VK_FROM_HANDLE(dzn_device, device, dev);
+
+   for (uint32_t i = 0; i < bindInfoCount; i++) {
+      const VkBindImageMemoryInfo *bind_info = &pBindInfos[i];
+      VK_FROM_HANDLE(dzn_device_memory, mem, bind_info->memory);
+      VK_FROM_HANDLE(dzn_image, image, bind_info->image);
+      bool did_bind = false;
+
+      vk_foreach_struct_const(s, bind_info->pNext) {
+         switch (s->sType) {
+         case VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_SWAPCHAIN_INFO_KHR: {
+            const VkBindImageMemorySwapchainInfoKHR *swapchain_info =
+               (const VkBindImageMemorySwapchainInfoKHR *) s;
+            struct dzn_image *swapchain_image =
+               dzn_swapchain_get_image(device,
+                                       swapchain_info->swapchain,
+                                       swapchain_info->imageIndex);
+            assert(swapchain_image);
+            assert(image->vk.aspects == swapchain_image->vk.aspects);
+            assert(mem == NULL);
+
+            /* TODO: bind the image to the swapchain image's memory */
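+            /* One plausible approach (a sketch only, not what this code
+             * does yet) would be to alias the swapchain image's resource
+             * rather than create a new placed resource:
+             *
+             *    image->res = swapchain_image->res;
+             *    ID3D12Resource_AddRef(image->res);
+             *    did_bind = true;
+             */
+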
assert(false);
+
+            did_bind = true;
+            break;
+         }
+         default:
+            dzn_debug_ignored_stype(s->sType);
+            break;
+         }
+      }
+
+      if (!did_bind) {
+         image->mem = mem;
+         image->mem_offset = bind_info->memoryOffset;
+         if (FAILED(ID3D12Device1_CreatePlacedResource(device->dev, mem->heap,
+                                                       bind_info->memoryOffset,
+                                                       &image->desc,
+                                                       mem->initial_state,
+                                                       NULL,
+                                                       &IID_ID3D12Resource,
+                                                       (void **)&image->res)))
+            return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+         did_bind = true;
+      }
+   }
+
+   return VK_SUCCESS;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_GetImageMemoryRequirements2(VkDevice _device,
+                                const VkImageMemoryRequirementsInfo2 *pInfo,
+                                VkMemoryRequirements2 *pMemoryRequirements)
+{
+   VK_FROM_HANDLE(dzn_device, device, _device);
+   VK_FROM_HANDLE(dzn_image, image, pInfo->image);
+   struct dzn_physical_device *pdev =
+      container_of(device->vk.physical, struct dzn_physical_device, vk);
+
+   vk_foreach_struct_const(ext, pInfo->pNext) {
+      dzn_debug_ignored_stype(ext->sType);
+   }
+
+   vk_foreach_struct(ext, pMemoryRequirements->pNext) {
+      switch (ext->sType) {
+      case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
+         VkMemoryDedicatedRequirements *requirements =
+            (VkMemoryDedicatedRequirements *)ext;
+         /* TODO: figure out dedicated allocations */
+         requirements->prefersDedicatedAllocation = false;
+         requirements->requiresDedicatedAllocation = false;
+         break;
+      }
+
+      default:
+         dzn_debug_ignored_stype(ext->sType);
+         break;
+      }
+   }
+
+   D3D12_RESOURCE_ALLOCATION_INFO info;
+   ID3D12Device1_GetResourceAllocationInfo(device->dev, &info, 0, 1, &image->desc);
+
+   pMemoryRequirements->memoryRequirements = (VkMemoryRequirements) {
+      .size = info.SizeInBytes,
+      .alignment = info.Alignment,
+      .memoryTypeBits =
+         dzn_physical_device_get_mem_type_mask_for_resource(pdev, &image->desc),
+   };
+
+   /*
+    * MSAA images need memory to be aligned on
+    * D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT (4MB), but the memory
+    * allocation function doesn't know what the memory will be used for,
+    * and forcing all allocations to be 4MB-aligned has a cost, so let's
+    * force MSAA resources to be at least 4MB, such that the allocation
+    * logic can consider sub-4MB allocations to not require this 4MB alignment.
+    */
+   if (image->vk.samples > 1 &&
+       pMemoryRequirements->memoryRequirements.size < D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT)
+      pMemoryRequirements->memoryRequirements.size = D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_GetImageSubresourceLayout(VkDevice _device,
+                              VkImage _image,
+                              const VkImageSubresource *subresource,
+                              VkSubresourceLayout *layout)
+{
+   VK_FROM_HANDLE(dzn_device, device, _device);
+   VK_FROM_HANDLE(dzn_image, image, _image);
+
+   if (image->desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) {
+      assert(subresource->arrayLayer == 0);
+      assert(subresource->mipLevel == 0);
+      layout->offset = 0;
+      layout->rowPitch = image->linear.row_stride;
+      layout->depthPitch = 0;
+      layout->arrayPitch = 0;
+      layout->size = image->linear.size;
+   } else {
+      UINT subres_index =
+         dzn_image_get_subresource_index(image, subresource,
+                                         (VkImageAspectFlagBits)subresource->aspectMask);
+      D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint;
+      UINT num_rows;
+      UINT64 row_size, total_size;
+      ID3D12Device1_GetCopyableFootprints(device->dev, &image->desc,
+                                          subres_index, 1,
+                                          0, /* BaseOffset */
+                                          &footprint,
+                                          &num_rows, &row_size,
+                                          &total_size);
+
+      layout->offset = footprint.Offset;
+      layout->rowPitch = footprint.Footprint.RowPitch;
+      layout->depthPitch = layout->rowPitch * footprint.Footprint.Height;
+      /* Array layers are separate D3D12 subresources, so there is no
+       * meaningful array pitch within a single footprint; report the
+       * depth pitch instead.
+       */
+      layout->arrayPitch = layout->depthPitch;
+      layout->size = total_size;
+   }
+}
+
+static D3D12_SHADER_COMPONENT_MAPPING
+translate_swizzle(VkComponentSwizzle in, uint32_t comp)
+{
+   switch (in) {
+   case VK_COMPONENT_SWIZZLE_IDENTITY:
+      return (D3D12_SHADER_COMPONENT_MAPPING)
+             (comp + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0);
+   case VK_COMPONENT_SWIZZLE_ZERO:
+      return D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0;
+   case VK_COMPONENT_SWIZZLE_ONE:
+      return D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_1;
+   case VK_COMPONENT_SWIZZLE_R:
+      return D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0;
+   case VK_COMPONENT_SWIZZLE_G:
+      return D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_1;
+   case VK_COMPONENT_SWIZZLE_B:
+      return D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_2;
+   case VK_COMPONENT_SWIZZLE_A:
+      return D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_3;
+   default: unreachable("Invalid swizzle");
+   }
+}
+
+static void
+dzn_image_view_prepare_srv_desc(struct dzn_image_view *iview)
+{
+   uint32_t plane_slice = (iview->vk.aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? 1 : 0;
+   bool ms = iview->vk.image->samples > 1;
+   uint32_t layers_per_elem =
+      (iview->vk.view_type == VK_IMAGE_VIEW_TYPE_CUBE ||
+       iview->vk.view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY) ?
+      6 : 1;
+   bool use_array = (iview->vk.base_array_layer / layers_per_elem) > 0 ||
+                    (iview->vk.layer_count / layers_per_elem) > 1;
+
+   iview->srv_desc = (D3D12_SHADER_RESOURCE_VIEW_DESC) {
+      .Format =
+         dzn_image_get_dxgi_format(iview->vk.format,
+                                   iview->vk.image->usage & ~VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
+                                   iview->vk.aspects),
+   };
+
+   D3D12_SHADER_COMPONENT_MAPPING swz[] = {
+      translate_swizzle(iview->vk.swizzle.r, 0),
+      translate_swizzle(iview->vk.swizzle.g, 1),
+      translate_swizzle(iview->vk.swizzle.b, 2),
+      translate_swizzle(iview->vk.swizzle.a, 3),
+   };
+
+   /* Swap components to fake B4G4R4A4 support.
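+    * E.g. an identity swizzle on the R channel resolves to
+    * FROM_MEMORY_COMPONENT_0 in translate_swizzle() above, and
+    * bgra4_remap[] below turns that into FROM_MEMORY_COMPONENT_1,
+    * assuming the chosen DXGI format stores that channel in the second
+    * memory component.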
*/ + if (iview->vk.format == VK_FORMAT_B4G4R4A4_UNORM_PACK16) { + static const D3D12_SHADER_COMPONENT_MAPPING bgra4_remap[] = { + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_1, + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0, + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_3, + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_2, + D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0, + D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_1, + }; + + for (uint32_t i = 0; i < ARRAY_SIZE(swz); i++) + swz[i] = bgra4_remap[swz[i]]; + } + + iview->srv_desc.Shader4ComponentMapping = + D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(swz[0], swz[1], swz[2], swz[3]); + + switch (iview->vk.view_type) { + case VK_IMAGE_VIEW_TYPE_1D_ARRAY: + case VK_IMAGE_VIEW_TYPE_1D: + if (use_array) { + iview->srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1DARRAY; + iview->srv_desc.Texture1DArray.MostDetailedMip = iview->vk.base_mip_level; + iview->srv_desc.Texture1DArray.MipLevels = iview->vk.level_count; + iview->srv_desc.Texture1DArray.FirstArraySlice = iview->vk.base_array_layer; + iview->srv_desc.Texture1DArray.ArraySize = iview->vk.layer_count; + } else { + iview->srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1D; + iview->srv_desc.Texture1D.MostDetailedMip = iview->vk.base_mip_level; + iview->srv_desc.Texture1D.MipLevels = iview->vk.level_count; + } + break; + + case VK_IMAGE_VIEW_TYPE_2D_ARRAY: + case VK_IMAGE_VIEW_TYPE_2D: + if (use_array && ms) { + iview->srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY; + iview->srv_desc.Texture2DMSArray.FirstArraySlice = iview->vk.base_array_layer; + iview->srv_desc.Texture2DMSArray.ArraySize = iview->vk.layer_count; + } else if (use_array && !ms) { + iview->srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY; + iview->srv_desc.Texture2DArray.MostDetailedMip = iview->vk.base_mip_level; + iview->srv_desc.Texture2DArray.MipLevels = iview->vk.level_count; + iview->srv_desc.Texture2DArray.FirstArraySlice = iview->vk.base_array_layer; + iview->srv_desc.Texture2DArray.ArraySize = iview->vk.layer_count; + iview->srv_desc.Texture2DArray.PlaneSlice = plane_slice; + } else if (!use_array && ms) { + iview->srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DMS; + } else { + iview->srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + iview->srv_desc.Texture2D.MostDetailedMip = iview->vk.base_mip_level; + iview->srv_desc.Texture2D.MipLevels = iview->vk.level_count; + iview->srv_desc.Texture2D.PlaneSlice = plane_slice; + } + break; + + case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: + case VK_IMAGE_VIEW_TYPE_CUBE: + if (use_array) { + iview->srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBEARRAY; + iview->srv_desc.TextureCubeArray.MostDetailedMip = iview->vk.base_mip_level; + iview->srv_desc.TextureCubeArray.MipLevels = iview->vk.level_count; + iview->srv_desc.TextureCubeArray.First2DArrayFace = iview->vk.base_array_layer; + iview->srv_desc.TextureCubeArray.NumCubes = iview->vk.layer_count / 6; + } else { + iview->srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBE; + iview->srv_desc.TextureCube.MostDetailedMip = iview->vk.base_mip_level; + iview->srv_desc.TextureCube.MipLevels = iview->vk.level_count; + } + break; + + case VK_IMAGE_VIEW_TYPE_3D: + iview->srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D; + iview->srv_desc.Texture3D.MostDetailedMip = iview->vk.base_mip_level; + iview->srv_desc.Texture3D.MipLevels = iview->vk.level_count; + break; + + default: unreachable("Invalid view type"); + } +} + +static void 
+dzn_image_view_prepare_uav_desc(struct dzn_image_view *iview) +{ + bool use_array = iview->vk.base_array_layer > 0 || iview->vk.layer_count > 1; + + assert(iview->vk.image->samples == 1); + + iview->uav_desc = (D3D12_UNORDERED_ACCESS_VIEW_DESC) { + .Format = + dzn_image_get_dxgi_format(iview->vk.format, + VK_IMAGE_USAGE_STORAGE_BIT, + iview->vk.aspects), + }; + + switch (iview->vk.view_type) { + case VK_IMAGE_VIEW_TYPE_1D: + case VK_IMAGE_VIEW_TYPE_1D_ARRAY: + if (use_array) { + iview->uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE1DARRAY; + iview->uav_desc.Texture1DArray.MipSlice = iview->vk.base_mip_level; + iview->uav_desc.Texture1DArray.FirstArraySlice = iview->vk.base_array_layer; + iview->uav_desc.Texture1DArray.ArraySize = iview->vk.layer_count; + } else { + iview->uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE1D; + iview->uav_desc.Texture1D.MipSlice = iview->vk.base_mip_level; + } + break; + + case VK_IMAGE_VIEW_TYPE_2D: + case VK_IMAGE_VIEW_TYPE_2D_ARRAY: + case VK_IMAGE_VIEW_TYPE_CUBE: + case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: + if (use_array) { + iview->uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2DARRAY; + iview->uav_desc.Texture2DArray.PlaneSlice = 0; + iview->uav_desc.Texture2DArray.MipSlice = iview->vk.base_mip_level; + iview->uav_desc.Texture2DArray.FirstArraySlice = iview->vk.base_array_layer; + iview->uav_desc.Texture2DArray.ArraySize = iview->vk.layer_count; + } else { + iview->uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D; + iview->uav_desc.Texture2D.MipSlice = iview->vk.base_mip_level; + iview->uav_desc.Texture2D.PlaneSlice = 0; + } + break; + case VK_IMAGE_VIEW_TYPE_3D: + iview->uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE3D; + iview->uav_desc.Texture3D.MipSlice = iview->vk.base_mip_level; + iview->uav_desc.Texture3D.FirstWSlice = 0; + iview->uav_desc.Texture3D.WSize = iview->vk.extent.depth; + break; + default: unreachable("Invalid type"); + } +} + +static void +dzn_image_view_prepare_rtv_desc(struct dzn_image_view *iview) +{ + bool use_array = iview->vk.base_array_layer > 0 || iview->vk.layer_count > 1; + bool from_3d_image = iview->vk.image->image_type == VK_IMAGE_TYPE_3D; + bool ms = iview->vk.image->samples > 1; + uint32_t plane_slice = + (iview->vk.aspects & VK_IMAGE_ASPECT_PLANE_2_BIT) ? 2 : + (iview->vk.aspects & VK_IMAGE_ASPECT_PLANE_1_BIT) ? 
1 : 0; + + assert(iview->vk.level_count == 1); + + iview->rtv_desc = (D3D12_RENDER_TARGET_VIEW_DESC) { + .Format = + dzn_image_get_dxgi_format(iview->vk.format, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + iview->vk.aspects), + }; + + switch (iview->vk.view_type) { + case VK_IMAGE_VIEW_TYPE_1D: + case VK_IMAGE_VIEW_TYPE_1D_ARRAY: + if (use_array) { + iview->rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE1DARRAY; + iview->rtv_desc.Texture1DArray.MipSlice = iview->vk.base_mip_level; + iview->rtv_desc.Texture1DArray.FirstArraySlice = iview->vk.base_array_layer; + iview->rtv_desc.Texture1DArray.ArraySize = iview->vk.layer_count; + } else { + iview->rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE1D; + iview->rtv_desc.Texture1D.MipSlice = iview->vk.base_mip_level; + } + break; + + case VK_IMAGE_VIEW_TYPE_2D: + case VK_IMAGE_VIEW_TYPE_2D_ARRAY: + case VK_IMAGE_VIEW_TYPE_CUBE: + case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: + if (from_3d_image) { + iview->rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE3D; + iview->rtv_desc.Texture3D.MipSlice = iview->vk.base_mip_level; + iview->rtv_desc.Texture3D.FirstWSlice = iview->vk.base_array_layer; + iview->rtv_desc.Texture3D.WSize = iview->vk.layer_count; + } else if (use_array && ms) { + iview->rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DMSARRAY; + iview->rtv_desc.Texture2DMSArray.FirstArraySlice = iview->vk.base_array_layer; + iview->rtv_desc.Texture2DMSArray.ArraySize = iview->vk.layer_count; + } else if (use_array && !ms) { + iview->rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DARRAY; + iview->rtv_desc.Texture2DArray.MipSlice = iview->vk.base_mip_level; + iview->rtv_desc.Texture2DArray.FirstArraySlice = iview->vk.base_array_layer; + iview->rtv_desc.Texture2DArray.ArraySize = iview->vk.layer_count; + iview->rtv_desc.Texture2DArray.PlaneSlice = plane_slice; + } else if (!use_array && ms) { + iview->rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DMS; + } else { + iview->rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; + iview->rtv_desc.Texture2D.MipSlice = iview->vk.base_mip_level; + iview->rtv_desc.Texture2D.PlaneSlice = plane_slice; + } + break; + + case VK_IMAGE_VIEW_TYPE_3D: + iview->rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE3D; + iview->rtv_desc.Texture3D.MipSlice = iview->vk.base_mip_level; + iview->rtv_desc.Texture3D.FirstWSlice = 0; + iview->rtv_desc.Texture3D.WSize = iview->vk.extent.depth; + break; + + default: unreachable("Invalid view type"); + } +} + +static void +dzn_image_view_prepare_dsv_desc(struct dzn_image_view *iview) +{ + bool use_array = iview->vk.base_array_layer > 0 || iview->vk.layer_count > 1; + bool ms = iview->vk.image->samples > 1; + + iview->dsv_desc = (D3D12_DEPTH_STENCIL_VIEW_DESC) { + .Format = + dzn_image_get_dxgi_format(iview->vk.format, + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, + iview->vk.aspects), + }; + + switch (iview->vk.view_type) { + case VK_IMAGE_VIEW_TYPE_1D: + case VK_IMAGE_VIEW_TYPE_1D_ARRAY: + if (use_array) { + iview->dsv_desc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE1DARRAY; + iview->dsv_desc.Texture1DArray.MipSlice = iview->vk.base_mip_level; + iview->dsv_desc.Texture1DArray.FirstArraySlice = iview->vk.base_array_layer; + iview->dsv_desc.Texture1DArray.ArraySize = iview->vk.layer_count; + } else { + iview->dsv_desc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE1D; + iview->dsv_desc.Texture1D.MipSlice = iview->vk.base_mip_level; + } + break; + + case VK_IMAGE_VIEW_TYPE_2D: + case VK_IMAGE_VIEW_TYPE_2D_ARRAY: + case VK_IMAGE_VIEW_TYPE_CUBE: + case 
VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: + if (use_array && ms) { + iview->dsv_desc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2DMSARRAY; + iview->dsv_desc.Texture2DMSArray.FirstArraySlice = iview->vk.base_array_layer; + iview->dsv_desc.Texture2DMSArray.ArraySize = iview->vk.layer_count; + } else if (use_array && !ms) { + iview->dsv_desc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2DARRAY; + iview->dsv_desc.Texture2DArray.MipSlice = iview->vk.base_mip_level; + iview->dsv_desc.Texture2DArray.FirstArraySlice = iview->vk.base_array_layer; + iview->dsv_desc.Texture2DArray.ArraySize = iview->vk.layer_count; + } else if (!use_array && ms) { + iview->dsv_desc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2DMS; + } else { + iview->dsv_desc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2D; + iview->dsv_desc.Texture2D.MipSlice = iview->vk.base_mip_level; + } + break; + + default: unreachable("Invalid view type"); + } +} + +void +dzn_image_view_finish(struct dzn_image_view *iview) +{ + vk_image_view_finish(&iview->vk); +} + +void +dzn_image_view_init(struct dzn_device *device, + struct dzn_image_view *iview, + const VkImageViewCreateInfo *pCreateInfo) +{ + VK_FROM_HANDLE(dzn_image, image, pCreateInfo->image); + + const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; + uint32_t level_count = dzn_get_level_count(image, range); + uint32_t layer_count = dzn_get_layer_count(image, range); + uint32_t plane_slice = + pCreateInfo->subresourceRange.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT ? 1 : 0; + + vk_image_view_init(&device->vk, &iview->vk, pCreateInfo); + + assert(layer_count > 0); + assert(range->baseMipLevel < image->vk.mip_levels); + + /* View usage should be a subset of image usage */ + assert(iview->vk.usage & (VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_STORAGE_BIT | + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | + VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT | + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)); + + switch (image->vk.image_type) { + default: + unreachable("bad VkImageType"); + case VK_IMAGE_TYPE_1D: + case VK_IMAGE_TYPE_2D: + assert(range->baseArrayLayer + dzn_get_layer_count(image, range) - 1 <= image->vk.array_layers); + break; + case VK_IMAGE_TYPE_3D: + assert(range->baseArrayLayer + dzn_get_layer_count(image, range) - 1 + <= u_minify(image->vk.extent.depth, range->baseMipLevel)); + break; + } + + dzn_image_view_prepare_srv_desc(iview); + + if (iview->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) + dzn_image_view_prepare_uav_desc(iview); + + if (iview->vk.usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) + dzn_image_view_prepare_rtv_desc(iview); + + if (iview->vk.usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) + dzn_image_view_prepare_dsv_desc(iview); +} + +static void +dzn_image_view_destroy(struct dzn_image_view *iview, + const VkAllocationCallbacks *pAllocator) +{ + if (!iview) + return; + + struct dzn_device *device = container_of(iview->vk.base.device, struct dzn_device, vk); + + vk_image_view_finish(&iview->vk); + vk_free2(&device->vk.alloc, pAllocator, iview); +} + +static VkResult +dzn_image_view_create(struct dzn_device *device, + const VkImageViewCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkImageView *out) +{ + struct dzn_image_view *iview = (struct dzn_image_view *) + vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*iview), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!iview) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + dzn_image_view_init(device, iview, pCreateInfo); + + *out = dzn_image_view_to_handle(iview); 
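+   /* Note: dzn_image_view_init() has no failure path (it returns void),
+    * so there is nothing to unwind between initialization and returning
+    * the handle.
+    */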
+ return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_CreateImageView(VkDevice device, + const VkImageViewCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkImageView *pView) +{ + return dzn_image_view_create(dzn_device_from_handle(device), pCreateInfo, + pAllocator, pView); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_DestroyImageView(VkDevice device, + VkImageView imageView, + const VkAllocationCallbacks *pAllocator) +{ + dzn_image_view_destroy(dzn_image_view_from_handle(imageView), pAllocator); +} + +static void +dzn_buffer_view_destroy(struct dzn_buffer_view *bview, + const VkAllocationCallbacks *pAllocator) +{ + if (!bview) + return; + + struct dzn_device *device = container_of(bview->base.device, struct dzn_device, vk); + + vk_object_base_finish(&bview->base); + vk_free2(&device->vk.alloc, pAllocator, bview); +} + +static VkResult +dzn_buffer_view_create(struct dzn_device *device, + const VkBufferViewCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkBufferView *out) +{ + VK_FROM_HANDLE(dzn_buffer, buf, pCreateInfo->buffer); + + struct dzn_buffer_view *bview = (struct dzn_buffer_view *) + vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*bview), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!bview) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + vk_object_base_init(&device->vk, &bview->base, VK_OBJECT_TYPE_BUFFER_VIEW); + + enum pipe_format pfmt = vk_format_to_pipe_format(pCreateInfo->format); + unsigned blksz = util_format_get_blocksize(pfmt); + VkDeviceSize size = + pCreateInfo->range == VK_WHOLE_SIZE ? + buf->size - pCreateInfo->offset : pCreateInfo->range; + + bview->buffer = buf; + if (buf->usage & + (VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | + VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT)) { + bview->srv_desc = (D3D12_SHADER_RESOURCE_VIEW_DESC) { + .Format = dzn_buffer_get_dxgi_format(pCreateInfo->format), + .ViewDimension = D3D12_SRV_DIMENSION_BUFFER, + .Shader4ComponentMapping = + D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, + .Buffer = { + .FirstElement = pCreateInfo->offset / blksz, + .NumElements = (UINT)(size / blksz), + .Flags = D3D12_BUFFER_SRV_FLAG_NONE, + }, + }; + } + + if (buf->usage & VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT) { + bview->uav_desc = (D3D12_UNORDERED_ACCESS_VIEW_DESC) { + .Format = dzn_buffer_get_dxgi_format(pCreateInfo->format), + .ViewDimension = D3D12_UAV_DIMENSION_BUFFER, + .Buffer = { + .FirstElement = pCreateInfo->offset / blksz, + .NumElements = (UINT)(size / blksz), + .Flags = D3D12_BUFFER_UAV_FLAG_NONE, + }, + }; + } + + *out = dzn_buffer_view_to_handle(bview); + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_CreateBufferView(VkDevice device, + const VkBufferViewCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkBufferView *pView) +{ + return dzn_buffer_view_create(dzn_device_from_handle(device), + pCreateInfo, pAllocator, pView); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_DestroyBufferView(VkDevice device, + VkBufferView bufferView, + const VkAllocationCallbacks *pAllocator) +{ + dzn_buffer_view_destroy(dzn_buffer_view_from_handle(bufferView), pAllocator); +} diff --git a/src/microsoft/vulkan/dzn_image.cpp b/src/microsoft/vulkan/dzn_image.cpp deleted file mode 100644 index 48f923ca579..00000000000 --- a/src/microsoft/vulkan/dzn_image.cpp +++ /dev/null @@ -1,1240 +0,0 @@ -/* - * Copyright © Microsoft Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the 
"Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "dzn_private.h" - -#include "vk_alloc.h" -#include "vk_debug_report.h" -#include "vk_format.h" -#include "vk_util.h" - -static void -dzn_image_destroy(struct dzn_image *image, - const VkAllocationCallbacks *pAllocator) -{ - if (!image) - return; - - struct dzn_device *device = container_of(image->vk.base.device, struct dzn_device, vk); - - if (image->res) - ID3D12Resource_Release(image->res); - - vk_image_finish(&image->vk); - vk_free2(&device->vk.alloc, pAllocator, image); -} - -static VkResult -dzn_image_create(struct dzn_device *device, - const VkImageCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkImage *out) -{ - struct dzn_image *image = (struct dzn_image *) - vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*image), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - struct dzn_physical_device *pdev = - container_of(device->vk.physical, struct dzn_physical_device, vk); - - if (!image) - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - - const VkExternalMemoryImageCreateInfo *create_info = - (const VkExternalMemoryImageCreateInfo *) - vk_find_struct_const(pCreateInfo->pNext, EXTERNAL_MEMORY_IMAGE_CREATE_INFO); - -#if 0 - VkExternalMemoryHandleTypeFlags supported = - VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT | - VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT | - VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_TEXTURE_BIT | - VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_TEXTURE_KMT_BIT | - VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP_BIT | - VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE_BIT; - - if (create_info && (create_info->handleTypes & supported)) - return dzn_image_from_external(device, pCreateInfo, create_info, - pAllocator, pImage); -#endif - -#if 0 - const VkImageSwapchainCreateInfoKHR *swapchain_info = (const VkImageSwapchainCreateInfoKHR *) - vk_find_struct_const(pCreateInfo->pNext, IMAGE_SWAPCHAIN_CREATE_INFO_KHR); - if (swapchain_info && swapchain_info->swapchain != VK_NULL_HANDLE) - return dzn_image_from_swapchain(device, pCreateInfo, swapchain_info, - pAllocator, pImage); -#endif - - vk_image_init(&device->vk, &image->vk, pCreateInfo); - enum pipe_format pfmt = vk_format_to_pipe_format(image->vk.format); - - if (image->vk.tiling == VK_IMAGE_TILING_LINEAR) { - /* Treat linear images as buffers: they should only be used as copy - * src/dest, and CopyTextureResource() can manipulate buffers. 
- * We only support linear tiling on things strictly required by the spec: - * "Images created with tiling equal to VK_IMAGE_TILING_LINEAR have - * further restrictions on their limits and capabilities compared to - * images created with tiling equal to VK_IMAGE_TILING_OPTIMAL. Creation - * of images with tiling VK_IMAGE_TILING_LINEAR may not be supported - * unless other parameters meet all of the constraints: - * - imageType is VK_IMAGE_TYPE_2D - * - format is not a depth/stencil format - * - mipLevels is 1 - * - arrayLayers is 1 - * - samples is VK_SAMPLE_COUNT_1_BIT - * - usage only includes VK_IMAGE_USAGE_TRANSFER_SRC_BIT and/or VK_IMAGE_USAGE_TRANSFER_DST_BIT - * " - */ - assert(!vk_format_is_depth_or_stencil(pCreateInfo->format)); - assert(pCreateInfo->mipLevels == 1); - assert(pCreateInfo->arrayLayers == 1); - assert(pCreateInfo->samples == 1); - assert(pCreateInfo->imageType != VK_IMAGE_TYPE_3D); - assert(!(pCreateInfo->usage & ~(VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT))); - D3D12_RESOURCE_DESC tmp_desc = { - .Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D, - .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT, - .Width = ALIGN(image->vk.extent.width, util_format_get_blockwidth(pfmt)), - .Height = (UINT)ALIGN(image->vk.extent.height, util_format_get_blockheight(pfmt)), - .DepthOrArraySize = 1, - .MipLevels = 1, - .Format = - dzn_image_get_dxgi_format(pCreateInfo->format, pCreateInfo->usage, 0), - .SampleDesc = { .Count = 1, .Quality = 0 }, - .Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN, - .Flags = D3D12_RESOURCE_FLAG_NONE - }; - D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint; - uint64_t size = 0; - ID3D12Device1_GetCopyableFootprints(device->dev, &tmp_desc, 0, 1, 0, &footprint, NULL, NULL, &size); - - image->linear.row_stride = footprint.Footprint.RowPitch; - image->linear.size = size; - size *= pCreateInfo->arrayLayers; - image->desc.Format = DXGI_FORMAT_UNKNOWN; - image->desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; - image->desc.Width = size; - image->desc.Height = 1; - image->desc.DepthOrArraySize = 1; - image->desc.MipLevels = 1; - image->desc.SampleDesc.Count = 1; - image->desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; - } else { - image->desc.Format = - dzn_image_get_dxgi_format(pCreateInfo->format, - pCreateInfo->usage & ~VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, - 0), - image->desc.Dimension = (D3D12_RESOURCE_DIMENSION)(D3D12_RESOURCE_DIMENSION_TEXTURE1D + pCreateInfo->imageType); - image->desc.Width = image->vk.extent.width; - image->desc.Height = image->vk.extent.height; - image->desc.DepthOrArraySize = pCreateInfo->imageType == VK_IMAGE_TYPE_3D ? 
- image->vk.extent.depth : - pCreateInfo->arrayLayers; - image->desc.MipLevels = pCreateInfo->mipLevels; - image->desc.SampleDesc.Count = pCreateInfo->samples; - image->desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; - } - - if (image->desc.SampleDesc.Count > 1) - image->desc.Alignment = D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT; - else - image->desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; - - image->desc.SampleDesc.Quality = 0; - - image->desc.Flags = D3D12_RESOURCE_FLAG_NONE; - - if (image->vk.usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) - image->desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; - - if (image->vk.usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { - image->desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; - - if (!(image->vk.usage & (VK_IMAGE_USAGE_SAMPLED_BIT | - VK_IMAGE_USAGE_STORAGE_BIT | - VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT | - VK_IMAGE_USAGE_TRANSFER_SRC_BIT))) - image->desc.Flags |= D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE; - } - - /* Images with TRANSFER_DST can be cleared or passed as a blit/resolve - * destination. Both operations require the RT or DS cap flags. - */ - if ((image->vk.usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) && - image->vk.tiling == VK_IMAGE_TILING_OPTIMAL) { - - D3D12_FEATURE_DATA_FORMAT_SUPPORT dfmt_info = - dzn_physical_device_get_format_support(pdev, pCreateInfo->format); - if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_RENDER_TARGET) { - image->desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; - } else if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL) { - image->desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; - } else if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_TYPED_UNORDERED_ACCESS_VIEW) { - image->desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; - } - } - - if (image->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) - image->desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; - - *out = dzn_image_to_handle(image); - return VK_SUCCESS; -} - -DXGI_FORMAT -dzn_image_get_dxgi_format(VkFormat format, - VkImageUsageFlags usage, - VkImageAspectFlags aspects) -{ - enum pipe_format pfmt = vk_format_to_pipe_format(format); - - if (!vk_format_is_depth_or_stencil(format)) - return dzn_pipe_to_dxgi_format(pfmt); - - switch (pfmt) { - case PIPE_FORMAT_Z16_UNORM: - return usage == VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT ? - DXGI_FORMAT_D16_UNORM : DXGI_FORMAT_R16_UNORM; - - case PIPE_FORMAT_Z32_FLOAT: - return usage == VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT ? - DXGI_FORMAT_D32_FLOAT : DXGI_FORMAT_R32_FLOAT; - - case PIPE_FORMAT_Z24X8_UNORM: - return usage == VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT ? - DXGI_FORMAT_D24_UNORM_S8_UINT : DXGI_FORMAT_R24_UNORM_X8_TYPELESS; - - case PIPE_FORMAT_Z24_UNORM_S8_UINT: - if (usage == VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) - return DXGI_FORMAT_D24_UNORM_S8_UINT; - - if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) - return DXGI_FORMAT_R24_UNORM_X8_TYPELESS; - else - return DXGI_FORMAT_X24_TYPELESS_G8_UINT; - - case PIPE_FORMAT_X24S8_UINT: - return usage == VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT ? 
- DXGI_FORMAT_D24_UNORM_S8_UINT : DXGI_FORMAT_X24_TYPELESS_G8_UINT; - - case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: - if (usage == VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) - return DXGI_FORMAT_D32_FLOAT_S8X24_UINT; - - if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) - return DXGI_FORMAT_X32_TYPELESS_G8X24_UINT; - else if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) - return DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS; - else - return DXGI_FORMAT_R32G8X24_TYPELESS; - - default: - return dzn_pipe_to_dxgi_format(pfmt); - } -} - -DXGI_FORMAT -dzn_image_get_placed_footprint_format(VkFormat format, - VkImageAspectFlags aspect) -{ - DXGI_FORMAT out = - dzn_image_get_dxgi_format(format, - VK_IMAGE_USAGE_TRANSFER_SRC_BIT | - VK_IMAGE_USAGE_TRANSFER_DST_BIT, - aspect); - - switch (out) { - case DXGI_FORMAT_R24_UNORM_X8_TYPELESS: - case DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS: - return DXGI_FORMAT_R32_TYPELESS; - case DXGI_FORMAT_X24_TYPELESS_G8_UINT: - case DXGI_FORMAT_X32_TYPELESS_G8X24_UINT: - return DXGI_FORMAT_R8_TYPELESS; - default: - return out; - } -} - -VkFormat -dzn_image_get_plane_format(VkFormat format, - VkImageAspectFlags aspectMask) -{ - if (aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) - return vk_format_stencil_only(format); - else if (aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT) - return vk_format_depth_only(format); - else - return format; -} - -uint32_t -dzn_image_layers_get_subresource_index(const struct dzn_image *image, - const VkImageSubresourceLayers *subres, - VkImageAspectFlagBits aspect, - uint32_t layer) -{ - int planeSlice = - aspect == VK_IMAGE_ASPECT_STENCIL_BIT ? 1 : 0; - - return subres->mipLevel + - ((subres->baseArrayLayer + layer) * image->desc.MipLevels) + - (planeSlice * image->desc.MipLevels * image->desc.DepthOrArraySize); -} - -uint32_t -dzn_image_range_get_subresource_index(const struct dzn_image *image, - const VkImageSubresourceRange *subres, - VkImageAspectFlagBits aspect, - uint32_t level, uint32_t layer) -{ - int planeSlice = - aspect == VK_IMAGE_ASPECT_STENCIL_BIT ? 1 : 0; - - return subres->baseMipLevel + level + - ((subres->baseArrayLayer + layer) * image->desc.MipLevels) + - (planeSlice * image->desc.MipLevels * image->desc.DepthOrArraySize); -} - -static uint32_t -dzn_image_get_subresource_index(const struct dzn_image *image, - const VkImageSubresource *subres, - VkImageAspectFlagBits aspect) -{ - int planeSlice = - aspect == VK_IMAGE_ASPECT_STENCIL_BIT ? 
1 : 0; - - return subres->mipLevel + - (subres->arrayLayer * image->desc.MipLevels) + - (planeSlice * image->desc.MipLevels * image->desc.DepthOrArraySize); -} - -D3D12_TEXTURE_COPY_LOCATION -dzn_image_get_copy_loc(const struct dzn_image *image, - const VkImageSubresourceLayers *subres, - VkImageAspectFlagBits aspect, - uint32_t layer) -{ - D3D12_TEXTURE_COPY_LOCATION loc = { - .pResource = image->res, - }; - - assert((subres->aspectMask & aspect) != 0); - VkFormat format = dzn_image_get_plane_format(image->vk.format, aspect); - - if (image->desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) { - VkImageUsageFlags usage = - VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT; - - assert((subres->baseArrayLayer + layer) == 0); - assert(subres->mipLevel == 0); - loc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; - loc.PlacedFootprint.Offset = 0; - loc.PlacedFootprint.Footprint.Format = - dzn_image_get_placed_footprint_format(image->vk.format, aspect); - loc.PlacedFootprint.Footprint.Width = image->vk.extent.width; - loc.PlacedFootprint.Footprint.Height = image->vk.extent.height; - loc.PlacedFootprint.Footprint.Depth = image->vk.extent.depth; - loc.PlacedFootprint.Footprint.RowPitch = image->linear.row_stride; - } else { - loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - loc.SubresourceIndex = dzn_image_layers_get_subresource_index(image, subres, aspect, layer); - } - - return loc; -} - -D3D12_DEPTH_STENCIL_VIEW_DESC -dzn_image_get_dsv_desc(const struct dzn_image *image, - const VkImageSubresourceRange *range, - uint32_t level) -{ - uint32_t layer_count = dzn_get_layer_count(image, range); - D3D12_DEPTH_STENCIL_VIEW_DESC dsv_desc = { - .Format = - dzn_image_get_dxgi_format(image->vk.format, - VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, - range->aspectMask), - }; - - switch (image->vk.image_type) { - case VK_IMAGE_TYPE_1D: - dsv_desc.ViewDimension = - image->vk.array_layers > 1 ? - D3D12_DSV_DIMENSION_TEXTURE1DARRAY : - D3D12_DSV_DIMENSION_TEXTURE1D; - break; - case VK_IMAGE_TYPE_2D: - if (image->vk.array_layers > 1) { - dsv_desc.ViewDimension = - image->vk.samples > 1 ? - D3D12_DSV_DIMENSION_TEXTURE2DMSARRAY : - D3D12_DSV_DIMENSION_TEXTURE2DARRAY; - } else { - dsv_desc.ViewDimension = - image->vk.samples > 1 ? 
- D3D12_DSV_DIMENSION_TEXTURE2DMS : - D3D12_DSV_DIMENSION_TEXTURE2D; - } - break; - default: - unreachable("Invalid image type"); - } - - switch (dsv_desc.ViewDimension) { - case D3D12_DSV_DIMENSION_TEXTURE1D: - dsv_desc.Texture1D.MipSlice = range->baseMipLevel + level; - break; - case D3D12_DSV_DIMENSION_TEXTURE1DARRAY: - dsv_desc.Texture1DArray.MipSlice = range->baseMipLevel + level; - dsv_desc.Texture1DArray.FirstArraySlice = range->baseArrayLayer; - dsv_desc.Texture1DArray.ArraySize = layer_count; - break; - case D3D12_DSV_DIMENSION_TEXTURE2D: - dsv_desc.Texture2D.MipSlice = range->baseMipLevel + level; - break; - case D3D12_DSV_DIMENSION_TEXTURE2DMS: - break; - case D3D12_DSV_DIMENSION_TEXTURE2DARRAY: - dsv_desc.Texture2DArray.MipSlice = range->baseMipLevel + level; - dsv_desc.Texture2DArray.FirstArraySlice = range->baseArrayLayer; - dsv_desc.Texture2DArray.ArraySize = layer_count; - break; - } - - return dsv_desc; -} - -D3D12_RENDER_TARGET_VIEW_DESC -dzn_image_get_rtv_desc(const struct dzn_image *image, - const VkImageSubresourceRange *range, - uint32_t level) -{ - uint32_t layer_count = dzn_get_layer_count(image, range); - D3D12_RENDER_TARGET_VIEW_DESC rtv_desc = { - .Format = - dzn_image_get_dxgi_format(image->vk.format, - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, - VK_IMAGE_ASPECT_COLOR_BIT), - }; - - switch (image->vk.image_type) { - case VK_IMAGE_TYPE_1D: - rtv_desc.ViewDimension = - image->vk.array_layers > 1 ? - D3D12_RTV_DIMENSION_TEXTURE1DARRAY : D3D12_RTV_DIMENSION_TEXTURE1D; - break; - case VK_IMAGE_TYPE_2D: - if (image->vk.array_layers > 1) { - rtv_desc.ViewDimension = - image->vk.samples > 1 ? - D3D12_RTV_DIMENSION_TEXTURE2DMSARRAY : - D3D12_RTV_DIMENSION_TEXTURE2DARRAY; - } else { - rtv_desc.ViewDimension = - image->vk.samples > 1 ? - D3D12_RTV_DIMENSION_TEXTURE2DMS : - D3D12_RTV_DIMENSION_TEXTURE2D; - } - break; - case VK_IMAGE_TYPE_3D: - rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE3D; - break; - default: unreachable("Invalid image type\n"); - } - - switch (rtv_desc.ViewDimension) { - case D3D12_RTV_DIMENSION_TEXTURE1D: - rtv_desc.Texture1D.MipSlice = range->baseMipLevel + level; - break; - case D3D12_RTV_DIMENSION_TEXTURE1DARRAY: - rtv_desc.Texture1DArray.MipSlice = range->baseMipLevel + level; - rtv_desc.Texture1DArray.FirstArraySlice = range->baseArrayLayer; - rtv_desc.Texture1DArray.ArraySize = layer_count; - break; - case D3D12_RTV_DIMENSION_TEXTURE2D: - rtv_desc.Texture2D.MipSlice = range->baseMipLevel + level; - if (range->aspectMask & VK_IMAGE_ASPECT_PLANE_1_BIT) - rtv_desc.Texture2D.PlaneSlice = 1; - else if (range->aspectMask & VK_IMAGE_ASPECT_PLANE_2_BIT) - rtv_desc.Texture2D.PlaneSlice = 2; - else - rtv_desc.Texture2D.PlaneSlice = 0; - break; - case D3D12_RTV_DIMENSION_TEXTURE2DMS: - break; - case D3D12_RTV_DIMENSION_TEXTURE2DARRAY: - rtv_desc.Texture2DArray.MipSlice = range->baseMipLevel + level; - rtv_desc.Texture2DArray.FirstArraySlice = range->baseArrayLayer; - rtv_desc.Texture2DArray.ArraySize = layer_count; - if (range->aspectMask & VK_IMAGE_ASPECT_PLANE_1_BIT) - rtv_desc.Texture2DArray.PlaneSlice = 1; - else if (range->aspectMask & VK_IMAGE_ASPECT_PLANE_2_BIT) - rtv_desc.Texture2DArray.PlaneSlice = 2; - else - rtv_desc.Texture2DArray.PlaneSlice = 0; - break; - case D3D12_RTV_DIMENSION_TEXTURE2DMSARRAY: - rtv_desc.Texture2DMSArray.FirstArraySlice = range->baseArrayLayer; - rtv_desc.Texture2DMSArray.ArraySize = layer_count; - break; - case D3D12_RTV_DIMENSION_TEXTURE3D: - rtv_desc.Texture3D.MipSlice = range->baseMipLevel + level; - 
rtv_desc.Texture3D.FirstWSlice = range->baseArrayLayer; - rtv_desc.Texture3D.WSize = - range->layerCount == VK_REMAINING_ARRAY_LAYERS ? -1 : layer_count; - break; - } - - return rtv_desc; -} - -D3D12_RESOURCE_STATES -dzn_image_layout_to_state(VkImageLayout layout, VkImageAspectFlagBits aspect) -{ - switch (layout) { - case VK_IMAGE_LAYOUT_PREINITIALIZED: - case VK_IMAGE_LAYOUT_UNDEFINED: - case VK_IMAGE_LAYOUT_GENERAL: - /* YOLO! */ - case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR: - return D3D12_RESOURCE_STATE_COMMON; - - case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: - return D3D12_RESOURCE_STATE_COPY_DEST; - - case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: - return D3D12_RESOURCE_STATE_COPY_SOURCE; - - case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: - return D3D12_RESOURCE_STATE_RENDER_TARGET; - - case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: - case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL: - return D3D12_RESOURCE_STATE_DEPTH_WRITE; - - case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL: - case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL: - return D3D12_RESOURCE_STATE_DEPTH_READ; - - case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL: - return aspect == VK_IMAGE_ASPECT_STENCIL_BIT ? - D3D12_RESOURCE_STATE_DEPTH_WRITE : - D3D12_RESOURCE_STATE_DEPTH_READ; - - case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL: - return aspect == VK_IMAGE_ASPECT_STENCIL_BIT ? - D3D12_RESOURCE_STATE_DEPTH_READ : - D3D12_RESOURCE_STATE_DEPTH_WRITE; - - case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: - return D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE; - - default: - unreachable("not implemented"); - } -} - -VKAPI_ATTR VkResult VKAPI_CALL -dzn_CreateImage(VkDevice device, - const VkImageCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkImage *pImage) -{ - return dzn_image_create(dzn_device_from_handle(device), - pCreateInfo, pAllocator, pImage); -} - -VKAPI_ATTR void VKAPI_CALL -dzn_DestroyImage(VkDevice device, VkImage image, - const VkAllocationCallbacks *pAllocator) -{ - dzn_image_destroy(dzn_image_from_handle(image), pAllocator); -} - -static struct dzn_image * -dzn_swapchain_get_image(struct dzn_device *device, - VkSwapchainKHR swapchain, - uint32_t index) -{ - uint32_t n_images = index + 1; - STACK_ARRAY(VkImage, images, n_images); - struct dzn_image *image = NULL; - - VkResult result = wsi_common_get_images(swapchain, &n_images, images); - - if (result == VK_SUCCESS || result == VK_INCOMPLETE) - image = dzn_image_from_handle(images[index]); - - STACK_ARRAY_FINISH(images); - return image; -} - -VKAPI_ATTR VkResult VKAPI_CALL -dzn_BindImageMemory2(VkDevice dev, - uint32_t bindInfoCount, - const VkBindImageMemoryInfo *pBindInfos) -{ - VK_FROM_HANDLE(dzn_device, device, dev); - - for (uint32_t i = 0; i < bindInfoCount; i++) { - const VkBindImageMemoryInfo *bind_info = &pBindInfos[i]; - VK_FROM_HANDLE(dzn_device_memory, mem, bind_info->memory); - VK_FROM_HANDLE(dzn_image, image, bind_info->image); - bool did_bind = false; - - vk_foreach_struct_const(s, bind_info->pNext) { - switch (s->sType) { - case VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_SWAPCHAIN_INFO_KHR: { - const VkBindImageMemorySwapchainInfoKHR *swapchain_info = - (const VkBindImageMemorySwapchainInfoKHR *) s; - struct dzn_image *swapchain_image = - dzn_swapchain_get_image(device, - swapchain_info->swapchain, - swapchain_info->imageIndex); - assert(swapchain_image); - assert(image->vk.aspects == swapchain_image->vk.aspects); - assert(mem == NULL); - - /* TODO: something something binding the image memory */ - 
assert(false); - - did_bind = true; - break; - } - default: - dzn_debug_ignored_stype(s->sType); - break; - } - } - - if (!did_bind) { - image->mem = mem; - image->mem_offset = bind_info->memoryOffset; - if (FAILED(ID3D12Device1_CreatePlacedResource(device->dev, mem->heap, - bind_info->memoryOffset, - &image->desc, - mem->initial_state, - NULL, - IID_ID3D12Resource, - (void **)&image->res))) - return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); - did_bind = true; - } - } - - return VK_SUCCESS; -} - -VKAPI_ATTR void VKAPI_CALL -dzn_GetImageMemoryRequirements2(VkDevice _device, - const VkImageMemoryRequirementsInfo2 *pInfo, - VkMemoryRequirements2 *pMemoryRequirements) -{ - VK_FROM_HANDLE(dzn_device, device, _device); - VK_FROM_HANDLE(dzn_image, image, pInfo->image); - struct dzn_physical_device *pdev = - container_of(device->vk.physical, struct dzn_physical_device, vk); - - vk_foreach_struct_const(ext, pInfo->pNext) { - dzn_debug_ignored_stype(ext->sType); - } - - vk_foreach_struct(ext, pMemoryRequirements->pNext) { - switch (ext->sType) { - case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: { - VkMemoryDedicatedRequirements *requirements = - (VkMemoryDedicatedRequirements *)ext; - /* TODO: figure out dedicated allocations */ - requirements->prefersDedicatedAllocation = false; - requirements->requiresDedicatedAllocation = false; - break; - } - - default: - dzn_debug_ignored_stype(ext->sType); - break; - } - } - - D3D12_RESOURCE_ALLOCATION_INFO info; - ID3D12Device1_GetResourceAllocationInfo(device->dev, &info, 0, 1, &image->desc); - - pMemoryRequirements->memoryRequirements = VkMemoryRequirements { - .size = info.SizeInBytes, - .alignment = info.Alignment, - .memoryTypeBits = - dzn_physical_device_get_mem_type_mask_for_resource(pdev, &image->desc), - }; - - /* - * MSAA images need memory to be aligned on - * D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT (4MB), but the memory - * allocation function doesn't know what the memory will be used for, - * and forcing all allocations to be 4MB-aligned has a cost, so let's - * force MSAA resources to be at least 4MB, such that the allocation - * logic can consider sub-4MB allocations to not require this 4MB alignment. - */ - if (image->vk.samples > 1 && - pMemoryRequirements->memoryRequirements.size < D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT) - pMemoryRequirements->memoryRequirements.size = D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT; -} - -VKAPI_ATTR void VKAPI_CALL -dzn_GetImageSubresourceLayout(VkDevice _device, - VkImage _image, - const VkImageSubresource *subresource, - VkSubresourceLayout *layout) -{ - VK_FROM_HANDLE(dzn_device, device, _device); - VK_FROM_HANDLE(dzn_image, image, _image); - - if (image->desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) { - assert(subresource->arrayLayer == 0); - assert(subresource->mipLevel == 0); - layout->offset = 0; - layout->rowPitch = image->linear.row_stride; - layout->depthPitch = 0; - layout->arrayPitch = 0; - layout->size = image->linear.size; - } else { - UINT subres_index = - dzn_image_get_subresource_index(image, subresource, - (VkImageAspectFlagBits)subresource->aspectMask); - D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint; - UINT num_rows; - UINT64 row_size, total_size; - ID3D12Device1_GetCopyableFootprints(device->dev, &image->desc, - subres_index, 1, - 0, // base-offset? 
- &footprint, - &num_rows, &row_size, - &total_size); - - layout->offset = footprint.Offset; - layout->rowPitch = footprint.Footprint.RowPitch; - layout->depthPitch = layout->rowPitch * footprint.Footprint.Height; - layout->arrayPitch = layout->depthPitch; // uuuh... why is this even here? - layout->size = total_size; - } -} - -static D3D12_SHADER_COMPONENT_MAPPING -translate_swizzle(VkComponentSwizzle in, uint32_t comp) -{ - switch (in) { - case VK_COMPONENT_SWIZZLE_IDENTITY: - return (D3D12_SHADER_COMPONENT_MAPPING) - (comp + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0); - case VK_COMPONENT_SWIZZLE_ZERO: - return D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0; - case VK_COMPONENT_SWIZZLE_ONE: - return D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_1; - case VK_COMPONENT_SWIZZLE_R: - return D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0; - case VK_COMPONENT_SWIZZLE_G: - return D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_1; - case VK_COMPONENT_SWIZZLE_B: - return D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_2; - case VK_COMPONENT_SWIZZLE_A: - return D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_3; - default: unreachable("Invalid swizzle"); - } -} - -static void -dzn_image_view_prepare_srv_desc(struct dzn_image_view *iview) -{ - uint32_t plane_slice = (iview->vk.aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? 1 : 0; - bool ms = iview->vk.image->samples > 1; - uint32_t layers_per_elem = - (iview->vk.view_type == VK_IMAGE_VIEW_TYPE_CUBE || - iview->vk.view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY) ? - 6 : 1; - bool use_array = (iview->vk.base_array_layer / layers_per_elem) > 0 || - (iview->vk.layer_count / layers_per_elem) > 1; - - iview->srv_desc = D3D12_SHADER_RESOURCE_VIEW_DESC { - .Format = - dzn_image_get_dxgi_format(iview->vk.format, - iview->vk.image->usage & ~VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, - iview->vk.aspects), - }; - - D3D12_SHADER_COMPONENT_MAPPING swz[] = { - translate_swizzle(iview->vk.swizzle.r, 0), - translate_swizzle(iview->vk.swizzle.g, 1), - translate_swizzle(iview->vk.swizzle.b, 2), - translate_swizzle(iview->vk.swizzle.a, 3), - }; - - /* Swap components to fake B4G4R4A4 support. 
*/ - if (iview->vk.format == VK_FORMAT_B4G4R4A4_UNORM_PACK16) { - static const D3D12_SHADER_COMPONENT_MAPPING bgra4_remap[] = { - D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_1, - D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0, - D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_3, - D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_2, - D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0, - D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_1, - }; - - for (uint32_t i = 0; i < ARRAY_SIZE(swz); i++) - swz[i] = bgra4_remap[swz[i]]; - } - - iview->srv_desc.Shader4ComponentMapping = - D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(swz[0], swz[1], swz[2], swz[3]); - - switch (iview->vk.view_type) { - case VK_IMAGE_VIEW_TYPE_1D_ARRAY: - case VK_IMAGE_VIEW_TYPE_1D: - if (use_array) { - iview->srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1DARRAY; - iview->srv_desc.Texture1DArray.MostDetailedMip = iview->vk.base_mip_level; - iview->srv_desc.Texture1DArray.MipLevels = iview->vk.level_count; - iview->srv_desc.Texture1DArray.FirstArraySlice = iview->vk.base_array_layer; - iview->srv_desc.Texture1DArray.ArraySize = iview->vk.layer_count; - } else { - iview->srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1D; - iview->srv_desc.Texture1D.MostDetailedMip = iview->vk.base_mip_level; - iview->srv_desc.Texture1D.MipLevels = iview->vk.level_count; - } - break; - - case VK_IMAGE_VIEW_TYPE_2D_ARRAY: - case VK_IMAGE_VIEW_TYPE_2D: - if (use_array && ms) { - iview->srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY; - iview->srv_desc.Texture2DMSArray.FirstArraySlice = iview->vk.base_array_layer; - iview->srv_desc.Texture2DMSArray.ArraySize = iview->vk.layer_count; - } else if (use_array && !ms) { - iview->srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY; - iview->srv_desc.Texture2DArray.MostDetailedMip = iview->vk.base_mip_level; - iview->srv_desc.Texture2DArray.MipLevels = iview->vk.level_count; - iview->srv_desc.Texture2DArray.FirstArraySlice = iview->vk.base_array_layer; - iview->srv_desc.Texture2DArray.ArraySize = iview->vk.layer_count; - iview->srv_desc.Texture2DArray.PlaneSlice = plane_slice; - } else if (!use_array && ms) { - iview->srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DMS; - } else { - iview->srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; - iview->srv_desc.Texture2D.MostDetailedMip = iview->vk.base_mip_level; - iview->srv_desc.Texture2D.MipLevels = iview->vk.level_count; - iview->srv_desc.Texture2D.PlaneSlice = plane_slice; - } - break; - - case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: - case VK_IMAGE_VIEW_TYPE_CUBE: - if (use_array) { - iview->srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBEARRAY; - iview->srv_desc.TextureCubeArray.MostDetailedMip = iview->vk.base_mip_level; - iview->srv_desc.TextureCubeArray.MipLevels = iview->vk.level_count; - iview->srv_desc.TextureCubeArray.First2DArrayFace = iview->vk.base_array_layer; - iview->srv_desc.TextureCubeArray.NumCubes = iview->vk.layer_count / 6; - } else { - iview->srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBE; - iview->srv_desc.TextureCube.MostDetailedMip = iview->vk.base_mip_level; - iview->srv_desc.TextureCube.MipLevels = iview->vk.level_count; - } - break; - - case VK_IMAGE_VIEW_TYPE_3D: - iview->srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D; - iview->srv_desc.Texture3D.MostDetailedMip = iview->vk.base_mip_level; - iview->srv_desc.Texture3D.MipLevels = iview->vk.level_count; - break; - - default: unreachable("Invalid view type"); - } -} - -static void 
-dzn_image_view_prepare_uav_desc(struct dzn_image_view *iview) -{ - bool use_array = iview->vk.base_array_layer > 0 || iview->vk.layer_count > 1; - - assert(iview->vk.image->samples == 1); - - iview->uav_desc = D3D12_UNORDERED_ACCESS_VIEW_DESC { - .Format = - dzn_image_get_dxgi_format(iview->vk.format, - VK_IMAGE_USAGE_STORAGE_BIT, - iview->vk.aspects), - }; - - switch (iview->vk.view_type) { - case VK_IMAGE_VIEW_TYPE_1D: - case VK_IMAGE_VIEW_TYPE_1D_ARRAY: - if (use_array) { - iview->uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE1DARRAY; - iview->uav_desc.Texture1DArray.MipSlice = iview->vk.base_mip_level; - iview->uav_desc.Texture1DArray.FirstArraySlice = iview->vk.base_array_layer; - iview->uav_desc.Texture1DArray.ArraySize = iview->vk.layer_count; - } else { - iview->uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE1D; - iview->uav_desc.Texture1D.MipSlice = iview->vk.base_mip_level; - } - break; - - case VK_IMAGE_VIEW_TYPE_2D: - case VK_IMAGE_VIEW_TYPE_2D_ARRAY: - case VK_IMAGE_VIEW_TYPE_CUBE: - case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: - if (use_array) { - iview->uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2DARRAY; - iview->uav_desc.Texture2DArray.PlaneSlice = 0; - iview->uav_desc.Texture2DArray.MipSlice = iview->vk.base_mip_level; - iview->uav_desc.Texture2DArray.FirstArraySlice = iview->vk.base_array_layer; - iview->uav_desc.Texture2DArray.ArraySize = iview->vk.layer_count; - } else { - iview->uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D; - iview->uav_desc.Texture2D.MipSlice = iview->vk.base_mip_level; - iview->uav_desc.Texture2D.PlaneSlice = 0; - } - break; - case VK_IMAGE_VIEW_TYPE_3D: - iview->uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE3D; - iview->uav_desc.Texture3D.MipSlice = iview->vk.base_mip_level; - iview->uav_desc.Texture3D.FirstWSlice = 0; - iview->uav_desc.Texture3D.WSize = iview->vk.extent.depth; - break; - default: unreachable("Invalid type"); - } -} - -static void -dzn_image_view_prepare_rtv_desc(struct dzn_image_view *iview) -{ - bool use_array = iview->vk.base_array_layer > 0 || iview->vk.layer_count > 1; - bool from_3d_image = iview->vk.image->image_type == VK_IMAGE_TYPE_3D; - bool ms = iview->vk.image->samples > 1; - uint32_t plane_slice = - (iview->vk.aspects & VK_IMAGE_ASPECT_PLANE_2_BIT) ? 2 : - (iview->vk.aspects & VK_IMAGE_ASPECT_PLANE_1_BIT) ? 
1 : 0; - - assert(iview->vk.level_count == 1); - - iview->rtv_desc = D3D12_RENDER_TARGET_VIEW_DESC { - .Format = - dzn_image_get_dxgi_format(iview->vk.format, - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, - iview->vk.aspects), - }; - - switch (iview->vk.view_type) { - case VK_IMAGE_VIEW_TYPE_1D: - case VK_IMAGE_VIEW_TYPE_1D_ARRAY: - if (use_array) { - iview->rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE1DARRAY; - iview->rtv_desc.Texture1DArray.MipSlice = iview->vk.base_mip_level; - iview->rtv_desc.Texture1DArray.FirstArraySlice = iview->vk.base_array_layer; - iview->rtv_desc.Texture1DArray.ArraySize = iview->vk.layer_count; - } else { - iview->rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE1D; - iview->rtv_desc.Texture1D.MipSlice = iview->vk.base_mip_level; - } - break; - - case VK_IMAGE_VIEW_TYPE_2D: - case VK_IMAGE_VIEW_TYPE_2D_ARRAY: - case VK_IMAGE_VIEW_TYPE_CUBE: - case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: - if (from_3d_image) { - iview->rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE3D; - iview->rtv_desc.Texture3D.MipSlice = iview->vk.base_mip_level; - iview->rtv_desc.Texture3D.FirstWSlice = iview->vk.base_array_layer; - iview->rtv_desc.Texture3D.WSize = iview->vk.layer_count; - } else if (use_array && ms) { - iview->rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DMSARRAY; - iview->rtv_desc.Texture2DMSArray.FirstArraySlice = iview->vk.base_array_layer; - iview->rtv_desc.Texture2DMSArray.ArraySize = iview->vk.layer_count; - } else if (use_array && !ms) { - iview->rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DARRAY; - iview->rtv_desc.Texture2DArray.MipSlice = iview->vk.base_mip_level; - iview->rtv_desc.Texture2DArray.FirstArraySlice = iview->vk.base_array_layer; - iview->rtv_desc.Texture2DArray.ArraySize = iview->vk.layer_count; - iview->rtv_desc.Texture2DArray.PlaneSlice = plane_slice; - } else if (!use_array && ms) { - iview->rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DMS; - } else { - iview->rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; - iview->rtv_desc.Texture2D.MipSlice = iview->vk.base_mip_level; - iview->rtv_desc.Texture2D.PlaneSlice = plane_slice; - } - break; - - case VK_IMAGE_VIEW_TYPE_3D: - iview->rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE3D; - iview->rtv_desc.Texture3D.MipSlice = iview->vk.base_mip_level; - iview->rtv_desc.Texture3D.FirstWSlice = 0; - iview->rtv_desc.Texture3D.WSize = iview->vk.extent.depth; - break; - - default: unreachable("Invalid view type"); - } -} - -static void -dzn_image_view_prepare_dsv_desc(struct dzn_image_view *iview) -{ - bool use_array = iview->vk.base_array_layer > 0 || iview->vk.layer_count > 1; - bool ms = iview->vk.image->samples > 1; - - iview->dsv_desc = D3D12_DEPTH_STENCIL_VIEW_DESC { - .Format = - dzn_image_get_dxgi_format(iview->vk.format, - VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, - iview->vk.aspects), - }; - - switch (iview->vk.view_type) { - case VK_IMAGE_VIEW_TYPE_1D: - case VK_IMAGE_VIEW_TYPE_1D_ARRAY: - if (use_array) { - iview->dsv_desc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE1DARRAY; - iview->dsv_desc.Texture1DArray.MipSlice = iview->vk.base_mip_level; - iview->dsv_desc.Texture1DArray.FirstArraySlice = iview->vk.base_array_layer; - iview->dsv_desc.Texture1DArray.ArraySize = iview->vk.layer_count; - } else { - iview->dsv_desc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE1D; - iview->dsv_desc.Texture1D.MipSlice = iview->vk.base_mip_level; - } - break; - - case VK_IMAGE_VIEW_TYPE_2D: - case VK_IMAGE_VIEW_TYPE_2D_ARRAY: - case VK_IMAGE_VIEW_TYPE_CUBE: - case 
VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: - if (use_array && ms) { - iview->dsv_desc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2DMSARRAY; - iview->dsv_desc.Texture2DMSArray.FirstArraySlice = iview->vk.base_array_layer; - iview->dsv_desc.Texture2DMSArray.ArraySize = iview->vk.layer_count; - } else if (use_array && !ms) { - iview->dsv_desc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2DARRAY; - iview->dsv_desc.Texture2DArray.MipSlice = iview->vk.base_mip_level; - iview->dsv_desc.Texture2DArray.FirstArraySlice = iview->vk.base_array_layer; - iview->dsv_desc.Texture2DArray.ArraySize = iview->vk.layer_count; - } else if (!use_array && ms) { - iview->dsv_desc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2DMS; - } else { - iview->dsv_desc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2D; - iview->dsv_desc.Texture2D.MipSlice = iview->vk.base_mip_level; - } - break; - - default: unreachable("Invalid view type"); - } -} - -void -dzn_image_view_finish(struct dzn_image_view *iview) -{ - vk_image_view_finish(&iview->vk); -} - -void -dzn_image_view_init(struct dzn_device *device, - struct dzn_image_view *iview, - const VkImageViewCreateInfo *pCreateInfo) -{ - VK_FROM_HANDLE(dzn_image, image, pCreateInfo->image); - - const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; - uint32_t level_count = dzn_get_level_count(image, range); - uint32_t layer_count = dzn_get_layer_count(image, range); - uint32_t plane_slice = - pCreateInfo->subresourceRange.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT ? 1 : 0; - - vk_image_view_init(&device->vk, &iview->vk, pCreateInfo); - - assert(layer_count > 0); - assert(range->baseMipLevel < image->vk.mip_levels); - - /* View usage should be a subset of image usage */ - assert(iview->vk.usage & (VK_IMAGE_USAGE_TRANSFER_SRC_BIT | - VK_IMAGE_USAGE_SAMPLED_BIT | - VK_IMAGE_USAGE_STORAGE_BIT | - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | - VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT | - VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)); - - switch (image->vk.image_type) { - default: - unreachable("bad VkImageType"); - case VK_IMAGE_TYPE_1D: - case VK_IMAGE_TYPE_2D: - assert(range->baseArrayLayer + dzn_get_layer_count(image, range) - 1 <= image->vk.array_layers); - break; - case VK_IMAGE_TYPE_3D: - assert(range->baseArrayLayer + dzn_get_layer_count(image, range) - 1 - <= u_minify(image->vk.extent.depth, range->baseMipLevel)); - break; - } - - dzn_image_view_prepare_srv_desc(iview); - - if (iview->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) - dzn_image_view_prepare_uav_desc(iview); - - if (iview->vk.usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) - dzn_image_view_prepare_rtv_desc(iview); - - if (iview->vk.usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) - dzn_image_view_prepare_dsv_desc(iview); -} - -static void -dzn_image_view_destroy(struct dzn_image_view *iview, - const VkAllocationCallbacks *pAllocator) -{ - if (!iview) - return; - - struct dzn_device *device = container_of(iview->vk.base.device, struct dzn_device, vk); - - vk_image_view_finish(&iview->vk); - vk_free2(&device->vk.alloc, pAllocator, iview); -} - -static VkResult -dzn_image_view_create(struct dzn_device *device, - const VkImageViewCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkImageView *out) -{ - struct dzn_image_view *iview = (struct dzn_image_view *) - vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*iview), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!iview) - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - - dzn_image_view_init(device, iview, pCreateInfo); - - *out = dzn_image_view_to_handle(iview); 
- return VK_SUCCESS; -} - -VKAPI_ATTR VkResult VKAPI_CALL -dzn_CreateImageView(VkDevice device, - const VkImageViewCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkImageView *pView) -{ - return dzn_image_view_create(dzn_device_from_handle(device), pCreateInfo, - pAllocator, pView); -} - -VKAPI_ATTR void VKAPI_CALL -dzn_DestroyImageView(VkDevice device, - VkImageView imageView, - const VkAllocationCallbacks *pAllocator) -{ - dzn_image_view_destroy(dzn_image_view_from_handle(imageView), pAllocator); -} - -static void -dzn_buffer_view_destroy(struct dzn_buffer_view *bview, - const VkAllocationCallbacks *pAllocator) -{ - if (!bview) - return; - - struct dzn_device *device = container_of(bview->base.device, struct dzn_device, vk); - - vk_object_base_finish(&bview->base); - vk_free2(&device->vk.alloc, pAllocator, bview); -} - -static VkResult -dzn_buffer_view_create(struct dzn_device *device, - const VkBufferViewCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkBufferView *out) -{ - VK_FROM_HANDLE(dzn_buffer, buf, pCreateInfo->buffer); - - struct dzn_buffer_view *bview = (struct dzn_buffer_view *) - vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*bview), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!bview) - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - - vk_object_base_init(&device->vk, &bview->base, VK_OBJECT_TYPE_BUFFER_VIEW); - - enum pipe_format pfmt = vk_format_to_pipe_format(pCreateInfo->format); - unsigned blksz = util_format_get_blocksize(pfmt); - VkDeviceSize size = - pCreateInfo->range == VK_WHOLE_SIZE ? - buf->size - pCreateInfo->offset : pCreateInfo->range; - - bview->buffer = buf; - if (buf->usage & - (VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | - VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT)) { - bview->srv_desc = D3D12_SHADER_RESOURCE_VIEW_DESC { - .Format = dzn_buffer_get_dxgi_format(pCreateInfo->format), - .ViewDimension = D3D12_SRV_DIMENSION_BUFFER, - .Shader4ComponentMapping = - D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, - .Buffer = { - .FirstElement = pCreateInfo->offset / blksz, - .NumElements = (UINT)(size / blksz), - .Flags = D3D12_BUFFER_SRV_FLAG_NONE, - }, - }; - } - - if (buf->usage & VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT) { - bview->uav_desc = D3D12_UNORDERED_ACCESS_VIEW_DESC { - .Format = dzn_buffer_get_dxgi_format(pCreateInfo->format), - .ViewDimension = D3D12_UAV_DIMENSION_BUFFER, - .Buffer = { - .FirstElement = pCreateInfo->offset / blksz, - .NumElements = (UINT)(size / blksz), - .Flags = D3D12_BUFFER_UAV_FLAG_NONE, - }, - }; - } - - *out = dzn_buffer_view_to_handle(bview); - return VK_SUCCESS; -} - -VKAPI_ATTR VkResult VKAPI_CALL -dzn_CreateBufferView(VkDevice device, - const VkBufferViewCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkBufferView *pView) -{ - return dzn_buffer_view_create(dzn_device_from_handle(device), - pCreateInfo, pAllocator, pView); -} - -VKAPI_ATTR void VKAPI_CALL -dzn_DestroyBufferView(VkDevice device, - VkBufferView bufferView, - const VkAllocationCallbacks *pAllocator) -{ - dzn_buffer_view_destroy(dzn_buffer_view_from_handle(bufferView), pAllocator); -} diff --git a/src/microsoft/vulkan/dzn_meta.c b/src/microsoft/vulkan/dzn_meta.c new file mode 100644 index 00000000000..ce69cce8703 --- /dev/null +++ b/src/microsoft/vulkan/dzn_meta.c @@ -0,0 +1,736 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal 
in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "dzn_private.h" + +#include "spirv_to_dxil.h" +#include "nir_to_dxil.h" + +#include "dxil_nir.h" +#include "dxil_nir_lower_int_samplers.h" +#include "dxil_validator.h" + +static void +dzn_meta_compile_shader(struct dzn_device *device, nir_shader *nir, + D3D12_SHADER_BYTECODE *slot) +{ + struct dzn_instance *instance = + container_of(device->vk.physical->instance, struct dzn_instance, vk); + + nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); + + if ((instance->debug_flags & DZN_DEBUG_NIR) && + (instance->debug_flags & DZN_DEBUG_INTERNAL)) + nir_print_shader(nir, stderr); + + struct nir_to_dxil_options opts = { .environment = DXIL_ENVIRONMENT_VULKAN }; + struct blob dxil_blob; + bool ret = nir_to_dxil(nir, &opts, &dxil_blob); + assert(ret); + + char *err; + bool res = dxil_validate_module(instance->dxil_validator, + dxil_blob.data, + dxil_blob.size, &err); + + if ((instance->debug_flags & DZN_DEBUG_DXIL) && + (instance->debug_flags & DZN_DEBUG_INTERNAL)) { + char *disasm = dxil_disasm_module(instance->dxil_validator, + dxil_blob.data, + dxil_blob.size); + if (disasm) { + fprintf(stderr, + "== BEGIN SHADER ============================================\n" + "%s\n" + "== END SHADER ==============================================\n", + disasm); + ralloc_free(disasm); + } + } + + if ((instance->debug_flags & DZN_DEBUG_DXIL) && + (instance->debug_flags & DZN_DEBUG_INTERNAL) && + err) { + fprintf(stderr, + "== VALIDATION ERROR =============================================\n" + "%s\n" + "== END ==========================================================\n", + err); + ralloc_free(err); + } + assert(res); + + void *data; + size_t size; + blob_finish_get_buffer(&dxil_blob, &data, &size); + slot->pShaderBytecode = data; + slot->BytecodeLength = size; +} + +#define DZN_META_INDIRECT_DRAW_MAX_PARAM_COUNT 4 + +static void +dzn_meta_indirect_draw_finish(struct dzn_device *device, enum dzn_indirect_draw_type type) +{ + struct dzn_meta_indirect_draw *meta = &device->indirect_draws[type]; + + if (meta->root_sig) + ID3D12RootSignature_Release(meta->root_sig); + + if (meta->pipeline_state) + ID3D12PipelineState_Release(meta->pipeline_state); +} + +static VkResult +dzn_meta_indirect_draw_init(struct dzn_device *device, + enum dzn_indirect_draw_type type) +{ + struct dzn_meta_indirect_draw *meta = &device->indirect_draws[type]; + struct dzn_instance *instance = + container_of(device->vk.physical->instance, struct dzn_instance, vk); + VkResult ret = VK_SUCCESS; + + glsl_type_singleton_init_or_ref(); + + nir_shader *nir = 
dzn_nir_indirect_draw_shader(type); + bool triangle_fan = type == DZN_INDIRECT_DRAW_TRIANGLE_FAN || + type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN; + uint32_t shader_params_size = + triangle_fan ? + sizeof(struct dzn_indirect_draw_triangle_fan_rewrite_params) : + sizeof(struct dzn_indirect_draw_rewrite_params); + + uint32_t root_param_count = 0; + D3D12_ROOT_PARAMETER1 root_params[DZN_META_INDIRECT_DRAW_MAX_PARAM_COUNT]; + + root_params[root_param_count++] = (D3D12_ROOT_PARAMETER1) { + .ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS, + .Constants = { + .ShaderRegister = 0, + .RegisterSpace = 0, + .Num32BitValues = shader_params_size / 4, + }, + .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL, + }; + + root_params[root_param_count++] = (D3D12_ROOT_PARAMETER1) { + .ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV, + .Descriptor = { + .ShaderRegister = 1, + .RegisterSpace = 0, + .Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE, + }, + .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL, + }; + + root_params[root_param_count++] = (D3D12_ROOT_PARAMETER1) { + .ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV, + .Descriptor = { + .ShaderRegister = 2, + .RegisterSpace = 0, + .Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE, + }, + .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL, + }; + + + if (triangle_fan) { + root_params[root_param_count++] = (D3D12_ROOT_PARAMETER1) { + .ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV, + .Descriptor = { + .ShaderRegister = 3, + .RegisterSpace = 0, + .Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE, + }, + .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL, + }; + } + + assert(root_param_count <= ARRAY_SIZE(root_params)); + + D3D12_VERSIONED_ROOT_SIGNATURE_DESC root_sig_desc = { + .Version = D3D_ROOT_SIGNATURE_VERSION_1_1, + .Desc_1_1 = { + .NumParameters = root_param_count, + .pParameters = root_params, + .Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE, + }, + }; + + D3D12_COMPUTE_PIPELINE_STATE_DESC desc = { + .Flags = D3D12_PIPELINE_STATE_FLAG_NONE, + }; + + meta->root_sig = + dzn_device_create_root_sig(device, &root_sig_desc); + if (!meta->root_sig) { + ret = vk_error(instance, VK_ERROR_INITIALIZATION_FAILED); + goto out; + } + + desc.pRootSignature = meta->root_sig; + dzn_meta_compile_shader(device, nir, &desc.CS); + assert(desc.CS.pShaderBytecode); + + if (FAILED(ID3D12Device1_CreateComputePipelineState(device->dev, &desc, + &IID_ID3D12PipelineState, + (void **)&meta->pipeline_state))) + ret = vk_error(instance, VK_ERROR_INITIALIZATION_FAILED); + +out: + if (ret != VK_SUCCESS) + dzn_meta_indirect_draw_finish(device, type); + + free((void *)desc.CS.pShaderBytecode); + ralloc_free(nir); + glsl_type_singleton_decref(); + + return ret; +} + +#define DZN_META_TRIANGLE_FAN_REWRITE_IDX_MAX_PARAM_COUNT 3 + +static void +dzn_meta_triangle_fan_rewrite_index_finish(struct dzn_device *device, + enum dzn_index_type old_index_type) +{ + struct dzn_meta_triangle_fan_rewrite_index *meta = + &device->triangle_fan[old_index_type]; + + if (meta->root_sig) + ID3D12RootSignature_Release(meta->root_sig); + if (meta->pipeline_state) + ID3D12PipelineState_Release(meta->pipeline_state); + if (meta->cmd_sig) + ID3D12CommandSignature_Release(meta->cmd_sig); +} + +static VkResult +dzn_meta_triangle_fan_rewrite_index_init(struct dzn_device *device, + enum dzn_index_type old_index_type) +{ + struct dzn_meta_triangle_fan_rewrite_index *meta = + &device->triangle_fan[old_index_type]; + struct dzn_instance *instance = + container_of(device->vk.physical->instance, struct dzn_instance, vk); + VkResult ret = VK_SUCCESS; + +
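/* The compute shader built below rewrites a triangle-fan index stream into the
 * triangle-list stream D3D12 expects, since D3D12 has no fan topology. A
 * CPU-side sketch of the same expansion, where old_index(), new_indices and
 * vertex_count are illustrative stand-ins rather than driver code:
 *
 *    for (uint32_t tri = 0; tri < vertex_count - 2; tri++) {
 *       new_indices[tri * 3 + 0] = old_index(0);        // fan origin
 *       new_indices[tri * 3 + 1] = old_index(tri + 1);
 *       new_indices[tri * 3 + 2] = old_index(tri + 2);
 *    }
 *
 * e.g. a 6-vertex fan becomes 4 triangles, i.e. 12 list indices. */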
glsl_type_singleton_init_or_ref(); + + uint8_t old_index_size = dzn_index_size(old_index_type); + + nir_shader *nir = dzn_nir_triangle_fan_rewrite_index_shader(old_index_size); + + uint32_t root_param_count = 0; + D3D12_ROOT_PARAMETER1 root_params[DZN_META_TRIANGLE_FAN_REWRITE_IDX_MAX_PARAM_COUNT]; + + root_params[root_param_count++] = (D3D12_ROOT_PARAMETER1) { + .ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV, + .Descriptor = { + .ShaderRegister = 1, + .RegisterSpace = 0, + .Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE, + }, + .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL, + }; + + root_params[root_param_count++] = (D3D12_ROOT_PARAMETER1) { + .ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS, + .Constants = { + .ShaderRegister = 0, + .RegisterSpace = 0, + .Num32BitValues = sizeof(struct dzn_triangle_fan_rewrite_index_params) / 4, + }, + .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL, + }; + + if (old_index_type != DZN_NO_INDEX) { + root_params[root_param_count++] = (D3D12_ROOT_PARAMETER1) { + .ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV, + .Descriptor = { + .ShaderRegister = 2, + .RegisterSpace = 0, + .Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE, + }, + .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL, + }; + } + + assert(root_param_count <= ARRAY_SIZE(root_params)); + + D3D12_VERSIONED_ROOT_SIGNATURE_DESC root_sig_desc = { + .Version = D3D_ROOT_SIGNATURE_VERSION_1_1, + .Desc_1_1 = { + .NumParameters = root_param_count, + .pParameters = root_params, + .Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE, + }, + }; + + D3D12_COMPUTE_PIPELINE_STATE_DESC desc = { + .Flags = D3D12_PIPELINE_STATE_FLAG_NONE, + }; + + D3D12_INDIRECT_ARGUMENT_DESC cmd_args[] = { + { + .Type = D3D12_INDIRECT_ARGUMENT_TYPE_UNORDERED_ACCESS_VIEW, + .UnorderedAccessView = { + .RootParameterIndex = 0, + }, + }, + { + .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT, + .Constant = { + .RootParameterIndex = 1, + .DestOffsetIn32BitValues = 0, + .Num32BitValuesToSet = sizeof(struct dzn_triangle_fan_rewrite_index_params) / 4, + }, + }, + { + .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH, + }, + }; + + D3D12_COMMAND_SIGNATURE_DESC cmd_sig_desc = { + .ByteStride = sizeof(struct dzn_indirect_triangle_fan_rewrite_index_exec_params), + .NumArgumentDescs = ARRAY_SIZE(cmd_args), + .pArgumentDescs = cmd_args, + }; + + assert((cmd_sig_desc.ByteStride & 7) == 0); + + meta->root_sig = dzn_device_create_root_sig(device, &root_sig_desc); + if (!meta->root_sig) { + ret = vk_error(instance, VK_ERROR_INITIALIZATION_FAILED); + goto out; + } + + + desc.pRootSignature = meta->root_sig; + dzn_meta_compile_shader(device, nir, &desc.CS); + + if (FAILED(ID3D12Device1_CreateComputePipelineState(device->dev, &desc, + &IID_ID3D12PipelineState, + (void **)&meta->pipeline_state))) { + ret = vk_error(instance, VK_ERROR_INITIALIZATION_FAILED); + goto out; + } + + if (FAILED(ID3D12Device1_CreateCommandSignature(device->dev, &cmd_sig_desc, + meta->root_sig, + &IID_ID3D12CommandSignature, + (void **)&meta->cmd_sig))) + ret = vk_error(instance, VK_ERROR_INITIALIZATION_FAILED); + +out: + if (ret != VK_SUCCESS) + dzn_meta_triangle_fan_rewrite_index_finish(device, old_index_type); + + free((void *)desc.CS.pShaderBytecode); + ralloc_free(nir); + glsl_type_singleton_decref(); + + return ret; +} + +static const D3D12_SHADER_BYTECODE * +dzn_meta_blits_get_vs(struct dzn_device *device) +{ + struct dzn_meta_blits *meta = &device->blits; + D3D12_SHADER_BYTECODE *out = NULL; + + mtx_lock(&meta->shaders_lock); + + if (meta->vs.pShaderBytecode == NULL) { + nir_shader *nir = dzn_nir_blit_vs(); + +
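/* The blit vertex shader is compiled lazily and at most once per device:
 * callers serialize on shaders_lock, the first one pays for the NIR lowering
 * and DXIL emission below, and every later call returns the bytecode cached
 * in meta->vs. */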
NIR_PASS_V(nir, nir_lower_system_values); + + gl_system_value system_values[] = { + SYSTEM_VALUE_FIRST_VERTEX, + SYSTEM_VALUE_BASE_VERTEX, + }; + + NIR_PASS_V(nir, dxil_nir_lower_system_values_to_zero, system_values, + ARRAY_SIZE(system_values)); + + D3D12_SHADER_BYTECODE bc; + + dzn_meta_compile_shader(device, nir, &bc); + meta->vs.pShaderBytecode = + vk_alloc(&device->vk.alloc, bc.BytecodeLength, 8, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (meta->vs.pShaderBytecode) { + meta->vs.BytecodeLength = bc.BytecodeLength; + memcpy((void *)meta->vs.pShaderBytecode, bc.pShaderBytecode, bc.BytecodeLength); + out = &meta->vs; + } + free((void *)bc.pShaderBytecode); + ralloc_free(nir); + } else { + out = &meta->vs; + } + + mtx_unlock(&meta->shaders_lock); + + return out; +} + +static const D3D12_SHADER_BYTECODE * +dzn_meta_blits_get_fs(struct dzn_device *device, + const struct dzn_nir_blit_info *info) +{ + struct dzn_meta_blits *meta = &device->blits; + D3D12_SHADER_BYTECODE *out = NULL; + + mtx_lock(&meta->shaders_lock); + + STATIC_ASSERT(sizeof(struct dzn_nir_blit_info) == sizeof(uint32_t)); + + struct hash_entry *he = + _mesa_hash_table_search(meta->fs, (void *)(uintptr_t)info->hash_key); + + if (!he) { + nir_shader *nir = dzn_nir_blit_fs(info); + + if (info->out_type != GLSL_TYPE_FLOAT) { + dxil_wrap_sampler_state wrap_state = { + .is_int_sampler = 1, + .is_linear_filtering = 0, + .skip_boundary_conditions = 1, + }; + dxil_lower_sample_to_txf_for_integer_tex(nir, &wrap_state, NULL, 0); + } + + D3D12_SHADER_BYTECODE bc; + + dzn_meta_compile_shader(device, nir, &bc); + + out = (D3D12_SHADER_BYTECODE *) + vk_alloc(&device->vk.alloc, + sizeof(D3D12_SHADER_BYTECODE) + bc.BytecodeLength, 8, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (out) { + out->pShaderBytecode = (void *)(out + 1); + memcpy((void *)out->pShaderBytecode, bc.pShaderBytecode, bc.BytecodeLength); + out->BytecodeLength = bc.BytecodeLength; + _mesa_hash_table_insert(meta->fs, (void *)(uintptr_t)info->hash_key, out); + } + free((void *)bc.pShaderBytecode); + ralloc_free(nir); + } else { + out = (D3D12_SHADER_BYTECODE *)he->data; + } + + mtx_unlock(&meta->shaders_lock); + + return out; +} + +static void +dzn_meta_blit_destroy(struct dzn_device *device, struct dzn_meta_blit *blit) +{ + if (!blit) + return; + + if (blit->root_sig) + ID3D12RootSignature_Release(blit->root_sig); + if (blit->pipeline_state) + ID3D12PipelineState_Release(blit->pipeline_state); + + vk_free(&device->vk.alloc, blit); +} + +static struct dzn_meta_blit * +dzn_meta_blit_create(struct dzn_device *device, const struct dzn_meta_blit_key *key) +{ + struct dzn_meta_blits *blits = &device->blits; + struct dzn_meta_blit *blit = (struct dzn_meta_blit *) + vk_zalloc(&device->vk.alloc, sizeof(*blit), 8, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + + if (!blit) + return NULL; + + D3D12_DESCRIPTOR_RANGE1 ranges[] = { + { + .RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV, + .NumDescriptors = 1, + .BaseShaderRegister = 0, + .RegisterSpace = 0, + .Flags = D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_STATIC_KEEPING_BUFFER_BOUNDS_CHECKS, + .OffsetInDescriptorsFromTableStart = 0, + }, + }; + + D3D12_STATIC_SAMPLER_DESC samplers[] = { + { + .Filter = key->linear_filter ?
+ D3D12_FILTER_MIN_MAG_MIP_LINEAR : + D3D12_FILTER_MIN_MAG_MIP_POINT, + .AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP, + .AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP, + .AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP, + .MipLODBias = 0, + .MaxAnisotropy = 0, + .MinLOD = 0, + .MaxLOD = D3D12_FLOAT32_MAX, + .ShaderRegister = 0, + .RegisterSpace = 0, + .ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL, + }, + }; + + D3D12_ROOT_PARAMETER1 root_params[] = { + { + .ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE, + .DescriptorTable = { + .NumDescriptorRanges = ARRAY_SIZE(ranges), + .pDescriptorRanges = ranges, + }, + .ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL, + }, + { + .ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS, + .Constants = { + .ShaderRegister = 0, + .RegisterSpace = 0, + .Num32BitValues = 17, + }, + .ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX, + }, + }; + + D3D12_VERSIONED_ROOT_SIGNATURE_DESC root_sig_desc = { + .Version = D3D_ROOT_SIGNATURE_VERSION_1_1, + .Desc_1_1 = { + .NumParameters = ARRAY_SIZE(root_params), + .pParameters = root_params, + .NumStaticSamplers = ARRAY_SIZE(samplers), + .pStaticSamplers = samplers, + .Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE, + }, + }; + + D3D12_GRAPHICS_PIPELINE_STATE_DESC desc = { + .SampleMask = key->resolve ? 1 : (1ULL << key->samples) - 1, + .RasterizerState = { + .FillMode = D3D12_FILL_MODE_SOLID, + .CullMode = D3D12_CULL_MODE_NONE, + .DepthClipEnable = TRUE, + }, + .PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE, + .SampleDesc = { + .Count = key->resolve ? 1 : key->samples, + .Quality = 0, + }, + .Flags = D3D12_PIPELINE_STATE_FLAG_NONE, + }; + + struct dzn_nir_blit_info blit_fs_info = { + .src_samples = key->samples, + .loc = key->loc, + .out_type = key->out_type, + .sampler_dim = key->sampler_dim, + .src_is_array = key->src_is_array, + .resolve = key->resolve, + .padding = 0, + }; + + blit->root_sig = dzn_device_create_root_sig(device, &root_sig_desc); + if (!blit->root_sig) { + dzn_meta_blit_destroy(device, blit); + return NULL; + } + + desc.pRootSignature = blit->root_sig; + + const D3D12_SHADER_BYTECODE *vs, *fs; + + vs = dzn_meta_blits_get_vs(device); + if (!vs) { + dzn_meta_blit_destroy(device, blit); + return NULL; + } + + desc.VS = *vs; + assert(desc.VS.pShaderBytecode); + + fs = dzn_meta_blits_get_fs(device, &blit_fs_info); + if (!fs) { + dzn_meta_blit_destroy(device, blit); + return NULL; + } + + desc.PS = *fs; + assert(desc.PS.pShaderBytecode); + + assert(key->loc == FRAG_RESULT_DATA0 || + key->loc == FRAG_RESULT_DEPTH || + key->loc == FRAG_RESULT_STENCIL); + + if (key->loc == FRAG_RESULT_DATA0) { + desc.NumRenderTargets = 1; + desc.RTVFormats[0] = key->out_format; + desc.BlendState.RenderTarget[0].RenderTargetWriteMask = 0xf; + } else { + desc.DSVFormat = key->out_format; + if (key->loc == FRAG_RESULT_DEPTH) { + desc.DepthStencilState.DepthEnable = TRUE; + desc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL; + desc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_ALWAYS; + } else { + assert(key->loc == FRAG_RESULT_STENCIL); + desc.DepthStencilState.StencilEnable = TRUE; + desc.DepthStencilState.StencilWriteMask = 0xff; + desc.DepthStencilState.FrontFace.StencilFailOp = D3D12_STENCIL_OP_REPLACE; + desc.DepthStencilState.FrontFace.StencilDepthFailOp = D3D12_STENCIL_OP_REPLACE; + desc.DepthStencilState.FrontFace.StencilPassOp = D3D12_STENCIL_OP_REPLACE; + desc.DepthStencilState.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS; + desc.DepthStencilState.BackFace = 
desc.DepthStencilState.FrontFace; + } + } + + if (FAILED(ID3D12Device1_CreateGraphicsPipelineState(device->dev, &desc, + &IID_ID3D12PipelineState, + (void **)&blit->pipeline_state))) { + dzn_meta_blit_destroy(device, blit); + return NULL; + } + + return blit; +} + +const struct dzn_meta_blit * +dzn_meta_blits_get_context(struct dzn_device *device, + const struct dzn_meta_blit_key *key) +{ + struct dzn_meta_blit *out = NULL; + + STATIC_ASSERT(sizeof(struct dzn_meta_blit_key) == sizeof(uint64_t)); + + mtx_lock(&device->blits.contexts_lock); + + out = (struct dzn_meta_blit *) + _mesa_hash_table_u64_search(device->blits.contexts, key->u64); + if (!out) { + out = dzn_meta_blit_create(device, key); + + if (out) + _mesa_hash_table_u64_insert(device->blits.contexts, key->u64, out); + } + + mtx_unlock(&device->blits.contexts_lock); + + return out; +} + +static void +dzn_meta_blits_finish(struct dzn_device *device) +{ + struct dzn_meta_blits *meta = &device->blits; + + vk_free(&device->vk.alloc, (void *)meta->vs.pShaderBytecode); + + if (meta->fs) { + hash_table_foreach(meta->fs, he) + vk_free(&device->vk.alloc, he->data); + _mesa_hash_table_destroy(meta->fs, NULL); + } + + if (meta->contexts) { + hash_table_foreach(meta->contexts->table, he) + dzn_meta_blit_destroy(device, (struct dzn_meta_blit *)he->data); + _mesa_hash_table_u64_destroy(meta->contexts); + } + + mtx_destroy(&meta->shaders_lock); + mtx_destroy(&meta->contexts_lock); +} + +static VkResult +dzn_meta_blits_init(struct dzn_device *device) +{ + struct dzn_instance *instance = + container_of(device->vk.physical->instance, struct dzn_instance, vk); + struct dzn_meta_blits *meta = &device->blits; + + mtx_init(&meta->shaders_lock, mtx_plain); + mtx_init(&meta->contexts_lock, mtx_plain); + + meta->fs = _mesa_hash_table_create_u32_keys(NULL); + if (!meta->fs) { + dzn_meta_blits_finish(device); + return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY); + } + + meta->contexts = _mesa_hash_table_u64_create(NULL); + if (!meta->contexts) { + dzn_meta_blits_finish(device); + return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY); + } + + return VK_SUCCESS; +} + +void +dzn_meta_finish(struct dzn_device *device) +{ + for (uint32_t i = 0; i < ARRAY_SIZE(device->triangle_fan); i++) + dzn_meta_triangle_fan_rewrite_index_finish(device, (enum dzn_index_type)i); + + for (uint32_t i = 0; i < ARRAY_SIZE(device->indirect_draws); i++) + dzn_meta_indirect_draw_finish(device, (enum dzn_indirect_draw_type)i); + + dzn_meta_blits_finish(device); +} + +VkResult +dzn_meta_init(struct dzn_device *device) +{ + VkResult result = dzn_meta_blits_init(device); + if (result != VK_SUCCESS) + goto out; + + for (uint32_t i = 0; i < ARRAY_SIZE(device->indirect_draws); i++) { + result = + dzn_meta_indirect_draw_init(device, (enum dzn_indirect_draw_type)i); + if (result != VK_SUCCESS) + goto out; + } + + for (uint32_t i = 0; i < ARRAY_SIZE(device->triangle_fan); i++) { + result = + dzn_meta_triangle_fan_rewrite_index_init(device, (enum dzn_index_type)i); + if (result != VK_SUCCESS) + goto out; + } + +out: + if (result != VK_SUCCESS) { + dzn_meta_finish(device); + return result; + } + + return VK_SUCCESS; +} diff --git a/src/microsoft/vulkan/dzn_meta.cpp b/src/microsoft/vulkan/dzn_meta.cpp deleted file mode 100644 index ebd0108ddb0..00000000000 --- a/src/microsoft/vulkan/dzn_meta.cpp +++ /dev/null @@ -1,736 +0,0 @@ -/* - * Copyright © Microsoft Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files
(the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "dzn_private.h" - -#include "spirv_to_dxil.h" -#include "nir_to_dxil.h" - -#include "dxil_nir.h" -#include "dxil_nir_lower_int_samplers.h" -#include "dxil_validator.h" - -static void -dzn_meta_compile_shader(struct dzn_device *device, nir_shader *nir, - D3D12_SHADER_BYTECODE *slot) -{ - struct dzn_instance *instance = - container_of(device->vk.physical->instance, struct dzn_instance, vk); - - nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); - - if ((instance->debug_flags & DZN_DEBUG_NIR) && - (instance->debug_flags & DZN_DEBUG_INTERNAL)) - nir_print_shader(nir, stderr); - - struct nir_to_dxil_options opts = { .environment = DXIL_ENVIRONMENT_VULKAN }; - struct blob dxil_blob; - bool ret = nir_to_dxil(nir, &opts, &dxil_blob); - assert(ret); - - char *err; - bool res = dxil_validate_module(instance->dxil_validator, - dxil_blob.data, - dxil_blob.size, &err); - - if ((instance->debug_flags & DZN_DEBUG_DXIL) && - (instance->debug_flags & DZN_DEBUG_INTERNAL)) { - char *disasm = dxil_disasm_module(instance->dxil_validator, - dxil_blob.data, - dxil_blob.size); - if (disasm) { - fprintf(stderr, - "== BEGIN SHADER ============================================\n" - "%s\n" - "== END SHADER ==============================================\n", - disasm); - ralloc_free(disasm); - } - } - - if ((instance->debug_flags & DZN_DEBUG_DXIL) && - (instance->debug_flags & DZN_DEBUG_INTERNAL) && - err) { - fprintf(stderr, - "== VALIDATION ERROR =============================================\n" - "%s\n" - "== END ==========================================================\n", - err); - ralloc_free(err); - } - assert(res); - - void *data; - size_t size; - blob_finish_get_buffer(&dxil_blob, &data, &size); - slot->pShaderBytecode = data; - slot->BytecodeLength = size; -} - -#define DZN_META_INDIRECT_DRAW_MAX_PARAM_COUNT 4 - -static void -dzn_meta_indirect_draw_finish(struct dzn_device *device, enum dzn_indirect_draw_type type) -{ - struct dzn_meta_indirect_draw *meta = &device->indirect_draws[type]; - - if (meta->root_sig) - ID3D12RootSignature_Release(meta->root_sig); - - if (meta->pipeline_state) - ID3D12PipelineState_Release(meta->pipeline_state); -} - -static VkResult -dzn_meta_indirect_draw_init(struct dzn_device *device, - enum dzn_indirect_draw_type type) -{ - struct dzn_meta_indirect_draw *meta = &device->indirect_draws[type]; - struct dzn_instance *instance = - container_of(device->vk.physical->instance, struct dzn_instance, vk); - VkResult ret = VK_SUCCESS; - - glsl_type_singleton_init_or_ref(); - - nir_shader *nir = 
dzn_nir_indirect_draw_shader(type); - bool triangle_fan = type == DZN_INDIRECT_DRAW_TRIANGLE_FAN || - type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN; - uint32_t shader_params_size = - triangle_fan ? - sizeof(struct dzn_indirect_draw_triangle_fan_rewrite_params) : - sizeof(struct dzn_indirect_draw_rewrite_params); - - uint32_t root_param_count = 0; - D3D12_ROOT_PARAMETER1 root_params[DZN_META_INDIRECT_DRAW_MAX_PARAM_COUNT]; - - root_params[root_param_count++] = D3D12_ROOT_PARAMETER1 { - .ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS, - .Constants = { - .ShaderRegister = 0, - .RegisterSpace = 0, - .Num32BitValues = shader_params_size / 4, - }, - .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL, - }; - - root_params[root_param_count++] = D3D12_ROOT_PARAMETER1 { - .ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV, - .Descriptor = { - .ShaderRegister = 1, - .RegisterSpace = 0, - .Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE, - }, - .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL, - }; - - root_params[root_param_count++] = D3D12_ROOT_PARAMETER1 { - .ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV, - .Descriptor = { - .ShaderRegister = 2, - .RegisterSpace = 0, - .Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE, - }, - .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL, - }; - - - if (triangle_fan) { - root_params[root_param_count++] = D3D12_ROOT_PARAMETER1 { - .ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV, - .Descriptor = { - .ShaderRegister = 3, - .RegisterSpace = 0, - .Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE, - }, - .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL, - }; - } - - assert(root_param_count <= ARRAY_SIZE(root_params)); - - D3D12_VERSIONED_ROOT_SIGNATURE_DESC root_sig_desc = { - .Version = D3D_ROOT_SIGNATURE_VERSION_1_1, - .Desc_1_1 = { - .NumParameters = root_param_count, - .pParameters = root_params, - .Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE, - }, - }; - - D3D12_COMPUTE_PIPELINE_STATE_DESC desc = { - .Flags = D3D12_PIPELINE_STATE_FLAG_NONE, - }; - - meta->root_sig = - dzn_device_create_root_sig(device, &root_sig_desc); - if (!meta->root_sig) { - ret = vk_error(instance, VK_ERROR_INITIALIZATION_FAILED); - goto out; - } - - desc.pRootSignature = meta->root_sig; - dzn_meta_compile_shader(device, nir, &desc.CS); - assert(desc.CS.pShaderBytecode); - - if (FAILED(ID3D12Device1_CreateComputePipelineState(device->dev, &desc, - IID_ID3D12PipelineState, - (void **)&meta->pipeline_state))) - ret = vk_error(instance, VK_ERROR_INITIALIZATION_FAILED); - -out: - if (ret != VK_SUCCESS) - dzn_meta_indirect_draw_finish(device, type); - - free((void *)desc.CS.pShaderBytecode); - ralloc_free(nir); - glsl_type_singleton_decref(); - - return ret; -} - -#define DZN_META_TRIANGLE_FAN_REWRITE_IDX_MAX_PARAM_COUNT 3 - -static void -dzn_meta_triangle_fan_rewrite_index_finish(struct dzn_device *device, - enum dzn_index_type old_index_type) -{ - struct dzn_meta_triangle_fan_rewrite_index *meta = - &device->triangle_fan[old_index_type]; - - if (meta->root_sig) - ID3D12RootSignature_Release(meta->root_sig); - if (meta->pipeline_state) - ID3D12PipelineState_Release(meta->pipeline_state); - if (meta->cmd_sig) - ID3D12CommandSignature_Release(meta->cmd_sig); -} - -static VkResult -dzn_meta_triangle_fan_rewrite_index_init(struct dzn_device *device, - enum dzn_index_type old_index_type) -{ - struct dzn_meta_triangle_fan_rewrite_index *meta = - &device->triangle_fan[old_index_type]; - struct dzn_instance *instance = - container_of(device->vk.physical->instance, struct dzn_instance, vk); - VkResult ret = VK_SUCCESS; - - 
glsl_type_singleton_init_or_ref(); - - uint8_t old_index_size = dzn_index_size(old_index_type); - - nir_shader *nir = dzn_nir_triangle_fan_rewrite_index_shader(old_index_size); - - uint32_t root_param_count = 0; - D3D12_ROOT_PARAMETER1 root_params[DZN_META_TRIANGLE_FAN_REWRITE_IDX_MAX_PARAM_COUNT]; - - root_params[root_param_count++] = D3D12_ROOT_PARAMETER1 { - .ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV, - .Descriptor = { - .ShaderRegister = 1, - .RegisterSpace = 0, - .Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE, - }, - .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL, - }; - - root_params[root_param_count++] = D3D12_ROOT_PARAMETER1 { - .ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS, - .Constants = { - .ShaderRegister = 0, - .RegisterSpace = 0, - .Num32BitValues = sizeof(struct dzn_triangle_fan_rewrite_index_params) / 4, - }, - .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL, - }; - - if (old_index_type != DZN_NO_INDEX) { - root_params[root_param_count++] = D3D12_ROOT_PARAMETER1 { - .ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV, - .Descriptor = { - .ShaderRegister = 2, - .RegisterSpace = 0, - .Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE, - }, - .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL, - }; - } - - assert(root_param_count <= ARRAY_SIZE(root_params)); - - D3D12_VERSIONED_ROOT_SIGNATURE_DESC root_sig_desc = { - .Version = D3D_ROOT_SIGNATURE_VERSION_1_1, - .Desc_1_1 = { - .NumParameters = root_param_count, - .pParameters = root_params, - .Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE, - }, - }; - - D3D12_COMPUTE_PIPELINE_STATE_DESC desc = { - .Flags = D3D12_PIPELINE_STATE_FLAG_NONE, - }; - - D3D12_INDIRECT_ARGUMENT_DESC cmd_args[] = { - { - .Type = D3D12_INDIRECT_ARGUMENT_TYPE_UNORDERED_ACCESS_VIEW, - .UnorderedAccessView = { - .RootParameterIndex = 0, - }, - }, - { - .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT, - .Constant = { - .RootParameterIndex = 1, - .DestOffsetIn32BitValues = 0, - .Num32BitValuesToSet = sizeof(struct dzn_triangle_fan_rewrite_index_params) / 4, - }, - }, - { - .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH, - }, - }; - - D3D12_COMMAND_SIGNATURE_DESC cmd_sig_desc = { - .ByteStride = sizeof(struct dzn_indirect_triangle_fan_rewrite_index_exec_params), - .NumArgumentDescs = ARRAY_SIZE(cmd_args), - .pArgumentDescs = cmd_args, - }; - - assert((cmd_sig_desc.ByteStride & 7) == 0); - - meta->root_sig = dzn_device_create_root_sig(device, &root_sig_desc); - if (!meta->root_sig) { - ret = vk_error(instance, VK_ERROR_INITIALIZATION_FAILED); - goto out; - } - - - desc.pRootSignature = meta->root_sig; - dzn_meta_compile_shader(device, nir, &desc.CS); - - if (FAILED(ID3D12Device1_CreateComputePipelineState(device->dev, &desc, - IID_ID3D12PipelineState, - (void **)&meta->pipeline_state))) { - ret = vk_error(instance, VK_ERROR_INITIALIZATION_FAILED); - goto out; - } - - if (FAILED(ID3D12Device1_CreateCommandSignature(device->dev, &cmd_sig_desc, - meta->root_sig, - IID_ID3D12CommandSignature, - (void **)&meta->cmd_sig))) - ret = vk_error(instance, VK_ERROR_INITIALIZATION_FAILED); - -out: - if (ret != VK_SUCCESS) - dzn_meta_triangle_fan_rewrite_index_finish(device, old_index_type); - - free((void *)desc.CS.pShaderBytecode); - ralloc_free(nir); - glsl_type_singleton_decref(); - - return ret; -} - -static const D3D12_SHADER_BYTECODE * -dzn_meta_blits_get_vs(struct dzn_device *device) -{ - struct dzn_meta_blits *meta = &device->blits; - D3D12_SHADER_BYTECODE *out; - - mtx_lock(&meta->shaders_lock); - - if (meta->vs.pShaderBytecode == NULL) { - nir_shader *nir = dzn_nir_blit_vs(); - - 
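For context on the command signature above: it lets the index rewrite be launched through ExecuteIndirect, each record supplying the output UAV address, the rewrite constants, and a dispatch size. The index transform itself is simple; a host-side sketch of what dzn_nir_triangle_fan_rewrite_index_shader computes on the GPU (hypothetical helper, for illustration only):

   /* A fan (v0, v1, ..., vn-1) becomes n - 2 triangles (v0, vt+1, vt+2).
    * old_idx is NULL for non-indexed draws (the DZN_NO_INDEX variant). */
   static void
   rewrite_fan_indices(uint32_t *new_idx, const uint32_t *old_idx,
                       uint32_t first_index, uint32_t triangle_count)
   {
      for (uint32_t t = 0; t < triangle_count; t++) {
         const uint32_t src[3] = {
            first_index, first_index + t + 1, first_index + t + 2,
         };
         for (unsigned i = 0; i < 3; i++)
            new_idx[t * 3 + i] = old_idx ? old_idx[src[i]] : src[i];
      }
   }

-   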
NIR_PASS_V(nir, nir_lower_system_values); - - gl_system_value system_values[] = { - SYSTEM_VALUE_FIRST_VERTEX, - SYSTEM_VALUE_BASE_VERTEX, - }; - - NIR_PASS_V(nir, dxil_nir_lower_system_values_to_zero, system_values, - ARRAY_SIZE(system_values)); - - D3D12_SHADER_BYTECODE bc; - - dzn_meta_compile_shader(device, nir, &bc); - meta->vs.pShaderBytecode = - vk_alloc(&device->vk.alloc, bc.BytecodeLength, 8, - VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); - if (meta->vs.pShaderBytecode) { - meta->vs.BytecodeLength = bc.BytecodeLength; - memcpy((void *)meta->vs.pShaderBytecode, bc.pShaderBytecode, bc.BytecodeLength); - out = &meta->vs; - } - free((void *)bc.pShaderBytecode); - ralloc_free(nir); - } else { - out = &meta->vs; - } - - mtx_unlock(&meta->shaders_lock); - - return &meta->vs; -} - -static const D3D12_SHADER_BYTECODE * -dzn_meta_blits_get_fs(struct dzn_device *device, - const struct dzn_nir_blit_info *info) -{ - struct dzn_meta_blits *meta = &device->blits; - D3D12_SHADER_BYTECODE *out = NULL; - - mtx_lock(&meta->shaders_lock); - - STATIC_ASSERT(sizeof(struct dzn_nir_blit_info) == sizeof(uint32_t)); - - struct hash_entry *he = - _mesa_hash_table_search(meta->fs, (void *)(uintptr_t)info->hash_key); - - if (!he) { - nir_shader *nir = dzn_nir_blit_fs(info); - - if (info->out_type != GLSL_TYPE_FLOAT) { - dxil_wrap_sampler_state wrap_state = { - .is_int_sampler = 1, - .is_linear_filtering = 0, - .skip_boundary_conditions = 1, - }; - dxil_lower_sample_to_txf_for_integer_tex(nir, &wrap_state, NULL, 0); - } - - D3D12_SHADER_BYTECODE bc; - - dzn_meta_compile_shader(device, nir, &bc); - - out = (D3D12_SHADER_BYTECODE *) - vk_alloc(&device->vk.alloc, - sizeof(D3D12_SHADER_BYTECODE) + bc.BytecodeLength, 8, - VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); - if (out) { - out->pShaderBytecode = (void *)(out + 1); - memcpy((void *)out->pShaderBytecode, bc.pShaderBytecode, bc.BytecodeLength); - out->BytecodeLength = bc.BytecodeLength; - _mesa_hash_table_insert(meta->fs, &info->hash_key, out); - } - free((void *)bc.pShaderBytecode); - ralloc_free(nir); - } else { - out = (D3D12_SHADER_BYTECODE *)he->data; - } - - mtx_unlock(&meta->shaders_lock); - - return out; -} - -static void -dzn_meta_blit_destroy(struct dzn_device *device, struct dzn_meta_blit *blit) -{ - if (!blit) - return; - - if (blit->root_sig) - ID3D12RootSignature_Release(blit->root_sig); - if (blit->pipeline_state) - ID3D12PipelineState_Release(blit->pipeline_state); - - vk_free(&device->vk.alloc, blit); -} - -static struct dzn_meta_blit * -dzn_meta_blit_create(struct dzn_device *device, const struct dzn_meta_blit_key *key) -{ - struct dzn_meta_blits *blits = &device->blits; - struct dzn_meta_blit *blit = (struct dzn_meta_blit *) - vk_zalloc(&device->vk.alloc, sizeof(*blit), 8, - VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); - - if (!blit) - return NULL; - - D3D12_DESCRIPTOR_RANGE1 ranges[] = { - { - .RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV, - .NumDescriptors = 1, - .BaseShaderRegister = 0, - .RegisterSpace = 0, - .Flags = D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_STATIC_KEEPING_BUFFER_BOUNDS_CHECKS, - .OffsetInDescriptorsFromTableStart = 0, - }, - }; - - D3D12_STATIC_SAMPLER_DESC samplers[] = { - { - .Filter = key->linear_filter ? 
- D3D12_FILTER_MIN_MAG_MIP_LINEAR : - D3D12_FILTER_MIN_MAG_MIP_POINT, - .AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP, - .AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP, - .AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP, - .MipLODBias = 0, - .MaxAnisotropy = 0, - .MinLOD = 0, - .MaxLOD = D3D12_FLOAT32_MAX, - .ShaderRegister = 0, - .RegisterSpace = 0, - .ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL, - }, - }; - - D3D12_ROOT_PARAMETER1 root_params[] = { - { - .ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE, - .DescriptorTable = { - .NumDescriptorRanges = ARRAY_SIZE(ranges), - .pDescriptorRanges = ranges, - }, - .ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL, - }, - { - .ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS, - .Constants = { - .ShaderRegister = 0, - .RegisterSpace = 0, - .Num32BitValues = 17, - }, - .ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX, - }, - }; - - D3D12_VERSIONED_ROOT_SIGNATURE_DESC root_sig_desc = { - .Version = D3D_ROOT_SIGNATURE_VERSION_1_1, - .Desc_1_1 = { - .NumParameters = ARRAY_SIZE(root_params), - .pParameters = root_params, - .NumStaticSamplers = ARRAY_SIZE(samplers), - .pStaticSamplers = samplers, - .Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE, - }, - }; - - D3D12_GRAPHICS_PIPELINE_STATE_DESC desc = { - .SampleMask = key->resolve ? 1 : (1ULL << key->samples) - 1, - .RasterizerState = { - .FillMode = D3D12_FILL_MODE_SOLID, - .CullMode = D3D12_CULL_MODE_NONE, - .DepthClipEnable = TRUE, - }, - .PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE, - .SampleDesc = { - .Count = key->resolve ? 1 : key->samples, - .Quality = 0, - }, - .Flags = D3D12_PIPELINE_STATE_FLAG_NONE, - }; - - struct dzn_nir_blit_info blit_fs_info = { - .src_samples = key->samples, - .loc = key->loc, - .out_type = key->out_type, - .sampler_dim = key->sampler_dim, - .src_is_array = key->src_is_array, - .resolve = key->resolve, - .padding = 0, - }; - - blit->root_sig = dzn_device_create_root_sig(device, &root_sig_desc); - if (!blit->root_sig) { - dzn_meta_blit_destroy(device, blit); - return NULL; - } - - desc.pRootSignature = blit->root_sig; - - const D3D12_SHADER_BYTECODE *vs, *fs; - - vs = dzn_meta_blits_get_vs(device); - if (!vs) { - dzn_meta_blit_destroy(device, blit); - return NULL; - } - - desc.VS = *vs; - assert(desc.VS.pShaderBytecode); - - fs = dzn_meta_blits_get_fs(device, &blit_fs_info); - if (!fs) { - dzn_meta_blit_destroy(device, blit); - return NULL; - } - - desc.PS = *fs; - assert(desc.PS.pShaderBytecode); - - assert(key->loc == FRAG_RESULT_DATA0 || - key->loc == FRAG_RESULT_DEPTH || - key->loc == FRAG_RESULT_STENCIL); - - if (key->loc == FRAG_RESULT_DATA0) { - desc.NumRenderTargets = 1; - desc.RTVFormats[0] = key->out_format; - desc.BlendState.RenderTarget[0].RenderTargetWriteMask = 0xf; - } else { - desc.DSVFormat = key->out_format; - if (key->loc == FRAG_RESULT_DEPTH) { - desc.DepthStencilState.DepthEnable = TRUE; - desc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL; - desc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_ALWAYS; - } else { - assert(key->loc == FRAG_RESULT_STENCIL); - desc.DepthStencilState.StencilEnable = TRUE; - desc.DepthStencilState.StencilWriteMask = 0xff; - desc.DepthStencilState.FrontFace.StencilFailOp = D3D12_STENCIL_OP_REPLACE; - desc.DepthStencilState.FrontFace.StencilDepthFailOp = D3D12_STENCIL_OP_REPLACE; - desc.DepthStencilState.FrontFace.StencilPassOp = D3D12_STENCIL_OP_REPLACE; - desc.DepthStencilState.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS; - desc.DepthStencilState.BackFace = 
desc.DepthStencilState.FrontFace; - } - } - - if (FAILED(ID3D12Device1_CreateGraphicsPipelineState(device->dev, &desc, - IID_ID3D12PipelineState, - (void **)&blit->pipeline_state))) { - dzn_meta_blit_destroy(device, blit); - return NULL; - } - - return blit; -} - -const struct dzn_meta_blit * -dzn_meta_blits_get_context(struct dzn_device *device, - const struct dzn_meta_blit_key *key) -{ - struct dzn_meta_blit *out = NULL; - - STATIC_ASSERT(sizeof(key) == sizeof(uint64_t)); - - mtx_lock(&device->blits.contexts_lock); - - out = (struct dzn_meta_blit *) - _mesa_hash_table_u64_search(device->blits.contexts, key->u64); - if (!out) { - out = dzn_meta_blit_create(device, key); - - if (out) - _mesa_hash_table_u64_insert(device->blits.contexts, key->u64, out); - } - - mtx_unlock(&device->blits.contexts_lock); - - return out; -} - -static void -dzn_meta_blits_finish(struct dzn_device *device) -{ - struct dzn_meta_blits *meta = &device->blits; - - vk_free(&device->vk.alloc, (void *)meta->vs.pShaderBytecode); - - if (meta->fs) { - hash_table_foreach(meta->fs, he) - vk_free(&device->vk.alloc, he->data); - _mesa_hash_table_destroy(meta->fs, NULL); - } - - if (meta->contexts) { - hash_table_foreach(meta->contexts->table, he) - dzn_meta_blit_destroy(device, (struct dzn_meta_blit *)he->data); - _mesa_hash_table_u64_destroy(meta->contexts); - } - - mtx_destroy(&meta->shaders_lock); - mtx_destroy(&meta->contexts_lock); -} - -static VkResult -dzn_meta_blits_init(struct dzn_device *device) -{ - struct dzn_instance *instance = - container_of(device->vk.physical->instance, struct dzn_instance, vk); - struct dzn_meta_blits *meta = &device->blits; - - mtx_init(&meta->shaders_lock, mtx_plain); - mtx_init(&meta->contexts_lock, mtx_plain); - - meta->fs = _mesa_hash_table_create_u32_keys(NULL); - if (!meta->fs) { - dzn_meta_blits_finish(device); - return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY); - } - - meta->contexts = _mesa_hash_table_u64_create(NULL); - if (!meta->contexts) { - dzn_meta_blits_finish(device); - return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY); - } - - return VK_SUCCESS; -} - -void -dzn_meta_finish(struct dzn_device *device) -{ - for (uint32_t i = 0; i < ARRAY_SIZE(device->triangle_fan); i++) - dzn_meta_triangle_fan_rewrite_index_finish(device, (enum dzn_index_type)i); - - for (uint32_t i = 0; i < ARRAY_SIZE(device->indirect_draws); i++) - dzn_meta_indirect_draw_finish(device, (enum dzn_indirect_draw_type)i); - - dzn_meta_blits_finish(device); -} - -VkResult -dzn_meta_init(struct dzn_device *device) -{ - VkResult result = dzn_meta_blits_init(device); - if (result != VK_SUCCESS) - goto out; - - for (uint32_t i = 0; i < ARRAY_SIZE(device->indirect_draws); i++) { - VkResult result = - dzn_meta_indirect_draw_init(device, (enum dzn_indirect_draw_type)i); - if (result != VK_SUCCESS) - goto out; - } - - for (uint32_t i = 0; i < ARRAY_SIZE(device->triangle_fan); i++) { - VkResult result = - dzn_meta_triangle_fan_rewrite_index_init(device, (enum dzn_index_type)i); - if (result != VK_SUCCESS) - goto out; - } - -out: - if (result != VK_SUCCESS) { - dzn_meta_finish(device); - return result; - } - - return VK_SUCCESS; -} diff --git a/src/microsoft/vulkan/dzn_pass.c b/src/microsoft/vulkan/dzn_pass.c new file mode 100644 index 00000000000..67e0e0c5608 --- /dev/null +++ b/src/microsoft/vulkan/dzn_pass.c @@ -0,0 +1,223 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files 
(the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "dzn_private.h" + +#include "vk_alloc.h" +#include "vk_format.h" + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_CreateRenderPass2(VkDevice dev, + const VkRenderPassCreateInfo2KHR *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkRenderPass *pRenderPass) +{ + VK_FROM_HANDLE(dzn_device, device, dev); + + VK_MULTIALLOC(ma); + VK_MULTIALLOC_DECL(&ma, struct dzn_render_pass, pass, 1); + VK_MULTIALLOC_DECL(&ma, struct dzn_subpass, subpasses, + pCreateInfo->subpassCount); + VK_MULTIALLOC_DECL(&ma, struct dzn_attachment, attachments, + pCreateInfo->attachmentCount); + + if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, pAllocator, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT)) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + vk_object_base_init(&device->vk, &pass->base, VK_OBJECT_TYPE_RENDER_PASS); + pass->subpasses = subpasses; + pass->subpass_count = pCreateInfo->subpassCount; + pass->attachments = attachments; + pass->attachment_count = pCreateInfo->attachmentCount; + + assert(!pass->attachment_count || pass->attachments); + for (uint32_t i = 0; i < pass->attachment_count; i++) { + const VkAttachmentDescription2 *attachment = &pCreateInfo->pAttachments[i]; + + attachments[i].idx = i; + attachments[i].format = attachment->format; + assert(attachments[i].format); + if (vk_format_is_depth_or_stencil(attachment->format)) { + attachments[i].clear.depth = + attachment->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR; + attachments[i].clear.stencil = + attachment->stencilLoadOp == VK_ATTACHMENT_LOAD_OP_CLEAR; + } else { + attachments[i].clear.color = + attachment->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR; + } + attachments[i].samples = attachment->samples; + if (vk_format_has_stencil(attachment->format)) { + attachments[i].stencil.before = + dzn_image_layout_to_state(attachment->initialLayout, VK_IMAGE_ASPECT_STENCIL_BIT); + attachments[i].stencil.after = + dzn_image_layout_to_state(attachment->finalLayout, VK_IMAGE_ASPECT_STENCIL_BIT); + attachments[i].stencil.last = attachments[i].stencil.before; + } + + if (vk_format_has_depth(attachment->format)) { + attachments[i].before = + dzn_image_layout_to_state(attachment->initialLayout, VK_IMAGE_ASPECT_STENCIL_BIT); + attachments[i].after = + dzn_image_layout_to_state(attachment->finalLayout, VK_IMAGE_ASPECT_STENCIL_BIT); + attachments[i].last = attachments[i].before; + } else { + assert(vk_format_is_color(attachment->format)); + attachments[i].before = + dzn_image_layout_to_state(attachment->initialLayout, VK_IMAGE_ASPECT_COLOR_BIT); + attachments[i].after = + 
dzn_image_layout_to_state(attachment->finalLayout, VK_IMAGE_ASPECT_COLOR_BIT); + attachments[i].last = attachments[i].before; + } + } + + assert(subpasses); + for (uint32_t i = 0; i < pass->subpass_count; i++) { + const VkSubpassDescription2 *subpass = &pCreateInfo->pSubpasses[i]; + const VkSubpassDescription2 *subpass_after = NULL; + + if (i + 1 < pass->subpass_count) + subpass_after = &pCreateInfo->pSubpasses[i + 1]; + + for (uint32_t j = 0; j < subpass->colorAttachmentCount; j++) { + uint32_t idx = subpass->pColorAttachments[j].attachment; + subpasses[i].colors[j].idx = idx; + if (idx != VK_ATTACHMENT_UNUSED) { + subpasses[i].colors[j].aspects = VK_IMAGE_ASPECT_COLOR_BIT; + subpasses[i].colors[j].before = attachments[idx].last; + subpasses[i].colors[j].during = + dzn_image_layout_to_state(subpass->pColorAttachments[j].layout, + VK_IMAGE_ASPECT_COLOR_BIT); + attachments[idx].last = subpasses[i].colors[j].during; + attachments[idx].aspects |= VK_IMAGE_ASPECT_COLOR_BIT; + subpasses[i].color_count = j + 1; + } + + idx = subpass->pResolveAttachments ? + subpass->pResolveAttachments[j].attachment : + VK_ATTACHMENT_UNUSED; + subpasses[i].resolve[j].idx = idx; + if (idx != VK_ATTACHMENT_UNUSED) { + subpasses[i].resolve[j].aspects = VK_IMAGE_ASPECT_COLOR_BIT; + subpasses[i].resolve[j].before = attachments[idx].last; + subpasses[i].resolve[j].during = + dzn_image_layout_to_state(subpass->pResolveAttachments[j].layout, + VK_IMAGE_ASPECT_COLOR_BIT); + attachments[idx].last = subpasses[i].resolve[j].during; + attachments[idx].aspects |= VK_IMAGE_ASPECT_COLOR_BIT; + } + } + + subpasses[i].zs.idx = VK_ATTACHMENT_UNUSED; + if (subpass->pDepthStencilAttachment) { + uint32_t idx = subpass->pDepthStencilAttachment->attachment; + subpasses[i].zs.idx = idx; + if (idx != VK_ATTACHMENT_UNUSED) { + subpasses[i].zs.aspects = vk_format_aspects(attachments[idx].format); + subpasses[i].zs.before = attachments[idx].last; + subpasses[i].zs.during = attachments[idx].last; + subpasses[i].zs.stencil.before = attachments[idx].stencil.last; + subpasses[i].zs.stencil.during = attachments[idx].stencil.last; + + if (subpasses[i].zs.aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { + subpasses[i].zs.stencil.during = + dzn_image_layout_to_state(subpass->pDepthStencilAttachment->layout, + VK_IMAGE_ASPECT_STENCIL_BIT); + } + + if (subpasses[i].zs.aspects & VK_IMAGE_ASPECT_DEPTH_BIT) { + subpasses[i].zs.during = + dzn_image_layout_to_state(subpass->pDepthStencilAttachment->layout, + VK_IMAGE_ASPECT_DEPTH_BIT); + } + + attachments[idx].last = subpasses[i].zs.during; + attachments[idx].stencil.last = subpasses[i].zs.stencil.during; + attachments[idx].aspects |= subpasses[i].zs.aspects; + } + } + + subpasses[i].input_count = subpass->inputAttachmentCount; + for (uint32_t j = 0; j < subpasses[i].input_count; j++) { + uint32_t idx = subpass->pInputAttachments[j].attachment; + subpasses[i].inputs[j].idx = idx; + if (idx != VK_ATTACHMENT_UNUSED) { + subpasses[i].inputs[j].aspects = subpass->pInputAttachments[j].aspectMask; + subpasses[i].inputs[j].before = attachments[idx].last; + subpasses[i].inputs[j].during = attachments[idx].last; + subpasses[i].inputs[j].stencil.before = attachments[idx].stencil.last; + subpasses[i].inputs[j].stencil.during = attachments[idx].stencil.last; + + if (subpasses[i].inputs[j].aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { + subpasses[i].inputs[j].stencil.during = + dzn_image_layout_to_state(subpass->pInputAttachments[j].layout, + VK_IMAGE_ASPECT_STENCIL_BIT); + } + + if (subpasses[i].inputs[j].aspects & 
VK_IMAGE_ASPECT_DEPTH_BIT) { + subpasses[i].inputs[j].during = + dzn_image_layout_to_state(subpass->pInputAttachments[j].layout, + VK_IMAGE_ASPECT_DEPTH_BIT); + attachments[idx].last = subpasses[i].inputs[j].during; + } else if (subpasses[i].inputs[j].aspects == VK_IMAGE_ASPECT_COLOR_BIT) { + subpasses[i].inputs[j].during = + dzn_image_layout_to_state(subpass->pInputAttachments[j].layout, + VK_IMAGE_ASPECT_COLOR_BIT); + } + + attachments[idx].last = subpasses[i].inputs[j].during; + attachments[idx].stencil.last = subpasses[i].inputs[j].stencil.during; + attachments[idx].aspects |= subpass->pInputAttachments[j].aspectMask; + } + } + } + + *pRenderPass = dzn_render_pass_to_handle(pass); + return VK_SUCCESS; +} + +VKAPI_ATTR void VKAPI_CALL +dzn_DestroyRenderPass(VkDevice dev, + VkRenderPass p, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(dzn_device, device, dev); + VK_FROM_HANDLE(dzn_render_pass, pass, p); + + if (!pass) + return; + + vk_object_base_finish(&pass->base); + vk_free2(&device->vk.alloc, pAllocator, pass); +} + + +VKAPI_ATTR void VKAPI_CALL +dzn_GetRenderAreaGranularity(VkDevice device, + VkRenderPass pass, + VkExtent2D *pGranularity) +{ + // FIXME: query the actual optimal granularity + pGranularity->width = pGranularity->height = 1; +} diff --git a/src/microsoft/vulkan/dzn_pass.cpp b/src/microsoft/vulkan/dzn_pass.cpp deleted file mode 100644 index 67e0e0c5608..00000000000 --- a/src/microsoft/vulkan/dzn_pass.cpp +++ /dev/null @@ -1,223 +0,0 @@ -/* - * Copyright © Microsoft Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include "dzn_private.h" - -#include "vk_alloc.h" -#include "vk_format.h" - -VKAPI_ATTR VkResult VKAPI_CALL -dzn_CreateRenderPass2(VkDevice dev, - const VkRenderPassCreateInfo2KHR *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkRenderPass *pRenderPass) -{ - VK_FROM_HANDLE(dzn_device, device, dev); - - VK_MULTIALLOC(ma); - VK_MULTIALLOC_DECL(&ma, struct dzn_render_pass, pass, 1); - VK_MULTIALLOC_DECL(&ma, struct dzn_subpass, subpasses, - pCreateInfo->subpassCount); - VK_MULTIALLOC_DECL(&ma, struct dzn_attachment, attachments, - pCreateInfo->attachmentCount); - - if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, pAllocator, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT)) - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - - vk_object_base_init(&device->vk, &pass->base, VK_OBJECT_TYPE_RENDER_PASS); - pass->subpasses = subpasses; - pass->subpass_count = pCreateInfo->subpassCount; - pass->attachments = attachments; - pass->attachment_count = pCreateInfo->attachmentCount; - - assert(!pass->attachment_count || pass->attachments); - for (uint32_t i = 0; i < pass->attachment_count; i++) { - const VkAttachmentDescription2 *attachment = &pCreateInfo->pAttachments[i]; - - attachments[i].idx = i; - attachments[i].format = attachment->format; - assert(attachments[i].format); - if (vk_format_is_depth_or_stencil(attachment->format)) { - attachments[i].clear.depth = - attachment->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR; - attachments[i].clear.stencil = - attachment->stencilLoadOp == VK_ATTACHMENT_LOAD_OP_CLEAR; - } else { - attachments[i].clear.color = - attachment->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR; - } - attachments[i].samples = attachment->samples; - if (vk_format_has_stencil(attachment->format)) { - attachments[i].stencil.before = - dzn_image_layout_to_state(attachment->initialLayout, VK_IMAGE_ASPECT_STENCIL_BIT); - attachments[i].stencil.after = - dzn_image_layout_to_state(attachment->finalLayout, VK_IMAGE_ASPECT_STENCIL_BIT); - attachments[i].stencil.last = attachments[i].stencil.before; - } - - if (vk_format_has_depth(attachment->format)) { - attachments[i].before = - dzn_image_layout_to_state(attachment->initialLayout, VK_IMAGE_ASPECT_STENCIL_BIT); - attachments[i].after = - dzn_image_layout_to_state(attachment->finalLayout, VK_IMAGE_ASPECT_STENCIL_BIT); - attachments[i].last = attachments[i].before; - } else { - assert(vk_format_is_color(attachment->format)); - attachments[i].before = - dzn_image_layout_to_state(attachment->initialLayout, VK_IMAGE_ASPECT_COLOR_BIT); - attachments[i].after = - dzn_image_layout_to_state(attachment->finalLayout, VK_IMAGE_ASPECT_COLOR_BIT); - attachments[i].last = attachments[i].before; - } - } - - assert(subpasses); - for (uint32_t i = 0; i < pass->subpass_count; i++) { - const VkSubpassDescription2 *subpass = &pCreateInfo->pSubpasses[i]; - const VkSubpassDescription2 *subpass_after = NULL; - - if (i + 1 < pass->subpass_count) - subpass_after = &pCreateInfo->pSubpasses[i + 1]; - - for (uint32_t j = 0; j < subpass->colorAttachmentCount; j++) { - uint32_t idx = subpass->pColorAttachments[j].attachment; - subpasses[i].colors[j].idx = idx; - if (idx != VK_ATTACHMENT_UNUSED) { - subpasses[i].colors[j].aspects = VK_IMAGE_ASPECT_COLOR_BIT; - subpasses[i].colors[j].before = attachments[idx].last; - subpasses[i].colors[j].during = - dzn_image_layout_to_state(subpass->pColorAttachments[j].layout, - VK_IMAGE_ASPECT_COLOR_BIT); - attachments[idx].last = subpasses[i].colors[j].during; - attachments[idx].aspects |= VK_IMAGE_ASPECT_COLOR_BIT; - 
subpasses[i].color_count = j + 1; - } - - idx = subpass->pResolveAttachments ? - subpass->pResolveAttachments[j].attachment : - VK_ATTACHMENT_UNUSED; - subpasses[i].resolve[j].idx = idx; - if (idx != VK_ATTACHMENT_UNUSED) { - subpasses[i].resolve[j].aspects = VK_IMAGE_ASPECT_COLOR_BIT; - subpasses[i].resolve[j].before = attachments[idx].last; - subpasses[i].resolve[j].during = - dzn_image_layout_to_state(subpass->pResolveAttachments[j].layout, - VK_IMAGE_ASPECT_COLOR_BIT); - attachments[idx].last = subpasses[i].resolve[j].during; - attachments[idx].aspects |= VK_IMAGE_ASPECT_COLOR_BIT; - } - } - - subpasses[i].zs.idx = VK_ATTACHMENT_UNUSED; - if (subpass->pDepthStencilAttachment) { - uint32_t idx = subpass->pDepthStencilAttachment->attachment; - subpasses[i].zs.idx = idx; - if (idx != VK_ATTACHMENT_UNUSED) { - subpasses[i].zs.aspects = vk_format_aspects(attachments[idx].format); - subpasses[i].zs.before = attachments[idx].last; - subpasses[i].zs.during = attachments[idx].last; - subpasses[i].zs.stencil.before = attachments[idx].stencil.last; - subpasses[i].zs.stencil.during = attachments[idx].stencil.last; - - if (subpasses[i].zs.aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { - subpasses[i].zs.stencil.during = - dzn_image_layout_to_state(subpass->pDepthStencilAttachment->layout, - VK_IMAGE_ASPECT_STENCIL_BIT); - } - - if (subpasses[i].zs.aspects & VK_IMAGE_ASPECT_DEPTH_BIT) { - subpasses[i].zs.during = - dzn_image_layout_to_state(subpass->pDepthStencilAttachment->layout, - VK_IMAGE_ASPECT_DEPTH_BIT); - } - - attachments[idx].last = subpasses[i].zs.during; - attachments[idx].stencil.last = subpasses[i].zs.stencil.during; - attachments[idx].aspects |= subpasses[i].zs.aspects; - } - } - - subpasses[i].input_count = subpass->inputAttachmentCount; - for (uint32_t j = 0; j < subpasses[i].input_count; j++) { - uint32_t idx = subpass->pInputAttachments[j].attachment; - subpasses[i].inputs[j].idx = idx; - if (idx != VK_ATTACHMENT_UNUSED) { - subpasses[i].inputs[j].aspects = subpass->pInputAttachments[j].aspectMask; - subpasses[i].inputs[j].before = attachments[idx].last; - subpasses[i].inputs[j].during = attachments[idx].last; - subpasses[i].inputs[j].stencil.before = attachments[idx].stencil.last; - subpasses[i].inputs[j].stencil.during = attachments[idx].stencil.last; - - if (subpasses[i].inputs[j].aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { - subpasses[i].inputs[j].stencil.during = - dzn_image_layout_to_state(subpass->pInputAttachments[j].layout, - VK_IMAGE_ASPECT_STENCIL_BIT); - } - - if (subpasses[i].inputs[j].aspects & VK_IMAGE_ASPECT_DEPTH_BIT) { - subpasses[i].inputs[j].during = - dzn_image_layout_to_state(subpass->pInputAttachments[j].layout, - VK_IMAGE_ASPECT_DEPTH_BIT); - attachments[idx].last = subpasses[i].inputs[j].during; - } else if (subpasses[i].inputs[j].aspects == VK_IMAGE_ASPECT_COLOR_BIT) { - subpasses[i].inputs[j].during = - dzn_image_layout_to_state(subpass->pInputAttachments[j].layout, - VK_IMAGE_ASPECT_COLOR_BIT); - } - - attachments[idx].last = subpasses[i].inputs[j].during; - attachments[idx].stencil.last = subpasses[i].inputs[j].stencil.during; - attachments[idx].aspects |= subpass->pInputAttachments[j].aspectMask; - } - } - } - - *pRenderPass = dzn_render_pass_to_handle(pass); - return VK_SUCCESS; -} - -VKAPI_ATTR void VKAPI_CALL -dzn_DestroyRenderPass(VkDevice dev, - VkRenderPass p, - const VkAllocationCallbacks *pAllocator) -{ - VK_FROM_HANDLE(dzn_device, device, dev); - VK_FROM_HANDLE(dzn_render_pass, pass, p); - - if (!pass) - return; - - vk_object_base_finish(&pass->base); 
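The before/during/after bookkeeping in both copies of this pass code reduces, per aspect, to mapping Vulkan image layouts onto D3D12 resource states via dzn_image_layout_to_state(). A simplified sketch of that kind of mapping, for orientation only (assumed behavior; the real helper covers many more layouts plus the per-aspect read-only combinations, which is why it takes an aspect argument):

   static D3D12_RESOURCE_STATES
   layout_to_state_sketch(VkImageLayout layout)
   {
      switch (layout) {
      case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
         return D3D12_RESOURCE_STATE_RENDER_TARGET;
      case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
         return D3D12_RESOURCE_STATE_DEPTH_WRITE;
      case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
         return D3D12_RESOURCE_STATE_DEPTH_READ;
      case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
         return D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE |
                D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
      case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
         return D3D12_RESOURCE_STATE_COPY_SOURCE;
      case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
         return D3D12_RESOURCE_STATE_COPY_DEST;
      default:
         return D3D12_RESOURCE_STATE_COMMON;
      }
   }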
- vk_free2(&device->vk.alloc, pAllocator, pass); -} - - -VKAPI_ATTR void VKAPI_CALL -dzn_GetRenderAreaGranularity(VkDevice device, - VkRenderPass pass, - VkExtent2D *pGranularity) -{ - // FIXME: query the actual optimal granularity - pGranularity->width = pGranularity->height = 1; -} diff --git a/src/microsoft/vulkan/dzn_pipeline.c b/src/microsoft/vulkan/dzn_pipeline.c new file mode 100644 index 00000000000..11fd1a54b75 --- /dev/null +++ b/src/microsoft/vulkan/dzn_pipeline.c @@ -0,0 +1,1195 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "dzn_private.h" + +#include "spirv_to_dxil.h" + +#include "dxil_validator.h" + +#include "vk_alloc.h" +#include "vk_util.h" +#include "vk_format.h" + +#include "util/u_debug.h" + +static dxil_spirv_shader_stage +to_dxil_shader_stage(VkShaderStageFlagBits in) +{ + switch (in) { + case VK_SHADER_STAGE_VERTEX_BIT: return DXIL_SPIRV_SHADER_VERTEX; + case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: return DXIL_SPIRV_SHADER_TESS_CTRL; + case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: return DXIL_SPIRV_SHADER_TESS_EVAL; + case VK_SHADER_STAGE_GEOMETRY_BIT: return DXIL_SPIRV_SHADER_GEOMETRY; + case VK_SHADER_STAGE_FRAGMENT_BIT: return DXIL_SPIRV_SHADER_FRAGMENT; + case VK_SHADER_STAGE_COMPUTE_BIT: return DXIL_SPIRV_SHADER_COMPUTE; + default: unreachable("Unsupported stage"); + } +} + +static VkResult +dzn_pipeline_compile_shader(struct dzn_device *device, + const VkAllocationCallbacks *alloc, + struct dzn_pipeline_layout *layout, + const VkPipelineShaderStageCreateInfo *stage_info, + enum dxil_spirv_yz_flip_mode yz_flip_mode, + uint16_t y_flip_mask, uint16_t z_flip_mask, + bool force_sample_rate_shading, + D3D12_SHADER_BYTECODE *slot) +{ + struct dzn_instance *instance = + container_of(device->vk.physical->instance, struct dzn_instance, vk); + const VkSpecializationInfo *spec_info = stage_info->pSpecializationInfo; + VK_FROM_HANDLE(vk_shader_module, module, stage_info->module); + struct dxil_spirv_object dxil_object; + + /* convert VkSpecializationInfo */ + struct dxil_spirv_specialization *spec = NULL; + uint32_t num_spec = 0; + + if (spec_info && spec_info->mapEntryCount) { + spec = (struct dxil_spirv_specialization *) + vk_alloc2(&device->vk.alloc, alloc, + spec_info->mapEntryCount * sizeof(*spec), 8, + VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); + if (!spec) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + for (uint32_t i = 0; i < 
spec_info->mapEntryCount; i++) {
+      const VkSpecializationMapEntry *entry = &spec_info->pMapEntries[i];
+      const uint8_t *data = (const uint8_t *)spec_info->pData + entry->offset;
+      assert(data + entry->size <= (const uint8_t *)spec_info->pData + spec_info->dataSize);
+      spec[i].id = entry->constantID;
+      switch (entry->size) {
+      case 8:
+         spec[i].value.u64 = *(const uint64_t *)data;
+         break;
+      case 4:
+         spec[i].value.u32 = *(const uint32_t *)data;
+         break;
+      case 2:
+         spec[i].value.u16 = *(const uint16_t *)data;
+         break;
+      case 1:
+         spec[i].value.u8 = *(const uint8_t *)data;
+         break;
+      default:
+         assert(!"Invalid spec constant size");
+         break;
+      }
+
+      spec[i].defined_on_module = false;
+   }
+
+   num_spec = spec_info->mapEntryCount;
+   }
+
+   struct dxil_spirv_runtime_conf conf = {
+      .runtime_data_cbv = {
+         .register_space = DZN_REGISTER_SPACE_SYSVALS,
+         .base_shader_register = 0,
+      },
+      .push_constant_cbv = {
+         .register_space = DZN_REGISTER_SPACE_PUSH_CONSTANT,
+         .base_shader_register = 0,
+      },
+      .descriptor_set_count = layout->set_count,
+      .descriptor_sets = layout->binding_translation,
+      .zero_based_vertex_instance_id = false,
+      .yz_flip = {
+         .mode = yz_flip_mode,
+         .y_mask = y_flip_mask,
+         .z_mask = z_flip_mask,
+      },
+      .read_only_images_as_srvs = true,
+      .force_sample_rate_shading = force_sample_rate_shading,
+   };
+
+   struct dxil_spirv_debug_options dbg_opts = {
+      .dump_nir = !!(instance->debug_flags & DZN_DEBUG_NIR),
+   };
+
+   /* TODO: Extend spirv_to_dxil() to allow passing a custom allocator */
+   bool success =
+      spirv_to_dxil((uint32_t *)module->data, module->size / sizeof(uint32_t),
+                    spec, num_spec,
+                    to_dxil_shader_stage(stage_info->stage),
+                    stage_info->pName, &dbg_opts, &conf, &dxil_object);
+
+   vk_free2(&device->vk.alloc, alloc, spec);
+
+   if (!success)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   char *err;
+   bool res = dxil_validate_module(instance->dxil_validator,
+                                   dxil_object.binary.buffer,
+                                   dxil_object.binary.size, &err);
+
+   if (instance->debug_flags & DZN_DEBUG_DXIL) {
+      char *disasm = dxil_disasm_module(instance->dxil_validator,
+                                        dxil_object.binary.buffer,
+                                        dxil_object.binary.size);
+      if (disasm) {
+         fprintf(stderr,
+                 "== BEGIN SHADER ============================================\n"
+                 "%s\n"
+                 "== END SHADER ==============================================\n",
+                 disasm);
+         ralloc_free(disasm);
+      }
+   }
+
+   if (!res) {
+      if (err) {
+         fprintf(stderr,
+                 "== VALIDATION ERROR =============================================\n"
+                 "%s\n"
+                 "== END ==========================================================\n",
+                 err);
+         ralloc_free(err);
+      }
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+   }
+
+   slot->pShaderBytecode = dxil_object.binary.buffer;
+   slot->BytecodeLength = dxil_object.binary.size;
+   return VK_SUCCESS;
+}
+
+static D3D12_SHADER_BYTECODE *
+dzn_pipeline_get_gfx_shader_slot(D3D12_GRAPHICS_PIPELINE_STATE_DESC *desc,
+                                 VkShaderStageFlagBits in)
+{
+   switch (in) {
+   case VK_SHADER_STAGE_VERTEX_BIT: return &desc->VS;
+   case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: return &desc->HS;
+   case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: return &desc->DS;
+   case VK_SHADER_STAGE_GEOMETRY_BIT: return &desc->GS;
+   case VK_SHADER_STAGE_FRAGMENT_BIT: return &desc->PS;
+   default: unreachable("Unsupported stage");
+   }
+}
+
+static VkResult
+dzn_graphics_pipeline_translate_vi(struct dzn_graphics_pipeline *pipeline,
+                                   const VkAllocationCallbacks *alloc,
+                                   D3D12_GRAPHICS_PIPELINE_STATE_DESC *out,
+                                   const VkGraphicsPipelineCreateInfo *in,
+                                   D3D12_INPUT_ELEMENT_DESC **input_elems)
+{
+   struct dzn_device *device =
+      container_of(pipeline->base.base.device, struct dzn_device, vk);
+   const VkPipelineVertexInputStateCreateInfo *in_vi =
+      in->pVertexInputState;
+
+   if (!in_vi->vertexAttributeDescriptionCount) {
+      out->InputLayout.pInputElementDescs = NULL;
+      out->InputLayout.NumElements = 0;
+      *input_elems = NULL;
+      return VK_SUCCESS;
+   }
+
+   *input_elems = (D3D12_INPUT_ELEMENT_DESC *)
+      vk_alloc2(&device->vk.alloc, alloc,
+                sizeof(**input_elems) * in_vi->vertexAttributeDescriptionCount, 8,
+                VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+   if (!*input_elems)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   D3D12_INPUT_ELEMENT_DESC *inputs = *input_elems;
+   D3D12_INPUT_CLASSIFICATION slot_class[MAX_VBS];
+
+   pipeline->vb.count = 0;
+   for (uint32_t i = 0; i < in_vi->vertexBindingDescriptionCount; i++) {
+      const struct VkVertexInputBindingDescription *bdesc =
+         &in_vi->pVertexBindingDescriptions[i];
+
+      pipeline->vb.count = MAX2(pipeline->vb.count, bdesc->binding + 1);
+      pipeline->vb.strides[bdesc->binding] = bdesc->stride;
+      if (bdesc->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE) {
+         slot_class[bdesc->binding] = D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA;
+      } else {
+         assert(bdesc->inputRate == VK_VERTEX_INPUT_RATE_VERTEX);
+         slot_class[bdesc->binding] = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA;
+      }
+   }
+
+   for (uint32_t i = 0; i < in_vi->vertexAttributeDescriptionCount; i++) {
+      const VkVertexInputAttributeDescription *attr =
+         &in_vi->pVertexAttributeDescriptions[i];
+
+      /* nir_to_dxil() names all vertex inputs as TEXCOORDx */
+      inputs[i].SemanticName = "TEXCOORD";
+      inputs[i].SemanticIndex = attr->location;
+      inputs[i].Format = dzn_buffer_get_dxgi_format(attr->format);
+      inputs[i].InputSlot = attr->binding;
+      inputs[i].InputSlotClass = slot_class[attr->binding];
+      inputs[i].InstanceDataStepRate =
+         inputs[i].InputSlotClass == D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA ?
1 : 0; + inputs[i].AlignedByteOffset = attr->offset; + } + + out->InputLayout.pInputElementDescs = inputs; + out->InputLayout.NumElements = in_vi->vertexAttributeDescriptionCount; + return VK_SUCCESS; +} + +static D3D12_PRIMITIVE_TOPOLOGY_TYPE +to_prim_topology_type(VkPrimitiveTopology in) +{ + switch (in) { + case VK_PRIMITIVE_TOPOLOGY_POINT_LIST: + return D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT; + case VK_PRIMITIVE_TOPOLOGY_LINE_LIST: + case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP: + case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY: + case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY: + return D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; + case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST: + case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP: + case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN: + case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY: + case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY: + return D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST: + return D3D12_PRIMITIVE_TOPOLOGY_TYPE_PATCH; + default: unreachable("Invalid primitive topology"); + } +} + +static D3D12_PRIMITIVE_TOPOLOGY +to_prim_topology(VkPrimitiveTopology in, unsigned patch_control_points) +{ + switch (in) { + case VK_PRIMITIVE_TOPOLOGY_POINT_LIST: return D3D_PRIMITIVE_TOPOLOGY_POINTLIST; + case VK_PRIMITIVE_TOPOLOGY_LINE_LIST: return D3D_PRIMITIVE_TOPOLOGY_LINELIST; + case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP: return D3D_PRIMITIVE_TOPOLOGY_LINESTRIP; + case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ; + case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ; + case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; + case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; + /* Triangle fans are emulated using an intermediate index buffer. */ + case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; + case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ; + case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ; + case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST: + assert(patch_control_points); + return (D3D12_PRIMITIVE_TOPOLOGY)(D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST + patch_control_points - 1); + default: unreachable("Invalid primitive topology"); + } +} + +static void +dzn_graphics_pipeline_translate_ia(struct dzn_graphics_pipeline *pipeline, + D3D12_GRAPHICS_PIPELINE_STATE_DESC *out, + const VkGraphicsPipelineCreateInfo *in) +{ + const VkPipelineInputAssemblyStateCreateInfo *in_ia = + in->pInputAssemblyState; + const VkPipelineTessellationStateCreateInfo *in_tes = + (out->DS.pShaderBytecode && out->HS.pShaderBytecode) ? + in->pTessellationState : NULL; + + out->PrimitiveTopologyType = to_prim_topology_type(in_ia->topology); + pipeline->ia.triangle_fan = in_ia->topology == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN; + pipeline->ia.topology = + to_prim_topology(in_ia->topology, in_tes ? in_tes->patchControlPoints : 0); + + /* FIXME: does that work for u16 index buffers? 
*/ + if (in_ia->primitiveRestartEnable) + out->IBStripCutValue = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF; + else + out->IBStripCutValue = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED; +} + +static D3D12_FILL_MODE +translate_polygon_mode(VkPolygonMode in) +{ + switch (in) { + case VK_POLYGON_MODE_FILL: return D3D12_FILL_MODE_SOLID; + case VK_POLYGON_MODE_LINE: return D3D12_FILL_MODE_WIREFRAME; + default: unreachable("Unsupported polygon mode"); + } +} + +static D3D12_CULL_MODE +translate_cull_mode(VkCullModeFlags in) +{ + switch (in) { + case VK_CULL_MODE_NONE: return D3D12_CULL_MODE_NONE; + case VK_CULL_MODE_FRONT_BIT: return D3D12_CULL_MODE_FRONT; + case VK_CULL_MODE_BACK_BIT: return D3D12_CULL_MODE_BACK; + /* Front+back face culling is equivalent to 'rasterization disabled' */ + case VK_CULL_MODE_FRONT_AND_BACK: return D3D12_CULL_MODE_NONE; + default: unreachable("Unsupported cull mode"); + } +} + +static void +dzn_graphics_pipeline_translate_rast(struct dzn_graphics_pipeline *pipeline, + D3D12_GRAPHICS_PIPELINE_STATE_DESC *out, + const VkGraphicsPipelineCreateInfo *in) +{ + const VkPipelineRasterizationStateCreateInfo *in_rast = + in->pRasterizationState; + const VkPipelineViewportStateCreateInfo *in_vp = + in_rast->rasterizerDiscardEnable ? NULL : in->pViewportState; + + if (in_vp) { + pipeline->vp.count = in_vp->viewportCount; + if (in_vp->pViewports) { + for (uint32_t i = 0; in_vp->pViewports && i < in_vp->viewportCount; i++) + dzn_translate_viewport(&pipeline->vp.desc[i], &in_vp->pViewports[i]); + } + + pipeline->scissor.count = in_vp->scissorCount; + if (in_vp->pScissors) { + for (uint32_t i = 0; i < in_vp->scissorCount; i++) + dzn_translate_rect(&pipeline->scissor.desc[i], &in_vp->pScissors[i]); + } + } + + out->RasterizerState.DepthClipEnable = !in_rast->depthClampEnable; + out->RasterizerState.FillMode = translate_polygon_mode(in_rast->polygonMode); + out->RasterizerState.CullMode = translate_cull_mode(in_rast->cullMode); + out->RasterizerState.FrontCounterClockwise = + in_rast->frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE; + if (in_rast->depthBiasEnable) { + out->RasterizerState.DepthBias = in_rast->depthBiasConstantFactor; + out->RasterizerState.SlopeScaledDepthBias = in_rast->depthBiasSlopeFactor; + out->RasterizerState.DepthBiasClamp = in_rast->depthBiasClamp; + } + + assert(in_rast->lineWidth == 1.0f); +} + +static void +dzn_graphics_pipeline_translate_ms(struct dzn_graphics_pipeline *pipeline, + D3D12_GRAPHICS_PIPELINE_STATE_DESC *out, + const VkGraphicsPipelineCreateInfo *in) +{ + const VkPipelineRasterizationStateCreateInfo *in_rast = + in->pRasterizationState; + const VkPipelineMultisampleStateCreateInfo *in_ms = + in_rast->rasterizerDiscardEnable ? NULL : in->pMultisampleState; + + /* TODO: minSampleShading (use VRS), alphaToOneEnable */ + out->SampleDesc.Count = in_ms ? in_ms->rasterizationSamples : 1; + out->SampleDesc.Quality = 0; + out->SampleMask = in_ms && in_ms->pSampleMask ? 
+ *in_ms->pSampleMask : + (1 << out->SampleDesc.Count) - 1; +} + +static D3D12_STENCIL_OP +translate_stencil_op(VkStencilOp in) +{ + switch (in) { + case VK_STENCIL_OP_KEEP: return D3D12_STENCIL_OP_KEEP; + case VK_STENCIL_OP_ZERO: return D3D12_STENCIL_OP_ZERO; + case VK_STENCIL_OP_REPLACE: return D3D12_STENCIL_OP_REPLACE; + case VK_STENCIL_OP_INCREMENT_AND_CLAMP: return D3D12_STENCIL_OP_INCR_SAT; + case VK_STENCIL_OP_DECREMENT_AND_CLAMP: return D3D12_STENCIL_OP_DECR_SAT; + case VK_STENCIL_OP_INCREMENT_AND_WRAP: return D3D12_STENCIL_OP_INCR; + case VK_STENCIL_OP_DECREMENT_AND_WRAP: return D3D12_STENCIL_OP_DECR; + case VK_STENCIL_OP_INVERT: return D3D12_STENCIL_OP_INVERT; + default: unreachable("Invalid stencil op"); + } +} + +static void +translate_stencil_test(struct dzn_graphics_pipeline *pipeline, + D3D12_GRAPHICS_PIPELINE_STATE_DESC *out, + const VkGraphicsPipelineCreateInfo *in) +{ + const VkPipelineDepthStencilStateCreateInfo *in_zsa = + in->pDepthStencilState; + + bool front_test_uses_ref = + !(in->pRasterizationState->cullMode & VK_CULL_MODE_FRONT_BIT) && + in_zsa->front.compareOp != VK_COMPARE_OP_NEVER && + in_zsa->front.compareOp != VK_COMPARE_OP_ALWAYS && + (pipeline->zsa.stencil_test.dynamic_compare_mask || + in_zsa->front.compareMask != 0); + bool back_test_uses_ref = + !(in->pRasterizationState->cullMode & VK_CULL_MODE_BACK_BIT) && + in_zsa->back.compareOp != VK_COMPARE_OP_NEVER && + in_zsa->back.compareOp != VK_COMPARE_OP_ALWAYS && + (pipeline->zsa.stencil_test.dynamic_compare_mask || + in_zsa->back.compareMask != 0); + + if (front_test_uses_ref && pipeline->zsa.stencil_test.dynamic_compare_mask) + pipeline->zsa.stencil_test.front.compare_mask = UINT32_MAX; + else if (front_test_uses_ref) + pipeline->zsa.stencil_test.front.compare_mask = in_zsa->front.compareMask; + else + pipeline->zsa.stencil_test.front.compare_mask = 0; + + if (back_test_uses_ref && pipeline->zsa.stencil_test.dynamic_compare_mask) + pipeline->zsa.stencil_test.back.compare_mask = UINT32_MAX; + else if (back_test_uses_ref) + pipeline->zsa.stencil_test.back.compare_mask = in_zsa->back.compareMask; + else + pipeline->zsa.stencil_test.back.compare_mask = 0; + + bool diff_wr_mask = + in->pRasterizationState->cullMode == VK_CULL_MODE_NONE && + (pipeline->zsa.stencil_test.dynamic_write_mask || + in_zsa->back.writeMask != in_zsa->front.writeMask); + bool diff_ref = + in->pRasterizationState->cullMode == VK_CULL_MODE_NONE && + (pipeline->zsa.stencil_test.dynamic_ref || + in_zsa->back.reference != in_zsa->front.reference); + bool diff_cmp_mask = + back_test_uses_ref && front_test_uses_ref && + (pipeline->zsa.stencil_test.dynamic_compare_mask || + pipeline->zsa.stencil_test.front.compare_mask != pipeline->zsa.stencil_test.back.compare_mask); + + if (diff_cmp_mask || diff_wr_mask) + pipeline->zsa.stencil_test.independent_front_back = true; + + bool back_wr_uses_ref = + !(in->pRasterizationState->cullMode & VK_CULL_MODE_BACK_BIT) && + (in_zsa->back.compareOp != VK_COMPARE_OP_ALWAYS && + in_zsa->back.failOp == VK_STENCIL_OP_REPLACE) || + (in_zsa->back.compareOp != VK_COMPARE_OP_NEVER && + (!in_zsa->depthTestEnable || in_zsa->depthCompareOp != VK_COMPARE_OP_NEVER) && + in_zsa->back.passOp == VK_STENCIL_OP_REPLACE) || + (in_zsa->depthTestEnable && + in_zsa->depthCompareOp != VK_COMPARE_OP_ALWAYS && + in_zsa->back.depthFailOp == VK_STENCIL_OP_REPLACE); + bool front_wr_uses_ref = + !(in->pRasterizationState->cullMode & VK_CULL_MODE_FRONT_BIT) && + (in_zsa->front.compareOp != VK_COMPARE_OP_ALWAYS && + in_zsa->front.failOp == 
VK_STENCIL_OP_REPLACE) || + (in_zsa->front.compareOp != VK_COMPARE_OP_NEVER && + (!in_zsa->depthTestEnable || in_zsa->depthCompareOp != VK_COMPARE_OP_NEVER) && + in_zsa->front.passOp == VK_STENCIL_OP_REPLACE) || + (in_zsa->depthTestEnable && + in_zsa->depthCompareOp != VK_COMPARE_OP_ALWAYS && + in_zsa->front.depthFailOp == VK_STENCIL_OP_REPLACE); + + pipeline->zsa.stencil_test.front.write_mask = + (pipeline->zsa.stencil_test.dynamic_write_mask || + (in->pRasterizationState->cullMode & VK_CULL_MODE_FRONT_BIT)) ? + 0 : in_zsa->front.writeMask; + pipeline->zsa.stencil_test.back.write_mask = + (pipeline->zsa.stencil_test.dynamic_write_mask || + (in->pRasterizationState->cullMode & VK_CULL_MODE_BACK_BIT)) ? + 0 : in_zsa->back.writeMask; + + pipeline->zsa.stencil_test.front.uses_ref = front_test_uses_ref || front_wr_uses_ref; + pipeline->zsa.stencil_test.back.uses_ref = back_test_uses_ref || back_wr_uses_ref; + + if (diff_ref && + pipeline->zsa.stencil_test.front.uses_ref && + pipeline->zsa.stencil_test.back.uses_ref) + pipeline->zsa.stencil_test.independent_front_back = true; + + pipeline->zsa.stencil_test.front.ref = + pipeline->zsa.stencil_test.dynamic_ref ? 0 : in_zsa->front.reference; + pipeline->zsa.stencil_test.back.ref = + pipeline->zsa.stencil_test.dynamic_ref ? 0 : in_zsa->back.reference; + + /* FIXME: We don't support independent {compare,write}_mask and stencil + * reference. Until we have proper support for independent front/back + * stencil test, let's prioritize the front setup when both are active. + */ + out->DepthStencilState.StencilReadMask = + front_test_uses_ref ? + pipeline->zsa.stencil_test.front.compare_mask : + back_test_uses_ref ? + pipeline->zsa.stencil_test.back.compare_mask : 0; + out->DepthStencilState.StencilWriteMask = + pipeline->zsa.stencil_test.front.write_mask ? + pipeline->zsa.stencil_test.front.write_mask : + pipeline->zsa.stencil_test.back.write_mask; + + assert(!pipeline->zsa.stencil_test.independent_front_back); +} + +static void +dzn_graphics_pipeline_translate_zsa(struct dzn_graphics_pipeline *pipeline, + D3D12_GRAPHICS_PIPELINE_STATE_DESC *out, + const VkGraphicsPipelineCreateInfo *in) +{ + const VkPipelineRasterizationStateCreateInfo *in_rast = + in->pRasterizationState; + const VkPipelineDepthStencilStateCreateInfo *in_zsa = + in_rast->rasterizerDiscardEnable ? NULL : in->pDepthStencilState; + + if (!in_zsa) + return; + + /* TODO: depthBoundsTestEnable */ + + out->DepthStencilState.DepthEnable = in_zsa->depthTestEnable; + out->DepthStencilState.DepthWriteMask = + in_zsa->depthWriteEnable ? 
+      D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO;
+   out->DepthStencilState.DepthFunc =
+      dzn_translate_compare_op(in_zsa->depthCompareOp);
+   out->DepthStencilState.StencilEnable = in_zsa->stencilTestEnable;
+   if (in_zsa->stencilTestEnable) {
+      out->DepthStencilState.FrontFace.StencilFailOp =
+         translate_stencil_op(in_zsa->front.failOp);
+      out->DepthStencilState.FrontFace.StencilDepthFailOp =
+         translate_stencil_op(in_zsa->front.depthFailOp);
+      out->DepthStencilState.FrontFace.StencilPassOp =
+         translate_stencil_op(in_zsa->front.passOp);
+      out->DepthStencilState.FrontFace.StencilFunc =
+         dzn_translate_compare_op(in_zsa->front.compareOp);
+      out->DepthStencilState.BackFace.StencilFailOp =
+         translate_stencil_op(in_zsa->back.failOp);
+      out->DepthStencilState.BackFace.StencilDepthFailOp =
+         translate_stencil_op(in_zsa->back.depthFailOp);
+      out->DepthStencilState.BackFace.StencilPassOp =
+         translate_stencil_op(in_zsa->back.passOp);
+      out->DepthStencilState.BackFace.StencilFunc =
+         dzn_translate_compare_op(in_zsa->back.compareOp);
+
+      pipeline->zsa.stencil_test.enable = true;
+
+      translate_stencil_test(pipeline, out, in);
+   }
+}
+
+static D3D12_BLEND
+translate_blend_factor(VkBlendFactor in, bool is_alpha)
+{
+   switch (in) {
+   case VK_BLEND_FACTOR_ZERO: return D3D12_BLEND_ZERO;
+   case VK_BLEND_FACTOR_ONE: return D3D12_BLEND_ONE;
+   case VK_BLEND_FACTOR_SRC_COLOR:
+      return is_alpha ? D3D12_BLEND_SRC_ALPHA : D3D12_BLEND_SRC_COLOR;
+   case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
+      return is_alpha ? D3D12_BLEND_INV_SRC_ALPHA : D3D12_BLEND_INV_SRC_COLOR;
+   case VK_BLEND_FACTOR_DST_COLOR:
+      return is_alpha ? D3D12_BLEND_DEST_ALPHA : D3D12_BLEND_DEST_COLOR;
+   case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
+      return is_alpha ? D3D12_BLEND_INV_DEST_ALPHA : D3D12_BLEND_INV_DEST_COLOR;
+   case VK_BLEND_FACTOR_SRC_ALPHA: return D3D12_BLEND_SRC_ALPHA;
+   case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: return D3D12_BLEND_INV_SRC_ALPHA;
+   case VK_BLEND_FACTOR_DST_ALPHA: return D3D12_BLEND_DEST_ALPHA;
+   case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA: return D3D12_BLEND_INV_DEST_ALPHA;
+   /* FIXME: no way to isolate the alpha and color constants */
+   case VK_BLEND_FACTOR_CONSTANT_COLOR:
+   case VK_BLEND_FACTOR_CONSTANT_ALPHA:
+      return D3D12_BLEND_BLEND_FACTOR;
+   case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
+   case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
+      return D3D12_BLEND_INV_BLEND_FACTOR;
+   case VK_BLEND_FACTOR_SRC1_COLOR:
+      return is_alpha ? D3D12_BLEND_SRC1_ALPHA : D3D12_BLEND_SRC1_COLOR;
+   case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
+      return is_alpha ?
D3D12_BLEND_INV_SRC1_ALPHA : D3D12_BLEND_INV_SRC1_COLOR; + case VK_BLEND_FACTOR_SRC1_ALPHA: return D3D12_BLEND_SRC1_ALPHA; + case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA: return D3D12_BLEND_INV_SRC1_ALPHA; + case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE: return D3D12_BLEND_SRC_ALPHA_SAT; + default: unreachable("Invalid blend factor"); + } +} + +static D3D12_BLEND_OP +translate_blend_op(VkBlendOp in) +{ + switch (in) { + case VK_BLEND_OP_ADD: return D3D12_BLEND_OP_ADD; + case VK_BLEND_OP_SUBTRACT: return D3D12_BLEND_OP_SUBTRACT; + case VK_BLEND_OP_REVERSE_SUBTRACT: return D3D12_BLEND_OP_REV_SUBTRACT; + case VK_BLEND_OP_MIN: return D3D12_BLEND_OP_MIN; + case VK_BLEND_OP_MAX: return D3D12_BLEND_OP_MAX; + default: unreachable("Invalid blend op"); + } +} + +static D3D12_LOGIC_OP +translate_logic_op(VkLogicOp in) +{ + switch (in) { + case VK_LOGIC_OP_CLEAR: return D3D12_LOGIC_OP_CLEAR; + case VK_LOGIC_OP_AND: return D3D12_LOGIC_OP_AND; + case VK_LOGIC_OP_AND_REVERSE: return D3D12_LOGIC_OP_AND_REVERSE; + case VK_LOGIC_OP_COPY: return D3D12_LOGIC_OP_COPY; + case VK_LOGIC_OP_AND_INVERTED: return D3D12_LOGIC_OP_AND_INVERTED; + case VK_LOGIC_OP_NO_OP: return D3D12_LOGIC_OP_NOOP; + case VK_LOGIC_OP_XOR: return D3D12_LOGIC_OP_XOR; + case VK_LOGIC_OP_OR: return D3D12_LOGIC_OP_OR; + case VK_LOGIC_OP_NOR: return D3D12_LOGIC_OP_NOR; + case VK_LOGIC_OP_EQUIVALENT: return D3D12_LOGIC_OP_EQUIV; + case VK_LOGIC_OP_INVERT: return D3D12_LOGIC_OP_INVERT; + case VK_LOGIC_OP_OR_REVERSE: return D3D12_LOGIC_OP_OR_REVERSE; + case VK_LOGIC_OP_COPY_INVERTED: return D3D12_LOGIC_OP_COPY_INVERTED; + case VK_LOGIC_OP_OR_INVERTED: return D3D12_LOGIC_OP_OR_INVERTED; + case VK_LOGIC_OP_NAND: return D3D12_LOGIC_OP_NAND; + case VK_LOGIC_OP_SET: return D3D12_LOGIC_OP_SET; + default: unreachable("Invalid logic op"); + } +} + +static void +dzn_graphics_pipeline_translate_blend(struct dzn_graphics_pipeline *pipeline, + D3D12_GRAPHICS_PIPELINE_STATE_DESC *out, + const VkGraphicsPipelineCreateInfo *in) +{ + const VkPipelineRasterizationStateCreateInfo *in_rast = + in->pRasterizationState; + const VkPipelineColorBlendStateCreateInfo *in_blend = + in_rast->rasterizerDiscardEnable ? NULL : in->pColorBlendState; + const VkPipelineMultisampleStateCreateInfo *in_ms = + in_rast->rasterizerDiscardEnable ? NULL : in->pMultisampleState; + + if (!in_blend || !in_ms) + return; + + D3D12_LOGIC_OP logicop = + in_blend->logicOpEnable ? 
+ translate_logic_op(in_blend->logicOp) : D3D12_LOGIC_OP_NOOP; + out->BlendState.AlphaToCoverageEnable = in_ms->alphaToCoverageEnable; + memcpy(pipeline->blend.constants, in_blend->blendConstants, + sizeof(pipeline->blend.constants)); + + for (uint32_t i = 0; i < in_blend->attachmentCount; i++) { + /* D3D12 needs IndependentBlendEnable as soon as two attachments use + * different blend states. + */ + if (i > 0 && + memcmp(&in_blend->pAttachments[i - 1], &in_blend->pAttachments[i], + sizeof(*in_blend->pAttachments)) != 0) + out->BlendState.IndependentBlendEnable = true; + + out->BlendState.RenderTarget[i].BlendEnable = + in_blend->pAttachments[i].blendEnable; + out->BlendState.RenderTarget[i].RenderTargetWriteMask = + in_blend->pAttachments[i].colorWriteMask; + + if (in_blend->logicOpEnable) { + out->BlendState.RenderTarget[i].LogicOpEnable = true; + out->BlendState.RenderTarget[i].LogicOp = logicop; + } else { + out->BlendState.RenderTarget[i].SrcBlend = + translate_blend_factor(in_blend->pAttachments[i].srcColorBlendFactor, false); + out->BlendState.RenderTarget[i].DestBlend = + translate_blend_factor(in_blend->pAttachments[i].dstColorBlendFactor, false); + out->BlendState.RenderTarget[i].BlendOp = + translate_blend_op(in_blend->pAttachments[i].colorBlendOp); + out->BlendState.RenderTarget[i].SrcBlendAlpha = + translate_blend_factor(in_blend->pAttachments[i].srcAlphaBlendFactor, true); + out->BlendState.RenderTarget[i].DestBlendAlpha = + translate_blend_factor(in_blend->pAttachments[i].dstAlphaBlendFactor, true); + out->BlendState.RenderTarget[i].BlendOpAlpha = + translate_blend_op(in_blend->pAttachments[i].alphaBlendOp); + } + } +} + + +static void +dzn_pipeline_init(struct dzn_pipeline *pipeline, + struct dzn_device *device, + VkPipelineBindPoint type, + struct dzn_pipeline_layout *layout) +{ + pipeline->type = type; + pipeline->root.sets_param_count = layout->root.sets_param_count; + pipeline->root.sysval_cbv_param_idx = layout->root.sysval_cbv_param_idx; + pipeline->root.push_constant_cbv_param_idx = layout->root.push_constant_cbv_param_idx; + STATIC_ASSERT(sizeof(pipeline->root.type) == sizeof(layout->root.type)); + memcpy(pipeline->root.type, layout->root.type, sizeof(pipeline->root.type)); + pipeline->root.sig = layout->root.sig; + ID3D12RootSignature_AddRef(pipeline->root.sig); + + STATIC_ASSERT(sizeof(layout->desc_count) == sizeof(pipeline->desc_count)); + memcpy(pipeline->desc_count, layout->desc_count, sizeof(pipeline->desc_count)); + + STATIC_ASSERT(sizeof(layout->sets) == sizeof(pipeline->sets)); + memcpy(pipeline->sets, layout->sets, sizeof(pipeline->sets)); + vk_object_base_init(&device->vk, &pipeline->base, VK_OBJECT_TYPE_PIPELINE); +} + +static void +dzn_pipeline_finish(struct dzn_pipeline *pipeline) +{ + if (pipeline->state) + ID3D12PipelineState_Release(pipeline->state); + if (pipeline->root.sig) + ID3D12RootSignature_Release(pipeline->root.sig); + + vk_object_base_finish(&pipeline->base); +} + +static void +dzn_graphics_pipeline_destroy(struct dzn_graphics_pipeline *pipeline, + const VkAllocationCallbacks *alloc) +{ + if (!pipeline) + return; + + for (uint32_t i = 0; i < ARRAY_SIZE(pipeline->indirect_cmd_sigs); i++) { + if (pipeline->indirect_cmd_sigs[i]) + ID3D12CommandSignature_Release(pipeline->indirect_cmd_sigs[i]); + } + + dzn_pipeline_finish(&pipeline->base); + vk_free2(&pipeline->base.base.device->alloc, alloc, pipeline); +} + +static VkResult +dzn_graphics_pipeline_create(struct dzn_device *device, + VkPipelineCache cache, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkPipeline *out) +{ +
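/* Translate the fixed-function state into the D3D12 PSO descriptor, + * compile every shader stage to DXIL, then create the pipeline state. + */ +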
VK_FROM_HANDLE(dzn_render_pass, pass, pCreateInfo->renderPass); + VK_FROM_HANDLE(dzn_pipeline_layout, layout, pCreateInfo->layout); + const struct dzn_subpass *subpass = &pass->subpasses[pCreateInfo->subpass]; + uint32_t stage_mask = 0; + VkResult ret; + HRESULT hres = 0; + + struct dzn_graphics_pipeline *pipeline = (struct dzn_graphics_pipeline *) + vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!pipeline) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + dzn_pipeline_init(&pipeline->base, device, + VK_PIPELINE_BIND_POINT_GRAPHICS, + layout); + D3D12_INPUT_ELEMENT_DESC *inputs = NULL; + D3D12_GRAPHICS_PIPELINE_STATE_DESC desc = { + .pRootSignature = pipeline->base.root.sig, + .Flags = D3D12_PIPELINE_STATE_FLAG_NONE, + }; + + const VkPipelineViewportStateCreateInfo *vp_info = + pCreateInfo->pRasterizationState->rasterizerDiscardEnable ? + NULL : pCreateInfo->pViewportState; + + + ret = dzn_graphics_pipeline_translate_vi(pipeline, pAllocator, &desc, pCreateInfo, &inputs); + if (ret != VK_SUCCESS) + goto out; + + if (pCreateInfo->pDynamicState) { + for (uint32_t i = 0; i < pCreateInfo->pDynamicState->dynamicStateCount; i++) { + switch (pCreateInfo->pDynamicState->pDynamicStates[i]) { + case VK_DYNAMIC_STATE_VIEWPORT: + pipeline->vp.dynamic = true; + break; + case VK_DYNAMIC_STATE_SCISSOR: + pipeline->scissor.dynamic = true; + break; + case VK_DYNAMIC_STATE_STENCIL_REFERENCE: + pipeline->zsa.stencil_test.dynamic_ref = true; + break; + case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK: + pipeline->zsa.stencil_test.dynamic_compare_mask = true; + break; + case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK: + pipeline->zsa.stencil_test.dynamic_write_mask = true; + break; + case VK_DYNAMIC_STATE_BLEND_CONSTANTS: + pipeline->blend.dynamic_constants = true; + break; + default: unreachable("Unsupported dynamic state"); + } + } + } + + dzn_graphics_pipeline_translate_ia(pipeline, &desc, pCreateInfo); + dzn_graphics_pipeline_translate_rast(pipeline, &desc, pCreateInfo); + dzn_graphics_pipeline_translate_ms(pipeline, &desc, pCreateInfo); + dzn_graphics_pipeline_translate_zsa(pipeline, &desc, pCreateInfo); + dzn_graphics_pipeline_translate_blend(pipeline, &desc, pCreateInfo); + + desc.NumRenderTargets = subpass->color_count; + for (uint32_t i = 0; i < subpass->color_count; i++) { + uint32_t idx = subpass->colors[i].idx; + + if (idx == VK_ATTACHMENT_UNUSED) continue; + + const struct dzn_attachment *attachment = &pass->attachments[idx]; + + desc.RTVFormats[i] = + dzn_image_get_dxgi_format(attachment->format, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + VK_IMAGE_ASPECT_COLOR_BIT); + } + + if (subpass->zs.idx != VK_ATTACHMENT_UNUSED) { + const struct dzn_attachment *attachment = + &pass->attachments[subpass->zs.idx]; + + desc.DSVFormat = + dzn_image_get_dxgi_format(attachment->format, + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, + VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT); + } + + for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) + stage_mask |= pCreateInfo->pStages[i].stage; + + for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { + if (pCreateInfo->pStages[i].stage == VK_SHADER_STAGE_FRAGMENT_BIT && + pCreateInfo->pRasterizationState && + (pCreateInfo->pRasterizationState->rasterizerDiscardEnable || + pCreateInfo->pRasterizationState->cullMode == VK_CULL_MODE_FRONT_AND_BACK)) { + /* Disable rasterization (AKA leave fragment shader NULL) when + * front+back culling or discard is set. 
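+ * D3D12 has no dedicated rasterizer-discard state, so this is the + * closest match.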
+ */ + continue; + } + + D3D12_SHADER_BYTECODE *slot = + dzn_pipeline_get_gfx_shader_slot(&desc, pCreateInfo->pStages[i].stage); + enum dxil_spirv_yz_flip_mode yz_flip_mode = DXIL_SPIRV_YZ_FLIP_NONE; + uint16_t y_flip_mask = 0, z_flip_mask = 0; + + if (pCreateInfo->pStages[i].stage == VK_SHADER_STAGE_GEOMETRY_BIT || + (pCreateInfo->pStages[i].stage == VK_SHADER_STAGE_VERTEX_BIT && + !(stage_mask & VK_SHADER_STAGE_GEOMETRY_BIT))) { + if (pipeline->vp.dynamic) { + yz_flip_mode = DXIL_SPIRV_YZ_FLIP_CONDITIONAL; + } else if (vp_info) { + for (uint32_t i = 0; vp_info->pViewports && i < vp_info->viewportCount; i++) { + if (vp_info->pViewports[i].height > 0) + y_flip_mask |= BITFIELD_BIT(i); + + if (vp_info->pViewports[i].minDepth > vp_info->pViewports[i].maxDepth) + z_flip_mask |= BITFIELD_BIT(i); + } + + if (y_flip_mask && z_flip_mask) + yz_flip_mode = DXIL_SPIRV_YZ_FLIP_UNCONDITIONAL; + else if (z_flip_mask) + yz_flip_mode = DXIL_SPIRV_Z_FLIP_UNCONDITIONAL; + else if (y_flip_mask) + yz_flip_mode = DXIL_SPIRV_Y_FLIP_UNCONDITIONAL; + } + } + + bool force_sample_rate_shading = + pCreateInfo->pStages[i].stage == VK_SHADER_STAGE_FRAGMENT_BIT && + pCreateInfo->pMultisampleState && + pCreateInfo->pMultisampleState->sampleShadingEnable; + + ret = dzn_pipeline_compile_shader(device, pAllocator, + layout, &pCreateInfo->pStages[i], + yz_flip_mode, y_flip_mask, z_flip_mask, + force_sample_rate_shading, slot); + if (ret != VK_SUCCESS) + goto out; + } + + + hres = ID3D12Device1_CreateGraphicsPipelineState(device->dev, &desc, + &IID_ID3D12PipelineState, + (void **)&pipeline->base.state); + if (FAILED(hres)) { + ret = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + goto out; + } + + ret = VK_SUCCESS; + +out: + for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { + D3D12_SHADER_BYTECODE *slot = + dzn_pipeline_get_gfx_shader_slot(&desc, pCreateInfo->pStages[i].stage); + free((void *)slot->pShaderBytecode); + } + + vk_free2(&device->vk.alloc, pAllocator, inputs); + if (ret != VK_SUCCESS) + dzn_graphics_pipeline_destroy(pipeline, pAllocator); + else + *out = dzn_graphics_pipeline_to_handle(pipeline); + + return ret; +} + +#define DZN_INDIRECT_CMD_SIG_MAX_ARGS 3 + +ID3D12CommandSignature * +dzn_graphics_pipeline_get_indirect_cmd_sig(struct dzn_graphics_pipeline *pipeline, + enum dzn_indirect_draw_cmd_sig_type type) +{ + assert(type < DZN_NUM_INDIRECT_DRAW_CMD_SIGS); + + struct dzn_device *device = + container_of(pipeline->base.base.device, struct dzn_device, vk); + ID3D12CommandSignature *cmdsig = pipeline->indirect_cmd_sigs[type]; + + if (cmdsig) + return cmdsig; + + bool triangle_fan = type == DZN_INDIRECT_DRAW_TRIANGLE_FAN_CMD_SIG; + bool indexed = type == DZN_INDIRECT_INDEXED_DRAW_CMD_SIG || triangle_fan; + + uint32_t cmd_arg_count = 0; + D3D12_INDIRECT_ARGUMENT_DESC cmd_args[DZN_INDIRECT_CMD_SIG_MAX_ARGS]; + + if (triangle_fan) { + cmd_args[cmd_arg_count++] = (D3D12_INDIRECT_ARGUMENT_DESC) { + .Type = D3D12_INDIRECT_ARGUMENT_TYPE_INDEX_BUFFER_VIEW, + }; + } + + cmd_args[cmd_arg_count++] = (D3D12_INDIRECT_ARGUMENT_DESC) { + .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT, + .Constant = { + .RootParameterIndex = pipeline->base.root.sysval_cbv_param_idx, + .DestOffsetIn32BitValues = offsetof(struct dxil_spirv_vertex_runtime_data, first_vertex) / 4, + .Num32BitValuesToSet = 2, + }, + }; + +
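/* Last argument: the draw itself. The triangle-fan path always goes + * through an intermediate index buffer, so it is indexed as well. + */ + cmd_args[cmd_arg_count++] = (D3D12_INDIRECT_ARGUMENT_DESC) { + .Type = indexed ?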
D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED : + D3D12_INDIRECT_ARGUMENT_TYPE_DRAW, + }; + + assert(cmd_arg_count <= ARRAY_SIZE(cmd_args)); + assert(offsetof(struct dxil_spirv_vertex_runtime_data, first_vertex) == 0); + + D3D12_COMMAND_SIGNATURE_DESC cmd_sig_desc = { + .ByteStride = + triangle_fan ? + sizeof(struct dzn_indirect_triangle_fan_draw_exec_params) : + sizeof(struct dzn_indirect_draw_exec_params), + .NumArgumentDescs = cmd_arg_count, + .pArgumentDescs = cmd_args, + }; + HRESULT hres = + ID3D12Device1_CreateCommandSignature(device->dev, &cmd_sig_desc, + pipeline->base.root.sig, + &IID_ID3D12CommandSignature, + (void **)&cmdsig); + if (FAILED(hres)) + return NULL; + + pipeline->indirect_cmd_sigs[type] = cmdsig; + return cmdsig; +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_CreateGraphicsPipelines(VkDevice dev, + VkPipelineCache pipelineCache, + uint32_t count, + const VkGraphicsPipelineCreateInfo *pCreateInfos, + const VkAllocationCallbacks *pAllocator, + VkPipeline *pPipelines) +{ + VK_FROM_HANDLE(dzn_device, device, dev); + VkResult result = VK_SUCCESS; + + unsigned i; + for (i = 0; i < count; i++) { + result = dzn_graphics_pipeline_create(device, + pipelineCache, + &pCreateInfos[i], + pAllocator, + &pPipelines[i]); + if (result != VK_SUCCESS) { + pPipelines[i] = VK_NULL_HANDLE; + + /* Bail out on the first error other than VK_PIPELINE_COMPILE_REQUIRED_EXT, + * as it is not obvious which error should be reported when two different + * failures occur. + */ + if (result != VK_PIPELINE_COMPILE_REQUIRED_EXT) + break; + + if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT) + break; + } + } + + for (; i < count; i++) + pPipelines[i] = VK_NULL_HANDLE; + + return result; +} + +static void +dzn_compute_pipeline_destroy(struct dzn_compute_pipeline *pipeline, + const VkAllocationCallbacks *alloc) +{ + if (!pipeline) + return; + + if (pipeline->indirect_cmd_sig) + ID3D12CommandSignature_Release(pipeline->indirect_cmd_sig); + + dzn_pipeline_finish(&pipeline->base); + vk_free2(&pipeline->base.base.device->alloc, alloc, pipeline); +} + +static VkResult +dzn_compute_pipeline_create(struct dzn_device *device, + VkPipelineCache cache, + const VkComputePipelineCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkPipeline *out) +{ + VK_FROM_HANDLE(dzn_pipeline_layout, layout, pCreateInfo->layout); + + struct dzn_compute_pipeline *pipeline = (struct dzn_compute_pipeline *) + vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!pipeline) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + dzn_pipeline_init(&pipeline->base, device, + VK_PIPELINE_BIND_POINT_COMPUTE, + layout); + + D3D12_COMPUTE_PIPELINE_STATE_DESC desc = { + .pRootSignature = pipeline->base.root.sig, + .Flags = D3D12_PIPELINE_STATE_FLAG_NONE, + }; + + VkResult ret = + dzn_pipeline_compile_shader(device, pAllocator, layout, + &pCreateInfo->stage, + DXIL_SPIRV_YZ_FLIP_NONE, 0, 0, + false, &desc.CS); + if (ret != VK_SUCCESS) + goto out; + + if (FAILED(ID3D12Device1_CreateComputePipelineState(device->dev, &desc, + &IID_ID3D12PipelineState, + (void **)&pipeline->base.state))) { + ret = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + goto out; + } + +out: + free((void *)desc.CS.pShaderBytecode); + if (ret != VK_SUCCESS) + dzn_compute_pipeline_destroy(pipeline, pAllocator); + else + *out = dzn_compute_pipeline_to_handle(pipeline); + + return ret; +} + +ID3D12CommandSignature * +dzn_compute_pipeline_get_indirect_cmd_sig(struct dzn_compute_pipeline *pipeline) +{ +
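/* Created on first use and cached on the pipeline. */ + if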
(pipeline->indirect_cmd_sig) + return pipeline->indirect_cmd_sig; + + struct dzn_device *device = + container_of(pipeline->base.base.device, struct dzn_device, vk); + + D3D12_INDIRECT_ARGUMENT_DESC indirect_dispatch_args[] = { + { + .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT, + .Constant = { + .RootParameterIndex = pipeline->base.root.sysval_cbv_param_idx, + .DestOffsetIn32BitValues = 0, + .Num32BitValuesToSet = 3, + }, + }, + { + .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH, + }, + }; + + D3D12_COMMAND_SIGNATURE_DESC indirect_dispatch_desc = { + .ByteStride = sizeof(D3D12_DISPATCH_ARGUMENTS) * 2, + .NumArgumentDescs = ARRAY_SIZE(indirect_dispatch_args), + .pArgumentDescs = indirect_dispatch_args, + }; + + HRESULT hres = + ID3D12Device1_CreateCommandSignature(device->dev, &indirect_dispatch_desc, + pipeline->base.root.sig, + &IID_ID3D12CommandSignature, + (void **)&pipeline->indirect_cmd_sig); + if (FAILED(hres)) + return NULL; + + return pipeline->indirect_cmd_sig; +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_CreateComputePipelines(VkDevice dev, + VkPipelineCache pipelineCache, + uint32_t count, + const VkComputePipelineCreateInfo *pCreateInfos, + const VkAllocationCallbacks *pAllocator, + VkPipeline *pPipelines) +{ + VK_FROM_HANDLE(dzn_device, device, dev); + VkResult result = VK_SUCCESS; + + unsigned i; + for (i = 0; i < count; i++) { + result = dzn_compute_pipeline_create(device, + pipelineCache, + &pCreateInfos[i], + pAllocator, + &pPipelines[i]); + if (result != VK_SUCCESS) { + pPipelines[i] = VK_NULL_HANDLE; + + /* Bail out on the first error other than VK_PIPELINE_COMPILE_REQUIRED_EXT, + * as it is not obvious which error should be reported when two different + * failures occur. + */ + if (result != VK_PIPELINE_COMPILE_REQUIRED_EXT) + break; + + if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT) + break; + } + } + + for (; i < count; i++) + pPipelines[i] = VK_NULL_HANDLE; + + return result; +} + +VKAPI_ATTR void VKAPI_CALL +dzn_DestroyPipeline(VkDevice device, + VkPipeline pipeline, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(dzn_pipeline, pipe, pipeline); + + if (!pipe) + return; + + if (pipe->type == VK_PIPELINE_BIND_POINT_GRAPHICS) { + struct dzn_graphics_pipeline *gfx = container_of(pipe, struct dzn_graphics_pipeline, base); + dzn_graphics_pipeline_destroy(gfx, pAllocator); + } else { + assert(pipe->type == VK_PIPELINE_BIND_POINT_COMPUTE); + struct dzn_compute_pipeline *compute = container_of(pipe, struct dzn_compute_pipeline, base); + dzn_compute_pipeline_destroy(compute, pAllocator); + } +} diff --git a/src/microsoft/vulkan/dzn_pipeline.cpp b/src/microsoft/vulkan/dzn_pipeline.cpp deleted file mode 100644 index c0726c5c6af..00000000000 --- a/src/microsoft/vulkan/dzn_pipeline.cpp +++ /dev/null @@ -1,1195 +0,0 @@ -/* - * Copyright © Microsoft Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "dzn_private.h" - -#include "spirv_to_dxil.h" - -#include "dxil_validator.h" - -#include "vk_alloc.h" -#include "vk_util.h" -#include "vk_format.h" - -#include "util/u_debug.h" - -static dxil_spirv_shader_stage -to_dxil_shader_stage(VkShaderStageFlagBits in) -{ - switch (in) { - case VK_SHADER_STAGE_VERTEX_BIT: return DXIL_SPIRV_SHADER_VERTEX; - case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: return DXIL_SPIRV_SHADER_TESS_CTRL; - case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: return DXIL_SPIRV_SHADER_TESS_EVAL; - case VK_SHADER_STAGE_GEOMETRY_BIT: return DXIL_SPIRV_SHADER_GEOMETRY; - case VK_SHADER_STAGE_FRAGMENT_BIT: return DXIL_SPIRV_SHADER_FRAGMENT; - case VK_SHADER_STAGE_COMPUTE_BIT: return DXIL_SPIRV_SHADER_COMPUTE; - default: unreachable("Unsupported stage"); - } -} - -static VkResult -dzn_pipeline_compile_shader(struct dzn_device *device, - const VkAllocationCallbacks *alloc, - struct dzn_pipeline_layout *layout, - const VkPipelineShaderStageCreateInfo *stage_info, - enum dxil_spirv_yz_flip_mode yz_flip_mode, - uint16_t y_flip_mask, uint16_t z_flip_mask, - bool force_sample_rate_shading, - D3D12_SHADER_BYTECODE *slot) -{ - struct dzn_instance *instance = - container_of(device->vk.physical->instance, struct dzn_instance, vk); - const VkSpecializationInfo *spec_info = stage_info->pSpecializationInfo; - VK_FROM_HANDLE(vk_shader_module, module, stage_info->module); - struct dxil_spirv_object dxil_object; - - /* convert VkSpecializationInfo */ - struct dxil_spirv_specialization *spec = NULL; - uint32_t num_spec = 0; - - if (spec_info && spec_info->mapEntryCount) { - spec = (struct dxil_spirv_specialization *) - vk_alloc2(&device->vk.alloc, alloc, - spec_info->mapEntryCount * sizeof(*spec), 8, - VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); - if (!spec) - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - - for (uint32_t i = 0; i < spec_info->mapEntryCount; i++) { - const VkSpecializationMapEntry *entry = &spec_info->pMapEntries[i]; - const uint8_t *data = (const uint8_t *)spec_info->pData + entry->offset; - assert(data + entry->size <= (const uint8_t *)spec_info->pData + spec_info->dataSize); - spec[i].id = entry->constantID; - switch (entry->size) { - case 8: - spec[i].value.u64 = *(const uint64_t *)data; - break; - case 4: - spec[i].value.u32 = *(const uint32_t *)data; - break; - case 2: - spec[i].value.u16 = *(const uint16_t *)data; - break; - case 1: - spec[i].value.u8 = *(const uint8_t *)data; - break; - default: - assert(!"Invalid spec constant size"); - break; - } - - spec[i].defined_on_module = false; - } - - num_spec = spec_info->mapEntryCount; - } - - struct dxil_spirv_runtime_conf conf = { - .runtime_data_cbv = { - .register_space = DZN_REGISTER_SPACE_SYSVALS, - .base_shader_register = 0, - }, - .push_constant_cbv = { - .register_space = DZN_REGISTER_SPACE_PUSH_CONSTANT, - .base_shader_register = 0, - }, - .descriptor_set_count = layout->set_count, - .descriptor_sets = layout->binding_translation, - .zero_based_vertex_instance_id = false, - .yz_flip = { - .mode = yz_flip_mode, 
- .y_mask = y_flip_mask, - .z_mask = z_flip_mask, - }, - .read_only_images_as_srvs = true, - .force_sample_rate_shading = force_sample_rate_shading, - }; - - struct dxil_spirv_debug_options dbg_opts = { - .dump_nir = !!(instance->debug_flags & DZN_DEBUG_NIR), - }; - - /* TODO: Extend spirv_to_dxil() to allow passing a custom allocator */ - bool success = - spirv_to_dxil((uint32_t *)module->data, module->size / sizeof(uint32_t), - spec, num_spec, - to_dxil_shader_stage(stage_info->stage), - stage_info->pName, &dbg_opts, &conf, &dxil_object); - - vk_free2(&device->vk.alloc, alloc, spec); - - if (!success) - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - - char *err; - bool res = dxil_validate_module(instance->dxil_validator, - dxil_object.binary.buffer, - dxil_object.binary.size, &err); - - if (instance->debug_flags & DZN_DEBUG_DXIL) { - char *disasm = dxil_disasm_module(instance->dxil_validator, - dxil_object.binary.buffer, - dxil_object.binary.size); - if (disasm) { - fprintf(stderr, - "== BEGIN SHADER ============================================\n" - "%s\n" - "== END SHADER ==============================================\n", - disasm); - ralloc_free(disasm); - } - } - - if (!res) { - if (err) { - fprintf(stderr, - "== VALIDATION ERROR =============================================\n" - "%s\n" - "== END ==========================================================\n", - err); - ralloc_free(err); - } - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - } - - slot->pShaderBytecode = dxil_object.binary.buffer; - slot->BytecodeLength = dxil_object.binary.size; - return VK_SUCCESS; -} - -static D3D12_SHADER_BYTECODE * -dzn_pipeline_get_gfx_shader_slot(D3D12_GRAPHICS_PIPELINE_STATE_DESC *desc, - VkShaderStageFlagBits in) -{ - switch (in) { - case VK_SHADER_STAGE_VERTEX_BIT: return &desc->VS; - case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: return &desc->DS; - case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: return &desc->HS; - case VK_SHADER_STAGE_GEOMETRY_BIT: return &desc->GS; - case VK_SHADER_STAGE_FRAGMENT_BIT: return &desc->PS; - default: unreachable("Unsupported stage"); - } -} - -static VkResult -dzn_graphics_pipeline_translate_vi(struct dzn_graphics_pipeline *pipeline, - const VkAllocationCallbacks *alloc, - D3D12_GRAPHICS_PIPELINE_STATE_DESC *out, - const VkGraphicsPipelineCreateInfo *in, - D3D12_INPUT_ELEMENT_DESC **input_elems) -{ - struct dzn_device *device = - container_of(pipeline->base.base.device, struct dzn_device, vk); - const VkPipelineVertexInputStateCreateInfo *in_vi = - in->pVertexInputState; - - if (!in_vi->vertexAttributeDescriptionCount) { - out->InputLayout.pInputElementDescs = NULL; - out->InputLayout.NumElements = 0; - *input_elems = NULL; - return VK_SUCCESS; - } - - *input_elems = (D3D12_INPUT_ELEMENT_DESC *) - vk_alloc2(&device->vk.alloc, alloc, - sizeof(**input_elems) * in_vi->vertexAttributeDescriptionCount, 8, - VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); - if (!*input_elems) - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - - D3D12_INPUT_ELEMENT_DESC *inputs = *input_elems; - D3D12_INPUT_CLASSIFICATION slot_class[MAX_VBS]; - - pipeline->vb.count = 0; - for (uint32_t i = 0; i < in_vi->vertexBindingDescriptionCount; i++) { - const struct VkVertexInputBindingDescription *bdesc = - &in_vi->pVertexBindingDescriptions[i]; - - pipeline->vb.count = MAX2(pipeline->vb.count, bdesc->binding + 1); - pipeline->vb.strides[bdesc->binding] = bdesc->stride; - if (bdesc->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE) { - slot_class[bdesc->binding] = 
D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA; - } else { - assert(bdesc->inputRate == VK_VERTEX_INPUT_RATE_VERTEX); - slot_class[bdesc->binding] = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA; - } - } - - for (uint32_t i = 0; i < in_vi->vertexAttributeDescriptionCount; i++) { - const VkVertexInputAttributeDescription *attr = - &in_vi->pVertexAttributeDescriptions[i]; - - /* nir_to_dxil() name all vertex inputs as TEXCOORDx */ - inputs[i].SemanticName = "TEXCOORD"; - inputs[i].SemanticIndex = attr->location; - inputs[i].Format = dzn_buffer_get_dxgi_format(attr->format); - inputs[i].InputSlot = attr->binding; - inputs[i].InputSlotClass = slot_class[attr->binding]; - inputs[i].InstanceDataStepRate = - inputs[i].InputSlotClass == D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA ? 1 : 0; - inputs[i].AlignedByteOffset = attr->offset; - } - - out->InputLayout.pInputElementDescs = inputs; - out->InputLayout.NumElements = in_vi->vertexAttributeDescriptionCount; - return VK_SUCCESS; -} - -static D3D12_PRIMITIVE_TOPOLOGY_TYPE -to_prim_topology_type(VkPrimitiveTopology in) -{ - switch (in) { - case VK_PRIMITIVE_TOPOLOGY_POINT_LIST: - return D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT; - case VK_PRIMITIVE_TOPOLOGY_LINE_LIST: - case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP: - case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY: - case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY: - return D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; - case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST: - case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP: - case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN: - case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY: - case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY: - return D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; - case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST: - return D3D12_PRIMITIVE_TOPOLOGY_TYPE_PATCH; - default: unreachable("Invalid primitive topology"); - } -} - -static D3D12_PRIMITIVE_TOPOLOGY -to_prim_topology(VkPrimitiveTopology in, unsigned patch_control_points) -{ - switch (in) { - case VK_PRIMITIVE_TOPOLOGY_POINT_LIST: return D3D_PRIMITIVE_TOPOLOGY_POINTLIST; - case VK_PRIMITIVE_TOPOLOGY_LINE_LIST: return D3D_PRIMITIVE_TOPOLOGY_LINELIST; - case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP: return D3D_PRIMITIVE_TOPOLOGY_LINESTRIP; - case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ; - case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ; - case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; - case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; - /* Triangle fans are emulated using an intermediate index buffer. 
*/ - case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; - case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ; - case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ; - case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST: - assert(patch_control_points); - return (D3D12_PRIMITIVE_TOPOLOGY)(D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST + patch_control_points - 1); - default: unreachable("Invalid primitive topology"); - } -} - -static void -dzn_graphics_pipeline_translate_ia(struct dzn_graphics_pipeline *pipeline, - D3D12_GRAPHICS_PIPELINE_STATE_DESC *out, - const VkGraphicsPipelineCreateInfo *in) -{ - const VkPipelineInputAssemblyStateCreateInfo *in_ia = - in->pInputAssemblyState; - const VkPipelineTessellationStateCreateInfo *in_tes = - (out->DS.pShaderBytecode && out->HS.pShaderBytecode) ? - in->pTessellationState : NULL; - - out->PrimitiveTopologyType = to_prim_topology_type(in_ia->topology); - pipeline->ia.triangle_fan = in_ia->topology == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN; - pipeline->ia.topology = - to_prim_topology(in_ia->topology, in_tes ? in_tes->patchControlPoints : 0); - - /* FIXME: does that work for u16 index buffers? */ - if (in_ia->primitiveRestartEnable) - out->IBStripCutValue = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF; - else - out->IBStripCutValue = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED; -} - -static D3D12_FILL_MODE -translate_polygon_mode(VkPolygonMode in) -{ - switch (in) { - case VK_POLYGON_MODE_FILL: return D3D12_FILL_MODE_SOLID; - case VK_POLYGON_MODE_LINE: return D3D12_FILL_MODE_WIREFRAME; - default: unreachable("Unsupported polygon mode"); - } -} - -static D3D12_CULL_MODE -translate_cull_mode(VkCullModeFlags in) -{ - switch (in) { - case VK_CULL_MODE_NONE: return D3D12_CULL_MODE_NONE; - case VK_CULL_MODE_FRONT_BIT: return D3D12_CULL_MODE_FRONT; - case VK_CULL_MODE_BACK_BIT: return D3D12_CULL_MODE_BACK; - /* Front+back face culling is equivalent to 'rasterization disabled' */ - case VK_CULL_MODE_FRONT_AND_BACK: return D3D12_CULL_MODE_NONE; - default: unreachable("Unsupported cull mode"); - } -} - -static void -dzn_graphics_pipeline_translate_rast(struct dzn_graphics_pipeline *pipeline, - D3D12_GRAPHICS_PIPELINE_STATE_DESC *out, - const VkGraphicsPipelineCreateInfo *in) -{ - const VkPipelineRasterizationStateCreateInfo *in_rast = - in->pRasterizationState; - const VkPipelineViewportStateCreateInfo *in_vp = - in_rast->rasterizerDiscardEnable ? 
NULL : in->pViewportState; - - if (in_vp) { - pipeline->vp.count = in_vp->viewportCount; - if (in_vp->pViewports) { - for (uint32_t i = 0; in_vp->pViewports && i < in_vp->viewportCount; i++) - dzn_translate_viewport(&pipeline->vp.desc[i], &in_vp->pViewports[i]); - } - - pipeline->scissor.count = in_vp->scissorCount; - if (in_vp->pScissors) { - for (uint32_t i = 0; i < in_vp->scissorCount; i++) - dzn_translate_rect(&pipeline->scissor.desc[i], &in_vp->pScissors[i]); - } - } - - out->RasterizerState.DepthClipEnable = !in_rast->depthClampEnable; - out->RasterizerState.FillMode = translate_polygon_mode(in_rast->polygonMode); - out->RasterizerState.CullMode = translate_cull_mode(in_rast->cullMode); - out->RasterizerState.FrontCounterClockwise = - in_rast->frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE; - if (in_rast->depthBiasEnable) { - out->RasterizerState.DepthBias = in_rast->depthBiasConstantFactor; - out->RasterizerState.SlopeScaledDepthBias = in_rast->depthBiasSlopeFactor; - out->RasterizerState.DepthBiasClamp = in_rast->depthBiasClamp; - } - - assert(in_rast->lineWidth == 1.0f); -} - -static void -dzn_graphics_pipeline_translate_ms(struct dzn_graphics_pipeline *pipeline, - D3D12_GRAPHICS_PIPELINE_STATE_DESC *out, - const VkGraphicsPipelineCreateInfo *in) -{ - const VkPipelineRasterizationStateCreateInfo *in_rast = - in->pRasterizationState; - const VkPipelineMultisampleStateCreateInfo *in_ms = - in_rast->rasterizerDiscardEnable ? NULL : in->pMultisampleState; - - /* TODO: minSampleShading (use VRS), alphaToOneEnable */ - out->SampleDesc.Count = in_ms ? in_ms->rasterizationSamples : 1; - out->SampleDesc.Quality = 0; - out->SampleMask = in_ms && in_ms->pSampleMask ? - *in_ms->pSampleMask : - (1 << out->SampleDesc.Count) - 1; -} - -static D3D12_STENCIL_OP -translate_stencil_op(VkStencilOp in) -{ - switch (in) { - case VK_STENCIL_OP_KEEP: return D3D12_STENCIL_OP_KEEP; - case VK_STENCIL_OP_ZERO: return D3D12_STENCIL_OP_ZERO; - case VK_STENCIL_OP_REPLACE: return D3D12_STENCIL_OP_REPLACE; - case VK_STENCIL_OP_INCREMENT_AND_CLAMP: return D3D12_STENCIL_OP_INCR_SAT; - case VK_STENCIL_OP_DECREMENT_AND_CLAMP: return D3D12_STENCIL_OP_DECR_SAT; - case VK_STENCIL_OP_INCREMENT_AND_WRAP: return D3D12_STENCIL_OP_INCR; - case VK_STENCIL_OP_DECREMENT_AND_WRAP: return D3D12_STENCIL_OP_DECR; - case VK_STENCIL_OP_INVERT: return D3D12_STENCIL_OP_INVERT; - default: unreachable("Invalid stencil op"); - } -} - -static void -translate_stencil_test(struct dzn_graphics_pipeline *pipeline, - D3D12_GRAPHICS_PIPELINE_STATE_DESC *out, - const VkGraphicsPipelineCreateInfo *in) -{ - const VkPipelineDepthStencilStateCreateInfo *in_zsa = - in->pDepthStencilState; - - bool front_test_uses_ref = - !(in->pRasterizationState->cullMode & VK_CULL_MODE_FRONT_BIT) && - in_zsa->front.compareOp != VK_COMPARE_OP_NEVER && - in_zsa->front.compareOp != VK_COMPARE_OP_ALWAYS && - (pipeline->zsa.stencil_test.dynamic_compare_mask || - in_zsa->front.compareMask != 0); - bool back_test_uses_ref = - !(in->pRasterizationState->cullMode & VK_CULL_MODE_BACK_BIT) && - in_zsa->back.compareOp != VK_COMPARE_OP_NEVER && - in_zsa->back.compareOp != VK_COMPARE_OP_ALWAYS && - (pipeline->zsa.stencil_test.dynamic_compare_mask || - in_zsa->back.compareMask != 0); - - if (front_test_uses_ref && pipeline->zsa.stencil_test.dynamic_compare_mask) - pipeline->zsa.stencil_test.front.compare_mask = UINT32_MAX; - else if (front_test_uses_ref) - pipeline->zsa.stencil_test.front.compare_mask = in_zsa->front.compareMask; - else - 
pipeline->zsa.stencil_test.front.compare_mask = 0; - - if (back_test_uses_ref && pipeline->zsa.stencil_test.dynamic_compare_mask) - pipeline->zsa.stencil_test.back.compare_mask = UINT32_MAX; - else if (back_test_uses_ref) - pipeline->zsa.stencil_test.back.compare_mask = in_zsa->back.compareMask; - else - pipeline->zsa.stencil_test.back.compare_mask = 0; - - bool diff_wr_mask = - in->pRasterizationState->cullMode == VK_CULL_MODE_NONE && - (pipeline->zsa.stencil_test.dynamic_write_mask || - in_zsa->back.writeMask != in_zsa->front.writeMask); - bool diff_ref = - in->pRasterizationState->cullMode == VK_CULL_MODE_NONE && - (pipeline->zsa.stencil_test.dynamic_ref || - in_zsa->back.reference != in_zsa->front.reference); - bool diff_cmp_mask = - back_test_uses_ref && front_test_uses_ref && - (pipeline->zsa.stencil_test.dynamic_compare_mask || - pipeline->zsa.stencil_test.front.compare_mask != pipeline->zsa.stencil_test.back.compare_mask); - - if (diff_cmp_mask || diff_wr_mask) - pipeline->zsa.stencil_test.independent_front_back = true; - - bool back_wr_uses_ref = - !(in->pRasterizationState->cullMode & VK_CULL_MODE_BACK_BIT) && - (in_zsa->back.compareOp != VK_COMPARE_OP_ALWAYS && - in_zsa->back.failOp == VK_STENCIL_OP_REPLACE) || - (in_zsa->back.compareOp != VK_COMPARE_OP_NEVER && - (!in_zsa->depthTestEnable || in_zsa->depthCompareOp != VK_COMPARE_OP_NEVER) && - in_zsa->back.passOp == VK_STENCIL_OP_REPLACE) || - (in_zsa->depthTestEnable && - in_zsa->depthCompareOp != VK_COMPARE_OP_ALWAYS && - in_zsa->back.depthFailOp == VK_STENCIL_OP_REPLACE); - bool front_wr_uses_ref = - !(in->pRasterizationState->cullMode & VK_CULL_MODE_FRONT_BIT) && - (in_zsa->front.compareOp != VK_COMPARE_OP_ALWAYS && - in_zsa->front.failOp == VK_STENCIL_OP_REPLACE) || - (in_zsa->front.compareOp != VK_COMPARE_OP_NEVER && - (!in_zsa->depthTestEnable || in_zsa->depthCompareOp != VK_COMPARE_OP_NEVER) && - in_zsa->front.passOp == VK_STENCIL_OP_REPLACE) || - (in_zsa->depthTestEnable && - in_zsa->depthCompareOp != VK_COMPARE_OP_ALWAYS && - in_zsa->front.depthFailOp == VK_STENCIL_OP_REPLACE); - - pipeline->zsa.stencil_test.front.write_mask = - (pipeline->zsa.stencil_test.dynamic_write_mask || - (in->pRasterizationState->cullMode & VK_CULL_MODE_FRONT_BIT)) ? - 0 : in_zsa->front.writeMask; - pipeline->zsa.stencil_test.back.write_mask = - (pipeline->zsa.stencil_test.dynamic_write_mask || - (in->pRasterizationState->cullMode & VK_CULL_MODE_BACK_BIT)) ? - 0 : in_zsa->back.writeMask; - - pipeline->zsa.stencil_test.front.uses_ref = front_test_uses_ref || front_wr_uses_ref; - pipeline->zsa.stencil_test.back.uses_ref = back_test_uses_ref || back_wr_uses_ref; - - if (diff_ref && - pipeline->zsa.stencil_test.front.uses_ref && - pipeline->zsa.stencil_test.back.uses_ref) - pipeline->zsa.stencil_test.independent_front_back = true; - - pipeline->zsa.stencil_test.front.ref = - pipeline->zsa.stencil_test.dynamic_ref ? 0 : in_zsa->front.reference; - pipeline->zsa.stencil_test.back.ref = - pipeline->zsa.stencil_test.dynamic_ref ? 0 : in_zsa->back.reference; - - /* FIXME: We don't support independent {compare,write}_mask and stencil - * reference. Until we have proper support for independent front/back - * stencil test, let's prioritize the front setup when both are active. - */ - out->DepthStencilState.StencilReadMask = - front_test_uses_ref ? - pipeline->zsa.stencil_test.front.compare_mask : - back_test_uses_ref ? 
- pipeline->zsa.stencil_test.back.compare_mask : 0; - out->DepthStencilState.StencilWriteMask = - pipeline->zsa.stencil_test.front.write_mask ? - pipeline->zsa.stencil_test.front.write_mask : - pipeline->zsa.stencil_test.back.write_mask; - - assert(!pipeline->zsa.stencil_test.independent_front_back); -} - -static void -dzn_graphics_pipeline_translate_zsa(struct dzn_graphics_pipeline *pipeline, - D3D12_GRAPHICS_PIPELINE_STATE_DESC *out, - const VkGraphicsPipelineCreateInfo *in) -{ - const VkPipelineRasterizationStateCreateInfo *in_rast = - in->pRasterizationState; - const VkPipelineDepthStencilStateCreateInfo *in_zsa = - in_rast->rasterizerDiscardEnable ? NULL : in->pDepthStencilState; - - if (!in_zsa) - return; - - /* TODO: depthBoundsTestEnable */ - - out->DepthStencilState.DepthEnable = in_zsa->depthTestEnable; - out->DepthStencilState.DepthWriteMask = - in_zsa->depthWriteEnable ? - D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO; - out->DepthStencilState.DepthFunc = - dzn_translate_compare_op(in_zsa->depthCompareOp); - out->DepthStencilState.StencilEnable = in_zsa->stencilTestEnable; - if (in_zsa->stencilTestEnable) { - out->DepthStencilState.FrontFace.StencilFailOp = - translate_stencil_op(in_zsa->front.failOp); - out->DepthStencilState.FrontFace.StencilDepthFailOp = - translate_stencil_op(in_zsa->front.depthFailOp); - out->DepthStencilState.FrontFace.StencilPassOp = - translate_stencil_op(in_zsa->front.passOp); - out->DepthStencilState.FrontFace.StencilFunc = - dzn_translate_compare_op(in_zsa->front.compareOp); - out->DepthStencilState.BackFace.StencilFailOp = - translate_stencil_op(in_zsa->back.failOp); - out->DepthStencilState.BackFace.StencilDepthFailOp = - translate_stencil_op(in_zsa->back.depthFailOp); - out->DepthStencilState.BackFace.StencilPassOp = - translate_stencil_op(in_zsa->back.passOp); - out->DepthStencilState.BackFace.StencilFunc = - dzn_translate_compare_op(in_zsa->back.compareOp); - - pipeline->zsa.stencil_test.enable = true; - - translate_stencil_test(pipeline, out, in); - } -} - -static D3D12_BLEND -translate_blend_factor(VkBlendFactor in, bool is_alpha) -{ - switch (in) { - case VK_BLEND_FACTOR_ZERO: return D3D12_BLEND_ZERO; - case VK_BLEND_FACTOR_ONE: return D3D12_BLEND_ONE; - case VK_BLEND_FACTOR_SRC_COLOR: - return is_alpha ? D3D12_BLEND_SRC_ALPHA : D3D12_BLEND_SRC_COLOR; - case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR: - return is_alpha ? D3D12_BLEND_INV_SRC_ALPHA : D3D12_BLEND_INV_SRC_COLOR; - case VK_BLEND_FACTOR_DST_COLOR: - return is_alpha ? D3D12_BLEND_DEST_ALPHA : D3D12_BLEND_DEST_COLOR; - case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR: - return is_alpha ? D3D12_BLEND_INV_DEST_ALPHA : D3D12_BLEND_INV_DEST_COLOR; - case VK_BLEND_FACTOR_SRC_ALPHA: return D3D12_BLEND_SRC_ALPHA; - case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: return D3D12_BLEND_INV_SRC_ALPHA; - case VK_BLEND_FACTOR_DST_ALPHA: return D3D12_BLEND_DEST_ALPHA; - case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA: return D3D12_BLEND_INV_DEST_ALPHA; - /* FIXME: no way to isolate the alpla and color constants */ - case VK_BLEND_FACTOR_CONSTANT_COLOR: - case VK_BLEND_FACTOR_CONSTANT_ALPHA: - return D3D12_BLEND_BLEND_FACTOR; - case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR: - case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA: - return D3D12_BLEND_INV_BLEND_FACTOR; - case VK_BLEND_FACTOR_SRC1_COLOR: - return is_alpha ? D3D12_BLEND_SRC1_ALPHA : D3D12_BLEND_SRC1_COLOR; - case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR: - return is_alpha ? 
D3D12_BLEND_INV_SRC1_ALPHA : D3D12_BLEND_INV_SRC1_COLOR; - case VK_BLEND_FACTOR_SRC1_ALPHA: return D3D12_BLEND_SRC1_ALPHA; - case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA: return D3D12_BLEND_INV_SRC1_ALPHA; - case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE: return D3D12_BLEND_SRC_ALPHA_SAT; - default: unreachable("Invalid blend factor"); - } -} - -static D3D12_BLEND_OP -translate_blend_op(VkBlendOp in) -{ - switch (in) { - case VK_BLEND_OP_ADD: return D3D12_BLEND_OP_ADD; - case VK_BLEND_OP_SUBTRACT: return D3D12_BLEND_OP_SUBTRACT; - case VK_BLEND_OP_REVERSE_SUBTRACT: return D3D12_BLEND_OP_REV_SUBTRACT; - case VK_BLEND_OP_MIN: return D3D12_BLEND_OP_MIN; - case VK_BLEND_OP_MAX: return D3D12_BLEND_OP_MAX; - default: unreachable("Invalid blend op"); - } -} - -static D3D12_LOGIC_OP -translate_logic_op(VkLogicOp in) -{ - switch (in) { - case VK_LOGIC_OP_CLEAR: return D3D12_LOGIC_OP_CLEAR; - case VK_LOGIC_OP_AND: return D3D12_LOGIC_OP_AND; - case VK_LOGIC_OP_AND_REVERSE: return D3D12_LOGIC_OP_AND_REVERSE; - case VK_LOGIC_OP_COPY: return D3D12_LOGIC_OP_COPY; - case VK_LOGIC_OP_AND_INVERTED: return D3D12_LOGIC_OP_AND_INVERTED; - case VK_LOGIC_OP_NO_OP: return D3D12_LOGIC_OP_NOOP; - case VK_LOGIC_OP_XOR: return D3D12_LOGIC_OP_XOR; - case VK_LOGIC_OP_OR: return D3D12_LOGIC_OP_OR; - case VK_LOGIC_OP_NOR: return D3D12_LOGIC_OP_NOR; - case VK_LOGIC_OP_EQUIVALENT: return D3D12_LOGIC_OP_EQUIV; - case VK_LOGIC_OP_INVERT: return D3D12_LOGIC_OP_INVERT; - case VK_LOGIC_OP_OR_REVERSE: return D3D12_LOGIC_OP_OR_REVERSE; - case VK_LOGIC_OP_COPY_INVERTED: return D3D12_LOGIC_OP_COPY_INVERTED; - case VK_LOGIC_OP_OR_INVERTED: return D3D12_LOGIC_OP_OR_INVERTED; - case VK_LOGIC_OP_NAND: return D3D12_LOGIC_OP_NAND; - case VK_LOGIC_OP_SET: return D3D12_LOGIC_OP_SET; - default: unreachable("Invalid logic op"); - } -} - -static void -dzn_graphics_pipeline_translate_blend(struct dzn_graphics_pipeline *pipeline, - D3D12_GRAPHICS_PIPELINE_STATE_DESC *out, - const VkGraphicsPipelineCreateInfo *in) -{ - const VkPipelineRasterizationStateCreateInfo *in_rast = - in->pRasterizationState; - const VkPipelineColorBlendStateCreateInfo *in_blend = - in_rast->rasterizerDiscardEnable ? NULL : in->pColorBlendState; - const VkPipelineMultisampleStateCreateInfo *in_ms = - in_rast->rasterizerDiscardEnable ? NULL : in->pMultisampleState; - - if (!in_blend || !in_ms) - return; - - D3D12_LOGIC_OP logicop = - in_blend->logicOpEnable ? 
- translate_logic_op(in_blend->logicOp) : D3D12_LOGIC_OP_NOOP; - out->BlendState.AlphaToCoverageEnable = in_ms->alphaToCoverageEnable; - memcpy(pipeline->blend.constants, in_blend->blendConstants, - sizeof(pipeline->blend.constants)); - - for (uint32_t i = 0; i < in_blend->attachmentCount; i++) { - if (i > 0 && - !memcmp(&in_blend->pAttachments[i - 1], &in_blend->pAttachments[i], - sizeof(*in_blend->pAttachments))) - out->BlendState.IndependentBlendEnable = true; - - out->BlendState.RenderTarget[i].BlendEnable = - in_blend->pAttachments[i].blendEnable; - in_blend->logicOpEnable; - out->BlendState.RenderTarget[i].RenderTargetWriteMask = - in_blend->pAttachments[i].colorWriteMask; - - if (in_blend->logicOpEnable) { - out->BlendState.RenderTarget[i].LogicOpEnable = true; - out->BlendState.RenderTarget[i].LogicOp = logicop; - } else { - out->BlendState.RenderTarget[i].SrcBlend = - translate_blend_factor(in_blend->pAttachments[i].srcColorBlendFactor, false); - out->BlendState.RenderTarget[i].DestBlend = - translate_blend_factor(in_blend->pAttachments[i].dstColorBlendFactor, false); - out->BlendState.RenderTarget[i].BlendOp = - translate_blend_op(in_blend->pAttachments[i].colorBlendOp); - out->BlendState.RenderTarget[i].SrcBlendAlpha = - translate_blend_factor(in_blend->pAttachments[i].srcAlphaBlendFactor, true); - out->BlendState.RenderTarget[i].DestBlendAlpha = - translate_blend_factor(in_blend->pAttachments[i].dstAlphaBlendFactor, true); - out->BlendState.RenderTarget[i].BlendOpAlpha = - translate_blend_op(in_blend->pAttachments[i].alphaBlendOp); - } - } -} - - -static void -dzn_pipeline_init(struct dzn_pipeline *pipeline, - struct dzn_device *device, - VkPipelineBindPoint type, - struct dzn_pipeline_layout *layout) -{ - pipeline->type = type; - pipeline->root.sets_param_count = layout->root.sets_param_count; - pipeline->root.sysval_cbv_param_idx = layout->root.sysval_cbv_param_idx; - pipeline->root.push_constant_cbv_param_idx = layout->root.push_constant_cbv_param_idx; - STATIC_ASSERT(sizeof(pipeline->root.type) == sizeof(layout->root.type)); - memcpy(pipeline->root.type, layout->root.type, sizeof(pipeline->root.type)); - pipeline->root.sig = layout->root.sig; - ID3D12RootSignature_AddRef(pipeline->root.sig); - - STATIC_ASSERT(sizeof(layout->desc_count) == sizeof(pipeline->desc_count)); - memcpy(pipeline->desc_count, layout->desc_count, sizeof(pipeline->desc_count)); - - STATIC_ASSERT(sizeof(layout->sets) == sizeof(pipeline->sets)); - memcpy(pipeline->sets, layout->sets, sizeof(pipeline->sets)); - vk_object_base_init(&device->vk, &pipeline->base, VK_OBJECT_TYPE_PIPELINE); -} - -static void -dzn_pipeline_finish(struct dzn_pipeline *pipeline) -{ - if (pipeline->state) - ID3D12PipelineState_Release(pipeline->state); - if (pipeline->root.sig) - ID3D12RootSignature_Release(pipeline->root.sig); - - vk_object_base_finish(&pipeline->base); -} - -static void -dzn_graphics_pipeline_destroy(struct dzn_graphics_pipeline *pipeline, - const VkAllocationCallbacks *alloc) -{ - if (!pipeline) - return; - - for (uint32_t i = 0; i < ARRAY_SIZE(pipeline->indirect_cmd_sigs); i++) { - if (pipeline->indirect_cmd_sigs[i]) - ID3D12CommandSignature_Release(pipeline->indirect_cmd_sigs[i]); - } - - dzn_pipeline_finish(&pipeline->base); - vk_free2(&pipeline->base.base.device->alloc, alloc, pipeline); -} - -static VkResult -dzn_graphics_pipeline_create(struct dzn_device *device, - VkPipelineCache cache, - const VkGraphicsPipelineCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkPipeline *out) -{ - 
VK_FROM_HANDLE(dzn_render_pass, pass, pCreateInfo->renderPass); - VK_FROM_HANDLE(dzn_pipeline_layout, layout, pCreateInfo->layout); - const struct dzn_subpass *subpass = &pass->subpasses[pCreateInfo->subpass]; - uint32_t stage_mask = 0; - VkResult ret; - HRESULT hres = 0; - - struct dzn_graphics_pipeline *pipeline = (struct dzn_graphics_pipeline *) - vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!pipeline) - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - - dzn_pipeline_init(&pipeline->base, device, - VK_PIPELINE_BIND_POINT_GRAPHICS, - layout); - D3D12_INPUT_ELEMENT_DESC *inputs = NULL; - D3D12_GRAPHICS_PIPELINE_STATE_DESC desc = { - .pRootSignature = pipeline->base.root.sig, - .Flags = D3D12_PIPELINE_STATE_FLAG_NONE, - }; - - const VkPipelineViewportStateCreateInfo *vp_info = - pCreateInfo->pRasterizationState->rasterizerDiscardEnable ? - NULL : pCreateInfo->pViewportState; - - - ret = dzn_graphics_pipeline_translate_vi(pipeline, pAllocator, &desc, pCreateInfo, &inputs); - if (ret != VK_SUCCESS) - goto out; - - if (pCreateInfo->pDynamicState) { - for (uint32_t i = 0; i < pCreateInfo->pDynamicState->dynamicStateCount; i++) { - switch (pCreateInfo->pDynamicState->pDynamicStates[i]) { - case VK_DYNAMIC_STATE_VIEWPORT: - pipeline->vp.dynamic = true; - break; - case VK_DYNAMIC_STATE_SCISSOR: - pipeline->scissor.dynamic = true; - break; - case VK_DYNAMIC_STATE_STENCIL_REFERENCE: - pipeline->zsa.stencil_test.dynamic_ref = true; - break; - case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK: - pipeline->zsa.stencil_test.dynamic_compare_mask = true; - break; - case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK: - pipeline->zsa.stencil_test.dynamic_write_mask = true; - break; - case VK_DYNAMIC_STATE_BLEND_CONSTANTS: - pipeline->blend.dynamic_constants = true; - break; - default: unreachable("Unsupported dynamic state"); - } - } - } - - dzn_graphics_pipeline_translate_ia(pipeline, &desc, pCreateInfo); - dzn_graphics_pipeline_translate_rast(pipeline, &desc, pCreateInfo); - dzn_graphics_pipeline_translate_ms(pipeline, &desc, pCreateInfo); - dzn_graphics_pipeline_translate_zsa(pipeline, &desc, pCreateInfo); - dzn_graphics_pipeline_translate_blend(pipeline, &desc, pCreateInfo); - - desc.NumRenderTargets = subpass->color_count; - for (uint32_t i = 0; i < subpass->color_count; i++) { - uint32_t idx = subpass->colors[i].idx; - - if (idx == VK_ATTACHMENT_UNUSED) continue; - - const struct dzn_attachment *attachment = &pass->attachments[idx]; - - desc.RTVFormats[i] = - dzn_image_get_dxgi_format(attachment->format, - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, - VK_IMAGE_ASPECT_COLOR_BIT); - } - - if (subpass->zs.idx != VK_ATTACHMENT_UNUSED) { - const struct dzn_attachment *attachment = - &pass->attachments[subpass->zs.idx]; - - desc.DSVFormat = - dzn_image_get_dxgi_format(attachment->format, - VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, - VK_IMAGE_ASPECT_DEPTH_BIT | - VK_IMAGE_ASPECT_STENCIL_BIT); - } - - for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) - stage_mask |= pCreateInfo->pStages[i].stage; - - for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { - if (pCreateInfo->pStages[i].stage == VK_SHADER_STAGE_FRAGMENT_BIT && - pCreateInfo->pRasterizationState && - (pCreateInfo->pRasterizationState->rasterizerDiscardEnable || - pCreateInfo->pRasterizationState->cullMode == VK_CULL_MODE_FRONT_AND_BACK)) { - /* Disable rasterization (AKA leave fragment shader NULL) when - * front+back culling or discard is set. 
- */ - continue; - } - - D3D12_SHADER_BYTECODE *slot = - dzn_pipeline_get_gfx_shader_slot(&desc, pCreateInfo->pStages[i].stage); - enum dxil_spirv_yz_flip_mode yz_flip_mode = DXIL_SPIRV_YZ_FLIP_NONE; - uint16_t y_flip_mask = 0, z_flip_mask = 0; - - if (pCreateInfo->pStages[i].stage == VK_SHADER_STAGE_GEOMETRY_BIT || - (pCreateInfo->pStages[i].stage == VK_SHADER_STAGE_VERTEX_BIT && - !(stage_mask & VK_SHADER_STAGE_GEOMETRY_BIT))) { - if (pipeline->vp.dynamic) { - yz_flip_mode = DXIL_SPIRV_YZ_FLIP_CONDITIONAL; - } else if (vp_info) { - for (uint32_t i = 0; vp_info->pViewports && i < vp_info->viewportCount; i++) { - if (vp_info->pViewports[i].height > 0) - y_flip_mask |= BITFIELD_BIT(i); - - if (vp_info->pViewports[i].minDepth > vp_info->pViewports[i].maxDepth) - z_flip_mask |= BITFIELD_BIT(i); - } - - if (y_flip_mask && z_flip_mask) - yz_flip_mode = DXIL_SPIRV_YZ_FLIP_UNCONDITIONAL; - else if (z_flip_mask) - yz_flip_mode = DXIL_SPIRV_Z_FLIP_UNCONDITIONAL; - else if (y_flip_mask) - yz_flip_mode = DXIL_SPIRV_Y_FLIP_UNCONDITIONAL; - } - } - - bool force_sample_rate_shading = - pCreateInfo->pStages[i].stage == VK_SHADER_STAGE_FRAGMENT_BIT && - pCreateInfo->pMultisampleState && - pCreateInfo->pMultisampleState->sampleShadingEnable; - - ret = dzn_pipeline_compile_shader(device, pAllocator, - layout, &pCreateInfo->pStages[i], - yz_flip_mode, y_flip_mask, z_flip_mask, - force_sample_rate_shading, slot); - if (ret != VK_SUCCESS) - goto out; - } - - - hres = ID3D12Device1_CreateGraphicsPipelineState(device->dev, &desc, - IID_ID3D12PipelineState, - (void **)&pipeline->base.state); - if (FAILED(hres)) { - ret = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - goto out; - } - - ret = VK_SUCCESS; - -out: - for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { - D3D12_SHADER_BYTECODE *slot = - dzn_pipeline_get_gfx_shader_slot(&desc, pCreateInfo->pStages[i].stage); - free((void *)slot->pShaderBytecode); - } - - vk_free2(&device->vk.alloc, pAllocator, inputs); - if (ret != VK_SUCCESS) - dzn_graphics_pipeline_destroy(pipeline, pAllocator); - else - *out = dzn_graphics_pipeline_to_handle(pipeline); - - return ret; -} - -#define DZN_INDIRECT_CMD_SIG_MAX_ARGS 3 - -ID3D12CommandSignature * -dzn_graphics_pipeline_get_indirect_cmd_sig(struct dzn_graphics_pipeline *pipeline, - enum dzn_indirect_draw_cmd_sig_type type) -{ - assert(type < DZN_NUM_INDIRECT_DRAW_CMD_SIGS); - - struct dzn_device *device = - container_of(pipeline->base.base.device, struct dzn_device, vk); - ID3D12CommandSignature *cmdsig = pipeline->indirect_cmd_sigs[type]; - - if (cmdsig) - return cmdsig; - - bool triangle_fan = type == DZN_INDIRECT_DRAW_TRIANGLE_FAN_CMD_SIG; - bool indexed = type == DZN_INDIRECT_INDEXED_DRAW_CMD_SIG || triangle_fan; - - uint32_t cmd_arg_count = 0; - D3D12_INDIRECT_ARGUMENT_DESC cmd_args[DZN_INDIRECT_CMD_SIG_MAX_ARGS]; - - if (triangle_fan) { - cmd_args[cmd_arg_count++] = D3D12_INDIRECT_ARGUMENT_DESC { - .Type = D3D12_INDIRECT_ARGUMENT_TYPE_INDEX_BUFFER_VIEW, - }; - } - - cmd_args[cmd_arg_count++] = D3D12_INDIRECT_ARGUMENT_DESC { - .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT, - .Constant = { - .RootParameterIndex = pipeline->base.root.sysval_cbv_param_idx, - .DestOffsetIn32BitValues = offsetof(struct dxil_spirv_vertex_runtime_data, first_vertex) / 4, - .Num32BitValuesToSet = 2, - }, - }; - - cmd_args[cmd_arg_count++] = D3D12_INDIRECT_ARGUMENT_DESC { - .Type = indexed ? 
- D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED : - D3D12_INDIRECT_ARGUMENT_TYPE_DRAW, - }; - - assert(cmd_arg_count <= ARRAY_SIZE(cmd_args)); - assert(offsetof(struct dxil_spirv_vertex_runtime_data, first_vertex) == 0); - - D3D12_COMMAND_SIGNATURE_DESC cmd_sig_desc = { - .ByteStride = - triangle_fan ? - sizeof(struct dzn_indirect_triangle_fan_draw_exec_params) : - sizeof(struct dzn_indirect_draw_exec_params), - .NumArgumentDescs = cmd_arg_count, - .pArgumentDescs = cmd_args, - }; - HRESULT hres = - ID3D12Device1_CreateCommandSignature(device->dev, &cmd_sig_desc, - pipeline->base.root.sig, - IID_ID3D12CommandSignature, - (void **)&cmdsig); - if (FAILED(hres)) - return NULL; - - pipeline->indirect_cmd_sigs[type] = cmdsig; - return cmdsig; -} - -VKAPI_ATTR VkResult VKAPI_CALL -dzn_CreateGraphicsPipelines(VkDevice dev, - VkPipelineCache pipelineCache, - uint32_t count, - const VkGraphicsPipelineCreateInfo *pCreateInfos, - const VkAllocationCallbacks *pAllocator, - VkPipeline *pPipelines) -{ - VK_FROM_HANDLE(dzn_device, device, dev); - VkResult result = VK_SUCCESS; - - unsigned i; - for (i = 0; i < count; i++) { - result = dzn_graphics_pipeline_create(device, - pipelineCache, - &pCreateInfos[i], - pAllocator, - &pPipelines[i]); - if (result != VK_SUCCESS) { - pPipelines[i] = VK_NULL_HANDLE; - - /* Bail out on the first error != VK_PIPELINE_COMPILE_REQUIRED_EX as it - * is not obvious what error should be report upon 2 different failures. - */ - if (result != VK_PIPELINE_COMPILE_REQUIRED_EXT) - break; - - if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT) - break; - } - } - - for (; i < count; i++) - pPipelines[i] = VK_NULL_HANDLE; - - return result; -} - -static void -dzn_compute_pipeline_destroy(struct dzn_compute_pipeline *pipeline, - const VkAllocationCallbacks *alloc) -{ - if (!pipeline) - return; - - if (pipeline->indirect_cmd_sig) - ID3D12CommandSignature_Release(pipeline->indirect_cmd_sig); - - dzn_pipeline_finish(&pipeline->base); - vk_free2(&pipeline->base.base.device->alloc, alloc, pipeline); -} - -static VkResult -dzn_compute_pipeline_create(struct dzn_device *device, - VkPipelineCache cache, - const VkComputePipelineCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkPipeline *out) -{ - VK_FROM_HANDLE(dzn_pipeline_layout, layout, pCreateInfo->layout); - - struct dzn_compute_pipeline *pipeline = (struct dzn_compute_pipeline *) - vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!pipeline) - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - - dzn_pipeline_init(&pipeline->base, device, - VK_PIPELINE_BIND_POINT_COMPUTE, - layout); - - D3D12_COMPUTE_PIPELINE_STATE_DESC desc = { - .pRootSignature = pipeline->base.root.sig, - .Flags = D3D12_PIPELINE_STATE_FLAG_NONE, - }; - - VkResult ret = - dzn_pipeline_compile_shader(device, pAllocator, layout, - &pCreateInfo->stage, - DXIL_SPIRV_YZ_FLIP_NONE, 0, 0, - false, &desc.CS); - if (ret != VK_SUCCESS) - goto out; - - if (FAILED(ID3D12Device1_CreateComputePipelineState(device->dev, &desc, - IID_ID3D12PipelineState, - (void **)&pipeline->base.state))) { - ret = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - goto out; - } - -out: - free((void *)desc.CS.pShaderBytecode); - if (ret != VK_SUCCESS) - dzn_compute_pipeline_destroy(pipeline, pAllocator); - else - *out = dzn_compute_pipeline_to_handle(pipeline); - - return ret; -} - -ID3D12CommandSignature * -dzn_compute_pipeline_get_indirect_cmd_sig(struct dzn_compute_pipeline *pipeline) -{ - if 
(pipeline->indirect_cmd_sig) - return pipeline->indirect_cmd_sig; - - struct dzn_device *device = - container_of(pipeline->base.base.device, struct dzn_device, vk); - - D3D12_INDIRECT_ARGUMENT_DESC indirect_dispatch_args[] = { - { - .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT, - .Constant = { - .RootParameterIndex = pipeline->base.root.sysval_cbv_param_idx, - .DestOffsetIn32BitValues = 0, - .Num32BitValuesToSet = 3, - }, - }, - { - .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH, - }, - }; - - D3D12_COMMAND_SIGNATURE_DESC indirect_dispatch_desc = { - .ByteStride = sizeof(D3D12_DISPATCH_ARGUMENTS) * 2, - .NumArgumentDescs = ARRAY_SIZE(indirect_dispatch_args), - .pArgumentDescs = indirect_dispatch_args, - }; - - HRESULT hres = - ID3D12Device1_CreateCommandSignature(device->dev, &indirect_dispatch_desc, - pipeline->base.root.sig, - IID_ID3D12CommandSignature, - (void **)&pipeline->indirect_cmd_sig); - if (FAILED(hres)) - return NULL; - - return pipeline->indirect_cmd_sig; -} - -VKAPI_ATTR VkResult VKAPI_CALL -dzn_CreateComputePipelines(VkDevice dev, - VkPipelineCache pipelineCache, - uint32_t count, - const VkComputePipelineCreateInfo *pCreateInfos, - const VkAllocationCallbacks *pAllocator, - VkPipeline *pPipelines) -{ - VK_FROM_HANDLE(dzn_device, device, dev); - VkResult result = VK_SUCCESS; - - unsigned i; - for (i = 0; i < count; i++) { - result = dzn_compute_pipeline_create(device, - pipelineCache, - &pCreateInfos[i], - pAllocator, - &pPipelines[i]); - if (result != VK_SUCCESS) { - pPipelines[i] = VK_NULL_HANDLE; - - /* Bail out on the first error != VK_PIPELINE_COMPILE_REQUIRED_EX as it - * is not obvious what error should be report upon 2 different failures. - */ - if (result != VK_PIPELINE_COMPILE_REQUIRED_EXT) - break; - - if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT) - break; - } - } - - for (; i < count; i++) - pPipelines[i] = VK_NULL_HANDLE; - - return result; -} - -VKAPI_ATTR void VKAPI_CALL -dzn_DestroyPipeline(VkDevice device, - VkPipeline pipeline, - const VkAllocationCallbacks *pAllocator) -{ - VK_FROM_HANDLE(dzn_pipeline, pipe, pipeline); - - if (!pipe) - return; - - if (pipe->type == VK_PIPELINE_BIND_POINT_GRAPHICS) { - struct dzn_graphics_pipeline *gfx = container_of(pipe, struct dzn_graphics_pipeline, base); - dzn_graphics_pipeline_destroy(gfx, pAllocator); - } else { - assert(pipe->type == VK_PIPELINE_BIND_POINT_COMPUTE); - struct dzn_compute_pipeline *compute = container_of(pipe, struct dzn_compute_pipeline, base); - dzn_compute_pipeline_destroy(compute, pAllocator); - } -} diff --git a/src/microsoft/vulkan/dzn_pipeline_cache.c b/src/microsoft/vulkan/dzn_pipeline_cache.c new file mode 100644 index 00000000000..de6e3053c0a --- /dev/null +++ b/src/microsoft/vulkan/dzn_pipeline_cache.c @@ -0,0 +1,99 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "dzn_private.h" + +#include "vk_alloc.h" + +static void +dzn_pipeline_cache_destroy(struct dzn_pipeline_cache *pcache, + const VkAllocationCallbacks *pAllocator) +{ + if (!pcache) + return; + + struct dzn_device *device = container_of(pcache->base.device, struct dzn_device, vk); + + vk_object_base_finish(&pcache->base); + vk_free2(&device->vk.alloc, pAllocator, pcache); +} + +static VkResult +dzn_pipeline_cache_create(struct dzn_device *device, + const VkPipelineCacheCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkPipelineCache *out) +{ + struct dzn_pipeline_cache *pcache = (struct dzn_pipeline_cache *) + vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*pcache), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!pcache) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + vk_object_base_init(&device->vk, &pcache->base, VK_OBJECT_TYPE_PIPELINE_CACHE); + + /* TODO: cache-ism! */ + + *out = dzn_pipeline_cache_to_handle(pcache); + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_CreatePipelineCache(VkDevice device, + const VkPipelineCacheCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkPipelineCache *pPipelineCache) +{ + return dzn_pipeline_cache_create(dzn_device_from_handle(device), + pCreateInfo, pAllocator, pPipelineCache); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_DestroyPipelineCache(VkDevice device, + VkPipelineCache pipelineCache, + const VkAllocationCallbacks *pAllocator) +{ + dzn_pipeline_cache_destroy(dzn_pipeline_cache_from_handle(pipelineCache), + pAllocator); +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_GetPipelineCacheData(VkDevice device, + VkPipelineCache pipelineCache, + size_t *pDataSize, + void *pData) +{ + // FIXME + *pDataSize = 0; + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_MergePipelineCaches(VkDevice device, + VkPipelineCache dstCache, + uint32_t srcCacheCount, + const VkPipelineCache *pSrcCaches) +{ + // FIXME + return VK_SUCCESS; +} diff --git a/src/microsoft/vulkan/dzn_pipeline_cache.cpp b/src/microsoft/vulkan/dzn_pipeline_cache.cpp deleted file mode 100644 index de6e3053c0a..00000000000 --- a/src/microsoft/vulkan/dzn_pipeline_cache.cpp +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright © Microsoft Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "dzn_private.h" - -#include "vk_alloc.h" - -static void -dzn_pipeline_cache_destroy(struct dzn_pipeline_cache *pcache, - const VkAllocationCallbacks *pAllocator) -{ - if (!pcache) - return; - - struct dzn_device *device = container_of(pcache->base.device, struct dzn_device, vk); - - vk_object_base_finish(&pcache->base); - vk_free2(&device->vk.alloc, pAllocator, pcache); -} - -static VkResult -dzn_pipeline_cache_create(struct dzn_device *device, - const VkPipelineCacheCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkPipelineCache *out) -{ - struct dzn_pipeline_cache *pcache = (struct dzn_pipeline_cache *) - vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*pcache), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!pcache) - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - - vk_object_base_init(&device->vk, &pcache->base, VK_OBJECT_TYPE_PIPELINE_CACHE); - - /* TODO: cache-ism! */ - - *out = dzn_pipeline_cache_to_handle(pcache); - return VK_SUCCESS; -} - -VKAPI_ATTR VkResult VKAPI_CALL -dzn_CreatePipelineCache(VkDevice device, - const VkPipelineCacheCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkPipelineCache *pPipelineCache) -{ - return dzn_pipeline_cache_create(dzn_device_from_handle(device), - pCreateInfo, pAllocator, pPipelineCache); -} - -VKAPI_ATTR void VKAPI_CALL -dzn_DestroyPipelineCache(VkDevice device, - VkPipelineCache pipelineCache, - const VkAllocationCallbacks *pAllocator) -{ - dzn_pipeline_cache_destroy(dzn_pipeline_cache_from_handle(pipelineCache), - pAllocator); -} - -VKAPI_ATTR VkResult VKAPI_CALL -dzn_GetPipelineCacheData(VkDevice device, - VkPipelineCache pipelineCache, - size_t *pDataSize, - void *pData) -{ - // FIXME - *pDataSize = 0; - return VK_SUCCESS; -} - -VKAPI_ATTR VkResult VKAPI_CALL -dzn_MergePipelineCaches(VkDevice device, - VkPipelineCache dstCache, - uint32_t srcCacheCount, - const VkPipelineCache *pSrcCaches) -{ - // FIXME - return VK_SUCCESS; -} diff --git a/src/microsoft/vulkan/dzn_private.h b/src/microsoft/vulkan/dzn_private.h index c4492843f59..0b08e517bdf 100644 --- a/src/microsoft/vulkan/dzn_private.h +++ b/src/microsoft/vulkan/dzn_private.h @@ -173,7 +173,7 @@ struct dzn_physical_device { DXGI_ADAPTER_DESC1 adapter_desc; uint32_t queue_family_count; - struct { + struct dzn_queue_family { VkQueueFamilyProperties props; D3D12_COMMAND_QUEUE_DESC desc; } queue_families[MAX_QUEUE_FAMILIES]; @@ -206,6 +206,10 @@ dzn_physical_device_get_mem_type_mask_for_resource(const struct dzn_physical_dev #define dzn_debug_ignored_stype(sType) \ mesa_logd("%s: ignored VkStructureType %u\n", __func__, (sType)) +#ifdef __cplusplus +extern "C" { +#endif + IDXGIFactory4 * dxgi_get_factory(bool debug); @@ -221,6 +225,10 @@ d3d12_enable_gpu_validation(); ID3D12Device1 * d3d12_create_device(IDXGIAdapter1 *adapter, bool experimental_features); +#ifdef __cplusplus +} +#endif + struct dzn_queue { struct vk_queue vk; diff --git a/src/microsoft/vulkan/dzn_query.c b/src/microsoft/vulkan/dzn_query.c new file mode 100644 index 
00000000000..0739e22091f --- /dev/null +++ b/src/microsoft/vulkan/dzn_query.c @@ -0,0 +1,345 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "dzn_private.h" + +#include "vk_alloc.h" +#include "vk_debug_report.h" +#include "vk_util.h" + +static D3D12_QUERY_HEAP_TYPE +dzn_query_pool_get_heap_type(VkQueryType in) +{ + switch (in) { + case VK_QUERY_TYPE_OCCLUSION: return D3D12_QUERY_HEAP_TYPE_OCCLUSION; + case VK_QUERY_TYPE_PIPELINE_STATISTICS: return D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS; + case VK_QUERY_TYPE_TIMESTAMP: return D3D12_QUERY_HEAP_TYPE_TIMESTAMP; + default: unreachable("Unsupported query type"); + } +} + +D3D12_QUERY_TYPE +dzn_query_pool_get_query_type(const struct dzn_query_pool *qpool, + VkQueryControlFlags flags) +{ + switch (qpool->heap_type) { + case D3D12_QUERY_HEAP_TYPE_OCCLUSION: + return flags & VK_QUERY_CONTROL_PRECISE_BIT ? 
+ D3D12_QUERY_TYPE_OCCLUSION : D3D12_QUERY_TYPE_BINARY_OCCLUSION; + case D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS: return D3D12_QUERY_TYPE_PIPELINE_STATISTICS; + case D3D12_QUERY_HEAP_TYPE_TIMESTAMP: return D3D12_QUERY_TYPE_TIMESTAMP; + default: unreachable("Unsupported query type"); + } +} + +static void +dzn_query_pool_destroy(struct dzn_query_pool *qpool, + const VkAllocationCallbacks *alloc) +{ + if (!qpool) + return; + + struct dzn_device *device = container_of(qpool->base.device, struct dzn_device, vk); + + if (qpool->collect_map) + ID3D12Resource_Unmap(qpool->collect_buffer, 0, NULL); + + if (qpool->collect_buffer) + ID3D12Resource_Release(qpool->collect_buffer); + + if (qpool->resolve_buffer) + ID3D12Resource_Release(qpool->resolve_buffer); + + if (qpool->heap) + ID3D12QueryHeap_Release(qpool->heap); + + for (uint32_t q = 0; q < qpool->query_count; q++) { + if (qpool->queries[q].fence) + ID3D12Fence_Release(qpool->queries[q].fence); + } + + mtx_destroy(&qpool->queries_lock); + vk_object_base_finish(&qpool->base); + vk_free2(&device->vk.alloc, alloc, qpool); +} + +static VkResult +dzn_query_pool_create(struct dzn_device *device, + const VkQueryPoolCreateInfo *info, + const VkAllocationCallbacks *alloc, + VkQueryPool *out) +{ + VK_MULTIALLOC(ma); + VK_MULTIALLOC_DECL(&ma, struct dzn_query_pool, qpool, 1); + VK_MULTIALLOC_DECL(&ma, struct dzn_query, queries, info->queryCount); + + if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, alloc, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT)) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + vk_object_base_init(&device->vk, &qpool->base, VK_OBJECT_TYPE_QUERY_POOL); + + mtx_init(&qpool->queries_lock, mtx_plain); + qpool->query_count = info->queryCount; + qpool->queries = queries; + + D3D12_QUERY_HEAP_DESC desc = { 0 }; + qpool->heap_type = desc.Type = dzn_query_pool_get_heap_type(info->queryType); + desc.Count = info->queryCount; + desc.NodeMask = 0; + + HRESULT hres = + ID3D12Device1_CreateQueryHeap(device->dev, &desc, + &IID_ID3D12QueryHeap, + &qpool->heap); + if (FAILED(hres)) { + dzn_query_pool_destroy(qpool, alloc); + return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); + } + + switch (info->queryType) { + case VK_QUERY_TYPE_OCCLUSION: + case VK_QUERY_TYPE_TIMESTAMP: + qpool->query_size = sizeof(uint64_t); + break; + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + qpool->pipeline_statistics = info->pipelineStatistics; + qpool->query_size = sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS); + break; + default: unreachable("Unsupported query type"); + } + + D3D12_HEAP_PROPERTIES hprops; + ID3D12Device1_GetCustomHeapProperties(device->dev, &hprops, 0, + D3D12_HEAP_TYPE_DEFAULT); + D3D12_RESOURCE_DESC rdesc = { + .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER, + .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT, + .Width = info->queryCount * qpool->query_size, + .Height = 1, + .DepthOrArraySize = 1, + .MipLevels = 1, + .Format = DXGI_FORMAT_UNKNOWN, + .SampleDesc = { .Count = 1, .Quality = 0 }, + .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + .Flags = D3D12_RESOURCE_FLAG_NONE, + }; + + hres = ID3D12Device1_CreateCommittedResource(device->dev, &hprops, + D3D12_HEAP_FLAG_NONE, + &rdesc, + D3D12_RESOURCE_STATE_COPY_DEST, + NULL, + &IID_ID3D12Resource, + &qpool->resolve_buffer); + if (FAILED(hres)) { + dzn_query_pool_destroy(qpool, alloc); + return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); + } + + ID3D12Device1_GetCustomHeapProperties(device->dev, &hprops, 0, + D3D12_HEAP_TYPE_READBACK); + rdesc.Width = info->queryCount * (qpool->query_size + 
sizeof(uint64_t)); + hres = ID3D12Device1_CreateCommittedResource(device->dev, &hprops, + D3D12_HEAP_FLAG_NONE, + &rdesc, + D3D12_RESOURCE_STATE_COPY_DEST, + NULL, + &IID_ID3D12Resource, + &qpool->collect_buffer); + if (FAILED(hres)) { + dzn_query_pool_destroy(qpool, alloc); + return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); + } + + hres = ID3D12Resource_Map(qpool->collect_buffer, 0, NULL, (void **)&qpool->collect_map); + if (FAILED(hres)) { + dzn_query_pool_destroy(qpool, alloc); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + } + + memset(qpool->collect_map, 0, rdesc.Width); + + *out = dzn_query_pool_to_handle(qpool); + return VK_SUCCESS; +} + +uint32_t +dzn_query_pool_get_result_offset(const struct dzn_query_pool *qpool, uint32_t query) +{ + return query * qpool->query_size; +} + +uint32_t +dzn_query_pool_get_result_size(const struct dzn_query_pool *qpool, uint32_t query_count) +{ + return query_count * qpool->query_size; +} + +uint32_t +dzn_query_pool_get_availability_offset(const struct dzn_query_pool *qpool, uint32_t query) +{ + return (qpool->query_count * qpool->query_size) + (sizeof(uint64_t) * query); +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_CreateQueryPool(VkDevice device, + const VkQueryPoolCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkQueryPool *pQueryPool) +{ + return dzn_query_pool_create(dzn_device_from_handle(device), + pCreateInfo, pAllocator, pQueryPool); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_DestroyQueryPool(VkDevice device, + VkQueryPool queryPool, + const VkAllocationCallbacks *pAllocator) +{ + dzn_query_pool_destroy(dzn_query_pool_from_handle(queryPool), pAllocator); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_ResetQueryPool(VkDevice device, + VkQueryPool queryPool, + uint32_t firstQuery, + uint32_t queryCount) +{ + VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool); + + mtx_lock(&qpool->queries_lock); + for (uint32_t q = 0; q < queryCount; q++) { + struct dzn_query *query = &qpool->queries[firstQuery + q]; + + query->fence_value = 0; + if (query->fence) { + ID3D12Fence_Release(query->fence); + query->fence = NULL; + } + } + mtx_unlock(&qpool->queries_lock); + + memset((uint8_t *)qpool->collect_map + dzn_query_pool_get_result_offset(qpool, firstQuery), + 0, queryCount * qpool->query_size); + memset((uint8_t *)qpool->collect_map + dzn_query_pool_get_availability_offset(qpool, firstQuery), + 0, queryCount * sizeof(uint64_t)); +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_GetQueryPoolResults(VkDevice device, + VkQueryPool queryPool, + uint32_t firstQuery, + uint32_t queryCount, + size_t dataSize, + void *pData, + VkDeviceSize stride, + VkQueryResultFlags flags) +{ + VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool); + + uint32_t step = (flags & VK_QUERY_RESULT_64_BIT) ? + sizeof(uint64_t) : sizeof(uint32_t); + VkResult result = VK_SUCCESS; + + for (uint32_t q = 0; q < queryCount; q++) { + struct dzn_query *query = &qpool->queries[q + firstQuery]; + + uint8_t *dst_ptr = (uint8_t *)pData + (stride * q); + uint8_t *src_ptr = + (uint8_t *)qpool->collect_map + + dzn_query_pool_get_result_offset(qpool, firstQuery + q); + uint64_t available = 0; + + if (flags & VK_QUERY_RESULT_WAIT_BIT) { + ID3D12Fence *query_fence = NULL; + uint64_t query_fence_val = 0; + + while (true) { + mtx_lock(&qpool->queries_lock); + if (query->fence) { + query_fence = query->fence; + ID3D12Fence_AddRef(query_fence); + } + query_fence_val = query->fence_value; + mtx_unlock(&qpool->queries_lock); + + if (query_fence) + break; + + /* Check again in 10ms. 
+ * FIXME: decrease the polling period if it happens to hurt latency. + */ + Sleep(10); + } + + ID3D12Fence_SetEventOnCompletion(query_fence, query_fence_val, NULL); + ID3D12Fence_Release(query_fence); + available = UINT64_MAX; + } else { + ID3D12Fence *query_fence = NULL; + mtx_lock(&qpool->queries_lock); + if (query->fence) { + query_fence = query->fence; + ID3D12Fence_AddRef(query_fence); + } + uint64_t query_fence_val = query->fence_value; + mtx_unlock(&qpool->queries_lock); + + if (query_fence) { + if (ID3D12Fence_GetCompletedValue(query_fence) >= query_fence_val) + available = UINT64_MAX; + ID3D12Fence_Release(query_fence); + } + } + + if (qpool->heap_type != D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS) { + if (available) + memcpy(dst_ptr, src_ptr, step); + else if (flags & VK_QUERY_RESULT_PARTIAL_BIT) + memset(dst_ptr, 0, step); + + dst_ptr += step; + } else { + for (uint32_t c = 0; c < sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS) / sizeof(uint64_t); c++) { + if (!(BITFIELD_BIT(c) & qpool->pipeline_statistics)) + continue; + + if (available) + memcpy(dst_ptr, src_ptr + (c * sizeof(uint64_t)), step); + else if (flags & VK_QUERY_RESULT_PARTIAL_BIT) + memset(dst_ptr, 0, step); + + dst_ptr += step; + } + } + + if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) + memcpy(dst_ptr, &available, step); + + if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT)) + result = VK_NOT_READY; + } + + return result; +} diff --git a/src/microsoft/vulkan/dzn_query.cpp b/src/microsoft/vulkan/dzn_query.cpp deleted file mode 100644 index 6d8712f05a0..00000000000 --- a/src/microsoft/vulkan/dzn_query.cpp +++ /dev/null @@ -1,345 +0,0 @@ -/* - * Copyright © Microsoft Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "dzn_private.h" - -#include "vk_alloc.h" -#include "vk_debug_report.h" -#include "vk_util.h" - -static D3D12_QUERY_HEAP_TYPE -dzn_query_pool_get_heap_type(VkQueryType in) -{ - switch (in) { - case VK_QUERY_TYPE_OCCLUSION: return D3D12_QUERY_HEAP_TYPE_OCCLUSION; - case VK_QUERY_TYPE_PIPELINE_STATISTICS: return D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS; - case VK_QUERY_TYPE_TIMESTAMP: return D3D12_QUERY_HEAP_TYPE_TIMESTAMP; - default: unreachable("Unsupported query type"); - } -} - -D3D12_QUERY_TYPE -dzn_query_pool_get_query_type(const struct dzn_query_pool *qpool, - VkQueryControlFlags flags) -{ - switch (qpool->heap_type) { - case D3D12_QUERY_HEAP_TYPE_OCCLUSION: - return flags & VK_QUERY_CONTROL_PRECISE_BIT ? 
- D3D12_QUERY_TYPE_OCCLUSION : D3D12_QUERY_TYPE_BINARY_OCCLUSION; - case D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS: return D3D12_QUERY_TYPE_PIPELINE_STATISTICS; - case D3D12_QUERY_HEAP_TYPE_TIMESTAMP: return D3D12_QUERY_TYPE_TIMESTAMP; - default: unreachable("Unsupported query type"); - } -} - -static void -dzn_query_pool_destroy(struct dzn_query_pool *qpool, - const VkAllocationCallbacks *alloc) -{ - if (!qpool) - return; - - struct dzn_device *device = container_of(qpool->base.device, struct dzn_device, vk); - - if (qpool->collect_map) - ID3D12Resource_Unmap(qpool->collect_buffer, 0, NULL); - - if (qpool->collect_buffer) - ID3D12Resource_Release(qpool->collect_buffer); - - if (qpool->resolve_buffer) - ID3D12Resource_Release(qpool->resolve_buffer); - - if (qpool->heap) - ID3D12QueryHeap_Release(qpool->heap); - - for (uint32_t q = 0; q < qpool->query_count; q++) { - if (qpool->queries[q].fence) - ID3D12Fence_Release(qpool->queries[q].fence); - } - - mtx_destroy(&qpool->queries_lock); - vk_object_base_finish(&qpool->base); - vk_free2(&device->vk.alloc, alloc, qpool); -} - -static VkResult -dzn_query_pool_create(struct dzn_device *device, - const VkQueryPoolCreateInfo *info, - const VkAllocationCallbacks *alloc, - VkQueryPool *out) -{ - VK_MULTIALLOC(ma); - VK_MULTIALLOC_DECL(&ma, struct dzn_query_pool, qpool, 1); - VK_MULTIALLOC_DECL(&ma, struct dzn_query, queries, info->queryCount); - - if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, alloc, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT)) - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - - vk_object_base_init(&device->vk, &qpool->base, VK_OBJECT_TYPE_QUERY_POOL); - - mtx_init(&qpool->queries_lock, mtx_plain); - qpool->query_count = info->queryCount; - qpool->queries = queries; - - D3D12_QUERY_HEAP_DESC desc = { 0 }; - qpool->heap_type = desc.Type = dzn_query_pool_get_heap_type(info->queryType); - desc.Count = info->queryCount; - desc.NodeMask = 0; - - HRESULT hres = - ID3D12Device1_CreateQueryHeap(device->dev, &desc, - IID_ID3D12QueryHeap, - (void **)&qpool->heap); - if (FAILED(hres)) { - dzn_query_pool_destroy(qpool, alloc); - return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); - } - - switch (info->queryType) { - case VK_QUERY_TYPE_OCCLUSION: - case VK_QUERY_TYPE_TIMESTAMP: - qpool->query_size = sizeof(uint64_t); - break; - case VK_QUERY_TYPE_PIPELINE_STATISTICS: - qpool->pipeline_statistics = info->pipelineStatistics; - qpool->query_size = sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS); - break; - default: unreachable("Unsupported query type"); - } - - D3D12_HEAP_PROPERTIES hprops; - ID3D12Device1_GetCustomHeapProperties(device->dev, &hprops, 0, - D3D12_HEAP_TYPE_DEFAULT); - D3D12_RESOURCE_DESC rdesc = { - .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER, - .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT, - .Width = info->queryCount * qpool->query_size, - .Height = 1, - .DepthOrArraySize = 1, - .MipLevels = 1, - .Format = DXGI_FORMAT_UNKNOWN, - .SampleDesc = { .Count = 1, .Quality = 0 }, - .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR, - .Flags = D3D12_RESOURCE_FLAG_NONE, - }; - - hres = ID3D12Device1_CreateCommittedResource(device->dev, &hprops, - D3D12_HEAP_FLAG_NONE, - &rdesc, - D3D12_RESOURCE_STATE_COPY_DEST, - NULL, - IID_ID3D12Resource, - (void **)&qpool->resolve_buffer); - if (FAILED(hres)) { - dzn_query_pool_destroy(qpool, alloc); - return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); - } - - ID3D12Device1_GetCustomHeapProperties(device->dev, &hprops, 0, - D3D12_HEAP_TYPE_READBACK); - rdesc.Width = info->queryCount * 
(qpool->query_size + sizeof(uint64_t)); - hres = ID3D12Device1_CreateCommittedResource(device->dev, &hprops, - D3D12_HEAP_FLAG_NONE, - &rdesc, - D3D12_RESOURCE_STATE_COPY_DEST, - NULL, - IID_ID3D12Resource, - (void **)&qpool->collect_buffer); - if (FAILED(hres)) { - dzn_query_pool_destroy(qpool, alloc); - return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); - } - - hres = ID3D12Resource_Map(qpool->collect_buffer, 0, NULL, (void **)&qpool->collect_map); - if (FAILED(hres)) { - dzn_query_pool_destroy(qpool, alloc); - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - } - - memset(qpool->collect_map, 0, rdesc.Width); - - *out = dzn_query_pool_to_handle(qpool); - return VK_SUCCESS; -} - -uint32_t -dzn_query_pool_get_result_offset(const struct dzn_query_pool *qpool, uint32_t query) -{ - return query * qpool->query_size; -} - -uint32_t -dzn_query_pool_get_result_size(const struct dzn_query_pool *qpool, uint32_t query_count) -{ - return query_count * qpool->query_size; -} - -uint32_t -dzn_query_pool_get_availability_offset(const struct dzn_query_pool *qpool, uint32_t query) -{ - return (qpool->query_count * qpool->query_size) + (sizeof(uint64_t) * query); -} - -VKAPI_ATTR VkResult VKAPI_CALL -dzn_CreateQueryPool(VkDevice device, - const VkQueryPoolCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkQueryPool *pQueryPool) -{ - return dzn_query_pool_create(dzn_device_from_handle(device), - pCreateInfo, pAllocator, pQueryPool); -} - -VKAPI_ATTR void VKAPI_CALL -dzn_DestroyQueryPool(VkDevice device, - VkQueryPool queryPool, - const VkAllocationCallbacks *pAllocator) -{ - dzn_query_pool_destroy(dzn_query_pool_from_handle(queryPool), pAllocator); -} - -VKAPI_ATTR void VKAPI_CALL -dzn_ResetQueryPool(VkDevice device, - VkQueryPool queryPool, - uint32_t firstQuery, - uint32_t queryCount) -{ - VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool); - - mtx_lock(&qpool->queries_lock); - for (uint32_t q = 0; q < queryCount; q++) { - struct dzn_query *query = &qpool->queries[firstQuery + q]; - - query->fence_value = 0; - if (query->fence) { - ID3D12Fence_Release(query->fence); - query->fence = NULL; - } - } - mtx_unlock(&qpool->queries_lock); - - memset((uint8_t *)qpool->collect_map + dzn_query_pool_get_result_offset(qpool, firstQuery), - 0, queryCount * qpool->query_size); - memset((uint8_t *)qpool->collect_map + dzn_query_pool_get_availability_offset(qpool, firstQuery), - 0, queryCount * sizeof(uint64_t)); -} - -VKAPI_ATTR VkResult VKAPI_CALL -dzn_GetQueryPoolResults(VkDevice device, - VkQueryPool queryPool, - uint32_t firstQuery, - uint32_t queryCount, - size_t dataSize, - void *pData, - VkDeviceSize stride, - VkQueryResultFlags flags) -{ - VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool); - - uint32_t step = (flags & VK_QUERY_RESULT_64_BIT) ? 
- sizeof(uint64_t) : sizeof(uint32_t); - VkResult result = VK_SUCCESS; - - for (uint32_t q = 0; q < queryCount; q++) { - struct dzn_query *query = &qpool->queries[q + firstQuery]; - - uint8_t *dst_ptr = (uint8_t *)pData + (stride * q); - uint8_t *src_ptr = - (uint8_t *)qpool->collect_map + - dzn_query_pool_get_result_offset(qpool, firstQuery + q); - uint64_t available = 0; - - if (flags & VK_QUERY_RESULT_WAIT_BIT) { - ID3D12Fence *query_fence = NULL; - uint64_t query_fence_val = 0; - - while (true) { - mtx_lock(&qpool->queries_lock); - if (query->fence) { - query_fence = query->fence; - ID3D12Fence_AddRef(query_fence); - } - query_fence_val = query->fence_value; - mtx_unlock(&qpool->queries_lock); - - if (query_fence) - break; - - /* Check again in 10ms. - * FIXME: decrease the polling period if it happens to hurt latency. - */ - Sleep(10); - } - - ID3D12Fence_SetEventOnCompletion(query_fence, query_fence_val, NULL); - ID3D12Fence_Release(query_fence); - available = UINT64_MAX; - } else { - ID3D12Fence *query_fence = NULL; - mtx_lock(&qpool->queries_lock); - if (query->fence) { - query_fence = query->fence; - ID3D12Fence_AddRef(query_fence); - } - uint64_t query_fence_val = query->fence_value; - mtx_unlock(&qpool->queries_lock); - - if (query_fence) { - if (ID3D12Fence_GetCompletedValue(query_fence) >= query_fence_val) - available = UINT64_MAX; - ID3D12Fence_Release(query_fence); - } - } - - if (qpool->heap_type != D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS) { - if (available) - memcpy(dst_ptr, src_ptr, step); - else if (flags & VK_QUERY_RESULT_PARTIAL_BIT) - memset(dst_ptr, 0, step); - - dst_ptr += step; - } else { - for (uint32_t c = 0; c < sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS) / sizeof(uint64_t); c++) { - if (!(BITFIELD_BIT(c) & qpool->pipeline_statistics)) - continue; - - if (available) - memcpy(dst_ptr, src_ptr + (c * sizeof(uint64_t)), step); - else if (flags & VK_QUERY_RESULT_PARTIAL_BIT) - memset(dst_ptr, 0, step); - - dst_ptr += step; - } - } - - if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) - memcpy(dst_ptr, &available, step); - - if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT)) - result = VK_NOT_READY; - } - - return result; -} diff --git a/src/microsoft/vulkan/dzn_sync.c b/src/microsoft/vulkan/dzn_sync.c new file mode 100644 index 00000000000..1379c332c4e --- /dev/null +++ b/src/microsoft/vulkan/dzn_sync.c @@ -0,0 +1,210 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "dzn_private.h" + +#include "vk_alloc.h" +#include "vk_debug_report.h" +#include "vk_util.h" + +#include "util/macros.h" +#include "util/os_time.h" + +static VkResult +dzn_sync_init(struct vk_device *device, + struct vk_sync *sync, + uint64_t initial_value) +{ + struct dzn_sync *dsync = container_of(sync, struct dzn_sync, vk); + struct dzn_device *ddev = container_of(device, struct dzn_device, vk); + + assert(!(sync->flags & VK_SYNC_IS_SHAREABLE)); + + if (FAILED(ID3D12Device1_CreateFence(ddev->dev, initial_value, + D3D12_FENCE_FLAG_NONE, + &IID_ID3D12Fence, + &dsync->fence))) + return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); + + return VK_SUCCESS; +} + +static void +dzn_sync_finish(struct vk_device *device, + struct vk_sync *sync) +{ + struct dzn_sync *dsync = container_of(sync, struct dzn_sync, vk); + + ID3D12Fence_Release(dsync->fence); +} + +static VkResult +dzn_sync_signal(struct vk_device *device, + struct vk_sync *sync, + uint64_t value) +{ + struct dzn_sync *dsync = container_of(sync, struct dzn_sync, vk); + + if (!(sync->flags & VK_SYNC_IS_TIMELINE)) + value = 1; + + if (FAILED(ID3D12Fence_Signal(dsync->fence, value))) + return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); + + return VK_SUCCESS; +} + +static VkResult +dzn_sync_get_value(struct vk_device *device, + struct vk_sync *sync, + uint64_t *value) +{ + struct dzn_sync *dsync = container_of(sync, struct dzn_sync, vk); + + *value = ID3D12Fence_GetCompletedValue(dsync->fence); + return VK_SUCCESS; +} + +static VkResult +dzn_sync_reset(struct vk_device *device, + struct vk_sync *sync) +{ + struct dzn_sync *dsync = container_of(sync, struct dzn_sync, vk); + + if (FAILED(ID3D12Fence_Signal(dsync->fence, 0))) + return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); + + return VK_SUCCESS; +} + +static VkResult +dzn_sync_move(struct vk_device *device, + struct vk_sync *dst, + struct vk_sync *src) +{ + struct dzn_device *ddev = container_of(device, struct dzn_device, vk); + struct dzn_sync *ddst = container_of(dst, struct dzn_sync, vk); + struct dzn_sync *dsrc = container_of(src, struct dzn_sync, vk); + ID3D12Fence *new_fence; + + if (FAILED(ID3D12Device1_CreateFence(ddev->dev, 0, + D3D12_FENCE_FLAG_NONE, + &IID_ID3D12Fence, + &new_fence))) + return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); + + ID3D12Fence_Release(ddst->fence); + ddst->fence = dsrc->fence; + dsrc->fence = new_fence; + return VK_SUCCESS; +} + +static VkResult +dzn_sync_wait(struct vk_device *device, + uint32_t wait_count, + const struct vk_sync_wait *waits, + enum vk_sync_wait_flags wait_flags, + uint64_t abs_timeout_ns) +{ + struct dzn_device *ddev = container_of(device, struct dzn_device, vk); + + HANDLE event = CreateEventA(NULL, FALSE, FALSE, NULL); + if (event == NULL) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + STACK_ARRAY(ID3D12Fence *, fences, wait_count); + STACK_ARRAY(uint64_t, values, wait_count); + + for (uint32_t i = 0; i < wait_count; i++) { + struct dzn_sync *sync = container_of(waits[i].sync, struct dzn_sync, vk); + + fences[i] = sync->fence; + values[i] = (sync->vk.flags & VK_SYNC_IS_TIMELINE) ? waits[i].wait_value : 1; + } + + D3D12_MULTIPLE_FENCE_WAIT_FLAGS flags = + (wait_flags & VK_SYNC_WAIT_ANY) ? 
+ D3D12_MULTIPLE_FENCE_WAIT_FLAG_ANY : + D3D12_MULTIPLE_FENCE_WAIT_FLAG_ALL; + + if (FAILED(ID3D12Device1_SetEventOnMultipleFenceCompletion(ddev->dev, + fences, + values, + wait_count, + flags, + event))) { + STACK_ARRAY_FINISH(fences); + STACK_ARRAY_FINISH(values); + CloseHandle(event); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + } + + DWORD timeout_ms; + + if (abs_timeout_ns == OS_TIMEOUT_INFINITE) { + timeout_ms = INFINITE; + } else { + uint64_t cur_time = os_time_get_nano(); + uint64_t rel_timeout_ns = + abs_timeout_ns > cur_time ? abs_timeout_ns - cur_time : 0; + + timeout_ms = (rel_timeout_ns / 1000000) + (rel_timeout_ns % 1000000 ? 1 : 0); + } + + DWORD res = + WaitForSingleObject(event, timeout_ms); + + CloseHandle(event); + + STACK_ARRAY_FINISH(fences); + STACK_ARRAY_FINISH(values); + + if (res == WAIT_TIMEOUT) + return VK_TIMEOUT; + else if (res != WAIT_OBJECT_0) + return vk_error(device, VK_ERROR_UNKNOWN); + + return VK_SUCCESS; +} + +const struct vk_sync_type dzn_sync_type = { + .size = sizeof(struct dzn_sync), + .features = (enum vk_sync_features) + (VK_SYNC_FEATURE_BINARY | + VK_SYNC_FEATURE_TIMELINE | + VK_SYNC_FEATURE_GPU_WAIT | + VK_SYNC_FEATURE_GPU_MULTI_WAIT | + VK_SYNC_FEATURE_CPU_WAIT | + VK_SYNC_FEATURE_CPU_RESET | + VK_SYNC_FEATURE_CPU_SIGNAL | + VK_SYNC_FEATURE_WAIT_ANY | + VK_SYNC_FEATURE_WAIT_BEFORE_SIGNAL), + + .init = dzn_sync_init, + .finish = dzn_sync_finish, + .signal = dzn_sync_signal, + .get_value = dzn_sync_get_value, + .reset = dzn_sync_reset, + .move = dzn_sync_move, + .wait_many = dzn_sync_wait, +}; diff --git a/src/microsoft/vulkan/dzn_sync.cpp b/src/microsoft/vulkan/dzn_sync.cpp deleted file mode 100644 index 4251e497352..00000000000 --- a/src/microsoft/vulkan/dzn_sync.cpp +++ /dev/null @@ -1,210 +0,0 @@ -/* - * Copyright © Microsoft Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include "dzn_private.h" - -#include "vk_alloc.h" -#include "vk_debug_report.h" -#include "vk_util.h" - -#include "util/macros.h" -#include "util/os_time.h" - -static VkResult -dzn_sync_init(struct vk_device *device, - struct vk_sync *sync, - uint64_t initial_value) -{ - struct dzn_sync *dsync = container_of(sync, struct dzn_sync, vk); - struct dzn_device *ddev = container_of(device, struct dzn_device, vk); - - assert(!(sync->flags & VK_SYNC_IS_SHAREABLE)); - - if (FAILED(ID3D12Device1_CreateFence(ddev->dev, initial_value, - D3D12_FENCE_FLAG_NONE, - IID_ID3D12Fence, - (void **)&dsync->fence))) - return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); - - return VK_SUCCESS; -} - -static void -dzn_sync_finish(struct vk_device *device, - struct vk_sync *sync) -{ - struct dzn_sync *dsync = container_of(sync, struct dzn_sync, vk); - - ID3D12Fence_Release(dsync->fence); -} - -static VkResult -dzn_sync_signal(struct vk_device *device, - struct vk_sync *sync, - uint64_t value) -{ - struct dzn_sync *dsync = container_of(sync, struct dzn_sync, vk); - - if (!(sync->flags & VK_SYNC_IS_TIMELINE)) - value = 1; - - if (FAILED(ID3D12Fence_Signal(dsync->fence, value))) - return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); - - return VK_SUCCESS; -} - -static VkResult -dzn_sync_get_value(struct vk_device *device, - struct vk_sync *sync, - uint64_t *value) -{ - struct dzn_sync *dsync = container_of(sync, struct dzn_sync, vk); - - *value = ID3D12Fence_GetCompletedValue(dsync->fence); - return VK_SUCCESS; -} - -static VkResult -dzn_sync_reset(struct vk_device *device, - struct vk_sync *sync) -{ - struct dzn_sync *dsync = container_of(sync, struct dzn_sync, vk); - - if (FAILED(ID3D12Fence_Signal(dsync->fence, 0))) - return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); - - return VK_SUCCESS; -} - -static VkResult -dzn_sync_move(struct vk_device *device, - struct vk_sync *dst, - struct vk_sync *src) -{ - struct dzn_device *ddev = container_of(device, struct dzn_device, vk); - struct dzn_sync *ddst = container_of(dst, struct dzn_sync, vk); - struct dzn_sync *dsrc = container_of(src, struct dzn_sync, vk); - ID3D12Fence *new_fence; - - if (FAILED(ID3D12Device1_CreateFence(ddev->dev, 0, - D3D12_FENCE_FLAG_NONE, - IID_ID3D12Fence, - (void **)&new_fence))) - return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); - - ID3D12Fence_Release(ddst->fence); - ddst->fence = dsrc->fence; - dsrc->fence = new_fence; - return VK_SUCCESS; -} - -static VkResult -dzn_sync_wait(struct vk_device *device, - uint32_t wait_count, - const struct vk_sync_wait *waits, - enum vk_sync_wait_flags wait_flags, - uint64_t abs_timeout_ns) -{ - struct dzn_device *ddev = container_of(device, struct dzn_device, vk); - - HANDLE event = CreateEventA(NULL, FALSE, FALSE, NULL); - if (event == NULL) - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - - STACK_ARRAY(ID3D12Fence *, fences, wait_count); - STACK_ARRAY(uint64_t, values, wait_count); - - for (uint32_t i = 0; i < wait_count; i++) { - struct dzn_sync *sync = container_of(waits[i].sync, struct dzn_sync, vk); - - fences[i] = sync->fence; - values[i] = (sync->vk.flags & VK_SYNC_IS_TIMELINE) ? waits[i].wait_value : 1; - } - - D3D12_MULTIPLE_FENCE_WAIT_FLAGS flags = - (wait_flags & VK_SYNC_WAIT_ANY) ? 
- D3D12_MULTIPLE_FENCE_WAIT_FLAG_ANY : - D3D12_MULTIPLE_FENCE_WAIT_FLAG_ALL; - - if (FAILED(ID3D12Device1_SetEventOnMultipleFenceCompletion(ddev->dev, - fences, - values, - wait_count, - flags, - event))) { - STACK_ARRAY_FINISH(fences); - STACK_ARRAY_FINISH(values); - CloseHandle(event); - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - } - - DWORD timeout_ms; - - if (abs_timeout_ns == OS_TIMEOUT_INFINITE) { - timeout_ms = INFINITE; - } else { - uint64_t cur_time = os_time_get_nano(); - uint64_t rel_timeout_ns = - abs_timeout_ns > cur_time ? abs_timeout_ns - cur_time : 0; - - timeout_ms = (rel_timeout_ns / 1000000) + (rel_timeout_ns % 1000000 ? 1 : 0); - } - - DWORD res = - WaitForSingleObject(event, timeout_ms); - - CloseHandle(event); - - STACK_ARRAY_FINISH(fences); - STACK_ARRAY_FINISH(values); - - if (res == WAIT_TIMEOUT) - return VK_TIMEOUT; - else if (res != WAIT_OBJECT_0) - return vk_error(device, VK_ERROR_UNKNOWN); - - return VK_SUCCESS; -} - -const struct vk_sync_type dzn_sync_type = { - .size = sizeof(struct dzn_sync), - .features = (enum vk_sync_features) - (VK_SYNC_FEATURE_BINARY | - VK_SYNC_FEATURE_TIMELINE | - VK_SYNC_FEATURE_GPU_WAIT | - VK_SYNC_FEATURE_GPU_MULTI_WAIT | - VK_SYNC_FEATURE_CPU_WAIT | - VK_SYNC_FEATURE_CPU_RESET | - VK_SYNC_FEATURE_CPU_SIGNAL | - VK_SYNC_FEATURE_WAIT_ANY | - VK_SYNC_FEATURE_WAIT_BEFORE_SIGNAL), - - .init = dzn_sync_init, - .finish = dzn_sync_finish, - .signal = dzn_sync_signal, - .get_value = dzn_sync_get_value, - .reset = dzn_sync_reset, - .move = dzn_sync_move, - .wait_many = dzn_sync_wait, -}; diff --git a/src/microsoft/vulkan/dzn_wsi.c b/src/microsoft/vulkan/dzn_wsi.c new file mode 100644 index 00000000000..1cb95ef0a00 --- /dev/null +++ b/src/microsoft/vulkan/dzn_wsi.c @@ -0,0 +1,64 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "dzn_private.h" +#include "vk_util.h" + +static PFN_vkVoidFunction VKAPI_PTR +dzn_wsi_proc_addr(VkPhysicalDevice physicalDevice, const char *pName) +{ + VK_FROM_HANDLE(dzn_physical_device, pdevice, physicalDevice); + return vk_instance_get_proc_addr_unchecked(pdevice->vk.instance, pName); +} + +void +dzn_wsi_finish(struct dzn_physical_device *physical_device) +{ + wsi_device_finish(&physical_device->wsi_device, + &physical_device->vk.instance->alloc); +} + +VkResult +dzn_wsi_init(struct dzn_physical_device *physical_device) +{ + VkResult result; + + /* TODO: implement a proper, non-sw winsys for D3D12 */ + bool sw_device = true; + + result = wsi_device_init(&physical_device->wsi_device, + dzn_physical_device_to_handle(physical_device), + dzn_wsi_proc_addr, + &physical_device->vk.instance->alloc, + -1, NULL, sw_device); + + if (result != VK_SUCCESS) + return result; + + physical_device->wsi_device.supports_modifiers = false; + physical_device->vk.wsi_device = &physical_device->wsi_device; + physical_device->wsi_device.signal_semaphore_with_memory = true; + physical_device->wsi_device.signal_fence_with_memory = true; + + return VK_SUCCESS; +} diff --git a/src/microsoft/vulkan/dzn_wsi.cpp b/src/microsoft/vulkan/dzn_wsi.cpp deleted file mode 100644 index 1cb95ef0a00..00000000000 --- a/src/microsoft/vulkan/dzn_wsi.cpp +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright © Microsoft Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include "dzn_private.h" -#include "vk_util.h" - -static PFN_vkVoidFunction VKAPI_PTR -dzn_wsi_proc_addr(VkPhysicalDevice physicalDevice, const char *pName) -{ - VK_FROM_HANDLE(dzn_physical_device, pdevice, physicalDevice); - return vk_instance_get_proc_addr_unchecked(pdevice->vk.instance, pName); -} - -void -dzn_wsi_finish(struct dzn_physical_device *physical_device) -{ - wsi_device_finish(&physical_device->wsi_device, - &physical_device->vk.instance->alloc); -} - -VkResult -dzn_wsi_init(struct dzn_physical_device *physical_device) -{ - VkResult result; - - /* TODO: implement a proper, non-sw winsys for D3D12 */ - bool sw_device = true; - - result = wsi_device_init(&physical_device->wsi_device, - dzn_physical_device_to_handle(physical_device), - dzn_wsi_proc_addr, - &physical_device->vk.instance->alloc, - -1, NULL, sw_device); - - if (result != VK_SUCCESS) - return result; - - physical_device->wsi_device.supports_modifiers = false; - physical_device->vk.wsi_device = &physical_device->wsi_device; - physical_device->wsi_device.signal_semaphore_with_memory = true; - physical_device->wsi_device.signal_fence_with_memory = true; - - return VK_SUCCESS; -} diff --git a/src/microsoft/vulkan/meson.build b/src/microsoft/vulkan/meson.build index 2d8ce7a8092..ac35a87c3c7 100644 --- a/src/microsoft/vulkan/meson.build +++ b/src/microsoft/vulkan/meson.build @@ -31,20 +31,20 @@ dzn_entrypoints = custom_target( ) libdzn_files = files( - 'dzn_cmd_buffer.cpp', - 'dzn_descriptor_set.cpp', - 'dzn_device.cpp', - 'dzn_image.cpp', - 'dzn_meta.cpp', + 'dzn_cmd_buffer.c', + 'dzn_descriptor_set.c', + 'dzn_device.c', + 'dzn_image.c', + 'dzn_meta.c', 'dzn_nir.c', - 'dzn_pass.cpp', - 'dzn_pipeline_cache.cpp', - 'dzn_pipeline.cpp', - 'dzn_query.cpp', - 'dzn_sync.cpp', + 'dzn_pass.c', + 'dzn_pipeline_cache.c', + 'dzn_pipeline.c', + 'dzn_query.c', + 'dzn_sync.c', 'dzn_util.cpp', 'dzn_util.c', - 'dzn_wsi.cpp', + 'dzn_wsi.c', ) dzn_deps = [ -- cgit v1.2.3