-rw-r--r--  meson.build                                     5
-rw-r--r--  meson_options.txt                               2
-rw-r--r--  src/meson.build                                 2
-rw-r--r--  src/microsoft/meson.build                       5
-rw-r--r--  src/microsoft/vulkan/dzn_cmd_buffer.cpp      4119
-rw-r--r--  src/microsoft/vulkan/dzn_cmd_exec_functions    41
-rw-r--r--  src/microsoft/vulkan/dzn_descriptor_set.cpp  1802
-rw-r--r--  src/microsoft/vulkan/dzn_device.cpp          2632
-rw-r--r--  src/microsoft/vulkan/dzn_image.cpp           1220
-rw-r--r--  src/microsoft/vulkan/dzn_meta.cpp             744
-rw-r--r--  src/microsoft/vulkan/dzn_nir.c                513
-rw-r--r--  src/microsoft/vulkan/dzn_nir.h                138
-rw-r--r--  src/microsoft/vulkan/dzn_pass.cpp             159
-rw-r--r--  src/microsoft/vulkan/dzn_pipeline.cpp        1184
-rw-r--r--  src/microsoft/vulkan/dzn_pipeline_cache.cpp    99
-rw-r--r--  src/microsoft/vulkan/dzn_private.h           1060
-rw-r--r--  src/microsoft/vulkan/dzn_query.cpp            327
-rw-r--r--  src/microsoft/vulkan/dzn_sync.cpp             203
-rw-r--r--  src/microsoft/vulkan/dzn_util.c               234
-rw-r--r--  src/microsoft/vulkan/dzn_util.cpp             226
-rw-r--r--  src/microsoft/vulkan/dzn_wsi.cpp               64
-rw-r--r--  src/microsoft/vulkan/meson.build              123
-rw-r--r--  src/microsoft/vulkan/vulkan_dzn.def             4
23 files changed, 14901 insertions, 5 deletions
diff --git a/meson.build b/meson.build
index 6724d2cd23e..507c9730095 100644
--- a/meson.build
+++ b/meson.build
@@ -276,6 +276,7 @@ with_freedreno_virtio = get_option('freedreno-virtio')
with_broadcom_vk = _vulkan_drivers.contains('broadcom')
with_imagination_vk = _vulkan_drivers.contains('imagination-experimental')
with_imagination_srv = get_option('imagination-srv')
+with_microsoft_vk = _vulkan_drivers.contains('microsoft-experimental')
with_any_vk = _vulkan_drivers.length() != 0
with_any_broadcom = with_gallium_vc4 or with_gallium_v3d or with_broadcom_vk
@@ -667,11 +668,11 @@ if with_gallium_zink
endif
dep_dxheaders = null_dep
-if with_gallium_d3d12 or with_microsoft_clc
+if with_gallium_d3d12 or with_microsoft_clc or with_microsoft_vk
dep_dxheaders = dependency('directx-headers', required : false)
if not dep_dxheaders.found()
dep_dxheaders = dependency('DirectX-Headers', fallback : ['DirectX-Headers', 'dep_dxheaders'],
- required : with_gallium_d3d12
+ required : with_gallium_d3d12 or with_microsoft_vk
)
endif
endif
diff --git a/meson_options.txt b/meson_options.txt
index 7eaf69e28d8..9e51ccd3ac7 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -192,7 +192,7 @@ option(
'vulkan-drivers',
type : 'array',
value : ['auto'],
- choices : ['auto', 'amd', 'broadcom', 'freedreno', 'imagination-experimental', 'intel', 'panfrost', 'swrast', 'virtio-experimental'],
+ choices : ['auto', 'amd', 'broadcom', 'freedreno', 'imagination-experimental', 'intel', 'microsoft-experimental', 'panfrost', 'swrast', 'virtio-experimental'],
description : 'List of vulkan drivers to build. If this is set to auto all drivers applicable to the target OS/architecture will be built'
)
option(
diff --git a/src/meson.build b/src/meson.build
index 2ef90f2efba..1d42e08df30 100644
--- a/src/meson.build
+++ b/src/meson.build
@@ -104,7 +104,7 @@ endif
if with_gallium_virgl or with_virtio_vk
subdir('virtio')
endif
-if with_microsoft_clc or with_gallium_d3d12 or with_spirv_to_dxil
+if with_microsoft_clc or with_gallium_d3d12 or with_spirv_to_dxil or with_microsoft_vk
subdir('microsoft')
endif
if with_gallium_nouveau
diff --git a/src/microsoft/meson.build b/src/microsoft/meson.build
index a243d9e0f70..ea961bcefef 100644
--- a/src/microsoft/meson.build
+++ b/src/microsoft/meson.build
@@ -26,6 +26,9 @@ endif
if with_gallium_d3d12
subdir('resource_state_manager')
endif
-if with_spirv_to_dxil
+if with_spirv_to_dxil or with_microsoft_vk
subdir('spirv_to_dxil')
endif
+if with_microsoft_vk
+ subdir('vulkan')
+endif
diff --git a/src/microsoft/vulkan/dzn_cmd_buffer.cpp b/src/microsoft/vulkan/dzn_cmd_buffer.cpp
new file mode 100644
index 00000000000..bb9cf426c92
--- /dev/null
+++ b/src/microsoft/vulkan/dzn_cmd_buffer.cpp
@@ -0,0 +1,4119 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "dzn_private.h"
+
+#include "vk_alloc.h"
+#include "vk_debug_report.h"
+#include "vk_format.h"
+#include "vk_util.h"
+
+static void
+dzn_cmd_buffer_destroy(struct vk_command_buffer *cbuf)
+{
+ if (!cbuf)
+ return;
+
+ dzn_cmd_buffer *cmdbuf = container_of(cbuf, dzn_cmd_buffer, vk);
+ dzn_device *device = container_of(cbuf->base.device, dzn_device, vk);
+
+ if (cmdbuf->cmdlist)
+ cmdbuf->cmdlist->Release();
+
+ if (cmdbuf->cmdalloc)
+ cmdbuf->cmdalloc->Release();
+
+ list_for_each_entry_safe(dzn_internal_resource, res, &cmdbuf->internal_bufs, link) {
+ list_del(&res->link);
+ res->res->Release();
+ vk_free(&cbuf->pool->alloc, res);
+ }
+
+ dzn_descriptor_heap_pool_finish(&cmdbuf->cbv_srv_uav_pool);
+ dzn_descriptor_heap_pool_finish(&cmdbuf->sampler_pool);
+ dzn_descriptor_heap_pool_finish(&cmdbuf->rtvs.pool);
+ dzn_descriptor_heap_pool_finish(&cmdbuf->dsvs.pool);
+ util_dynarray_fini(&cmdbuf->events.wait);
+ util_dynarray_fini(&cmdbuf->events.signal);
+ util_dynarray_fini(&cmdbuf->queries.reset);
+ util_dynarray_fini(&cmdbuf->queries.wait);
+ util_dynarray_fini(&cmdbuf->queries.signal);
+
+ if (cmdbuf->rtvs.ht) {
+ hash_table_foreach(cmdbuf->rtvs.ht, he)
+ vk_free(&cbuf->pool->alloc, he->data);
+ _mesa_hash_table_destroy(cmdbuf->rtvs.ht, NULL);
+ }
+
+ if (cmdbuf->dsvs.ht) {
+ hash_table_foreach(cmdbuf->dsvs.ht, he)
+ vk_free(&cbuf->pool->alloc, he->data);
+ _mesa_hash_table_destroy(cmdbuf->dsvs.ht, NULL);
+ }
+
+ if (cmdbuf->events.ht)
+ _mesa_hash_table_destroy(cmdbuf->events.ht, NULL);
+
+ if (cmdbuf->queries.ht) {
+ hash_table_foreach(cmdbuf->queries.ht, he) {
+ dzn_cmd_buffer_query_pool_state *qpstate =
+ (dzn_cmd_buffer_query_pool_state *)he->data;
+ util_dynarray_fini(&qpstate->reset);
+ util_dynarray_fini(&qpstate->collect);
+ util_dynarray_fini(&qpstate->wait);
+ util_dynarray_fini(&qpstate->signal);
+ vk_free(&cbuf->pool->alloc, he->data);
+ }
+ _mesa_hash_table_destroy(cmdbuf->queries.ht, NULL);
+ }
+
+ vk_command_buffer_finish(&cmdbuf->vk);
+ vk_free(&cbuf->pool->alloc, cmdbuf);
+}
+
+static uint32_t
+dzn_cmd_buffer_rtv_key_hash_function(const void *key)
+{
+ return _mesa_hash_data(key, sizeof(dzn_cmd_buffer_rtv_key));
+}
+
+static bool
+dzn_cmd_buffer_rtv_key_equals_function(const void *a, const void *b)
+{
+ return memcmp(a, b, sizeof(dzn_cmd_buffer_rtv_key)) == 0;
+}
+
+static uint32_t
+dzn_cmd_buffer_dsv_key_hash_function(const void *key)
+{
+ return _mesa_hash_data(key, sizeof(dzn_cmd_buffer_dsv_key));
+}
+
+static bool
+dzn_cmd_buffer_dsv_key_equals_function(const void *a, const void *b)
+{
+ return memcmp(a, b, sizeof(dzn_cmd_buffer_dsv_key)) == 0;
+}
+
+static VkResult
+dzn_cmd_buffer_create(const VkCommandBufferAllocateInfo *info,
+ VkCommandBuffer *out)
+{
+ VK_FROM_HANDLE(vk_command_pool, pool, info->commandPool);
+ dzn_device *device = container_of(pool->base.device, dzn_device, vk);
+ dzn_physical_device *pdev =
+ container_of(device->vk.physical, dzn_physical_device, vk);
+
+ assert(pool->queue_family_index < pdev->queue_family_count);
+
+ D3D12_COMMAND_LIST_TYPE type =
+ pdev->queue_families[pool->queue_family_index].desc.Type;
+
+ dzn_cmd_buffer *cmdbuf = (dzn_cmd_buffer *)
+ vk_zalloc(&pool->alloc, sizeof(*cmdbuf), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (!cmdbuf)
+ return vk_error(pool->base.device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ VkResult result =
+ vk_command_buffer_init(&cmdbuf->vk, pool, info->level);
+ if (result != VK_SUCCESS) {
+ vk_free(&pool->alloc, cmdbuf);
+ return result;
+ }
+
+ memset(&cmdbuf->state, 0, sizeof(cmdbuf->state));
+ list_inithead(&cmdbuf->internal_bufs);
+ util_dynarray_init(&cmdbuf->events.wait, NULL);
+ util_dynarray_init(&cmdbuf->events.signal, NULL);
+ util_dynarray_init(&cmdbuf->queries.reset, NULL);
+ util_dynarray_init(&cmdbuf->queries.wait, NULL);
+ util_dynarray_init(&cmdbuf->queries.signal, NULL);
+ dzn_descriptor_heap_pool_init(&cmdbuf->rtvs.pool, device,
+ D3D12_DESCRIPTOR_HEAP_TYPE_RTV,
+ false, &pool->alloc);
+ dzn_descriptor_heap_pool_init(&cmdbuf->dsvs.pool, device,
+ D3D12_DESCRIPTOR_HEAP_TYPE_DSV,
+ false, &pool->alloc);
+ dzn_descriptor_heap_pool_init(&cmdbuf->cbv_srv_uav_pool, device,
+ D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
+ true, &pool->alloc);
+ dzn_descriptor_heap_pool_init(&cmdbuf->sampler_pool, device,
+ D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER,
+ true, &pool->alloc);
+
+ cmdbuf->events.ht =
+ _mesa_pointer_hash_table_create(NULL);
+ cmdbuf->queries.ht =
+ _mesa_pointer_hash_table_create(NULL);
+ cmdbuf->rtvs.ht =
+ _mesa_hash_table_create(NULL,
+ dzn_cmd_buffer_rtv_key_hash_function,
+ dzn_cmd_buffer_rtv_key_equals_function);
+ cmdbuf->dsvs.ht =
+ _mesa_hash_table_create(NULL,
+ dzn_cmd_buffer_dsv_key_hash_function,
+ dzn_cmd_buffer_dsv_key_equals_function);
+ if (!cmdbuf->events.ht || !cmdbuf->queries.ht ||
+ !cmdbuf->rtvs.ht || !cmdbuf->dsvs.ht) {
+ result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+ goto out;
+ }
+
+ cmdbuf->vk.destroy = dzn_cmd_buffer_destroy;
+
+ if (FAILED(device->dev->CreateCommandAllocator(type,
+ IID_PPV_ARGS(&cmdbuf->cmdalloc)))) {
+ result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+ goto out;
+ }
+
+ if (FAILED(device->dev->CreateCommandList(0, type,
+ cmdbuf->cmdalloc, NULL,
+ IID_PPV_ARGS(&cmdbuf->cmdlist)))) {
+ result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+ goto out;
+ }
+
+out:
+ if (result != VK_SUCCESS)
+ dzn_cmd_buffer_destroy(&cmdbuf->vk);
+ else
+ *out = dzn_cmd_buffer_to_handle(cmdbuf);
+
+ return result;
+}
+
+VkResult
+dzn_cmd_buffer_reset(dzn_cmd_buffer *cmdbuf)
+{
+ dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+
+ /* Reset the state */
+ memset(&cmdbuf->state, 0, sizeof(cmdbuf->state));
+
+ /* TODO: Return resources to the pool */
+ list_for_each_entry_safe(dzn_internal_resource, res, &cmdbuf->internal_bufs, link) {
+ list_del(&res->link);
+ res->res->Release();
+ vk_free(&cmdbuf->vk.pool->alloc, res);
+ }
+
+ cmdbuf->error = VK_SUCCESS;
+ util_dynarray_clear(&cmdbuf->events.wait);
+ util_dynarray_clear(&cmdbuf->events.signal);
+ util_dynarray_clear(&cmdbuf->queries.reset);
+ util_dynarray_clear(&cmdbuf->queries.wait);
+ util_dynarray_clear(&cmdbuf->queries.signal);
+ hash_table_foreach(cmdbuf->rtvs.ht, he)
+ vk_free(&cmdbuf->vk.pool->alloc, he->data);
+ _mesa_hash_table_clear(cmdbuf->rtvs.ht, NULL);
+ dzn_descriptor_heap_pool_reset(&cmdbuf->rtvs.pool);
+ hash_table_foreach(cmdbuf->dsvs.ht, he)
+ vk_free(&cmdbuf->vk.pool->alloc, he->data);
+ _mesa_hash_table_clear(cmdbuf->dsvs.ht, NULL);
+ hash_table_foreach(cmdbuf->queries.ht, he) {
+ dzn_cmd_buffer_query_pool_state *qpstate =
+ (dzn_cmd_buffer_query_pool_state *)he->data;
+ util_dynarray_fini(&qpstate->reset);
+ util_dynarray_fini(&qpstate->collect);
+ util_dynarray_fini(&qpstate->wait);
+ util_dynarray_fini(&qpstate->signal);
+ vk_free(&cmdbuf->vk.pool->alloc, he->data);
+ }
+ _mesa_hash_table_clear(cmdbuf->queries.ht, NULL);
+ _mesa_hash_table_clear(cmdbuf->events.ht, NULL);
+ dzn_descriptor_heap_pool_reset(&cmdbuf->dsvs.pool);
+ dzn_descriptor_heap_pool_reset(&cmdbuf->cbv_srv_uav_pool);
+ dzn_descriptor_heap_pool_reset(&cmdbuf->sampler_pool);
+ vk_command_buffer_reset(&cmdbuf->vk);
+
+ /* cmdlist->Reset() doesn't return the memory back to the command list
+ * allocator, and cmdalloc->Reset() can only be called if there's no live
+ * cmdlist allocated from the allocator, so we need to release and create
+ * a new command list.
+ */
+ cmdbuf->cmdlist->Release();
+ cmdbuf->cmdlist = NULL;
+ cmdbuf->cmdalloc->Reset();
+ if (FAILED(device->dev->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT,
+ cmdbuf->cmdalloc, NULL,
+ IID_PPV_ARGS(&cmdbuf->cmdlist)))) {
+ cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+
+ return cmdbuf->error;
+}
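+
+/* For reference, the recycling dance above boils down to this sketch
+ * (illustrative only, assuming a valid ID3D12Device *dev and command list
+ * type `type`):
+ *
+ *    cmdlist->Release();
+ *    cmdlist = NULL;
+ *    cmdalloc->Reset(); // legal now: no live list uses this allocator
+ *    dev->CreateCommandList(0, type, cmdalloc, NULL,
+ *                           IID_PPV_ARGS(&cmdlist));
+ */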
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_AllocateCommandBuffers(VkDevice device,
+ const VkCommandBufferAllocateInfo *pAllocateInfo,
+ VkCommandBuffer *pCommandBuffers)
+{
+ VK_FROM_HANDLE(vk_command_pool, pool, pAllocateInfo->commandPool);
+ VK_FROM_HANDLE(dzn_device, dev, device);
+ VkResult result = VK_SUCCESS;
+ uint32_t i;
+
+ for (i = 0; i < pAllocateInfo->commandBufferCount; i++) {
+ result = dzn_cmd_buffer_create(pAllocateInfo,
+ &pCommandBuffers[i]);
+ if (result != VK_SUCCESS)
+ break;
+ }
+
+ if (result != VK_SUCCESS) {
+ dev->vk.dispatch_table.FreeCommandBuffers(device, pAllocateInfo->commandPool,
+ i, pCommandBuffers);
+ for (i = 0; i < pAllocateInfo->commandBufferCount; i++)
+ pCommandBuffers[i] = VK_NULL_HANDLE;
+ }
+
+ return result;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_ResetCommandBuffer(VkCommandBuffer commandBuffer,
+ VkCommandBufferResetFlags flags)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+
+ return dzn_cmd_buffer_reset(cmdbuf);
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_BeginCommandBuffer(VkCommandBuffer commandBuffer,
+ const VkCommandBufferBeginInfo *info)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+
+ /* If this is the first vkBeginCommandBuffer, we must *initialize* the
+ * command buffer's state. Otherwise, we must *reset* its state. In both
+ * cases we reset it.
+ *
+ * From the Vulkan 1.0 spec:
+ *
+ * If a command buffer is in the executable state and the command buffer
+ * was allocated from a command pool with the
+ * VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT flag set, then
+ * vkBeginCommandBuffer implicitly resets the command buffer, behaving
+ * as if vkResetCommandBuffer had been called with
+ * VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT not set. It then puts
+ * the command buffer in the recording state.
+ */
+ return dzn_cmd_buffer_reset(cmdbuf);
+}
+
+static void
+dzn_cmd_buffer_gather_events(dzn_cmd_buffer *cmdbuf)
+{
+ dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+
+ if (cmdbuf->error != VK_SUCCESS)
+ goto out;
+
+ hash_table_foreach(cmdbuf->events.ht, he) {
+ enum dzn_event_state state = (enum dzn_event_state)(uintptr_t)he->data;
+
+ if (state != DZN_EVENT_STATE_EXTERNAL_WAIT) {
+ dzn_cmd_event_signal signal = { (dzn_event *)he->key, state == DZN_EVENT_STATE_SET };
+ dzn_cmd_event_signal *entry = (dzn_cmd_event_signal *)
+ util_dynarray_grow(&cmdbuf->events.signal, dzn_cmd_event_signal, 1);
+
+ if (!entry) {
+ cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+ break;
+ }
+
+ *entry = signal;
+ }
+ }
+
+out:
+ _mesa_hash_table_clear(cmdbuf->events.ht, NULL);
+}
+
+static VkResult
+dzn_cmd_buffer_dynbitset_reserve(dzn_cmd_buffer *cmdbuf, struct util_dynarray *array, uint32_t bit)
+{
+ dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+
+ if (bit < util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS)
+ return VK_SUCCESS;
+
+ unsigned old_sz = array->size;
+ void *ptr = util_dynarray_grow(array, BITSET_WORD, (bit + BITSET_WORDBITS) / BITSET_WORDBITS);
+ if (!ptr) {
+ cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return cmdbuf->error;
+ }
+
+ memset(ptr, 0, array->size - old_sz);
+ return VK_SUCCESS;
+}
+
+static bool
+dzn_cmd_buffer_dynbitset_test(struct util_dynarray *array, uint32_t bit)
+{
+ uint32_t nbits = util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS;
+
+ if (bit < nbits)
+ return BITSET_TEST(util_dynarray_element(array, BITSET_WORD, 0), bit);
+
+ return false;
+}
+
+static VkResult
+dzn_cmd_buffer_dynbitset_set(dzn_cmd_buffer *cmdbuf, struct util_dynarray *array, uint32_t bit)
+{
+ dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+
+ VkResult result = dzn_cmd_buffer_dynbitset_reserve(cmdbuf, array, bit);
+ if (result != VK_SUCCESS)
+ return result;
+
+ BITSET_SET(util_dynarray_element(array, BITSET_WORD, 0), bit);
+ return VK_SUCCESS;
+}
+
+static void
+dzn_cmd_buffer_dynbitset_clear(dzn_cmd_buffer *cmdbuf, struct util_dynarray *array, uint32_t bit)
+{
+ dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+
+ if (bit >= util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS)
+ return;
+
+ BITSET_CLEAR(util_dynarray_element(array, BITSET_WORD, 0), bit);
+}
+
+static VkResult
+dzn_cmd_buffer_dynbitset_set_range(dzn_cmd_buffer *cmdbuf, struct util_dynarray *array,
+ uint32_t bit, uint32_t count)
+{
+ dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+
+ VkResult result = dzn_cmd_buffer_dynbitset_reserve(cmdbuf, array, bit + count - 1);
+ if (result != VK_SUCCESS)
+ return result;
+
+ BITSET_SET_RANGE(util_dynarray_element(array, BITSET_WORD, 0), bit, bit + count - 1);
+ return VK_SUCCESS;
+}
+
+static void
+dzn_cmd_buffer_dynbitset_clear_range(dzn_cmd_buffer *cmdbuf, struct util_dynarray *array,
+ uint32_t bit, uint32_t count)
+{
+ dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+ uint32_t nbits = util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS;
+
+ if (!nbits)
+ return;
+
+ uint32_t end = MIN2(bit + count, nbits) - 1;
+
+ while (bit <= end) {
+ uint32_t subcount = MIN2(end + 1 - bit, 32 - (bit % 32));
+ BITSET_CLEAR_RANGE(util_dynarray_element(array, BITSET_WORD, 0), bit, bit + subcount - 1);
+ bit += subcount;
+ }
+}
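+
+/* The bit-to-word arithmetic behind these helpers, with a hypothetical
+ * example (BITSET_WORDBITS is 32 on this path, as the clear_range loop
+ * above assumes):
+ *
+ *    bit 70 -> word 70 / 32 == 2, position 70 % 32 == 6
+ *
+ * so reserving bit 70 grows the dynarray to at least 3 BITSET_WORDs, and
+ * BITSET_TEST(words, 70) checks words[2] & (1u << 6).
+ */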
+
+static dzn_cmd_buffer_query_pool_state *
+dzn_cmd_buffer_create_query_pool_state(dzn_cmd_buffer *cmdbuf)
+{
+ dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+ dzn_cmd_buffer_query_pool_state *state = (dzn_cmd_buffer_query_pool_state *)
+ vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*state),
+ 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (!state) {
+ cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return NULL;
+ }
+
+ util_dynarray_init(&state->reset, NULL);
+ util_dynarray_init(&state->collect, NULL);
+ util_dynarray_init(&state->wait, NULL);
+ util_dynarray_init(&state->signal, NULL);
+ return state;
+}
+
+static void
+dzn_cmd_buffer_destroy_query_pool_state(dzn_cmd_buffer *cmdbuf,
+ dzn_cmd_buffer_query_pool_state *state)
+{
+ util_dynarray_fini(&state->reset);
+ util_dynarray_fini(&state->collect);
+ util_dynarray_fini(&state->wait);
+ util_dynarray_fini(&state->signal);
+ vk_free(&cmdbuf->vk.pool->alloc, state);
+}
+
+static dzn_cmd_buffer_query_pool_state *
+dzn_cmd_buffer_get_query_pool_state(dzn_cmd_buffer *cmdbuf,
+ dzn_query_pool *qpool)
+{
+ dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+ dzn_cmd_buffer_query_pool_state *state = NULL;
+ struct hash_entry *he =
+ _mesa_hash_table_search(cmdbuf->queries.ht, qpool);
+
+ if (!he) {
+ state = dzn_cmd_buffer_create_query_pool_state(cmdbuf);
+ if (!state)
+ return NULL;
+
+ he = _mesa_hash_table_insert(cmdbuf->queries.ht, qpool, state);
+ if (!he) {
+ dzn_cmd_buffer_destroy_query_pool_state(cmdbuf, state);
+ cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return NULL;
+ }
+ } else {
+ state = (dzn_cmd_buffer_query_pool_state *)he->data;
+ }
+
+ return state;
+}
+
+static VkResult
+dzn_cmd_buffer_collect_queries(dzn_cmd_buffer *cmdbuf,
+ const dzn_query_pool *qpool,
+ dzn_cmd_buffer_query_pool_state *state,
+ uint32_t first_query,
+ uint32_t query_count)
+{
+ dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+ uint32_t nbits = util_dynarray_num_elements(&state->collect, BITSET_WORD) * BITSET_WORDBITS;
+ uint32_t start, end;
+
+ query_count = MIN2(query_count, nbits - first_query);
+ nbits = MIN2(first_query + query_count, nbits);
+
+ VkResult result =
+ dzn_cmd_buffer_dynbitset_reserve(cmdbuf, &state->signal, first_query + query_count - 1);
+ if (result != VK_SUCCESS)
+ return result;
+
+ BITSET_WORD *collect =
+ util_dynarray_element(&state->collect, BITSET_WORD, 0);
+ for (start = first_query, end = first_query,
+ __bitset_next_range(&start, &end, collect, nbits);
+ start < nbits;
+ __bitset_next_range(&start, &end, collect, nbits)) {
+ cmdbuf->cmdlist->ResolveQueryData(qpool->heap, qpool->queries[start].type,
+ start, end - start,
+ qpool->resolve_buffer,
+ qpool->query_size * start);
+ }
+
+ D3D12_RESOURCE_BARRIER barrier = {
+ .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
+ .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
+ .Transition = {
+ .pResource = qpool->resolve_buffer,
+ .StateBefore = D3D12_RESOURCE_STATE_COPY_DEST,
+ .StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE,
+ },
+ };
+ uint32_t offset = dzn_query_pool_get_result_offset(qpool, first_query);
+ uint32_t size = dzn_query_pool_get_result_size(qpool, query_count);
+
+ cmdbuf->cmdlist->ResourceBarrier(1, &barrier);
+
+ cmdbuf->cmdlist->CopyBufferRegion(qpool->collect_buffer, offset,
+ qpool->resolve_buffer, offset,
+ size);
+
+ for (start = first_query, end = first_query,
+ __bitset_next_range(&start, &end, collect, nbits);
+ start < nbits;
+ __bitset_next_range(&start, &end, collect, nbits)) {
+ uint32_t step = DZN_QUERY_REFS_SECTION_SIZE / sizeof(uint64_t);
+ uint32_t count = end - start;
+
+ for (unsigned i = 0; i < count; i += step) {
+ uint32_t sub_count = MIN2(step, count - i);
+
+ cmdbuf->cmdlist->CopyBufferRegion(qpool->collect_buffer,
+ dzn_query_pool_get_availability_offset(qpool, start + i),
+ device->queries.refs,
+ DZN_QUERY_REFS_ALL_ONES_OFFSET,
+ sizeof(uint64_t) * sub_count);
+ }
+
+ dzn_cmd_buffer_dynbitset_set_range(cmdbuf, &state->signal, start, count);
+ dzn_cmd_buffer_dynbitset_clear_range(cmdbuf, &state->collect, start, count);
+ }
+
+ DZN_SWAP(barrier.Transition.StateBefore, barrier.Transition.StateAfter);
+ cmdbuf->cmdlist->ResourceBarrier(1, &barrier);
+ return VK_SUCCESS;
+}
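+
+/* A condensed sketch of the readback sequence implemented above, for one
+ * contiguous query range (field names are the ones used above):
+ *
+ *    ResolveQueryData(heap, type, start, count, resolve_buffer, offset);
+ *    // resolve_buffer: COPY_DEST -> COPY_SOURCE
+ *    CopyBufferRegion(collect_buffer, offset, resolve_buffer, offset, size);
+ *    CopyBufferRegion(collect_buffer, availability_offset,
+ *                     device->queries.refs, DZN_QUERY_REFS_ALL_ONES_OFFSET,
+ *                     sizeof(uint64_t) * count);
+ *    // resolve_buffer: COPY_SOURCE -> COPY_DEST
+ */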
+
+static VkResult
+dzn_cmd_buffer_collect_query_ops(dzn_cmd_buffer *cmdbuf,
+ dzn_query_pool *qpool,
+ struct util_dynarray *bitset_array,
+ struct util_dynarray *ops_array)
+{
+ dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+ BITSET_WORD *bitset = util_dynarray_element(bitset_array, BITSET_WORD, 0);
+ uint32_t nbits = util_dynarray_num_elements(bitset_array, BITSET_WORD) * BITSET_WORDBITS;
+ uint32_t start, end;
+
+ BITSET_FOREACH_RANGE(start, end, bitset, nbits) {
+ dzn_cmd_buffer_query_range range { qpool, start, end - start };
+ dzn_cmd_buffer_query_range *entry = (dzn_cmd_buffer_query_range *)
+ util_dynarray_grow(ops_array, dzn_cmd_buffer_query_range, 1);
+
+ if (!entry) {
+ cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return cmdbuf->error;
+ }
+
+ *entry = range;
+ }
+
+ return VK_SUCCESS;
+}
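+
+/* BITSET_FOREACH_RANGE() yields maximal runs of set bits as half-open
+ * [start, end) ranges: for a hypothetical bitset {1,2,3,8,9} it produces
+ * [1,4) then [8,10), hence the `end - start` count stored above.
+ */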
+
+static VkResult
+dzn_cmd_buffer_gather_queries(dzn_cmd_buffer *cmdbuf)
+{
+ hash_table_foreach(cmdbuf->queries.ht, he) {
+ dzn_query_pool *qpool = (dzn_query_pool *)he->key;
+ dzn_cmd_buffer_query_pool_state *state =
+ (dzn_cmd_buffer_query_pool_state *)he->data;
+ VkResult result =
+ dzn_cmd_buffer_collect_queries(cmdbuf, qpool, state, 0, qpool->query_count);
+ if (result != VK_SUCCESS)
+ return result;
+
+ result = dzn_cmd_buffer_collect_query_ops(cmdbuf, qpool, &state->reset, &cmdbuf->queries.reset);
+ if (result != VK_SUCCESS)
+ return result;
+
+ result = dzn_cmd_buffer_collect_query_ops(cmdbuf, qpool, &state->wait, &cmdbuf->queries.wait);
+ if (result != VK_SUCCESS)
+ return result;
+
+ result = dzn_cmd_buffer_collect_query_ops(cmdbuf, qpool, &state->signal, &cmdbuf->queries.signal);
+ if (result != VK_SUCCESS)
+ return result;
+ }
+
+ return VK_SUCCESS;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_EndCommandBuffer(VkCommandBuffer commandBuffer)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+
+ if (cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
+ dzn_cmd_buffer_gather_events(cmdbuf);
+ dzn_cmd_buffer_gather_queries(cmdbuf);
+ HRESULT hres = cmdbuf->cmdlist->Close();
+ if (FAILED(hres))
+ cmdbuf->error = vk_error(cmdbuf->vk.base.device, VK_ERROR_OUT_OF_HOST_MEMORY);
+ } else {
+ cmdbuf->error = cmdbuf->vk.cmd_queue.error;
+ }
+
+ assert(cmdbuf->error == VK_SUCCESS);
+ return cmdbuf->error;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdPipelineBarrier2(VkCommandBuffer commandBuffer,
+ const VkDependencyInfo *info)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+
+ bool execution_barrier =
+ !info->memoryBarrierCount &&
+ !info->bufferMemoryBarrierCount &&
+ !info->imageMemoryBarrierCount;
+
+ if (execution_barrier) {
+ /* Execution barrier can be emulated with a NULL UAV barrier (AKA
+ * pipeline flush). That's the best we can do with the standard D3D12
+ * barrier API.
+ */
+ D3D12_RESOURCE_BARRIER barrier = {
+ .Type = D3D12_RESOURCE_BARRIER_TYPE_UAV,
+ .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
+ .UAV = { .pResource = NULL },
+ };
+
+ cmdbuf->cmdlist->ResourceBarrier(1, &barrier);
+ }
+
+ /* Global memory barriers can be emulated with NULL UAV/Aliasing barriers.
+ * Scopes are not taken into account, but that's inherent to the current
+ * D3D12 barrier API.
+ */
+ if (info->memoryBarrierCount) {
+ D3D12_RESOURCE_BARRIER barriers[2] = {};
+
+ barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
+ barriers[0].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
+ barriers[0].UAV.pResource = NULL;
+ barriers[1].Type = D3D12_RESOURCE_BARRIER_TYPE_ALIASING;
+ barriers[1].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
+ barriers[1].Aliasing.pResourceBefore = NULL;
+ barriers[1].Aliasing.pResourceAfter = NULL;
+ cmdbuf->cmdlist->ResourceBarrier(2, barriers);
+ }
+
+ for (uint32_t i = 0; i < info->bufferMemoryBarrierCount; i++) {
+ VK_FROM_HANDLE(dzn_buffer, buf, info->pBufferMemoryBarriers[i].buffer);
+ D3D12_RESOURCE_BARRIER barrier = {};
+
+ /* UAVs are only used for storage buffers; skip all other buffers. */
+ if (!(buf->usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT))
+ continue;
+
+ barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
+ barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
+ barrier.UAV.pResource = buf->res;
+ cmdbuf->cmdlist->ResourceBarrier(1, &barrier);
+ }
+
+ for (uint32_t i = 0; i < info->imageMemoryBarrierCount; i++) {
+ const VkImageMemoryBarrier2 *ibarrier = &info->pImageMemoryBarriers[i];
+ const VkImageSubresourceRange *range = &ibarrier->subresourceRange;
+ VK_FROM_HANDLE(dzn_image, image, ibarrier->image);
+
+ /* We use the placed-resource simple model, in which only one resource
+ * pointing to a given heap is active at any given time. To make a
+ * resource active we need to emit an aliasing barrier.
+ */
+ D3D12_RESOURCE_BARRIER aliasing_barrier = {
+ .Type = D3D12_RESOURCE_BARRIER_TYPE_ALIASING,
+ .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
+ .Aliasing = {
+ .pResourceBefore = NULL,
+ .pResourceAfter = image->res,
+ },
+ };
+
+ cmdbuf->cmdlist->ResourceBarrier(1, &aliasing_barrier);
+
+ D3D12_RESOURCE_BARRIER transition_barrier = {
+ .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
+ .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
+ .Transition = {
+ .pResource = image->res,
+ .StateAfter = dzn_image_layout_to_state(ibarrier->newLayout),
+ },
+ };
+
+ if (ibarrier->oldLayout == VK_IMAGE_LAYOUT_UNDEFINED ||
+ ibarrier->oldLayout == VK_IMAGE_LAYOUT_PREINITIALIZED)
+ transition_barrier.Transition.StateBefore = image->mem->initial_state;
+ else
+ transition_barrier.Transition.StateBefore = dzn_image_layout_to_state(ibarrier->oldLayout);
+
+ /* Some layouts map to the same D3D12 state, and NOP transition barriers are illegal. */
+ if (transition_barrier.Transition.StateBefore == transition_barrier.Transition.StateAfter)
+ continue;
+
+ uint32_t layer_count = dzn_get_layer_count(image, range);
+ uint32_t level_count = dzn_get_level_count(image, range);
+ for (uint32_t layer = 0; layer < layer_count; layer++) {
+ for (uint32_t lvl = 0; lvl < level_count; lvl++) {
+ dzn_foreach_aspect(aspect, range->aspectMask) {
+ transition_barrier.Transition.Subresource =
+ dzn_image_range_get_subresource_index(image, range, aspect, lvl, layer);
+ cmdbuf->cmdlist->ResourceBarrier(1, &transition_barrier);
+ }
+ }
+ }
+ }
+}
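+
+/* The per-level/per-layer loop above is needed because a D3D12 transition
+ * barrier targets a single subresource, indexed as in D3D12CalcSubresource():
+ *
+ *    subres = mip_slice + array_slice * mip_levels
+ *                       + plane_slice * mip_levels * array_size;
+ */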
+
+D3D12_CPU_DESCRIPTOR_HANDLE
+dzn_cmd_buffer_get_dsv(dzn_cmd_buffer *cmdbuf,
+ const dzn_image *image,
+ const D3D12_DEPTH_STENCIL_VIEW_DESC *desc)
+{
+ dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+ dzn_cmd_buffer_dsv_key key { image, *desc };
+ struct hash_entry *he = _mesa_hash_table_search(cmdbuf->dsvs.ht, &key);
+ struct dzn_cmd_buffer_dsv_entry *dsve;
+
+ if (!he) {
+ dzn_descriptor_heap *heap;
+ uint32_t slot;
+
+ // TODO: error handling
+ dsve = (dzn_cmd_buffer_dsv_entry *)
+ vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*dsve), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ dsve->key = key;
+ dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->dsvs.pool, device, 1, &heap, &slot);
+ dsve->handle = dzn_descriptor_heap_get_cpu_handle(heap, slot);
+ device->dev->CreateDepthStencilView(image->res, desc, dsve->handle);
+ _mesa_hash_table_insert(cmdbuf->dsvs.ht, &dsve->key, dsve);
+ } else {
+ dsve = (dzn_cmd_buffer_dsv_entry *)he->data;
+ }
+
+ return dsve->handle;
+}
+
+D3D12_CPU_DESCRIPTOR_HANDLE
+dzn_cmd_buffer_get_rtv(dzn_cmd_buffer *cmdbuf,
+ const dzn_image *image,
+ const D3D12_RENDER_TARGET_VIEW_DESC *desc)
+{
+ dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+ dzn_cmd_buffer_rtv_key key { image, *desc };
+ struct hash_entry *he = _mesa_hash_table_search(cmdbuf->rtvs.ht, &key);
+ struct dzn_cmd_buffer_rtv_entry *rtve;
+
+ if (!he) {
+ struct dzn_descriptor_heap *heap;
+ uint32_t slot;
+
+ // TODO: error handling
+ rtve = (dzn_cmd_buffer_rtv_entry *)
+ vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*rtve), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ rtve->key = key;
+ dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->rtvs.pool, device, 1, &heap, &slot);
+ rtve->handle = dzn_descriptor_heap_get_cpu_handle(heap, slot);
+ device->dev->CreateRenderTargetView(image->res, desc, rtve->handle);
+ he = _mesa_hash_table_insert(cmdbuf->rtvs.ht, &rtve->key, rtve);
+ } else {
+ rtve = (dzn_cmd_buffer_rtv_entry *)he->data;
+ }
+
+ return rtve->handle;
+}
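+
+/* Both view caches are used the same way; a typical call site looks like
+ * this sketch (image/range/level taken from the caller's context):
+ *
+ *    auto desc = dzn_image_get_rtv_desc(image, &range, level);
+ *    auto handle = dzn_cmd_buffer_get_rtv(cmdbuf, image, &desc);
+ *    cmdbuf->cmdlist->ClearRenderTargetView(handle, vals, 0, NULL);
+ */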
+
+static VkResult
+dzn_cmd_buffer_alloc_internal_buf(dzn_cmd_buffer *cmdbuf,
+ uint32_t size,
+ D3D12_HEAP_TYPE heap_type,
+ D3D12_RESOURCE_STATES init_state,
+ ID3D12Resource **out)
+{
+ dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+ ComPtr<ID3D12Resource> res;
+ *out = NULL;
+
+ /* Align size to 64k (the default resource placement alignment) */
+ size = ALIGN_POT(size, 64 * 1024);
+
+ D3D12_HEAP_PROPERTIES hprops =
+ device->dev->GetCustomHeapProperties(0, heap_type);
+ D3D12_RESOURCE_DESC rdesc = {
+ .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
+ .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT,
+ .Width = size,
+ .Height = 1,
+ .DepthOrArraySize = 1,
+ .MipLevels = 1,
+ .Format = DXGI_FORMAT_UNKNOWN,
+ .SampleDesc = { .Count = 1, .Quality = 0 },
+ .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
+ .Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS,
+ };
+
+ HRESULT hres =
+ device->dev->CreateCommittedResource(&hprops, D3D12_HEAP_FLAG_NONE, &rdesc,
+ init_state,
+ NULL, IID_PPV_ARGS(&res));
+ if (FAILED(hres)) {
+ cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ return cmdbuf->error;
+ }
+
+ dzn_internal_resource *entry = (dzn_internal_resource *)
+ vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*entry), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (!entry) {
+ cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ return cmdbuf->error;
+ }
+
+ entry->res = res.Detach();
+ list_addtail(&entry->link, &cmdbuf->internal_bufs);
+ *out = entry->res;
+ return VK_SUCCESS;
+}
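+
+/* Typical use: a transient upload buffer whose lifetime is tied to the
+ * command buffer, as in the clear-with-copy paths below (sketch):
+ *
+ *    ID3D12Resource *buf;
+ *    if (dzn_cmd_buffer_alloc_internal_buf(cmdbuf, size,
+ *                                          D3D12_HEAP_TYPE_UPLOAD,
+ *                                          D3D12_RESOURCE_STATE_GENERIC_READ,
+ *                                          &buf) != VK_SUCCESS)
+ *       return;
+ *
+ *    void *ptr;
+ *    buf->Map(0, NULL, &ptr);
+ *    // ... fill staging data ...
+ *    buf->Unmap(0, NULL);
+ */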
+
+static void
+dzn_cmd_buffer_clear_rects_with_copy(dzn_cmd_buffer *cmdbuf,
+ const dzn_image *image,
+ VkImageLayout layout,
+ const VkClearColorValue *color,
+ const VkImageSubresourceRange *range,
+ uint32_t rect_count, D3D12_RECT *rects)
+{
+ enum pipe_format pfmt = vk_format_to_pipe_format(image->vk.format);
+ uint32_t blksize = util_format_get_blocksize(pfmt);
+ uint8_t buf[D3D12_TEXTURE_DATA_PITCH_ALIGNMENT * 3] = {};
+ uint32_t raw[4] = {};
+
+ assert(blksize <= sizeof(raw));
+ assert(!(sizeof(buf) % blksize));
+
+ util_format_write_4(pfmt, (void *)color, 0, (void *)raw, 0, 0, 0, 1, 1);
+
+ uint32_t fill_step = D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;
+ while (fill_step % blksize)
+ fill_step += D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;
+
+ uint32_t max_w = u_minify(image->vk.extent.width, range->baseMipLevel);
+ uint32_t max_h = u_minify(image->vk.extent.height, range->baseMipLevel);
+ uint32_t row_pitch = ALIGN_NPOT(max_w * blksize, fill_step);
+ uint32_t res_size = max_h * row_pitch;
+
+ assert(fill_step <= sizeof(buf));
+
+ for (uint32_t i = 0; i < fill_step; i += blksize)
+ memcpy(&buf[i], raw, blksize);
+
+ ID3D12Resource *src_res;
+
+ VkResult result =
+ dzn_cmd_buffer_alloc_internal_buf(cmdbuf, res_size,
+ D3D12_HEAP_TYPE_UPLOAD,
+ D3D12_RESOURCE_STATE_GENERIC_READ,
+ &src_res);
+ if (result != VK_SUCCESS)
+ return;
+
+ assert(!(res_size % fill_step));
+
+ uint8_t *cpu_ptr;
+ src_res->Map(0, NULL, (void **)&cpu_ptr);
+ for (uint32_t i = 0; i < res_size; i += fill_step)
+ memcpy(&cpu_ptr[i], buf, fill_step);
+
+ src_res->Unmap(0, NULL);
+
+ D3D12_TEXTURE_COPY_LOCATION src_loc = {
+ .pResource = src_res,
+ .Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT,
+ .PlacedFootprint = {
+ .Offset = 0,
+ .Footprint = {
+ .Width = max_w,
+ .Height = max_h,
+ .Depth = 1,
+ .RowPitch = (UINT)ALIGN_NPOT(max_w * blksize, fill_step),
+ },
+ },
+ };
+
+ D3D12_RESOURCE_STATES dst_state = dzn_image_layout_to_state(layout);
+ D3D12_RESOURCE_BARRIER barrier = {
+ .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
+ .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
+ .Transition = {
+ .pResource = src_res,
+ .StateBefore = D3D12_RESOURCE_STATE_GENERIC_READ,
+ .StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE,
+ },
+ };
+
+ cmdbuf->cmdlist->ResourceBarrier(1, &barrier);
+
+ barrier.Transition.pResource = image->res;
+
+ assert(dzn_get_level_count(image, range) == 1);
+ uint32_t layer_count = dzn_get_layer_count(image, range);
+
+ dzn_foreach_aspect(aspect, range->aspectMask) {
+ VkImageSubresourceLayers subres = {
+ .aspectMask = (VkImageAspectFlags)aspect,
+ .mipLevel = range->baseMipLevel,
+ .baseArrayLayer = range->baseArrayLayer,
+ .layerCount = layer_count,
+ };
+
+ for (uint32_t layer = 0; layer < layer_count; layer++) {
+ if (dst_state != D3D12_RESOURCE_STATE_COPY_DEST) {
+ barrier.Transition.Subresource =
+ dzn_image_range_get_subresource_index(image, range, aspect, 0, layer);
+ barrier.Transition.StateBefore = dst_state;
+ barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST;
+ cmdbuf->cmdlist->ResourceBarrier(1, &barrier);
+ }
+
+ D3D12_TEXTURE_COPY_LOCATION dst_loc =
+ dzn_image_get_copy_loc(image, &subres, aspect, layer);
+
+ src_loc.PlacedFootprint.Footprint.Format =
+ dst_loc.Type == D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT ?
+ dst_loc.PlacedFootprint.Footprint.Format :
+ image->desc.Format;
+
+ for (uint32_t r = 0; r < rect_count; r++) {
+ D3D12_BOX src_box = {
+ .left = 0,
+ .top = 0,
+ .front = 0,
+ .right = (UINT)(rects[r].right - rects[r].left),
+ .bottom = (UINT)(rects[r].bottom - rects[r].top),
+ .back = 1,
+ };
+
+ cmdbuf->cmdlist->CopyTextureRegion(&dst_loc,
+ rects[r].left, rects[r].top, 0,
+ &src_loc, &src_box);
+ }
+
+ if (dst_state != D3D12_RESOURCE_STATE_COPY_DEST) {
+ barrier.Transition.StateAfter = dst_state;
+ barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST;
+ cmdbuf->cmdlist->ResourceBarrier(1, &barrier);
+ }
+ }
+ }
+}
+
+static VkClearColorValue
+adjust_clear_color(VkFormat format, const VkClearColorValue &col)
+{
+ VkClearColorValue out = col;
+
+ // D3D12 doesn't support bgra4, so we map it to rgba4 and swizzle things
+ // manually where it matters, like here, in the clear path.
+ if (format == VK_FORMAT_B4G4R4A4_UNORM_PACK16) {
+ DZN_SWAP(out.float32[0], out.float32[1]);
+ DZN_SWAP(out.float32[2], out.float32[3]);
+ }
+
+ return out;
+}
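+
+/* Example: clearing a VK_FORMAT_B4G4R4A4_UNORM_PACK16 image to
+ * (r,g,b,a) = (1,0,0,1) yields (0,1,1,0) after the swaps above, so the
+ * rgba4-mapped D3D12 resource ends up holding the intended channels.
+ */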
+
+static void
+dzn_cmd_buffer_clear_ranges_with_copy(dzn_cmd_buffer *cmdbuf,
+ const dzn_image *image,
+ VkImageLayout layout,
+ const VkClearColorValue *color,
+ uint32_t range_count,
+ const VkImageSubresourceRange *ranges)
+{
+ enum pipe_format pfmt = vk_format_to_pipe_format(image->vk.format);
+ uint32_t blksize = util_format_get_blocksize(pfmt);
+ uint8_t buf[D3D12_TEXTURE_DATA_PITCH_ALIGNMENT * 3] = {};
+ uint32_t raw[4] = {};
+
+ assert(blksize <= sizeof(raw));
+ assert(!(sizeof(buf) % blksize));
+
+ util_format_write_4(pfmt, (void *)color, 0, (void *)raw, 0, 0, 0, 1, 1);
+
+ uint32_t fill_step = D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;
+ while (fill_step % blksize)
+ fill_step += D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;
+
+ uint32_t res_size = 0;
+ for (uint32_t r = 0; r < range_count; r++) {
+ uint32_t w = u_minify(image->vk.extent.width, ranges[r].baseMipLevel);
+ uint32_t h = u_minify(image->vk.extent.height, ranges[r].baseMipLevel);
+ uint32_t d = u_minify(image->vk.extent.depth, ranges[r].baseMipLevel);
+ uint32_t row_pitch = ALIGN_NPOT(w * blksize, fill_step);
+
+ res_size = MAX2(res_size, h * d * row_pitch);
+ }
+
+ assert(fill_step <= sizeof(buf));
+
+ for (uint32_t i = 0; i < fill_step; i += blksize)
+ memcpy(&buf[i], raw, blksize);
+
+ ID3D12Resource *src_res;
+
+ VkResult result =
+ dzn_cmd_buffer_alloc_internal_buf(cmdbuf, res_size,
+ D3D12_HEAP_TYPE_UPLOAD,
+ D3D12_RESOURCE_STATE_GENERIC_READ,
+ &src_res);
+ if (result != VK_SUCCESS)
+ return;
+
+ assert(!(res_size % fill_step));
+
+ uint8_t *cpu_ptr;
+ src_res->Map(0, NULL, (void **)&cpu_ptr);
+ for (uint32_t i = 0; i < res_size; i += fill_step)
+ memcpy(&cpu_ptr[i], buf, fill_step);
+
+ src_res->Unmap(0, NULL);
+
+ D3D12_TEXTURE_COPY_LOCATION src_loc = {
+ .pResource = src_res,
+ .Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT,
+ .PlacedFootprint = {
+ .Offset = 0,
+ },
+ };
+
+ D3D12_RESOURCE_STATES dst_state = dzn_image_layout_to_state(layout);
+ D3D12_RESOURCE_BARRIER barrier = {
+ .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
+ .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
+ .Transition = {
+ .pResource = src_res,
+ .StateBefore = D3D12_RESOURCE_STATE_GENERIC_READ,
+ .StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE,
+ },
+ };
+
+ cmdbuf->cmdlist->ResourceBarrier(1, &barrier);
+
+ barrier.Transition.pResource = image->res;
+ for (uint32_t r = 0; r < range_count; r++) {
+ uint32_t level_count = dzn_get_level_count(image, &ranges[r]);
+ uint32_t layer_count = dzn_get_layer_count(image, &ranges[r]);
+
+ dzn_foreach_aspect(aspect, ranges[r].aspectMask) {
+ for (uint32_t lvl = 0; lvl < level_count; lvl++) {
+ uint32_t w = u_minify(image->vk.extent.width, ranges[r].baseMipLevel + lvl);
+ uint32_t h = u_minify(image->vk.extent.height, ranges[r].baseMipLevel + lvl);
+ uint32_t d = u_minify(image->vk.extent.depth, ranges[r].baseMipLevel + lvl);
+ VkImageSubresourceLayers subres = {
+ .aspectMask = (VkImageAspectFlags)aspect,
+ .mipLevel = ranges[r].baseMipLevel + lvl,
+ .baseArrayLayer = ranges[r].baseArrayLayer,
+ .layerCount = layer_count,
+ };
+
+ for (uint32_t layer = 0; layer < layer_count; layer++) {
+ if (dst_state != D3D12_RESOURCE_STATE_COPY_DEST) {
+ barrier.Transition.Subresource =
+ dzn_image_range_get_subresource_index(image, &ranges[r], aspect, lvl, layer);
+ barrier.Transition.StateBefore = dst_state;
+ barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST;
+ cmdbuf->cmdlist->ResourceBarrier(1, &barrier);
+ }
+
+ D3D12_TEXTURE_COPY_LOCATION dst_loc =
+ dzn_image_get_copy_loc(image, &subres, aspect, layer);
+
+ src_loc.PlacedFootprint.Footprint.Format =
+ dst_loc.Type == D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT ?
+ dst_loc.PlacedFootprint.Footprint.Format :
+ image->desc.Format;
+ src_loc.PlacedFootprint.Footprint.Width = w;
+ src_loc.PlacedFootprint.Footprint.Height = h;
+ src_loc.PlacedFootprint.Footprint.Depth = d;
+ src_loc.PlacedFootprint.Footprint.RowPitch =
+ ALIGN_NPOT(w * blksize, fill_step);
+ D3D12_BOX src_box = {
+ .left = 0,
+ .top = 0,
+ .front = 0,
+ .right = w,
+ .bottom = h,
+ .back = d,
+ };
+
+ cmdbuf->cmdlist->CopyTextureRegion(&dst_loc, 0, 0, 0,
+ &src_loc, &src_box);
+
+ if (dst_state != D3D12_RESOURCE_STATE_COPY_DEST) {
+ barrier.Transition.StateAfter = dst_state;
+ barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST;
+ cmdbuf->cmdlist->ResourceBarrier(1, &barrier);
+ }
+ }
+ }
+ }
+ }
+}
+
+static void
+dzn_cmd_buffer_clear_attachment(dzn_cmd_buffer *cmdbuf,
+ uint32_t idx,
+ const VkClearValue *value,
+ VkImageAspectFlags aspects,
+ uint32_t base_layer,
+ uint32_t layer_count,
+ uint32_t rect_count,
+ D3D12_RECT *rects)
+{
+ if (idx == VK_ATTACHMENT_UNUSED)
+ return;
+
+ dzn_image_view *view = cmdbuf->state.framebuffer->attachments[idx];
+ dzn_image *image = container_of(view->vk.image, dzn_image, vk);
+
+ VkImageSubresourceRange range = {
+ .aspectMask = aspects,
+ .baseMipLevel = view->vk.base_mip_level,
+ .levelCount = 1,
+ .baseArrayLayer = view->vk.base_array_layer + base_layer,
+ .layerCount = layer_count,
+ };
+ bool all_layers =
+ base_layer == 0 &&
+ (layer_count == view->vk.layer_count ||
+ layer_count == VK_REMAINING_ARRAY_LAYERS);
+
+ if (vk_format_is_depth_or_stencil(view->vk.format)) {
+ D3D12_CLEAR_FLAGS flags = (D3D12_CLEAR_FLAGS)0;
+
+ if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
+ flags |= D3D12_CLEAR_FLAG_DEPTH;
+ if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT)
+ flags |= D3D12_CLEAR_FLAG_STENCIL;
+
+ if (flags != 0) {
+ auto desc = dzn_image_get_dsv_desc(image, &range, 0);
+ auto handle = dzn_cmd_buffer_get_dsv(cmdbuf, image, &desc);
+ cmdbuf->cmdlist->ClearDepthStencilView(handle, flags,
+ value->depthStencil.depth,
+ value->depthStencil.stencil,
+ rect_count, rects);
+ }
+ } else if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
+ VkClearColorValue color = adjust_clear_color(view->vk.format, value->color);
+ bool clear_with_cpy = false;
+ float vals[4];
+
+ if (vk_format_is_sint(view->vk.format)) {
+ for (uint32_t i = 0; i < 4; i++) {
+ vals[i] = color.int32[i];
+ if (color.int32[i] != (int32_t)vals[i]) {
+ clear_with_cpy = true;
+ break;
+ }
+ }
+ } else if (vk_format_is_uint(view->vk.format)) {
+ for (uint32_t i = 0; i < 4; i++) {
+ vals[i] = color.uint32[i];
+ if (color.uint32[i] != (uint32_t)vals[i]) {
+ clear_with_cpy = true;
+ break;
+ }
+ }
+ } else {
+ for (uint32_t i = 0; i < 4; i++)
+ vals[i] = color.float32[i];
+ }
+
+ if (clear_with_cpy) {
+ dzn_cmd_buffer_clear_rects_with_copy(cmdbuf, image,
+ VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+ &value->color,
+ &range, rect_count, rects);
+ } else {
+ auto desc = dzn_image_get_rtv_desc(image, &range, 0);
+ auto handle = dzn_cmd_buffer_get_rtv(cmdbuf, image, &desc);
+ cmdbuf->cmdlist->ClearRenderTargetView(handle, vals, rect_count, rects);
+ }
+ }
+}
+
+static void
+dzn_cmd_buffer_clear_color(dzn_cmd_buffer *cmdbuf,
+ const dzn_image *image,
+ VkImageLayout layout,
+ const VkClearColorValue *col,
+ uint32_t range_count,
+ const VkImageSubresourceRange *ranges)
+{
+ if (!(image->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)) {
+ dzn_cmd_buffer_clear_ranges_with_copy(cmdbuf, image, layout, col, range_count, ranges);
+ return;
+ }
+
+ VkClearColorValue color = adjust_clear_color(image->vk.format, *col);
+ float clear_vals[4];
+
+ enum pipe_format pfmt = vk_format_to_pipe_format(image->vk.format);
+
+ if (util_format_is_pure_sint(pfmt)) {
+ for (uint32_t c = 0; c < ARRAY_SIZE(clear_vals); c++) {
+ clear_vals[c] = color.int32[c];
+ if (color.int32[c] != (int32_t)clear_vals[c]) {
+ dzn_cmd_buffer_clear_ranges_with_copy(cmdbuf, image, layout, col, range_count, ranges);
+ return;
+ }
+ }
+ } else if (util_format_is_pure_uint(pfmt)) {
+ for (uint32_t c = 0; c < ARRAY_SIZE(clear_vals); c++) {
+ clear_vals[c] = color.uint32[c];
+ if (color.uint32[c] != (uint32_t)clear_vals[c]) {
+ dzn_cmd_buffer_clear_ranges_with_copy(cmdbuf, image, layout, col, range_count, ranges);
+ return;
+ }
+ }
+ } else {
+ memcpy(clear_vals, color.float32, sizeof(clear_vals));
+ }
+
+ for (uint32_t r = 0; r < range_count; r++) {
+ const VkImageSubresourceRange *range = &ranges[r];
+ uint32_t layer_count = dzn_get_layer_count(image, range);
+ uint32_t level_count = dzn_get_level_count(image, range);
+
+ for (uint32_t lvl = 0; lvl < level_count; lvl++) {
+ D3D12_RESOURCE_BARRIER barrier = {
+ .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
+ .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
+ .Transition = {
+ .pResource = image->res,
+ .StateBefore = dzn_image_layout_to_state(layout),
+ .StateAfter = D3D12_RESOURCE_STATE_RENDER_TARGET,
+ },
+ };
+
+ if (barrier.Transition.StateBefore != barrier.Transition.StateAfter) {
+ for (uint32_t layer = 0; layer < layer_count; layer++) {
+ barrier.Transition.Subresource =
+ dzn_image_range_get_subresource_index(image, range,
+ VK_IMAGE_ASPECT_COLOR_BIT,
+ lvl, layer);
+ cmdbuf->cmdlist->ResourceBarrier(1, &barrier);
+ }
+ }
+
+ VkImageSubresourceRange view_range = *range;
+
+ if (image->vk.image_type == VK_IMAGE_TYPE_3D) {
+ view_range.baseArrayLayer = 0;
+ view_range.layerCount = u_minify(image->vk.extent.depth, range->baseMipLevel + lvl);
+ }
+
+ auto desc = dzn_image_get_rtv_desc(image, &view_range, lvl);
+ auto handle = dzn_cmd_buffer_get_rtv(cmdbuf, image, &desc);
+ cmdbuf->cmdlist->ClearRenderTargetView(handle, clear_vals, 0, NULL);
+
+ if (barrier.Transition.StateBefore != barrier.Transition.StateAfter) {
+ DZN_SWAP(barrier.Transition.StateBefore, barrier.Transition.StateAfter);
+
+ for (uint32_t layer = 0; layer < layer_count; layer++) {
+ barrier.Transition.Subresource =
+ dzn_image_range_get_subresource_index(image, range, VK_IMAGE_ASPECT_COLOR_BIT, lvl, layer);
+ cmdbuf->cmdlist->ResourceBarrier(1, &barrier);
+ }
+ }
+ }
+ }
+}
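+
+/* The representability checks above exist because ClearRenderTargetView()
+ * only takes floats: e.g. the uint32 value 16777217 (2^24 + 1) rounds to
+ * 16777216 as a float, so (uint32_t)clear_vals[c] != color.uint32[c] and
+ * the clear falls back to the copy path.
+ */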
+
+static void
+dzn_cmd_buffer_clear_zs(dzn_cmd_buffer *cmdbuf,
+ const dzn_image *image,
+ VkImageLayout layout,
+ const VkClearDepthStencilValue *zs,
+ uint32_t range_count,
+ const VkImageSubresourceRange *ranges)
+{
+ assert(image->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL);
+
+ for (uint32_t r = 0; r < range_count; r++) {
+ const VkImageSubresourceRange *range = &ranges[r];
+ uint32_t layer_count = dzn_get_layer_count(image, range);
+ uint32_t level_count = dzn_get_level_count(image, range);
+
+ D3D12_CLEAR_FLAGS flags = (D3D12_CLEAR_FLAGS)0;
+
+ if (range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
+ flags |= D3D12_CLEAR_FLAG_DEPTH;
+ if (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)
+ flags |= D3D12_CLEAR_FLAG_STENCIL;
+
+ for (uint32_t lvl = 0; lvl < level_count; lvl++) {
+ D3D12_RESOURCE_BARRIER barrier = {
+ .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
+ .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
+ .Transition = {
+ .pResource = image->res,
+ .StateBefore = dzn_image_layout_to_state(layout),
+ .StateAfter = D3D12_RESOURCE_STATE_DEPTH_WRITE,
+ },
+ };
+
+ if (barrier.Transition.StateBefore != barrier.Transition.StateAfter) {
+ for (uint32_t layer = 0; layer < layer_count; layer++) {
+ dzn_foreach_aspect(aspect, range->aspectMask) {
+ barrier.Transition.Subresource =
+ dzn_image_range_get_subresource_index(image, range, aspect, lvl, layer);
+ cmdbuf->cmdlist->ResourceBarrier(1, &barrier);
+ }
+ }
+ }
+
+ auto desc = dzn_image_get_dsv_desc(image, range, lvl);
+ auto handle = dzn_cmd_buffer_get_dsv(cmdbuf, image, &desc);
+ cmdbuf->cmdlist->ClearDepthStencilView(handle, flags,
+ zs->depth, zs->stencil,
+ 0, NULL);
+
+ if (barrier.Transition.StateBefore != barrier.Transition.StateAfter) {
+ DZN_SWAP(barrier.Transition.StateBefore, barrier.Transition.StateAfter);
+
+ for (uint32_t layer = 0; layer < layer_count; layer++) {
+ dzn_foreach_aspect(aspect, range->aspectMask) {
+ barrier.Transition.Subresource =
+ dzn_image_range_get_subresource_index(image, range, aspect, lvl, layer);
+ cmdbuf->cmdlist->ResourceBarrier(1, &barrier);
+ }
+ }
+ }
+ }
+ }
+}
+
+static void
+dzn_cmd_buffer_copy_buf2img_region(dzn_cmd_buffer *cmdbuf,
+ const VkCopyBufferToImageInfo2 *info,
+ uint32_t r,
+ VkImageAspectFlagBits aspect,
+ uint32_t l)
+{
+ dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+ VK_FROM_HANDLE(dzn_buffer, src_buffer, info->srcBuffer);
+ VK_FROM_HANDLE(dzn_image, dst_image, info->dstImage);
+
+ ID3D12Device *dev = device->dev;
+ ID3D12GraphicsCommandList *cmdlist = cmdbuf->cmdlist;
+
+ const VkBufferImageCopy2 *region = &info->pRegions[r];
+ enum pipe_format pfmt = vk_format_to_pipe_format(dst_image->vk.format);
+ uint32_t blkh = util_format_get_blockheight(pfmt);
+ uint32_t blkd = util_format_get_blockdepth(pfmt);
+
+ D3D12_TEXTURE_COPY_LOCATION dst_img_loc =
+ dzn_image_get_copy_loc(dst_image, &region->imageSubresource, aspect, l);
+ D3D12_TEXTURE_COPY_LOCATION src_buf_loc =
+ dzn_buffer_get_copy_loc(src_buffer, dst_image->vk.format, region, aspect, l);
+
+ if (dzn_buffer_supports_region_copy(&src_buf_loc)) {
+ /* RowPitch and Offset are properly aligned, so we can copy
+ * the whole thing in one call.
+ */
+ D3D12_BOX src_box = {
+ .left = 0,
+ .top = 0,
+ .front = 0,
+ .right = region->imageExtent.width,
+ .bottom = region->imageExtent.height,
+ .back = region->imageExtent.depth,
+ };
+
+ cmdlist->CopyTextureRegion(&dst_img_loc, region->imageOffset.x,
+ region->imageOffset.y, region->imageOffset.z,
+ &src_buf_loc, &src_box);
+ return;
+ }
+
+ /* Copy line-by-line if things are not properly aligned. */
+ D3D12_BOX src_box = {
+ .top = 0,
+ .front = 0,
+ .bottom = blkh,
+ .back = blkd,
+ };
+
+ for (uint32_t z = 0; z < region->imageExtent.depth; z += blkd) {
+ for (uint32_t y = 0; y < region->imageExtent.height; y += blkh) {
+ uint32_t src_x;
+
+ D3D12_TEXTURE_COPY_LOCATION src_buf_line_loc =
+ dzn_buffer_get_line_copy_loc(src_buffer, dst_image->vk.format,
+ region, &src_buf_loc,
+ y, z, &src_x);
+
+ src_box.left = src_x;
+ src_box.right = src_x + region->imageExtent.width;
+ cmdlist->CopyTextureRegion(&dst_img_loc,
+ region->imageOffset.x,
+ region->imageOffset.y + y,
+ region->imageOffset.z + z,
+ &src_buf_line_loc, &src_box);
+ }
+ }
+}
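+
+/* The fast path presumably requires the D3D12 placed-footprint rules:
+ * Offset aligned to D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT (512) and
+ * RowPitch aligned to D3D12_TEXTURE_DATA_PITCH_ALIGNMENT (256). Tightly
+ * packed Vulkan buffer layouts often violate one of these, hence the
+ * line-by-line fallback.
+ */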
+
+static void
+dzn_cmd_buffer_copy_img2buf_region(dzn_cmd_buffer *cmdbuf,
+ const VkCopyImageToBufferInfo2 *info,
+ uint32_t r,
+ VkImageAspectFlagBits aspect,
+ uint32_t l)
+{
+ dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+ VK_FROM_HANDLE(dzn_image, src_image, info->srcImage);
+ VK_FROM_HANDLE(dzn_buffer, dst_buffer, info->dstBuffer);
+
+ ID3D12Device *dev = device->dev;
+ ID3D12GraphicsCommandList *cmdlist = cmdbuf->cmdlist;
+
+ const VkBufferImageCopy2 *region = &info->pRegions[r];
+ enum pipe_format pfmt = vk_format_to_pipe_format(src_image->vk.format);
+ uint32_t blkh = util_format_get_blockheight(pfmt);
+ uint32_t blkd = util_format_get_blockdepth(pfmt);
+
+ D3D12_TEXTURE_COPY_LOCATION src_img_loc =
+ dzn_image_get_copy_loc(src_image, &region->imageSubresource, aspect, l);
+ D3D12_TEXTURE_COPY_LOCATION dst_buf_loc =
+ dzn_buffer_get_copy_loc(dst_buffer, src_image->vk.format, region, aspect, l);
+
+ if (dzn_buffer_supports_region_copy(&dst_buf_loc)) {
+ /* RowPitch and Offset meet the D3D12 alignment requirements, so we can
+ * copy the whole thing in one call.
+ */
+ D3D12_BOX src_box = {
+ .left = (UINT)region->imageOffset.x,
+ .top = (UINT)region->imageOffset.y,
+ .front = (UINT)region->imageOffset.z,
+ .right = (UINT)(region->imageOffset.x + region->imageExtent.width),
+ .bottom = (UINT)(region->imageOffset.y + region->imageExtent.height),
+ .back = (UINT)(region->imageOffset.z + region->imageExtent.depth),
+ };
+
+ cmdlist->CopyTextureRegion(&dst_buf_loc, 0, 0, 0,
+ &src_img_loc, &src_box);
+ return;
+ }
+
+ D3D12_BOX src_box = {
+ .left = (UINT)region->imageOffset.x,
+ .right = (UINT)(region->imageOffset.x + region->imageExtent.width),
+ };
+
+ /* Copy line-by-line if things are not properly aligned. */
+ for (uint32_t z = 0; z < region->imageExtent.depth; z += blkd) {
+ src_box.front = region->imageOffset.z + z;
+ src_box.back = src_box.front + blkd;
+
+ for (uint32_t y = 0; y < region->imageExtent.height; y += blkh) {
+ uint32_t dst_x;
+
+ D3D12_TEXTURE_COPY_LOCATION dst_buf_line_loc =
+ dzn_buffer_get_line_copy_loc(dst_buffer, src_image->vk.format,
+ region, &dst_buf_loc,
+ y, z, &dst_x);
+
+ src_box.top = region->imageOffset.y + y;
+ src_box.bottom = src_box.top + blkh;
+
+ cmdlist->CopyTextureRegion(&dst_buf_line_loc, dst_x, 0, 0,
+ &src_img_loc, &src_box);
+ }
+ }
+}
+
+static void
+dzn_cmd_buffer_copy_img_chunk(dzn_cmd_buffer *cmdbuf,
+ const VkCopyImageInfo2 *info,
+ D3D12_RESOURCE_DESC &tmp_desc,
+ D3D12_TEXTURE_COPY_LOCATION &tmp_loc,
+ uint32_t r,
+ VkImageAspectFlagBits aspect,
+ uint32_t l)
+{
+ dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+ VK_FROM_HANDLE(dzn_image, src, info->srcImage);
+ VK_FROM_HANDLE(dzn_image, dst, info->dstImage);
+
+ ID3D12Device *dev = device->dev;
+ ID3D12GraphicsCommandList *cmdlist = cmdbuf->cmdlist;
+
+ const VkImageCopy2 *region = &info->pRegions[r];
+ const VkImageSubresourceLayers *src_subres = &region->srcSubresource;
+ const VkImageSubresourceLayers *dst_subres = &region->dstSubresource;
+ VkFormat src_format =
+ dzn_image_get_plane_format(src->vk.format, aspect);
+ VkFormat dst_format =
+ dzn_image_get_plane_format(dst->vk.format, aspect);
+
+ enum pipe_format src_pfmt = vk_format_to_pipe_format(src_format);
+ uint32_t src_blkw = util_format_get_blockwidth(src_pfmt);
+ uint32_t src_blkh = util_format_get_blockheight(src_pfmt);
+ uint32_t src_blkd = util_format_get_blockdepth(src_pfmt);
+ enum pipe_format dst_pfmt = vk_format_to_pipe_format(dst_format);
+ uint32_t dst_blkw = util_format_get_blockwidth(dst_pfmt);
+ uint32_t dst_blkh = util_format_get_blockheight(dst_pfmt);
+ uint32_t dst_blkd = util_format_get_blockdepth(dst_pfmt);
+
+ assert(src_subres->layerCount == dst_subres->layerCount);
+ assert(src_subres->aspectMask == dst_subres->aspectMask);
+
+ auto dst_loc = dzn_image_get_copy_loc(dst, dst_subres, aspect, l);
+ auto src_loc = dzn_image_get_copy_loc(src, src_subres, aspect, l);
+
+ D3D12_BOX src_box = {
+ .left = (UINT)MAX2(region->srcOffset.x, 0),
+ .top = (UINT)MAX2(region->srcOffset.y, 0),
+ .front = (UINT)MAX2(region->srcOffset.z, 0),
+ .right = (UINT)region->srcOffset.x + region->extent.width,
+ .bottom = (UINT)region->srcOffset.y + region->extent.height,
+ .back = (UINT)region->srcOffset.z + region->extent.depth,
+ };
+
+ if (!tmp_loc.pResource) {
+ cmdlist->CopyTextureRegion(&dst_loc, region->dstOffset.x,
+ region->dstOffset.y, region->dstOffset.z,
+ &src_loc, &src_box);
+ return;
+ }
+
+ tmp_desc.Format =
+ dzn_image_get_placed_footprint_format(src->vk.format, aspect);
+ tmp_desc.Width = region->extent.width;
+ tmp_desc.Height = region->extent.height;
+
+ dev->GetCopyableFootprints(&tmp_desc,
+ 0, 1, 0,
+ &tmp_loc.PlacedFootprint,
+ NULL, NULL, NULL);
+
+ tmp_loc.PlacedFootprint.Footprint.Depth = region->extent.depth;
+
+ D3D12_RESOURCE_BARRIER barrier = {
+ .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
+ .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
+ .Transition = {
+ .pResource = tmp_loc.pResource,
+ .Subresource = 0,
+ .StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE,
+ .StateAfter = D3D12_RESOURCE_STATE_COPY_DEST,
+ },
+ };
+
+ if (r > 0 || l > 0)
+ cmdlist->ResourceBarrier(1, &barrier);
+
+ cmdlist->CopyTextureRegion(&tmp_loc, 0, 0, 0, &src_loc, &src_box);
+
+ DZN_SWAP(barrier.Transition.StateBefore, barrier.Transition.StateAfter);
+ cmdlist->ResourceBarrier(1, &barrier);
+
+ tmp_desc.Format =
+ dzn_image_get_placed_footprint_format(dst->vk.format, aspect);
+ if (src_blkw != dst_blkw)
+ tmp_desc.Width = DIV_ROUND_UP(region->extent.width, src_blkw) * dst_blkw;
+ if (src_blkh != dst_blkh)
+ tmp_desc.Height = DIV_ROUND_UP(region->extent.height, src_blkh) * dst_blkh;
+
+ device->dev->GetCopyableFootprints(&tmp_desc,
+ 0, 1, 0,
+ &tmp_loc.PlacedFootprint,
+ NULL, NULL, NULL);
+
+ if (src_blkd != dst_blkd) {
+ tmp_loc.PlacedFootprint.Footprint.Depth =
+ DIV_ROUND_UP(region->extent.depth, src_blkd) * dst_blkd;
+ } else {
+ tmp_loc.PlacedFootprint.Footprint.Depth = region->extent.depth;
+ }
+
+ D3D12_BOX tmp_box = {
+ .left = 0,
+ .top = 0,
+ .front = 0,
+ .right = tmp_loc.PlacedFootprint.Footprint.Width,
+ .bottom = tmp_loc.PlacedFootprint.Footprint.Height,
+ .back = tmp_loc.PlacedFootprint.Footprint.Depth,
+ };
+
+ cmdlist->CopyTextureRegion(&dst_loc,
+ region->dstOffset.x,
+ region->dstOffset.y,
+ region->dstOffset.z,
+ &tmp_loc, &tmp_box);
+}
+
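+/* Create a transient view of the source subresource, write it to the
+ * descriptor heap slot we were given, and bind it as the blit source
+ * texture. The view object itself can be destroyed as soon as the
+ * descriptor has been written.
+ */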
+static void
+dzn_cmd_buffer_blit_prepare_src_view(dzn_cmd_buffer *cmdbuf,
+ VkImage image,
+ VkImageAspectFlagBits aspect,
+ const VkImageSubresourceLayers *subres,
+ dzn_descriptor_heap *heap,
+ uint32_t heap_slot)
+{
+ dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+ VK_FROM_HANDLE(dzn_image, img, image);
+ VkImageViewCreateInfo iview_info = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = image,
+ .format = img->vk.format,
+ .subresourceRange = {
+ .aspectMask = (VkImageAspectFlags)aspect,
+ .baseMipLevel = subres->mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = subres->baseArrayLayer,
+ .layerCount = subres->layerCount,
+ },
+ };
+
+ if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT) {
+ iview_info.components.r = VK_COMPONENT_SWIZZLE_G;
+ iview_info.components.g = VK_COMPONENT_SWIZZLE_G;
+ iview_info.components.b = VK_COMPONENT_SWIZZLE_G;
+ iview_info.components.a = VK_COMPONENT_SWIZZLE_G;
+   } else if (aspect == VK_IMAGE_ASPECT_DEPTH_BIT) {
+ iview_info.components.r = VK_COMPONENT_SWIZZLE_R;
+ iview_info.components.g = VK_COMPONENT_SWIZZLE_R;
+ iview_info.components.b = VK_COMPONENT_SWIZZLE_R;
+ iview_info.components.a = VK_COMPONENT_SWIZZLE_R;
+ }
+
+ switch (img->vk.image_type) {
+ case VK_IMAGE_TYPE_1D:
+ iview_info.viewType = img->vk.array_layers > 1 ?
+ VK_IMAGE_VIEW_TYPE_1D_ARRAY : VK_IMAGE_VIEW_TYPE_1D;
+ break;
+ case VK_IMAGE_TYPE_2D:
+ iview_info.viewType = img->vk.array_layers > 1 ?
+ VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D;
+ break;
+ case VK_IMAGE_TYPE_3D:
+ iview_info.viewType = VK_IMAGE_VIEW_TYPE_3D;
+ break;
+ default:
+ unreachable("Invalid type");
+ }
+
+ dzn_image_view iview;
+ dzn_image_view_init(device, &iview, &iview_info);
+ dzn_descriptor_heap_write_image_view_desc(heap, heap_slot, false, false, &iview);
+ dzn_image_view_finish(&iview);
+
+ D3D12_GPU_DESCRIPTOR_HANDLE handle =
+ dzn_descriptor_heap_get_gpu_handle(heap, heap_slot);
+ cmdbuf->cmdlist->SetGraphicsRootDescriptorTable(0, handle);
+}
+
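+/* Bind one level/layer of the destination image as the current render
+ * target (color aspects) or depth/stencil target (depth/stencil aspects).
+ */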
+static void
+dzn_cmd_buffer_blit_prepare_dst_view(dzn_cmd_buffer *cmdbuf,
+ dzn_image *img,
+ VkImageAspectFlagBits aspect,
+ uint32_t level, uint32_t layer)
+{
+ bool ds = aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT);
+ VkImageSubresourceRange range = {
+ .aspectMask = (VkImageAspectFlags)aspect,
+ .baseMipLevel = level,
+ .levelCount = 1,
+ .baseArrayLayer = layer,
+ .layerCount = 1,
+ };
+
+ if (ds) {
+ auto desc = dzn_image_get_dsv_desc(img, &range, 0);
+ auto handle = dzn_cmd_buffer_get_dsv(cmdbuf, img, &desc);
+ cmdbuf->cmdlist->OMSetRenderTargets(0, NULL, TRUE, &handle);
+ } else {
+ auto desc = dzn_image_get_rtv_desc(img, &range, 0);
+ auto handle = dzn_cmd_buffer_get_rtv(cmdbuf, img, &desc);
+ cmdbuf->cmdlist->OMSetRenderTargets(1, &handle, FALSE, NULL);
+ }
+}
+
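+/* Pick the meta blit pipeline matching the source/destination formats,
+ * sample count, aspect and filter, and bind it along with its root
+ * signature. The pipeline is looked up (and created on-demand) by
+ * dzn_meta_blits_get_context().
+ */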
+static void
+dzn_cmd_buffer_blit_set_pipeline(dzn_cmd_buffer *cmdbuf,
+ const dzn_image *src,
+ const dzn_image *dst,
+ VkImageAspectFlagBits aspect,
+ VkFilter filter, bool resolve)
+{
+ dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+ enum pipe_format pfmt = vk_format_to_pipe_format(dst->vk.format);
+ VkImageUsageFlags usage =
+ vk_format_is_depth_or_stencil(dst->vk.format) ?
+ VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT :
+ VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
+ struct dzn_meta_blit_key ctx_key = {
+ .out_format = dzn_image_get_dxgi_format(dst->vk.format, usage, aspect),
+ .samples = (uint32_t)src->vk.samples,
+ .loc = (uint32_t)(aspect == VK_IMAGE_ASPECT_DEPTH_BIT ?
+ FRAG_RESULT_DEPTH :
+ aspect == VK_IMAGE_ASPECT_STENCIL_BIT ?
+ FRAG_RESULT_STENCIL :
+ FRAG_RESULT_DATA0),
+ .out_type = (uint32_t)(util_format_is_pure_uint(pfmt) ? GLSL_TYPE_UINT :
+ util_format_is_pure_sint(pfmt) ? GLSL_TYPE_INT :
+ aspect == VK_IMAGE_ASPECT_STENCIL_BIT ? GLSL_TYPE_UINT :
+ GLSL_TYPE_FLOAT),
+ .sampler_dim = (uint32_t)(src->vk.image_type == VK_IMAGE_TYPE_1D ? GLSL_SAMPLER_DIM_1D :
+ src->vk.image_type == VK_IMAGE_TYPE_2D && src->vk.samples == 1 ? GLSL_SAMPLER_DIM_2D :
+ src->vk.image_type == VK_IMAGE_TYPE_2D && src->vk.samples > 1 ? GLSL_SAMPLER_DIM_MS :
+ GLSL_SAMPLER_DIM_3D),
+ .src_is_array = src->vk.array_layers > 1,
+ .resolve = resolve,
+ .linear_filter = filter == VK_FILTER_LINEAR,
+ .padding = 0,
+ };
+
+ const dzn_meta_blit *ctx =
+ dzn_meta_blits_get_context(device, &ctx_key);
+ assert(ctx);
+
+ cmdbuf->cmdlist->SetGraphicsRootSignature(ctx->root_sig);
+ cmdbuf->cmdlist->SetPipelineState(ctx->pipeline_state);
+}
+
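+/* Set up the geometry for a 2D blit: the destination rectangle is turned
+ * into a 4-vertex triangle strip in NDC, with per-vertex source coordinates
+ * (normalized when the source is sampled as a regular texture) passed as
+ * root constants. The viewport covers the whole destination level and a
+ * scissor clips to the destination rectangle.
+ */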
+static void
+dzn_cmd_buffer_blit_set_2d_region(dzn_cmd_buffer *cmdbuf,
+ const dzn_image *src,
+ const VkImageSubresourceLayers *src_subres,
+ const VkOffset3D *src_offsets,
+ const dzn_image *dst,
+ const VkImageSubresourceLayers *dst_subres,
+ const VkOffset3D *dst_offsets,
+ bool normalize_src_coords)
+{
+ uint32_t dst_w = u_minify(dst->vk.extent.width, dst_subres->mipLevel);
+ uint32_t dst_h = u_minify(dst->vk.extent.height, dst_subres->mipLevel);
+ uint32_t src_w = u_minify(src->vk.extent.width, src_subres->mipLevel);
+ uint32_t src_h = u_minify(src->vk.extent.height, src_subres->mipLevel);
+
+ float dst_pos[4] = {
+ (2 * (float)dst_offsets[0].x / (float)dst_w) - 1.0f, -((2 * (float)dst_offsets[0].y / (float)dst_h) - 1.0f),
+ (2 * (float)dst_offsets[1].x / (float)dst_w) - 1.0f, -((2 * (float)dst_offsets[1].y / (float)dst_h) - 1.0f),
+ };
+
+ float src_pos[4] = {
+ (float)src_offsets[0].x, (float)src_offsets[0].y,
+ (float)src_offsets[1].x, (float)src_offsets[1].y,
+ };
+
+ if (normalize_src_coords) {
+ src_pos[0] /= src_w;
+ src_pos[1] /= src_h;
+ src_pos[2] /= src_w;
+ src_pos[3] /= src_h;
+ }
+
+ float coords[] = {
+ dst_pos[0], dst_pos[1], src_pos[0], src_pos[1],
+ dst_pos[2], dst_pos[1], src_pos[2], src_pos[1],
+ dst_pos[0], dst_pos[3], src_pos[0], src_pos[3],
+ dst_pos[2], dst_pos[3], src_pos[2], src_pos[3],
+ };
+
+ cmdbuf->cmdlist->SetGraphicsRoot32BitConstants(1, ARRAY_SIZE(coords), coords, 0);
+
+ D3D12_VIEWPORT vp = {
+ .TopLeftX = 0,
+ .TopLeftY = 0,
+ .Width = (float)dst_w,
+ .Height = (float)dst_h,
+ .MinDepth = 0,
+ .MaxDepth = 1,
+ };
+ cmdbuf->cmdlist->RSSetViewports(1, &vp);
+
+ D3D12_RECT scissor = {
+ .left = MIN2(dst_offsets[0].x, dst_offsets[1].x),
+ .top = MIN2(dst_offsets[0].y, dst_offsets[1].y),
+ .right = MAX2(dst_offsets[0].x, dst_offsets[1].x),
+ .bottom = MAX2(dst_offsets[0].y, dst_offsets[1].y),
+ };
+ cmdbuf->cmdlist->RSSetScissorRects(1, &scissor);
+}
+
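+/* Transition the source layers to PIXEL_SHADER_RESOURCE and the destination
+ * layers to DEPTH_WRITE or RENDER_TARGET before the blit (post == false),
+ * then back to their original states afterwards (post == true).
+ */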
+static void
+dzn_cmd_buffer_blit_issue_barriers(dzn_cmd_buffer *cmdbuf,
+ dzn_image *src, VkImageLayout src_layout,
+ const VkImageSubresourceLayers *src_subres,
+ dzn_image *dst, VkImageLayout dst_layout,
+ const VkImageSubresourceLayers *dst_subres,
+ VkImageAspectFlagBits aspect,
+ bool post)
+{
+ bool ds = aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT);
+ D3D12_RESOURCE_BARRIER barriers[2] = {
+ {
+ .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
+ .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
+ .Transition = {
+ .pResource = src->res,
+ .StateBefore = dzn_image_layout_to_state(src_layout),
+ .StateAfter = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE,
+ },
+ },
+ {
+ .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
+ .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
+ .Transition = {
+ .pResource = dst->res,
+ .StateBefore = dzn_image_layout_to_state(dst_layout),
+ .StateAfter = ds ?
+ D3D12_RESOURCE_STATE_DEPTH_WRITE :
+ D3D12_RESOURCE_STATE_RENDER_TARGET,
+ },
+ },
+ };
+
+ if (post) {
+ DZN_SWAP(barriers[0].Transition.StateBefore, barriers[0].Transition.StateAfter);
+ DZN_SWAP(barriers[1].Transition.StateBefore, barriers[1].Transition.StateAfter);
+ }
+
+ uint32_t layer_count = dzn_get_layer_count(src, src_subres);
+ uint32_t src_level = src_subres->mipLevel;
+ uint32_t dst_level = dst_subres->mipLevel;
+
+ assert(dzn_get_layer_count(dst, dst_subres) == layer_count);
+ assert(src_level < src->vk.mip_levels);
+ assert(dst_level < dst->vk.mip_levels);
+
+ for (uint32_t layer = 0; layer < layer_count; layer++) {
+ barriers[0].Transition.Subresource =
+ dzn_image_layers_get_subresource_index(src, src_subres, aspect, layer);
+ barriers[1].Transition.Subresource =
+ dzn_image_layers_get_subresource_index(dst, dst_subres, aspect, layer);
+ cmdbuf->cmdlist->ResourceBarrier(ARRAY_SIZE(barriers), barriers);
+ }
+}
+
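+/* Blit a single VkImageBlit2 region. 3D and layered blits are lowered to
+ * one draw per destination slice/layer, with the source z coordinate
+ * stepped so that slice centers are sampled.
+ */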
+static void
+dzn_cmd_buffer_blit_region(dzn_cmd_buffer *cmdbuf,
+ const VkBlitImageInfo2 *info,
+ dzn_descriptor_heap *heap,
+ uint32_t *heap_slot,
+ uint32_t r)
+{
+ dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+ VK_FROM_HANDLE(dzn_image, src, info->srcImage);
+ VK_FROM_HANDLE(dzn_image, dst, info->dstImage);
+
+ ID3D12Device *dev = device->dev;
+ const VkImageBlit2 *region = &info->pRegions[r];
+
+ dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) {
+ dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, info->filter, false);
+ dzn_cmd_buffer_blit_issue_barriers(cmdbuf,
+ src, info->srcImageLayout, &region->srcSubresource,
+ dst, info->dstImageLayout, &region->dstSubresource,
+ aspect, false);
+ dzn_cmd_buffer_blit_prepare_src_view(cmdbuf, info->srcImage,
+ aspect, &region->srcSubresource,
+ heap, (*heap_slot)++);
+ dzn_cmd_buffer_blit_set_2d_region(cmdbuf,
+ src, &region->srcSubresource, region->srcOffsets,
+ dst, &region->dstSubresource, region->dstOffsets,
+ src->vk.samples == 1);
+
+ uint32_t dst_depth =
+ region->dstOffsets[1].z > region->dstOffsets[0].z ?
+ region->dstOffsets[1].z - region->dstOffsets[0].z :
+ region->dstOffsets[0].z - region->dstOffsets[1].z;
+ uint32_t src_depth =
+ region->srcOffsets[1].z > region->srcOffsets[0].z ?
+ region->srcOffsets[1].z - region->srcOffsets[0].z :
+ region->srcOffsets[0].z - region->srcOffsets[1].z;
+
+ uint32_t layer_count = dzn_get_layer_count(src, &region->srcSubresource);
+ uint32_t dst_level = region->dstSubresource.mipLevel;
+
+ float src_slice_step = layer_count > 1 ? 1 : (float)src_depth / dst_depth;
+ if (region->srcOffsets[0].z > region->srcOffsets[1].z)
+ src_slice_step = -src_slice_step;
+ float src_z_coord = layer_count > 1 ?
+ 0 : (float)region->srcOffsets[0].z + (src_slice_step * 0.5f);
+ uint32_t slice_count = layer_count > 1 ? layer_count : dst_depth;
+ uint32_t dst_z_coord = layer_count > 1 ?
+ region->dstSubresource.baseArrayLayer :
+ region->dstOffsets[0].z;
+ if (region->dstOffsets[0].z > region->dstOffsets[1].z)
+ dst_z_coord--;
+
+ uint32_t dst_slice_step = region->dstOffsets[0].z < region->dstOffsets[1].z ?
+ 1 : -1;
+
+ /* Normalize the src coordinates/step */
+ if (layer_count == 1 && src->vk.samples == 1) {
+ src_z_coord /= src->vk.extent.depth;
+ src_slice_step /= src->vk.extent.depth;
+ }
+
+ for (uint32_t slice = 0; slice < slice_count; slice++) {
+ dzn_cmd_buffer_blit_prepare_dst_view(cmdbuf, dst, aspect, dst_level, dst_z_coord);
+ cmdbuf->cmdlist->SetGraphicsRoot32BitConstants(1, 1, &src_z_coord, 16);
+ cmdbuf->cmdlist->DrawInstanced(4, 1, 0, 0);
+ src_z_coord += src_slice_step;
+ dst_z_coord += dst_slice_step;
+ }
+
+ dzn_cmd_buffer_blit_issue_barriers(cmdbuf,
+ src, info->srcImageLayout, &region->srcSubresource,
+ dst, info->dstImageLayout, &region->dstSubresource,
+ aspect, true);
+ }
+}
+
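+/* Resolve one VkImageResolve2 region using the meta blit path with a
+ * resolve-enabled pipeline: one draw per layer, passing the layer index as
+ * the source z coordinate.
+ */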
+static void
+dzn_cmd_buffer_resolve_region(dzn_cmd_buffer *cmdbuf,
+ const VkResolveImageInfo2 *info,
+ dzn_descriptor_heap *heap,
+ uint32_t *heap_slot,
+ uint32_t r)
+{
+ dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+ VK_FROM_HANDLE(dzn_image, src, info->srcImage);
+ VK_FROM_HANDLE(dzn_image, dst, info->dstImage);
+
+ ID3D12Device *dev = device->dev;
+ const VkImageResolve2 *region = &info->pRegions[r];
+
+ dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) {
+ dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, VK_FILTER_NEAREST, true);
+ dzn_cmd_buffer_blit_issue_barriers(cmdbuf,
+ src, info->srcImageLayout, &region->srcSubresource,
+ dst, info->dstImageLayout, &region->dstSubresource,
+ aspect, false);
+ dzn_cmd_buffer_blit_prepare_src_view(cmdbuf, info->srcImage, aspect,
+ &region->srcSubresource,
+ heap, (*heap_slot)++);
+
+ VkOffset3D src_offset[2] = {
+ {
+ .x = region->srcOffset.x,
+ .y = region->srcOffset.y,
+ },
+ {
+ .x = (int32_t)(region->srcOffset.x + region->extent.width),
+ .y = (int32_t)(region->srcOffset.y + region->extent.height),
+ },
+ };
+ VkOffset3D dst_offset[2] = {
+ {
+ .x = region->dstOffset.x,
+ .y = region->dstOffset.y,
+ },
+ {
+ .x = (int32_t)(region->dstOffset.x + region->extent.width),
+ .y = (int32_t)(region->dstOffset.y + region->extent.height),
+ },
+ };
+
+ dzn_cmd_buffer_blit_set_2d_region(cmdbuf,
+ src, &region->srcSubresource, src_offset,
+ dst, &region->dstSubresource, dst_offset,
+ false);
+
+ uint32_t layer_count = dzn_get_layer_count(src, &region->srcSubresource);
+ for (uint32_t layer = 0; layer < layer_count; layer++) {
+ float src_z_coord = layer;
+
+ dzn_cmd_buffer_blit_prepare_dst_view(cmdbuf,
+ dst, aspect, region->dstSubresource.mipLevel,
+ region->dstSubresource.baseArrayLayer + layer);
+ cmdbuf->cmdlist->SetGraphicsRoot32BitConstants(1, 1, &src_z_coord, 16);
+ cmdbuf->cmdlist->DrawInstanced(4, 1, 0, 0);
+ }
+
+ dzn_cmd_buffer_blit_issue_barriers(cmdbuf,
+ src, info->srcImageLayout, &region->srcSubresource,
+ dst, info->dstImageLayout, &region->dstSubresource,
+ aspect, true);
+ }
+}
+
+static void
+dzn_cmd_buffer_clear_attachments(dzn_cmd_buffer *cmdbuf,
+ uint32_t attachment_count,
+ const VkClearAttachment *attachments,
+ uint32_t rect_count,
+ const VkClearRect *rects)
+{
+ struct dzn_render_pass *pass = cmdbuf->state.pass;
+ const struct dzn_subpass *subpass = &pass->subpasses[cmdbuf->state.subpass];
+
+ for (unsigned i = 0; i < attachment_count; i++) {
+ uint32_t idx;
+ if (attachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT)
+ idx = subpass->colors[attachments[i].colorAttachment].idx;
+ else
+ idx = subpass->zs.idx;
+
+ for (uint32_t j = 0; j < rect_count; j++) {
+ D3D12_RECT rect;
+
+ dzn_translate_rect(&rect, &rects[j].rect);
+ dzn_cmd_buffer_clear_attachment(cmdbuf,
+ idx, &attachments[i].clearValue,
+ attachments[i].aspectMask,
+ rects[j].baseArrayLayer,
+ rects[j].layerCount,
+ 1, &rect);
+ }
+ }
+}
+
+static void
+dzn_cmd_buffer_attachment_ref_transition(dzn_cmd_buffer *cmdbuf,
+ const dzn_attachment_ref *att)
+{
+ const dzn_image_view *iview = cmdbuf->state.framebuffer->attachments[att->idx];
+ const dzn_image *image = container_of(iview->vk.image, dzn_image, vk);
+
+ if (att->before == att->during)
+ return;
+
+ D3D12_RESOURCE_BARRIER barrier = {
+ .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
+ .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
+ .Transition = {
+ .pResource = image->res,
+         .Subresource = 0, /* FIXME: transition all subresources */
+ .StateBefore = att->before,
+ .StateAfter = att->during,
+ },
+ };
+ cmdbuf->cmdlist->ResourceBarrier(1, &barrier);
+}
+
+void
+dzn_cmd_buffer_attachment_transition(dzn_cmd_buffer *cmdbuf,
+ const dzn_attachment *att)
+{
+ const dzn_image_view *iview = cmdbuf->state.framebuffer->attachments[att->idx];
+ const dzn_image *image = container_of(iview->vk.image, dzn_image, vk);
+
+ if (att->last == att->after)
+ return;
+
+ D3D12_RESOURCE_BARRIER barrier = {
+ .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
+ .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
+ .Transition = {
+ .pResource = image->res,
+         .Subresource = 0, /* FIXME: transition all subresources */
+ .StateBefore = att->last,
+ .StateAfter = att->after,
+ },
+ };
+ cmdbuf->cmdlist->ResourceBarrier(1, &barrier);
+}
+
+static void
+dzn_cmd_buffer_resolve_attachment(dzn_cmd_buffer *cmdbuf, uint32_t i)
+{
+ const struct dzn_subpass *subpass =
+ &cmdbuf->state.pass->subpasses[cmdbuf->state.subpass];
+
+ if (subpass->resolve[i].idx == VK_ATTACHMENT_UNUSED)
+ return;
+
+ const dzn_framebuffer *framebuffer = cmdbuf->state.framebuffer;
+ struct dzn_image_view *src = framebuffer->attachments[subpass->colors[i].idx];
+ struct dzn_image *src_img = container_of(src->vk.image, dzn_image, vk);
+ struct dzn_image_view *dst = framebuffer->attachments[subpass->resolve[i].idx];
+ struct dzn_image *dst_img = container_of(dst->vk.image, dzn_image, vk);
+ D3D12_RESOURCE_BARRIER barriers[2];
+ uint32_t barrier_count = 0;
+
+ /* TODO: 2DArrays/3D */
+ if (subpass->colors[i].during != D3D12_RESOURCE_STATE_RESOLVE_SOURCE) {
+ barriers[barrier_count++] = D3D12_RESOURCE_BARRIER {
+ .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
+ .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
+ .Transition = {
+ .pResource = src_img->res,
+ .Subresource = 0,
+ .StateBefore = subpass->colors[i].during,
+ .StateAfter = D3D12_RESOURCE_STATE_RESOLVE_SOURCE,
+ },
+ };
+ }
+
+ if (subpass->resolve[i].during != D3D12_RESOURCE_STATE_RESOLVE_DEST) {
+ barriers[barrier_count++] = D3D12_RESOURCE_BARRIER {
+ .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
+ .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
+ .Transition = {
+ .pResource = dst_img->res,
+ .Subresource = 0,
+ .StateBefore = subpass->resolve[i].during,
+ .StateAfter = D3D12_RESOURCE_STATE_RESOLVE_DEST,
+ },
+ };
+ }
+
+ if (barrier_count)
+ cmdbuf->cmdlist->ResourceBarrier(barrier_count, barriers);
+
+ cmdbuf->cmdlist->ResolveSubresource(dst_img->res, 0,
+ src_img->res, 0,
+ dst->srv_desc.Format);
+
+ for (uint32_t b = 0; b < barrier_count; b++)
+ DZN_SWAP(barriers[b].Transition.StateBefore, barriers[b].Transition.StateAfter);
+
+ if (barrier_count)
+ cmdbuf->cmdlist->ResourceBarrier(barrier_count, barriers);
+}
+
+static void
+dzn_cmd_buffer_begin_subpass(dzn_cmd_buffer *cmdbuf)
+{
+ struct dzn_framebuffer *framebuffer = cmdbuf->state.framebuffer;
+ struct dzn_render_pass *pass = cmdbuf->state.pass;
+ const struct dzn_subpass *subpass = &pass->subpasses[cmdbuf->state.subpass];
+
+ D3D12_CPU_DESCRIPTOR_HANDLE rt_handles[MAX_RTS] = { };
+ D3D12_CPU_DESCRIPTOR_HANDLE zs_handle = { 0 };
+
+ for (uint32_t i = 0; i < subpass->color_count; i++) {
+ if (subpass->colors[i].idx == VK_ATTACHMENT_UNUSED) continue;
+
+ dzn_image_view *iview = framebuffer->attachments[subpass->colors[i].idx];
+ dzn_image *img = container_of(iview->vk.image, dzn_image, vk);
+
+ rt_handles[i] = dzn_cmd_buffer_get_rtv(cmdbuf, img, &iview->rtv_desc);
+ }
+
+ if (subpass->zs.idx != VK_ATTACHMENT_UNUSED) {
+ dzn_image_view *iview = framebuffer->attachments[subpass->zs.idx];
+ dzn_image *img = container_of(iview->vk.image, dzn_image, vk);
+
+ zs_handle = dzn_cmd_buffer_get_dsv(cmdbuf, img, &iview->dsv_desc);
+ }
+
+ cmdbuf->cmdlist->OMSetRenderTargets(subpass->color_count,
+ subpass->color_count ? rt_handles : NULL,
+ FALSE, zs_handle.ptr ? &zs_handle : NULL);
+
+ for (uint32_t i = 0; i < subpass->color_count; i++)
+ dzn_cmd_buffer_attachment_ref_transition(cmdbuf, &subpass->colors[i]);
+ for (uint32_t i = 0; i < subpass->input_count; i++)
+ dzn_cmd_buffer_attachment_ref_transition(cmdbuf, &subpass->inputs[i]);
+
+ if (subpass->zs.idx != VK_ATTACHMENT_UNUSED)
+ dzn_cmd_buffer_attachment_ref_transition(cmdbuf, &subpass->zs);
+}
+
+static void
+dzn_cmd_buffer_end_subpass(dzn_cmd_buffer *cmdbuf)
+{
+ const dzn_subpass *subpass = &cmdbuf->state.pass->subpasses[cmdbuf->state.subpass];
+
+ for (uint32_t i = 0; i < subpass->color_count; i++)
+ dzn_cmd_buffer_resolve_attachment(cmdbuf, i);
+}
+
+static void
+dzn_cmd_buffer_update_pipeline(dzn_cmd_buffer *cmdbuf, uint32_t bindpoint)
+{
+ const dzn_pipeline *pipeline = cmdbuf->state.bindpoint[bindpoint].pipeline;
+
+ if (!pipeline)
+ return;
+
+ if (cmdbuf->state.bindpoint[bindpoint].dirty & DZN_CMD_BINDPOINT_DIRTY_PIPELINE) {
+ if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
+ const dzn_graphics_pipeline *gfx =
+ reinterpret_cast<const dzn_graphics_pipeline *>(pipeline);
+ cmdbuf->cmdlist->SetGraphicsRootSignature(pipeline->root.sig);
+ cmdbuf->cmdlist->IASetPrimitiveTopology(gfx->ia.topology);
+ } else {
+ cmdbuf->cmdlist->SetComputeRootSignature(pipeline->root.sig);
+ }
+ }
+
+ if (cmdbuf->state.pipeline != pipeline) {
+ cmdbuf->cmdlist->SetPipelineState(pipeline->state);
+ cmdbuf->state.pipeline = pipeline;
+ }
+}
+
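+/* Copy the descriptors referenced by the currently bound descriptor sets
+ * into GPU-visible CBV/SRV/UAV and sampler heaps, re-emitting dynamic
+ * buffer descriptors with their dynamic offsets applied, then point the
+ * root descriptor tables at the freshly populated ranges.
+ */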
+static void
+dzn_cmd_buffer_update_heaps(dzn_cmd_buffer *cmdbuf, uint32_t bindpoint)
+{
+ dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+ struct dzn_descriptor_state *desc_state =
+ &cmdbuf->state.bindpoint[bindpoint].desc_state;
+ dzn_descriptor_heap *new_heaps[NUM_POOL_TYPES] = {
+ desc_state->heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV],
+ desc_state->heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER]
+ };
+ uint32_t new_heap_offsets[NUM_POOL_TYPES] = {};
+ bool update_root_desc_table[NUM_POOL_TYPES] = {};
+ const struct dzn_pipeline *pipeline =
+ cmdbuf->state.bindpoint[bindpoint].pipeline;
+
+ if (!(cmdbuf->state.bindpoint[bindpoint].dirty & DZN_CMD_BINDPOINT_DIRTY_HEAPS))
+ goto set_heaps;
+
+ dzn_foreach_pool_type (type) {
+ uint32_t desc_count = pipeline->desc_count[type];
+ if (!desc_count)
+ continue;
+
+ dzn_descriptor_heap_pool *pool =
+ type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV ?
+ &cmdbuf->cbv_srv_uav_pool : &cmdbuf->sampler_pool;
+ uint32_t dst_offset = 0;
+ dzn_descriptor_heap *dst_heap = NULL;
+ uint32_t dst_heap_offset = 0;
+
+ dzn_descriptor_heap_pool_alloc_slots(pool, device, desc_count,
+ &dst_heap, &dst_heap_offset);
+ new_heap_offsets[type] = dst_heap_offset;
+ update_root_desc_table[type] = true;
+
+ for (uint32_t s = 0; s < MAX_SETS; s++) {
+ const struct dzn_descriptor_set *set = desc_state->sets[s].set;
+ if (!set) continue;
+
+ uint32_t set_heap_offset = pipeline->sets[s].heap_offsets[type];
+ uint32_t set_desc_count = pipeline->sets[s].range_desc_count[type];
+ if (set_desc_count) {
+ mtx_lock(&set->pool->defragment_lock);
+ dzn_descriptor_heap_copy(dst_heap, dst_heap_offset + set_heap_offset,
+ &set->pool->heaps[type], set->heap_offsets[type],
+ set_desc_count);
+ mtx_unlock(&set->pool->defragment_lock);
+ }
+
+ if (type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) {
+ uint32_t dynamic_buffer_count = pipeline->sets[s].dynamic_buffer_count;
+ for (uint32_t o = 0; o < dynamic_buffer_count; o++) {
+ uint32_t desc_heap_offset =
+ pipeline->sets[s].dynamic_buffer_heap_offsets[o].srv;
+ dzn_buffer_desc bdesc = set->dynamic_buffers[o];
+ bdesc.offset += desc_state->sets[s].dynamic_offsets[o];
+
+ dzn_descriptor_heap_write_buffer_desc(dst_heap,
+ dst_heap_offset + set_heap_offset + desc_heap_offset,
+ false, &bdesc);
+
+ if (pipeline->sets[s].dynamic_buffer_heap_offsets[o].uav != ~0) {
+ desc_heap_offset = pipeline->sets[s].dynamic_buffer_heap_offsets[o].uav;
+ dzn_descriptor_heap_write_buffer_desc(dst_heap,
+ dst_heap_offset + set_heap_offset + desc_heap_offset,
+ true, &bdesc);
+ }
+ }
+ }
+ }
+
+ new_heaps[type] = dst_heap;
+ }
+
+set_heaps:
+ if (new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] ||
+ new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER] != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER]) {
+ ID3D12DescriptorHeap *desc_heaps[2];
+ uint32_t num_desc_heaps = 0;
+ if (new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV])
+ desc_heaps[num_desc_heaps++] = new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV]->heap;
+ if (new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER])
+ desc_heaps[num_desc_heaps++] = new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER]->heap;
+ cmdbuf->cmdlist->SetDescriptorHeaps(num_desc_heaps, desc_heaps);
+
+ for (unsigned h = 0; h < ARRAY_SIZE(cmdbuf->state.heaps); h++)
+ cmdbuf->state.heaps[h] = new_heaps[h];
+ }
+
+ for (uint32_t r = 0; r < pipeline->root.sets_param_count; r++) {
+ D3D12_DESCRIPTOR_HEAP_TYPE type = pipeline->root.type[r];
+
+ if (!update_root_desc_table[type])
+ continue;
+
+ D3D12_GPU_DESCRIPTOR_HANDLE handle =
+ dzn_descriptor_heap_get_gpu_handle(new_heaps[type], new_heap_offsets[type]);
+
+ if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS)
+ cmdbuf->cmdlist->SetGraphicsRootDescriptorTable(r, handle);
+ else
+ cmdbuf->cmdlist->SetComputeRootDescriptorTable(r, handle);
+ }
+}
+
+static void
+dzn_cmd_buffer_update_sysvals(dzn_cmd_buffer *cmdbuf, uint32_t bindpoint)
+{
+ if (!(cmdbuf->state.bindpoint[bindpoint].dirty & DZN_CMD_BINDPOINT_DIRTY_SYSVALS))
+ return;
+
+ const struct dzn_pipeline *pipeline = cmdbuf->state.bindpoint[bindpoint].pipeline;
+ uint32_t sysval_cbv_param_idx = pipeline->root.sysval_cbv_param_idx;
+
+ if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
+ cmdbuf->cmdlist->SetGraphicsRoot32BitConstants(sysval_cbv_param_idx,
+ sizeof(cmdbuf->state.sysvals.gfx) / 4,
+ &cmdbuf->state.sysvals.gfx, 0);
+ } else {
+ cmdbuf->cmdlist->SetComputeRoot32BitConstants(sysval_cbv_param_idx,
+ sizeof(cmdbuf->state.sysvals.compute) / 4,
+ &cmdbuf->state.sysvals.compute, 0);
+ }
+}
+
+static void
+dzn_cmd_buffer_update_viewports(dzn_cmd_buffer *cmdbuf)
+{
+ const dzn_graphics_pipeline *pipeline =
+ reinterpret_cast<const dzn_graphics_pipeline *>(cmdbuf->state.pipeline);
+
+ if (!(cmdbuf->state.dirty & DZN_CMD_DIRTY_VIEWPORTS) ||
+ !pipeline->vp.count)
+ return;
+
+ cmdbuf->cmdlist->RSSetViewports(pipeline->vp.count, cmdbuf->state.viewports);
+}
+
+static void
+dzn_cmd_buffer_update_scissors(dzn_cmd_buffer *cmdbuf)
+{
+ const dzn_graphics_pipeline *pipeline =
+ reinterpret_cast<const dzn_graphics_pipeline *>(cmdbuf->state.pipeline);
+
+ if (!(cmdbuf->state.dirty & DZN_CMD_DIRTY_SCISSORS))
+ return;
+
+ if (!pipeline->scissor.count) {
+ /* Apply a scissor delimiting the render area. */
+ cmdbuf->cmdlist->RSSetScissorRects(1, &cmdbuf->state.render_area);
+ return;
+ }
+
+ D3D12_RECT scissors[MAX_SCISSOR];
+
+ memcpy(scissors, cmdbuf->state.scissors, sizeof(D3D12_RECT) * pipeline->scissor.count);
+ for (uint32_t i = 0; i < pipeline->scissor.count; i++) {
+ scissors[i].left = MAX2(scissors[i].left, cmdbuf->state.render_area.left);
+ scissors[i].top = MAX2(scissors[i].top, cmdbuf->state.render_area.top);
+ scissors[i].right = MIN2(scissors[i].right, cmdbuf->state.render_area.right);
+ scissors[i].bottom = MIN2(scissors[i].bottom, cmdbuf->state.render_area.bottom);
+ }
+
+ cmdbuf->cmdlist->RSSetScissorRects(pipeline->scissor.count, scissors);
+}
+
+static void
+dzn_cmd_buffer_update_vbviews(dzn_cmd_buffer *cmdbuf)
+{
+ const dzn_graphics_pipeline *pipeline =
+ reinterpret_cast<const dzn_graphics_pipeline *>(cmdbuf->state.pipeline);
+ unsigned start, end;
+
+ BITSET_FOREACH_RANGE(start, end, cmdbuf->state.vb.dirty, MAX_VBS)
+      cmdbuf->cmdlist->IASetVertexBuffers(start, end - start, &cmdbuf->state.vb.views[start]);
+
+ BITSET_CLEAR_RANGE(cmdbuf->state.vb.dirty, 0, MAX_VBS);
+}
+
+static void
+dzn_cmd_buffer_update_ibview(dzn_cmd_buffer *cmdbuf)
+{
+ if (!(cmdbuf->state.dirty & DZN_CMD_DIRTY_IB))
+ return;
+
+ cmdbuf->cmdlist->IASetIndexBuffer(&cmdbuf->state.ib.view);
+}
+
+static void
+dzn_cmd_buffer_update_push_constants(dzn_cmd_buffer *cmdbuf, uint32_t bindpoint)
+{
+ struct dzn_cmd_buffer_push_constant_state *state =
+ bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS ?
+ &cmdbuf->state.push_constant.gfx : &cmdbuf->state.push_constant.compute;
+
+ uint32_t offset = state->offset / 4;
+ uint32_t end = ALIGN(state->end, 4) / 4;
+ uint32_t count = end - offset;
+
+ if (!count)
+ return;
+
+ uint32_t slot = cmdbuf->state.pipeline->root.push_constant_cbv_param_idx;
+ uint32_t *vals = state->values + offset;
+
+ if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS)
+ cmdbuf->cmdlist->SetGraphicsRoot32BitConstants(slot, count, vals, offset);
+ else
+ cmdbuf->cmdlist->SetComputeRoot32BitConstants(slot, count, vals, offset);
+
+ state->offset = 0;
+ state->end = 0;
+}
+
+void
+dzn_cmd_buffer_update_zsa(dzn_cmd_buffer *cmdbuf)
+{
+ if (cmdbuf->state.dirty & DZN_CMD_DIRTY_STENCIL_REF) {
+ const dzn_graphics_pipeline *gfx = (const dzn_graphics_pipeline *)
+ cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
+ uint32_t ref =
+ gfx->zsa.stencil_test.front.uses_ref ?
+ cmdbuf->state.zsa.stencil_test.front.ref :
+ cmdbuf->state.zsa.stencil_test.back.ref;
+ cmdbuf->cmdlist->OMSetStencilRef(ref);
+ }
+}
+
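+/* D3D12 has no triangle-fan topology, so non-indexed fans are lowered to an
+ * indexed triangle list: triangle t of the fan becomes indices
+ * { t + 1, t + 2, 0 }. The index buffer is built on an UPLOAD heap and
+ * bound in place of the application index state.
+ */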
+static VkResult
+dzn_cmd_buffer_triangle_fan_create_index(dzn_cmd_buffer *cmdbuf, uint32_t *vertex_count)
+{
+ dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+ uint8_t index_size = *vertex_count <= 0xffff ? 2 : 4;
+ uint32_t triangle_count = MAX2(*vertex_count, 2) - 2;
+
+ *vertex_count = triangle_count * 3;
+ if (!*vertex_count)
+ return VK_SUCCESS;
+
+ ID3D12Resource *index_buf;
+ VkResult result =
+ dzn_cmd_buffer_alloc_internal_buf(cmdbuf, *vertex_count * index_size,
+ D3D12_HEAP_TYPE_UPLOAD,
+ D3D12_RESOURCE_STATE_GENERIC_READ,
+ &index_buf);
+ if (result != VK_SUCCESS)
+ return result;
+
+ void *cpu_ptr;
+ index_buf->Map(0, NULL, &cpu_ptr);
+
+ /* TODO: VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT */
+ if (index_size == 2) {
+ uint16_t *indices = (uint16_t *)cpu_ptr;
+ for (uint32_t t = 0; t < triangle_count; t++) {
+ indices[t * 3] = t + 1;
+ indices[(t * 3) + 1] = t + 2;
+ indices[(t * 3) + 2] = 0;
+ }
+ cmdbuf->state.ib.view.Format = DXGI_FORMAT_R16_UINT;
+ } else {
+ uint32_t *indices = (uint32_t *)cpu_ptr;
+ for (uint32_t t = 0; t < triangle_count; t++) {
+ indices[t * 3] = t + 1;
+ indices[(t * 3) + 1] = t + 2;
+ indices[(t * 3) + 2] = 0;
+ }
+ cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT;
+ }
+
+ cmdbuf->state.ib.view.SizeInBytes = *vertex_count * index_size;
+ cmdbuf->state.ib.view.BufferLocation = index_buf->GetGPUVirtualAddress();
+ cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
+ return VK_SUCCESS;
+}
+
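+/* Indexed triangle fans can't be lowered on the CPU because the index data
+ * lives in GPU memory, so a small compute shader reads the bound index
+ * buffer and emits an equivalent 32-bit triangle-list index buffer.
+ */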
+static VkResult
+dzn_cmd_buffer_triangle_fan_rewrite_index(dzn_cmd_buffer *cmdbuf,
+ uint32_t *index_count,
+ uint32_t *first_index)
+{
+ dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+ uint32_t triangle_count = MAX2(*index_count, 2) - 2;
+
+ *index_count = triangle_count * 3;
+ if (!*index_count)
+ return VK_SUCCESS;
+
+   /* The new index buffer always uses 32-bit indices, to keep the compute
+    * shader that rewrites them simple.
+    */
+ ID3D12Resource *new_index_buf;
+ VkResult result =
+ dzn_cmd_buffer_alloc_internal_buf(cmdbuf, *index_count * 4,
+ D3D12_HEAP_TYPE_DEFAULT,
+ D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
+ &new_index_buf);
+ if (result != VK_SUCCESS)
+ return result;
+
+ D3D12_GPU_VIRTUAL_ADDRESS old_index_buf_gpu =
+ cmdbuf->state.ib.view.BufferLocation;
+
+ enum dzn_index_type index_type =
+ dzn_index_type_from_dxgi_format(cmdbuf->state.ib.view.Format);
+ const dzn_meta_triangle_fan_rewrite_index *rewrite_index =
+ &device->triangle_fan[index_type];
+
+ struct dzn_triangle_fan_rewrite_index_params params = {
+ .first_index = *first_index,
+ };
+
+ cmdbuf->cmdlist->SetComputeRootSignature(rewrite_index->root_sig);
+ cmdbuf->cmdlist->SetPipelineState(rewrite_index->pipeline_state);
+ cmdbuf->cmdlist->SetComputeRootUnorderedAccessView(0, new_index_buf->GetGPUVirtualAddress());
+ cmdbuf->cmdlist->SetComputeRoot32BitConstants(1, sizeof(params) / 4,
+ &params, 0);
+ cmdbuf->cmdlist->SetComputeRootShaderResourceView(2, old_index_buf_gpu);
+ cmdbuf->cmdlist->Dispatch(triangle_count, 1, 1);
+
+ D3D12_RESOURCE_BARRIER post_barriers[] = {
+ {
+ .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
+ .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
+      /* Transition the new index buffer to the index-buffer state so it
+       * can be bound for the subsequent indexed draw.
+       */
+ .Transition = {
+ .pResource = new_index_buf,
+ .Subresource = 0,
+ .StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
+ .StateAfter = D3D12_RESOURCE_STATE_INDEX_BUFFER,
+ },
+ },
+ };
+
+ cmdbuf->cmdlist->ResourceBarrier(ARRAY_SIZE(post_barriers), post_barriers);
+
+   /* We don't mess with the driver state when executing our internal
+    * compute shader, but we do change the D3D12 state, so let's mark
+    * things dirty if needed.
+    */
+ cmdbuf->state.pipeline = NULL;
+ if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].pipeline) {
+ cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |=
+ DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
+ }
+
+ cmdbuf->state.ib.view.SizeInBytes = *index_count * 4;
+ cmdbuf->state.ib.view.BufferLocation = new_index_buf->GetGPUVirtualAddress();
+ cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT;
+ cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
+ *first_index = 0;
+ return VK_SUCCESS;
+}
+
+static void
+dzn_cmd_buffer_prepare_draw(dzn_cmd_buffer *cmdbuf, bool indexed)
+{
+ dzn_cmd_buffer_update_pipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS);
+ dzn_cmd_buffer_update_heaps(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS);
+ dzn_cmd_buffer_update_sysvals(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS);
+ dzn_cmd_buffer_update_viewports(cmdbuf);
+ dzn_cmd_buffer_update_scissors(cmdbuf);
+ dzn_cmd_buffer_update_vbviews(cmdbuf);
+ dzn_cmd_buffer_update_push_constants(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS);
+ dzn_cmd_buffer_update_zsa(cmdbuf);
+
+ if (indexed)
+ dzn_cmd_buffer_update_ibview(cmdbuf);
+
+ /* Reset the dirty states */
+ cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty = 0;
+ cmdbuf->state.dirty = 0;
+}
+
+static uint32_t
+dzn_cmd_buffer_triangle_fan_get_max_index_buf_size(dzn_cmd_buffer *cmdbuf, bool indexed)
+{
+ dzn_graphics_pipeline *pipeline = (dzn_graphics_pipeline *)
+ cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
+
+ if (!pipeline->ia.triangle_fan)
+ return 0;
+
+ uint32_t max_triangles;
+
+ if (indexed) {
+ uint32_t index_size = cmdbuf->state.ib.view.Format == DXGI_FORMAT_R32_UINT ? 4 : 2;
+ uint32_t max_indices = cmdbuf->state.ib.view.SizeInBytes / index_size;
+
+ max_triangles = MAX2(max_indices, 2) - 2;
+ } else {
+ uint32_t max_vertex = 0;
+ for (uint32_t i = 0; i < pipeline->vb.count; i++) {
+ max_vertex =
+ MAX2(max_vertex,
+ cmdbuf->state.vb.views[i].SizeInBytes / cmdbuf->state.vb.views[i].StrideInBytes);
+ }
+
+ max_triangles = MAX2(max_vertex, 2) - 2;
+ }
+
+ return max_triangles * 3;
+}
+
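+/* Indirect draws go through a compute pre-pass: a shader reads the
+ * application draw-parameter buffer and emits an exec buffer matching our
+ * indirect command signature, which is then consumed by ExecuteIndirect().
+ * For triangle fans, a second exec buffer driving the per-draw
+ * index-rewrite dispatches is emitted as well.
+ */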
+static void
+dzn_cmd_buffer_indirect_draw(dzn_cmd_buffer *cmdbuf,
+ dzn_buffer *draw_buf,
+ size_t draw_buf_offset,
+ uint32_t draw_count,
+ uint32_t draw_buf_stride,
+ bool indexed)
+{
+ dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+ dzn_graphics_pipeline *pipeline = (dzn_graphics_pipeline *)
+ cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
+ bool triangle_fan = pipeline->ia.triangle_fan;
+ uint32_t min_draw_buf_stride =
+ indexed ?
+ sizeof(struct dzn_indirect_indexed_draw_params) :
+ sizeof(struct dzn_indirect_draw_params);
+
+ draw_buf_stride = draw_buf_stride ? draw_buf_stride : min_draw_buf_stride;
+ assert(draw_buf_stride >= min_draw_buf_stride);
+ assert((draw_buf_stride & 3) == 0);
+
+ uint32_t exec_buf_stride = 32;
+ uint32_t triangle_fan_index_buf_stride =
+ dzn_cmd_buffer_triangle_fan_get_max_index_buf_size(cmdbuf, indexed) *
+ sizeof(uint32_t);
+ uint32_t triangle_fan_exec_buf_stride =
+ sizeof(struct dzn_indirect_triangle_fan_rewrite_index_exec_params);
+ ID3D12Resource *exec_buf;
+ VkResult result =
+ dzn_cmd_buffer_alloc_internal_buf(cmdbuf, draw_count * exec_buf_stride,
+ D3D12_HEAP_TYPE_DEFAULT,
+ D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
+ &exec_buf);
+ if (result != VK_SUCCESS)
+ return;
+
+ D3D12_GPU_VIRTUAL_ADDRESS draw_buf_gpu =
+ draw_buf->res->GetGPUVirtualAddress() + draw_buf_offset;
+ ID3D12Resource *triangle_fan_index_buf = NULL;
+ ID3D12Resource *triangle_fan_exec_buf = NULL;
+
+ if (triangle_fan_index_buf_stride) {
+ result =
+ dzn_cmd_buffer_alloc_internal_buf(cmdbuf,
+ draw_count * triangle_fan_index_buf_stride,
+ D3D12_HEAP_TYPE_DEFAULT,
+ D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
+ &triangle_fan_index_buf);
+ if (result != VK_SUCCESS)
+ return;
+
+ result =
+ dzn_cmd_buffer_alloc_internal_buf(cmdbuf,
+ draw_count * triangle_fan_exec_buf_stride,
+ D3D12_HEAP_TYPE_DEFAULT,
+ D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
+ &triangle_fan_exec_buf);
+ if (result != VK_SUCCESS)
+ return;
+ }
+
+ struct dzn_indirect_draw_triangle_fan_rewrite_params params = {
+ .draw_buf_stride = draw_buf_stride,
+ .triangle_fan_index_buf_stride = triangle_fan_index_buf_stride,
+ .triangle_fan_index_buf_start =
+ triangle_fan_index_buf ?
+ triangle_fan_index_buf->GetGPUVirtualAddress() : 0,
+ };
+ uint32_t params_size =
+ triangle_fan_index_buf_stride > 0 ?
+ sizeof(struct dzn_indirect_draw_triangle_fan_rewrite_params) :
+ sizeof(struct dzn_indirect_draw_rewrite_params);
+
+ enum dzn_indirect_draw_type draw_type;
+
+ if (indexed && triangle_fan_index_buf_stride > 0)
+ draw_type = DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN;
+ else if (!indexed && triangle_fan_index_buf_stride > 0)
+ draw_type = DZN_INDIRECT_DRAW_TRIANGLE_FAN;
+ else if (indexed)
+ draw_type = DZN_INDIRECT_INDEXED_DRAW;
+ else
+ draw_type = DZN_INDIRECT_DRAW;
+
+ dzn_meta_indirect_draw *indirect_draw = &device->indirect_draws[draw_type];
+
+ cmdbuf->cmdlist->SetComputeRootSignature(indirect_draw->root_sig);
+ cmdbuf->cmdlist->SetPipelineState(indirect_draw->pipeline_state);
+ cmdbuf->cmdlist->SetComputeRoot32BitConstants(0, params_size / 4, (const void *)&params, 0);
+ cmdbuf->cmdlist->SetComputeRootShaderResourceView(1, draw_buf_gpu);
+ cmdbuf->cmdlist->SetComputeRootUnorderedAccessView(2, exec_buf->GetGPUVirtualAddress());
+ if (triangle_fan_exec_buf)
+ cmdbuf->cmdlist->SetComputeRootUnorderedAccessView(3, triangle_fan_exec_buf->GetGPUVirtualAddress());
+
+ cmdbuf->cmdlist->Dispatch(draw_count, 1, 1);
+
+ D3D12_RESOURCE_BARRIER post_barriers[] = {
+ {
+ .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
+ .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
+      /* Transition the exec buffer to the indirect-argument state so it
+       * can be passed to ExecuteIndirect() as an argument buffer.
+       */
+ .Transition = {
+ .pResource = exec_buf,
+ .Subresource = 0,
+ .StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
+ .StateAfter = D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT,
+ },
+ },
+ {
+ .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
+ .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
+      /* Transition the triangle-fan exec buffer to the indirect-argument
+       * state so it can be passed to the index-rewrite ExecuteIndirect()
+       * call as an argument buffer.
+       */
+ .Transition = {
+ .pResource = triangle_fan_exec_buf,
+ .Subresource = 0,
+ .StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
+ .StateAfter = D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT,
+ },
+ },
+ };
+
+ uint32_t post_barrier_count = triangle_fan_exec_buf ? 2 : 1;
+
+ cmdbuf->cmdlist->ResourceBarrier(post_barrier_count, post_barriers);
+
+ D3D12_INDEX_BUFFER_VIEW ib_view = {};
+
+ if (triangle_fan_exec_buf) {
+ auto index_type =
+ indexed ?
+ dzn_index_type_from_dxgi_format(cmdbuf->state.ib.view.Format) :
+ DZN_NO_INDEX;
+ dzn_meta_triangle_fan_rewrite_index *rewrite_index =
+ &device->triangle_fan[index_type];
+
+ struct dzn_triangle_fan_rewrite_index_params rewrite_index_params = {};
+
+ assert(rewrite_index->root_sig);
+ assert(rewrite_index->pipeline_state);
+ assert(rewrite_index->cmd_sig);
+
+ cmdbuf->cmdlist->SetComputeRootSignature(rewrite_index->root_sig);
+ cmdbuf->cmdlist->SetPipelineState(rewrite_index->pipeline_state);
+ cmdbuf->cmdlist->SetComputeRootUnorderedAccessView(0, triangle_fan_index_buf->GetGPUVirtualAddress());
+ cmdbuf->cmdlist->SetComputeRoot32BitConstants(1, sizeof(rewrite_index_params) / 4,
+ (const void *)&rewrite_index_params, 0);
+
+ if (indexed)
+ cmdbuf->cmdlist->SetComputeRootShaderResourceView(2, cmdbuf->state.ib.view.BufferLocation);
+
+ cmdbuf->cmdlist->ExecuteIndirect(rewrite_index->cmd_sig,
+ draw_count, triangle_fan_exec_buf,
+ 0, NULL, 0);
+
+ D3D12_RESOURCE_BARRIER index_buf_barriers[] = {
+ {
+ .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
+ .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
+ .Transition = {
+ .pResource = triangle_fan_index_buf,
+ .Subresource = 0,
+ .StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
+ .StateAfter = D3D12_RESOURCE_STATE_INDEX_BUFFER,
+ },
+ },
+ };
+
+ cmdbuf->cmdlist->ResourceBarrier(ARRAY_SIZE(index_buf_barriers), index_buf_barriers);
+
+ /* After our triangle-fan lowering the draw is indexed */
+ indexed = true;
+ ib_view = cmdbuf->state.ib.view;
+ cmdbuf->state.ib.view.BufferLocation = triangle_fan_index_buf->GetGPUVirtualAddress();
+ cmdbuf->state.ib.view.SizeInBytes = triangle_fan_index_buf_stride;
+ cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT;
+ cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
+ }
+
+   /* We don't mess with the driver state when executing our internal
+    * compute shader, but we do change the D3D12 state, so let's mark
+    * things dirty if needed.
+    */
+ cmdbuf->state.pipeline = NULL;
+ if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].pipeline) {
+ cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |=
+ DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
+ }
+
+ cmdbuf->state.sysvals.gfx.first_vertex = 0;
+ cmdbuf->state.sysvals.gfx.base_instance = 0;
+ cmdbuf->state.sysvals.gfx.is_indexed_draw = indexed;
+ cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
+ DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
+
+ dzn_cmd_buffer_prepare_draw(cmdbuf, indexed);
+
+ /* Restore the old IB view if we modified it during the triangle fan lowering */
+ if (ib_view.SizeInBytes) {
+ cmdbuf->state.ib.view = ib_view;
+ cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
+ }
+
+ enum dzn_indirect_draw_cmd_sig_type cmd_sig_type =
+ triangle_fan_index_buf_stride > 0 ?
+ DZN_INDIRECT_DRAW_TRIANGLE_FAN_CMD_SIG :
+ indexed ?
+ DZN_INDIRECT_INDEXED_DRAW_CMD_SIG :
+ DZN_INDIRECT_DRAW_CMD_SIG;
+ ID3D12CommandSignature *cmdsig =
+ dzn_graphics_pipeline_get_indirect_cmd_sig(pipeline, cmd_sig_type);
+
+ if (!cmdsig) {
+ cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ return;
+ }
+
+ cmdbuf->cmdlist->ExecuteIndirect(cmdsig,
+ draw_count, exec_buf, 0, NULL, 0);
+}
+
+static void
+dzn_cmd_buffer_prepare_dispatch(dzn_cmd_buffer *cmdbuf)
+{
+ dzn_cmd_buffer_update_pipeline(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE);
+ dzn_cmd_buffer_update_heaps(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE);
+ dzn_cmd_buffer_update_sysvals(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE);
+ dzn_cmd_buffer_update_push_constants(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE);
+
+ /* Reset the dirty states */
+ cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty = 0;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdCopyBuffer2(VkCommandBuffer commandBuffer,
+ const VkCopyBufferInfo2 *info)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+ VK_FROM_HANDLE(dzn_buffer, src_buffer, info->srcBuffer);
+ VK_FROM_HANDLE(dzn_buffer, dst_buffer, info->dstBuffer);
+
+   for (uint32_t i = 0; i < info->regionCount; i++) {
+ auto &region = info->pRegions[i];
+
+ cmdbuf->cmdlist->CopyBufferRegion(dst_buffer->res, region.dstOffset,
+ src_buffer->res, region.srcOffset,
+ region.size);
+ }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer,
+ const VkCopyBufferToImageInfo2 *info)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+
+   for (uint32_t i = 0; i < info->regionCount; i++) {
+ const VkBufferImageCopy2 &region = info->pRegions[i];
+
+ dzn_foreach_aspect(aspect, region.imageSubresource.aspectMask) {
+ for (uint32_t l = 0; l < region.imageSubresource.layerCount; l++)
+ dzn_cmd_buffer_copy_buf2img_region(cmdbuf, info, i, aspect, l);
+ }
+ }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer,
+ const VkCopyImageToBufferInfo2 *info)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+
+   for (uint32_t i = 0; i < info->regionCount; i++) {
+ const VkBufferImageCopy2 &region = info->pRegions[i];
+
+ dzn_foreach_aspect(aspect, region.imageSubresource.aspectMask) {
+ for (uint32_t l = 0; l < region.imageSubresource.layerCount; l++)
+            dzn_cmd_buffer_copy_img2buf_region(cmdbuf, info, i, aspect, l);
+ }
+ }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdCopyImage2(VkCommandBuffer commandBuffer,
+ const VkCopyImageInfo2 *info)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+ dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+ VK_FROM_HANDLE(dzn_image, src, info->srcImage);
+ VK_FROM_HANDLE(dzn_image, dst, info->dstImage);
+
+ assert(src->vk.samples == dst->vk.samples);
+
+ bool requires_temp_res = src->vk.format != dst->vk.format &&
+ src->vk.tiling != VK_IMAGE_TILING_LINEAR &&
+ dst->vk.tiling != VK_IMAGE_TILING_LINEAR;
+
+   /* FIXME: multisample copies only work if we copy the entire subresource
+    * and if the copy doesn't require a temporary linear resource. When
+    * these conditions are not met we should use a blit shader.
+    */
+ if (src->vk.samples > 1) {
+ assert(requires_temp_res == false);
+
+ for (uint32_t i = 0; i < info->regionCount; i++) {
+ const VkImageCopy2 &region = info->pRegions[i];
+         ASSERTED uint32_t src_w = u_minify(src->vk.extent.width, region.srcSubresource.mipLevel);
+         ASSERTED uint32_t src_h = u_minify(src->vk.extent.height, region.srcSubresource.mipLevel);
+
+         assert(region.srcOffset.x == 0 && region.srcOffset.y == 0);
+         assert(region.extent.width == src_w);
+         assert(region.extent.height == src_h);
+         assert(region.dstOffset.x == 0 && region.dstOffset.y == 0);
+         assert(region.extent.width == u_minify(dst->vk.extent.width, region.dstSubresource.mipLevel));
+         assert(region.extent.height == u_minify(dst->vk.extent.height, region.dstSubresource.mipLevel));
+ }
+ }
+
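+   /* Copies between different non-linear formats can't be done directly:
+    * each chunk is bounced through a temporary buffer laid out with a
+    * placed footprint, sized below for the largest region.
+    */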
+ D3D12_TEXTURE_COPY_LOCATION tmp_loc = {};
+ D3D12_RESOURCE_DESC tmp_desc = {
+ .Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D,
+ .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT,
+ .DepthOrArraySize = 1,
+ .MipLevels = 1,
+ .Format = src->desc.Format,
+ .SampleDesc = { .Count = 1, .Quality = 0 },
+ .Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN,
+ .Flags = D3D12_RESOURCE_FLAG_NONE,
+ };
+
+ if (requires_temp_res) {
+ ID3D12Device *dev = device->dev;
+ VkImageAspectFlags aspect = 0;
+ uint64_t max_size = 0;
+
+ if (vk_format_has_depth(src->vk.format))
+ aspect = VK_IMAGE_ASPECT_DEPTH_BIT;
+ else if (vk_format_has_stencil(src->vk.format))
+         aspect = VK_IMAGE_ASPECT_STENCIL_BIT;
+ else
+ aspect = VK_IMAGE_ASPECT_COLOR_BIT;
+
+ for (uint32_t i = 0; i < info->regionCount; i++) {
+ const VkImageCopy2 &region = info->pRegions[i];
+ uint64_t region_size = 0;
+
+ tmp_desc.Format =
+ dzn_image_get_dxgi_format(src->vk.format,
+ VK_IMAGE_USAGE_TRANSFER_DST_BIT,
+ aspect);
+ tmp_desc.Width = region.extent.width;
+ tmp_desc.Height = region.extent.height;
+
+         dev->GetCopyableFootprints(&tmp_desc,
+                                    0, 1, 0,
+                                    NULL, NULL, NULL,
+                                    &region_size);
+ max_size = MAX2(max_size, region_size * region.extent.depth);
+ }
+
+ VkResult result =
+ dzn_cmd_buffer_alloc_internal_buf(cmdbuf, max_size,
+ D3D12_HEAP_TYPE_DEFAULT,
+ D3D12_RESOURCE_STATE_COPY_DEST,
+ &tmp_loc.pResource);
+ if (result != VK_SUCCESS)
+ return;
+
+ tmp_loc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
+ }
+
+   for (uint32_t i = 0; i < info->regionCount; i++) {
+ const VkImageCopy2 &region = info->pRegions[i];
+
+ dzn_foreach_aspect(aspect, region.srcSubresource.aspectMask) {
+ for (uint32_t l = 0; l < region.srcSubresource.layerCount; l++)
+ dzn_cmd_buffer_copy_img_chunk(cmdbuf, info, tmp_desc, tmp_loc, i, aspect, l);
+ }
+ }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdBlitImage2(VkCommandBuffer commandBuffer,
+ const VkBlitImageInfo2 *info)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+ dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+
+ if (info->regionCount == 0)
+ return;
+
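+   /* One source-view descriptor is needed per region and per aspect. */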
+ uint32_t desc_count = 0;
+ for (uint32_t r = 0; r < info->regionCount; r++)
+ desc_count += util_bitcount(info->pRegions[r].srcSubresource.aspectMask);
+
+ dzn_descriptor_heap *heap;
+ uint32_t heap_slot;
+ VkResult result =
+ dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->cbv_srv_uav_pool, device,
+ desc_count, &heap, &heap_slot);
+
+ if (result != VK_SUCCESS) {
+ cmdbuf->error = result;
+ return;
+ }
+
+ if (heap != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV]) {
+ ID3D12DescriptorHeap * const heaps[] = { heap->heap };
+ cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = heap;
+ cmdbuf->cmdlist->SetDescriptorHeaps(ARRAY_SIZE(heaps), heaps);
+ }
+
+ cmdbuf->cmdlist->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
+
+ for (uint32_t r = 0; r < info->regionCount; r++)
+ dzn_cmd_buffer_blit_region(cmdbuf, info, heap, &heap_slot, r);
+
+ cmdbuf->state.pipeline = NULL;
+ cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS | DZN_CMD_DIRTY_SCISSORS;
+ if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline) {
+ cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
+ DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
+ }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdResolveImage2(VkCommandBuffer commandBuffer,
+ const VkResolveImageInfo2 *info)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+ dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+
+ if (info->regionCount == 0)
+ return;
+
+ uint32_t desc_count = 0;
+ for (uint32_t r = 0; r < info->regionCount; r++)
+ desc_count += util_bitcount(info->pRegions[r].srcSubresource.aspectMask);
+
+ dzn_descriptor_heap *heap;
+ uint32_t heap_slot;
+ VkResult result =
+ dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->cbv_srv_uav_pool, device,
+ desc_count, &heap, &heap_slot);
+ if (result != VK_SUCCESS) {
+ cmdbuf->error = result;
+ return;
+ }
+
+ if (heap != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV]) {
+ ID3D12DescriptorHeap * const heaps[] = { heap->heap };
+ cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = heap;
+ cmdbuf->cmdlist->SetDescriptorHeaps(ARRAY_SIZE(heaps), heaps);
+ }
+
+ cmdbuf->cmdlist->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
+
+   for (uint32_t r = 0; r < info->regionCount; r++)
+      dzn_cmd_buffer_resolve_region(cmdbuf, info, heap, &heap_slot, r);
+
+ cmdbuf->state.pipeline = NULL;
+ cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS | DZN_CMD_DIRTY_SCISSORS;
+ if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline) {
+ cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
+ DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
+ }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdClearColorImage(VkCommandBuffer commandBuffer,
+ VkImage image,
+ VkImageLayout imageLayout,
+ const VkClearColorValue *pColor,
+ uint32_t rangeCount,
+ const VkImageSubresourceRange *pRanges)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+ VK_FROM_HANDLE(dzn_image, img, image);
+
+ dzn_cmd_buffer_clear_color(cmdbuf, img, imageLayout, pColor, rangeCount, pRanges);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
+ VkImage image,
+ VkImageLayout imageLayout,
+ const VkClearDepthStencilValue *pDepthStencil,
+ uint32_t rangeCount,
+ const VkImageSubresourceRange *pRanges)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+ VK_FROM_HANDLE(dzn_image, img, image);
+
+ dzn_cmd_buffer_clear_zs(cmdbuf, img, imageLayout, pDepthStencil, rangeCount, pRanges);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdDispatch(VkCommandBuffer commandBuffer,
+ uint32_t groupCountX,
+ uint32_t groupCountY,
+ uint32_t groupCountZ)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+
+ cmdbuf->state.sysvals.compute.group_count_x = groupCountX;
+ cmdbuf->state.sysvals.compute.group_count_y = groupCountY;
+ cmdbuf->state.sysvals.compute.group_count_z = groupCountZ;
+ cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |=
+ DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
+
+ dzn_cmd_buffer_prepare_dispatch(cmdbuf);
+ cmdbuf->cmdlist->Dispatch(groupCountX, groupCountY, groupCountZ);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdFillBuffer(VkCommandBuffer commandBuffer,
+ VkBuffer dstBuffer,
+ VkDeviceSize dstOffset,
+ VkDeviceSize size,
+ uint32_t data)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+ VK_FROM_HANDLE(dzn_buffer, buf, dstBuffer);
+
+   if (size == VK_WHOLE_SIZE)
+      size = buf->size - dstOffset;
+
+   /*
+    * The spec says:
+    * "size is the number of bytes to fill, and must be either a multiple of
+    * 4, or VK_WHOLE_SIZE to fill the range from offset to the end of the
+    * buffer. If VK_WHOLE_SIZE is used and the remaining size of the buffer
+    * is not a multiple of 4, then the nearest smaller multiple is used."
+    */
+   size &= ~3ULL;
+
+ ID3D12Resource *src_res;
+ VkResult result =
+ dzn_cmd_buffer_alloc_internal_buf(cmdbuf, size,
+ D3D12_HEAP_TYPE_UPLOAD,
+ D3D12_RESOURCE_STATE_GENERIC_READ,
+ &src_res);
+ if (result != VK_SUCCESS)
+ return;
+
+ uint32_t *cpu_ptr;
+ src_res->Map(0, NULL, (void **)&cpu_ptr);
+ for (uint32_t i = 0; i < size / 4; i++)
+ cpu_ptr[i] = data;
+
+ src_res->Unmap(0, NULL);
+
+ cmdbuf->cmdlist->CopyBufferRegion(buf->res, dstOffset, src_res, 0, size);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
+ VkBuffer dstBuffer,
+ VkDeviceSize dstOffset,
+ VkDeviceSize size,
+ const void *data)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+ VK_FROM_HANDLE(dzn_buffer, buf, dstBuffer);
+
+   if (size == VK_WHOLE_SIZE)
+      size = buf->size - dstOffset;
+
+   /* The spec requires dataSize to be a multiple of 4; be lenient and round
+    * down in case VK_WHOLE_SIZE left us with a trailing remainder.
+    */
+   size &= ~3ULL;
+
+ ID3D12Resource *src_res;
+ VkResult result =
+ dzn_cmd_buffer_alloc_internal_buf(cmdbuf, size,
+ D3D12_HEAP_TYPE_UPLOAD,
+ D3D12_RESOURCE_STATE_GENERIC_READ,
+ &src_res);
+ if (result != VK_SUCCESS)
+ return;
+
+ void *cpu_ptr;
+ src_res->Map(0, NULL, &cpu_ptr);
+   memcpy(cpu_ptr, data, size);
+ src_res->Unmap(0, NULL);
+
+ cmdbuf->cmdlist->CopyBufferRegion(buf->res, dstOffset, src_res, 0, size);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdClearAttachments(VkCommandBuffer commandBuffer,
+ uint32_t attachmentCount,
+ const VkClearAttachment *pAttachments,
+ uint32_t rectCount,
+ const VkClearRect *pRects)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+
+ dzn_cmd_buffer_clear_attachments(cmdbuf, attachmentCount, pAttachments, rectCount, pRects);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdBeginRenderPass2(VkCommandBuffer commandBuffer,
+ const VkRenderPassBeginInfo *pRenderPassBeginInfo,
+ const VkSubpassBeginInfo *pSubpassBeginInfo)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+ VK_FROM_HANDLE(dzn_render_pass, pass, pRenderPassBeginInfo->renderPass);
+ VK_FROM_HANDLE(dzn_framebuffer, framebuffer, pRenderPassBeginInfo->framebuffer);
+
+ assert(pass->attachment_count == framebuffer->attachment_count);
+
+ cmdbuf->state.framebuffer = framebuffer;
+ cmdbuf->state.render_area = D3D12_RECT {
+ .left = pRenderPassBeginInfo->renderArea.offset.x,
+ .top = pRenderPassBeginInfo->renderArea.offset.y,
+ .right = (LONG)(pRenderPassBeginInfo->renderArea.offset.x + pRenderPassBeginInfo->renderArea.extent.width),
+ .bottom = (LONG)(pRenderPassBeginInfo->renderArea.offset.y + pRenderPassBeginInfo->renderArea.extent.height),
+ };
+
+ // The render area has an impact on the scissor state.
+ cmdbuf->state.dirty |= DZN_CMD_DIRTY_SCISSORS;
+ cmdbuf->state.pass = pass;
+ cmdbuf->state.subpass = 0;
+ dzn_cmd_buffer_begin_subpass(cmdbuf);
+
+ uint32_t clear_count =
+ MIN2(pRenderPassBeginInfo->clearValueCount, framebuffer->attachment_count);
+   for (uint32_t i = 0; i < clear_count; ++i) {
+ VkImageAspectFlags aspectMask = 0;
+
+ if (vk_format_is_depth_or_stencil(pass->attachments[i].format)) {
+ if (pass->attachments[i].clear.depth)
+ aspectMask |= VK_IMAGE_ASPECT_DEPTH_BIT;
+ if (pass->attachments[i].clear.stencil)
+ aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT;
+ } else if (pass->attachments[i].clear.color) {
+ aspectMask |= VK_IMAGE_ASPECT_COLOR_BIT;
+ }
+
+ dzn_cmd_buffer_clear_attachment(cmdbuf, i, &pRenderPassBeginInfo->pClearValues[i],
+ aspectMask, 0, ~0, 1, &cmdbuf->state.render_area);
+ }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdEndRenderPass2(VkCommandBuffer commandBuffer,
+ const VkSubpassEndInfo *pSubpassEndInfo)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+
+ dzn_cmd_buffer_end_subpass(cmdbuf);
+
+ for (uint32_t i = 0; i < cmdbuf->state.pass->attachment_count; i++)
+ dzn_cmd_buffer_attachment_transition(cmdbuf, &cmdbuf->state.pass->attachments[i]);
+
+ cmdbuf->state.framebuffer = NULL;
+ cmdbuf->state.pass = NULL;
+ cmdbuf->state.subpass = 0;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdNextSubpass2(VkCommandBuffer commandBuffer,
+ const VkSubpassBeginInfo *pSubpassBeginInfo,
+ const VkSubpassEndInfo *pSubpassEndInfo)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+
+ dzn_cmd_buffer_end_subpass(cmdbuf);
+ assert(cmdbuf->state.subpass + 1 < cmdbuf->state.pass->subpass_count);
+ cmdbuf->state.subpass++;
+ dzn_cmd_buffer_begin_subpass(cmdbuf);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdBindPipeline(VkCommandBuffer commandBuffer,
+ VkPipelineBindPoint pipelineBindPoint,
+ VkPipeline pipe)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+ VK_FROM_HANDLE(dzn_pipeline, pipeline, pipe);
+
+ cmdbuf->state.bindpoint[pipelineBindPoint].pipeline = pipeline;
+ cmdbuf->state.bindpoint[pipelineBindPoint].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
+ if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
+ const dzn_graphics_pipeline *gfx = (const dzn_graphics_pipeline *)pipeline;
+
+ if (!gfx->vp.dynamic) {
+ memcpy(cmdbuf->state.viewports, gfx->vp.desc,
+ gfx->vp.count * sizeof(cmdbuf->state.viewports[0]));
+ cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS;
+ }
+
+ if (!gfx->scissor.dynamic) {
+ memcpy(cmdbuf->state.scissors, gfx->scissor.desc,
+ gfx->scissor.count * sizeof(cmdbuf->state.scissors[0]));
+ cmdbuf->state.dirty |= DZN_CMD_DIRTY_SCISSORS;
+ }
+
+ if (gfx->zsa.stencil_test.enable && !gfx->zsa.stencil_test.dynamic_ref) {
+ cmdbuf->state.zsa.stencil_test.front.ref = gfx->zsa.stencil_test.front.ref;
+ cmdbuf->state.zsa.stencil_test.back.ref = gfx->zsa.stencil_test.back.ref;
+ cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_REF;
+ }
+
+ for (uint32_t vb = 0; vb < gfx->vb.count; vb++)
+ cmdbuf->state.vb.views[vb].StrideInBytes = gfx->vb.strides[vb];
+
+ if (gfx->vb.count > 0)
+ BITSET_SET_RANGE(cmdbuf->state.vb.dirty, 0, gfx->vb.count - 1);
+ }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
+ VkPipelineBindPoint pipelineBindPoint,
+ VkPipelineLayout layout,
+ uint32_t firstSet,
+ uint32_t descriptorSetCount,
+ const VkDescriptorSet *pDescriptorSets,
+ uint32_t dynamicOffsetCount,
+ const uint32_t *pDynamicOffsets)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+ VK_FROM_HANDLE(dzn_pipeline_layout, playout, layout);
+
+ struct dzn_descriptor_state *desc_state =
+ &cmdbuf->state.bindpoint[pipelineBindPoint].desc_state;
+ uint32_t dirty = 0;
+
+ for (uint32_t i = 0; i < descriptorSetCount; i++) {
+ uint32_t idx = firstSet + i;
+ VK_FROM_HANDLE(dzn_descriptor_set, set, pDescriptorSets[i]);
+
+ if (desc_state->sets[idx].set != set) {
+ desc_state->sets[idx].set = set;
+ dirty |= DZN_CMD_BINDPOINT_DIRTY_HEAPS;
+ }
+
+ uint32_t dynamic_buffer_count = playout->sets[idx].dynamic_buffer_count;
+ if (dynamic_buffer_count) {
+ assert(dynamicOffsetCount >= dynamic_buffer_count);
+
+ for (uint32_t j = 0; j < dynamic_buffer_count; j++)
+ desc_state->sets[idx].dynamic_offsets[j] = pDynamicOffsets[j];
+
+ dynamicOffsetCount -= dynamic_buffer_count;
+ pDynamicOffsets += dynamic_buffer_count;
+ dirty |= DZN_CMD_BINDPOINT_DIRTY_HEAPS;
+ }
+ }
+
+ cmdbuf->state.bindpoint[pipelineBindPoint].dirty |= dirty;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdSetViewport(VkCommandBuffer commandBuffer,
+ uint32_t firstViewport,
+ uint32_t viewportCount,
+ const VkViewport *pViewports)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+
+ STATIC_ASSERT(MAX_VP <= DXIL_SPIRV_MAX_VIEWPORT);
+
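+ /* For illustration: yz_flip_mask packs one y-flip bit per viewport in the
+ * low bits, and one z-flip bit per viewport starting at
+ * DXIL_SPIRV_Z_FLIP_SHIFT. A positive-height viewport at slot 2 whose
+ * minDepth > maxDepth would thus set both BITFIELD_BIT(2) and
+ * BITFIELD_BIT(2 + DXIL_SPIRV_Z_FLIP_SHIFT).
+ */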
+ for (uint32_t i = 0; i < viewportCount; i++) {
+ uint32_t vp = i + firstViewport;
+
+ dzn_translate_viewport(&cmdbuf->state.viewports[vp], &pViewports[i]);
+
+ if (pViewports[i].minDepth > pViewports[i].maxDepth)
+ cmdbuf->state.sysvals.gfx.yz_flip_mask |= BITFIELD_BIT(vp + DXIL_SPIRV_Z_FLIP_SHIFT);
+ else
+ cmdbuf->state.sysvals.gfx.yz_flip_mask &= ~BITFIELD_BIT(vp + DXIL_SPIRV_Z_FLIP_SHIFT);
+
+ if (pViewports[i].height > 0)
+ cmdbuf->state.sysvals.gfx.yz_flip_mask |= BITFIELD_BIT(vp);
+ else
+ cmdbuf->state.sysvals.gfx.yz_flip_mask &= ~BITFIELD_BIT(vp);
+ }
+
+ if (viewportCount) {
+ cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS;
+ cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
+ DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
+ }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdSetScissor(VkCommandBuffer commandBuffer,
+ uint32_t firstScissor,
+ uint32_t scissorCount,
+ const VkRect2D *pScissors)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+
+ for (uint32_t i = 0; i < scissorCount; i++)
+ dzn_translate_rect(&cmdbuf->state.scissors[i + firstScissor], &pScissors[i]);
+
+ if (scissorCount)
+ cmdbuf->state.dirty |= DZN_CMD_DIRTY_SCISSORS;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdPushConstants(VkCommandBuffer commandBuffer, VkPipelineLayout layout,
+ VkShaderStageFlags stageFlags, uint32_t offset, uint32_t size,
+ const void *pValues)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+ struct dzn_cmd_buffer_push_constant_state *states[2];
+ uint32_t num_states = 0;
+
+ if (stageFlags & VK_SHADER_STAGE_ALL_GRAPHICS)
+ states[num_states++] = &cmdbuf->state.push_constant.gfx;
+
+ if (stageFlags & VK_SHADER_STAGE_COMPUTE_BIT)
+ states[num_states++] = &cmdbuf->state.push_constant.compute;
+
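+ /* The loop below tracks the dirty push-constant window as a single
+ * [offset, end) range, merging each update into whatever was previously
+ * recorded. Worked example (hypothetical values): a first update with
+ * offset=16/size=8 records [16, 24); a second update with offset=0/size=4
+ * widens the window to [0, 24), so the whole range is re-uploaded on the
+ * next draw/dispatch.
+ */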
+ for (uint32_t i = 0; i < num_states; i++) {
+ memcpy(((char *)states[i]->values) + offset, pValues, size);
+
+ uint32_t new_offset = offset;
+ uint32_t new_end = offset + size;
+ if (states[i]->end != 0) {
+ new_offset = MIN2(states[i]->offset, offset);
+ new_end = MAX2(states[i]->end, new_end);
+ }
+ states[i]->offset = new_offset;
+ states[i]->end = new_end;
+ }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdDraw(VkCommandBuffer commandBuffer,
+ uint32_t vertexCount,
+ uint32_t instanceCount,
+ uint32_t firstVertex,
+ uint32_t firstInstance)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+
+ const dzn_graphics_pipeline *pipeline = (const dzn_graphics_pipeline *)
+ cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
+
+ cmdbuf->state.sysvals.gfx.first_vertex = firstVertex;
+ cmdbuf->state.sysvals.gfx.base_instance = firstInstance;
+ cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
+ DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
+
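+ /* Sketch of the triangle-fan lowering done below (D3D12 has no fan
+ * topology): a fan over vertices 0,1,2,3 is rewritten as an indexed
+ * triangle-list draw with a generated index buffer:
+ *
+ * fan (0,1,2,3) -> indices { 0,1,2, 0,2,3 }
+ *
+ * which is why the non-indexed path switches to DrawIndexedInstanced().
+ */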
+ if (pipeline->ia.triangle_fan) {
+ D3D12_INDEX_BUFFER_VIEW ib_view = cmdbuf->state.ib.view;
+
+ VkResult result =
+ dzn_cmd_buffer_triangle_fan_create_index(cmdbuf, &vertexCount);
+ if (result != VK_SUCCESS || !vertexCount)
+ return;
+
+ cmdbuf->state.sysvals.gfx.is_indexed_draw = true;
+ dzn_cmd_buffer_prepare_draw(cmdbuf, true);
+ cmdbuf->cmdlist->DrawIndexedInstanced(vertexCount, instanceCount, 0,
+ firstVertex, firstInstance);
+
+ /* Restore the IB view if we modified it when lowering triangle fans. */
+ if (ib_view.SizeInBytes > 0) {
+ cmdbuf->state.ib.view = ib_view;
+ cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
+ }
+ } else {
+ cmdbuf->state.sysvals.gfx.is_indexed_draw = false;
+ dzn_cmd_buffer_prepare_draw(cmdbuf, false);
+ cmdbuf->cmdlist->DrawInstanced(vertexCount, instanceCount,
+ firstVertex, firstInstance);
+ }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdDrawIndexed(VkCommandBuffer commandBuffer,
+ uint32_t indexCount,
+ uint32_t instanceCount,
+ uint32_t firstIndex,
+ int32_t vertexOffset,
+ uint32_t firstInstance)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+
+ const dzn_graphics_pipeline *pipeline = (const dzn_graphics_pipeline *)
+ cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
+
+ cmdbuf->state.sysvals.gfx.first_vertex = vertexOffset;
+ cmdbuf->state.sysvals.gfx.base_instance = firstInstance;
+ cmdbuf->state.sysvals.gfx.is_indexed_draw = true;
+ cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
+ DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
+
+ D3D12_INDEX_BUFFER_VIEW ib_view = cmdbuf->state.ib.view;
+
+ if (pipeline->ia.triangle_fan) {
+ VkResult result =
+ dzn_cmd_buffer_triangle_fan_rewrite_index(cmdbuf, &indexCount, &firstIndex);
+ if (result != VK_SUCCESS || !indexCount)
+ return;
+ }
+
+ dzn_cmd_buffer_prepare_draw(cmdbuf, true);
+ cmdbuf->cmdlist->DrawIndexedInstanced(indexCount, instanceCount, firstIndex,
+ vertexOffset, firstInstance);
+
+ /* Restore the IB view if we modified it when lowering triangle fans. */
+ if (pipeline->ia.triangle_fan && ib_view.SizeInBytes) {
+ cmdbuf->state.ib.view = ib_view;
+ cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
+ }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdDrawIndirect(VkCommandBuffer commandBuffer,
+ VkBuffer buffer,
+ VkDeviceSize offset,
+ uint32_t drawCount,
+ uint32_t stride)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+ VK_FROM_HANDLE(dzn_buffer, buf, buffer);
+
+ dzn_cmd_buffer_indirect_draw(cmdbuf, buf, offset, drawCount, stride, false);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer,
+ VkBuffer buffer,
+ VkDeviceSize offset,
+ uint32_t drawCount,
+ uint32_t stride)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+ VK_FROM_HANDLE(dzn_buffer, buf, buffer);
+
+ dzn_cmd_buffer_indirect_draw(cmdbuf, buf, offset, drawCount, stride, true);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdBindVertexBuffers(VkCommandBuffer commandBuffer,
+ uint32_t firstBinding,
+ uint32_t bindingCount,
+ const VkBuffer *pBuffers,
+ const VkDeviceSize *pOffsets)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+
+ if (!bindingCount)
+ return;
+
+ D3D12_VERTEX_BUFFER_VIEW *vbviews = cmdbuf->state.vb.views;
+
+ for (uint32_t i = 0; i < bindingCount; i++) {
+ VK_FROM_HANDLE(dzn_buffer, buf, pBuffers[i]);
+
+ vbviews[firstBinding + i].BufferLocation = buf->res->GetGPUVirtualAddress() + pOffsets[i];
+ vbviews[firstBinding + i].SizeInBytes = buf->size - pOffsets[i];
+ }
+
+ BITSET_SET_RANGE(cmdbuf->state.vb.dirty, firstBinding,
+ firstBinding + bindingCount - 1);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdBindIndexBuffer(VkCommandBuffer commandBuffer,
+ VkBuffer buffer,
+ VkDeviceSize offset,
+ VkIndexType indexType)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+ VK_FROM_HANDLE(dzn_buffer, buf, buffer);
+
+ cmdbuf->state.ib.view.BufferLocation = buf->res->GetGPUVirtualAddress() + offset;
+ cmdbuf->state.ib.view.SizeInBytes = buf->size - offset;
+ switch (indexType) {
+ case VK_INDEX_TYPE_UINT16:
+ cmdbuf->state.ib.view.Format = DXGI_FORMAT_R16_UINT;
+ break;
+ case VK_INDEX_TYPE_UINT32:
+ cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT;
+ break;
+ default: unreachable("Invalid index type");
+ }
+
+ cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdResetEvent(VkCommandBuffer commandBuffer,
+ VkEvent event,
+ VkPipelineStageFlags stageMask)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+ dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+ VK_FROM_HANDLE(dzn_event, evt, event);
+
+ if (!_mesa_hash_table_insert(cmdbuf->events.ht, evt, (void *)(uintptr_t)DZN_EVENT_STATE_RESET))
+ cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdSetEvent(VkCommandBuffer commandBuffer,
+ VkEvent event,
+ VkPipelineStageFlags stageMask)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+ dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+ VK_FROM_HANDLE(dzn_event, evt, event);
+
+ if (!_mesa_hash_table_insert(cmdbuf->events.ht, evt, (void *)(uintptr_t)DZN_EVENT_STATE_SET))
+ cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdWaitEvents(VkCommandBuffer commandBuffer,
+ uint32_t eventCount,
+ const VkEvent *pEvents,
+ VkPipelineStageFlags srcStageMask,
+ VkPipelineStageFlags dstStageMask,
+ uint32_t memoryBarrierCount,
+ const VkMemoryBarrier *pMemoryBarriers,
+ uint32_t bufferMemoryBarrierCount,
+ const VkBufferMemoryBarrier *pBufferMemoryBarriers,
+ uint32_t imageMemoryBarrierCount,
+ const VkImageMemoryBarrier *pImageMemoryBarriers)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+ dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+
+ /* Intra-command list waits are handled by this pipeline flush, which is
+ * overkill, but that's the best we can do with the standard D3D12 barrier
+ * API.
+ *
+ * Inter-command list waits are taken care of by the serialization done at
+ * the ExecuteCommandLists() level:
+ * "Calling ExecuteCommandLists twice in succession (from the same thread,
+ * or different threads) guarantees that the first workload (A) finishes
+ * before the second workload (B)"
+ *
+ * HOST -> DEVICE signaling is ignored and we assume events are always
+ * signaled when we reach the vkCmdWaitEvents() point:
+ * "Command buffers in the submission can include vkCmdWaitEvents commands
+ * that wait on events that will not be signaled by earlier commands in the
+ * queue. Such events must be signaled by the application using vkSetEvent,
+ * and the vkCmdWaitEvents commands that wait upon them must not be inside
+ * a render pass instance.
+ * The event must be set before the vkCmdWaitEvents command is executed."
+ */
+ bool flush_pipeline = false;
+
+ for (uint32_t i = 0; i < eventCount; i++) {
+ VK_FROM_HANDLE(dzn_event, event, pEvents[i]);
+
+ struct hash_entry *he =
+ _mesa_hash_table_search(cmdbuf->events.ht, event);
+ if (he) {
+ enum dzn_event_state state = (enum dzn_event_state)(uintptr_t)he->data;
+ assert(state != DZN_EVENT_STATE_RESET);
+ flush_pipeline |= state == DZN_EVENT_STATE_SET;
+ } else {
+ if (!_mesa_hash_table_insert(cmdbuf->events.ht, event,
+ (void *)(uintptr_t)DZN_EVENT_STATE_EXTERNAL_WAIT)) {
+ cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return;
+ }
+
+ dzn_event **entry = (dzn_event **)
+ util_dynarray_grow(&cmdbuf->events.wait, dzn_event *, 1);
+
+ if (!entry) {
+ cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return;
+ }
+
+ *entry = event;
+ }
+ }
+
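+ /* Note: a UAV barrier with pResource == NULL applies to any UAV access,
+ * which is what makes it usable as a catch-all pipeline flush here.
+ */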
+ if (flush_pipeline) {
+ D3D12_RESOURCE_BARRIER barrier = {
+ .Type = D3D12_RESOURCE_BARRIER_TYPE_UAV,
+ .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
+ .UAV = { .pResource = NULL },
+ };
+
+ cmdbuf->cmdlist->ResourceBarrier(1, &barrier);
+ }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdBeginQuery(VkCommandBuffer commandBuffer,
+ VkQueryPool queryPool,
+ uint32_t query,
+ VkQueryControlFlags flags)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+ dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+ VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
+
+ dzn_cmd_buffer_query_pool_state *state =
+ dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
+ if (!state)
+ return;
+
+ qpool->queries[query].type = dzn_query_pool_get_query_type(qpool, flags);
+ dzn_cmd_buffer_dynbitset_clear(cmdbuf, &state->collect, query);
+ cmdbuf->cmdlist->BeginQuery(qpool->heap, qpool->queries[query].type, query);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdEndQuery(VkCommandBuffer commandBuffer,
+ VkQueryPool queryPool,
+ uint32_t query)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+ VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
+
+ dzn_cmd_buffer_query_pool_state *state =
+ dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
+ if (!state)
+ return;
+
+ dzn_cmd_buffer_dynbitset_set(cmdbuf, &state->collect, query);
+ cmdbuf->cmdlist->EndQuery(qpool->heap, qpool->queries[query].type, query);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdWriteTimestamp2(VkCommandBuffer commandBuffer,
+ VkPipelineStageFlags2 stage,
+ VkQueryPool queryPool,
+ uint32_t query)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+ VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
+
+ dzn_cmd_buffer_query_pool_state *state =
+ dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
+ if (!state)
+ return;
+
+ /* Execution barrier so the timestamp gets written after the pipeline flush. */
+ D3D12_RESOURCE_BARRIER barrier = {
+ .Type = D3D12_RESOURCE_BARRIER_TYPE_UAV,
+ .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
+ .UAV = { .pResource = NULL },
+ };
+
+ cmdbuf->cmdlist->ResourceBarrier(1, &barrier);
+
+ qpool->queries[query].type = D3D12_QUERY_TYPE_TIMESTAMP;
+ dzn_cmd_buffer_dynbitset_set(cmdbuf, &state->collect, query);
+ cmdbuf->cmdlist->EndQuery(qpool->heap, qpool->queries[query].type, query);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdResetQueryPool(VkCommandBuffer commandBuffer,
+ VkQueryPool queryPool,
+ uint32_t firstQuery,
+ uint32_t queryCount)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+ dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+ VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
+
+ dzn_cmd_buffer_query_pool_state *state =
+ dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
+
+ if (!state)
+ return;
+
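+ /* Availability and result slots are zeroed by copying from a pre-filled
+ * all-zeros buffer (device->queries.refs), at most
+ * DZN_QUERY_REFS_SECTION_SIZE bytes at a time. Hypothetical numbers: with
+ * a 4096-byte section and 8-byte availability entries, each iteration of
+ * the first loop below can reset up to 512 queries in one
+ * CopyBufferRegion().
+ */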
+ uint32_t q_step = DZN_QUERY_REFS_SECTION_SIZE / sizeof(uint64_t);
+
+ for (uint32_t q = 0; q < queryCount; q += q_step) {
+ uint32_t q_count = MIN2(queryCount - q, q_step);
+
+ cmdbuf->cmdlist->CopyBufferRegion(qpool->collect_buffer,
+ dzn_query_pool_get_availability_offset(qpool, firstQuery + q),
+ device->queries.refs,
+ DZN_QUERY_REFS_ALL_ZEROS_OFFSET,
+ q_count * sizeof(uint64_t));
+ }
+
+ q_step = DZN_QUERY_REFS_SECTION_SIZE / qpool->query_size;
+
+ for (uint32_t q = 0; q < queryCount; q += q_step) {
+ uint32_t q_count = MIN2(queryCount - q, q_step);
+
+ cmdbuf->cmdlist->CopyBufferRegion(qpool->collect_buffer,
+ dzn_query_pool_get_result_offset(qpool, firstQuery + q),
+ device->queries.refs,
+ DZN_QUERY_REFS_ALL_ZEROS_OFFSET,
+ q_count * qpool->query_size);
+ }
+
+ dzn_cmd_buffer_dynbitset_set_range(cmdbuf, &state->reset, firstQuery, queryCount);
+ dzn_cmd_buffer_dynbitset_clear_range(cmdbuf, &state->collect, firstQuery, queryCount);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer,
+ VkQueryPool queryPool,
+ uint32_t firstQuery,
+ uint32_t queryCount,
+ VkBuffer dstBuffer,
+ VkDeviceSize dstOffset,
+ VkDeviceSize stride,
+ VkQueryResultFlags flags)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+ VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
+ VK_FROM_HANDLE(dzn_buffer, buf, dstBuffer);
+
+ dzn_cmd_buffer_query_pool_state *qpstate =
+ dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
+ if (!qpstate)
+ return;
+
+ if (flags & VK_QUERY_RESULT_WAIT_BIT) {
+ for (uint32_t i = 0; i < queryCount; i++) {
+ if (!dzn_cmd_buffer_dynbitset_test(&qpstate->collect, firstQuery + i) &&
+ !dzn_cmd_buffer_dynbitset_test(&qpstate->signal, firstQuery + i))
+ dzn_cmd_buffer_dynbitset_set(cmdbuf, &qpstate->wait, firstQuery + i);
+ }
+ }
+
+ VkResult result =
+ dzn_cmd_buffer_collect_queries(cmdbuf, qpool, qpstate, firstQuery, queryCount);
+ if (result != VK_SUCCESS)
+ return;
+
+ bool raw_copy = (flags & VK_QUERY_RESULT_64_BIT) &&
+ stride == qpool->query_size &&
+ !(flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT);
+#define ALL_STATS \
+ (VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT | \
+ VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT | \
+ VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT | \
+ VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT | \
+ VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT | \
+ VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT | \
+ VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT | \
+ VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT | \
+ VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT | \
+ VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT | \
+ VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT)
+ if (qpool->heap_type == D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS &&
+ qpool->pipeline_statistics != ALL_STATS)
+ raw_copy = false;
+#undef ALL_STATS
+
+ D3D12_RESOURCE_BARRIER barrier = {
+ .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
+ .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
+ .Transition = {
+ .pResource = qpool->collect_buffer,
+ .StateBefore = D3D12_RESOURCE_STATE_COPY_DEST,
+ .StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE,
+ },
+ };
+
+ cmdbuf->cmdlist->ResourceBarrier(1, &barrier);
+
+ if (raw_copy) {
+ cmdbuf->cmdlist->CopyBufferRegion(buf->res, dstOffset,
+ qpool->collect_buffer,
+ dzn_query_pool_get_result_offset(qpool, firstQuery),
+ dzn_query_pool_get_result_size(qpool, queryCount));
+ } else {
+ uint32_t step = flags & VK_QUERY_RESULT_64_BIT ? sizeof(uint64_t) : sizeof(uint32_t);
+
+ for (uint32_t q = 0; q < queryCount; q++) {
+ uint32_t res_offset = dzn_query_pool_get_result_offset(qpool, firstQuery + q);
+ uint32_t dst_counter_offset = 0;
+
+ if (qpool->heap_type == D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS) {
+ for (uint32_t c = 0; c < sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS) / sizeof(uint64_t); c++) {
+ if (!(BITFIELD_BIT(c) & qpool->pipeline_statistics))
+ continue;
+
+ cmdbuf->cmdlist->CopyBufferRegion(buf->res, dstOffset + dst_counter_offset,
+ qpool->collect_buffer,
+ res_offset + (c * sizeof(uint64_t)),
+ step);
+ dst_counter_offset += step;
+ }
+ } else {
+ cmdbuf->cmdlist->CopyBufferRegion(buf->res, dstOffset,
+ qpool->collect_buffer,
+ res_offset, step);
+ dst_counter_offset += step;
+ }
+
+ if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
+ cmdbuf->cmdlist->CopyBufferRegion(buf->res, dstOffset + dst_counter_offset,
+ qpool->collect_buffer,
+ dzn_query_pool_get_availability_offset(qpool, firstQuery + q),
+ step);
+ }
+
+ dstOffset += stride;
+ }
+ }
+
+ DZN_SWAP(barrier.Transition.StateBefore, barrier.Transition.StateAfter);
+ cmdbuf->cmdlist->ResourceBarrier(1, &barrier);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdDispatchIndirect(VkCommandBuffer commandBuffer,
+ VkBuffer buffer,
+ VkDeviceSize offset)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+ dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+ VK_FROM_HANDLE(dzn_buffer, buf, buffer);
+
+ cmdbuf->state.sysvals.compute.group_count_x = 0;
+ cmdbuf->state.sysvals.compute.group_count_y = 0;
+ cmdbuf->state.sysvals.compute.group_count_z = 0;
+ cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |=
+ DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
+
+ dzn_cmd_buffer_prepare_dispatch(cmdbuf);
+
+ dzn_compute_pipeline *pipeline = (dzn_compute_pipeline *)
+ cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].pipeline;
+ ID3D12CommandSignature *cmdsig =
+ dzn_compute_pipeline_get_indirect_cmd_sig(pipeline);
+
+ if (!cmdsig) {
+ cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return;
+ }
+
+ ID3D12Resource *exec_buf;
+ VkResult result =
+ dzn_cmd_buffer_alloc_internal_buf(cmdbuf, sizeof(D3D12_DISPATCH_ARGUMENTS) * 2,
+ D3D12_HEAP_TYPE_DEFAULT,
+ D3D12_RESOURCE_STATE_COPY_DEST,
+ &exec_buf);
+ if (result != VK_SUCCESS)
+ return;
+
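+ /* Two copies of the dispatch arguments are staged below. As a sketch of
+ * the intent (assuming the command signature returned by
+ * dzn_compute_pipeline_get_indirect_cmd_sig() loads root constants before
+ * dispatching), the first copy feeds the group-count sysvals and the
+ * second one feeds the actual indirect dispatch.
+ */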
+ cmdbuf->cmdlist->CopyBufferRegion(exec_buf, 0,
+ buf->res,
+ offset,
+ sizeof(D3D12_DISPATCH_ARGUMENTS));
+ cmdbuf->cmdlist->CopyBufferRegion(exec_buf, sizeof(D3D12_DISPATCH_ARGUMENTS),
+ buf->res,
+ offset,
+ sizeof(D3D12_DISPATCH_ARGUMENTS));
+ D3D12_RESOURCE_BARRIER barriers[] = {
+ {
+ .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
+ .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
+ /* Transition the exec buffer to indirect arg so it can be
+ * passed to ExecuteIndirect() as an argument buffer.
+ */
+ .Transition = {
+ .pResource = exec_buf,
+ .Subresource = 0,
+ .StateBefore = D3D12_RESOURCE_STATE_COPY_DEST,
+ .StateAfter = D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT,
+ },
+ },
+ };
+
+ cmdbuf->cmdlist->ResourceBarrier(ARRAY_SIZE(barriers), barriers);
+
+ cmdbuf->cmdlist->ExecuteIndirect(cmdsig, 1, exec_buf, 0, NULL, 0);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdSetLineWidth(VkCommandBuffer commandBuffer,
+ float lineWidth)
+{
+ assert(lineWidth == 1.0f);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdSetDepthBias(VkCommandBuffer commandBuffer,
+ float depthBiasConstantFactor,
+ float depthBiasClamp,
+ float depthBiasSlopeFactor)
+{
+ dzn_stub();
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdSetBlendConstants(VkCommandBuffer commandBuffer,
+ const float blendConstants[4])
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+
+ cmdbuf->cmdlist->OMSetBlendFactor(blendConstants);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdSetDepthBounds(VkCommandBuffer commandBuffer,
+ float minDepthBounds,
+ float maxDepthBounds)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+
+ cmdbuf->cmdlist->OMSetDepthBounds(minDepthBounds, maxDepthBounds);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer,
+ VkStencilFaceFlags faceMask,
+ uint32_t compareMask)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+
+ if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
+ cmdbuf->state.zsa.stencil_test.front.compare_mask = compareMask;
+
+ if (faceMask & VK_STENCIL_FACE_BACK_BIT)
+ cmdbuf->state.zsa.stencil_test.back.compare_mask = compareMask;
+
+ cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_COMPARE_MASK;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer,
+ VkStencilFaceFlags faceMask,
+ uint32_t writeMask)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+
+ if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
+ cmdbuf->state.zsa.stencil_test.front.write_mask = writeMask;
+
+ if (faceMask & VK_STENCIL_FACE_BACK_BIT)
+ cmdbuf->state.zsa.stencil_test.back.write_mask = writeMask;
+
+ cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_WRITE_MASK;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdSetStencilReference(VkCommandBuffer commandBuffer,
+ VkStencilFaceFlags faceMask,
+ uint32_t reference)
+{
+ VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+
+ if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
+ cmdbuf->state.zsa.stencil_test.front.ref = reference;
+
+ if (faceMask & VK_STENCIL_FACE_BACK_BIT)
+ cmdbuf->state.zsa.stencil_test.back.ref = reference;
+
+ cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_REF;
+}
diff --git a/src/microsoft/vulkan/dzn_cmd_exec_functions b/src/microsoft/vulkan/dzn_cmd_exec_functions
new file mode 100644
index 00000000000..9512a8c7923
--- /dev/null
+++ b/src/microsoft/vulkan/dzn_cmd_exec_functions
@@ -0,0 +1,41 @@
+CmdBeginQuery
+CmdBeginRenderPass2
+CmdBindDescriptorSets
+CmdBindIndexBuffer
+CmdBindPipeline
+CmdBindVertexBuffers
+CmdBlitImage2
+CmdClearAttachments
+CmdClearColorImage
+CmdClearDepthStencilImage
+CmdCopyBuffer2
+CmdCopyBufferToImage2
+CmdCopyImage2
+CmdCopyImageToBuffer2
+CmdCopyQueryPoolResults
+CmdDispatch
+CmdDispatchIndirect
+CmdDraw
+CmdDrawIndexed
+CmdDrawIndexedIndirect
+CmdDrawIndirect
+CmdEndQuery
+CmdEndRenderPass2
+CmdFillBuffer
+CmdNextSubpass2
+CmdPipelineBarrier2
+CmdPushConstants
+CmdResetEvent
+CmdResetQueryPool
+CmdResolveImage2
+CmdSetBlendConstants
+CmdSetDepthBias
+CmdSetDepthBounds
+CmdSetEvent
+CmdSetLineWidth
+CmdSetScissor
+CmdSetStencilCompareMask
+CmdSetStencilReference
+CmdSetStencilWriteMask
+CmdUpdateBuffer
+CmdWaitEvents
diff --git a/src/microsoft/vulkan/dzn_descriptor_set.cpp b/src/microsoft/vulkan/dzn_descriptor_set.cpp
new file mode 100644
index 00000000000..520d94477c0
--- /dev/null
+++ b/src/microsoft/vulkan/dzn_descriptor_set.cpp
@@ -0,0 +1,1802 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "dzn_private.h"
+
+#include <wrl/client.h>
+
+#include "vk_alloc.h"
+#include "vk_descriptors.h"
+#include "vk_util.h"
+
+using Microsoft::WRL::ComPtr;
+
+static D3D12_SHADER_VISIBILITY
+translate_desc_visibility(VkShaderStageFlags in)
+{
+ switch (in) {
+ case VK_SHADER_STAGE_VERTEX_BIT: return D3D12_SHADER_VISIBILITY_VERTEX;
+ case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: return D3D12_SHADER_VISIBILITY_HULL;
+ case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: return D3D12_SHADER_VISIBILITY_DOMAIN;
+ case VK_SHADER_STAGE_GEOMETRY_BIT: return D3D12_SHADER_VISIBILITY_GEOMETRY;
+ case VK_SHADER_STAGE_FRAGMENT_BIT: return D3D12_SHADER_VISIBILITY_PIXEL;
+ default: return D3D12_SHADER_VISIBILITY_ALL;
+ }
+}
+
+static D3D12_DESCRIPTOR_RANGE_TYPE
+desc_type_to_range_type(VkDescriptorType in, bool writeable)
+{
+ switch (in) {
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ return D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER;
+
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+ return D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
+
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ return D3D12_DESCRIPTOR_RANGE_TYPE_CBV;
+
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+ return writeable ? D3D12_DESCRIPTOR_RANGE_TYPE_UAV : D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
+ default:
+ unreachable("Unsupported desc type");
+ }
+}
+
+static bool
+is_dynamic_desc_type(VkDescriptorType desc_type)
+{
+ return (desc_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
+ desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC);
+}
+
+static uint32_t
+num_descs_for_type(VkDescriptorType type, bool static_sampler)
+{
+ unsigned num_descs = 1;
+
+ /* Some types map to an SRV or a UAV depending on how the shader uses the
+ * resource (NONWRITEABLE flag set or not). In that case we need to reserve
+ * slots for both the UAV and SRV descs.
+ */
+ if (dzn_descriptor_type_depends_on_shader_usage(type))
+ num_descs++;
+
+ /* There's no combined SRV+sampler type in D3D12; we need a separate
+ * descriptor for the sampler.
+ */
+ if (type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
+ num_descs++;
+
+ /* Don't count immutable samplers, they have their own descriptor. */
+ if (static_sampler &&
+ (type == VK_DESCRIPTOR_TYPE_SAMPLER ||
+ type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER))
+ num_descs--;
+
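+ /* Examples of the resulting counts: a COMBINED_IMAGE_SAMPLER with a
+ * non-static (or no) immutable sampler needs 2 descriptors (SRV +
+ * sampler), a STORAGE_BUFFER needs 2 (SRV and UAV variants), and a static
+ * SAMPLER needs 0 since it lives in the root signature.
+ */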
+ return num_descs;
+}
+
+static void
+dzn_descriptor_set_layout_destroy(dzn_descriptor_set_layout *set_layout,
+ const VkAllocationCallbacks *pAllocator)
+{
+ if (!set_layout)
+ return;
+
+ dzn_device *device = container_of(set_layout->base.device, dzn_device, vk);
+
+ vk_object_base_finish(&set_layout->base);
+ vk_free2(&device->vk.alloc, pAllocator, set_layout);
+}
+
+static VkResult
+dzn_descriptor_set_layout_create(dzn_device *device,
+ const VkDescriptorSetLayoutCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkDescriptorSetLayout *out)
+{
+ const VkDescriptorSetLayoutBinding *bindings = pCreateInfo->pBindings;
+ uint32_t binding_count = 0, static_sampler_count = 0, total_ranges = 0;
+ uint32_t dynamic_ranges_offset = 0, immutable_sampler_count = 0;
+ uint32_t range_count[MAX_SHADER_VISIBILITIES][NUM_POOL_TYPES] = {};
+
+ for (uint32_t i = 0; i < pCreateInfo->bindingCount; i++) {
+ D3D12_SHADER_VISIBILITY visibility =
+ translate_desc_visibility(bindings[i].stageFlags);
+ VkDescriptorType desc_type = bindings[i].descriptorType;
+ bool has_sampler =
+ desc_type == VK_DESCRIPTOR_TYPE_SAMPLER ||
+ desc_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
+
+ /* From the Vulkan 1.1.97 spec for VkDescriptorSetLayoutBinding:
+ *
+ * "If descriptorType specifies a VK_DESCRIPTOR_TYPE_SAMPLER or
+ * VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER type descriptor, then
+ * pImmutableSamplers can be used to initialize a set of immutable
+ * samplers. [...] If descriptorType is not one of these descriptor
+ * types, then pImmutableSamplers is ignored."
+ *
+ * We need to be careful here and only parse pImmutableSamplers if we
+ * have one of the right descriptor types.
+ */
+ bool immutable_samplers =
+ has_sampler &&
+ bindings[i].pImmutableSamplers != NULL;
+ bool static_sampler = false;
+
+ if (immutable_samplers && bindings[i].descriptorCount == 1) {
+ VK_FROM_HANDLE(dzn_sampler, sampler, bindings[i].pImmutableSamplers[0]);
+
+ if (sampler->static_border_color != -1)
+ static_sampler = true;
+ }
+
+ if (static_sampler) {
+ static_sampler_count += bindings[i].descriptorCount;
+ } else if (has_sampler) {
+ range_count[visibility][D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER]++;
+ total_ranges++;
+
+ if (immutable_samplers)
+ immutable_sampler_count += bindings[i].descriptorCount;
+ }
+
+ if (desc_type != VK_DESCRIPTOR_TYPE_SAMPLER) {
+ range_count[visibility][D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV]++;
+ total_ranges++;
+
+ if (dzn_descriptor_type_depends_on_shader_usage(desc_type)) {
+ range_count[visibility][D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV]++;
+ total_ranges++;
+ }
+
+ if (!is_dynamic_desc_type(desc_type)) {
+ uint32_t factor =
+ dzn_descriptor_type_depends_on_shader_usage(desc_type) ? 2 : 1;
+ dynamic_ranges_offset += bindings[i].descriptorCount * factor;
+ }
+ }
+
+ binding_count = MAX2(binding_count, bindings[i].binding + 1);
+ }
+
+ /* We need to allocate descriptor set layouts off the device allocator
+ * with DEVICE scope because they are reference counted and may not be
+ * destroyed when vkDestroyDescriptorSetLayout is called.
+ */
+ VK_MULTIALLOC(ma);
+ VK_MULTIALLOC_DECL(&ma, struct dzn_descriptor_set_layout, set_layout, 1);
+ VK_MULTIALLOC_DECL(&ma, D3D12_DESCRIPTOR_RANGE1,
+ ranges, total_ranges);
+ VK_MULTIALLOC_DECL(&ma, D3D12_STATIC_SAMPLER_DESC, static_samplers,
+ static_sampler_count);
+ VK_MULTIALLOC_DECL(&ma, const dzn_sampler *, immutable_samplers,
+ immutable_sampler_count);
+ VK_MULTIALLOC_DECL(&ma, dzn_descriptor_set_layout_binding, binfos,
+ binding_count);
+
+ if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, pAllocator,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT))
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ vk_object_base_init(&device->vk, &set_layout->base, VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT);
+ set_layout->static_samplers = static_samplers;
+ set_layout->static_sampler_count = static_sampler_count;
+ set_layout->immutable_samplers = immutable_samplers;
+ set_layout->immutable_sampler_count = immutable_sampler_count;
+ set_layout->bindings = binfos;
+ set_layout->binding_count = binding_count;
+ set_layout->dynamic_buffers.range_offset = dynamic_ranges_offset;
+
+ for (uint32_t i = 0; i < MAX_SHADER_VISIBILITIES; i++) {
+ dzn_foreach_pool_type (type) {
+ if (range_count[i][type]) {
+ set_layout->ranges[i][type] = ranges;
+ set_layout->range_count[i][type] = range_count[i][type];
+ ranges += range_count[i][type];
+ }
+ }
+ }
+
+ VkDescriptorSetLayoutBinding *ordered_bindings;
+ VkResult ret =
+ vk_create_sorted_bindings(pCreateInfo->pBindings,
+ pCreateInfo->bindingCount,
+ &ordered_bindings);
+ if (ret != VK_SUCCESS)
+ return ret;
+
+ assert(binding_count ==
+ (pCreateInfo->bindingCount ?
+ (ordered_bindings[pCreateInfo->bindingCount - 1].binding + 1) : 0));
+
+ uint32_t range_idx[MAX_SHADER_VISIBILITIES][NUM_POOL_TYPES] = {};
+ uint32_t static_sampler_idx = 0, immutable_sampler_idx = 0;
+ uint32_t dynamic_buffer_idx = 0;
+ uint32_t base_register = 0;
+
+ for (uint32_t i = 0; i < binding_count; i++) {
+ binfos[i].static_sampler_idx = ~0;
+ binfos[i].immutable_sampler_idx = ~0;
+ binfos[i].dynamic_buffer_idx = ~0;
+ dzn_foreach_pool_type (type)
+ binfos[i].range_idx[type] = ~0;
+ }
+
+ for (uint32_t i = 0; i < pCreateInfo->bindingCount; i++) {
+ VkDescriptorType desc_type = ordered_bindings[i].descriptorType;
+ uint32_t binding = ordered_bindings[i].binding;
+ uint32_t desc_count = ordered_bindings[i].descriptorCount;
+ bool has_sampler =
+ desc_type == VK_DESCRIPTOR_TYPE_SAMPLER ||
+ desc_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
+ bool has_immutable_samplers =
+ has_sampler &&
+ ordered_bindings[i].pImmutableSamplers != NULL;
+ bool has_static_sampler = has_immutable_samplers && desc_count == 1;
+ bool is_dynamic = is_dynamic_desc_type(desc_type);
+
+ D3D12_SHADER_VISIBILITY visibility =
+ translate_desc_visibility(ordered_bindings[i].stageFlags);
+ binfos[binding].type = desc_type;
+ binfos[binding].visibility = visibility;
+ binfos[binding].base_shader_register = base_register;
+ assert(base_register + desc_count >= base_register);
+ base_register += desc_count;
+
+ if (has_static_sampler) {
+ VK_FROM_HANDLE(dzn_sampler, sampler, ordered_bindings[i].pImmutableSamplers[0]);
+
+ /* Not all border colors are supported. */
+ if (sampler->static_border_color != -1) {
+ binfos[binding].static_sampler_idx = static_sampler_idx;
+ D3D12_STATIC_SAMPLER_DESC *desc = (D3D12_STATIC_SAMPLER_DESC *)
+ &static_samplers[static_sampler_idx];
+
+ desc->Filter = sampler->desc.Filter;
+ desc->AddressU = sampler->desc.AddressU;
+ desc->AddressV = sampler->desc.AddressV;
+ desc->AddressW = sampler->desc.AddressW;
+ desc->MipLODBias = sampler->desc.MipLODBias;
+ desc->MaxAnisotropy = sampler->desc.MaxAnisotropy;
+ desc->ComparisonFunc = sampler->desc.ComparisonFunc;
+ desc->BorderColor = sampler->static_border_color;
+ desc->MinLOD = sampler->desc.MinLOD;
+ desc->MaxLOD = sampler->desc.MaxLOD;
+ desc->ShaderRegister = binfos[binding].base_shader_register;
+ desc->ShaderVisibility = translate_desc_visibility(ordered_bindings[i].stageFlags);
+ static_sampler_idx++;
+ } else {
+ has_static_sampler = false;
+ }
+ }
+
+ if (has_immutable_samplers && !has_static_sampler) {
+ binfos[binding].immutable_sampler_idx = immutable_sampler_idx;
+ for (uint32_t s = 0; s < desc_count; s++) {
+ VK_FROM_HANDLE(dzn_sampler, sampler, ordered_bindings[i].pImmutableSamplers[s]);
+
+ immutable_samplers[immutable_sampler_idx++] = sampler;
+ }
+ }
+
+ if (is_dynamic) {
+ binfos[binding].dynamic_buffer_idx = dynamic_buffer_idx;
+ for (uint32_t d = 0; d < desc_count; d++)
+ set_layout->dynamic_buffers.bindings[dynamic_buffer_idx + d] = binding;
+ dynamic_buffer_idx += desc_count;
+ assert(dynamic_buffer_idx <= MAX_DYNAMIC_BUFFERS);
+ }
+
+ unsigned num_descs =
+ num_descs_for_type(desc_type, has_static_sampler);
+ if (!num_descs) continue;
+
+ assert(visibility < ARRAY_SIZE(set_layout->ranges));
+
+ bool has_range[NUM_POOL_TYPES] = {};
+ has_range[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER] =
+ has_sampler && !has_static_sampler;
+ has_range[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] =
+ desc_type != VK_DESCRIPTOR_TYPE_SAMPLER;
+
+ dzn_foreach_pool_type (type) {
+ if (!has_range[type]) continue;
+
+ uint32_t idx = range_idx[visibility][type]++;
+ assert(idx < range_count[visibility][type]);
+
+ binfos[binding].range_idx[type] = idx;
+ D3D12_DESCRIPTOR_RANGE1 *range = (D3D12_DESCRIPTOR_RANGE1 *)
+ &set_layout->ranges[visibility][type][idx];
+ VkDescriptorType range_type = desc_type;
+ if (desc_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
+ range_type = type == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER ?
+ VK_DESCRIPTOR_TYPE_SAMPLER :
+ VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
+ }
+ range->RangeType = desc_type_to_range_type(range_type, false);
+ range->NumDescriptors = desc_count;
+ range->BaseShaderRegister = binfos[binding].base_shader_register;
+ range->Flags = type == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER ?
+ D3D12_DESCRIPTOR_RANGE_FLAG_NONE :
+ D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_STATIC_KEEPING_BUFFER_BOUNDS_CHECKS;
+ if (is_dynamic) {
+ range->OffsetInDescriptorsFromTableStart =
+ set_layout->dynamic_buffers.range_offset +
+ set_layout->dynamic_buffers.desc_count;
+ set_layout->dynamic_buffers.count += range->NumDescriptors;
+ set_layout->dynamic_buffers.desc_count += range->NumDescriptors;
+ } else {
+ range->OffsetInDescriptorsFromTableStart = set_layout->range_desc_count[type];
+ set_layout->range_desc_count[type] += range->NumDescriptors;
+ }
+
+ if (!dzn_descriptor_type_depends_on_shader_usage(desc_type))
+ continue;
+
+ assert(idx + 1 < range_count[visibility][type]);
+ range_idx[visibility][type]++;
+ range[1] = range[0];
+ range++;
+ range->RangeType = desc_type_to_range_type(range_type, true);
+ if (is_dynamic) {
+ range->OffsetInDescriptorsFromTableStart =
+ set_layout->dynamic_buffers.range_offset +
+ set_layout->dynamic_buffers.desc_count;
+ set_layout->dynamic_buffers.desc_count += range->NumDescriptors;
+ } else {
+ range->OffsetInDescriptorsFromTableStart = set_layout->range_desc_count[type];
+ set_layout->range_desc_count[type] += range->NumDescriptors;
+ }
+ }
+ }
+
+ free(ordered_bindings);
+
+ *out = dzn_descriptor_set_layout_to_handle(set_layout);
+ return VK_SUCCESS;
+}
+
+uint32_t
+dzn_descriptor_set_layout_get_heap_offset(const dzn_descriptor_set_layout *layout,
+ uint32_t b,
+ D3D12_DESCRIPTOR_HEAP_TYPE type,
+ bool writeable)
+{
+ assert(b < layout->binding_count);
+ D3D12_SHADER_VISIBILITY visibility = layout->bindings[b].visibility;
+ assert(visibility < ARRAY_SIZE(layout->ranges));
+ assert(type < NUM_POOL_TYPES);
+
+ uint32_t range_idx = layout->bindings[b].range_idx[type];
+
+ if (range_idx == ~0)
+ return ~0;
+
+ if (writeable &&
+ !dzn_descriptor_type_depends_on_shader_usage(layout->bindings[b].type))
+ return ~0;
+
+ if (writeable)
+ range_idx++;
+
+ assert(range_idx < layout->range_count[visibility][type]);
+ return layout->ranges[visibility][type][range_idx].OffsetInDescriptorsFromTableStart;
+}
+
+uint32_t
+dzn_descriptor_set_layout_get_desc_count(const dzn_descriptor_set_layout *layout,
+ uint32_t b)
+{
+ D3D12_SHADER_VISIBILITY visibility = layout->bindings[b].visibility;
+ assert(visibility < ARRAY_SIZE(layout->ranges));
+
+ dzn_foreach_pool_type (type) {
+ uint32_t range_idx = layout->bindings[b].range_idx[type];
+ assert(range_idx == ~0 || range_idx < layout->range_count[visibility][type]);
+
+ if (range_idx != ~0)
+ return layout->ranges[visibility][type][range_idx].NumDescriptors;
+ }
+
+ return 0;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_CreateDescriptorSetLayout(VkDevice device,
+ const VkDescriptorSetLayoutCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkDescriptorSetLayout *pSetLayout)
+{
+ return dzn_descriptor_set_layout_create(dzn_device_from_handle(device),
+ pCreateInfo, pAllocator, pSetLayout);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_DestroyDescriptorSetLayout(VkDevice device,
+ VkDescriptorSetLayout descriptorSetLayout,
+ const VkAllocationCallbacks *pAllocator)
+{
+ dzn_descriptor_set_layout_destroy(dzn_descriptor_set_layout_from_handle(descriptorSetLayout),
+ pAllocator);
+}
+
+static void
+dzn_pipeline_layout_destroy(dzn_pipeline_layout *layout)
+{
+ dzn_device *device = container_of(layout->base.device, dzn_device, vk);
+
+ if (layout->root.sig)
+ layout->root.sig->Release();
+
+ vk_free(&device->vk.alloc, layout);
+}
+
+// Reserve two root parameters for the push constants and sysvals CBVs.
+#define MAX_INTERNAL_ROOT_PARAMS 2
+
+// One root parameter for samplers and the other one for views, multiplied by
+// the number of visibility combinations, plus the internal root parameters.
+#define MAX_ROOT_PARAMS ((MAX_SHADER_VISIBILITIES * 2) + MAX_INTERNAL_ROOT_PARAMS)
+
+// Maximum number of DWORDS (32-bit words) that can be used for a root signature
+#define MAX_ROOT_DWORDS 64
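+// Rough costing, per the D3D12 root signature limits: a descriptor table
+// costs 1 DWORD and root constants cost 1 DWORD per 32-bit value, which is
+// what the root_dwords accounting below tracks against this 64-DWORD budget.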
+
+static VkResult
+dzn_pipeline_layout_create(dzn_device *device,
+ const VkPipelineLayoutCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkPipelineLayout *out)
+{
+ uint32_t binding_count = 0;
+
+ for (uint32_t s = 0; s < pCreateInfo->setLayoutCount; s++) {
+ VK_FROM_HANDLE(dzn_descriptor_set_layout, set_layout, pCreateInfo->pSetLayouts[s]);
+
+ if (!set_layout)
+ continue;
+
+ binding_count += set_layout->binding_count;
+ }
+
+ VK_MULTIALLOC(ma);
+ VK_MULTIALLOC_DECL(&ma, dzn_pipeline_layout, layout, 1);
+ VK_MULTIALLOC_DECL(&ma, dxil_spirv_vulkan_binding,
+ bindings, binding_count);
+
+ if (!vk_multialloc_zalloc(&ma, &device->vk.alloc,
+ VK_SYSTEM_ALLOCATION_SCOPE_DEVICE))
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ vk_object_base_init(&device->vk, &layout->base, VK_OBJECT_TYPE_PIPELINE_LAYOUT);
+
+ for (uint32_t s = 0; s < pCreateInfo->setLayoutCount; s++) {
+ VK_FROM_HANDLE(dzn_descriptor_set_layout, set_layout, pCreateInfo->pSetLayouts[s]);
+
+ if (!set_layout || !set_layout->binding_count)
+ continue;
+
+ layout->binding_translation[s].bindings = bindings;
+ bindings += set_layout->binding_count;
+ }
+
+ uint32_t range_count = 0, static_sampler_count = 0;
+
+ p_atomic_set(&layout->refcount, 1);
+
+ layout->root.param_count = 0;
+ dzn_foreach_pool_type (type)
+ layout->desc_count[type] = 0;
+
+ layout->set_count = pCreateInfo->setLayoutCount;
+ for (uint32_t j = 0; j < layout->set_count; j++) {
+ VK_FROM_HANDLE(dzn_descriptor_set_layout, set_layout, pCreateInfo->pSetLayouts[j]);
+ dxil_spirv_vulkan_binding *bindings = layout->binding_translation[j].bindings;
+
+ layout->sets[j].dynamic_buffer_count = set_layout->dynamic_buffers.count;
+ memcpy(layout->sets[j].range_desc_count, set_layout->range_desc_count,
+ sizeof(layout->sets[j].range_desc_count));
+ layout->binding_translation[j].binding_count = set_layout->binding_count;
+ for (uint32_t b = 0; b < set_layout->binding_count; b++)
+ bindings[b].base_register = set_layout->bindings[b].base_shader_register;
+
+ static_sampler_count += set_layout->static_sampler_count;
+ dzn_foreach_pool_type (type) {
+ layout->sets[j].heap_offsets[type] = layout->desc_count[type];
+ layout->desc_count[type] += set_layout->range_desc_count[type];
+ for (uint32_t i = 0; i < MAX_SHADER_VISIBILITIES; i++)
+ range_count += set_layout->range_count[i][type];
+ }
+
+ layout->desc_count[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] +=
+ set_layout->dynamic_buffers.desc_count;
+ for (uint32_t o = 0, elem = 0; o < set_layout->dynamic_buffers.count; o++, elem++) {
+ uint32_t b = set_layout->dynamic_buffers.bindings[o];
+
+ if (o > 0 && set_layout->dynamic_buffers.bindings[o - 1] != b)
+ elem = 0;
+
+ uint32_t srv =
+ dzn_descriptor_set_layout_get_heap_offset(set_layout, b, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, false);
+ uint32_t uav =
+ dzn_descriptor_set_layout_get_heap_offset(set_layout, b, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, true);
+
+ layout->sets[j].dynamic_buffer_heap_offsets[o].srv = srv != ~0 ? srv + elem : ~0;
+ layout->sets[j].dynamic_buffer_heap_offsets[o].uav = uav != ~0 ? uav + elem : ~0;
+ }
+ }
+
+ D3D12_DESCRIPTOR_RANGE1 *ranges = (D3D12_DESCRIPTOR_RANGE1 *)
+ vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*ranges) * range_count, 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+ if (range_count && !ranges) {
+ dzn_pipeline_layout_destroy(layout);
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+
+ D3D12_STATIC_SAMPLER_DESC *static_sampler_descs = (D3D12_STATIC_SAMPLER_DESC *)
+ vk_alloc2(&device->vk.alloc, pAllocator,
+ sizeof(*static_sampler_descs) * static_sampler_count, 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+ if (static_sampler_count && !static_sampler_descs) {
+ vk_free2(&device->vk.alloc, pAllocator, ranges);
+ dzn_pipeline_layout_destroy(layout);
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+
+ D3D12_ROOT_PARAMETER1 root_params[MAX_ROOT_PARAMS] = {};
+ D3D12_DESCRIPTOR_RANGE1 *range_ptr = ranges;
+ D3D12_ROOT_PARAMETER1 *root_param;
+ uint32_t root_dwords = 0;
+
+ for (uint32_t i = 0; i < MAX_SHADER_VISIBILITIES; i++) {
+ dzn_foreach_pool_type (type) {
+ root_param = &root_params[layout->root.param_count];
+ root_param->ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
+ root_param->DescriptorTable.pDescriptorRanges = range_ptr;
+ root_param->DescriptorTable.NumDescriptorRanges = 0;
+ root_param->ShaderVisibility = (D3D12_SHADER_VISIBILITY)i;
+
+ for (uint32_t j = 0; j < pCreateInfo->setLayoutCount; j++) {
+ VK_FROM_HANDLE(dzn_descriptor_set_layout, set_layout, pCreateInfo->pSetLayouts[j]);
+ uint32_t range_count = set_layout->range_count[i][type];
+
+ memcpy(range_ptr, set_layout->ranges[i][type],
+ range_count * sizeof(D3D12_DESCRIPTOR_RANGE1));
+ for (uint32_t k = 0; k < range_count; k++) {
+ range_ptr[k].RegisterSpace = j;
+ range_ptr[k].OffsetInDescriptorsFromTableStart +=
+ layout->sets[j].heap_offsets[type];
+ }
+ root_param->DescriptorTable.NumDescriptorRanges += range_count;
+ range_ptr += range_count;
+ }
+
+ if (root_param->DescriptorTable.NumDescriptorRanges) {
+ layout->root.type[layout->root.param_count++] = (D3D12_DESCRIPTOR_HEAP_TYPE)type;
+ root_dwords++;
+ }
+ }
+ }
+
+ layout->root.sets_param_count = layout->root.param_count;
+
+ /* Add our sysval CBV, and make it visible to all shaders */
+ layout->root.sysval_cbv_param_idx = layout->root.param_count;
+ root_param = &root_params[layout->root.param_count++];
+ root_param->ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
+ root_param->Constants.RegisterSpace = DZN_REGISTER_SPACE_SYSVALS;
+ root_param->Constants.ShaderRegister = 0;
+ root_param->Constants.Num32BitValues =
+ DIV_ROUND_UP(MAX2(sizeof(struct dxil_spirv_vertex_runtime_data),
+ sizeof(struct dxil_spirv_compute_runtime_data)),
+ 4);
+ root_param->ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
+ root_dwords += root_param->Constants.Num32BitValues;
+
+ D3D12_STATIC_SAMPLER_DESC *static_sampler_ptr = static_sampler_descs;
+ for (uint32_t j = 0; j < pCreateInfo->setLayoutCount; j++) {
+ VK_FROM_HANDLE(dzn_descriptor_set_layout, set_layout, pCreateInfo->pSetLayouts[j]);
+
+ memcpy(static_sampler_ptr, set_layout->static_samplers,
+ set_layout->static_sampler_count * sizeof(*set_layout->static_samplers));
+ if (j > 0) {
+ for (uint32_t k = 0; k < set_layout->static_sampler_count; k++)
+ static_sampler_ptr[k].RegisterSpace = j;
+ }
+ static_sampler_ptr += set_layout->static_sampler_count;
+ }
+
+ uint32_t push_constant_size = 0;
+ uint32_t push_constant_flags = 0;
+ for (uint32_t j = 0; j < pCreateInfo->pushConstantRangeCount; j++) {
+ const VkPushConstantRange* range = pCreateInfo->pPushConstantRanges + j;
+ push_constant_size = MAX2(push_constant_size, range->offset + range->size);
+ push_constant_flags |= range->stageFlags;
+ }
+
+ if (push_constant_size > 0) {
+ layout->root.push_constant_cbv_param_idx = layout->root.param_count;
+ D3D12_ROOT_PARAMETER1 *root_param = &root_params[layout->root.param_count++];
+
+ root_param->ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
+ root_param->Constants.ShaderRegister = 0;
+ root_param->Constants.Num32BitValues = ALIGN(push_constant_size, 4) / 4;
+ root_param->Constants.RegisterSpace = DZN_REGISTER_SPACE_PUSH_CONSTANT;
+ root_param->ShaderVisibility = translate_desc_visibility(push_constant_flags);
+ root_dwords += root_param->Constants.Num32BitValues;
+ }
+
+ assert(layout->root.param_count <= ARRAY_SIZE(root_params));
+ assert(root_dwords <= MAX_ROOT_DWORDS);
+
+ D3D12_VERSIONED_ROOT_SIGNATURE_DESC root_sig_desc = {
+ .Version = D3D_ROOT_SIGNATURE_VERSION_1_1,
+ .Desc_1_1 = {
+ .NumParameters = layout->root.param_count,
+ .pParameters = layout->root.param_count ? root_params : NULL,
+ .NumStaticSamplers = static_sampler_count,
+ .pStaticSamplers = static_sampler_descs,
+ /* TODO Only enable this flag when needed (optimization) */
+ .Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT,
+ },
+ };
+
+ layout->root.sig = dzn_device_create_root_sig(device, &root_sig_desc);
+ vk_free2(&device->vk.alloc, pAllocator, ranges);
+ vk_free2(&device->vk.alloc, pAllocator, static_sampler_descs);
+
+ if (!layout->root.sig) {
+ dzn_pipeline_layout_destroy(layout);
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+
+ *out = dzn_pipeline_layout_to_handle(layout);
+ return VK_SUCCESS;
+}
+
+dzn_pipeline_layout *
+dzn_pipeline_layout_ref(dzn_pipeline_layout *layout)
+{
+ if (layout)
+ p_atomic_inc(&layout->refcount);
+
+ return layout;
+}
+
+void
+dzn_pipeline_layout_unref(dzn_pipeline_layout *layout)
+{
+ if (layout) {
+ if (p_atomic_dec_zero(&layout->refcount))
+ dzn_pipeline_layout_destroy(layout);
+ }
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_CreatePipelineLayout(VkDevice device,
+ const VkPipelineLayoutCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkPipelineLayout *pPipelineLayout)
+{
+ return dzn_pipeline_layout_create(dzn_device_from_handle(device),
+ pCreateInfo, pAllocator, pPipelineLayout);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_DestroyPipelineLayout(VkDevice device,
+ VkPipelineLayout layout,
+ const VkAllocationCallbacks *pAllocator)
+{
+ VK_FROM_HANDLE(dzn_pipeline_layout, playout, layout);
+
+ dzn_pipeline_layout_unref(playout);
+}
+
+static D3D12_DESCRIPTOR_HEAP_TYPE
+desc_type_to_heap_type(VkDescriptorType in)
+{
+ switch (in) {
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ return D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER;
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+ return D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+ default:
+ unreachable("Unsupported desc type");
+ }
+}
+
+bool
+dzn_descriptor_type_depends_on_shader_usage(VkDescriptorType type)
+{
+ return type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER ||
+ type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE ||
+ type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER ||
+ type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC;
+}
+
+void
+dzn_descriptor_heap_finish(dzn_descriptor_heap *heap)
+{
+ if (heap->heap)
+ heap->heap->Release();
+
+ if (heap->dev)
+ heap->dev->Release();
+}
+
+VkResult
+dzn_descriptor_heap_init(dzn_descriptor_heap *heap,
+ dzn_device *device,
+ D3D12_DESCRIPTOR_HEAP_TYPE type,
+ uint32_t desc_count,
+ bool shader_visible)
+{
+ heap->desc_count = desc_count;
+ heap->type = type;
+ heap->dev = device->dev;
+ heap->dev->AddRef();
+ heap->desc_sz = device->dev->GetDescriptorHandleIncrementSize(type);
+
+ D3D12_DESCRIPTOR_HEAP_DESC desc = {
+ .Type = type,
+ .NumDescriptors = desc_count,
+ .Flags = shader_visible ?
+ D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE :
+ D3D12_DESCRIPTOR_HEAP_FLAG_NONE,
+ };
+
+ if (FAILED(device->dev->CreateDescriptorHeap(&desc,
+ IID_PPV_ARGS(&heap->heap)))) {
+ return vk_error(device,
+ shader_visible ?
+ VK_ERROR_OUT_OF_DEVICE_MEMORY : VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+
+ heap->cpu_base = heap->heap->GetCPUDescriptorHandleForHeapStart().ptr;
+ if (shader_visible)
+ heap->gpu_base = heap->heap->GetGPUDescriptorHandleForHeapStart().ptr;
+
+ return VK_SUCCESS;
+}
+
+D3D12_CPU_DESCRIPTOR_HANDLE
+dzn_descriptor_heap_get_cpu_handle(const dzn_descriptor_heap *heap, uint32_t desc_offset)
+{
+ return D3D12_CPU_DESCRIPTOR_HANDLE {
+ .ptr = heap->cpu_base + (desc_offset * heap->desc_sz),
+ };
+}
+
+D3D12_GPU_DESCRIPTOR_HANDLE
+dzn_descriptor_heap_get_gpu_handle(const dzn_descriptor_heap *heap, uint32_t desc_offset)
+{
+ return D3D12_GPU_DESCRIPTOR_HANDLE {
+ .ptr = heap->gpu_base ? heap->gpu_base + (desc_offset * heap->desc_sz) : 0,
+ };
+}
+
+void
+dzn_descriptor_heap_write_sampler_desc(dzn_descriptor_heap *heap,
+ uint32_t desc_offset,
+ const dzn_sampler *sampler)
+{
+ heap->dev->CreateSampler(&sampler->desc,
+ dzn_descriptor_heap_get_cpu_handle(heap, desc_offset));
+}
+
+void
+dzn_descriptor_heap_write_image_view_desc(dzn_descriptor_heap *heap,
+ uint32_t desc_offset,
+ bool writeable, bool cube_as_2darray,
+ const dzn_image_view *iview)
+{
+ D3D12_CPU_DESCRIPTOR_HANDLE view_handle =
+ dzn_descriptor_heap_get_cpu_handle(heap, desc_offset);
+ dzn_image *image = container_of(iview->vk.image, dzn_image, vk);
+
+ if (writeable) {
+ heap->dev->CreateUnorderedAccessView(image->res, NULL, &iview->uav_desc, view_handle);
+ } else if (cube_as_2darray &&
+ (iview->srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURECUBEARRAY ||
+ iview->srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURECUBE)) {
+ D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = iview->srv_desc;
+ srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY;
+ srv_desc.Texture2DArray.PlaneSlice = 0;
+ if (iview->srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURECUBEARRAY) {
+ srv_desc.Texture2DArray.MostDetailedMip =
+ iview->srv_desc.TextureCubeArray.MostDetailedMip;
+ srv_desc.Texture2DArray.MipLevels =
+ iview->srv_desc.TextureCubeArray.MipLevels;
+ srv_desc.Texture2DArray.FirstArraySlice =
+ iview->srv_desc.TextureCubeArray.First2DArrayFace;
+ srv_desc.Texture2DArray.ArraySize =
+ iview->srv_desc.TextureCubeArray.NumCubes * 6;
+ } else {
+ srv_desc.Texture2DArray.MostDetailedMip =
+ iview->srv_desc.TextureCube.MostDetailedMip;
+ srv_desc.Texture2DArray.MipLevels =
+ iview->srv_desc.TextureCube.MipLevels;
+ srv_desc.Texture2DArray.FirstArraySlice = 0;
+ srv_desc.Texture2DArray.ArraySize = 6;
+ }
+
+ heap->dev->CreateShaderResourceView(image->res, &srv_desc, view_handle);
+ } else {
+ heap->dev->CreateShaderResourceView(image->res, &iview->srv_desc, view_handle);
+ }
+}
+
+void
+dzn_descriptor_heap_write_buffer_view_desc(dzn_descriptor_heap *heap,
+ uint32_t desc_offset,
+ bool writeable,
+ const dzn_buffer_view *bview)
+{
+ D3D12_CPU_DESCRIPTOR_HANDLE view_handle =
+ dzn_descriptor_heap_get_cpu_handle(heap, desc_offset);
+
+ if (writeable)
+ heap->dev->CreateUnorderedAccessView(bview->buffer->res, NULL, &bview->uav_desc, view_handle);
+ else
+ heap->dev->CreateShaderResourceView(bview->buffer->res, &bview->srv_desc, view_handle);
+}
+
+void
+dzn_descriptor_heap_write_buffer_desc(dzn_descriptor_heap *heap,
+ uint32_t desc_offset,
+ bool writeable,
+ const dzn_buffer_desc *info)
+{
+ D3D12_CPU_DESCRIPTOR_HANDLE view_handle =
+ dzn_descriptor_heap_get_cpu_handle(heap, desc_offset);
+
+ VkDeviceSize size =
+ info->range == VK_WHOLE_SIZE ?
+ info->buffer->size - info->offset :
+ info->range;
+
+ if (info->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
+ info->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) {
+ assert(!writeable);
+ D3D12_CONSTANT_BUFFER_VIEW_DESC cbv_desc = {
+ .BufferLocation = info->buffer->res->GetGPUVirtualAddress() + info->offset,
+ .SizeInBytes = ALIGN_POT(size, 256),
+ };
+ heap->dev->CreateConstantBufferView(&cbv_desc, view_handle);
+ } else if (writeable) {
+ D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = {
+ .Format = DXGI_FORMAT_R32_TYPELESS,
+ .ViewDimension = D3D12_UAV_DIMENSION_BUFFER,
+ .Buffer = {
+ .FirstElement = info->offset / sizeof(uint32_t),
+ .NumElements = (UINT)size / sizeof(uint32_t),
+ .Flags = D3D12_BUFFER_UAV_FLAG_RAW,
+ },
+ };
+ heap->dev->CreateUnorderedAccessView(info->buffer->res, NULL, &uav_desc, view_handle);
+ } else {
+ D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = {
+ .Format = DXGI_FORMAT_R32_TYPELESS,
+ .ViewDimension = D3D12_SRV_DIMENSION_BUFFER,
+ .Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING,
+ .Buffer = {
+ .FirstElement = info->offset / sizeof(uint32_t),
+ .NumElements = (UINT)size / sizeof(uint32_t),
+ .Flags = D3D12_BUFFER_SRV_FLAG_RAW,
+ },
+ };
+ heap->dev->CreateShaderResourceView(info->buffer->res, &srv_desc, view_handle);
+ }
+}
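+
+/* Two D3D12 rules are at play above: constant-buffer views must be sized in
+ * multiples of D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT (256 bytes),
+ * hence the ALIGN_POT(size, 256), and storage buffers are exposed as raw
+ * (R32_TYPELESS) views, so FirstElement/NumElements are counted in 32-bit
+ * words.
+ */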
+
+void
+dzn_descriptor_heap_copy(dzn_descriptor_heap *dst_heap,
+ uint32_t dst_offset,
+ const dzn_descriptor_heap *src_heap,
+ uint32_t src_offset,
+ uint32_t desc_count)
+{
+ D3D12_CPU_DESCRIPTOR_HANDLE dst_handle =
+ dzn_descriptor_heap_get_cpu_handle(dst_heap, dst_offset);
+ D3D12_CPU_DESCRIPTOR_HANDLE src_handle =
+ dzn_descriptor_heap_get_cpu_handle(src_heap, src_offset);
+
+ dst_heap->dev->CopyDescriptorsSimple(desc_count,
+ dst_handle,
+ src_handle,
+ dst_heap->type);
+}
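+
+/* This maps to ID3D12Device::CopyDescriptorsSimple(), which requires the
+ * source descriptors to live in a CPU-only (non-shader-visible) heap.
+ */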
+
+struct dzn_descriptor_set_ptr {
+ uint32_t binding, elem;
+};
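+
+/* A dzn_descriptor_set_ptr is a cursor over the flattened descriptor space of
+ * a set: it walks the array elements of a binding first, then moves on to the
+ * next binding. An exhausted/invalid cursor is encoded as binding == elem == ~0.
+ * Illustrative walk, assuming binding 0 holds exactly two descriptors and
+ * binding 1 exists:
+ *
+ *   dzn_descriptor_set_ptr ptr;
+ *   dzn_descriptor_set_ptr_init(set, &ptr, 0, 0);
+ *   dzn_descriptor_set_ptr_move(set, &ptr, 2); // now at binding 1, elem 0
+ */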
+
+static void
+dzn_descriptor_set_ptr_validate(const dzn_descriptor_set *set,
+ dzn_descriptor_set_ptr *ptr)
+{
+ if (ptr->binding >= set->layout->binding_count) {
+ ptr->binding = ~0;
+ ptr->elem = ~0;
+ return;
+ }
+
+ uint32_t desc_count =
+ dzn_descriptor_set_layout_get_desc_count(set->layout, ptr->binding);
+ if (ptr->elem >= desc_count) {
+ ptr->binding = ~0;
+ ptr->elem = ~0;
+ }
+}
+
+static void
+dzn_descriptor_set_ptr_init(const dzn_descriptor_set *set,
+ dzn_descriptor_set_ptr *ptr,
+ uint32_t binding, uint32_t elem)
+{
+ ptr->binding = binding;
+ ptr->elem = elem;
+ dzn_descriptor_set_ptr_validate(set, ptr);
+}
+
+static void
+dzn_descriptor_set_ptr_move(const dzn_descriptor_set *set,
+ dzn_descriptor_set_ptr *ptr,
+ uint32_t count)
+{
+ if (ptr->binding == ~0)
+ return;
+
+ while (count) {
+ uint32_t desc_count =
+ dzn_descriptor_set_layout_get_desc_count(set->layout, ptr->binding);
+
+ if (count >= desc_count - ptr->elem) {
+ count -= desc_count - ptr->elem;
+ ptr->binding++;
+ ptr->elem = 0;
+ } else {
+ ptr->elem += count;
+ count = 0;
+ }
+ }
+
+ dzn_descriptor_set_ptr_validate(set, ptr);
+}
+
+bool
+dzn_descriptor_set_ptr_is_valid(const dzn_descriptor_set_ptr *ptr)
+{
+ return ptr->binding != ~0 && ptr->elem != ~0;
+}
+
+uint32_t
+dzn_descriptor_set_remaining_descs_in_binding(const dzn_descriptor_set *set,
+ const dzn_descriptor_set_ptr *ptr)
+{
+ if (ptr->binding >= set->layout->binding_count)
+ return 0;
+
+ uint32_t desc_count =
+ dzn_descriptor_set_layout_get_desc_count(set->layout, ptr->binding);
+
+ return desc_count >= ptr->elem ? desc_count - ptr->elem : 0;
+}
+
+uint32_t
+dzn_descriptor_set_get_heap_offset(const dzn_descriptor_set *set,
+ D3D12_DESCRIPTOR_HEAP_TYPE type,
+ const dzn_descriptor_set_ptr *ptr,
+ bool writeable)
+{
+ if (ptr->binding == ~0)
+ return ~0;
+
+ uint32_t base =
+ dzn_descriptor_set_layout_get_heap_offset(set->layout, ptr->binding, type, writeable);
+ if (base == ~0)
+ return ~0;
+
+ return base + ptr->elem;
+}
+
+void
+dzn_descriptor_set_write_sampler_desc(dzn_descriptor_set *set,
+ const dzn_descriptor_set_ptr *ptr,
+ const dzn_sampler *sampler)
+{
+ D3D12_DESCRIPTOR_HEAP_TYPE type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER;
+ uint32_t heap_offset =
+ dzn_descriptor_set_get_heap_offset(set, type, ptr, false);
+
+ if (heap_offset != ~0) {
+ mtx_lock(&set->pool->defragment_lock);
+ dzn_descriptor_heap_write_sampler_desc(&set->pool->heaps[type],
+ set->heap_offsets[type] + heap_offset,
+ sampler);
+ mtx_unlock(&set->pool->defragment_lock);
+ }
+}
+
+uint32_t
+dzn_descriptor_set_get_dynamic_buffer_idx(const dzn_descriptor_set *set,
+ const dzn_descriptor_set_ptr *ptr)
+{
+ if (ptr->binding == ~0)
+ return ~0;
+
+ uint32_t base = set->layout->bindings[ptr->binding].dynamic_buffer_idx;
+
+ if (base == ~0)
+ return ~0;
+
+ return base + ptr->elem;
+}
+
+void
+dzn_descriptor_set_write_dynamic_buffer_desc(dzn_descriptor_set *set,
+ const dzn_descriptor_set_ptr *ptr,
+ const dzn_buffer_desc *info)
+{
+ uint32_t dynamic_buffer_idx =
+ dzn_descriptor_set_get_dynamic_buffer_idx(set, ptr);
+ if (dynamic_buffer_idx == ~0)
+ return;
+
+ assert(dynamic_buffer_idx < set->layout->dynamic_buffers.count);
+ set->dynamic_buffers[dynamic_buffer_idx] = *info;
+}
+
+VkDescriptorType
+dzn_descriptor_set_get_desc_vk_type(const dzn_descriptor_set *set,
+ const dzn_descriptor_set_ptr *ptr)
+{
+ if (ptr->binding >= set->layout->binding_count)
+ return (VkDescriptorType)~0;
+
+ return set->layout->bindings[ptr->binding].type;
+}
+
+void
+dzn_descriptor_set_write_image_view_desc(dzn_descriptor_set *set,
+ const dzn_descriptor_set_ptr *ptr,
+ bool cube_as_2darray,
+ const dzn_image_view *iview)
+{
+ D3D12_DESCRIPTOR_HEAP_TYPE type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
+ uint32_t heap_offset =
+ dzn_descriptor_set_get_heap_offset(set, type, ptr, false);
+ if (heap_offset == ~0)
+ return;
+
+ mtx_lock(&set->pool->defragment_lock);
+ dzn_descriptor_heap_write_image_view_desc(&set->pool->heaps[type],
+ set->heap_offsets[type] + heap_offset,
+ false, cube_as_2darray,
+ iview);
+
+ VkDescriptorType vk_type = dzn_descriptor_set_get_desc_vk_type(set, ptr);
+ if (dzn_descriptor_type_depends_on_shader_usage(vk_type)) {
+ heap_offset =
+ dzn_descriptor_set_get_heap_offset(set, type, ptr, true);
+ assert(heap_offset != ~0);
+ dzn_descriptor_heap_write_image_view_desc(&set->pool->heaps[type],
+ set->heap_offsets[type] + heap_offset,
+ true, cube_as_2darray,
+ iview);
+ }
+ mtx_unlock(&set->pool->defragment_lock);
+}
+
+void
+dzn_descriptor_set_write_buffer_view_desc(dzn_descriptor_set *set,
+ const dzn_descriptor_set_ptr *ptr,
+ const dzn_buffer_view *bview)
+{
+ D3D12_DESCRIPTOR_HEAP_TYPE type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
+ uint32_t heap_offset =
+ dzn_descriptor_set_get_heap_offset(set, type, ptr, false);
+ if (heap_offset == ~0)
+ return;
+
+ mtx_lock(&set->pool->defragment_lock);
+ dzn_descriptor_heap_write_buffer_view_desc(&set->pool->heaps[type],
+ set->heap_offsets[type] + heap_offset,
+ false, bview);
+
+ VkDescriptorType vk_type = dzn_descriptor_set_get_desc_vk_type(set, ptr);
+ if (dzn_descriptor_type_depends_on_shader_usage(vk_type)) {
+ heap_offset =
+ dzn_descriptor_set_get_heap_offset(set, type, ptr, true);
+ assert(heap_offset != ~0);
+ dzn_descriptor_heap_write_buffer_view_desc(&set->pool->heaps[type],
+ set->heap_offsets[type] + heap_offset,
+ true, bview);
+ }
+ mtx_unlock(&set->pool->defragment_lock);
+}
+
+void
+dzn_descriptor_set_write_buffer_desc(dzn_descriptor_set *set,
+ const dzn_descriptor_set_ptr *ptr,
+ const dzn_buffer_desc *bdesc)
+{
+ D3D12_DESCRIPTOR_HEAP_TYPE type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
+ uint32_t heap_offset =
+ dzn_descriptor_set_get_heap_offset(set, type, ptr, false);
+ if (heap_offset == ~0)
+ return;
+
+ mtx_lock(&set->pool->defragment_lock);
+ dzn_descriptor_heap_write_buffer_desc(&set->pool->heaps[type],
+ set->heap_offsets[type] + heap_offset,
+ false, bdesc);
+
+ VkDescriptorType vk_type = dzn_descriptor_set_get_desc_vk_type(set, ptr);
+ if (dzn_descriptor_type_depends_on_shader_usage(vk_type)) {
+ heap_offset =
+ dzn_descriptor_set_get_heap_offset(set, type, ptr, true);
+ assert(heap_offset != ~0);
+ dzn_descriptor_heap_write_buffer_desc(&set->pool->heaps[type],
+ set->heap_offsets[type] + heap_offset,
+ true, bdesc);
+ }
+ mtx_unlock(&set->pool->defragment_lock);
+}
+
+static void
+dzn_descriptor_set_init(dzn_descriptor_set *set,
+ dzn_device *device,
+ dzn_descriptor_pool *pool,
+ dzn_descriptor_set_layout *layout)
+{
+ vk_object_base_init(&device->vk, &set->base, VK_OBJECT_TYPE_DESCRIPTOR_SET);
+
+ set->pool = pool;
+ set->layout = layout;
+
+ mtx_lock(&pool->defragment_lock);
+ dzn_foreach_pool_type(type) {
+ set->heap_offsets[type] = pool->free_offset[type];
+ set->heap_sizes[type] = layout->range_desc_count[type];
+ set->pool->free_offset[type] += layout->range_desc_count[type];
+ }
+ mtx_unlock(&pool->defragment_lock);
+
+ /* Pre-fill the immutable samplers */
+ if (layout->immutable_sampler_count) {
+ for (uint32_t b = 0; b < layout->binding_count; b++) {
+ bool has_samplers =
+ layout->bindings[b].type == VK_DESCRIPTOR_TYPE_SAMPLER ||
+ layout->bindings[b].type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
+
+ if (!has_samplers || layout->bindings[b].immutable_sampler_idx == ~0)
+ continue;
+
+ dzn_descriptor_set_ptr ptr;
+ const dzn_sampler **sampler =
+ &layout->immutable_samplers[layout->bindings[b].immutable_sampler_idx];
+ for (dzn_descriptor_set_ptr_init(set, &ptr, b, 0);
+ dzn_descriptor_set_ptr_is_valid(&ptr);
+ dzn_descriptor_set_ptr_move(set, &ptr, 1)) {
+ dzn_descriptor_set_write_sampler_desc(set, &ptr, *sampler);
+ sampler++;
+ }
+ }
+ }
+}
+
+static void
+dzn_descriptor_set_finish(dzn_descriptor_set *set)
+{
+ vk_object_base_finish(&set->base);
+ set->pool = NULL;
+ set->layout = NULL;
+}
+
+static void
+dzn_descriptor_pool_destroy(dzn_descriptor_pool *pool,
+ const VkAllocationCallbacks *pAllocator)
+{
+ if (!pool)
+ return;
+
+ dzn_device *device = container_of(pool->base.device, dzn_device, vk);
+
+ dzn_foreach_pool_type (type) {
+ if (pool->desc_count[type])
+ dzn_descriptor_heap_finish(&pool->heaps[type]);
+ }
+
+ vk_object_base_finish(&pool->base);
+ vk_free2(&device->vk.alloc, pAllocator, pool);
+}
+
+static VkResult
+dzn_descriptor_pool_create(dzn_device *device,
+ const VkDescriptorPoolCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkDescriptorPool *out)
+{
+ VK_MULTIALLOC(ma);
+ VK_MULTIALLOC_DECL(&ma, dzn_descriptor_pool, pool, 1);
+ VK_MULTIALLOC_DECL(&ma, dzn_descriptor_set, sets, pCreateInfo->maxSets);
+
+ if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, pAllocator,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT))
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ pool->alloc = pAllocator ? *pAllocator : device->vk.alloc;
+ pool->sets = sets;
+ pool->set_count = pCreateInfo->maxSets;
+ mtx_init(&pool->defragment_lock, mtx_plain);
+
+ vk_object_base_init(&device->vk, &pool->base, VK_OBJECT_TYPE_DESCRIPTOR_POOL);
+
+ for (uint32_t p = 0; p < pCreateInfo->poolSizeCount; p++) {
+ VkDescriptorType type = pCreateInfo->pPoolSizes[p].type;
+ uint32_t num_desc = pCreateInfo->pPoolSizes[p].descriptorCount;
+
+ switch (type) {
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ pool->desc_count[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER] += num_desc;
+ break;
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ pool->desc_count[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] += num_desc;
+ pool->desc_count[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER] += num_desc;
+ break;
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+ case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+ pool->desc_count[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] += num_desc;
+ break;
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ /* Reserve one UAV and one SRV slot for those. */
+ pool->desc_count[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] += num_desc * 2;
+ break;
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
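+ /* Dynamic buffers don't consume heap slots: they are stashed CPU-side
+ * in the set (see dzn_descriptor_set_write_dynamic_buffer_desc()).
+ */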
+ break;
+ default:
+ unreachable("Unsupported desc type");
+ }
+ }
+
+ dzn_foreach_pool_type (type) {
+ if (!pool->desc_count[type])
+ continue;
+
+ VkResult result =
+ dzn_descriptor_heap_init(&pool->heaps[type], device, type, pool->desc_count[type], false);
+ if (result != VK_SUCCESS) {
+ dzn_descriptor_pool_destroy(pool, pAllocator);
+ return result;
+ }
+ }
+
+ *out = dzn_descriptor_pool_to_handle(pool);
+ return VK_SUCCESS;
+}
+
+VkResult
+dzn_descriptor_pool_defragment_heap(dzn_descriptor_pool *pool,
+ D3D12_DESCRIPTOR_HEAP_TYPE type)
+{
+ dzn_device *device = container_of(pool->base.device, dzn_device, vk);
+ dzn_descriptor_heap new_heap;
+
+ VkResult result =
+ dzn_descriptor_heap_init(&new_heap, device, type,
+ pool->heaps[type].desc_count,
+ false);
+ if (result != VK_SUCCESS)
+ return result;
+
+ mtx_lock(&pool->defragment_lock);
+ uint32_t heap_offset = 0;
+ for (uint32_t s = 0; s < pool->set_count; s++) {
+ if (!pool->sets[s].layout)
+ continue;
+
+ dzn_descriptor_heap_copy(&new_heap, heap_offset,
+ &pool->heaps[type],
+ pool->sets[s].heap_offsets[type],
+ pool->sets[s].heap_sizes[type]);
+ pool->sets[s].heap_offsets[type] = heap_offset;
+ heap_offset += pool->sets[s].heap_sizes[type];
+ }
+ mtx_unlock(&pool->defragment_lock);
+
+ dzn_descriptor_heap_finish(&pool->heaps[type]);
+ pool->heaps[type] = new_heap;
+
+ return VK_SUCCESS;
+}
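+
+/* Defragmentation repacks the live sets at the front of a brand new heap. It
+ * is triggered from dzn_AllocateDescriptorSets() when the pool still has
+ * enough free descriptors overall but the linear allocator ran past the end
+ * of the heap because freed sets left holes behind.
+ */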
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_CreateDescriptorPool(VkDevice device,
+ const VkDescriptorPoolCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkDescriptorPool *pDescriptorPool)
+{
+ return dzn_descriptor_pool_create(dzn_device_from_handle(device),
+ pCreateInfo, pAllocator, pDescriptorPool);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_DestroyDescriptorPool(VkDevice device,
+ VkDescriptorPool descriptorPool,
+ const VkAllocationCallbacks *pAllocator)
+{
+ return dzn_descriptor_pool_destroy(dzn_descriptor_pool_from_handle(descriptorPool),
+ pAllocator);
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_ResetDescriptorPool(VkDevice device,
+ VkDescriptorPool descriptorPool,
+ VkDescriptorPoolResetFlags flags)
+{
+ VK_FROM_HANDLE(dzn_descriptor_pool, pool, descriptorPool);
+
+ for (uint32_t s = 0; s < pool->set_count; s++)
+ dzn_descriptor_set_finish(&pool->sets[s]);
+
+ dzn_foreach_pool_type(type)
+ pool->free_offset[type] = 0;
+
+ return VK_SUCCESS;
+}
+
+void
+dzn_descriptor_heap_pool_finish(dzn_descriptor_heap_pool *pool)
+{
+ list_splicetail(&pool->active_heaps, &pool->free_heaps);
+ list_for_each_entry_safe(dzn_descriptor_heap_pool_entry, entry, &pool->free_heaps, link) {
+ list_del(&entry->link);
+ dzn_descriptor_heap_finish(&entry->heap);
+ vk_free(pool->alloc, entry);
+ }
+}
+
+void
+dzn_descriptor_heap_pool_init(dzn_descriptor_heap_pool *pool,
+ dzn_device *device,
+ D3D12_DESCRIPTOR_HEAP_TYPE type,
+ bool shader_visible,
+ const VkAllocationCallbacks *alloc)
+{
+ assert(!shader_visible ||
+ type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV ||
+ type == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
+
+ pool->alloc = alloc;
+ pool->type = type;
+ pool->shader_visible = shader_visible;
+ list_inithead(&pool->active_heaps);
+ list_inithead(&pool->free_heaps);
+ pool->offset = 0;
+ pool->desc_sz = device->dev->GetDescriptorHandleIncrementSize(type);
+}
+
+VkResult
+dzn_descriptor_heap_pool_alloc_slots(dzn_descriptor_heap_pool *pool,
+ dzn_device *device, uint32_t desc_count,
+ dzn_descriptor_heap **heap,
+ uint32_t *first_slot)
+{
+ dzn_descriptor_heap *last_heap =
+ list_is_empty(&pool->active_heaps) ?
+ NULL :
+ &(list_last_entry(&pool->active_heaps, dzn_descriptor_heap_pool_entry, link)->heap);
+ uint32_t last_heap_desc_count =
+ last_heap ? last_heap->desc_count : 0;
+
+ if (pool->offset + desc_count > last_heap_desc_count) {
+ uint32_t granularity =
+ (pool->type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV ||
+ pool->type == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER) ?
+ 64 * 1024 : 4 * 1024;
+ uint32_t alloc_step = ALIGN_POT(desc_count * pool->desc_sz, granularity);
+ uint32_t heap_desc_count = MAX2(alloc_step / pool->desc_sz, 16);
+ dzn_descriptor_heap_pool_entry *new_heap = NULL;
+
+ list_for_each_entry_safe(dzn_descriptor_heap_pool_entry, entry, &pool->free_heaps, link) {
+ if (entry->heap.desc_count >= heap_desc_count) {
+ new_heap = entry;
+ list_del(&entry->link);
+ break;
+ }
+ }
+
+ if (!new_heap) {
+ new_heap = (dzn_descriptor_heap_pool_entry *)
+ vk_zalloc(pool->alloc, sizeof(*new_heap), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (!new_heap)
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ VkResult result =
+ dzn_descriptor_heap_init(&new_heap->heap, device, pool->type,
+ heap_desc_count, pool->shader_visible);
+ if (result != VK_SUCCESS) {
+ vk_free(pool->alloc, new_heap);
+ return result;
+ }
+ }
+
+ list_addtail(&new_heap->link, &pool->active_heaps);
+ pool->offset = 0;
+ last_heap = &new_heap->heap;
+ }
+
+ *heap = last_heap;
+ *first_slot = pool->offset;
+ pool->offset += desc_count;
+ return VK_SUCCESS;
+}
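+
+/* Growth policy: slots are carved linearly out of the last active heap; on
+ * overflow a replacement is taken from the free list or freshly allocated,
+ * with its byte size rounded up to 64k (CBV_SRV_UAV/sampler) or 4k (RTV/DSV)
+ * and a floor of 16 descriptors.
+ */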
+
+void
+dzn_descriptor_heap_pool_reset(dzn_descriptor_heap_pool *pool)
+{
+ pool->offset = 0;
+ list_splicetail(&pool->active_heaps, &pool->free_heaps);
+ list_inithead(&pool->free_heaps);
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_AllocateDescriptorSets(VkDevice dev,
+ const VkDescriptorSetAllocateInfo *pAllocateInfo,
+ VkDescriptorSet *pDescriptorSets)
+{
+ VK_FROM_HANDLE(dzn_descriptor_pool, pool, pAllocateInfo->descriptorPool);
+ VK_FROM_HANDLE(dzn_device, device, dev);
+ VkResult result;
+ unsigned i;
+
+ if (pAllocateInfo->descriptorSetCount > (pool->set_count - pool->used_set_count))
+ return VK_ERROR_OUT_OF_POOL_MEMORY;
+
+ uint32_t set_idx = 0;
+ for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) {
+ VK_FROM_HANDLE(dzn_descriptor_set_layout, layout, pAllocateInfo->pSetLayouts[i]);
+
+ dzn_foreach_pool_type(type) {
+ if (pool->used_desc_count[type] + layout->range_desc_count[type] > pool->desc_count[type]) {
+ dzn_FreeDescriptorSets(dev, pAllocateInfo->descriptorPool, i, pDescriptorSets);
+ return vk_error(device, VK_ERROR_OUT_OF_POOL_MEMORY);
+ }
+
+ if (pool->free_offset[type] + layout->range_desc_count[type] > pool->desc_count[type]) {
+ result = dzn_descriptor_pool_defragment_heap(pool, type);
+ if (result != VK_SUCCESS) {
+ dzn_FreeDescriptorSets(dev, pAllocateInfo->descriptorPool, i, pDescriptorSets);
+ return vk_error(device, VK_ERROR_FRAGMENTED_POOL);
+ }
+ }
+ }
+
+ dzn_descriptor_set *set = NULL;
+ for (; set_idx < pool->set_count; set_idx++) {
+ if (!pool->sets[set_idx].layout) {
+ set = &pool->sets[set_idx];
+ break;
+ }
+ }
+
+ dzn_descriptor_set_init(set, device, pool, layout);
+ pDescriptorSets[i] = dzn_descriptor_set_to_handle(set);
+ }
+
+ return VK_SUCCESS;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_FreeDescriptorSets(VkDevice dev,
+ VkDescriptorPool descriptorPool,
+ uint32_t count,
+ const VkDescriptorSet *pDescriptorSets)
+{
+ VK_FROM_HANDLE(dzn_descriptor_pool, pool, descriptorPool);
+ VK_FROM_HANDLE(dzn_device, device, dev);
+
+ for (uint32_t s = 0; s < count; s++) {
+ VK_FROM_HANDLE(dzn_descriptor_set, set, pDescriptorSets[s]);
+
+ if (!set)
+ continue;
+
+ assert(set->pool == pool);
+
+ dzn_descriptor_set_finish(set);
+ }
+
+ mtx_lock(&pool->defragment_lock);
+ dzn_foreach_pool_type(type)
+ pool->free_offset[type] = 0;
+
+ for (uint32_t s = 0; s < pool->set_count; s++) {
+ const dzn_descriptor_set *set = &pool->sets[s];
+
+ if (set->layout) {
+ dzn_foreach_pool_type (type) {
+ pool->free_offset[type] =
+ MAX2(pool->free_offset[type],
+ set->heap_offsets[type] +
+ set->layout->range_desc_count[type]);
+ }
+ }
+ }
+ mtx_unlock(&pool->defragment_lock);
+
+ return VK_SUCCESS;
+}
+
+static void
+dzn_descriptor_set_write(const VkWriteDescriptorSet *pDescriptorWrite)
+{
+ VK_FROM_HANDLE(dzn_descriptor_set, set, pDescriptorWrite->dstSet);
+
+ dzn_descriptor_set_ptr ptr;
+
+ dzn_descriptor_set_ptr_init(set, &ptr,
+ pDescriptorWrite->dstBinding,
+ pDescriptorWrite->dstArrayElement);
+ uint32_t desc_count = pDescriptorWrite->descriptorCount;
+
+ uint32_t d = 0;
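+ /* D3D12 has no cube-map UAV dimension, so storage images that are
+ * cube-compatible have to be viewed as 2D arrays instead.
+ */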
+ bool cube_as_2darray =
+ pDescriptorWrite->descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
+
+ switch (pDescriptorWrite->descriptorType) {
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ for (; dzn_descriptor_set_ptr_is_valid(&ptr) && d < desc_count;
+ dzn_descriptor_set_ptr_move(set, &ptr, 1)) {
+ assert(dzn_descriptor_set_get_desc_vk_type(set, &ptr) == pDescriptorWrite->descriptorType);
+ const VkDescriptorImageInfo *pImageInfo = pDescriptorWrite->pImageInfo + d;
+ VK_FROM_HANDLE(dzn_sampler, sampler, pImageInfo->sampler);
+
+ if (sampler)
+ dzn_descriptor_set_write_sampler_desc(set, &ptr, sampler);
+
+ d++;
+ }
+ break;
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ for (; dzn_descriptor_set_ptr_is_valid(&ptr) && d < desc_count;
+ dzn_descriptor_set_ptr_move(set, &ptr, 1)) {
+ assert(dzn_descriptor_set_get_desc_vk_type(set, &ptr) == pDescriptorWrite->descriptorType);
+ const VkDescriptorImageInfo *pImageInfo = pDescriptorWrite->pImageInfo + d;
+ VK_FROM_HANDLE(dzn_sampler, sampler, pImageInfo->sampler);
+ VK_FROM_HANDLE(dzn_image_view, iview, pImageInfo->imageView);
+
+ if (sampler)
+ dzn_descriptor_set_write_sampler_desc(set, &ptr, sampler);
+
+ if (iview)
+ dzn_descriptor_set_write_image_view_desc(set, &ptr, cube_as_2darray, iview);
+
+ d++;
+ }
+ break;
+
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ for (; dzn_descriptor_set_ptr_is_valid(&ptr) && d < desc_count;
+ dzn_descriptor_set_ptr_move(set, &ptr, 1)) {
+ assert(dzn_descriptor_set_get_desc_vk_type(set, &ptr) == pDescriptorWrite->descriptorType);
+ const VkDescriptorImageInfo *pImageInfo = pDescriptorWrite->pImageInfo + d;
+ VK_FROM_HANDLE(dzn_image_view, iview, pImageInfo->imageView);
+
+ if (iview)
+ dzn_descriptor_set_write_image_view_desc(set, &ptr, cube_as_2darray, iview);
+
+ d++;
+ }
+ break;
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ for (; dzn_descriptor_set_ptr_is_valid(&ptr) && d < desc_count;
+ dzn_descriptor_set_ptr_move(set, &ptr, 1)) {
+ assert(dzn_descriptor_set_get_desc_vk_type(set, &ptr) == pDescriptorWrite->descriptorType);
+ const VkDescriptorBufferInfo *binfo = &pDescriptorWrite->pBufferInfo[d];
+ dzn_buffer_desc desc {
+ pDescriptorWrite->descriptorType,
+ dzn_buffer_from_handle(binfo->buffer),
+ binfo->range, binfo->offset
+ };
+
+ if (desc.buffer)
+ dzn_descriptor_set_write_buffer_desc(set, &ptr, &desc);
+
+ d++;
+ }
+ break;
+
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+ for (; dzn_descriptor_set_ptr_is_valid(&ptr) && d < desc_count;
+ dzn_descriptor_set_ptr_move(set, &ptr, 1)) {
+ assert(dzn_descriptor_set_get_desc_vk_type(set, &ptr) == pDescriptorWrite->descriptorType);
+ const VkDescriptorBufferInfo *binfo = &pDescriptorWrite->pBufferInfo[d];
+ dzn_buffer_desc desc {
+ pDescriptorWrite->descriptorType,
+ dzn_buffer_from_handle(binfo->buffer),
+ binfo->range, binfo->offset
+ };
+
+ if (desc.buffer)
+ dzn_descriptor_set_write_dynamic_buffer_desc(set, &ptr, &desc);
+
+ d++;
+ }
+ break;
+
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ for (; dzn_descriptor_set_ptr_is_valid(&ptr) && d < desc_count;
+ dzn_descriptor_set_ptr_move(set, &ptr, 1)) {
+ assert(dzn_descriptor_set_get_desc_vk_type(set, &ptr) == pDescriptorWrite->descriptorType);
+ VK_FROM_HANDLE(dzn_buffer_view, bview, pDescriptorWrite->pTexelBufferView[d]);
+
+ if (bview)
+ dzn_descriptor_set_write_buffer_view_desc(set, &ptr, bview);
+
+ d++;
+ }
+ break;
+
+ default:
+ unreachable("invalid descriptor type");
+ break;
+ }
+
+ assert(d == pDescriptorWrite->descriptorCount);
+}
+
+static void
+dzn_descriptor_set_copy(const VkCopyDescriptorSet *pDescriptorCopy)
+{
+ VK_FROM_HANDLE(dzn_descriptor_set, src_set, pDescriptorCopy->srcSet);
+ VK_FROM_HANDLE(dzn_descriptor_set, dst_set, pDescriptorCopy->dstSet);
+ dzn_descriptor_set_ptr src_ptr, dst_ptr;
+
+ dzn_descriptor_set_ptr_init(src_set, &src_ptr,
+ pDescriptorCopy->srcBinding,
+ pDescriptorCopy->srcArrayElement);
+ dzn_descriptor_set_ptr_init(dst_set, &dst_ptr,
+ pDescriptorCopy->dstBinding,
+ pDescriptorCopy->dstArrayElement);
+
+ uint32_t copied_count = 0;
+
+ while (dzn_descriptor_set_ptr_is_valid(&src_ptr) &&
+ dzn_descriptor_set_ptr_is_valid(&dst_ptr)) {
+ VkDescriptorType src_type =
+ dzn_descriptor_set_get_desc_vk_type(src_set, &src_ptr);
+ VkDescriptorType dst_type =
+ dzn_descriptor_set_get_desc_vk_type(dst_set, &dst_ptr);
+
+ assert(copied_count < pDescriptorCopy->descriptorCount);
+ assert(src_type == dst_type);
+ uint32_t count =
+ MIN2(dzn_descriptor_set_remaining_descs_in_binding(src_set, &src_ptr),
+ dzn_descriptor_set_remaining_descs_in_binding(dst_set, &dst_ptr));
+
+ if (src_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
+ src_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
+ uint32_t src_idx =
+ dzn_descriptor_set_get_dynamic_buffer_idx(src_set, &src_ptr);
+ uint32_t dst_idx =
+ dzn_descriptor_set_get_dynamic_buffer_idx(dst_set, &dst_ptr);
+
+ memcpy(&dst_set->dynamic_buffers[dst_idx],
+ &src_set->dynamic_buffers[src_idx],
+ sizeof(*dst_set->dynamic_buffers) * count);
+ } else {
+ dzn_foreach_pool_type(type) {
+ uint32_t src_heap_offset =
+ dzn_descriptor_set_get_heap_offset(src_set, type, &src_ptr, false);
+ uint32_t dst_heap_offset =
+ dzn_descriptor_set_get_heap_offset(dst_set, type, &dst_ptr, false);
+
+ if (src_heap_offset == ~0) {
+ assert(dst_heap_offset == ~0);
+ continue;
+ }
+
+ mtx_lock(&src_set->pool->defragment_lock);
+ mtx_lock(&dst_set->pool->defragment_lock);
+ dzn_descriptor_heap_copy(&dst_set->pool->heaps[type],
+ dst_set->heap_offsets[type] + dst_heap_offset,
+ &src_set->pool->heaps[type],
+ src_set->heap_offsets[type] + src_heap_offset,
+ count);
+
+ if (dzn_descriptor_type_depends_on_shader_usage(src_type)) {
+ src_heap_offset =
+ dzn_descriptor_set_get_heap_offset(src_set, type, &src_ptr, true);
+ dst_heap_offset =
+ dzn_descriptor_set_get_heap_offset(dst_set, type, &dst_ptr, true);
+ assert(src_heap_offset != ~0);
+ assert(dst_heap_offset != ~0);
+ dzn_descriptor_heap_copy(&dst_set->pool->heaps[type],
+ dst_set->heap_offsets[type] + dst_heap_offset,
+ &src_set->pool->heaps[type],
+ src_set->heap_offsets[type] + src_heap_offset,
+ count);
+ }
+ mtx_unlock(&dst_set->pool->defragment_lock);
+ mtx_unlock(&src_set->pool->defragment_lock);
+ }
+ }
+
+ dzn_descriptor_set_ptr_move(src_set, &src_ptr, count);
+ dzn_descriptor_set_ptr_move(dst_set, &dst_ptr, count);
+ copied_count += count;
+ }
+
+ assert(copied_count == pDescriptorCopy->descriptorCount);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_UpdateDescriptorSets(VkDevice _device,
+ uint32_t descriptorWriteCount,
+ const VkWriteDescriptorSet *pDescriptorWrites,
+ uint32_t descriptorCopyCount,
+ const VkCopyDescriptorSet *pDescriptorCopies)
+{
+ VK_FROM_HANDLE(dzn_device, dev, _device);
+
+ for (unsigned i = 0; i < descriptorWriteCount; i++)
+ dzn_descriptor_set_write(&pDescriptorWrites[i]);
+
+ for (unsigned i = 0; i < descriptorCopyCount; i++)
+ dzn_descriptor_set_copy(&pDescriptorCopies[i]);
+}
diff --git a/src/microsoft/vulkan/dzn_device.cpp b/src/microsoft/vulkan/dzn_device.cpp
new file mode 100644
index 00000000000..f1722261612
--- /dev/null
+++ b/src/microsoft/vulkan/dzn_device.cpp
@@ -0,0 +1,2632 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "dzn_private.h"
+
+#include "vk_alloc.h"
+#include "vk_common_entrypoints.h"
+#include "vk_cmd_enqueue_entrypoints.h"
+#include "vk_debug_report.h"
+#include "vk_format.h"
+#include "vk_sync_dummy.h"
+#include "vk_util.h"
+
+#include "util/debug.h"
+#include "util/macros.h"
+
+#include "glsl_types.h"
+
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <windows.h>
+#include <directx/d3d12sdklayers.h>
+
+#if defined(VK_USE_PLATFORM_WIN32_KHR) || \
+ defined(VK_USE_PLATFORM_DISPLAY_KHR)
+#define DZN_USE_WSI_PLATFORM
+#endif
+
+#define DZN_API_VERSION VK_MAKE_VERSION(1, 0, VK_HEADER_VERSION)
+
+static const vk_instance_extension_table instance_extensions = {
+ .KHR_get_physical_device_properties2 = true,
+#ifdef DZN_USE_WSI_PLATFORM
+ .KHR_surface = true,
+#endif
+#ifdef VK_USE_PLATFORM_WIN32_KHR
+ .KHR_win32_surface = true,
+#endif
+#ifdef VK_USE_PLATFORM_DISPLAY_KHR
+ .KHR_display = true,
+ .KHR_get_display_properties2 = true,
+ .EXT_direct_mode_display = true,
+ .EXT_display_surface_counter = true,
+#endif
+ .EXT_debug_report = true,
+ .EXT_debug_utils = true,
+};
+
+static void
+dzn_physical_device_get_extensions(dzn_physical_device *pdev)
+{
+ pdev->vk.supported_extensions = vk_device_extension_table {
+#ifdef DZN_USE_WSI_PLATFORM
+ .KHR_swapchain = true,
+#endif
+ };
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_EnumerateInstanceExtensionProperties(const char *pLayerName,
+ uint32_t *pPropertyCount,
+ VkExtensionProperties *pProperties)
+{
+ /* We don't support any layers */
+ if (pLayerName)
+ return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
+
+ return vk_enumerate_instance_extension_properties(
+ &instance_extensions, pPropertyCount, pProperties);
+}
+
+static const struct debug_control dzn_debug_options[] = {
+ { "sync", DZN_DEBUG_SYNC },
+ { "nir", DZN_DEBUG_NIR },
+ { "dxil", DZN_DEBUG_DXIL },
+ { "warp", DZN_DEBUG_WARP },
+ { "internal", DZN_DEBUG_INTERNAL },
+ { "signature", DZN_DEBUG_SIG },
+ { "gbv", DZN_DEBUG_GBV },
+ { NULL, 0 }
+};
+
+static void
+dzn_physical_device_destroy(dzn_physical_device *pdev)
+{
+ dzn_instance *instance = container_of(pdev->vk.instance, dzn_instance, vk);
+
+ list_del(&pdev->link);
+
+ if (pdev->dev)
+ pdev->dev->Release();
+
+ if (pdev->adapter)
+ pdev->adapter->Release();
+
+ dzn_wsi_finish(pdev);
+ vk_physical_device_finish(&pdev->vk);
+ vk_free(&instance->vk.alloc, pdev);
+}
+
+static void
+dzn_instance_destroy(dzn_instance *instance, const VkAllocationCallbacks *alloc)
+{
+ if (!instance)
+ return;
+
+ if (instance->dxc.validator)
+ instance->dxc.validator->Release();
+
+ if (instance->dxc.library)
+ instance->dxc.library->Release();
+
+ if (instance->dxc.compiler)
+ instance->dxc.compiler->Release();
+
+ list_for_each_entry_safe(dzn_physical_device, pdev,
+ &instance->physical_devices, link) {
+ dzn_physical_device_destroy(pdev);
+ }
+
+ vk_instance_finish(&instance->vk);
+ vk_free2(vk_default_allocator(), alloc, instance);
+}
+
+static VkResult
+dzn_instance_create(const VkInstanceCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkInstance *out)
+{
+ dzn_instance *instance = (dzn_instance *)
+ vk_zalloc2(vk_default_allocator(), pAllocator, sizeof(*instance), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
+ if (!instance)
+ return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ vk_instance_dispatch_table dispatch_table;
+ vk_instance_dispatch_table_from_entrypoints(&dispatch_table,
+ &dzn_instance_entrypoints,
+ true);
+
+ VkResult result =
+ vk_instance_init(&instance->vk, &instance_extensions,
+ &dispatch_table, pCreateInfo,
+ pAllocator ? pAllocator : vk_default_allocator());
+ if (result != VK_SUCCESS) {
+ vk_free2(vk_default_allocator(), pAllocator, instance);
+ return result;
+ }
+
+ list_inithead(&instance->physical_devices);
+ instance->physical_devices_enumerated = false;
+ instance->debug_flags =
+ parse_debug_string(getenv("DZN_DEBUG"), dzn_debug_options);
+
+ instance->dxc.validator = dxil_get_validator();
+ instance->dxc.library = dxc_get_library();
+ instance->dxc.compiler = dxc_get_compiler();
+ instance->d3d12.serialize_root_sig = d3d12_get_serialize_root_sig();
+
+ if (!instance->dxc.validator ||
+ !instance->dxc.library ||
+ !instance->dxc.compiler ||
+ !instance->d3d12.serialize_root_sig) {
+ dzn_instance_destroy(instance, pAllocator);
+ return vk_error(NULL, VK_ERROR_INITIALIZATION_FAILED);
+ }
+
+ d3d12_enable_debug_layer();
+ if (instance->debug_flags & DZN_DEBUG_GBV)
+ d3d12_enable_gpu_validation();
+
+ *out = dzn_instance_to_handle(instance);
+ return VK_SUCCESS;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkInstance *pInstance)
+{
+ return dzn_instance_create(pCreateInfo, pAllocator, pInstance);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_DestroyInstance(VkInstance instance,
+ const VkAllocationCallbacks *pAllocator)
+{
+ dzn_instance_destroy(dzn_instance_from_handle(instance), pAllocator);
+}
+
+static VkResult
+dzn_physical_device_create(dzn_instance *instance,
+ IDXGIAdapter1 *adapter,
+ const DXGI_ADAPTER_DESC1 *adapter_desc)
+{
+ dzn_physical_device *pdev = (dzn_physical_device *)
+ vk_zalloc(&instance->vk.alloc, sizeof(*pdev), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
+
+ if (!pdev)
+ return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ vk_physical_device_dispatch_table dispatch_table;
+ vk_physical_device_dispatch_table_from_entrypoints(&dispatch_table,
+ &dzn_physical_device_entrypoints,
+ true);
+ vk_physical_device_dispatch_table_from_entrypoints(&dispatch_table,
+ &wsi_physical_device_entrypoints,
+ false);
+
+ VkResult result =
+ vk_physical_device_init(&pdev->vk, &instance->vk,
+ NULL, /* We set up extensions later */
+ &dispatch_table);
+ if (result != VK_SUCCESS) {
+ vk_free(&instance->vk.alloc, pdev);
+ return result;
+ }
+
+ mtx_init(&pdev->dev_lock, mtx_plain);
+ pdev->adapter_desc = *adapter_desc;
+ pdev->adapter = adapter;
+ adapter->AddRef();
+ list_addtail(&pdev->link, &instance->physical_devices);
+
+ vk_warn_non_conformant_implementation("dzn");
+
+ /* TODO: correct UUIDs */
+ memset(pdev->pipeline_cache_uuid, 0, VK_UUID_SIZE);
+ memset(pdev->driver_uuid, 0, VK_UUID_SIZE);
+ memset(pdev->device_uuid, 0, VK_UUID_SIZE);
+
+ /* TODO: expose proper queue-family information */
+
+ result = dzn_wsi_init(pdev);
+ if (result != VK_SUCCESS) {
+ dzn_physical_device_destroy(pdev);
+ return result;
+ }
+
+ dzn_physical_device_get_extensions(pdev);
+
+ uint32_t num_sync_types = 0;
+ pdev->sync_types[num_sync_types++] = &dzn_sync_type;
+ pdev->sync_types[num_sync_types++] = &vk_sync_dummy_type;
+ pdev->sync_types[num_sync_types] = NULL;
+ assert(num_sync_types <= MAX_SYNC_TYPES);
+ pdev->vk.supported_sync_types = pdev->sync_types;
+
+ return VK_SUCCESS;
+}
+
+static void
+dzn_physical_device_cache_caps(dzn_physical_device *pdev)
+{
+ D3D_FEATURE_LEVEL checklist[] = {
+ D3D_FEATURE_LEVEL_11_0,
+ D3D_FEATURE_LEVEL_11_1,
+ D3D_FEATURE_LEVEL_12_0,
+ D3D_FEATURE_LEVEL_12_1,
+ D3D_FEATURE_LEVEL_12_2,
+ };
+
+ D3D12_FEATURE_DATA_FEATURE_LEVELS levels = {
+ .NumFeatureLevels = ARRAY_SIZE(checklist),
+ .pFeatureLevelsRequested = checklist,
+ };
+
+ pdev->dev->CheckFeatureSupport(D3D12_FEATURE_FEATURE_LEVELS, &levels, sizeof(levels));
+ pdev->feature_level = levels.MaxSupportedFeatureLevel;
+
+ pdev->dev->CheckFeatureSupport(D3D12_FEATURE_ARCHITECTURE1, &pdev->architecture, sizeof(pdev->architecture));
+ pdev->dev->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, &pdev->options, sizeof(pdev->options));
+
+ pdev->queue_families[pdev->queue_family_count++] = {
+ .props = {
+ .queueFlags = VK_QUEUE_GRAPHICS_BIT |
+ VK_QUEUE_COMPUTE_BIT |
+ VK_QUEUE_TRANSFER_BIT,
+ .queueCount = 1,
+ .timestampValidBits = 64,
+ .minImageTransferGranularity = { 0, 0, 0 },
+ },
+ .desc = {
+ .Type = D3D12_COMMAND_LIST_TYPE_DIRECT,
+ },
+ };
+
+ pdev->queue_families[pdev->queue_family_count++] = {
+ .props = {
+ .queueFlags = VK_QUEUE_COMPUTE_BIT |
+ VK_QUEUE_TRANSFER_BIT,
+ .queueCount = 8,
+ .timestampValidBits = 64,
+ .minImageTransferGranularity = { 0, 0, 0 },
+ },
+ .desc = {
+ .Type = D3D12_COMMAND_LIST_TYPE_COMPUTE,
+ },
+ };
+
+ pdev->queue_families[pdev->queue_family_count++] = {
+ .props = {
+ .queueFlags = VK_QUEUE_TRANSFER_BIT,
+ .queueCount = 1,
+ .timestampValidBits = 0,
+ .minImageTransferGranularity = { 0, 0, 0 },
+ },
+ .desc = {
+ .Type = D3D12_COMMAND_LIST_TYPE_COPY,
+ },
+ };
+
+ assert(pdev->queue_family_count <= ARRAY_SIZE(pdev->queue_families));
+
+ D3D12_COMMAND_QUEUE_DESC queue_desc = {
+ .Type = D3D12_COMMAND_LIST_TYPE_DIRECT,
+ .Priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL,
+ .Flags = D3D12_COMMAND_QUEUE_FLAG_NONE,
+ .NodeMask = 0,
+ };
+
+ ComPtr<ID3D12CommandQueue> cmdqueue;
+
+ pdev->dev->CreateCommandQueue(&queue_desc,
+ IID_PPV_ARGS(&cmdqueue));
+
+ uint64_t ts_freq;
+ cmdqueue->GetTimestampFrequency(&ts_freq);
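+ /* Vulkan expresses timestampPeriod in nanoseconds per tick. */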
+ pdev->timestamp_period = 1000000000.0f / ts_freq;
+}
+
+static void
+dzn_physical_device_init_memory(dzn_physical_device *pdev)
+{
+ VkPhysicalDeviceMemoryProperties *mem = &pdev->memory;
+ const DXGI_ADAPTER_DESC1 *desc = &pdev->adapter_desc;
+
+ mem->memoryHeapCount = 1;
+ mem->memoryHeaps[0] = VkMemoryHeap {
+ .size = desc->SharedSystemMemory,
+ .flags = 0,
+ };
+
+ mem->memoryTypes[mem->memoryTypeCount++] = VkMemoryType {
+ .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+ VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+ .heapIndex = 0,
+ };
+ mem->memoryTypes[mem->memoryTypeCount++] = VkMemoryType {
+ .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+ VK_MEMORY_PROPERTY_HOST_CACHED_BIT |
+ VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+ .heapIndex = 0,
+ };
+
+ if (!pdev->architecture.UMA) {
+ mem->memoryHeaps[mem->memoryHeapCount++] = VkMemoryHeap {
+ .size = desc->DedicatedVideoMemory,
+ .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
+ };
+ mem->memoryTypes[mem->memoryTypeCount++] = VkMemoryType {
+ .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
+ .heapIndex = mem->memoryHeapCount - 1,
+ };
+ } else {
+ mem->memoryHeaps[0].flags |= VK_MEMORY_HEAP_DEVICE_LOCAL_BIT;
+ mem->memoryTypes[0].propertyFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+ mem->memoryTypes[1].propertyFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+ }
+
+ constexpr unsigned MaxTier2MemoryTypes = 3;
+ assert(mem->memoryTypeCount <= MaxTier2MemoryTypes);
+
+ if (pdev->options.ResourceHeapTier == D3D12_RESOURCE_HEAP_TIER_1) {
+ unsigned oldMemoryTypeCount = mem->memoryTypeCount;
+ VkMemoryType oldMemoryTypes[MaxTier2MemoryTypes];
+
+ memcpy(oldMemoryTypes, mem->memoryTypes, oldMemoryTypeCount * sizeof(VkMemoryType));
+
+ mem->memoryTypeCount = 0;
+ for (unsigned oldMemoryTypeIdx = 0; oldMemoryTypeIdx < oldMemoryTypeCount; ++oldMemoryTypeIdx) {
+ D3D12_HEAP_FLAGS flags[] = {
+ D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS,
+ D3D12_HEAP_FLAG_ALLOW_ONLY_RT_DS_TEXTURES,
+ /* Note: Vulkan requires *all* images to come from the same memory type
+ * as long as their tiling (and a few other misc properties) match. So
+ * this non-RT/DS texture flag will only be used for TILING_LINEAR
+ * textures, which can't be render targets.
+ */
+ D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES
+ };
+ for (D3D12_HEAP_FLAGS flag : flags) {
+ pdev->heap_flags_for_mem_type[mem->memoryTypeCount] = flag;
+ mem->memoryTypes[mem->memoryTypeCount] = oldMemoryTypes[oldMemoryTypeIdx];
+ mem->memoryTypeCount++;
+ }
+ }
+ }
+}
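+
+/* On D3D12_RESOURCE_HEAP_TIER_1 hardware each base memory type thus gets
+ * split in three (buffers only, RT/DS textures only, other textures only),
+ * so a UMA device ends up with 2 x 3 = 6 memory types and a non-UMA one
+ * with 3 x 3 = 9, all pointing back at the original heaps.
+ */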
+
+static D3D12_HEAP_FLAGS
+dzn_physical_device_get_heap_flags_for_mem_type(const dzn_physical_device *pdev,
+ uint32_t mem_type)
+{
+ return pdev->heap_flags_for_mem_type[mem_type];
+}
+
+uint32_t
+dzn_physical_device_get_mem_type_mask_for_resource(const dzn_physical_device *pdev,
+ const D3D12_RESOURCE_DESC *desc)
+{
+ if (pdev->options.ResourceHeapTier > D3D12_RESOURCE_HEAP_TIER_1)
+ return (1u << pdev->memory.memoryTypeCount) - 1;
+
+ D3D12_HEAP_FLAGS deny_flag;
+ if (desc->Dimension == D3D12_RESOURCE_DIMENSION_BUFFER)
+ deny_flag = D3D12_HEAP_FLAG_DENY_BUFFERS;
+ else if (desc->Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL))
+ deny_flag = D3D12_HEAP_FLAG_DENY_RT_DS_TEXTURES;
+ else
+ deny_flag = D3D12_HEAP_FLAG_DENY_NON_RT_DS_TEXTURES;
+
+ uint32_t mask = 0;
+ for (unsigned i = 0; i < pdev->memory.memoryTypeCount; ++i) {
+ if ((pdev->heap_flags_for_mem_type[i] & deny_flag) == D3D12_HEAP_FLAG_NONE)
+ mask |= (1 << i);
+ }
+ return mask;
+}
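+
+/* E.g. on a tier-1 device a D3D12_RESOURCE_DIMENSION_BUFFER resource only
+ * matches the memory types tagged D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS (the
+ * ALLOW_ONLY_* flags are combinations of the DENY_* bits tested above); on
+ * tier 2+ every memory type is acceptable.
+ */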
+
+static uint32_t
+dzn_physical_device_get_max_mip_levels(bool is_3d)
+{
+ return is_3d ? 11 : 14;
+}
+
+static uint32_t
+dzn_physical_device_get_max_extent(bool is_3d)
+{
+ uint32_t max_mip = dzn_physical_device_get_max_mip_levels(is_3d);
+
+ return 1 << max_mip;
+}
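+
+/* Those values match the D3D12 hard limits: 2^14 = 16384
+ * (D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION) for 1D/2D resources, and
+ * 2^11 = 2048 (D3D12_REQ_TEXTURE3D_U_V_OR_W_DIMENSION) for 3D ones.
+ */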
+
+static uint32_t
+dzn_physical_device_get_max_array_layers()
+{
+ return dzn_physical_device_get_max_extent(false);
+}
+
+static ID3D12Device1 *
+dzn_physical_device_get_d3d12_dev(dzn_physical_device *pdev)
+{
+ dzn_instance *instance = container_of(pdev->vk.instance, dzn_instance, vk);
+
+ mtx_lock(&pdev->dev_lock);
+ if (!pdev->dev) {
+ pdev->dev = d3d12_create_device(pdev->adapter, instance->dxc.validator == nullptr);
+
+ dzn_physical_device_cache_caps(pdev);
+ dzn_physical_device_init_memory(pdev);
+ }
+ mtx_unlock(&pdev->dev_lock);
+
+ return pdev->dev;
+}
+
+D3D12_FEATURE_DATA_FORMAT_SUPPORT
+dzn_physical_device_get_format_support(dzn_physical_device *pdev,
+ VkFormat format)
+{
+ VkImageUsageFlags usage =
+ vk_format_is_depth_or_stencil(format) ?
+ VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT : 0;
+ VkImageAspectFlags aspects = 0;
+
+ if (vk_format_has_depth(format))
+ aspects = VK_IMAGE_ASPECT_DEPTH_BIT;
+ if (vk_format_has_stencil(format))
+ aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
+
+ D3D12_FEATURE_DATA_FORMAT_SUPPORT dfmt_info = {
+ .Format = dzn_image_get_dxgi_format(format, usage, aspects),
+ };
+
+ ID3D12Device *dev = dzn_physical_device_get_d3d12_dev(pdev);
+ HRESULT hres =
+ dev->CheckFeatureSupport(D3D12_FEATURE_FORMAT_SUPPORT,
+ &dfmt_info, sizeof(dfmt_info));
+ assert(!FAILED(hres));
+
+ if (usage != VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)
+ return dfmt_info;
+
+ /* Depth/stencil resources have a different format when they're accessed
+ * as textures; query the capabilities for that format too.
+ */
+ dzn_foreach_aspect(aspect, aspects) {
+ D3D12_FEATURE_DATA_FORMAT_SUPPORT dfmt_info2 = {
+ .Format = dzn_image_get_dxgi_format(format, 0, aspect),
+ };
+
+ hres = dev->CheckFeatureSupport(D3D12_FEATURE_FORMAT_SUPPORT,
+ &dfmt_info2, sizeof(dfmt_info2));
+ assert(!FAILED(hres));
+
+#define DS_SRV_FORMAT_SUPPORT1_MASK \
+ (D3D12_FORMAT_SUPPORT1_SHADER_LOAD | \
+ D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE | \
+ D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE_COMPARISON | \
+ D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE_MONO_TEXT | \
+ D3D12_FORMAT_SUPPORT1_MULTISAMPLE_RESOLVE | \
+ D3D12_FORMAT_SUPPORT1_MULTISAMPLE_LOAD | \
+ D3D12_FORMAT_SUPPORT1_SHADER_GATHER | \
+ D3D12_FORMAT_SUPPORT1_TYPED_UNORDERED_ACCESS_VIEW | \
+ D3D12_FORMAT_SUPPORT1_SHADER_GATHER_COMPARISON)
+
+ dfmt_info.Support1 |= dfmt_info2.Support1 & DS_SRV_FORMAT_SUPPORT1_MASK;
+ dfmt_info.Support2 |= dfmt_info2.Support2;
+ }
+
+ return dfmt_info;
+}
+
+void
+dzn_physical_device_get_format_properties(dzn_physical_device *pdev,
+ VkFormat format,
+ VkFormatProperties2 *properties)
+{
+ D3D12_FEATURE_DATA_FORMAT_SUPPORT dfmt_info =
+ dzn_physical_device_get_format_support(pdev, format);
+ VkFormatProperties *base_props = &properties->formatProperties;
+
+ vk_foreach_struct(ext, properties->pNext) {
+ dzn_debug_ignored_stype(ext->sType);
+ }
+
+ if (dfmt_info.Format == DXGI_FORMAT_UNKNOWN) {
+ *base_props = VkFormatProperties { };
+ return;
+ }
+
+ ID3D12Device *dev = dzn_physical_device_get_d3d12_dev(pdev);
+
+ *base_props = VkFormatProperties {
+ .linearTilingFeatures = VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT,
+ .optimalTilingFeatures = VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT,
+ .bufferFeatures = VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT,
+ };
+
+ if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_IA_VERTEX_BUFFER)
+ base_props->bufferFeatures |= VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT;
+
+#define TEX_FLAGS (D3D12_FORMAT_SUPPORT1_TEXTURE1D | \
+ D3D12_FORMAT_SUPPORT1_TEXTURE2D | \
+ D3D12_FORMAT_SUPPORT1_TEXTURE3D | \
+ D3D12_FORMAT_SUPPORT1_TEXTURECUBE)
+ if (dfmt_info.Support1 & TEX_FLAGS) {
+ base_props->optimalTilingFeatures |=
+ VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_BLIT_SRC_BIT;
+ }
+
+ if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE) {
+ base_props->optimalTilingFeatures |=
+ VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
+ }
+
+ if ((dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_SHADER_LOAD) &&
+ (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_TYPED_UNORDERED_ACCESS_VIEW)) {
+ base_props->optimalTilingFeatures |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT;
+ base_props->bufferFeatures |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT;
+ }
+
+#define ATOMIC_FLAGS (D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_ADD | \
+ D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_BITWISE_OPS | \
+ D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_COMPARE_STORE_OR_COMPARE_EXCHANGE | \
+ D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_EXCHANGE | \
+ D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_SIGNED_MIN_OR_MAX | \
+ D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_UNSIGNED_MIN_OR_MAX)
+ if ((dfmt_info.Support2 & ATOMIC_FLAGS) == ATOMIC_FLAGS) {
+ base_props->optimalTilingFeatures |= VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT;
+ base_props->bufferFeatures |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT;
+ }
+
+ if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_SHADER_LOAD)
+ base_props->bufferFeatures |= VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT;
+
+ /* Color/depth/stencil attachment cap implies input attachment cap, and input
+ * attachment loads are lowered to texture loads in dozen, hence the requirement
+ * to have shader-load support.
+ */
+ if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_SHADER_LOAD) {
+ if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_RENDER_TARGET) {
+ base_props->optimalTilingFeatures |=
+ VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
+ }
+
+ if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_BLENDABLE)
+ base_props->optimalTilingFeatures |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT;
+
+ if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL) {
+ base_props->optimalTilingFeatures |=
+ VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
+ }
+ }
+
+ /* B4G4R4A4 support is required, but d3d12 doesn't support it. We map this
+ * format to R4G4B4A4 and adjust the SRV component-mapping to fake
+ * B4G4R4A4, but that forces us to limit the usage to sampling, which,
+ * luckily, is exactly what we need to support the required features.
+ */
+ if (format == VK_FORMAT_B4G4R4A4_UNORM_PACK16) {
+ VkFormatFeatureFlags bgra4_req_features =
+ VK_FORMAT_FEATURE_TRANSFER_SRC_BIT |
+ VK_FORMAT_FEATURE_TRANSFER_DST_BIT |
+ VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT |
+ VK_FORMAT_FEATURE_BLIT_SRC_BIT |
+ VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
+ base_props->optimalTilingFeatures &= bgra4_req_features;
+ base_props->bufferFeatures =
+ VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT;
+ }
+}
+
+VkResult
+dzn_physical_device_get_image_format_properties(dzn_physical_device *pdev,
+ const VkPhysicalDeviceImageFormatInfo2 *info,
+ VkImageFormatProperties2 *properties)
+{
+ const VkPhysicalDeviceExternalImageFormatInfo *external_info = NULL;
+ VkExternalImageFormatProperties *external_props = NULL;
+
+ *properties = VkImageFormatProperties2 {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
+ };
+
+ /* Extract input structs */
+ vk_foreach_struct_const(s, info->pNext) {
+ switch (s->sType) {
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO:
+ external_info = (const VkPhysicalDeviceExternalImageFormatInfo *)s;
+ break;
+ default:
+ dzn_debug_ignored_stype(s->sType);
+ break;
+ }
+ }
+
+ assert(info->tiling == VK_IMAGE_TILING_OPTIMAL || info->tiling == VK_IMAGE_TILING_LINEAR);
+
+ /* Extract output structs */
+ vk_foreach_struct(s, properties->pNext) {
+ switch (s->sType) {
+ case VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES:
+ external_props = (VkExternalImageFormatProperties *)s;
+ break;
+ default:
+ dzn_debug_ignored_stype(s->sType);
+ break;
+ }
+ }
+
+ assert((external_props != NULL) == (external_info != NULL));
+
+ /* TODO: support image import */
+ if (external_info && external_info->handleType != 0)
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+ if (info->tiling != VK_IMAGE_TILING_OPTIMAL &&
+ (info->usage & ~(VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT)))
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+ if (info->tiling != VK_IMAGE_TILING_OPTIMAL &&
+ vk_format_is_depth_or_stencil(info->format))
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+ D3D12_FEATURE_DATA_FORMAT_SUPPORT dfmt_info =
+ dzn_physical_device_get_format_support(pdev, info->format);
+ if (dfmt_info.Format == DXGI_FORMAT_UNKNOWN)
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+ bool is_bgra4 = info->format == VK_FORMAT_B4G4R4A4_UNORM_PACK16;
+ ID3D12Device *dev = dzn_physical_device_get_d3d12_dev(pdev);
+
+ if ((info->type == VK_IMAGE_TYPE_1D && !(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_TEXTURE1D)) ||
+ (info->type == VK_IMAGE_TYPE_2D && !(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_TEXTURE2D)) ||
+ (info->type == VK_IMAGE_TYPE_3D && !(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_TEXTURE3D)) ||
+ ((info->flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) &&
+ !(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_TEXTURECUBE)))
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+ if ((info->usage & VK_IMAGE_USAGE_SAMPLED_BIT) &&
+ !(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE))
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+ if ((info->usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) &&
+ (!(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_SHADER_LOAD) || is_bgra4))
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+ if ((info->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) &&
+ (!(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_RENDER_TARGET) || is_bgra4))
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+ if ((info->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) &&
+ (!(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL) || is_bgra4))
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+ if ((info->usage & VK_IMAGE_USAGE_STORAGE_BIT) &&
+ (!(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_TYPED_UNORDERED_ACCESS_VIEW) || is_bgra4))
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+ bool is_3d = info->type == VK_IMAGE_TYPE_3D;
+ uint32_t max_extent = dzn_physical_device_get_max_extent(is_3d);
+
+ if (info->tiling == VK_IMAGE_TILING_OPTIMAL &&
+ dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_MIP)
+ properties->imageFormatProperties.maxMipLevels = dzn_physical_device_get_max_mip_levels(is_3d);
+ else
+ properties->imageFormatProperties.maxMipLevels = 1;
+
+ if (info->tiling == VK_IMAGE_TILING_OPTIMAL)
+ properties->imageFormatProperties.maxArrayLayers = dzn_physical_device_get_max_array_layers();
+ else
+ properties->imageFormatProperties.maxArrayLayers = 1;
+
+ switch (info->type) {
+ case VK_IMAGE_TYPE_1D:
+ properties->imageFormatProperties.maxExtent.width = max_extent;
+ properties->imageFormatProperties.maxExtent.height = 1;
+ properties->imageFormatProperties.maxExtent.depth = 1;
+ break;
+ case VK_IMAGE_TYPE_2D:
+ properties->imageFormatProperties.maxExtent.width = max_extent;
+ properties->imageFormatProperties.maxExtent.height = max_extent;
+ properties->imageFormatProperties.maxExtent.depth = 1;
+ break;
+ case VK_IMAGE_TYPE_3D:
+ if (info->tiling != VK_IMAGE_TILING_OPTIMAL)
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+ properties->imageFormatProperties.maxExtent.width = max_extent;
+ properties->imageFormatProperties.maxExtent.height = max_extent;
+ properties->imageFormatProperties.maxExtent.depth = max_extent;
+ break;
+ default:
+ unreachable("bad VkImageType");
+ }
+
+ /* From the Vulkan 1.0 spec, section 34.1.1. Supported Sample Counts:
+ *
+ * sampleCounts will be set to VK_SAMPLE_COUNT_1_BIT if at least one of the
+ * following conditions is true:
+ *
+ * - tiling is VK_IMAGE_TILING_LINEAR
+ * - type is not VK_IMAGE_TYPE_2D
+ * - flags contains VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT
+ * - neither the VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT flag nor the
+ * VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT flag in
+ * VkFormatProperties::optimalTilingFeatures returned by
+ * vkGetPhysicalDeviceFormatProperties is set.
+ *
+ * D3D12 has a few more constraints:
+ * - no UAVs on multisample resources
+ */
+ bool rt_or_ds_cap =
+ dfmt_info.Support1 &
+ (D3D12_FORMAT_SUPPORT1_RENDER_TARGET | D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL);
+
+ properties->imageFormatProperties.sampleCounts = VK_SAMPLE_COUNT_1_BIT;
+ if (info->tiling != VK_IMAGE_TILING_LINEAR &&
+ info->type == VK_IMAGE_TYPE_2D &&
+ !(info->flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) &&
+ rt_or_ds_cap &&
+ !(info->usage & VK_IMAGE_USAGE_STORAGE_BIT)) {
+ for (uint32_t s = VK_SAMPLE_COUNT_2_BIT; s < VK_SAMPLE_COUNT_64_BIT; s <<= 1) {
+ D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS ms_info = {
+ .Format = dfmt_info.Format,
+ .SampleCount = s,
+ };
+
+ HRESULT hres =
+ dev->CheckFeatureSupport(D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS,
+ &ms_info, sizeof(ms_info));
+ if (!FAILED(hres) && ms_info.NumQualityLevels > 0)
+ properties->imageFormatProperties.sampleCounts |= s;
+ }
+ }
+
+ /* TODO: set correct value here */
+ properties->imageFormatProperties.maxResourceSize = UINT32_MAX;
+
+ return VK_SUCCESS;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_GetPhysicalDeviceFormatProperties2(VkPhysicalDevice physicalDevice,
+ VkFormat format,
+ VkFormatProperties2 *pFormatProperties)
+{
+ VK_FROM_HANDLE(dzn_physical_device, pdev, physicalDevice);
+
+ dzn_physical_device_get_format_properties(pdev, format, pFormatProperties);
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_GetPhysicalDeviceImageFormatProperties2(VkPhysicalDevice physicalDevice,
+ const VkPhysicalDeviceImageFormatInfo2 *info,
+ VkImageFormatProperties2 *props)
+{
+ VK_FROM_HANDLE(dzn_physical_device, pdev, physicalDevice);
+
+ return dzn_physical_device_get_image_format_properties(pdev, info, props);
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_GetPhysicalDeviceImageFormatProperties(VkPhysicalDevice physicalDevice,
+ VkFormat format,
+ VkImageType type,
+ VkImageTiling tiling,
+ VkImageUsageFlags usage,
+ VkImageCreateFlags createFlags,
+ VkImageFormatProperties *pImageFormatProperties)
+{
+ const VkPhysicalDeviceImageFormatInfo2 info = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
+ .format = format,
+ .type = type,
+ .tiling = tiling,
+ .usage = usage,
+ .flags = createFlags,
+ };
+
+ VkImageFormatProperties2 props = {};
+
+ VkResult result =
+ dzn_GetPhysicalDeviceImageFormatProperties2(physicalDevice, &info, &props);
+ *pImageFormatProperties = props.imageFormatProperties;
+
+ return result;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_GetPhysicalDeviceSparseImageFormatProperties(VkPhysicalDevice physicalDevice,
+ VkFormat format,
+ VkImageType type,
+ VkSampleCountFlagBits samples,
+ VkImageUsageFlags usage,
+ VkImageTiling tiling,
+ uint32_t *pPropertyCount,
+ VkSparseImageFormatProperties *pProperties)
+{
+ *pPropertyCount = 0;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_GetPhysicalDeviceSparseImageFormatProperties2(VkPhysicalDevice physicalDevice,
+ const VkPhysicalDeviceSparseImageFormatInfo2 *pFormatInfo,
+ uint32_t *pPropertyCount,
+ VkSparseImageFormatProperties2 *pProperties)
+{
+ *pPropertyCount = 0;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_GetPhysicalDeviceExternalBufferProperties(VkPhysicalDevice physicalDevice,
+ const VkPhysicalDeviceExternalBufferInfo *pExternalBufferInfo,
+ VkExternalBufferProperties *pExternalBufferProperties)
+{
+ pExternalBufferProperties->externalMemoryProperties =
+ VkExternalMemoryProperties {
+ .compatibleHandleTypes = (VkExternalMemoryHandleTypeFlags)pExternalBufferInfo->handleType,
+ };
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_EnumeratePhysicalDevices(VkInstance inst,
+ uint32_t *pPhysicalDeviceCount,
+ VkPhysicalDevice *pPhysicalDevices)
+{
+ VK_FROM_HANDLE(dzn_instance, instance, inst);
+
+ if (!instance->physical_devices_enumerated) {
+ ComPtr<IDXGIFactory4> factory = dxgi_get_factory(false);
+ ComPtr<IDXGIAdapter1> adapter(NULL);
+ for (UINT i = 0; SUCCEEDED(factory->EnumAdapters1(i, &adapter)); ++i) {
+ DXGI_ADAPTER_DESC1 desc;
+ adapter->GetDesc1(&desc);
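+         /* In DZN_DEBUG_WARP mode, only software adapters (i.e. WARP) are
+          * enumerated, which helps debugging without real hardware.
+          */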
+ if (instance->debug_flags & DZN_DEBUG_WARP) {
+ if ((desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) == 0)
+ continue;
+ }
+
+ VkResult result =
+ dzn_physical_device_create(instance, adapter.Get(), &desc);
+ if (result != VK_SUCCESS)
+ return result;
+ }
+ }
+
+ VK_OUTARRAY_MAKE_TYPED(VkPhysicalDevice, out, pPhysicalDevices,
+ pPhysicalDeviceCount);
+
+ list_for_each_entry(dzn_physical_device, pdev, &instance->physical_devices, link) {
+ vk_outarray_append_typed(VkPhysicalDevice, &out, i)
+ *i = dzn_physical_device_to_handle(pdev);
+ }
+
+ instance->physical_devices_enumerated = true;
+ return vk_outarray_status(&out);
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_EnumerateInstanceVersion(uint32_t *pApiVersion)
+{
+ *pApiVersion = DZN_API_VERSION;
+ return VK_SUCCESS;
+}
+
+bool
+dzn_physical_device_supports_compressed_format(dzn_physical_device *pdev,
+ const VkFormat *formats,
+ uint32_t format_count)
+{
+#define REQUIRED_COMPRESSED_CAPS \
+ (VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | \
+ VK_FORMAT_FEATURE_BLIT_SRC_BIT | \
+ VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT)
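+   /* These are the feature bits the Vulkan spec requires from every format
+    * of a compressed family before the matching textureCompression* device
+    * feature may be advertised.
+    */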
+ for (uint32_t i = 0; i < format_count; i++) {
+ VkFormatProperties2 props = {};
+ dzn_physical_device_get_format_properties(pdev, formats[i], &props);
+ if ((props.formatProperties.optimalTilingFeatures & REQUIRED_COMPRESSED_CAPS) != REQUIRED_COMPRESSED_CAPS)
+ return false;
+ }
+
+ return true;
+}
+
+bool
+dzn_physical_device_supports_bc(dzn_physical_device *pdev)
+{
+ static const VkFormat formats[] = {
+ VK_FORMAT_BC1_RGB_UNORM_BLOCK,
+ VK_FORMAT_BC1_RGB_SRGB_BLOCK,
+ VK_FORMAT_BC1_RGBA_UNORM_BLOCK,
+ VK_FORMAT_BC1_RGBA_SRGB_BLOCK,
+ VK_FORMAT_BC2_UNORM_BLOCK,
+ VK_FORMAT_BC2_SRGB_BLOCK,
+ VK_FORMAT_BC3_UNORM_BLOCK,
+ VK_FORMAT_BC3_SRGB_BLOCK,
+ VK_FORMAT_BC4_UNORM_BLOCK,
+ VK_FORMAT_BC4_SNORM_BLOCK,
+ VK_FORMAT_BC5_UNORM_BLOCK,
+ VK_FORMAT_BC5_SNORM_BLOCK,
+ VK_FORMAT_BC6H_UFLOAT_BLOCK,
+ VK_FORMAT_BC6H_SFLOAT_BLOCK,
+ VK_FORMAT_BC7_UNORM_BLOCK,
+ VK_FORMAT_BC7_SRGB_BLOCK,
+ };
+
+ return dzn_physical_device_supports_compressed_format(pdev, formats, ARRAY_SIZE(formats));
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_GetPhysicalDeviceFeatures(VkPhysicalDevice physicalDevice,
+ VkPhysicalDeviceFeatures *pFeatures)
+{
+ VK_FROM_HANDLE(dzn_physical_device, pdev, physicalDevice);
+
+ *pFeatures = VkPhysicalDeviceFeatures {
+ .robustBufferAccess = true, /* This feature is mandatory */
+ .fullDrawIndexUint32 = false,
+ .imageCubeArray = true,
+ .independentBlend = false,
+ .geometryShader = false,
+ .tessellationShader = false,
+ .sampleRateShading = false,
+ .dualSrcBlend = false,
+ .logicOp = false,
+ .multiDrawIndirect = false,
+ .drawIndirectFirstInstance = false,
+ .depthClamp = false,
+ .depthBiasClamp = false,
+ .fillModeNonSolid = false,
+ .depthBounds = false,
+ .wideLines = false,
+ .largePoints = false,
+ .alphaToOne = false,
+ .multiViewport = false,
+ .samplerAnisotropy = false,
+ .textureCompressionETC2 = false,
+ .textureCompressionASTC_LDR = false,
+ .textureCompressionBC = dzn_physical_device_supports_bc(pdev),
+ .occlusionQueryPrecise = true,
+ .pipelineStatisticsQuery = true,
+ .vertexPipelineStoresAndAtomics = true,
+ .fragmentStoresAndAtomics = true,
+ .shaderTessellationAndGeometryPointSize = false,
+ .shaderImageGatherExtended = false,
+ .shaderStorageImageExtendedFormats = false,
+ .shaderStorageImageMultisample = false,
+ .shaderStorageImageReadWithoutFormat = false,
+ .shaderStorageImageWriteWithoutFormat = false,
+ .shaderUniformBufferArrayDynamicIndexing = false,
+ .shaderSampledImageArrayDynamicIndexing = false,
+ .shaderStorageBufferArrayDynamicIndexing = false,
+ .shaderStorageImageArrayDynamicIndexing = false,
+ .shaderClipDistance = false,
+ .shaderCullDistance = false,
+ .shaderFloat64 = false,
+ .shaderInt64 = false,
+ .shaderInt16 = false,
+ .shaderResourceResidency = false,
+ .shaderResourceMinLod = false,
+ .sparseBinding = false,
+ .sparseResidencyBuffer = false,
+ .sparseResidencyImage2D = false,
+ .sparseResidencyImage3D = false,
+ .sparseResidency2Samples = false,
+ .sparseResidency4Samples = false,
+ .sparseResidency8Samples = false,
+ .sparseResidency16Samples = false,
+ .sparseResidencyAliased = false,
+ .variableMultisampleRate = false,
+ .inheritedQueries = false,
+ };
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
+ VkPhysicalDeviceFeatures2 *pFeatures)
+{
+ dzn_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
+
+ vk_foreach_struct(ext, pFeatures->pNext) {
+ dzn_debug_ignored_stype(ext->sType);
+ }
+}
+
+VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
+dzn_GetInstanceProcAddr(VkInstance _instance,
+ const char *pName)
+{
+ VK_FROM_HANDLE(dzn_instance, instance, _instance);
+ return vk_instance_get_proc_addr(&instance->vk,
+ &dzn_instance_entrypoints,
+ pName);
+}
+
+/* Windows will use a DLL definition file to avoid build errors. */
+#ifdef _WIN32
+#undef PUBLIC
+#define PUBLIC
+#endif
+
+/* With version 1+ of the loader interface the ICD should expose
+ * vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in apps.
+ */
+PUBLIC VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
+vk_icdGetInstanceProcAddr(VkInstance instance,
+ const char *pName);
+
+PUBLIC VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
+vk_icdGetInstanceProcAddr(VkInstance instance,
+ const char *pName)
+{
+ return dzn_GetInstanceProcAddr(instance, pName);
+}
+
+/* With version 4+ of the loader interface the ICD should expose
+ * vk_icdGetPhysicalDeviceProcAddr()
+ */
+PUBLIC VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
+vk_icdGetPhysicalDeviceProcAddr(VkInstance _instance,
+ const char* pName);
+
+VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
+vk_icdGetPhysicalDeviceProcAddr(VkInstance _instance,
+ const char* pName)
+{
+ VK_FROM_HANDLE(dzn_instance, instance, _instance);
+ return vk_instance_get_physical_device_proc_addr(&instance->vk, pName);
+}
+
+/* vk_icd.h does not declare this function, so we declare it here to
+ * suppress -Wmissing-prototypes.
+ */
+PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
+vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t* pSupportedVersion);
+
+PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
+vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t* pSupportedVersion)
+{
+ /* For the full details on loader interface versioning, see
+ * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
+ * What follows is a condensed summary, to help you navigate the large and
+ * confusing official doc.
+ *
+ * - Loader interface v0 is incompatible with later versions. We don't
+ * support it.
+ *
+ * - In loader interface v1:
+ * - The first ICD entrypoint called by the loader is
+ * vk_icdGetInstanceProcAddr(). The ICD must statically expose this
+ * entrypoint.
+ * - The ICD must statically expose no other Vulkan symbol unless it is
+ * linked with -Bsymbolic.
+ * - Each dispatchable Vulkan handle created by the ICD must be
+ * a pointer to a struct whose first member is VK_LOADER_DATA. The
+ * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
+ * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
+ * vkDestroySurfaceKHR(). The ICD must be capable of working with
+ * such loader-managed surfaces.
+ *
+ * - Loader interface v2 differs from v1 in:
+ * - The first ICD entrypoint called by the loader is
+ * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
+ * statically expose this entrypoint.
+ *
+ * - Loader interface v3 differs from v2 in:
+ * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
+ *      vkDestroySurfaceKHR(), and any other API that uses VkSurfaceKHR,
+ * because the loader no longer does so.
+ *
+ * - Loader interface v4 differs from v3 in:
+ * - The ICD must implement vk_icdGetPhysicalDeviceProcAddr().
+ */
+ *pSupportedVersion = MIN2(*pSupportedVersion, 4u);
+ return VK_SUCCESS;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
+ VkPhysicalDeviceProperties *pProperties)
+{
+ VK_FROM_HANDLE(dzn_physical_device, pdevice, physicalDevice);
+
+ /* minimum from the spec */
+ const VkSampleCountFlags supported_sample_counts =
+ VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_4_BIT;
+
+ /* FIXME: this is mostly bunk for now */
+ VkPhysicalDeviceLimits limits = {
+
+ /* TODO: support older feature levels */
+ .maxImageDimension1D = (1 << 14),
+ .maxImageDimension2D = (1 << 14),
+ .maxImageDimension3D = (1 << 11),
+ .maxImageDimensionCube = (1 << 14),
+ .maxImageArrayLayers = (1 << 11),
+
+ /* from here on, we simply use the minimum values from the spec for now */
+ .maxTexelBufferElements = 65536,
+ .maxUniformBufferRange = 16384,
+ .maxStorageBufferRange = (1ul << 27),
+ .maxPushConstantsSize = 128,
+ .maxMemoryAllocationCount = 4096,
+ .maxSamplerAllocationCount = 4000,
+ .bufferImageGranularity = 131072,
+ .sparseAddressSpaceSize = 0,
+ .maxBoundDescriptorSets = MAX_SETS,
+ .maxPerStageDescriptorSamplers = 16,
+ .maxPerStageDescriptorUniformBuffers = 12,
+ .maxPerStageDescriptorStorageBuffers = 4,
+ .maxPerStageDescriptorSampledImages = 16,
+ .maxPerStageDescriptorStorageImages = 4,
+ .maxPerStageDescriptorInputAttachments = 4,
+ .maxPerStageResources = 128,
+ .maxDescriptorSetSamplers = 96,
+ .maxDescriptorSetUniformBuffers = 72,
+ .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
+ .maxDescriptorSetStorageBuffers = 24,
+ .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
+ .maxDescriptorSetSampledImages = 96,
+ .maxDescriptorSetStorageImages = 24,
+ .maxDescriptorSetInputAttachments = 4,
+ .maxVertexInputAttributes = 16,
+ .maxVertexInputBindings = 16,
+ .maxVertexInputAttributeOffset = 2047,
+ .maxVertexInputBindingStride = 2048,
+ .maxVertexOutputComponents = 64,
+ .maxTessellationGenerationLevel = 0,
+ .maxTessellationPatchSize = 0,
+ .maxTessellationControlPerVertexInputComponents = 0,
+ .maxTessellationControlPerVertexOutputComponents = 0,
+ .maxTessellationControlPerPatchOutputComponents = 0,
+ .maxTessellationControlTotalOutputComponents = 0,
+ .maxTessellationEvaluationInputComponents = 0,
+ .maxTessellationEvaluationOutputComponents = 0,
+ .maxGeometryShaderInvocations = 0,
+ .maxGeometryInputComponents = 0,
+ .maxGeometryOutputComponents = 0,
+ .maxGeometryOutputVertices = 0,
+ .maxGeometryTotalOutputComponents = 0,
+ .maxFragmentInputComponents = 64,
+ .maxFragmentOutputAttachments = 4,
+ .maxFragmentDualSrcAttachments = 0,
+ .maxFragmentCombinedOutputResources = 4,
+ .maxComputeSharedMemorySize = 16384,
+ .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
+ .maxComputeWorkGroupInvocations = 128,
+ .maxComputeWorkGroupSize = { 128, 128, 64 },
+ .subPixelPrecisionBits = 4,
+ .subTexelPrecisionBits = 4,
+ .mipmapPrecisionBits = 4,
+ .maxDrawIndexedIndexValue = 0x00ffffff,
+ .maxDrawIndirectCount = 1,
+ .maxSamplerLodBias = 2.0f,
+ .maxSamplerAnisotropy = 1.0f,
+ .maxViewports = 1,
+ .maxViewportDimensions = { 4096, 4096 },
+ .viewportBoundsRange = { -8192, 8191 },
+ .viewportSubPixelBits = 0,
+ .minMemoryMapAlignment = 64,
+ .minTexelBufferOffsetAlignment = 256,
+ .minUniformBufferOffsetAlignment = 256,
+ .minStorageBufferOffsetAlignment = 256,
+ .minTexelOffset = -8,
+ .maxTexelOffset = 7,
+ .minTexelGatherOffset = 0,
+ .maxTexelGatherOffset = 0,
+ .minInterpolationOffset = 0.0f,
+ .maxInterpolationOffset = 0.0f,
+ .subPixelInterpolationOffsetBits = 0,
+ .maxFramebufferWidth = 4096,
+ .maxFramebufferHeight = 4096,
+ .maxFramebufferLayers = 256,
+ .framebufferColorSampleCounts = supported_sample_counts,
+ .framebufferDepthSampleCounts = supported_sample_counts,
+ .framebufferStencilSampleCounts = supported_sample_counts,
+ .framebufferNoAttachmentsSampleCounts = supported_sample_counts,
+ .maxColorAttachments = 4,
+ .sampledImageColorSampleCounts = supported_sample_counts,
+ .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
+ .sampledImageDepthSampleCounts = supported_sample_counts,
+ .sampledImageStencilSampleCounts = supported_sample_counts,
+ .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
+ .maxSampleMaskWords = 1,
+ .timestampComputeAndGraphics = true,
+ .timestampPeriod = pdevice->timestamp_period,
+ .maxClipDistances = 8,
+ .maxCullDistances = 8,
+ .maxCombinedClipAndCullDistances = 8,
+ .discreteQueuePriorities = 2,
+ .pointSizeRange = { 1.0f, 1.0f },
+ .lineWidthRange = { 1.0f, 1.0f },
+ .pointSizeGranularity = 0.0f,
+ .lineWidthGranularity = 0.0f,
+ .strictLines = 0,
+ .standardSampleLocations = false,
+ .optimalBufferCopyOffsetAlignment = 1,
+ .optimalBufferCopyRowPitchAlignment = 1,
+ .nonCoherentAtomSize = 256,
+ };
+
+ const DXGI_ADAPTER_DESC1& desc = pdevice->adapter_desc;
+
+ VkPhysicalDeviceType devtype = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
+   if (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE)
+ devtype = VK_PHYSICAL_DEVICE_TYPE_CPU;
+   else if (false) { // TODO: detect discrete GPUs
+ /* This is a tad tricky to get right, because we need to have the
+ * actual ID3D12Device before we can query the
+ * D3D12_FEATURE_DATA_ARCHITECTURE structure... So for now, let's
+ * just pretend everything is integrated, because... well, that's
+ * what I have at hand right now ;)
+ */
+ devtype = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU;
+ }
+
+ *pProperties = VkPhysicalDeviceProperties {
+ .apiVersion = DZN_API_VERSION,
+ .driverVersion = vk_get_driver_version(),
+
+ .vendorID = desc.VendorId,
+ .deviceID = desc.DeviceId,
+ .deviceType = devtype,
+
+ .limits = limits,
+ .sparseProperties = { 0 },
+ };
+
+ snprintf(pProperties->deviceName, sizeof(pProperties->deviceName),
+ "Microsoft Direct3D12 (%S)", desc.Description);
+
+ memcpy(pProperties->pipelineCacheUUID,
+ pdevice->pipeline_cache_uuid, VK_UUID_SIZE);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
+ VkPhysicalDeviceProperties2 *pProperties)
+{
+ VK_FROM_HANDLE(dzn_physical_device, pdevice, physicalDevice);
+
+ dzn_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
+
+ vk_foreach_struct(ext, pProperties->pNext) {
+ switch (ext->sType) {
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES: {
+ VkPhysicalDeviceIDProperties *id_props =
+ (VkPhysicalDeviceIDProperties *)ext;
+ memcpy(id_props->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
+ memcpy(id_props->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
+      /* The LUID is a Windows-specific adapter identifier; we don't fill it in yet. */
+ id_props->deviceLUIDValid = false;
+ break;
+ }
+ default:
+ dzn_debug_ignored_stype(ext->sType);
+ break;
+ }
+ }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice,
+ uint32_t *pQueueFamilyPropertyCount,
+ VkQueueFamilyProperties2 *pQueueFamilyProperties)
+{
+ VK_FROM_HANDLE(dzn_physical_device, pdev, physicalDevice);
+ VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out,
+ pQueueFamilyProperties, pQueueFamilyPropertyCount);
+
+ (void)dzn_physical_device_get_d3d12_dev(pdev);
+
+ for (uint32_t i = 0; i < pdev->queue_family_count; i++) {
+ vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p) {
+ p->queueFamilyProperties = pdev->queue_families[i].props;
+
+         vk_foreach_struct(ext, p->pNext) {
+ dzn_debug_ignored_stype(ext->sType);
+ }
+ }
+ }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_GetPhysicalDeviceMemoryProperties(VkPhysicalDevice physicalDevice,
+ VkPhysicalDeviceMemoryProperties *pMemoryProperties)
+{
+ VK_FROM_HANDLE(dzn_physical_device, pdev, physicalDevice);
+
+ // Ensure memory caps are up-to-date
+ (void)dzn_physical_device_get_d3d12_dev(pdev);
+ *pMemoryProperties = pdev->memory;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice physicalDevice,
+ VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
+{
+ dzn_GetPhysicalDeviceMemoryProperties(physicalDevice,
+ &pMemoryProperties->memoryProperties);
+
+ vk_foreach_struct(ext, pMemoryProperties->pNext) {
+ dzn_debug_ignored_stype(ext->sType);
+ }
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount,
+ VkLayerProperties *pProperties)
+{
+ if (pProperties == NULL) {
+ *pPropertyCount = 0;
+ return VK_SUCCESS;
+ }
+
+ return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
+}
+
+static VkResult
+dzn_queue_sync_wait(dzn_queue *queue, const struct vk_sync_wait *wait)
+{
+ if (wait->sync->type == &vk_sync_dummy_type)
+ return VK_SUCCESS;
+
+ dzn_device *device = container_of(queue->vk.base.device, dzn_device, vk);
+ assert(wait->sync->type == &dzn_sync_type);
+ dzn_sync *sync = container_of(wait->sync, dzn_sync, vk);
+ uint64_t value =
+ (sync->vk.flags & VK_SYNC_IS_TIMELINE) ? wait->wait_value : 1;
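+   /* Binary syncs only ever carry a payload of 0 (unsignaled) or
+    * 1 (signaled), so non-timeline waits target value 1. A timeline wait on
+    * value 0 is trivially satisfied, hence the value > 0 check below.
+    */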
+
+ assert(sync->fence != NULL);
+
+ if (value > 0 && FAILED(queue->cmdqueue->Wait(sync->fence, value)))
+ return vk_error(device, VK_ERROR_UNKNOWN);
+
+ return VK_SUCCESS;
+}
+
+static VkResult
+dzn_queue_sync_signal(dzn_queue *queue, const struct vk_sync_signal *signal)
+{
+ if (signal->sync->type == &vk_sync_dummy_type)
+ return VK_SUCCESS;
+
+ dzn_device *device = container_of(queue->vk.base.device, dzn_device, vk);
+ assert(signal->sync->type == &dzn_sync_type);
+ dzn_sync *sync = container_of(signal->sync, dzn_sync, vk);
+ uint64_t value =
+ (sync->vk.flags & VK_SYNC_IS_TIMELINE) ? signal->signal_value : 1;
+ assert(value > 0);
+
+ assert(sync->fence != NULL);
+
+ if (FAILED(queue->cmdqueue->Signal(sync->fence, value)))
+ return vk_error(device, VK_ERROR_UNKNOWN);
+
+ return VK_SUCCESS;
+}
+
+static VkResult
+dzn_queue_submit(struct vk_queue *q,
+ struct vk_queue_submit *info)
+{
+ dzn_queue *queue = container_of(q, dzn_queue, vk);
+ dzn_device *device = container_of(q->base.device, dzn_device, vk);
+ VkResult result = VK_SUCCESS;
+
+ for (uint32_t i = 0; i < info->wait_count; i++) {
+ result = dzn_queue_sync_wait(queue, &info->waits[i]);
+ if (result != VK_SUCCESS)
+ return result;
+ }
+
+ for (uint32_t i = 0; i < info->command_buffer_count; i++) {
+ dzn_cmd_buffer *cmd_buffer =
+ container_of(info->command_buffers[i], dzn_cmd_buffer, vk);
+
+ ID3D12CommandList *cmdlists[] = { cmd_buffer->cmdlist };
+
+ util_dynarray_foreach(&cmd_buffer->events.wait, dzn_event *, evt) {
+ if (FAILED(queue->cmdqueue->Wait((*evt)->fence, 1)))
+ return vk_error(device, VK_ERROR_UNKNOWN);
+ }
+
+ util_dynarray_foreach(&cmd_buffer->queries.wait, dzn_cmd_buffer_query_range, range) {
+ mtx_lock(&range->qpool->queries_lock);
+ for (uint32_t q = range->start; q < range->start + range->count; q++) {
+ struct dzn_query *query = &range->qpool->queries[q];
+
+ if (query->fence &&
+ FAILED(queue->cmdqueue->Wait(query->fence, query->fence_value)))
+ return vk_error(device, VK_ERROR_UNKNOWN);
+ }
+ mtx_unlock(&range->qpool->queries_lock);
+ }
+
+ util_dynarray_foreach(&cmd_buffer->queries.reset, dzn_cmd_buffer_query_range, range) {
+ mtx_lock(&range->qpool->queries_lock);
+ for (uint32_t q = range->start; q < range->start + range->count; q++) {
+ struct dzn_query *query = &range->qpool->queries[q];
+ if (query->fence) {
+ query->fence->Release();
+ query->fence = NULL;
+ }
+ query->fence_value = 0;
+ }
+ mtx_unlock(&range->qpool->queries_lock);
+ }
+
+ queue->cmdqueue->ExecuteCommandLists(1, cmdlists);
+
+ util_dynarray_foreach(&cmd_buffer->events.signal, dzn_cmd_event_signal, evt) {
+ if (FAILED(queue->cmdqueue->Signal(evt->event->fence, evt->value ? 1 : 0)))
+ return vk_error(device, VK_ERROR_UNKNOWN);
+ }
+
+ util_dynarray_foreach(&cmd_buffer->queries.signal, dzn_cmd_buffer_query_range, range) {
+ mtx_lock(&range->qpool->queries_lock);
+ for (uint32_t q = range->start; q < range->start + range->count; q++) {
+ struct dzn_query *query = &range->qpool->queries[q];
+ query->fence_value = queue->fence_point + 1;
+ query->fence = queue->fence;
+ query->fence->AddRef();
+ }
+ mtx_unlock(&range->qpool->queries_lock);
+ }
+ }
+
+ for (uint32_t i = 0; i < info->signal_count; i++) {
+ result = dzn_queue_sync_signal(queue, &info->signals[i]);
+ if (result != VK_SUCCESS)
+         return result;
+ }
+
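+   /* Bump the queue fence last: the queries signaled above were tagged with
+    * fence_point + 1, so their fence value completes exactly when this
+    * submission finishes.
+    */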
+ if (FAILED(queue->cmdqueue->Signal(queue->fence, ++queue->fence_point)))
+ return vk_error(device, VK_ERROR_UNKNOWN);
+
+ return VK_SUCCESS;
+}
+
+static void
+dzn_queue_finish(dzn_queue *queue)
+{
+ if (queue->cmdqueue)
+ queue->cmdqueue->Release();
+
+ if (queue->fence)
+ queue->fence->Release();
+
+ vk_queue_finish(&queue->vk);
+}
+
+static VkResult
+dzn_queue_init(dzn_queue *queue,
+ dzn_device *device,
+ const VkDeviceQueueCreateInfo *pCreateInfo,
+ uint32_t index_in_family)
+{
+ dzn_physical_device *pdev = container_of(device->vk.physical, dzn_physical_device, vk);
+
+ VkResult result = vk_queue_init(&queue->vk, &device->vk, pCreateInfo, index_in_family);
+ if (result != VK_SUCCESS)
+ return result;
+
+ queue->vk.driver_submit = dzn_queue_submit;
+
+ assert(pCreateInfo->queueFamilyIndex < pdev->queue_family_count);
+
+ D3D12_COMMAND_QUEUE_DESC queue_desc =
+ pdev->queue_families[pCreateInfo->queueFamilyIndex].desc;
+
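+   /* Vulkan queue priorities are normalized floats in [0.0, 1.0]; map them
+    * linearly onto D3D12's [NORMAL, HIGH] priority range.
+    */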
+ queue_desc.Priority =
+ (INT)(pCreateInfo->pQueuePriorities[index_in_family] * (float)D3D12_COMMAND_QUEUE_PRIORITY_HIGH);
+ queue_desc.NodeMask = 0;
+
+ if (FAILED(device->dev->CreateCommandQueue(&queue_desc,
+ IID_PPV_ARGS(&queue->cmdqueue)))) {
+ dzn_queue_finish(queue);
+ return vk_error(device->vk.physical->instance, VK_ERROR_INITIALIZATION_FAILED);
+ }
+
+ if (FAILED(device->dev->CreateFence(0, D3D12_FENCE_FLAG_NONE,
+ IID_PPV_ARGS(&queue->fence)))) {
+ dzn_queue_finish(queue);
+ return vk_error(device->vk.physical->instance, VK_ERROR_INITIALIZATION_FAILED);
+ }
+
+ return VK_SUCCESS;
+}
+
+static VkResult
+check_physical_device_features(VkPhysicalDevice physicalDevice,
+ const VkPhysicalDeviceFeatures *features)
+{
+ VkPhysicalDeviceFeatures supported_features;
+ dzn_GetPhysicalDeviceFeatures(physicalDevice, &supported_features);
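+   /* VkPhysicalDeviceFeatures is a packed struct of VkBool32s, so the
+    * supported and requested feature sets can be walked as flat arrays and
+    * compared element by element.
+    */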
+ VkBool32 *supported_feature = (VkBool32 *)&supported_features;
+ VkBool32 *enabled_feature = (VkBool32 *)features;
+ unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32);
+ for (uint32_t i = 0; i < num_features; i++) {
+ if (enabled_feature[i] && !supported_feature[i])
+ return VK_ERROR_FEATURE_NOT_PRESENT;
+ }
+
+ return VK_SUCCESS;
+}
+
+static VkResult
+dzn_device_create_sync_for_memory(struct vk_device *device,
+ VkDeviceMemory memory,
+ bool signal_memory,
+ struct vk_sync **sync_out)
+{
+ return vk_sync_create(device, &vk_sync_dummy_type,
+ (enum vk_sync_flags)0, 1, sync_out);
+}
+
+static void
+dzn_device_ref_pipeline_layout(struct vk_device *dev, VkPipelineLayout layout)
+{
+ VK_FROM_HANDLE(dzn_pipeline_layout, playout, layout);
+
+ dzn_pipeline_layout_ref(playout);
+}
+
+static void
+dzn_device_unref_pipeline_layout(struct vk_device *dev, VkPipelineLayout layout)
+{
+ VK_FROM_HANDLE(dzn_pipeline_layout, playout, layout);
+
+ dzn_pipeline_layout_unref(playout);
+}
+
+static VkResult
+dzn_device_query_init(dzn_device *device)
+{
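+   /* Allocate a small host-visible buffer whose all-ones/all-zeros sections
+    * (filled below) serve as copy sources for query bookkeeping, e.g. when
+    * resetting availability data.
+    */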
+ /* FIXME: create the resource in the default heap */
+ D3D12_HEAP_PROPERTIES hprops =
+ device->dev->GetCustomHeapProperties(0, D3D12_HEAP_TYPE_UPLOAD);
+ D3D12_RESOURCE_DESC rdesc = {
+ .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
+ .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT,
+ .Width = DZN_QUERY_REFS_RES_SIZE,
+ .Height = 1,
+ .DepthOrArraySize = 1,
+ .MipLevels = 1,
+ .Format = DXGI_FORMAT_UNKNOWN,
+ .SampleDesc = { .Count = 1, .Quality = 0 },
+ .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
+ .Flags = D3D12_RESOURCE_FLAG_NONE,
+ };
+
+ if (FAILED(device->dev->CreateCommittedResource(&hprops,
+ D3D12_HEAP_FLAG_NONE,
+ &rdesc,
+ D3D12_RESOURCE_STATE_GENERIC_READ,
+ NULL,
+ IID_PPV_ARGS(&device->queries.refs))))
+ return vk_error(device->vk.physical, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+
+ uint8_t *queries_ref;
+ if (FAILED(device->queries.refs->Map(0, NULL, (void **)&queries_ref)))
+ return vk_error(device->vk.physical, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ memset(queries_ref + DZN_QUERY_REFS_ALL_ONES_OFFSET, 0xff, DZN_QUERY_REFS_SECTION_SIZE);
+ memset(queries_ref + DZN_QUERY_REFS_ALL_ZEROS_OFFSET, 0x0, DZN_QUERY_REFS_SECTION_SIZE);
+ device->queries.refs->Unmap(0, NULL);
+
+ return VK_SUCCESS;
+}
+
+static void
+dzn_device_query_finish(dzn_device *device)
+{
+ if (device->queries.refs)
+ device->queries.refs->Release();
+}
+
+static void
+dzn_device_destroy(dzn_device *device, const VkAllocationCallbacks *pAllocator)
+{
+ if (!device)
+ return;
+
+ dzn_instance *instance =
+ container_of(device->vk.physical->instance, dzn_instance, vk);
+
+ vk_foreach_queue_safe(q, &device->vk) {
+ dzn_queue *queue = container_of(q, dzn_queue, vk);
+
+ dzn_queue_finish(queue);
+ }
+
+ dzn_device_query_finish(device);
+ dzn_meta_finish(device);
+
+ if (device->dev)
+ device->dev->Release();
+
+ vk_device_finish(&device->vk);
+ vk_free2(&instance->vk.alloc, pAllocator, device);
+}
+
+static VkResult
+dzn_device_create(dzn_physical_device *pdev,
+ const VkDeviceCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkDevice *out)
+{
+ dzn_instance *instance = container_of(pdev->vk.instance, dzn_instance, vk);
+
+ uint32_t queue_count = 0;
+ for (uint32_t qf = 0; qf < pCreateInfo->queueCreateInfoCount; qf++) {
+ const VkDeviceQueueCreateInfo *qinfo = &pCreateInfo->pQueueCreateInfos[qf];
+ queue_count += qinfo->queueCount;
+ }
+
+ VK_MULTIALLOC(ma);
+ VK_MULTIALLOC_DECL(&ma, dzn_device, device, 1);
+ VK_MULTIALLOC_DECL(&ma, dzn_queue, queues, queue_count);
+
+ if (!vk_multialloc_zalloc2(&ma, &instance->vk.alloc, pAllocator,
+ VK_SYSTEM_ALLOCATION_SCOPE_DEVICE))
+ return vk_error(pdev, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ vk_device_dispatch_table dispatch_table;
+
+ /* For secondary command buffer support, overwrite any command entrypoints
+ * in the main device-level dispatch table with
+ * vk_cmd_enqueue_unless_primary_Cmd*.
+ */
+ vk_device_dispatch_table_from_entrypoints(&dispatch_table,
+ &vk_cmd_enqueue_unless_primary_device_entrypoints, true);
+ vk_device_dispatch_table_from_entrypoints(&dispatch_table,
+ &dzn_device_entrypoints, false);
+ vk_device_dispatch_table_from_entrypoints(&dispatch_table,
+ &wsi_device_entrypoints, false);
+
+ /* Populate our primary cmd_dispatch table. */
+ vk_device_dispatch_table_from_entrypoints(&device->cmd_dispatch,
+ &dzn_device_entrypoints, true);
+ vk_device_dispatch_table_from_entrypoints(&device->cmd_dispatch,
+ &vk_common_device_entrypoints,
+ false);
+
+ VkResult result =
+ vk_device_init(&device->vk, &pdev->vk, &dispatch_table, pCreateInfo, pAllocator);
+ if (result != VK_SUCCESS) {
+ vk_free2(&device->vk.alloc, pAllocator, device);
+ return result;
+ }
+
+   /* Must be done after vk_device_init() because vk_device_init() memsets
+    * the whole struct to zero.
+    */
+ device->vk.command_dispatch_table = &device->cmd_dispatch;
+ device->vk.ref_pipeline_layout = dzn_device_ref_pipeline_layout;
+ device->vk.unref_pipeline_layout = dzn_device_unref_pipeline_layout;
+ device->vk.create_sync_for_memory = dzn_device_create_sync_for_memory;
+
+ device->dev = dzn_physical_device_get_d3d12_dev(pdev);
+ if (!device->dev) {
+ dzn_device_destroy(device, pAllocator);
+ return vk_error(pdev, VK_ERROR_INITIALIZATION_FAILED);
+ }
+
+ device->dev->AddRef();
+
+ ID3D12InfoQueue *info_queue;
+ if (SUCCEEDED(device->dev->QueryInterface(IID_PPV_ARGS(&info_queue)))) {
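+      /* When a debug layer is active, push a deny-list filter to mute
+       * info/warning severities and known-benign message IDs.
+       */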
+ D3D12_MESSAGE_SEVERITY severities[] = {
+ D3D12_MESSAGE_SEVERITY_INFO,
+ D3D12_MESSAGE_SEVERITY_WARNING,
+ };
+
+ D3D12_MESSAGE_ID msg_ids[] = {
+ D3D12_MESSAGE_ID_CLEARRENDERTARGETVIEW_MISMATCHINGCLEARVALUE,
+ };
+
+ D3D12_INFO_QUEUE_FILTER NewFilter = {};
+ NewFilter.DenyList.NumSeverities = ARRAY_SIZE(severities);
+ NewFilter.DenyList.pSeverityList = severities;
+ NewFilter.DenyList.NumIDs = ARRAY_SIZE(msg_ids);
+ NewFilter.DenyList.pIDList = msg_ids;
+
+ info_queue->PushStorageFilter(&NewFilter);
+ }
+
+ result = dzn_meta_init(device);
+ if (result != VK_SUCCESS) {
+ dzn_device_destroy(device, pAllocator);
+ return result;
+ }
+
+ result = dzn_device_query_init(device);
+ if (result != VK_SUCCESS) {
+ dzn_device_destroy(device, pAllocator);
+ return result;
+ }
+
+ uint32_t qindex = 0;
+ for (uint32_t qf = 0; qf < pCreateInfo->queueCreateInfoCount; qf++) {
+ const VkDeviceQueueCreateInfo *qinfo = &pCreateInfo->pQueueCreateInfos[qf];
+
+ for (uint32_t q = 0; q < qinfo->queueCount; q++) {
+ result =
+ dzn_queue_init(&queues[qindex++], device, qinfo, q);
+ if (result != VK_SUCCESS) {
+ dzn_device_destroy(device, pAllocator);
+ return result;
+ }
+ }
+ }
+
+ assert(queue_count == qindex);
+ *out = dzn_device_to_handle(device);
+ return VK_SUCCESS;
+}
+
+ID3D12RootSignature *
+dzn_device_create_root_sig(dzn_device *device,
+ const D3D12_VERSIONED_ROOT_SIGNATURE_DESC *desc)
+{
+ dzn_instance *instance =
+ container_of(device->vk.physical->instance, dzn_instance, vk);
+ ComPtr<ID3DBlob> sig, error;
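+   /* Root-signature creation is a two-step process: serialize the versioned
+    * descriptor into a blob, then hand that blob to CreateRootSignature().
+    * On serialization failure, the error blob is dumped when DZN_DEBUG_SIG
+    * is set.
+    */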
+
+ if (FAILED(instance->d3d12.serialize_root_sig(desc,
+ &sig, &error))) {
+ if (instance->debug_flags & DZN_DEBUG_SIG) {
+ const char* error_msg = (const char*)error->GetBufferPointer();
+ fprintf(stderr,
+ "== SERIALIZE ROOT SIG ERROR =============================================\n"
+ "%s\n"
+ "== END ==========================================================\n",
+ error_msg);
+ }
+
+ return NULL;
+ }
+
+ ID3D12RootSignature *root_sig;
+ if (FAILED(device->dev->CreateRootSignature(0,
+ sig->GetBufferPointer(),
+ sig->GetBufferSize(),
+ IID_PPV_ARGS(&root_sig))))
+ return NULL;
+
+ return root_sig;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_CreateDevice(VkPhysicalDevice physicalDevice,
+ const VkDeviceCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkDevice *pDevice)
+{
+ VK_FROM_HANDLE(dzn_physical_device, physical_device, physicalDevice);
+ dzn_instance *instance =
+ container_of(physical_device->vk.instance, dzn_instance, vk);
+ VkResult result;
+
+ assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
+
+ /* Check enabled features */
+ if (pCreateInfo->pEnabledFeatures) {
+ result = check_physical_device_features(physicalDevice,
+ pCreateInfo->pEnabledFeatures);
+ if (result != VK_SUCCESS)
+ return vk_error(physical_device, result);
+ }
+
+ /* Check requested queues and fail if we are requested to create any
+ * queues with flags we don't support.
+ */
+ assert(pCreateInfo->queueCreateInfoCount > 0);
+ for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
+ if (pCreateInfo->pQueueCreateInfos[i].flags != 0)
+ return vk_error(physical_device, VK_ERROR_INITIALIZATION_FAILED);
+ }
+
+ return dzn_device_create(physical_device, pCreateInfo, pAllocator, pDevice);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_DestroyDevice(VkDevice dev,
+ const VkAllocationCallbacks *pAllocator)
+{
+ VK_FROM_HANDLE(dzn_device, device, dev);
+
+ device->vk.dispatch_table.DeviceWaitIdle(dev);
+
+ dzn_device_destroy(device, pAllocator);
+}
+
+static void
+dzn_device_memory_destroy(dzn_device_memory *mem,
+ const VkAllocationCallbacks *pAllocator)
+{
+ if (!mem)
+ return;
+
+ dzn_device *device = container_of(mem->base.device, dzn_device, vk);
+
+ if (mem->map)
+ mem->map_res->Unmap(0, NULL);
+
+ if (mem->map_res)
+ mem->map_res->Release();
+
+ if (mem->heap)
+ mem->heap->Release();
+
+ vk_object_base_finish(&mem->base);
+ vk_free2(&device->vk.alloc, pAllocator, mem);
+}
+
+static VkResult
+dzn_device_memory_create(dzn_device *device,
+ const VkMemoryAllocateInfo *pAllocateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkDeviceMemory *out)
+{
+ dzn_physical_device *pdevice =
+ container_of(device->vk.physical, dzn_physical_device, vk);
+
+ dzn_device_memory *mem = (dzn_device_memory *)
+ vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*mem), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (!mem)
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ vk_object_base_init(&device->vk, &mem->base, VK_OBJECT_TYPE_DEVICE_MEMORY);
+
+ /* The Vulkan 1.0.33 spec says "allocationSize must be greater than 0". */
+ assert(pAllocateInfo->allocationSize > 0);
+
+ mem->size = pAllocateInfo->allocationSize;
+
+#if 0
+ const VkExportMemoryAllocateInfo *export_info = NULL;
+ VkMemoryAllocateFlags vk_flags = 0;
+#endif
+
+ vk_foreach_struct_const(ext, pAllocateInfo->pNext) {
+ dzn_debug_ignored_stype(ext->sType);
+ }
+
+ const VkMemoryType *mem_type =
+ &pdevice->memory.memoryTypes[pAllocateInfo->memoryTypeIndex];
+
+ D3D12_HEAP_DESC heap_desc = {};
+ // TODO: fix all of these:
+ heap_desc.SizeInBytes = pAllocateInfo->allocationSize;
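+   /* We don't know yet what will be placed in this heap, so be conservative:
+    * allocations large enough to hold an MSAA resource get the (larger) MSAA
+    * placement alignment.
+    */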
+ heap_desc.Alignment =
+ heap_desc.SizeInBytes >= D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT ?
+ D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT :
+ D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
+ heap_desc.Flags =
+ dzn_physical_device_get_heap_flags_for_mem_type(pdevice,
+ pAllocateInfo->memoryTypeIndex);
+
+   /* TODO: double-check this initial-state logic. */
+ mem->initial_state = D3D12_RESOURCE_STATE_COMMON;
+ heap_desc.Properties.Type = D3D12_HEAP_TYPE_CUSTOM;
+ heap_desc.Properties.MemoryPoolPreference =
+ ((mem_type->propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) &&
+ !pdevice->architecture.UMA) ?
+ D3D12_MEMORY_POOL_L1 : D3D12_MEMORY_POOL_L0;
+ if (mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) {
+ heap_desc.Properties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_BACK;
+ } else if (mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
+ heap_desc.Properties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE;
+ } else {
+ heap_desc.Properties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE;
+ }
+
+ if (FAILED(device->dev->CreateHeap(&heap_desc, IID_PPV_ARGS(&mem->heap)))) {
+ dzn_device_memory_destroy(mem, pAllocator);
+ return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ }
+
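+   /* D3D12 maps resources rather than heaps, so for host-visible memory we
+    * create a buffer spanning the whole heap that vkMapMemory() /
+    * vkUnmapMemory() can operate on (see dzn_MapMemory()).
+    */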
+ if ((mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
+ !(heap_desc.Flags & D3D12_HEAP_FLAG_DENY_BUFFERS)){
+ D3D12_RESOURCE_DESC res_desc = {};
+ res_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
+ res_desc.Format = DXGI_FORMAT_UNKNOWN;
+ res_desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
+ res_desc.Width = heap_desc.SizeInBytes;
+ res_desc.Height = 1;
+ res_desc.DepthOrArraySize = 1;
+ res_desc.MipLevels = 1;
+ res_desc.SampleDesc.Count = 1;
+ res_desc.SampleDesc.Quality = 0;
+ res_desc.Flags = D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE;
+ res_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
+ HRESULT hr = device->dev->CreatePlacedResource(mem->heap, 0, &res_desc,
+ mem->initial_state,
+ NULL, IID_PPV_ARGS(&mem->map_res));
+ if (FAILED(hr)) {
+ dzn_device_memory_destroy(mem, pAllocator);
+ return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ }
+ }
+
+ *out = dzn_device_memory_to_handle(mem);
+ return VK_SUCCESS;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_AllocateMemory(VkDevice device,
+ const VkMemoryAllocateInfo *pAllocateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkDeviceMemory *pMem)
+{
+ return dzn_device_memory_create(dzn_device_from_handle(device),
+ pAllocateInfo, pAllocator, pMem);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_FreeMemory(VkDevice device,
+ VkDeviceMemory mem,
+ const VkAllocationCallbacks *pAllocator)
+{
+ dzn_device_memory_destroy(dzn_device_memory_from_handle(mem), pAllocator);
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_MapMemory(VkDevice _device,
+ VkDeviceMemory _memory,
+ VkDeviceSize offset,
+ VkDeviceSize size,
+ VkMemoryMapFlags flags,
+ void **ppData)
+{
+ VK_FROM_HANDLE(dzn_device, device, _device);
+ VK_FROM_HANDLE(dzn_device_memory, mem, _memory);
+
+ if (mem == NULL) {
+ *ppData = NULL;
+ return VK_SUCCESS;
+ }
+
+ if (size == VK_WHOLE_SIZE)
+ size = mem->size - offset;
+
+   /* From the Vulkan spec version 1.0.32 docs for MapMemory:
+    *
+    *  * If size is not equal to VK_WHOLE_SIZE, size must be greater than 0
+    *  * If size is not equal to VK_WHOLE_SIZE, size must be less than or
+    *    equal to the size of the memory minus offset
+    */
+   assert(size > 0);
+   assert(offset + size <= mem->size);
+
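+   /* D3D12's Map() always returns a pointer to the start of the resource,
+    * no matter what range is passed; the offset is applied manually below.
+    */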
+ assert(mem->map_res);
+ D3D12_RANGE range = {};
+ range.Begin = offset;
+ range.End = offset + size;
+ void *map = NULL;
+ if (FAILED(mem->map_res->Map(0, &range, &map)))
+ return vk_error(device, VK_ERROR_MEMORY_MAP_FAILED);
+
+ mem->map = map;
+ mem->map_size = size;
+
+ *ppData = ((uint8_t*) map) + offset;
+
+ return VK_SUCCESS;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_UnmapMemory(VkDevice _device,
+ VkDeviceMemory _memory)
+{
+ VK_FROM_HANDLE(dzn_device, device, _device);
+ VK_FROM_HANDLE(dzn_device_memory, mem, _memory);
+
+ if (mem == NULL)
+ return;
+
+ assert(mem->map_res);
+ mem->map_res->Unmap(0, NULL);
+
+ mem->map = NULL;
+ mem->map_size = 0;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_FlushMappedMemoryRanges(VkDevice _device,
+ uint32_t memoryRangeCount,
+ const VkMappedMemoryRange *pMemoryRanges)
+{
+ return VK_SUCCESS;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_InvalidateMappedMemoryRanges(VkDevice _device,
+ uint32_t memoryRangeCount,
+ const VkMappedMemoryRange *pMemoryRanges)
+{
+ return VK_SUCCESS;
+}
+
+static void
+dzn_buffer_destroy(dzn_buffer *buf, const VkAllocationCallbacks *pAllocator)
+{
+ if (!buf)
+ return;
+
+ dzn_device *device = container_of(buf->base.device, dzn_device, vk);
+
+ if (buf->res)
+ buf->res->Release();
+
+ vk_object_base_finish(&buf->base);
+ vk_free2(&device->vk.alloc, pAllocator, buf);
+}
+
+static VkResult
+dzn_buffer_create(dzn_device *device,
+ const VkBufferCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkBuffer *out)
+{
+ dzn_buffer *buf = (dzn_buffer *)
+ vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*buf), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (!buf)
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ vk_object_base_init(&device->vk, &buf->base, VK_OBJECT_TYPE_BUFFER);
+ buf->create_flags = pCreateInfo->flags;
+ buf->size = pCreateInfo->size;
+ buf->usage = pCreateInfo->usage;
+
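+   /* D3D12 constant-buffer views must be aligned on 256 bytes
+    * (D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT), so pad uniform
+    * buffers accordingly.
+    */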
+ if (buf->usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT)
+ buf->size = ALIGN_POT(buf->size, 256);
+
+ buf->desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
+ buf->desc.Format = DXGI_FORMAT_UNKNOWN;
+ buf->desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
+ buf->desc.Width = buf->size;
+ buf->desc.Height = 1;
+ buf->desc.DepthOrArraySize = 1;
+ buf->desc.MipLevels = 1;
+ buf->desc.SampleDesc.Count = 1;
+ buf->desc.SampleDesc.Quality = 0;
+ buf->desc.Flags = D3D12_RESOURCE_FLAG_NONE;
+ buf->desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
+
+ if (buf->usage &
+ (VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+ VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT))
+ buf->desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
+
+ *out = dzn_buffer_to_handle(buf);
+ return VK_SUCCESS;
+}
+
+DXGI_FORMAT
+dzn_buffer_get_dxgi_format(VkFormat format)
+{
+ enum pipe_format pfmt = vk_format_to_pipe_format(format);
+
+ return dzn_pipe_to_dxgi_format(pfmt);
+}
+
+D3D12_TEXTURE_COPY_LOCATION
+dzn_buffer_get_copy_loc(const dzn_buffer *buf,
+ VkFormat format,
+ const VkBufferImageCopy2KHR *region,
+ VkImageAspectFlagBits aspect,
+ uint32_t layer)
+{
+ const uint32_t buffer_row_length =
+ region->bufferRowLength ? region->bufferRowLength : region->imageExtent.width;
+ const uint32_t buffer_image_height =
+ region->bufferImageHeight ? region->bufferImageHeight : region->imageExtent.height;
+
+ VkFormat plane_format = dzn_image_get_plane_format(format, aspect);
+
+ enum pipe_format pfmt = vk_format_to_pipe_format(plane_format);
+ uint32_t blksz = util_format_get_blocksize(pfmt);
+ uint32_t blkw = util_format_get_blockwidth(pfmt);
+ uint32_t blkh = util_format_get_blockheight(pfmt);
+
+ D3D12_TEXTURE_COPY_LOCATION loc = {
+ .pResource = buf->res,
+ .Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT,
+ .PlacedFootprint = {
+ .Footprint = {
+ .Format =
+ dzn_image_get_placed_footprint_format(format, aspect),
+ .Width = region->imageExtent.width,
+ .Height = region->imageExtent.height,
+ .Depth = region->imageExtent.depth,
+ .RowPitch = blksz * DIV_ROUND_UP(buffer_row_length, blkw),
+ },
+ },
+ };
+
+ uint32_t buffer_layer_stride =
+ loc.PlacedFootprint.Footprint.RowPitch *
+ DIV_ROUND_UP(loc.PlacedFootprint.Footprint.Height, blkh);
+
+ loc.PlacedFootprint.Offset =
+ region->bufferOffset + (layer * buffer_layer_stride);
+
+ return loc;
+}
+
+D3D12_TEXTURE_COPY_LOCATION
+dzn_buffer_get_line_copy_loc(const dzn_buffer *buf, VkFormat format,
+ const VkBufferImageCopy2KHR *region,
+ const D3D12_TEXTURE_COPY_LOCATION *loc,
+ uint32_t y, uint32_t z, uint32_t *start_x)
+{
+ uint32_t buffer_row_length =
+ region->bufferRowLength ? region->bufferRowLength : region->imageExtent.width;
+ uint32_t buffer_image_height =
+ region->bufferImageHeight ? region->bufferImageHeight : region->imageExtent.height;
+
+ format = dzn_image_get_plane_format(format, region->imageSubresource.aspectMask);
+
+ enum pipe_format pfmt = vk_format_to_pipe_format(format);
+ uint32_t blksz = util_format_get_blocksize(pfmt);
+ uint32_t blkw = util_format_get_blockwidth(pfmt);
+ uint32_t blkh = util_format_get_blockheight(pfmt);
+ uint32_t blkd = util_format_get_blockdepth(pfmt);
+ D3D12_TEXTURE_COPY_LOCATION new_loc = *loc;
+ uint32_t buffer_row_stride =
+ DIV_ROUND_UP(buffer_row_length, blkw) * blksz;
+ uint32_t buffer_layer_stride =
+ buffer_row_stride *
+ DIV_ROUND_UP(buffer_image_height, blkh);
+
+ uint64_t tex_offset =
+ ((y / blkh) * buffer_row_stride) +
+ ((z / blkd) * buffer_layer_stride);
+ uint64_t offset = loc->PlacedFootprint.Offset + tex_offset;
+ uint32_t offset_alignment = D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT;
+
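+   /* Pick the smallest multiple of the texture-data placement alignment
+    * that is also a multiple of the block size, so the adjusted offset
+    * computed below stays block-aligned.
+    */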
+ while (offset_alignment % blksz)
+ offset_alignment += D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT;
+
+ new_loc.PlacedFootprint.Footprint.Height = blkh;
+ new_loc.PlacedFootprint.Footprint.Depth = 1;
+ new_loc.PlacedFootprint.Offset = (offset / offset_alignment) * offset_alignment;
+ *start_x = ((offset % offset_alignment) / blksz) * blkw;
+ new_loc.PlacedFootprint.Footprint.Width = *start_x + region->imageExtent.width;
+ new_loc.PlacedFootprint.Footprint.RowPitch =
+ ALIGN_POT(DIV_ROUND_UP(new_loc.PlacedFootprint.Footprint.Width, blkw) * blksz,
+ D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
+ return new_loc;
+}
+
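+/* A buffer<->image region copy can be issued directly only when the placed
+ * footprint honors D3D12's offset and row-pitch alignment constraints;
+ * callers fall back to line-by-line copies otherwise (see
+ * dzn_buffer_get_line_copy_loc()).
+ */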
+bool
+dzn_buffer_supports_region_copy(const D3D12_TEXTURE_COPY_LOCATION *loc)
+{
+ return !(loc->PlacedFootprint.Offset & (D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT - 1)) &&
+ !(loc->PlacedFootprint.Footprint.RowPitch & (D3D12_TEXTURE_DATA_PITCH_ALIGNMENT - 1));
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_CreateBuffer(VkDevice device,
+ const VkBufferCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkBuffer *pBuffer)
+{
+ return dzn_buffer_create(dzn_device_from_handle(device),
+ pCreateInfo, pAllocator, pBuffer);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_DestroyBuffer(VkDevice device,
+ VkBuffer buffer,
+ const VkAllocationCallbacks *pAllocator)
+{
+ dzn_buffer_destroy(dzn_buffer_from_handle(buffer), pAllocator);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_GetBufferMemoryRequirements2(VkDevice dev,
+ const VkBufferMemoryRequirementsInfo2 *pInfo,
+ VkMemoryRequirements2 *pMemoryRequirements)
+{
+ VK_FROM_HANDLE(dzn_device, device, dev);
+ VK_FROM_HANDLE(dzn_buffer, buffer, pInfo->buffer);
+ dzn_physical_device *pdev =
+ container_of(device->vk.physical, dzn_physical_device, vk);
+
+ /* uh, this is grossly over-estimating things */
+ uint32_t alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
+ VkDeviceSize size = buffer->size;
+
+ if (buffer->usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT) {
+ alignment = MAX2(alignment, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT);
+ size = ALIGN_POT(size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT);
+ }
+
+ pMemoryRequirements->memoryRequirements.size = size;
+ pMemoryRequirements->memoryRequirements.alignment = alignment;
+ pMemoryRequirements->memoryRequirements.memoryTypeBits =
+ dzn_physical_device_get_mem_type_mask_for_resource(pdev, &buffer->desc);
+
+ vk_foreach_struct(ext, pMemoryRequirements->pNext) {
+ switch (ext->sType) {
+ case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
+ VkMemoryDedicatedRequirements *requirements =
+ (VkMemoryDedicatedRequirements *)ext;
+ /* TODO: figure out dedicated allocations */
+ requirements->prefersDedicatedAllocation = false;
+ requirements->requiresDedicatedAllocation = false;
+ break;
+ }
+
+ default:
+ dzn_debug_ignored_stype(ext->sType);
+ break;
+ }
+ }
+
+#if 0
+ D3D12_RESOURCE_ALLOCATION_INFO GetResourceAllocationInfo(
+ UINT visibleMask,
+ UINT numResourceDescs,
+ const D3D12_RESOURCE_DESC *pResourceDescs);
+#endif
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_BindBufferMemory2(VkDevice _device,
+ uint32_t bindInfoCount,
+ const VkBindBufferMemoryInfo *pBindInfos)
+{
+ VK_FROM_HANDLE(dzn_device, device, _device);
+
+ for (uint32_t i = 0; i < bindInfoCount; i++) {
+ assert(pBindInfos[i].sType == VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO);
+
+ VK_FROM_HANDLE(dzn_device_memory, mem, pBindInfos[i].memory);
+ VK_FROM_HANDLE(dzn_buffer, buffer, pBindInfos[i].buffer);
+
+ if (FAILED(device->dev->CreatePlacedResource(mem->heap,
+ pBindInfos[i].memoryOffset,
+ &buffer->desc,
+ mem->initial_state,
+ NULL, IID_PPV_ARGS(&buffer->res))))
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+
+ return VK_SUCCESS;
+}
+
+static VkResult
+dzn_framebuffer_create(dzn_device *device,
+ const VkFramebufferCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkFramebuffer *out)
+{
+ VK_MULTIALLOC(ma);
+ VK_MULTIALLOC_DECL(&ma, dzn_framebuffer, framebuffer, 1);
+ VK_MULTIALLOC_DECL(&ma, dzn_image_view *, attachments, pCreateInfo->attachmentCount);
+
+ if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, pAllocator,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT))
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ framebuffer->width = pCreateInfo->width;
+ framebuffer->height = pCreateInfo->height;
+ framebuffer->layers = pCreateInfo->layers;
+
+ framebuffer->attachments = attachments;
+ framebuffer->attachment_count = pCreateInfo->attachmentCount;
+ for (uint32_t i = 0; i < framebuffer->attachment_count; i++) {
+ VK_FROM_HANDLE(dzn_image_view, iview, pCreateInfo->pAttachments[i]);
+ framebuffer->attachments[i] = iview;
+ }
+
+ vk_object_base_init(&device->vk, &framebuffer->base, VK_OBJECT_TYPE_FRAMEBUFFER);
+ *out = dzn_framebuffer_to_handle(framebuffer);
+ return VK_SUCCESS;
+}
+
+static void
+dzn_framebuffer_destroy(dzn_framebuffer *framebuffer,
+ const VkAllocationCallbacks *pAllocator)
+{
+ if (!framebuffer)
+ return;
+
+ dzn_device *device =
+ container_of(framebuffer->base.device, dzn_device, vk);
+
+ vk_object_base_finish(&framebuffer->base);
+ vk_free2(&device->vk.alloc, pAllocator, framebuffer);
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_CreateFramebuffer(VkDevice device,
+ const VkFramebufferCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkFramebuffer *pFramebuffer)
+{
+ return dzn_framebuffer_create(dzn_device_from_handle(device),
+ pCreateInfo, pAllocator, pFramebuffer);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_DestroyFramebuffer(VkDevice device,
+ VkFramebuffer fb,
+ const VkAllocationCallbacks *pAllocator)
+{
+ dzn_framebuffer_destroy(dzn_framebuffer_from_handle(fb), pAllocator);
+}
+
+static void
+dzn_event_destroy(dzn_event *event,
+ const VkAllocationCallbacks *pAllocator)
+{
+ if (!event)
+ return;
+
+ dzn_device *device =
+ container_of(event->base.device, dzn_device, vk);
+
+ if (event->fence)
+ event->fence->Release();
+
+ vk_object_base_finish(&event->base);
+ vk_free2(&device->vk.alloc, pAllocator, event);
+}
+
+static VkResult
+dzn_event_create(dzn_device *device,
+ const VkEventCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkEvent *out)
+{
+ dzn_event *event = (dzn_event *)
+ vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*event), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (!event)
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ vk_object_base_init(&device->vk, &event->base, VK_OBJECT_TYPE_EVENT);
+
+ if (FAILED(device->dev->CreateFence(0, D3D12_FENCE_FLAG_NONE,
+ IID_PPV_ARGS(&event->fence)))) {
+ dzn_event_destroy(event, pAllocator);
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+
+ *out = dzn_event_to_handle(event);
+ return VK_SUCCESS;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_CreateEvent(VkDevice device,
+ const VkEventCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkEvent *pEvent)
+{
+ return dzn_event_create(dzn_device_from_handle(device),
+ pCreateInfo, pAllocator, pEvent);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_DestroyEvent(VkDevice device,
+ VkEvent event,
+ const VkAllocationCallbacks *pAllocator)
+{
+ return dzn_event_destroy(dzn_event_from_handle(event), pAllocator);
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_ResetEvent(VkDevice dev,
+ VkEvent evt)
+{
+ VK_FROM_HANDLE(dzn_device, device, dev);
+ VK_FROM_HANDLE(dzn_event, event, evt);
+
+ if (FAILED(event->fence->Signal(0)))
+ return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+
+ return VK_SUCCESS;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_SetEvent(VkDevice dev,
+ VkEvent evt)
+{
+ VK_FROM_HANDLE(dzn_device, device, dev);
+ VK_FROM_HANDLE(dzn_event, event, evt);
+
+ if (FAILED(event->fence->Signal(1)))
+ return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+
+ return VK_SUCCESS;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_GetEventStatus(VkDevice device,
+ VkEvent evt)
+{
+ VK_FROM_HANDLE(dzn_event, event, evt);
+
+ return event->fence->GetCompletedValue() == 0 ?
+ VK_EVENT_RESET : VK_EVENT_SET;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_GetDeviceMemoryCommitment(VkDevice device,
+ VkDeviceMemory memory,
+ VkDeviceSize *pCommittedMemoryInBytes)
+{
+ VK_FROM_HANDLE(dzn_device_memory, mem, memory);
+
+ // TODO: find if there's a way to query/track actual heap residency
+ *pCommittedMemoryInBytes = mem->size;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_QueueBindSparse(VkQueue queue,
+ uint32_t bindInfoCount,
+ const VkBindSparseInfo *pBindInfo,
+ VkFence fence)
+{
+ // FIXME: add proper implem
+ dzn_stub();
+ return VK_SUCCESS;
+}
+
+static D3D12_TEXTURE_ADDRESS_MODE
+dzn_sampler_translate_addr_mode(VkSamplerAddressMode in)
+{
+ switch (in) {
+ case VK_SAMPLER_ADDRESS_MODE_REPEAT: return D3D12_TEXTURE_ADDRESS_MODE_WRAP;
+ case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT: return D3D12_TEXTURE_ADDRESS_MODE_MIRROR;
+ case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE: return D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
+ case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER: return D3D12_TEXTURE_ADDRESS_MODE_BORDER;
+ default: unreachable("Invalid address mode");
+ }
+}
+
+static void
+dzn_sampler_destroy(dzn_sampler *sampler,
+ const VkAllocationCallbacks *pAllocator)
+{
+ if (!sampler)
+ return;
+
+ dzn_device *device =
+ container_of(sampler->base.device, dzn_device, vk);
+
+ vk_object_base_finish(&sampler->base);
+ vk_free2(&device->vk.alloc, pAllocator, sampler);
+}
+
+static VkResult
+dzn_sampler_create(dzn_device *device,
+ const VkSamplerCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkSampler *out)
+{
+ dzn_sampler *sampler = (dzn_sampler *)
+ vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*sampler), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (!sampler)
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ vk_object_base_init(&device->vk, &sampler->base, VK_OBJECT_TYPE_SAMPLER);
+
+ const VkSamplerCustomBorderColorCreateInfoEXT *pBorderColor = (const VkSamplerCustomBorderColorCreateInfoEXT *)
+ vk_find_struct_const(pCreateInfo->pNext, SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT);
+
+ /* TODO: have a sampler pool to allocate shader-invisible descs which we
+ * can copy to the desc_set when UpdateDescriptorSets() is called.
+ */
+ sampler->desc.Filter = dzn_translate_sampler_filter(pCreateInfo);
+ sampler->desc.AddressU = dzn_sampler_translate_addr_mode(pCreateInfo->addressModeU);
+ sampler->desc.AddressV = dzn_sampler_translate_addr_mode(pCreateInfo->addressModeV);
+ sampler->desc.AddressW = dzn_sampler_translate_addr_mode(pCreateInfo->addressModeW);
+ sampler->desc.MipLODBias = pCreateInfo->mipLodBias;
+ sampler->desc.MaxAnisotropy = pCreateInfo->maxAnisotropy;
+ sampler->desc.MinLOD = pCreateInfo->minLod;
+ sampler->desc.MaxLOD = pCreateInfo->maxLod;
+
+ if (pCreateInfo->compareEnable)
+ sampler->desc.ComparisonFunc = dzn_translate_compare_op(pCreateInfo->compareOp);
+
+ bool reads_border_color =
+ pCreateInfo->addressModeU == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
+ pCreateInfo->addressModeV == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
+ pCreateInfo->addressModeW == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
+
+ if (reads_border_color) {
+ switch (pCreateInfo->borderColor) {
+ case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
+ case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
+ sampler->desc.BorderColor[0] = 0.0f;
+ sampler->desc.BorderColor[1] = 0.0f;
+ sampler->desc.BorderColor[2] = 0.0f;
+ sampler->desc.BorderColor[3] =
+ pCreateInfo->borderColor == VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK ? 0.0f : 1.0f;
+ sampler->static_border_color =
+ pCreateInfo->borderColor == VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK ?
+ D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK :
+ D3D12_STATIC_BORDER_COLOR_OPAQUE_BLACK;
+ break;
+ case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
+ sampler->desc.BorderColor[0] = sampler->desc.BorderColor[1] = 1.0f;
+ sampler->desc.BorderColor[2] = sampler->desc.BorderColor[3] = 1.0f;
+ sampler->static_border_color = D3D12_STATIC_BORDER_COLOR_OPAQUE_WHITE;
+ break;
+ case VK_BORDER_COLOR_FLOAT_CUSTOM_EXT:
+ sampler->static_border_color = (D3D12_STATIC_BORDER_COLOR)-1;
+ for (unsigned i = 0; i < ARRAY_SIZE(sampler->desc.BorderColor); i++)
+ sampler->desc.BorderColor[i] = pBorderColor->customBorderColor.float32[i];
+ break;
+ case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
+ case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
+ case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
+ case VK_BORDER_COLOR_INT_CUSTOM_EXT:
+ /* FIXME: sampling from integer textures is not supported yet. */
+ sampler->static_border_color = (D3D12_STATIC_BORDER_COLOR)-1;
+ break;
+ default:
+ unreachable("Unsupported border color");
+ }
+ }
+
+ *out = dzn_sampler_to_handle(sampler);
+ return VK_SUCCESS;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_CreateSampler(VkDevice device,
+ const VkSamplerCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkSampler *pSampler)
+{
+ return dzn_sampler_create(dzn_device_from_handle(device),
+ pCreateInfo, pAllocator, pSampler);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_DestroySampler(VkDevice device,
+ VkSampler sampler,
+ const VkAllocationCallbacks *pAllocator)
+{
+ return dzn_sampler_destroy(dzn_sampler_from_handle(sampler), pAllocator);
+}
diff --git a/src/microsoft/vulkan/dzn_image.cpp b/src/microsoft/vulkan/dzn_image.cpp
new file mode 100644
index 00000000000..547540a00d0
--- /dev/null
+++ b/src/microsoft/vulkan/dzn_image.cpp
@@ -0,0 +1,1220 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "dzn_private.h"
+
+#include "vk_alloc.h"
+#include "vk_debug_report.h"
+#include "vk_format.h"
+#include "vk_util.h"
+
+static void
+dzn_image_destroy(dzn_image *image,
+ const VkAllocationCallbacks *pAllocator)
+{
+ if (!image)
+ return;
+
+ dzn_device *device = container_of(image->vk.base.device, dzn_device, vk);
+
+ if (image->res)
+ image->res->Release();
+
+ vk_image_finish(&image->vk);
+ vk_free2(&device->vk.alloc, pAllocator, image);
+}
+
+static VkResult
+dzn_image_create(dzn_device *device,
+ const VkImageCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkImage *out)
+{
+ dzn_image *image = (dzn_image *)
+ vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*image), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ dzn_physical_device *pdev =
+ container_of(device->vk.physical, dzn_physical_device, vk);
+
+ if (!image)
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ const VkExternalMemoryImageCreateInfo *create_info =
+ (const VkExternalMemoryImageCreateInfo *)
+ vk_find_struct_const(pCreateInfo->pNext, EXTERNAL_MEMORY_IMAGE_CREATE_INFO);
+
+#if 0
+ VkExternalMemoryHandleTypeFlags supported =
+ VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT |
+ VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT |
+ VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_TEXTURE_BIT |
+ VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_TEXTURE_KMT_BIT |
+ VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP_BIT |
+ VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE_BIT;
+
+ if (create_info && (create_info->handleTypes & supported))
+ return dzn_image_from_external(device, pCreateInfo, create_info,
+ pAllocator, pImage);
+#endif
+
+#if 0
+ const VkImageSwapchainCreateInfoKHR *swapchain_info = (const VkImageSwapchainCreateInfoKHR *)
+ vk_find_struct_const(pCreateInfo->pNext, IMAGE_SWAPCHAIN_CREATE_INFO_KHR);
+ if (swapchain_info && swapchain_info->swapchain != VK_NULL_HANDLE)
+ return dzn_image_from_swapchain(device, pCreateInfo, swapchain_info,
+ pAllocator, pImage);
+#endif
+
+ vk_image_init(&device->vk, &image->vk, pCreateInfo);
+ enum pipe_format pfmt = vk_format_to_pipe_format(image->vk.format);
+
+ if (image->vk.tiling == VK_IMAGE_TILING_LINEAR) {
+ /* Treat linear images as buffers: they should only be used as copy
+ * src/dest, and CopyTextureRegion() can read/write buffers through
+ * placed footprints.
+ * We only support linear tiling on things strictly required by the spec:
+ * "Images created with tiling equal to VK_IMAGE_TILING_LINEAR have
+ * further restrictions on their limits and capabilities compared to
+ * images created with tiling equal to VK_IMAGE_TILING_OPTIMAL. Creation
+ * of images with tiling VK_IMAGE_TILING_LINEAR may not be supported
+ * unless other parameters meet all of the constraints:
+ * - imageType is VK_IMAGE_TYPE_2D
+ * - format is not a depth/stencil format
+ * - mipLevels is 1
+ * - arrayLayers is 1
+ * - samples is VK_SAMPLE_COUNT_1_BIT
+ * - usage only includes VK_IMAGE_USAGE_TRANSFER_SRC_BIT and/or VK_IMAGE_USAGE_TRANSFER_DST_BIT
+ * "
+ */
+ assert(!vk_format_is_depth_or_stencil(pCreateInfo->format));
+ assert(pCreateInfo->mipLevels == 1);
+ assert(pCreateInfo->arrayLayers == 1);
+ assert(pCreateInfo->samples == 1);
+ assert(pCreateInfo->imageType != VK_IMAGE_TYPE_3D);
+ assert(!(pCreateInfo->usage & ~(VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT)));
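+ /* Describe the image as a 2D texture only to query its copyable
+ * footprint; the actual resource is created below as a row-major
+ * buffer big enough to hold that footprint.
+ */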
+ D3D12_RESOURCE_DESC tmp_desc = {
+ .Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D,
+ .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT,
+ .Width = ALIGN(image->vk.extent.width, util_format_get_blockwidth(pfmt)),
+ .Height = (UINT)ALIGN(image->vk.extent.height, util_format_get_blockheight(pfmt)),
+ .DepthOrArraySize = 1,
+ .MipLevels = 1,
+ .Format =
+ dzn_image_get_dxgi_format(pCreateInfo->format, pCreateInfo->usage, 0),
+ .SampleDesc = { .Count = 1, .Quality = 0 },
+ .Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN,
+ .Flags = D3D12_RESOURCE_FLAG_NONE
+ };
+ D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint;
+ uint64_t size = 0;
+ device->dev->GetCopyableFootprints(&tmp_desc, 0, 1, 0, &footprint, NULL, NULL, &size);
+
+ image->linear.row_stride = footprint.Footprint.RowPitch;
+ image->linear.size = size;
+ size *= pCreateInfo->arrayLayers;
+ image->desc.Format = DXGI_FORMAT_UNKNOWN;
+ image->desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
+ image->desc.Width = size;
+ image->desc.Height = 1;
+ image->desc.DepthOrArraySize = 1;
+ image->desc.MipLevels = 1;
+ image->desc.SampleDesc.Count = 1;
+ image->desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
+ } else {
+ image->desc.Format =
+ dzn_image_get_dxgi_format(pCreateInfo->format,
+ pCreateInfo->usage & ~VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
+ 0);
+ image->desc.Dimension = (D3D12_RESOURCE_DIMENSION)(D3D12_RESOURCE_DIMENSION_TEXTURE1D + pCreateInfo->imageType);
+ image->desc.Width = image->vk.extent.width;
+ image->desc.Height = image->vk.extent.height;
+ image->desc.DepthOrArraySize = pCreateInfo->imageType == VK_IMAGE_TYPE_3D ?
+ image->vk.extent.depth :
+ pCreateInfo->arrayLayers;
+ image->desc.MipLevels = pCreateInfo->mipLevels;
+ image->desc.SampleDesc.Count = pCreateInfo->samples;
+ image->desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
+ }
+
+ if (image->desc.SampleDesc.Count > 1)
+ image->desc.Alignment = D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT;
+ else
+ image->desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
+
+ image->desc.SampleDesc.Quality = 0;
+
+ image->desc.Flags = D3D12_RESOURCE_FLAG_NONE;
+
+ if (image->vk.usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)
+ image->desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
+
+ if (image->vk.usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
+ image->desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL;
+
+ if (!(image->vk.usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
+ VK_IMAGE_USAGE_STORAGE_BIT |
+ VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT |
+ VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
+ image->desc.Flags |= D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE;
+ }
+
+ /* Images with TRANSFER_DST can be cleared or passed as a blit/resolve
+ * destination. Both operations require the RT or DS cap flags.
+ */
+ if ((image->vk.usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) &&
+ image->vk.tiling == VK_IMAGE_TILING_OPTIMAL) {
+
+ D3D12_FEATURE_DATA_FORMAT_SUPPORT dfmt_info =
+ dzn_physical_device_get_format_support(pdev, pCreateInfo->format);
+ if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_RENDER_TARGET) {
+ image->desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
+ } else if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL) {
+ image->desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL;
+ } else if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_TYPED_UNORDERED_ACCESS_VIEW) {
+ image->desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
+ }
+ }
+
+ if (image->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT)
+ image->desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
+
+ *out = dzn_image_to_handle(image);
+ return VK_SUCCESS;
+}
+
+DXGI_FORMAT
+dzn_image_get_dxgi_format(VkFormat format,
+ VkImageUsageFlags usage,
+ VkImageAspectFlags aspects)
+{
+ enum pipe_format pfmt = vk_format_to_pipe_format(format);
+
+ if (!vk_format_is_depth_or_stencil(format))
+ return dzn_pipe_to_dxgi_format(pfmt);
+
+ switch (pfmt) {
+ case PIPE_FORMAT_Z16_UNORM:
+ return usage == VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT ?
+ DXGI_FORMAT_D16_UNORM : DXGI_FORMAT_R16_UNORM;
+
+ case PIPE_FORMAT_Z32_FLOAT:
+ return usage == VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT ?
+ DXGI_FORMAT_D32_FLOAT : DXGI_FORMAT_R32_FLOAT;
+
+ case PIPE_FORMAT_Z24X8_UNORM:
+ return usage == VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT ?
+ DXGI_FORMAT_D24_UNORM_S8_UINT : DXGI_FORMAT_R24_UNORM_X8_TYPELESS;
+
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+ if (usage == VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)
+ return DXGI_FORMAT_D24_UNORM_S8_UINT;
+
+ if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
+ return DXGI_FORMAT_R24_UNORM_X8_TYPELESS;
+ else
+ return DXGI_FORMAT_X24_TYPELESS_G8_UINT;
+
+ case PIPE_FORMAT_X24S8_UINT:
+ return usage == VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT ?
+ DXGI_FORMAT_D24_UNORM_S8_UINT : DXGI_FORMAT_X24_TYPELESS_G8_UINT;
+
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+ if (usage == VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)
+ return DXGI_FORMAT_D32_FLOAT_S8X24_UINT;
+
+ if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT)
+ return DXGI_FORMAT_X32_TYPELESS_G8X24_UINT;
+ else if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
+ return DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS;
+ else
+ return DXGI_FORMAT_R32G8X24_TYPELESS;
+
+ default:
+ return dzn_pipe_to_dxgi_format(pfmt);
+ }
+}
+
+DXGI_FORMAT
+dzn_image_get_placed_footprint_format(VkFormat format,
+ VkImageAspectFlags aspect)
+{
+ DXGI_FORMAT out =
+ dzn_image_get_dxgi_format(format,
+ VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
+ VK_IMAGE_USAGE_TRANSFER_DST_BIT,
+ aspect);
+
+ switch (out) {
+ case DXGI_FORMAT_R24_UNORM_X8_TYPELESS:
+ case DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS:
+ return DXGI_FORMAT_R32_TYPELESS;
+ case DXGI_FORMAT_X24_TYPELESS_G8_UINT:
+ case DXGI_FORMAT_X32_TYPELESS_G8X24_UINT:
+ return DXGI_FORMAT_R8_TYPELESS;
+ default:
+ return out;
+ }
+}
+
+VkFormat
+dzn_image_get_plane_format(VkFormat format,
+ VkImageAspectFlags aspectMask)
+{
+ if (aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT)
+ return vk_format_stencil_only(format);
+ else if (aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT)
+ return vk_format_depth_only(format);
+ else
+ return format;
+}
+
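+/* The following helpers compute D3D12 subresource indices, which are
+ * mip-major, then array layer, then plane. Worked example (hypothetical
+ * values): with MipLevels = 4 and DepthOrArraySize = 8, the stencil plane
+ * (planeSlice = 1) of mip 2, array layer 3 maps to
+ * 2 + (3 * 4) + (1 * 4 * 8) = 46.
+ */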
+uint32_t
+dzn_image_layers_get_subresource_index(const dzn_image *image,
+ const VkImageSubresourceLayers *subres,
+ VkImageAspectFlagBits aspect,
+ uint32_t layer)
+{
+ int planeSlice =
+ aspect == VK_IMAGE_ASPECT_STENCIL_BIT ? 1 : 0;
+
+ return subres->mipLevel +
+ ((subres->baseArrayLayer + layer) * image->desc.MipLevels) +
+ (planeSlice * image->desc.MipLevels * image->desc.DepthOrArraySize);
+}
+
+uint32_t
+dzn_image_range_get_subresource_index(const dzn_image *image,
+ const VkImageSubresourceRange *subres,
+ VkImageAspectFlagBits aspect,
+ uint32_t level, uint32_t layer)
+{
+ int planeSlice =
+ aspect == VK_IMAGE_ASPECT_STENCIL_BIT ? 1 : 0;
+
+ return subres->baseMipLevel + level +
+ ((subres->baseArrayLayer + layer) * image->desc.MipLevels) +
+ (planeSlice * image->desc.MipLevels * image->desc.DepthOrArraySize);
+}
+
+uint32_t
+dzn_image_get_subresource_index(const dzn_image *image,
+ const VkImageSubresource *subres,
+ VkImageAspectFlagBits aspect)
+{
+ int planeSlice =
+ aspect == VK_IMAGE_ASPECT_STENCIL_BIT ? 1 : 0;
+
+ return subres->mipLevel +
+ (subres->arrayLayer * image->desc.MipLevels) +
+ (planeSlice * image->desc.MipLevels * image->desc.DepthOrArraySize);
+}
+
+D3D12_TEXTURE_COPY_LOCATION
+dzn_image_get_copy_loc(const dzn_image *image,
+ const VkImageSubresourceLayers *subres,
+ VkImageAspectFlagBits aspect,
+ uint32_t layer)
+{
+ D3D12_TEXTURE_COPY_LOCATION loc = {
+ .pResource = image->res,
+ };
+
+ assert((subres->aspectMask & aspect) != 0);
+ VkFormat format = dzn_image_get_plane_format(image->vk.format, aspect);
+
+ if (image->desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) {
+ VkImageUsageFlags usage =
+ VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
+
+ assert((subres->baseArrayLayer + layer) == 0);
+ assert(subres->mipLevel == 0);
+ loc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
+ loc.PlacedFootprint.Offset = 0;
+ loc.PlacedFootprint.Footprint.Format =
+ dzn_image_get_placed_footprint_format(image->vk.format, aspect);
+ loc.PlacedFootprint.Footprint.Width = image->vk.extent.width;
+ loc.PlacedFootprint.Footprint.Height = image->vk.extent.height;
+ loc.PlacedFootprint.Footprint.Depth = image->vk.extent.depth;
+ loc.PlacedFootprint.Footprint.RowPitch = image->linear.row_stride;
+ } else {
+ loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
+ loc.SubresourceIndex = dzn_image_layers_get_subresource_index(image, subres, aspect, layer);
+ }
+
+ return loc;
+}
+
+D3D12_DEPTH_STENCIL_VIEW_DESC
+dzn_image_get_dsv_desc(const dzn_image *image,
+ const VkImageSubresourceRange *range,
+ uint32_t level)
+{
+ uint32_t layer_count = dzn_get_layer_count(image, range);
+ D3D12_DEPTH_STENCIL_VIEW_DESC dsv_desc = {
+ .Format =
+ dzn_image_get_dxgi_format(image->vk.format,
+ VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
+ range->aspectMask),
+ };
+
+ switch (image->vk.image_type) {
+ case VK_IMAGE_TYPE_1D:
+ dsv_desc.ViewDimension =
+ image->vk.array_layers > 1 ?
+ D3D12_DSV_DIMENSION_TEXTURE1DARRAY :
+ D3D12_DSV_DIMENSION_TEXTURE1D;
+ break;
+ case VK_IMAGE_TYPE_2D:
+ if (image->vk.array_layers > 1) {
+ dsv_desc.ViewDimension =
+ image->vk.samples > 1 ?
+ D3D12_DSV_DIMENSION_TEXTURE2DMSARRAY :
+ D3D12_DSV_DIMENSION_TEXTURE2DARRAY;
+ } else {
+ dsv_desc.ViewDimension =
+ image->vk.samples > 1 ?
+ D3D12_DSV_DIMENSION_TEXTURE2DMS :
+ D3D12_DSV_DIMENSION_TEXTURE2D;
+ }
+ break;
+ default:
+ unreachable("Invalid image type");
+ }
+
+ switch (dsv_desc.ViewDimension) {
+ case D3D12_DSV_DIMENSION_TEXTURE1D:
+ dsv_desc.Texture1D.MipSlice = range->baseMipLevel + level;
+ break;
+ case D3D12_DSV_DIMENSION_TEXTURE1DARRAY:
+ dsv_desc.Texture1DArray.MipSlice = range->baseMipLevel + level;
+ dsv_desc.Texture1DArray.FirstArraySlice = range->baseArrayLayer;
+ dsv_desc.Texture1DArray.ArraySize = layer_count;
+ break;
+ case D3D12_DSV_DIMENSION_TEXTURE2D:
+ dsv_desc.Texture2D.MipSlice = range->baseMipLevel + level;
+ break;
+ case D3D12_DSV_DIMENSION_TEXTURE2DMS:
+ break;
+ case D3D12_DSV_DIMENSION_TEXTURE2DARRAY:
+ dsv_desc.Texture2DArray.MipSlice = range->baseMipLevel + level;
+ dsv_desc.Texture2DArray.FirstArraySlice = range->baseArrayLayer;
+ dsv_desc.Texture2DArray.ArraySize = layer_count;
+ break;
+ }
+
+ return dsv_desc;
+}
+
+D3D12_RENDER_TARGET_VIEW_DESC
+dzn_image_get_rtv_desc(const dzn_image *image,
+ const VkImageSubresourceRange *range,
+ uint32_t level)
+{
+ uint32_t layer_count = dzn_get_layer_count(image, range);
+ D3D12_RENDER_TARGET_VIEW_DESC rtv_desc = {
+ .Format =
+ dzn_image_get_dxgi_format(image->vk.format,
+ VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
+ VK_IMAGE_ASPECT_COLOR_BIT),
+ };
+
+ switch (image->vk.image_type) {
+ case VK_IMAGE_TYPE_1D:
+ rtv_desc.ViewDimension =
+ image->vk.array_layers > 1 ?
+ D3D12_RTV_DIMENSION_TEXTURE1DARRAY : D3D12_RTV_DIMENSION_TEXTURE1D;
+ break;
+ case VK_IMAGE_TYPE_2D:
+ if (image->vk.array_layers > 1) {
+ rtv_desc.ViewDimension =
+ image->vk.samples > 1 ?
+ D3D12_RTV_DIMENSION_TEXTURE2DMSARRAY :
+ D3D12_RTV_DIMENSION_TEXTURE2DARRAY;
+ } else {
+ rtv_desc.ViewDimension =
+ image->vk.samples > 1 ?
+ D3D12_RTV_DIMENSION_TEXTURE2DMS :
+ D3D12_RTV_DIMENSION_TEXTURE2D;
+ }
+ break;
+ case VK_IMAGE_TYPE_3D:
+ rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE3D;
+ break;
+ default: unreachable("Invalid image type\n");
+ }
+
+ switch (rtv_desc.ViewDimension) {
+ case D3D12_RTV_DIMENSION_TEXTURE1D:
+ rtv_desc.Texture1D.MipSlice = range->baseMipLevel + level;
+ break;
+ case D3D12_RTV_DIMENSION_TEXTURE1DARRAY:
+ rtv_desc.Texture1DArray.MipSlice = range->baseMipLevel + level;
+ rtv_desc.Texture1DArray.FirstArraySlice = range->baseArrayLayer;
+ rtv_desc.Texture1DArray.ArraySize = layer_count;
+ break;
+ case D3D12_RTV_DIMENSION_TEXTURE2D:
+ rtv_desc.Texture2D.MipSlice = range->baseMipLevel + level;
+ if (range->aspectMask & VK_IMAGE_ASPECT_PLANE_1_BIT)
+ rtv_desc.Texture2D.PlaneSlice = 1;
+ else if (range->aspectMask & VK_IMAGE_ASPECT_PLANE_2_BIT)
+ rtv_desc.Texture2D.PlaneSlice = 2;
+ else
+ rtv_desc.Texture2D.PlaneSlice = 0;
+ break;
+ case D3D12_RTV_DIMENSION_TEXTURE2DMS:
+ break;
+ case D3D12_RTV_DIMENSION_TEXTURE2DARRAY:
+ rtv_desc.Texture2DArray.MipSlice = range->baseMipLevel + level;
+ rtv_desc.Texture2DArray.FirstArraySlice = range->baseArrayLayer;
+ rtv_desc.Texture2DArray.ArraySize = layer_count;
+ if (range->aspectMask & VK_IMAGE_ASPECT_PLANE_1_BIT)
+ rtv_desc.Texture2DArray.PlaneSlice = 1;
+ else if (range->aspectMask & VK_IMAGE_ASPECT_PLANE_2_BIT)
+ rtv_desc.Texture2DArray.PlaneSlice = 2;
+ else
+ rtv_desc.Texture2DArray.PlaneSlice = 0;
+ break;
+ case D3D12_RTV_DIMENSION_TEXTURE2DMSARRAY:
+ rtv_desc.Texture2DMSArray.FirstArraySlice = range->baseArrayLayer;
+ rtv_desc.Texture2DMSArray.ArraySize = layer_count;
+ break;
+ case D3D12_RTV_DIMENSION_TEXTURE3D:
+ rtv_desc.Texture3D.MipSlice = range->baseMipLevel + level;
+ rtv_desc.Texture3D.FirstWSlice = range->baseArrayLayer;
+ rtv_desc.Texture3D.WSize = layer_count;
+ break;
+ }
+
+ return rtv_desc;
+}
+
+D3D12_RESOURCE_STATES
+dzn_image_layout_to_state(VkImageLayout layout)
+{
+ switch (layout) {
+ case VK_IMAGE_LAYOUT_PREINITIALIZED:
+ case VK_IMAGE_LAYOUT_UNDEFINED:
+ case VK_IMAGE_LAYOUT_GENERAL:
+ /* GENERAL can be used for anything; COMMON is the closest D3D12 has
+ * to a catch-all state.
+ */
+ case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR:
+ return D3D12_RESOURCE_STATE_COMMON;
+
+ case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
+ return D3D12_RESOURCE_STATE_COPY_DEST;
+
+ case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
+ return D3D12_RESOURCE_STATE_COPY_SOURCE;
+
+ case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
+ return D3D12_RESOURCE_STATE_RENDER_TARGET;
+
+ case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
+ case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL:
+ return D3D12_RESOURCE_STATE_DEPTH_WRITE;
+
+ case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
+ case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL:
+ return D3D12_RESOURCE_STATE_DEPTH_READ;
+
+ case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
+ return D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE;
+
+ default:
+ unreachable("not implemented");
+ }
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_CreateImage(VkDevice device,
+ const VkImageCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkImage *pImage)
+{
+ return dzn_image_create(dzn_device_from_handle(device),
+ pCreateInfo, pAllocator, pImage);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_DestroyImage(VkDevice device, VkImage image,
+ const VkAllocationCallbacks *pAllocator)
+{
+ dzn_image_destroy(dzn_image_from_handle(image), pAllocator);
+}
+
+static dzn_image *
+dzn_swapchain_get_image(dzn_device *device,
+ VkSwapchainKHR swapchain,
+ uint32_t index)
+{
+ uint32_t n_images = index + 1;
+ STACK_ARRAY(VkImage, images, n_images);
+ dzn_image *image = NULL;
+
+ VkResult result = wsi_common_get_images(swapchain, &n_images, images);
+
+ if (result == VK_SUCCESS || result == VK_INCOMPLETE)
+ image = dzn_image_from_handle(images[index]);
+
+ STACK_ARRAY_FINISH(images);
+ return image;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_BindImageMemory2(VkDevice dev,
+ uint32_t bindInfoCount,
+ const VkBindImageMemoryInfo *pBindInfos)
+{
+ VK_FROM_HANDLE(dzn_device, device, dev);
+
+ for (uint32_t i = 0; i < bindInfoCount; i++) {
+ const VkBindImageMemoryInfo *bind_info = &pBindInfos[i];
+ VK_FROM_HANDLE(dzn_device_memory, mem, bind_info->memory);
+ VK_FROM_HANDLE(dzn_image, image, bind_info->image);
+ bool did_bind = false;
+
+ vk_foreach_struct_const(s, bind_info->pNext) {
+ switch (s->sType) {
+ case VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_SWAPCHAIN_INFO_KHR: {
+ const VkBindImageMemorySwapchainInfoKHR *swapchain_info =
+ (const VkBindImageMemorySwapchainInfoKHR *) s;
+ dzn_image *swapchain_image =
+ dzn_swapchain_get_image(device,
+ swapchain_info->swapchain,
+ swapchain_info->imageIndex);
+ assert(swapchain_image);
+ assert(image->vk.aspects == swapchain_image->vk.aspects);
+ assert(mem == NULL);
+
+ /* TODO: bind the swapchain image's memory. */
+ assert(false);
+
+ did_bind = true;
+ break;
+ }
+ default:
+ dzn_debug_ignored_stype(s->sType);
+ break;
+ }
+ }
+
+ if (!did_bind) {
+ image->mem = mem;
+ image->mem_offset = bind_info->memoryOffset;
+ if (FAILED(device->dev->CreatePlacedResource(mem->heap,
+ bind_info->memoryOffset,
+ &image->desc,
+ mem->initial_state,
+ NULL, IID_PPV_ARGS(&image->res))))
+ return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ }
+ }
+
+ return VK_SUCCESS;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_GetImageMemoryRequirements2(VkDevice _device,
+ const VkImageMemoryRequirementsInfo2 *pInfo,
+ VkMemoryRequirements2 *pMemoryRequirements)
+{
+ VK_FROM_HANDLE(dzn_device, device, _device);
+ VK_FROM_HANDLE(dzn_image, image, pInfo->image);
+ dzn_physical_device *pdev =
+ container_of(device->vk.physical, dzn_physical_device, vk);
+
+ vk_foreach_struct_const(ext, pInfo->pNext) {
+ dzn_debug_ignored_stype(ext->sType);
+ }
+
+ vk_foreach_struct(ext, pMemoryRequirements->pNext) {
+ switch (ext->sType) {
+ case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
+ VkMemoryDedicatedRequirements *requirements =
+ (VkMemoryDedicatedRequirements *)ext;
+ /* TODO: figure out dedicated allocations */
+ requirements->prefersDedicatedAllocation = false;
+ requirements->requiresDedicatedAllocation = false;
+ break;
+ }
+
+ default:
+ dzn_debug_ignored_stype(ext->sType);
+ break;
+ }
+ }
+
+ D3D12_RESOURCE_ALLOCATION_INFO info = device->dev->GetResourceAllocationInfo(0, 1, &image->desc);
+
+ pMemoryRequirements->memoryRequirements = VkMemoryRequirements {
+ .size = info.SizeInBytes,
+ .alignment = info.Alignment,
+ .memoryTypeBits =
+ dzn_physical_device_get_mem_type_mask_for_resource(pdev, &image->desc),
+ };
+
+ /*
+ * MSAA images need memory to be aligned on
+ * D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT (4MB), but the memory
+ * allocation function doesn't know what the memory will be used for,
+ * and forcing all allocations to be 4MB-aligned has a cost. Instead,
+ * report MSAA resources as being at least 4MB, so the allocation logic
+ * can safely assume sub-4MB allocations never need this 4MB alignment.
+ */
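+ /* e.g. (hypothetical numbers): a 2-sample image whose allocation info
+ * reports 1MB is advertised as 4MB here, so a sub-4MB allocation can
+ * never end up backing an MSAA image.
+ */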
+ if (image->vk.samples > 1 &&
+ pMemoryRequirements->memoryRequirements.size < D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT)
+ pMemoryRequirements->memoryRequirements.size = D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_GetImageSubresourceLayout(VkDevice _device,
+ VkImage _image,
+ const VkImageSubresource *subresource,
+ VkSubresourceLayout *layout)
+{
+ VK_FROM_HANDLE(dzn_device, device, _device);
+ VK_FROM_HANDLE(dzn_image, image, _image);
+
+ if (image->desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) {
+ assert(subresource->arrayLayer == 0);
+ assert(subresource->mipLevel == 0);
+ layout->offset = 0;
+ layout->rowPitch = image->linear.row_stride;
+ layout->depthPitch = 0;
+ layout->arrayPitch = 0;
+ layout->size = image->linear.size;
+ } else {
+ UINT subres_index =
+ dzn_image_get_subresource_index(image, subresource,
+ (VkImageAspectFlagBits)subresource->aspectMask);
+ D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint;
+ UINT num_rows;
+ UINT64 row_size, total_size;
+ device->dev->GetCopyableFootprints(&image->desc,
+ subres_index, 1,
+ 0, /* base offset */
+ &footprint,
+ &num_rows, &row_size,
+ &total_size);
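+ /* e.g. (hypothetical numbers): a 100x100 RGBA8 subresource has a raw
+ * row of 400 bytes, which GetCopyableFootprints() pads to the 256-byte
+ * D3D12_TEXTURE_DATA_PITCH_ALIGNMENT, giving RowPitch = 512.
+ */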
+
+ layout->offset = footprint.Offset;
+ layout->rowPitch = footprint.Footprint.RowPitch;
+ layout->depthPitch = layout->rowPitch * footprint.Footprint.Height;
+ layout->arrayPitch = layout->depthPitch; /* Each layer is its own D3D12 subresource, so the closest thing to an array pitch is the slice (depth) pitch. */
+ layout->size = total_size;
+ }
+}
+
+static D3D12_SHADER_COMPONENT_MAPPING
+translate_swizzle(VkComponentSwizzle in, uint32_t comp)
+{
+ switch (in) {
+ case VK_COMPONENT_SWIZZLE_IDENTITY:
+ return (D3D12_SHADER_COMPONENT_MAPPING)
+ (comp + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0);
+ case VK_COMPONENT_SWIZZLE_ZERO:
+ return D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0;
+ case VK_COMPONENT_SWIZZLE_ONE:
+ return D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_1;
+ case VK_COMPONENT_SWIZZLE_R:
+ return D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0;
+ case VK_COMPONENT_SWIZZLE_G:
+ return D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_1;
+ case VK_COMPONENT_SWIZZLE_B:
+ return D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_2;
+ case VK_COMPONENT_SWIZZLE_A:
+ return D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_3;
+ default: unreachable("Invalid swizzle");
+ }
+}
+
+static void
+dzn_image_view_prepare_srv_desc(dzn_image_view *iview)
+{
+ uint32_t plane_slice = (iview->vk.aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? 1 : 0;
+ bool ms = iview->vk.image->samples > 1;
+ uint32_t layers_per_elem =
+ (iview->vk.view_type == VK_IMAGE_VIEW_TYPE_CUBE ||
+ iview->vk.view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY) ?
+ 6 : 1;
+ bool use_array = (iview->vk.base_array_layer / layers_per_elem) > 0 ||
+ (iview->vk.layer_count / layers_per_elem) > 1;
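+ /* e.g. (hypothetical): a cube view covering layers 6..11 of a 12-layer
+ * image has base element 6 / 6 = 1, so it needs the array dimension even
+ * though it only contains a single cube.
+ */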
+
+ iview->srv_desc = D3D12_SHADER_RESOURCE_VIEW_DESC {
+ .Format =
+ dzn_image_get_dxgi_format(iview->vk.format,
+ iview->vk.image->usage & ~VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
+ iview->vk.aspects),
+ };
+
+ D3D12_SHADER_COMPONENT_MAPPING swz[] = {
+ translate_swizzle(iview->vk.swizzle.r, 0),
+ translate_swizzle(iview->vk.swizzle.g, 1),
+ translate_swizzle(iview->vk.swizzle.b, 2),
+ translate_swizzle(iview->vk.swizzle.a, 3),
+ };
+
+ /* Swap components to fake B4G4R4A4 support. */
+ if (iview->vk.format == VK_FORMAT_B4G4R4A4_UNORM_PACK16) {
+ static const D3D12_SHADER_COMPONENT_MAPPING bgra4_remap[] = {
+ D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_1,
+ D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0,
+ D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_3,
+ D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_2,
+ D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0,
+ D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_1,
+ };
+
+ for (uint32_t i = 0; i < ARRAY_SIZE(swz); i++)
+ swz[i] = bgra4_remap[swz[i]];
+ }
+
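+ /* Each source is packed in 3 bits, plus an always-set bit (1 << 12):
+ * an identity RGBA swizzle encodes as
+ * 0 | (1 << 3) | (2 << 6) | (3 << 9) | (1 << 12) = 0x1688, which is
+ * D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING.
+ */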
+ iview->srv_desc.Shader4ComponentMapping =
+ D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(swz[0], swz[1], swz[2], swz[3]);
+
+ switch (iview->vk.view_type) {
+ case VK_IMAGE_VIEW_TYPE_1D_ARRAY:
+ case VK_IMAGE_VIEW_TYPE_1D:
+ if (use_array) {
+ iview->srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1DARRAY;
+ iview->srv_desc.Texture1DArray.MostDetailedMip = iview->vk.base_mip_level;
+ iview->srv_desc.Texture1DArray.MipLevels = iview->vk.level_count;
+ iview->srv_desc.Texture1DArray.FirstArraySlice = iview->vk.base_array_layer;
+ iview->srv_desc.Texture1DArray.ArraySize = iview->vk.layer_count;
+ } else {
+ iview->srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1D;
+ iview->srv_desc.Texture1D.MostDetailedMip = iview->vk.base_mip_level;
+ iview->srv_desc.Texture1D.MipLevels = iview->vk.level_count;
+ }
+ break;
+
+ case VK_IMAGE_VIEW_TYPE_2D_ARRAY:
+ case VK_IMAGE_VIEW_TYPE_2D:
+ if (use_array && ms) {
+ iview->srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY;
+ iview->srv_desc.Texture2DMSArray.FirstArraySlice = iview->vk.base_array_layer;
+ iview->srv_desc.Texture2DMSArray.ArraySize = iview->vk.layer_count;
+ } else if (use_array && !ms) {
+ iview->srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY;
+ iview->srv_desc.Texture2DArray.MostDetailedMip = iview->vk.base_mip_level;
+ iview->srv_desc.Texture2DArray.MipLevels = iview->vk.level_count;
+ iview->srv_desc.Texture2DArray.FirstArraySlice = iview->vk.base_array_layer;
+ iview->srv_desc.Texture2DArray.ArraySize = iview->vk.layer_count;
+ iview->srv_desc.Texture2DArray.PlaneSlice = plane_slice;
+ } else if (!use_array && ms) {
+ iview->srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DMS;
+ } else {
+ iview->srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
+ iview->srv_desc.Texture2D.MostDetailedMip = iview->vk.base_mip_level;
+ iview->srv_desc.Texture2D.MipLevels = iview->vk.level_count;
+ iview->srv_desc.Texture2D.PlaneSlice = plane_slice;
+ }
+ break;
+
+ case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY:
+ case VK_IMAGE_VIEW_TYPE_CUBE:
+ if (use_array) {
+ iview->srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBEARRAY;
+ iview->srv_desc.TextureCubeArray.MostDetailedMip = iview->vk.base_mip_level;
+ iview->srv_desc.TextureCubeArray.MipLevels = iview->vk.level_count;
+ iview->srv_desc.TextureCubeArray.First2DArrayFace = iview->vk.base_array_layer;
+ iview->srv_desc.TextureCubeArray.NumCubes = iview->vk.layer_count / 6;
+ } else {
+ iview->srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBE;
+ iview->srv_desc.TextureCube.MostDetailedMip = iview->vk.base_mip_level;
+ iview->srv_desc.TextureCube.MipLevels = iview->vk.level_count;
+ }
+ break;
+
+ case VK_IMAGE_VIEW_TYPE_3D:
+ iview->srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D;
+ iview->srv_desc.Texture3D.MostDetailedMip = iview->vk.base_mip_level;
+ iview->srv_desc.Texture3D.MipLevels = iview->vk.level_count;
+ break;
+
+ default: unreachable("Invalid view type");
+ }
+}
+
+static void
+dzn_image_view_prepare_uav_desc(dzn_image_view *iview)
+{
+ bool use_array = iview->vk.base_array_layer > 0 || iview->vk.layer_count > 1;
+
+ assert(iview->vk.image->samples == 1);
+
+ iview->uav_desc = D3D12_UNORDERED_ACCESS_VIEW_DESC {
+ .Format =
+ dzn_image_get_dxgi_format(iview->vk.format,
+ VK_IMAGE_USAGE_STORAGE_BIT,
+ iview->vk.aspects),
+ };
+
+ switch (iview->vk.view_type) {
+ case VK_IMAGE_VIEW_TYPE_1D:
+ case VK_IMAGE_VIEW_TYPE_1D_ARRAY:
+ if (use_array) {
+ iview->uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE1DARRAY;
+ iview->uav_desc.Texture1DArray.MipSlice = iview->vk.base_mip_level;
+ iview->uav_desc.Texture1DArray.FirstArraySlice = iview->vk.base_array_layer;
+ iview->uav_desc.Texture1DArray.ArraySize = iview->vk.layer_count;
+ } else {
+ iview->uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE1D;
+ iview->uav_desc.Texture1D.MipSlice = iview->vk.base_mip_level;
+ }
+ break;
+
+ case VK_IMAGE_VIEW_TYPE_2D:
+ case VK_IMAGE_VIEW_TYPE_2D_ARRAY:
+ case VK_IMAGE_VIEW_TYPE_CUBE:
+ case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY:
+ if (use_array) {
+ iview->uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2DARRAY;
+ iview->uav_desc.Texture2DArray.PlaneSlice = 0;
+ iview->uav_desc.Texture2DArray.MipSlice = iview->vk.base_mip_level;
+ iview->uav_desc.Texture2DArray.FirstArraySlice = iview->vk.base_array_layer;
+ iview->uav_desc.Texture2DArray.ArraySize = iview->vk.layer_count;
+ } else {
+ iview->uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D;
+ iview->uav_desc.Texture2D.MipSlice = iview->vk.base_mip_level;
+ iview->uav_desc.Texture2D.PlaneSlice = 0;
+ }
+ break;
+ case VK_IMAGE_VIEW_TYPE_3D:
+ iview->uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE3D;
+ iview->uav_desc.Texture3D.MipSlice = iview->vk.base_mip_level;
+ iview->uav_desc.Texture3D.FirstWSlice = 0;
+ iview->uav_desc.Texture3D.WSize = iview->vk.extent.depth;
+ break;
+ default: unreachable("Invalid type");
+ }
+}
+
+static void
+dzn_image_view_prepare_rtv_desc(dzn_image_view *iview)
+{
+ bool use_array = iview->vk.base_array_layer > 0 || iview->vk.layer_count > 1;
+ bool ms = iview->vk.image->samples > 1;
+ uint32_t plane_slice =
+ (iview->vk.aspects & VK_IMAGE_ASPECT_PLANE_2_BIT) ? 2 :
+ (iview->vk.aspects & VK_IMAGE_ASPECT_PLANE_1_BIT) ? 1 : 0;
+
+ assert(iview->vk.level_count == 1);
+
+ iview->rtv_desc = D3D12_RENDER_TARGET_VIEW_DESC {
+ .Format =
+ dzn_image_get_dxgi_format(iview->vk.format,
+ VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
+ iview->vk.aspects),
+ };
+
+ switch (iview->vk.view_type) {
+ case VK_IMAGE_VIEW_TYPE_1D:
+ case VK_IMAGE_VIEW_TYPE_1D_ARRAY:
+ if (use_array) {
+ iview->rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE1DARRAY;
+ iview->rtv_desc.Texture1DArray.MipSlice = iview->vk.base_mip_level;
+ iview->rtv_desc.Texture1DArray.FirstArraySlice = iview->vk.base_array_layer;
+ iview->rtv_desc.Texture1DArray.ArraySize = iview->vk.layer_count;
+ } else {
+ iview->rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE1D;
+ iview->rtv_desc.Texture1D.MipSlice = iview->vk.base_mip_level;
+ }
+ break;
+
+ case VK_IMAGE_VIEW_TYPE_2D:
+ case VK_IMAGE_VIEW_TYPE_2D_ARRAY:
+ case VK_IMAGE_VIEW_TYPE_CUBE:
+ case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY:
+ if (use_array && ms) {
+ iview->rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DMSARRAY;
+ iview->rtv_desc.Texture2DMSArray.FirstArraySlice = iview->vk.base_array_layer;
+ iview->rtv_desc.Texture2DMSArray.ArraySize = iview->vk.layer_count;
+ } else if (use_array && !ms) {
+ iview->rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DARRAY;
+ iview->rtv_desc.Texture2DArray.MipSlice = iview->vk.base_mip_level;
+ iview->rtv_desc.Texture2DArray.FirstArraySlice = iview->vk.base_array_layer;
+ iview->rtv_desc.Texture2DArray.ArraySize = iview->vk.layer_count;
+ iview->rtv_desc.Texture2DArray.PlaneSlice = plane_slice;
+ } else if (!use_array && ms) {
+ iview->rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DMS;
+ } else {
+ iview->rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;
+ iview->rtv_desc.Texture2D.MipSlice = iview->vk.base_mip_level;
+ iview->rtv_desc.Texture2D.PlaneSlice = plane_slice;
+ }
+ break;
+
+ case VK_IMAGE_VIEW_TYPE_3D:
+ iview->rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE3D;
+ iview->rtv_desc.Texture3D.MipSlice = iview->vk.base_mip_level;
+ iview->rtv_desc.Texture3D.FirstWSlice = 0;
+ iview->rtv_desc.Texture3D.WSize = iview->vk.extent.depth;
+ break;
+
+ default: unreachable("Invalid view type");
+ }
+}
+
+static void
+dzn_image_view_prepare_dsv_desc(dzn_image_view *iview)
+{
+ bool use_array = iview->vk.base_array_layer > 0 || iview->vk.layer_count > 1;
+ bool ms = iview->vk.image->samples > 1;
+
+ iview->dsv_desc = D3D12_DEPTH_STENCIL_VIEW_DESC {
+ .Format =
+ dzn_image_get_dxgi_format(iview->vk.format,
+ VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
+ iview->vk.aspects),
+ };
+
+ switch (iview->vk.view_type) {
+ case VK_IMAGE_VIEW_TYPE_1D:
+ case VK_IMAGE_VIEW_TYPE_1D_ARRAY:
+ if (use_array) {
+ iview->dsv_desc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE1DARRAY;
+ iview->dsv_desc.Texture1DArray.MipSlice = iview->vk.base_mip_level;
+ iview->dsv_desc.Texture1DArray.FirstArraySlice = iview->vk.base_array_layer;
+ iview->dsv_desc.Texture1DArray.ArraySize = iview->vk.layer_count;
+ } else {
+ iview->dsv_desc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE1D;
+ iview->dsv_desc.Texture1D.MipSlice = iview->vk.base_mip_level;
+ }
+ break;
+
+ case VK_IMAGE_VIEW_TYPE_2D:
+ case VK_IMAGE_VIEW_TYPE_2D_ARRAY:
+ case VK_IMAGE_VIEW_TYPE_CUBE:
+ case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY:
+ if (use_array && ms) {
+ iview->dsv_desc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2DMSARRAY;
+ iview->dsv_desc.Texture2DMSArray.FirstArraySlice = iview->vk.base_array_layer;
+ iview->dsv_desc.Texture2DMSArray.ArraySize = iview->vk.layer_count;
+ } else if (use_array && !ms) {
+ iview->dsv_desc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2DARRAY;
+ iview->dsv_desc.Texture2DArray.MipSlice = iview->vk.base_mip_level;
+ iview->dsv_desc.Texture2DArray.FirstArraySlice = iview->vk.base_array_layer;
+ iview->dsv_desc.Texture2DArray.ArraySize = iview->vk.layer_count;
+ } else if (!use_array && ms) {
+ iview->dsv_desc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2DMS;
+ } else {
+ iview->dsv_desc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2D;
+ iview->dsv_desc.Texture2D.MipSlice = iview->vk.base_mip_level;
+ }
+ break;
+
+ default: unreachable("Invalid view type");
+ }
+}
+
+void
+dzn_image_view_finish(dzn_image_view *iview)
+{
+ vk_image_view_finish(&iview->vk);
+}
+
+void
+dzn_image_view_init(dzn_device *device,
+ dzn_image_view *iview,
+ const VkImageViewCreateInfo *pCreateInfo)
+{
+ VK_FROM_HANDLE(dzn_image, image, pCreateInfo->image);
+
+ const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;
+ uint32_t level_count = dzn_get_level_count(image, range);
+ uint32_t layer_count = dzn_get_layer_count(image, range);
+ uint32_t plane_slice =
+ pCreateInfo->subresourceRange.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT ? 1 : 0;
+
+ vk_image_view_init(&device->vk, &iview->vk, pCreateInfo);
+
+ assert(layer_count > 0);
+ assert(range->baseMipLevel < image->vk.mip_levels);
+
+ /* View usage should be a subset of image usage */
+ assert(image->vk.usage & (VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
+ VK_IMAGE_USAGE_SAMPLED_BIT |
+ VK_IMAGE_USAGE_STORAGE_BIT |
+ VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
+ VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT |
+ VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT));
+
+ switch (image->vk.image_type) {
+ default:
+ unreachable("bad VkImageType");
+ case VK_IMAGE_TYPE_1D:
+ case VK_IMAGE_TYPE_2D:
+ assert(range->baseArrayLayer + dzn_get_layer_count(image, range) - 1 <= image->vk.array_layers);
+ break;
+ case VK_IMAGE_TYPE_3D:
+ assert(range->baseArrayLayer + dzn_get_layer_count(image, range) - 1
+ <= u_minify(image->vk.extent.depth, range->baseMipLevel));
+ break;
+ }
+
+ dzn_image_view_prepare_srv_desc(iview);
+
+ if (image->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT)
+ dzn_image_view_prepare_uav_desc(iview);
+
+ if (image->vk.usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)
+ dzn_image_view_prepare_rtv_desc(iview);
+
+ if (image->vk.usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)
+ dzn_image_view_prepare_dsv_desc(iview);
+}
+
+static void
+dzn_image_view_destroy(dzn_image_view *iview,
+ const VkAllocationCallbacks *pAllocator)
+{
+ if (!iview)
+ return;
+
+ dzn_device *device = container_of(iview->vk.base.device, dzn_device, vk);
+
+ vk_image_view_finish(&iview->vk);
+ vk_free2(&device->vk.alloc, pAllocator, iview);
+}
+
+static VkResult
+dzn_image_view_create(dzn_device *device,
+ const VkImageViewCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkImageView *out)
+{
+ dzn_image_view *iview = (dzn_image_view *)
+ vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*iview), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (!iview)
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ dzn_image_view_init(device, iview, pCreateInfo);
+
+ *out = dzn_image_view_to_handle(iview);
+ return VK_SUCCESS;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_CreateImageView(VkDevice device,
+ const VkImageViewCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkImageView *pView)
+{
+ return dzn_image_view_create(dzn_device_from_handle(device), pCreateInfo,
+ pAllocator, pView);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_DestroyImageView(VkDevice device,
+ VkImageView imageView,
+ const VkAllocationCallbacks *pAllocator)
+{
+ dzn_image_view_destroy(dzn_image_view_from_handle(imageView), pAllocator);
+}
+
+static void
+dzn_buffer_view_destroy(dzn_buffer_view *bview,
+ const VkAllocationCallbacks *pAllocator)
+{
+ if (!bview)
+ return;
+
+ dzn_device *device = container_of(bview->base.device, dzn_device, vk);
+
+ vk_object_base_finish(&bview->base);
+ vk_free2(&device->vk.alloc, pAllocator, bview);
+}
+
+static VkResult
+dzn_buffer_view_create(dzn_device *device,
+ const VkBufferViewCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkBufferView *out)
+{
+ VK_FROM_HANDLE(dzn_buffer, buf, pCreateInfo->buffer);
+
+ dzn_buffer_view *bview = (dzn_buffer_view *)
+ vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*bview), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (!bview)
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ vk_object_base_init(&device->vk, &bview->base, VK_OBJECT_TYPE_BUFFER_VIEW);
+
+ enum pipe_format pfmt = vk_format_to_pipe_format(pCreateInfo->format);
+ unsigned blksz = util_format_get_blocksize(pfmt);
+ VkDeviceSize size =
+ pCreateInfo->range == VK_WHOLE_SIZE ?
+ buf->size - pCreateInfo->offset : pCreateInfo->range;
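+ /* e.g. (hypothetical): an R32G32B32A32_SFLOAT view (16-byte blocks) at
+ * offset 256 into a 4096-byte buffer with range == VK_WHOLE_SIZE gives
+ * FirstElement = 256 / 16 = 16 and NumElements = (4096 - 256) / 16 = 240.
+ */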
+
+ bview->buffer = buf;
+ if (buf->usage &
+ (VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
+ VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT)) {
+ bview->srv_desc = D3D12_SHADER_RESOURCE_VIEW_DESC {
+ .Format = dzn_buffer_get_dxgi_format(pCreateInfo->format),
+ .ViewDimension = D3D12_SRV_DIMENSION_BUFFER,
+ .Shader4ComponentMapping =
+ D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING,
+ .Buffer = {
+ .FirstElement = pCreateInfo->offset / blksz,
+ .NumElements = UINT(size / blksz),
+ .Flags = D3D12_BUFFER_SRV_FLAG_NONE,
+ },
+ };
+ }
+
+ if (buf->usage & VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT) {
+ bview->uav_desc = D3D12_UNORDERED_ACCESS_VIEW_DESC {
+ .Format = dzn_buffer_get_dxgi_format(pCreateInfo->format),
+ .ViewDimension = D3D12_UAV_DIMENSION_BUFFER,
+ .Buffer = {
+ .FirstElement = pCreateInfo->offset / blksz,
+ .NumElements = UINT(size / blksz),
+ .Flags = D3D12_BUFFER_UAV_FLAG_NONE,
+ },
+ };
+ }
+
+ *out = dzn_buffer_view_to_handle(bview);
+ return VK_SUCCESS;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_CreateBufferView(VkDevice device,
+ const VkBufferViewCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkBufferView *pView)
+{
+ return dzn_buffer_view_create(dzn_device_from_handle(device),
+ pCreateInfo, pAllocator, pView);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_DestroyBufferView(VkDevice device,
+ VkBufferView bufferView,
+ const VkAllocationCallbacks *pAllocator)
+{
+ dzn_buffer_view_destroy(dzn_buffer_view_from_handle(bufferView), pAllocator);
+}
diff --git a/src/microsoft/vulkan/dzn_meta.cpp b/src/microsoft/vulkan/dzn_meta.cpp
new file mode 100644
index 00000000000..088bc6c5adc
--- /dev/null
+++ b/src/microsoft/vulkan/dzn_meta.cpp
@@ -0,0 +1,744 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "dzn_private.h"
+
+#include "spirv_to_dxil.h"
+#include "nir_to_dxil.h"
+
+#include "dxil_nir.h"
+#include "dxil_nir_lower_int_samplers.h"
+
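+/* Compile a NIR shader to validated DXIL. The returned bytecode buffer is
+ * heap-allocated and owned by the caller, which must free() it once the
+ * pipeline state has been created (as the meta init paths below do).
+ */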
+static void
+dzn_meta_compile_shader(dzn_device *device, nir_shader *nir,
+ D3D12_SHADER_BYTECODE *slot)
+{
+ dzn_instance *instance =
+ container_of(device->vk.physical->instance, dzn_instance, vk);
+ IDxcValidator *validator = instance->dxc.validator;
+ IDxcLibrary *library = instance->dxc.library;
+ IDxcCompiler *compiler = instance->dxc.compiler;
+
+ nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
+
+ if ((instance->debug_flags & DZN_DEBUG_NIR) &&
+ (instance->debug_flags & DZN_DEBUG_INTERNAL))
+ nir_print_shader(nir, stderr);
+
+ struct nir_to_dxil_options opts = { .environment = DXIL_ENVIRONMENT_VULKAN };
+ struct blob dxil_blob;
+ bool ret = nir_to_dxil(nir, &opts, &dxil_blob);
+ assert(ret);
+
+ dzn_shader_blob blob(dxil_blob.data, dxil_blob.size);
+ ComPtr<IDxcOperationResult> result;
+ validator->Validate(&blob, DxcValidatorFlags_InPlaceEdit, &result);
+ if ((instance->debug_flags & DZN_DEBUG_DXIL) &&
+ (instance->debug_flags & DZN_DEBUG_INTERNAL)) {
+ IDxcBlobEncoding *disassembly;
+ compiler->Disassemble(&blob, &disassembly);
+ ComPtr<IDxcBlobEncoding> blobUtf8;
+ library->GetBlobAsUtf8(disassembly, blobUtf8.GetAddressOf());
+ char *disasm = reinterpret_cast<char*>(blobUtf8->GetBufferPointer());
+ disasm[blobUtf8->GetBufferSize() - 1] = 0;
+ fprintf(stderr,
+ "== BEGIN SHADER ============================================\n"
+ "%s\n"
+ "== END SHADER ==============================================\n",
+ disasm);
+ disassembly->Release();
+ }
+
+ HRESULT validationStatus;
+ result->GetStatus(&validationStatus);
+ if (FAILED(validationStatus)) {
+ if ((instance->debug_flags & DZN_DEBUG_DXIL) &&
+ (instance->debug_flags & DZN_DEBUG_INTERNAL)) {
+ ComPtr<IDxcBlobEncoding> printBlob, printBlobUtf8;
+ result->GetErrorBuffer(&printBlob);
+ library->GetBlobAsUtf8(printBlob.Get(), printBlobUtf8.GetAddressOf());
+
+ char *errorString;
+ if (printBlobUtf8) {
+ errorString = reinterpret_cast<char*>(printBlobUtf8->GetBufferPointer());
+ errorString[printBlobUtf8->GetBufferSize() - 1] = 0;
+ fprintf(stderr,
+ "== VALIDATION ERROR =============================================\n"
+ "%s\n"
+ "== END ==========================================================\n",
+ errorString);
+ }
+ }
+ }
+ assert(!FAILED(validationStatus));
+
+ void *data;
+ size_t size;
+ blob_finish_get_buffer(&dxil_blob, &data, &size);
+ slot->pShaderBytecode = data;
+ slot->BytecodeLength = size;
+}
+
+#define DZN_META_INDIRECT_DRAW_MAX_PARAM_COUNT 4
+
+static void
+dzn_meta_indirect_draw_finish(dzn_device *device, enum dzn_indirect_draw_type type)
+{
+ dzn_meta_indirect_draw *meta = &device->indirect_draws[type];
+
+ if (meta->root_sig)
+ meta->root_sig->Release();
+
+ if (meta->pipeline_state)
+ meta->pipeline_state->Release();
+}
+
+static VkResult
+dzn_meta_indirect_draw_init(dzn_device *device,
+ enum dzn_indirect_draw_type type)
+{
+ dzn_meta_indirect_draw *meta = &device->indirect_draws[type];
+ dzn_instance *instance =
+ container_of(device->vk.physical->instance, dzn_instance, vk);
+ VkResult ret = VK_SUCCESS;
+
+ glsl_type_singleton_init_or_ref();
+
+ nir_shader *nir = dzn_nir_indirect_draw_shader(type);
+ bool triangle_fan = type == DZN_INDIRECT_DRAW_TRIANGLE_FAN ||
+ type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN;
+ uint32_t shader_params_size =
+ triangle_fan ?
+ sizeof(struct dzn_indirect_draw_triangle_fan_rewrite_params) :
+ sizeof(struct dzn_indirect_draw_rewrite_params);
+
+ uint32_t root_param_count = 0;
+ D3D12_ROOT_PARAMETER1 root_params[DZN_META_INDIRECT_DRAW_MAX_PARAM_COUNT];
+
+ root_params[root_param_count++] = D3D12_ROOT_PARAMETER1 {
+ .ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS,
+ .Constants = {
+ .ShaderRegister = 0,
+ .RegisterSpace = 0,
+ .Num32BitValues = shader_params_size / 4,
+ },
+ .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL,
+ };
+
+ root_params[root_param_count++] = D3D12_ROOT_PARAMETER1 {
+ .ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV,
+ .Descriptor = {
+ .ShaderRegister = 1,
+ .RegisterSpace = 0,
+ .Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE,
+ },
+ .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL,
+ };
+
+ root_params[root_param_count++] = D3D12_ROOT_PARAMETER1 {
+ .ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV,
+ .Descriptor = {
+ .ShaderRegister = 2,
+ .RegisterSpace = 0,
+ .Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE,
+ },
+ .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL,
+ };
+
+ if (triangle_fan) {
+ root_params[root_param_count++] = D3D12_ROOT_PARAMETER1 {
+ .ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV,
+ .Descriptor = {
+ .ShaderRegister = 3,
+ .RegisterSpace = 0,
+ .Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE,
+ },
+ .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL,
+ };
+ }
+
+ assert(root_param_count <= ARRAY_SIZE(root_params));
+
+ D3D12_VERSIONED_ROOT_SIGNATURE_DESC root_sig_desc = {
+ .Version = D3D_ROOT_SIGNATURE_VERSION_1_1,
+ .Desc_1_1 = {
+ .NumParameters = root_param_count,
+ .pParameters = root_params,
+ .Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE,
+ },
+ };
+
+ D3D12_COMPUTE_PIPELINE_STATE_DESC desc = {
+ .Flags = D3D12_PIPELINE_STATE_FLAG_NONE,
+ };
+
+ meta->root_sig =
+ dzn_device_create_root_sig(device, &root_sig_desc);
+ if (!meta->root_sig) {
+ ret = vk_error(instance, VK_ERROR_INITIALIZATION_FAILED);
+ goto out;
+ }
+
+ desc.pRootSignature = meta->root_sig;
+ dzn_meta_compile_shader(device, nir, &desc.CS);
+ assert(desc.CS.pShaderBytecode);
+
+ if (FAILED(device->dev->CreateComputePipelineState(&desc,
+ IID_PPV_ARGS(&meta->pipeline_state))))
+ ret = vk_error(instance, VK_ERROR_INITIALIZATION_FAILED);
+
+out:
+ if (ret != VK_SUCCESS)
+ dzn_meta_indirect_draw_finish(device, type);
+
+ free((void *)desc.CS.pShaderBytecode);
+ ralloc_free(nir);
+ glsl_type_singleton_decref();
+
+ return ret;
+}
+
+#define DZN_META_TRIANGLE_FAN_REWRITE_IDX_MAX_PARAM_COUNT 3
+
+static void
+dzn_meta_triangle_fan_rewrite_index_finish(dzn_device *device,
+ enum dzn_index_type old_index_type)
+{
+ dzn_meta_triangle_fan_rewrite_index *meta =
+ &device->triangle_fan[old_index_type];
+
+ if (meta->root_sig)
+ meta->root_sig->Release();
+ if (meta->pipeline_state)
+ meta->pipeline_state->Release();
+ if (meta->cmd_sig)
+ meta->cmd_sig->Release();
+}
+
+static VkResult
+dzn_meta_triangle_fan_rewrite_index_init(dzn_device *device,
+ enum dzn_index_type old_index_type)
+{
+ dzn_meta_triangle_fan_rewrite_index *meta =
+ &device->triangle_fan[old_index_type];
+ dzn_instance *instance =
+ container_of(device->vk.physical->instance, dzn_instance, vk);
+ VkResult ret = VK_SUCCESS;
+
+ glsl_type_singleton_init_or_ref();
+
+ uint8_t old_index_size = dzn_index_size(old_index_type);
+
+ nir_shader *nir = dzn_nir_triangle_fan_rewrite_index_shader(old_index_size);
+
+ uint32_t root_param_count = 0;
+ D3D12_ROOT_PARAMETER1 root_params[DZN_META_TRIANGLE_FAN_REWRITE_IDX_MAX_PARAM_COUNT];
+
+ root_params[root_param_count++] = D3D12_ROOT_PARAMETER1 {
+ .ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV,
+ .Descriptor = {
+ .ShaderRegister = 1,
+ .RegisterSpace = 0,
+ .Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE,
+ },
+ .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL,
+ };
+
+ root_params[root_param_count++] = D3D12_ROOT_PARAMETER1 {
+ .ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS,
+ .Constants = {
+ .ShaderRegister = 0,
+ .RegisterSpace = 0,
+ .Num32BitValues = sizeof(struct dzn_triangle_fan_rewrite_index_params) / 4,
+ },
+ .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL,
+ };
+
+ if (old_index_type != DZN_NO_INDEX) {
+ root_params[root_param_count++] = D3D12_ROOT_PARAMETER1 {
+ .ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV,
+ .Descriptor = {
+ .ShaderRegister = 2,
+ .RegisterSpace = 0,
+ .Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE,
+ },
+ .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL,
+ };
+ }
+
+ assert(root_param_count <= ARRAY_SIZE(root_params));
+
+ D3D12_VERSIONED_ROOT_SIGNATURE_DESC root_sig_desc = {
+ .Version = D3D_ROOT_SIGNATURE_VERSION_1_1,
+ .Desc_1_1 = {
+ .NumParameters = root_param_count,
+ .pParameters = root_params,
+ .Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE,
+ },
+ };
+
+ D3D12_COMPUTE_PIPELINE_STATE_DESC desc = {
+ .Flags = D3D12_PIPELINE_STATE_FLAG_NONE,
+ };
+
+ D3D12_INDIRECT_ARGUMENT_DESC cmd_args[] = {
+ {
+ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_UNORDERED_ACCESS_VIEW,
+ .UnorderedAccessView = {
+ .RootParameterIndex = 0,
+ },
+ },
+ {
+ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT,
+ .Constant = {
+ .RootParameterIndex = 1,
+ .DestOffsetIn32BitValues = 0,
+ .Num32BitValuesToSet = sizeof(struct dzn_triangle_fan_rewrite_index_params) / 4,
+ },
+ },
+ {
+ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH,
+ },
+ };
+
+ D3D12_COMMAND_SIGNATURE_DESC cmd_sig_desc = {
+ .ByteStride = sizeof(struct dzn_indirect_triangle_fan_rewrite_index_exec_params),
+ .NumArgumentDescs = ARRAY_SIZE(cmd_args),
+ .pArgumentDescs = cmd_args,
+ };
+
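+ /* Root-descriptor arguments carry GPU virtual addresses, which D3D12
+ * requires to stay 8-byte aligned within the argument buffer.
+ */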
+ assert((cmd_sig_desc.ByteStride & 7) == 0);
+
+ meta->root_sig = dzn_device_create_root_sig(device, &root_sig_desc);
+ if (!meta->root_sig) {
+ ret = vk_error(instance, VK_ERROR_INITIALIZATION_FAILED);
+ goto out;
+ }
+
+ desc.pRootSignature = meta->root_sig;
+ dzn_meta_compile_shader(device, nir, &desc.CS);
+
+ if (FAILED(device->dev->CreateComputePipelineState(&desc,
+ IID_PPV_ARGS(&meta->pipeline_state)))) {
+ ret = vk_error(instance, VK_ERROR_INITIALIZATION_FAILED);
+ goto out;
+ }
+
+ if (FAILED(device->dev->CreateCommandSignature(&cmd_sig_desc,
+ meta->root_sig,
+ IID_PPV_ARGS(&meta->cmd_sig))))
+ ret = vk_error(instance, VK_ERROR_INITIALIZATION_FAILED);
+
+out:
+ if (ret != VK_SUCCESS)
+ dzn_meta_triangle_fan_rewrite_index_finish(device, old_index_type);
+
+ free((void *)desc.CS.pShaderBytecode);
+ ralloc_free(nir);
+ glsl_type_singleton_decref();
+
+ return ret;
+}
+
+static const D3D12_SHADER_BYTECODE *
+dzn_meta_blits_get_vs(dzn_device *device)
+{
+ dzn_meta_blits *meta = &device->blits;
+ D3D12_SHADER_BYTECODE *out = NULL;
+
+ mtx_lock(&meta->shaders_lock);
+
+ if (meta->vs.pShaderBytecode == NULL) {
+ nir_shader *nir = dzn_nir_blit_vs();
+
+ NIR_PASS_V(nir, nir_lower_system_values);
+
+ gl_system_value system_values[] = {
+ SYSTEM_VALUE_FIRST_VERTEX,
+ SYSTEM_VALUE_BASE_VERTEX,
+ };
+
+ NIR_PASS_V(nir, dxil_nir_lower_system_values_to_zero, system_values,
+ ARRAY_SIZE(system_values));
+
+ D3D12_SHADER_BYTECODE bc;
+
+ dzn_meta_compile_shader(device, nir, &bc);
+ meta->vs.pShaderBytecode =
+ vk_alloc(&device->vk.alloc, bc.BytecodeLength, 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+ if (meta->vs.pShaderBytecode) {
+ meta->vs.BytecodeLength = bc.BytecodeLength;
+ memcpy((void *)meta->vs.pShaderBytecode, bc.pShaderBytecode, bc.BytecodeLength);
+ out = &meta->vs;
+ }
+ free((void *)bc.pShaderBytecode);
+ ralloc_free(nir);
+ } else {
+ out = &meta->vs;
+ }
+
+ mtx_unlock(&meta->shaders_lock);
+
+ return out;
+}
+
+static const D3D12_SHADER_BYTECODE *
+dzn_meta_blits_get_fs(dzn_device *device,
+ const struct dzn_nir_blit_info *info)
+{
+ dzn_meta_blits *meta = &device->blits;
+ D3D12_SHADER_BYTECODE *out = NULL;
+
+ mtx_lock(&meta->shaders_lock);
+
+ STATIC_ASSERT(sizeof(struct dzn_nir_blit_info) == sizeof(uint32_t));
+
+ struct hash_entry *he =
+ _mesa_hash_table_search(meta->fs, (void *)(uintptr_t)info->hash_key);
+
+ if (!he) {
+ nir_shader *nir = dzn_nir_blit_fs(info);
+
+ if (info->out_type != GLSL_TYPE_FLOAT) {
+ dxil_wrap_sampler_state wrap_state = {
+ .is_int_sampler = 1,
+ .is_linear_filtering = 0,
+ .skip_boundary_conditions = 1,
+ };
+ dxil_lower_sample_to_txf_for_integer_tex(nir, &wrap_state, NULL, 0);
+ }
+
+ D3D12_SHADER_BYTECODE bc;
+
+ dzn_meta_compile_shader(device, nir, &bc);
+
+ out = (D3D12_SHADER_BYTECODE *)
+ vk_alloc(&device->vk.alloc,
+ sizeof(D3D12_SHADER_BYTECODE) + bc.BytecodeLength, 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+ if (out) {
+ out->pShaderBytecode = (void *)(out + 1);
+ memcpy((void *)out->pShaderBytecode, bc.pShaderBytecode, bc.BytecodeLength);
+ out->BytecodeLength = bc.BytecodeLength;
+ _mesa_hash_table_insert(meta->fs, (void *)(uintptr_t)info->hash_key, out);
+ }
+ free((void *)bc.pShaderBytecode);
+ ralloc_free(nir);
+ } else {
+ out = (D3D12_SHADER_BYTECODE *)he->data;
+ }
+
+ mtx_unlock(&meta->shaders_lock);
+
+ return out;
+}
+
+static void
+dzn_meta_blit_destroy(dzn_device *device, dzn_meta_blit *blit)
+{
+ if (!blit)
+ return;
+
+ if (blit->root_sig)
+ blit->root_sig->Release();
+ if (blit->pipeline_state)
+ blit->pipeline_state->Release();
+
+ vk_free(&device->vk.alloc, blit);
+}
+
+static dzn_meta_blit *
+dzn_meta_blit_create(dzn_device *device, const dzn_meta_blit_key *key)
+{
+ dzn_meta_blits *blits = &device->blits;
+ dzn_meta_blit *blit = (dzn_meta_blit *)
+ vk_zalloc(&device->vk.alloc, sizeof(*blit), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+
+ if (!blit)
+ return NULL;
+
+ D3D12_DESCRIPTOR_RANGE1 ranges[] = {
+ {
+ .RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV,
+ .NumDescriptors = 1,
+ .BaseShaderRegister = 0,
+ .RegisterSpace = 0,
+ .Flags = D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_STATIC_KEEPING_BUFFER_BOUNDS_CHECKS,
+ .OffsetInDescriptorsFromTableStart = 0,
+ },
+ };
+
+ D3D12_STATIC_SAMPLER_DESC samplers[] = {
+ {
+ .Filter = key->linear_filter ?
+ D3D12_FILTER_MIN_MAG_MIP_LINEAR :
+ D3D12_FILTER_MIN_MAG_MIP_POINT,
+ .AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP,
+ .AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP,
+ .AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP,
+ .MipLODBias = 0,
+ .MaxAnisotropy = 0,
+ .MinLOD = 0,
+ .MaxLOD = D3D12_FLOAT32_MAX,
+ .ShaderRegister = 0,
+ .RegisterSpace = 0,
+ .ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL,
+ },
+ };
+
+ D3D12_ROOT_PARAMETER1 root_params[] = {
+ {
+ .ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE,
+ .DescriptorTable = {
+ .NumDescriptorRanges = ARRAY_SIZE(ranges),
+ .pDescriptorRanges = ranges,
+ },
+ .ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL,
+ },
+ {
+ .ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS,
+ .Constants = {
+ .ShaderRegister = 0,
+ .RegisterSpace = 0,
+ .Num32BitValues = 17,
+ },
+ .ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX,
+ },
+ };
+
+ D3D12_VERSIONED_ROOT_SIGNATURE_DESC root_sig_desc = {
+ .Version = D3D_ROOT_SIGNATURE_VERSION_1_1,
+ .Desc_1_1 = {
+ .NumParameters = ARRAY_SIZE(root_params),
+ .pParameters = root_params,
+ .NumStaticSamplers = ARRAY_SIZE(samplers),
+ .pStaticSamplers = samplers,
+ .Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE,
+ },
+ };
+
+ D3D12_GRAPHICS_PIPELINE_STATE_DESC desc = {
+ .SampleMask = key->resolve ? 1 : (1ULL << key->samples) - 1,
+ .RasterizerState = {
+ .FillMode = D3D12_FILL_MODE_SOLID,
+ .CullMode = D3D12_CULL_MODE_NONE,
+ .DepthClipEnable = TRUE,
+ },
+ .PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE,
+ .SampleDesc = {
+ .Count = key->resolve ? 1 : key->samples,
+ .Quality = 0,
+ },
+ .Flags = D3D12_PIPELINE_STATE_FLAG_NONE,
+ };
+
+ struct dzn_nir_blit_info blit_fs_info = {
+ .src_samples = key->samples,
+ .loc = key->loc,
+ .out_type = key->out_type,
+ .sampler_dim = key->sampler_dim,
+ .src_is_array = key->src_is_array,
+ .resolve = key->resolve,
+ .padding = 0,
+ };
+
+ blit->root_sig = dzn_device_create_root_sig(device, &root_sig_desc);
+ if (!blit->root_sig) {
+ dzn_meta_blit_destroy(device, blit);
+ return NULL;
+ }
+
+ desc.pRootSignature = blit->root_sig;
+
+ const D3D12_SHADER_BYTECODE *vs, *fs;
+
+ vs = dzn_meta_blits_get_vs(device);
+ if (!vs) {
+ dzn_meta_blit_destroy(device, blit);
+ return NULL;
+ }
+
+ desc.VS = *vs;
+ assert(desc.VS.pShaderBytecode);
+
+ fs = dzn_meta_blits_get_fs(device, &blit_fs_info);
+ if (!fs) {
+ dzn_meta_blit_destroy(device, blit);
+ return NULL;
+ }
+
+ desc.PS = *fs;
+ assert(desc.PS.pShaderBytecode);
+
+ assert(key->loc == FRAG_RESULT_DATA0 ||
+ key->loc == FRAG_RESULT_DEPTH ||
+ key->loc == FRAG_RESULT_STENCIL);
+
+ if (key->loc == FRAG_RESULT_DATA0) {
+ desc.NumRenderTargets = 1;
+ desc.RTVFormats[0] = key->out_format;
+ desc.BlendState.RenderTarget[0].RenderTargetWriteMask = 0xf;
+ } else {
+ desc.DSVFormat = key->out_format;
+ if (key->loc == FRAG_RESULT_DEPTH) {
+ desc.DepthStencilState.DepthEnable = TRUE;
+ desc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL;
+ desc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_ALWAYS;
+ } else {
+ assert(key->loc == FRAG_RESULT_STENCIL);
+ desc.DepthStencilState.StencilEnable = TRUE;
+ desc.DepthStencilState.StencilWriteMask = 0xff;
+ desc.DepthStencilState.FrontFace.StencilFailOp = D3D12_STENCIL_OP_REPLACE;
+ desc.DepthStencilState.FrontFace.StencilDepthFailOp = D3D12_STENCIL_OP_REPLACE;
+ desc.DepthStencilState.FrontFace.StencilPassOp = D3D12_STENCIL_OP_REPLACE;
+ desc.DepthStencilState.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS;
+ desc.DepthStencilState.BackFace = desc.DepthStencilState.FrontFace;
+ }
+ }
+
+ if (FAILED(device->dev->CreateGraphicsPipelineState(&desc,
+ IID_PPV_ARGS(&blit->pipeline_state)))) {
+ dzn_meta_blit_destroy(device, blit);
+ return NULL;
+ }
+
+ return blit;
+}
+
+const dzn_meta_blit *
+dzn_meta_blits_get_context(dzn_device *device,
+ const dzn_meta_blit_key *key)
+{
+ dzn_meta_blit *out = NULL;
+
+ STATIC_ASSERT(sizeof(*key) == sizeof(uint64_t));
+
+ mtx_lock(&device->blits.contexts_lock);
+
+ out = (dzn_meta_blit *)
+ _mesa_hash_table_u64_search(device->blits.contexts, key->u64);
+ if (!out) {
+ out = dzn_meta_blit_create(device, key);
+
+ if (out)
+ _mesa_hash_table_u64_insert(device->blits.contexts, key->u64, out);
+ }
+
+ mtx_unlock(&device->blits.contexts_lock);
+
+ return out;
+}
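+
+/* A minimal usage sketch (illustrative, not part of this change): callers
+ * fill a key describing the blit and let the cache create or reuse the
+ * matching pipeline. The field values below are hypothetical.
+ *
+ *    dzn_meta_blit_key key = { .u64 = 0 };
+ *    key.loc = FRAG_RESULT_DATA0;
+ *    key.out_format = DXGI_FORMAT_R8G8B8A8_UNORM;
+ *    key.samples = 1;
+ *    key.linear_filter = true;
+ *
+ *    const dzn_meta_blit *ctx = dzn_meta_blits_get_context(device, &key);
+ *    if (!ctx)
+ *       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+ */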
+
+static void
+dzn_meta_blits_finish(dzn_device *device)
+{
+ dzn_meta_blits *meta = &device->blits;
+
+ vk_free(&device->vk.alloc, (void *)meta->vs.pShaderBytecode);
+
+ if (meta->fs) {
+ hash_table_foreach(meta->fs, he)
+ vk_free(&device->vk.alloc, he->data);
+ _mesa_hash_table_destroy(meta->fs, NULL);
+ }
+
+ if (meta->contexts) {
+ hash_table_foreach(meta->contexts->table, he)
+ dzn_meta_blit_destroy(device, (dzn_meta_blit *)he->data);
+ _mesa_hash_table_u64_destroy(meta->contexts);
+ }
+
+ mtx_destroy(&meta->shaders_lock);
+ mtx_destroy(&meta->contexts_lock);
+}
+
+static VkResult
+dzn_meta_blits_init(dzn_device *device)
+{
+ dzn_instance *instance =
+ container_of(device->vk.physical->instance, dzn_instance, vk);
+ dzn_meta_blits *meta = &device->blits;
+
+ mtx_init(&meta->shaders_lock, mtx_plain);
+ mtx_init(&meta->contexts_lock, mtx_plain);
+
+ meta->fs = _mesa_hash_table_create_u32_keys(NULL);
+ if (!meta->fs) {
+ dzn_meta_blits_finish(device);
+ return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+
+ meta->contexts = _mesa_hash_table_u64_create(NULL);
+ if (!meta->contexts) {
+ dzn_meta_blits_finish(device);
+ return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+
+ return VK_SUCCESS;
+}
+
+void
+dzn_meta_finish(dzn_device *device)
+{
+ for (uint32_t i = 0; i < ARRAY_SIZE(device->triangle_fan); i++)
+ dzn_meta_triangle_fan_rewrite_index_finish(device, (enum dzn_index_type)i);
+
+ for (uint32_t i = 0; i < ARRAY_SIZE(device->indirect_draws); i++)
+ dzn_meta_indirect_draw_finish(device, (enum dzn_indirect_draw_type)i);
+
+ dzn_meta_blits_finish(device);
+}
+
+VkResult
+dzn_meta_init(dzn_device *device)
+{
+ VkResult result = dzn_meta_blits_init(device);
+ if (result != VK_SUCCESS)
+ goto out;
+
+ for (uint32_t i = 0; i < ARRAY_SIZE(device->indirect_draws); i++) {
+ result =
+ dzn_meta_indirect_draw_init(device, (enum dzn_indirect_draw_type)i);
+ if (result != VK_SUCCESS)
+ goto out;
+ }
+
+ for (uint32_t i = 0; i < ARRAY_SIZE(device->triangle_fan); i++) {
+ result =
+ dzn_meta_triangle_fan_rewrite_index_init(device, (enum dzn_index_type)i);
+ if (result != VK_SUCCESS)
+ goto out;
+ }
+
+out:
+ if (result != VK_SUCCESS) {
+ dzn_meta_finish(device);
+ return result;
+ }
+
+ return VK_SUCCESS;
+}
diff --git a/src/microsoft/vulkan/dzn_nir.c b/src/microsoft/vulkan/dzn_nir.c
new file mode 100644
index 00000000000..649d900fd77
--- /dev/null
+++ b/src/microsoft/vulkan/dzn_nir.c
@@ -0,0 +1,513 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <directx/d3d12.h>
+
+#include "spirv_to_dxil.h"
+#include "nir_to_dxil.h"
+#include "nir_builder.h"
+#include "nir_vulkan.h"
+
+#include "dzn_nir.h"
+
+static nir_ssa_def *
+dzn_nir_create_bo_desc(nir_builder *b,
+ nir_variable_mode mode,
+ uint32_t desc_set,
+ uint32_t binding,
+ const char *name,
+ unsigned access)
+{
+ struct glsl_struct_field field = {
+ .type = mode == nir_var_mem_ubo ?
+ glsl_array_type(glsl_uint_type(), 4096, 4) :
+ glsl_uint_type(),
+ .name = "dummy_int",
+ };
+ const struct glsl_type *dummy_type =
+ glsl_struct_type(&field, 1, "dummy_type", false);
+
+ nir_variable *var =
+ nir_variable_create(b->shader, mode, dummy_type, name);
+ var->data.descriptor_set = desc_set;
+ var->data.binding = binding;
+ var->data.access = access;
+
+ assert(mode == nir_var_mem_ubo || mode == nir_var_mem_ssbo);
+ if (mode == nir_var_mem_ubo)
+ b->shader->info.num_ubos++;
+ else
+ b->shader->info.num_ssbos++;
+
+ VkDescriptorType desc_type =
+ var->data.mode == nir_var_mem_ubo ?
+ VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER :
+ VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+ nir_address_format addr_format = nir_address_format_32bit_index_offset;
+ nir_ssa_def *index =
+ nir_vulkan_resource_index(b,
+ nir_address_format_num_components(addr_format),
+ nir_address_format_bit_size(addr_format),
+ nir_imm_int(b, 0),
+ .desc_set = desc_set,
+ .binding = binding,
+ .desc_type = desc_type);
+
+ nir_ssa_def *desc =
+ nir_load_vulkan_descriptor(b,
+ nir_address_format_num_components(addr_format),
+ nir_address_format_bit_size(addr_format),
+ index,
+ .desc_type = desc_type);
+
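+ /* With nir_address_format_32bit_index_offset the descriptor is a
+ * (buffer index, byte offset) vector; only the index component matters
+ * here, callers pass the byte offset explicitly to nir_load_ubo()/
+ * nir_load_ssbo().
+ */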
+ return nir_channel(b, desc, 0);
+}
+
+nir_shader *
+dzn_nir_indirect_draw_shader(enum dzn_indirect_draw_type type)
+{
+ const char *type_str[] = {
+ "draw",
+ "indexed_draw",
+ "draw_triangle_fan",
+ "indexed_draw_triangle_fan",
+ };
+
+ assert(type < ARRAY_SIZE(type_str));
+
+ bool indexed = type == DZN_INDIRECT_INDEXED_DRAW ||
+ type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN;
+ bool triangle_fan = type == DZN_INDIRECT_DRAW_TRIANGLE_FAN ||
+ type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN;
+ nir_builder b =
+ nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
+ dxil_get_nir_compiler_options(),
+ "dzn_meta_indirect_%s()",
+ type_str[type]);
+ b.shader->info.internal = true;
+
+ struct glsl_struct_field field = {
+ .type = glsl_uint_type(),
+ .name = "dummy_int",
+ };
+ const struct glsl_type *dummy_type =
+ glsl_struct_type(&field, 1, "dummy_type", false);
+
+ nir_ssa_def *params_desc =
+ dzn_nir_create_bo_desc(&b, nir_var_mem_ubo, 0, 0, "params", 0);
+ nir_ssa_def *draw_buf_desc =
+ dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 1, "draw_buf", ACCESS_NON_WRITEABLE);
+ nir_ssa_def *exec_buf_desc =
+ dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 2, "exec_buf", ACCESS_NON_READABLE);
+
+ unsigned params_size =
+ triangle_fan ?
+ sizeof(struct dzn_indirect_draw_triangle_fan_rewrite_params) :
+ sizeof(struct dzn_indirect_draw_rewrite_params);
+
+ nir_ssa_def *params =
+ nir_load_ubo(&b, params_size / 4, 32,
+ params_desc, nir_imm_int(&b, 0),
+ .align_mul = 4, .align_offset = 0, .range_base = 0, .range = ~0);
+
+ nir_ssa_def *draw_stride = nir_channel(&b, params, 0);
+ nir_ssa_def *exec_stride = nir_imm_int(&b, sizeof(struct dzn_indirect_draw_exec_params));
+ nir_ssa_def *index =
+ nir_channel(&b, nir_load_global_invocation_id(&b, 32), 0);
+
+ nir_ssa_def *draw_offset = nir_imul(&b, draw_stride, index);
+ nir_ssa_def *exec_offset = nir_imul(&b, exec_stride, index);
+
+ nir_ssa_def *draw_info1 =
+ nir_load_ssbo(&b, 4, 32, draw_buf_desc, draw_offset, .align_mul = 4);
+ nir_ssa_def *draw_info2 =
+ indexed ?
+ nir_load_ssbo(&b, 1, 32, draw_buf_desc,
+ nir_iadd_imm(&b, draw_offset, 16), .align_mul = 4) :
+ nir_imm_int(&b, 0);
+
+ nir_ssa_def *first_vertex = nir_channel(&b, draw_info1, indexed ? 3 : 2);
+ nir_ssa_def *base_instance =
+ indexed ? draw_info2 : nir_channel(&b, draw_info1, 3);
+
+ nir_ssa_def *exec_vals[7] = {
+ first_vertex,
+ base_instance,
+ };
+
+ if (triangle_fan) {
+ /* Patch {vertex,index}_count and first_index */
+ nir_ssa_def *triangle_count =
+ nir_usub_sat(&b, nir_channel(&b, draw_info1, 0), nir_imm_int(&b, 2));
+ exec_vals[2] = nir_imul_imm(&b, triangle_count, 3);
+ exec_vals[3] = nir_channel(&b, draw_info1, 1);
+ exec_vals[4] = nir_imm_int(&b, 0);
+ exec_vals[5] = indexed ? nir_channel(&b, draw_info1, 3) : nir_imm_int(&b, 0);
+ exec_vals[6] = indexed ? draw_info2 : nir_channel(&b, draw_info1, 3);
+
+ nir_ssa_def *triangle_fan_exec_buf_desc =
+ dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 3,
+ "triangle_fan_exec_buf",
+ ACCESS_NON_READABLE);
+ nir_ssa_def *triangle_fan_index_buf_stride = nir_channel(&b, params, 1);
+ nir_ssa_def *triangle_fan_index_buf_addr_lo =
+ nir_iadd(&b, nir_channel(&b, params, 2),
+ nir_imul(&b, triangle_fan_index_buf_stride, index));
+ nir_ssa_def *addr_lo_overflow =
+ nir_ult(&b, triangle_fan_index_buf_addr_lo, nir_channel(&b, params, 2));
+ nir_ssa_def *triangle_fan_index_buf_addr_hi =
+ nir_iadd(&b, nir_channel(&b, params, 3),
+ nir_bcsel(&b, addr_lo_overflow, nir_imm_int(&b, 1), nir_imm_int(&b, 0)));
+
+ nir_ssa_def *triangle_fan_exec_vals[] = {
+ triangle_fan_index_buf_addr_lo,
+ triangle_fan_index_buf_addr_hi,
+ nir_channel(&b, draw_info1, 2),
+ triangle_count,
+ nir_imm_int(&b, 1),
+ nir_imm_int(&b, 1),
+ };
+
+ assert(sizeof(struct dzn_indirect_triangle_fan_rewrite_index_exec_params) == (ARRAY_SIZE(triangle_fan_exec_vals) * 4));
+
+ nir_ssa_def *triangle_fan_exec_stride =
+ nir_imm_int(&b, sizeof(struct dzn_indirect_triangle_fan_rewrite_index_exec_params));
+ nir_ssa_def *triangle_fan_exec_offset =
+ nir_imul(&b, triangle_fan_exec_stride, index);
+
+ nir_store_ssbo(&b, nir_vec(&b, &triangle_fan_exec_vals[0], 4),
+ triangle_fan_exec_buf_desc, triangle_fan_exec_offset,
+ .write_mask = 0xf, .access = ACCESS_NON_READABLE, .align_mul = 4);
+ nir_store_ssbo(&b, nir_vec(&b, &triangle_fan_exec_vals[4], 2),
+ triangle_fan_exec_buf_desc,
+ nir_iadd_imm(&b, triangle_fan_exec_offset, 16),
+ .write_mask = 0x3, .access = ACCESS_NON_READABLE, .align_mul = 4);
+
+ nir_ssa_def *ibview_vals[] = {
+ triangle_fan_index_buf_addr_lo,
+ triangle_fan_index_buf_addr_hi,
+ triangle_fan_index_buf_stride,
+ nir_imm_int(&b, DXGI_FORMAT_R32_UINT),
+ };
+
+ nir_store_ssbo(&b, nir_vec(&b, ibview_vals, ARRAY_SIZE(ibview_vals)),
+ exec_buf_desc, exec_offset,
+ .write_mask = 0xf, .access = ACCESS_NON_READABLE, .align_mul = 4);
+ exec_offset = nir_iadd_imm(&b, exec_offset, ARRAY_SIZE(ibview_vals) * 4);
+ } else {
+ exec_vals[2] = nir_channel(&b, draw_info1, 0);
+ exec_vals[3] = nir_channel(&b, draw_info1, 1);
+ exec_vals[4] = nir_channel(&b, draw_info1, 2);
+ exec_vals[5] = nir_channel(&b, draw_info1, 3);
+ exec_vals[6] = draw_info2;
+ }
+
+ nir_store_ssbo(&b, nir_vec(&b, exec_vals, 4),
+ exec_buf_desc, exec_offset,
+ .write_mask = 0xf, .access = ACCESS_NON_READABLE, .align_mul = 4);
+ nir_store_ssbo(&b, nir_vec(&b, &exec_vals[4], 3),
+ exec_buf_desc, nir_iadd_imm(&b, exec_offset, 16),
+ .write_mask = 7, .access = ACCESS_NON_READABLE, .align_mul = 4);
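+
+ /* The two stores above lay out seven dwords matching
+ * struct dzn_indirect_draw_exec_params (indexed case):
+ *   0: sysvals.first_vertex   1: sysvals.base_instance
+ *   2: index_count            3: instance_count
+ *   4: first_index            5: vertex_offset
+ *   6: first_instance
+ */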
+
+
+ return b.shader;
+}
+
+nir_shader *
+dzn_nir_triangle_fan_rewrite_index_shader(uint8_t old_index_size)
+{
+ assert(old_index_size == 0 || old_index_size == 2 || old_index_size == 4);
+
+ nir_builder b =
+ nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
+ dxil_get_nir_compiler_options(),
+ "dzn_meta_triangle_rewrite_index(old_index_size=%d)",
+ old_index_size);
+ b.shader->info.internal = true;
+
+ nir_ssa_def *params_desc =
+ dzn_nir_create_bo_desc(&b, nir_var_mem_ubo, 0, 0, "params", 0);
+ nir_ssa_def *new_index_buf_desc =
+ dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 1,
+ "new_index_buf", ACCESS_NON_READABLE);
+
+ nir_ssa_def *old_index_buf_desc = NULL;
+ if (old_index_size > 0) {
+ old_index_buf_desc =
+ dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 2,
+ "old_index_buf", ACCESS_NON_WRITEABLE);
+ }
+
+ nir_ssa_def *params =
+ nir_load_ubo(&b, sizeof(struct dzn_triangle_fan_rewrite_index_params) / 4, 32,
+ params_desc, nir_imm_int(&b, 0),
+ .align_mul = 4, .align_offset = 0, .range_base = 0, .range = ~0);
+
+ nir_ssa_def *triangle = nir_channel(&b, nir_load_global_invocation_id(&b, 32), 0);
+ nir_ssa_def *new_indices;
+
+ if (old_index_size > 0) {
+ nir_ssa_def *old_first_index = nir_channel(&b, params, 0);
+ nir_ssa_def *old_index0_offset =
+ nir_imul_imm(&b, old_first_index, old_index_size);
+ nir_ssa_def *old_index1_offset =
+ nir_imul_imm(&b, nir_iadd(&b, nir_iadd_imm(&b, triangle, 1), old_first_index),
+ old_index_size);
+
+ nir_ssa_def *old_index0 =
+ nir_load_ssbo(&b, 1, 32, old_index_buf_desc,
+ old_index_size == 2 ? nir_iand_imm(&b, old_index0_offset, ~3ULL) : old_index0_offset,
+ .align_mul = 4);
+
+ if (old_index_size == 2) {
+ old_index0 =
+ nir_bcsel(&b,
+ nir_ieq_imm(&b, nir_iand_imm(&b, old_index0_offset, 0x2), 0),
+ nir_iand_imm(&b, old_index0, 0xffff),
+ nir_ushr_imm(&b, old_index0, 16));
+ }
+
+ nir_ssa_def *old_index12 =
+ nir_load_ssbo(&b, 2, 32, old_index_buf_desc,
+ old_index_size == 2 ? nir_iand_imm(&b, old_index1_offset, ~3ULL) : old_index1_offset,
+ .align_mul = 4);
+ if (old_index_size == 2) {
+ nir_ssa_def *indices[] = {
+ nir_iand_imm(&b, nir_channel(&b, old_index12, 0), 0xffff),
+ nir_ushr_imm(&b, nir_channel(&b, old_index12, 0), 16),
+ nir_iand_imm(&b, nir_channel(&b, old_index12, 1), 0xffff),
+ };
+
+ old_index12 =
+ nir_bcsel(&b,
+ nir_ieq_imm(&b, nir_iand_imm(&b, old_index1_offset, 0x2), 0),
+ nir_vec2(&b, indices[0], indices[1]),
+ nir_vec2(&b, indices[1], indices[2]));
+ }
+
+ /* TODO: VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT */
+ new_indices =
+ nir_vec3(&b, nir_channel(&b, old_index12, 0),
+ nir_channel(&b, old_index12, 1), old_index0);
+ } else {
+ nir_ssa_def *first_vertex = nir_channel(&b, params, 0);
+
+ new_indices =
+ nir_vec3(&b,
+ nir_iadd(&b, nir_iadd_imm(&b, triangle, 1), first_vertex),
+ nir_iadd(&b, nir_iadd_imm(&b, triangle, 2), first_vertex),
+ first_vertex);
+ }
+
+ nir_ssa_def *new_index_offset =
+ nir_imul_imm(&b, triangle, 4 * 3);
+
+ nir_store_ssbo(&b, new_indices, new_index_buf_desc,
+ new_index_offset,
+ .write_mask = 7, .access = ACCESS_NON_READABLE, .align_mul = 4);
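+
+ /* Worked example: a fan over indices (7, 8, 9, 10) emits the triangles
+ * (8, 9, 7) and (9, 10, 7); the fan's first index is rotated to the last
+ * slot, which keeps the first-vertex provoking convention intact (hence
+ * the TODO above about VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT).
+ */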
+
+ return b.shader;
+}
+
+nir_shader *
+dzn_nir_blit_vs(void)
+{
+ nir_builder b =
+ nir_builder_init_simple_shader(MESA_SHADER_VERTEX,
+ dxil_get_nir_compiler_options(),
+ "dzn_meta_blit_vs()");
+ b.shader->info.internal = true;
+
+ nir_ssa_def *params_desc =
+ dzn_nir_create_bo_desc(&b, nir_var_mem_ubo, 0, 0, "params", 0);
+
+ nir_variable *out_pos =
+ nir_variable_create(b.shader, nir_var_shader_out, glsl_vec4_type(),
+ "gl_Position");
+ out_pos->data.location = VARYING_SLOT_POS;
+ out_pos->data.driver_location = 0;
+
+ nir_variable *out_coords =
+ nir_variable_create(b.shader, nir_var_shader_out, glsl_vec_type(3),
+ "coords");
+ out_coords->data.location = VARYING_SLOT_TEX0;
+ out_coords->data.driver_location = 1;
+
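+ /* The params buffer is expected to hold one vec4 {x, y, u, v} per vertex,
+ * followed at byte offset 64 by the layer/z coordinate shared by the whole
+ * quad; presumably this is backed by the 17 root constants set up in
+ * dzn_meta_blit_create().
+ */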
+ nir_ssa_def *vertex = nir_load_vertex_id(&b);
+ nir_ssa_def *base = nir_imul_imm(&b, vertex, 4 * sizeof(float));
+ nir_ssa_def *coords =
+ nir_load_ubo(&b, 4, 32, params_desc, base,
+ .align_mul = 16, .align_offset = 0, .range_base = 0, .range = ~0);
+ nir_ssa_def *pos =
+ nir_vec4(&b, nir_channel(&b, coords, 0), nir_channel(&b, coords, 1),
+ nir_imm_float(&b, 0.0), nir_imm_float(&b, 1.0));
+ nir_ssa_def *z_coord =
+ nir_load_ubo(&b, 1, 32, params_desc, nir_imm_int(&b, 4 * 4 * sizeof(float)),
+ .align_mul = 64, .align_offset = 0, .range_base = 0, .range = ~0);
+ coords = nir_vec3(&b, nir_channel(&b, coords, 2), nir_channel(&b, coords, 3), z_coord);
+
+ nir_store_var(&b, out_pos, pos, 0xf);
+ nir_store_var(&b, out_coords, coords, 0x7);
+ return b.shader;
+}
+
+nir_shader *
+dzn_nir_blit_fs(const struct dzn_nir_blit_info *info)
+{
+ bool ms = info->src_samples > 1;
+ nir_alu_type nir_out_type =
+ nir_get_nir_type_for_glsl_base_type(info->out_type);
+ uint32_t coord_comps =
+ glsl_get_sampler_dim_coordinate_components(info->sampler_dim) +
+ info->src_is_array;
+
+ nir_builder b =
+ nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
+ dxil_get_nir_compiler_options(),
+ "dzn_meta_blit_fs()");
+ b.shader->info.internal = true;
+
+ const struct glsl_type *tex_type =
+ glsl_texture_type(info->sampler_dim, info->src_is_array, info->out_type);
+ nir_variable *tex_var =
+ nir_variable_create(b.shader, nir_var_uniform, tex_type, "texture");
+ nir_deref_instr *tex_deref = nir_build_deref_var(&b, tex_var);
+
+ nir_variable *pos_var =
+ nir_variable_create(b.shader, nir_var_shader_in,
+ glsl_vector_type(GLSL_TYPE_FLOAT, 4),
+ "gl_FragCoord");
+ pos_var->data.location = VARYING_SLOT_POS;
+ pos_var->data.driver_location = 0;
+
+ nir_variable *coord_var =
+ nir_variable_create(b.shader, nir_var_shader_in,
+ glsl_vector_type(GLSL_TYPE_FLOAT, 3),
+ "coord");
+ coord_var->data.location = VARYING_SLOT_TEX0;
+ coord_var->data.driver_location = 1;
+ nir_ssa_def *coord =
+ nir_channels(&b, nir_load_var(&b, coord_var), (1 << coord_comps) - 1);
+
+ uint32_t out_comps =
+ (info->loc == FRAG_RESULT_DEPTH || info->loc == FRAG_RESULT_STENCIL) ? 1 : 4;
+ nir_variable *out =
+ nir_variable_create(b.shader, nir_var_shader_out,
+ glsl_vector_type(info->out_type, out_comps),
+ "out");
+ out->data.location = info->loc;
+
+ nir_ssa_def *res = NULL;
+
+ if (info->resolve) {
+ /* When resolving a float type, we need to average all samples. For
+ * integer resolves, Vulkan only requires that a single sample be
+ * picked, without specifying which one, so we simply take the first.
+ */
+
+ unsigned nsamples = info->out_type == GLSL_TYPE_FLOAT ?
+ info->src_samples : 1;
+ for (unsigned s = 0; s < nsamples; s++) {
+ nir_tex_instr *tex = nir_tex_instr_create(b.shader, 4);
+
+ tex->op = nir_texop_txf_ms;
+ tex->dest_type = nir_out_type;
+ tex->texture_index = 0;
+ tex->is_array = info->src_is_array;
+ tex->sampler_dim = info->sampler_dim;
+
+ tex->src[0].src_type = nir_tex_src_coord;
+ tex->src[0].src = nir_src_for_ssa(nir_f2i32(&b, coord));
+ tex->coord_components = coord_comps;
+
+ tex->src[1].src_type = nir_tex_src_ms_index;
+ tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, s));
+
+ tex->src[2].src_type = nir_tex_src_lod;
+ tex->src[2].src = nir_src_for_ssa(nir_imm_int(&b, 0));
+
+ tex->src[3].src_type = nir_tex_src_texture_deref;
+ tex->src[3].src = nir_src_for_ssa(&tex_deref->dest.ssa);
+
+ nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL);
+
+ nir_builder_instr_insert(&b, &tex->instr);
+ res = res ? nir_fadd(&b, res, &tex->dest.ssa) : &tex->dest.ssa;
+ }
+
+ if (nsamples > 1) {
+ unsigned type_sz = nir_alu_type_get_type_size(nir_out_type);
+ res = nir_fmul(&b, res, nir_imm_floatN_t(&b, 1.0f / nsamples, type_sz));
+ }
+ } else {
+ nir_tex_instr *tex =
+ nir_tex_instr_create(b.shader, ms ? 4 : 3);
+
+ tex->dest_type = nir_out_type;
+ tex->is_array = info->src_is_array;
+ tex->sampler_dim = info->sampler_dim;
+
+ if (ms) {
+ tex->op = nir_texop_txf_ms;
+
+ tex->src[0].src_type = nir_tex_src_coord;
+ tex->src[0].src = nir_src_for_ssa(nir_f2i32(&b, coord));
+ tex->coord_components = coord_comps;
+
+ tex->src[1].src_type = nir_tex_src_ms_index;
+ tex->src[1].src = nir_src_for_ssa(nir_load_sample_id(&b));
+
+ tex->src[2].src_type = nir_tex_src_lod;
+ tex->src[2].src = nir_src_for_ssa(nir_imm_int(&b, 0));
+
+ tex->src[3].src_type = nir_tex_src_texture_deref;
+ tex->src[3].src = nir_src_for_ssa(&tex_deref->dest.ssa);
+ } else {
+ nir_variable *sampler_var =
+ nir_variable_create(b.shader, nir_var_uniform, glsl_bare_sampler_type(), "sampler");
+ nir_deref_instr *sampler_deref = nir_build_deref_var(&b, sampler_var);
+
+ tex->op = nir_texop_tex;
+ tex->sampler_index = 0;
+
+ tex->src[0].src_type = nir_tex_src_coord;
+ tex->src[0].src = nir_src_for_ssa(coord);
+ tex->coord_components = coord_comps;
+
+ tex->src[1].src_type = nir_tex_src_texture_deref;
+ tex->src[1].src = nir_src_for_ssa(&tex_deref->dest.ssa);
+
+ tex->src[2].src_type = nir_tex_src_sampler_deref;
+ tex->src[2].src = nir_src_for_ssa(&sampler_deref->dest.ssa);
+ }
+
+ nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL);
+ nir_builder_instr_insert(&b, &tex->instr);
+ res = &tex->dest.ssa;
+ }
+
+ nir_store_var(&b, out, nir_channels(&b, res, (1 << out_comps) - 1), 0xf);
+
+ return b.shader;
+}
diff --git a/src/microsoft/vulkan/dzn_nir.h b/src/microsoft/vulkan/dzn_nir.h
new file mode 100644
index 00000000000..f3aced31a71
--- /dev/null
+++ b/src/microsoft/vulkan/dzn_nir.h
@@ -0,0 +1,138 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef DZN_NIR_H
+#define DZN_NIR_H
+
+#define D3D12_IGNORE_SDK_LAYERS
+#include <directx/d3d12.h>
+
+#include "nir.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct dzn_indirect_draw_params {
+ uint32_t vertex_count;
+ uint32_t instance_count;
+ uint32_t first_vertex;
+ uint32_t first_instance;
+};
+
+struct dzn_indirect_indexed_draw_params {
+ uint32_t index_count;
+ uint32_t instance_count;
+ uint32_t first_index;
+ int32_t vertex_offset;
+ uint32_t first_instance;
+};
+
+struct dzn_indirect_draw_rewrite_params {
+ uint32_t draw_buf_stride;
+};
+
+struct dzn_indirect_draw_triangle_fan_rewrite_params {
+ uint32_t draw_buf_stride;
+ uint32_t triangle_fan_index_buf_stride;
+ uint64_t triangle_fan_index_buf_start;
+};
+
+struct dzn_indirect_draw_exec_params {
+ struct {
+ uint32_t first_vertex;
+ uint32_t base_instance;
+ } sysvals;
+ union {
+ struct dzn_indirect_draw_params draw;
+ struct dzn_indirect_indexed_draw_params indexed_draw;
+ };
+};
+
+struct dzn_indirect_triangle_fan_draw_exec_params {
+ D3D12_INDEX_BUFFER_VIEW ibview;
+ struct {
+ uint32_t first_vertex;
+ uint32_t base_instance;
+ } sysvals;
+ union {
+ struct dzn_indirect_draw_params draw;
+ struct dzn_indirect_indexed_draw_params indexed_draw;
+ };
+};
+
+struct dzn_triangle_fan_rewrite_index_params {
+ union {
+ uint32_t first_index;
+ uint32_t first_vertex;
+ };
+};
+
+struct dzn_indirect_triangle_fan_rewrite_index_exec_params {
+ uint64_t new_index_buf;
+ struct dzn_triangle_fan_rewrite_index_params params;
+ struct {
+ uint32_t x, y, z;
+ } group_count;
+};
+
+enum dzn_indirect_draw_type {
+ DZN_INDIRECT_DRAW,
+ DZN_INDIRECT_INDEXED_DRAW,
+ DZN_INDIRECT_DRAW_TRIANGLE_FAN,
+ DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN,
+ DZN_NUM_INDIRECT_DRAW_TYPES,
+};
+
+nir_shader *
+dzn_nir_indirect_draw_shader(enum dzn_indirect_draw_type type);
+
+nir_shader *
+dzn_nir_triangle_fan_rewrite_index_shader(uint8_t old_index_size);
+
+struct dzn_nir_blit_info {
+ union {
+ struct {
+ uint32_t src_samples : 6;
+ uint32_t loc : 4;
+ uint32_t out_type : 4;
+ uint32_t sampler_dim : 4;
+ uint32_t src_is_array : 1;
+ uint32_t resolve : 1;
+ uint32_t padding : 12;
+ };
+ const uint32_t hash_key;
+ };
+};
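+
+/* The bitfields above sum to exactly 32 bits (6+4+4+4+1+1+12), so hash_key
+ * aliases the whole struct and can serve directly as a hash-table key.
+ */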
+
+nir_shader *
+dzn_nir_blit_vs(void);
+
+nir_shader *
+dzn_nir_blit_fs(const struct dzn_nir_blit_info *info);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/microsoft/vulkan/dzn_pass.cpp b/src/microsoft/vulkan/dzn_pass.cpp
new file mode 100644
index 00000000000..590d5137420
--- /dev/null
+++ b/src/microsoft/vulkan/dzn_pass.cpp
@@ -0,0 +1,159 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "dzn_private.h"
+
+#include "vk_alloc.h"
+#include "vk_format.h"
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_CreateRenderPass2(VkDevice dev,
+ const VkRenderPassCreateInfo2KHR *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkRenderPass *pRenderPass)
+{
+ VK_FROM_HANDLE(dzn_device, device, dev);
+
+ VK_MULTIALLOC(ma);
+ VK_MULTIALLOC_DECL(&ma, dzn_render_pass, pass, 1);
+ VK_MULTIALLOC_DECL(&ma, dzn_subpass, subpasses,
+ pCreateInfo->subpassCount);
+ VK_MULTIALLOC_DECL(&ma, dzn_attachment, attachments,
+ pCreateInfo->attachmentCount);
+
+ if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, pAllocator,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT))
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ vk_object_base_init(&device->vk, &pass->base, VK_OBJECT_TYPE_RENDER_PASS);
+ pass->subpasses = subpasses;
+ pass->subpass_count = pCreateInfo->subpassCount;
+ pass->attachments = attachments;
+ pass->attachment_count = pCreateInfo->attachmentCount;
+
+ assert(!pass->attachment_count || pass->attachments);
+ for (uint32_t i = 0; i < pass->attachment_count; i++) {
+ const VkAttachmentDescription2 *attachment = &pCreateInfo->pAttachments[i];
+
+ attachments[i].idx = i;
+ attachments[i].format = attachment->format;
+ assert(attachments[i].format);
+ if (vk_format_is_depth_or_stencil(attachment->format)) {
+ attachments[i].clear.depth =
+ attachment->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR;
+ attachments[i].clear.stencil =
+ attachment->stencilLoadOp == VK_ATTACHMENT_LOAD_OP_CLEAR;
+ } else {
+ attachments[i].clear.color =
+ attachment->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR;
+ }
+ attachments[i].samples = attachment->samples;
+ attachments[i].before = dzn_image_layout_to_state(attachment->initialLayout);
+ attachments[i].after = dzn_image_layout_to_state(attachment->finalLayout);
+ attachments[i].last = attachments[i].before;
+ }
+
+ assert(subpasses);
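+ /* Walk the subpasses in order, threading each attachment's `last` state
+ * forward so every reference records the before/during resource states
+ * needed to emit D3D12 transition barriers at subpass boundaries.
+ */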
+ for (uint32_t i = 0; i < pass->subpass_count; i++) {
+ const VkSubpassDescription2 *subpass = &pCreateInfo->pSubpasses[i];
+
+ for (uint32_t j = 0; j < subpass->colorAttachmentCount; j++) {
+ uint32_t idx = subpass->pColorAttachments[j].attachment;
+ subpasses[i].colors[j].idx = idx;
+ if (idx != VK_ATTACHMENT_UNUSED) {
+ subpasses[i].colors[j].before = attachments[idx].last;
+ subpasses[i].colors[j].during =
+ dzn_image_layout_to_state(subpass->pColorAttachments[j].layout);
+ attachments[idx].last = subpasses[i].colors[j].during;
+ subpasses[i].color_count = j + 1;
+ }
+
+ idx = subpass->pResolveAttachments ?
+ subpass->pResolveAttachments[j].attachment :
+ VK_ATTACHMENT_UNUSED;
+ subpasses[i].resolve[j].idx = idx;
+ if (idx != VK_ATTACHMENT_UNUSED) {
+ subpasses[i].resolve[j].before = attachments[idx].last;
+ subpasses[i].resolve[j].during =
+ dzn_image_layout_to_state(subpass->pResolveAttachments[j].layout);
+ attachments[idx].last = subpasses[i].resolve[j].during;
+ }
+ }
+
+ subpasses[i].zs.idx = VK_ATTACHMENT_UNUSED;
+ if (subpass->pDepthStencilAttachment) {
+ uint32_t idx = subpass->pDepthStencilAttachment->attachment;
+ subpasses[i].zs.idx = idx;
+ if (idx != VK_ATTACHMENT_UNUSED) {
+ subpasses[i].zs.before = attachments[idx].last;
+ subpasses[i].zs.during =
+ dzn_image_layout_to_state(subpass->pDepthStencilAttachment->layout);
+ attachments[idx].last = subpasses[i].zs.during;
+ }
+ }
+
+ subpasses[i].input_count = subpass->inputAttachmentCount;
+ for (uint32_t j = 0; j < subpasses[i].input_count; j++) {
+ uint32_t idx = subpass->pInputAttachments[j].attachment;
+ subpasses[i].inputs[j].idx = idx;
+ if (idx != VK_ATTACHMENT_UNUSED) {
+ subpasses[i].inputs[j].before = attachments[idx].last;
+ subpasses[i].inputs[j].during =
+ dzn_image_layout_to_state(subpass->pInputAttachments[j].layout);
+ attachments[idx].last = subpasses[i].inputs[j].during;
+ }
+ }
+ }
+
+ *pRenderPass = dzn_render_pass_to_handle(pass);
+ return VK_SUCCESS;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_DestroyRenderPass(VkDevice dev,
+ VkRenderPass p,
+ const VkAllocationCallbacks *pAllocator)
+{
+ VK_FROM_HANDLE(dzn_device, device, dev);
+ VK_FROM_HANDLE(dzn_render_pass, pass, p);
+
+ if (!pass)
+ return;
+
+ vk_object_base_finish(&pass->base);
+ vk_free2(&device->vk.alloc, pAllocator, pass);
+}
+
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_GetRenderAreaGranularity(VkDevice device,
+ VkRenderPass pass,
+ VkExtent2D *pGranularity)
+{
+ // FIXME: query the actual optimal granularity
+ pGranularity->width = pGranularity->height = 1;
+}
diff --git a/src/microsoft/vulkan/dzn_pipeline.cpp b/src/microsoft/vulkan/dzn_pipeline.cpp
new file mode 100644
index 00000000000..f21ff8d00ec
--- /dev/null
+++ b/src/microsoft/vulkan/dzn_pipeline.cpp
@@ -0,0 +1,1184 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "dzn_private.h"
+
+#include "spirv_to_dxil.h"
+
+#include "vk_alloc.h"
+#include "vk_util.h"
+#include "vk_format.h"
+
+#include <directx/d3d12.h>
+#include <dxguids/dxguids.h>
+
+#include <dxcapi.h>
+#include <wrl/client.h>
+
+#include "util/u_debug.h"
+
+using Microsoft::WRL::ComPtr;
+
+static dxil_spirv_shader_stage
+to_dxil_shader_stage(VkShaderStageFlagBits in)
+{
+ switch (in) {
+ case VK_SHADER_STAGE_VERTEX_BIT: return DXIL_SPIRV_SHADER_VERTEX;
+ case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: return DXIL_SPIRV_SHADER_TESS_CTRL;
+ case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: return DXIL_SPIRV_SHADER_TESS_EVAL;
+ case VK_SHADER_STAGE_GEOMETRY_BIT: return DXIL_SPIRV_SHADER_GEOMETRY;
+ case VK_SHADER_STAGE_FRAGMENT_BIT: return DXIL_SPIRV_SHADER_FRAGMENT;
+ case VK_SHADER_STAGE_COMPUTE_BIT: return DXIL_SPIRV_SHADER_COMPUTE;
+ default: unreachable("Unsupported stage");
+ }
+}
+
+static VkResult
+dzn_pipeline_compile_shader(dzn_device *device,
+ const VkAllocationCallbacks *alloc,
+ dzn_pipeline_layout *layout,
+ const VkPipelineShaderStageCreateInfo *stage_info,
+ enum dxil_spirv_yz_flip_mode yz_flip_mode,
+ uint16_t y_flip_mask, uint16_t z_flip_mask,
+ D3D12_SHADER_BYTECODE *slot)
+{
+ dzn_instance *instance =
+ container_of(device->vk.physical->instance, dzn_instance, vk);
+ IDxcValidator *validator = instance->dxc.validator;
+ IDxcLibrary *library = instance->dxc.library;
+ IDxcCompiler *compiler = instance->dxc.compiler;
+ const VkSpecializationInfo *spec_info = stage_info->pSpecializationInfo;
+ VK_FROM_HANDLE(vk_shader_module, module, stage_info->module);
+ struct dxil_spirv_object dxil_object;
+
+ /* convert VkSpecializationInfo */
+ struct dxil_spirv_specialization *spec = NULL;
+ uint32_t num_spec = 0;
+
+ if (spec_info && spec_info->mapEntryCount) {
+ spec = (struct dxil_spirv_specialization *)
+ vk_alloc2(&device->vk.alloc, alloc,
+ spec_info->mapEntryCount * sizeof(*spec), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+ if (!spec)
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ for (uint32_t i = 0; i < spec_info->mapEntryCount; i++) {
+ const VkSpecializationMapEntry *entry = &spec_info->pMapEntries[i];
+ const uint8_t *data = (const uint8_t *)spec_info->pData + entry->offset;
+ assert(data + entry->size <= (const uint8_t *)spec_info->pData + spec_info->dataSize);
+ spec[i].id = entry->constantID;
+ switch (entry->size) {
+ case 8:
+ spec[i].value.u64 = *(const uint64_t *)data;
+ break;
+ case 4:
+ spec[i].value.u32 = *(const uint32_t *)data;
+ break;
+ case 2:
+ spec[i].value.u16 = *(const uint16_t *)data;
+ break;
+ case 1:
+ spec[i].value.u8 = *(const uint8_t *)data;
+ break;
+ default:
+ assert(!"Invalid spec constant size");
+ break;
+ }
+
+ spec[i].defined_on_module = false;
+ }
+
+ num_spec = spec_info->mapEntryCount;
+ }
+
+ struct dxil_spirv_runtime_conf conf = {
+ .runtime_data_cbv = {
+ .register_space = DZN_REGISTER_SPACE_SYSVALS,
+ .base_shader_register = 0,
+ },
+ .push_constant_cbv = {
+ .register_space = DZN_REGISTER_SPACE_PUSH_CONSTANT,
+ .base_shader_register = 0,
+ },
+ .descriptor_set_count = layout->set_count,
+ .descriptor_sets = layout->binding_translation,
+ .zero_based_vertex_instance_id = false,
+ .yz_flip = {
+ .mode = yz_flip_mode,
+ .y_mask = y_flip_mask,
+ .z_mask = z_flip_mask,
+ },
+ .read_only_images_as_srvs = true,
+ };
+
+ struct dxil_spirv_debug_options dbg_opts = {
+ .dump_nir = !!(instance->debug_flags & DZN_DEBUG_NIR),
+ };
+
+ /* TODO: Extend spirv_to_dxil() to allow passing a custom allocator */
+ bool success =
+ spirv_to_dxil((uint32_t *)module->data, module->size / sizeof(uint32_t),
+ spec, num_spec,
+ to_dxil_shader_stage(stage_info->stage),
+ stage_info->pName, &dbg_opts, &conf, &dxil_object);
+
+ vk_free2(&device->vk.alloc, alloc, spec);
+
+ if (!success)
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ dzn_shader_blob blob(dxil_object.binary.buffer, dxil_object.binary.size);
+ ComPtr<IDxcOperationResult> result;
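+ /* DXIL must be validated (and hash-signed) before D3D12 will accept it
+ * at pipeline-creation time; DxcValidatorFlags_InPlaceEdit lets the
+ * validator stamp the hash directly into the existing blob.
+ */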
+ validator->Validate(&blob, DxcValidatorFlags_InPlaceEdit, &result);
+
+ if (instance->debug_flags & DZN_DEBUG_DXIL) {
+ IDxcBlobEncoding *disassembly;
+ compiler->Disassemble(&blob, &disassembly);
+ ComPtr<IDxcBlobEncoding> blobUtf8;
+ library->GetBlobAsUtf8(disassembly, blobUtf8.GetAddressOf());
+ char *disasm = reinterpret_cast<char*>(blobUtf8->GetBufferPointer());
+ disasm[blobUtf8->GetBufferSize() - 1] = 0;
+ fprintf(stderr, "== BEGIN SHADER ============================================\n"
+ "%s\n"
+ "== END SHADER ==============================================\n",
+ disasm);
+ disassembly->Release();
+ }
+
+ HRESULT validationStatus;
+ result->GetStatus(&validationStatus);
+ if (FAILED(validationStatus)) {
+ if (instance->debug_flags & DZN_DEBUG_DXIL) {
+ ComPtr<IDxcBlobEncoding> printBlob, printBlobUtf8;
+ result->GetErrorBuffer(&printBlob);
+ library->GetBlobAsUtf8(printBlob.Get(), printBlobUtf8.GetAddressOf());
+
+ char *errorString;
+ if (printBlobUtf8) {
+ errorString = reinterpret_cast<char*>(printBlobUtf8->GetBufferPointer());
+
+ errorString[printBlobUtf8->GetBufferSize() - 1] = 0;
+ fprintf(stderr,
+ "== VALIDATION ERROR =============================================\n"
+ "%s\n"
+ "== END ==========================================================\n",
+ errorString);
+ }
+ }
+
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+
+ slot->pShaderBytecode = dxil_object.binary.buffer;
+ slot->BytecodeLength = dxil_object.binary.size;
+ return VK_SUCCESS;
+}
+
+static D3D12_SHADER_BYTECODE *
+dzn_pipeline_get_gfx_shader_slot(D3D12_GRAPHICS_PIPELINE_STATE_DESC *desc,
+ VkShaderStageFlagBits in)
+{
+ switch (in) {
+ case VK_SHADER_STAGE_VERTEX_BIT: return &desc->VS;
+ case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: return &desc->HS;
+ case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: return &desc->DS;
+ case VK_SHADER_STAGE_GEOMETRY_BIT: return &desc->GS;
+ case VK_SHADER_STAGE_FRAGMENT_BIT: return &desc->PS;
+ default: unreachable("Unsupported stage");
+ }
+}
+
+static VkResult
+dzn_graphics_pipeline_translate_vi(dzn_graphics_pipeline *pipeline,
+ const VkAllocationCallbacks *alloc,
+ D3D12_GRAPHICS_PIPELINE_STATE_DESC *out,
+ const VkGraphicsPipelineCreateInfo *in,
+ D3D12_INPUT_ELEMENT_DESC **input_elems)
+{
+ dzn_device *device =
+ container_of(pipeline->base.base.device, dzn_device, vk);
+ const VkPipelineVertexInputStateCreateInfo *in_vi =
+ in->pVertexInputState;
+
+ if (!in_vi->vertexAttributeDescriptionCount) {
+ out->InputLayout.pInputElementDescs = NULL;
+ out->InputLayout.NumElements = 0;
+ *input_elems = NULL;
+ return VK_SUCCESS;
+ }
+
+ *input_elems = (D3D12_INPUT_ELEMENT_DESC *)
+ vk_alloc2(&device->vk.alloc, alloc,
+ sizeof(**input_elems) * in_vi->vertexAttributeDescriptionCount, 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+ if (!*input_elems)
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ D3D12_INPUT_ELEMENT_DESC *inputs = *input_elems;
+ D3D12_INPUT_CLASSIFICATION slot_class[MAX_VBS];
+
+ pipeline->vb.count = 0;
+ for (uint32_t i = 0; i < in_vi->vertexBindingDescriptionCount; i++) {
+ const struct VkVertexInputBindingDescription *bdesc =
+ &in_vi->pVertexBindingDescriptions[i];
+
+ pipeline->vb.count = MAX2(pipeline->vb.count, bdesc->binding + 1);
+ pipeline->vb.strides[bdesc->binding] = bdesc->stride;
+ if (bdesc->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE) {
+ slot_class[bdesc->binding] = D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA;
+ } else {
+ assert(bdesc->inputRate == VK_VERTEX_INPUT_RATE_VERTEX);
+ slot_class[bdesc->binding] = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA;
+ }
+ }
+
+ for (uint32_t i = 0; i < in_vi->vertexAttributeDescriptionCount; i++) {
+ const VkVertexInputAttributeDescription *attr =
+ &in_vi->pVertexAttributeDescriptions[i];
+
+ /* nir_to_dxil() names all vertex inputs TEXCOORDx */
+ inputs[i].SemanticName = "TEXCOORD";
+ inputs[i].SemanticIndex = attr->location;
+ inputs[i].Format = dzn_buffer_get_dxgi_format(attr->format);
+ inputs[i].InputSlot = attr->binding;
+ inputs[i].InputSlotClass = slot_class[attr->binding];
+ inputs[i].InstanceDataStepRate =
+ inputs[i].InputSlotClass == D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA ? 1 : 0;
+ inputs[i].AlignedByteOffset = attr->offset;
+ }
+
+ out->InputLayout.pInputElementDescs = inputs;
+ out->InputLayout.NumElements = in_vi->vertexAttributeDescriptionCount;
+ return VK_SUCCESS;
+}
+
+static D3D12_PRIMITIVE_TOPOLOGY_TYPE
+to_prim_topology_type(VkPrimitiveTopology in)
+{
+ switch (in) {
+ case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
+ return D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT;
+ case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
+ case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
+ case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
+ case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
+ return D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE;
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
+ return D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
+ case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
+ return D3D12_PRIMITIVE_TOPOLOGY_TYPE_PATCH;
+ default: unreachable("Invalid primitive topology");
+ }
+}
+
+static D3D12_PRIMITIVE_TOPOLOGY
+to_prim_topology(VkPrimitiveTopology in, unsigned patch_control_points)
+{
+ switch (in) {
+ case VK_PRIMITIVE_TOPOLOGY_POINT_LIST: return D3D_PRIMITIVE_TOPOLOGY_POINTLIST;
+ case VK_PRIMITIVE_TOPOLOGY_LINE_LIST: return D3D_PRIMITIVE_TOPOLOGY_LINELIST;
+ case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP: return D3D_PRIMITIVE_TOPOLOGY_LINESTRIP;
+ case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ;
+ case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ;
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP;
+ /* Triangle fans are emulated using an intermediate index buffer. */
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ;
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ;
+ case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
+ assert(patch_control_points);
+ return (D3D12_PRIMITIVE_TOPOLOGY)(D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST + patch_control_points - 1);
+ default: unreachable("Invalid primitive topology");
+ }
+}
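+
+/* D3D12 enumerates patch-list topologies contiguously from 1 to 32 control
+ * points, hence the arithmetic above: a patch list with 4 control points
+ * maps to D3D_PRIMITIVE_TOPOLOGY_4_CONTROL_POINT_PATCHLIST.
+ */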
+
+static void
+dzn_graphics_pipeline_translate_ia(dzn_graphics_pipeline *pipeline,
+ D3D12_GRAPHICS_PIPELINE_STATE_DESC *out,
+ const VkGraphicsPipelineCreateInfo *in)
+{
+ const VkPipelineInputAssemblyStateCreateInfo *in_ia =
+ in->pInputAssemblyState;
+ const VkPipelineTessellationStateCreateInfo *in_tes =
+ (out->DS.pShaderBytecode && out->HS.pShaderBytecode) ?
+ in->pTessellationState : NULL;
+
+ out->PrimitiveTopologyType = to_prim_topology_type(in_ia->topology);
+ pipeline->ia.triangle_fan = in_ia->topology == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;
+ pipeline->ia.topology =
+ to_prim_topology(in_ia->topology, in_tes ? in_tes->patchControlPoints : 0);
+
+ /* FIXME: a fixed 0xFFFFFFFF cut value doesn't match 16-bit index
+ * buffers, which need D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF.
+ */
+ if (in_ia->primitiveRestartEnable)
+ out->IBStripCutValue = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF;
+ else
+ out->IBStripCutValue = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED;
+}
+
+static D3D12_FILL_MODE
+translate_polygon_mode(VkPolygonMode in)
+{
+ switch (in) {
+ case VK_POLYGON_MODE_FILL: return D3D12_FILL_MODE_SOLID;
+ case VK_POLYGON_MODE_LINE: return D3D12_FILL_MODE_WIREFRAME;
+ default: unreachable("Unsupported polygon mode");
+ }
+}
+
+static D3D12_CULL_MODE
+translate_cull_mode(VkCullModeFlags in)
+{
+ switch (in) {
+ case VK_CULL_MODE_NONE: return D3D12_CULL_MODE_NONE;
+ case VK_CULL_MODE_FRONT_BIT: return D3D12_CULL_MODE_FRONT;
+ case VK_CULL_MODE_BACK_BIT: return D3D12_CULL_MODE_BACK;
+ /* Front+back face culling is equivalent to 'rasterization disabled' */
+ case VK_CULL_MODE_FRONT_AND_BACK: return D3D12_CULL_MODE_NONE;
+ default: unreachable("Unsupported cull mode");
+ }
+}
+
+static void
+dzn_graphics_pipeline_translate_rast(dzn_graphics_pipeline *pipeline,
+ D3D12_GRAPHICS_PIPELINE_STATE_DESC *out,
+ const VkGraphicsPipelineCreateInfo *in)
+{
+ const VkPipelineRasterizationStateCreateInfo *in_rast =
+ in->pRasterizationState;
+ const VkPipelineViewportStateCreateInfo *in_vp =
+ in->pViewportState;
+
+ if (in_vp) {
+ pipeline->vp.count = in_vp->viewportCount;
+ if (in_vp->pViewports) {
+ for (uint32_t i = 0; i < in_vp->viewportCount; i++)
+ dzn_translate_viewport(&pipeline->vp.desc[i], &in_vp->pViewports[i]);
+ }
+
+ pipeline->scissor.count = in_vp->scissorCount;
+ if (in_vp->pScissors) {
+ for (uint32_t i = 0; i < in_vp->scissorCount; i++)
+ dzn_translate_rect(&pipeline->scissor.desc[i], &in_vp->pScissors[i]);
+ }
+ }
+
+ out->RasterizerState.DepthClipEnable = !in_rast->depthClampEnable;
+ out->RasterizerState.FillMode = translate_polygon_mode(in_rast->polygonMode);
+ out->RasterizerState.CullMode = translate_cull_mode(in_rast->cullMode);
+ out->RasterizerState.FrontCounterClockwise =
+ in_rast->frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE;
+ if (in_rast->depthBiasEnable) {
+ out->RasterizerState.DepthBias = in_rast->depthBiasConstantFactor;
+ out->RasterizerState.SlopeScaledDepthBias = in_rast->depthBiasSlopeFactor;
+ out->RasterizerState.DepthBiasClamp = in_rast->depthBiasClamp;
+ }
+
+ assert(in_rast->lineWidth == 1.0f);
+}
+
+static void
+dzn_graphics_pipeline_translate_ms(dzn_graphics_pipeline *pipeline,
+ D3D12_GRAPHICS_PIPELINE_STATE_DESC *out,
+ const VkGraphicsPipelineCreateInfo *in)
+{
+ const VkPipelineMultisampleStateCreateInfo *in_ms =
+ in->pMultisampleState;
+
+ /* TODO: sampleShadingEnable, minSampleShading,
+ * alphaToOneEnable
+ */
+ out->SampleDesc.Count = in_ms ? in_ms->rasterizationSamples : 1;
+ out->SampleDesc.Quality = 0;
+ out->SampleMask = in_ms && in_ms->pSampleMask ?
+ *in_ms->pSampleMask :
+ (1 << out->SampleDesc.Count) - 1;
+}
+
+static D3D12_STENCIL_OP
+translate_stencil_op(VkStencilOp in)
+{
+ switch (in) {
+ case VK_STENCIL_OP_KEEP: return D3D12_STENCIL_OP_KEEP;
+ case VK_STENCIL_OP_ZERO: return D3D12_STENCIL_OP_ZERO;
+ case VK_STENCIL_OP_REPLACE: return D3D12_STENCIL_OP_REPLACE;
+ case VK_STENCIL_OP_INCREMENT_AND_CLAMP: return D3D12_STENCIL_OP_INCR_SAT;
+ case VK_STENCIL_OP_DECREMENT_AND_CLAMP: return D3D12_STENCIL_OP_DECR_SAT;
+ case VK_STENCIL_OP_INCREMENT_AND_WRAP: return D3D12_STENCIL_OP_INCR;
+ case VK_STENCIL_OP_DECREMENT_AND_WRAP: return D3D12_STENCIL_OP_DECR;
+ case VK_STENCIL_OP_INVERT: return D3D12_STENCIL_OP_INVERT;
+ default: unreachable("Invalid stencil op");
+ }
+}
+
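+/* D3D12's graphics PSO exposes a single StencilReadMask/StencilWriteMask
+ * (and command lists take a single stencil reference), whereas Vulkan
+ * allows independent front/back values. The helper below works out when
+ * the two faces genuinely need to differ, so unsupported combinations are
+ * caught by the assert at the end rather than silently miscompiled.
+ */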
+static void
+translate_stencil_test(dzn_graphics_pipeline *pipeline,
+ D3D12_GRAPHICS_PIPELINE_STATE_DESC *out,
+ const VkGraphicsPipelineCreateInfo *in)
+{
+ const VkPipelineDepthStencilStateCreateInfo *in_zsa =
+ in->pDepthStencilState;
+
+ bool front_test_uses_ref =
+ !(in->pRasterizationState->cullMode & VK_CULL_MODE_FRONT_BIT) &&
+ in_zsa->front.compareOp != VK_COMPARE_OP_NEVER &&
+ in_zsa->front.compareOp != VK_COMPARE_OP_ALWAYS &&
+ (pipeline->zsa.stencil_test.dynamic_compare_mask ||
+ in_zsa->front.compareMask != 0);
+ bool back_test_uses_ref =
+ !(in->pRasterizationState->cullMode & VK_CULL_MODE_BACK_BIT) &&
+ in_zsa->back.compareOp != VK_COMPARE_OP_NEVER &&
+ in_zsa->back.compareOp != VK_COMPARE_OP_ALWAYS &&
+ (pipeline->zsa.stencil_test.dynamic_compare_mask ||
+ in_zsa->back.compareMask != 0);
+
+ if (front_test_uses_ref && pipeline->zsa.stencil_test.dynamic_compare_mask)
+ pipeline->zsa.stencil_test.front.compare_mask = UINT32_MAX;
+ else if (front_test_uses_ref)
+ pipeline->zsa.stencil_test.front.compare_mask = in_zsa->front.compareMask;
+ else
+ pipeline->zsa.stencil_test.front.compare_mask = 0;
+
+ if (back_test_uses_ref && pipeline->zsa.stencil_test.dynamic_compare_mask)
+ pipeline->zsa.stencil_test.back.compare_mask = UINT32_MAX;
+ else if (back_test_uses_ref)
+ pipeline->zsa.stencil_test.back.compare_mask = in_zsa->back.compareMask;
+ else
+ pipeline->zsa.stencil_test.back.compare_mask = 0;
+
+ bool diff_wr_mask =
+ in->pRasterizationState->cullMode == VK_CULL_MODE_NONE &&
+ (pipeline->zsa.stencil_test.dynamic_write_mask ||
+ in_zsa->back.writeMask != in_zsa->front.writeMask);
+ bool diff_ref =
+ in->pRasterizationState->cullMode == VK_CULL_MODE_NONE &&
+ (pipeline->zsa.stencil_test.dynamic_ref ||
+ in_zsa->back.reference != in_zsa->front.reference);
+ bool diff_cmp_mask =
+ back_test_uses_ref && front_test_uses_ref &&
+ (pipeline->zsa.stencil_test.dynamic_compare_mask ||
+ pipeline->zsa.stencil_test.front.compare_mask != pipeline->zsa.stencil_test.back.compare_mask);
+
+ if (diff_cmp_mask || diff_wr_mask)
+ pipeline->zsa.stencil_test.independent_front_back = true;
+
+ bool back_wr_uses_ref =
+ !(in->pRasterizationState->cullMode & VK_CULL_MODE_BACK_BIT) &&
+ ((in_zsa->back.compareOp != VK_COMPARE_OP_ALWAYS &&
+ in_zsa->back.failOp == VK_STENCIL_OP_REPLACE) ||
+ (in_zsa->back.compareOp != VK_COMPARE_OP_NEVER &&
+ (!in_zsa->depthTestEnable || in_zsa->depthCompareOp != VK_COMPARE_OP_NEVER) &&
+ in_zsa->back.passOp == VK_STENCIL_OP_REPLACE) ||
+ (in_zsa->depthTestEnable &&
+ in_zsa->depthCompareOp != VK_COMPARE_OP_ALWAYS &&
+ in_zsa->back.depthFailOp == VK_STENCIL_OP_REPLACE));
+ bool front_wr_uses_ref =
+ !(in->pRasterizationState->cullMode & VK_CULL_MODE_FRONT_BIT) &&
+ ((in_zsa->front.compareOp != VK_COMPARE_OP_ALWAYS &&
+ in_zsa->front.failOp == VK_STENCIL_OP_REPLACE) ||
+ (in_zsa->front.compareOp != VK_COMPARE_OP_NEVER &&
+ (!in_zsa->depthTestEnable || in_zsa->depthCompareOp != VK_COMPARE_OP_NEVER) &&
+ in_zsa->front.passOp == VK_STENCIL_OP_REPLACE) ||
+ (in_zsa->depthTestEnable &&
+ in_zsa->depthCompareOp != VK_COMPARE_OP_ALWAYS &&
+ in_zsa->front.depthFailOp == VK_STENCIL_OP_REPLACE));
+
+ pipeline->zsa.stencil_test.front.write_mask =
+ (pipeline->zsa.stencil_test.dynamic_write_mask ||
+ (in->pRasterizationState->cullMode & VK_CULL_MODE_FRONT_BIT)) ?
+ 0 : in_zsa->front.writeMask;
+ pipeline->zsa.stencil_test.back.write_mask =
+ (pipeline->zsa.stencil_test.dynamic_write_mask ||
+ (in->pRasterizationState->cullMode & VK_CULL_MODE_BACK_BIT)) ?
+ 0 : in_zsa->back.writeMask;
+
+ pipeline->zsa.stencil_test.front.uses_ref = front_test_uses_ref || front_wr_uses_ref;
+ pipeline->zsa.stencil_test.back.uses_ref = back_test_uses_ref || back_wr_uses_ref;
+
+ if (diff_ref &&
+ pipeline->zsa.stencil_test.front.uses_ref &&
+ pipeline->zsa.stencil_test.back.uses_ref)
+ pipeline->zsa.stencil_test.independent_front_back = true;
+
+ pipeline->zsa.stencil_test.front.ref =
+ pipeline->zsa.stencil_test.dynamic_ref ? 0 : in_zsa->front.reference;
+ pipeline->zsa.stencil_test.back.ref =
+ pipeline->zsa.stencil_test.dynamic_ref ? 0 : in_zsa->back.reference;
+
+ /* FIXME: We don't support independent {compare,write}_mask and stencil
+ * reference. Until we have proper support for independent front/back
+ * stencil test, let's prioritize the front setup when both are active.
+ */
+ out->DepthStencilState.StencilReadMask =
+ front_test_uses_ref ?
+ pipeline->zsa.stencil_test.front.compare_mask :
+ back_test_uses_ref ?
+ pipeline->zsa.stencil_test.back.compare_mask : 0;
+ out->DepthStencilState.StencilWriteMask =
+ pipeline->zsa.stencil_test.front.write_mask ?
+ pipeline->zsa.stencil_test.front.write_mask :
+ pipeline->zsa.stencil_test.back.write_mask;
+
+ assert(!pipeline->zsa.stencil_test.independent_front_back);
+}
+
+static void
+dzn_graphics_pipeline_translate_zsa(dzn_graphics_pipeline *pipeline,
+ D3D12_GRAPHICS_PIPELINE_STATE_DESC *out,
+ const VkGraphicsPipelineCreateInfo *in)
+{
+ const VkPipelineDepthStencilStateCreateInfo *in_zsa =
+ in->pDepthStencilState;
+
+ if (!in_zsa)
+ return;
+
+ /* TODO: depthBoundsTestEnable */
+
+ out->DepthStencilState.DepthEnable = in_zsa->depthTestEnable;
+ out->DepthStencilState.DepthWriteMask =
+ in_zsa->depthWriteEnable ?
+ D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO;
+ out->DepthStencilState.DepthFunc =
+ dzn_translate_compare_op(in_zsa->depthCompareOp);
+ out->DepthStencilState.StencilEnable = in_zsa->stencilTestEnable;
+ if (in_zsa->stencilTestEnable) {
+ out->DepthStencilState.FrontFace.StencilFailOp =
+ translate_stencil_op(in_zsa->front.failOp);
+ out->DepthStencilState.FrontFace.StencilDepthFailOp =
+ translate_stencil_op(in_zsa->front.depthFailOp);
+ out->DepthStencilState.FrontFace.StencilPassOp =
+ translate_stencil_op(in_zsa->front.passOp);
+ out->DepthStencilState.FrontFace.StencilFunc =
+ dzn_translate_compare_op(in_zsa->front.compareOp);
+ out->DepthStencilState.BackFace.StencilFailOp =
+ translate_stencil_op(in_zsa->back.failOp);
+ out->DepthStencilState.BackFace.StencilDepthFailOp =
+ translate_stencil_op(in_zsa->back.depthFailOp);
+ out->DepthStencilState.BackFace.StencilPassOp =
+ translate_stencil_op(in_zsa->back.passOp);
+ out->DepthStencilState.BackFace.StencilFunc =
+ dzn_translate_compare_op(in_zsa->back.compareOp);
+
+ pipeline->zsa.stencil_test.enable = true;
+
+ translate_stencil_test(pipeline, out, in);
+ }
+}
+
+static D3D12_BLEND
+translate_blend_factor(VkBlendFactor in)
+{
+ switch (in) {
+ case VK_BLEND_FACTOR_ZERO: return D3D12_BLEND_ZERO;
+ case VK_BLEND_FACTOR_ONE: return D3D12_BLEND_ONE;
+ case VK_BLEND_FACTOR_SRC_COLOR: return D3D12_BLEND_SRC_COLOR;
+ case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR: return D3D12_BLEND_INV_SRC_COLOR;
+ case VK_BLEND_FACTOR_DST_COLOR: return D3D12_BLEND_DEST_COLOR;
+ case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR: return D3D12_BLEND_INV_DEST_COLOR;
+ case VK_BLEND_FACTOR_SRC_ALPHA: return D3D12_BLEND_SRC_ALPHA;
+ case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: return D3D12_BLEND_INV_SRC_ALPHA;
+ case VK_BLEND_FACTOR_DST_ALPHA: return D3D12_BLEND_DEST_ALPHA;
+ case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA: return D3D12_BLEND_INV_DEST_ALPHA;
+ /* FIXME: no way to isolate the alpha and color constants */
+ case VK_BLEND_FACTOR_CONSTANT_COLOR:
+ case VK_BLEND_FACTOR_CONSTANT_ALPHA:
+ return D3D12_BLEND_BLEND_FACTOR;
+ case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
+ case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
+ return D3D12_BLEND_INV_BLEND_FACTOR;
+ case VK_BLEND_FACTOR_SRC1_COLOR: return D3D12_BLEND_SRC1_COLOR;
+ case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR: return D3D12_BLEND_INV_SRC1_COLOR;
+ case VK_BLEND_FACTOR_SRC1_ALPHA: return D3D12_BLEND_SRC1_ALPHA;
+ case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA: return D3D12_BLEND_INV_SRC1_ALPHA;
+ case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE: return D3D12_BLEND_SRC_ALPHA_SAT;
+ default: unreachable("Invalid blend factor");
+ }
+}
+
+static D3D12_BLEND_OP
+translate_blend_op(VkBlendOp in)
+{
+ switch (in) {
+ case VK_BLEND_OP_ADD: return D3D12_BLEND_OP_ADD;
+ case VK_BLEND_OP_SUBTRACT: return D3D12_BLEND_OP_SUBTRACT;
+ case VK_BLEND_OP_REVERSE_SUBTRACT: return D3D12_BLEND_OP_REV_SUBTRACT;
+ case VK_BLEND_OP_MIN: return D3D12_BLEND_OP_MIN;
+ case VK_BLEND_OP_MAX: return D3D12_BLEND_OP_MAX;
+ default: unreachable("Invalid blend op");
+ }
+}
+
+static D3D12_LOGIC_OP
+translate_logic_op(VkLogicOp in)
+{
+ switch (in) {
+ case VK_LOGIC_OP_CLEAR: return D3D12_LOGIC_OP_CLEAR;
+ case VK_LOGIC_OP_AND: return D3D12_LOGIC_OP_AND;
+ case VK_LOGIC_OP_AND_REVERSE: return D3D12_LOGIC_OP_AND_REVERSE;
+ case VK_LOGIC_OP_COPY: return D3D12_LOGIC_OP_COPY;
+ case VK_LOGIC_OP_AND_INVERTED: return D3D12_LOGIC_OP_AND_INVERTED;
+ case VK_LOGIC_OP_NO_OP: return D3D12_LOGIC_OP_NOOP;
+ case VK_LOGIC_OP_XOR: return D3D12_LOGIC_OP_XOR;
+ case VK_LOGIC_OP_OR: return D3D12_LOGIC_OP_OR;
+ case VK_LOGIC_OP_NOR: return D3D12_LOGIC_OP_NOR;
+ case VK_LOGIC_OP_EQUIVALENT: return D3D12_LOGIC_OP_EQUIV;
+ case VK_LOGIC_OP_INVERT: return D3D12_LOGIC_OP_INVERT;
+ case VK_LOGIC_OP_OR_REVERSE: return D3D12_LOGIC_OP_OR_REVERSE;
+ case VK_LOGIC_OP_COPY_INVERTED: return D3D12_LOGIC_OP_COPY_INVERTED;
+ case VK_LOGIC_OP_OR_INVERTED: return D3D12_LOGIC_OP_OR_INVERTED;
+ case VK_LOGIC_OP_NAND: return D3D12_LOGIC_OP_NAND;
+ case VK_LOGIC_OP_SET: return D3D12_LOGIC_OP_SET;
+ default: unreachable("Invalid logic op");
+ }
+}
+
+static void
+dzn_graphics_pipeline_translate_blend(dzn_graphics_pipeline *pipeline,
+ D3D12_GRAPHICS_PIPELINE_STATE_DESC *out,
+ const VkGraphicsPipelineCreateInfo *in)
+{
+ const VkPipelineColorBlendStateCreateInfo *in_blend =
+ in->pColorBlendState;
+ const VkPipelineMultisampleStateCreateInfo *in_ms =
+ in->pMultisampleState;
+
+ if (!in_blend || !in_ms)
+ return;
+
+ D3D12_LOGIC_OP logicop =
+ in_blend->logicOpEnable ?
+ translate_logic_op(in_blend->logicOp) : D3D12_LOGIC_OP_NOOP;
+ out->BlendState.AlphaToCoverageEnable = in_ms->alphaToCoverageEnable;
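+   /* D3D12 exposes a single IndependentBlendEnable flag for the whole PSO:
+    * set it as soon as two consecutive attachments use different blend
+    * states.
+    */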
+ for (uint32_t i = 0; i < in_blend->attachmentCount; i++) {
+      if (i > 0 &&
+          memcmp(&in_blend->pAttachments[i - 1], &in_blend->pAttachments[i],
+                 sizeof(*in_blend->pAttachments)) != 0)
+         out->BlendState.IndependentBlendEnable = true;
+
+      out->BlendState.RenderTarget[i].BlendEnable =
+         in_blend->pAttachments[i].blendEnable;
+ out->BlendState.RenderTarget[i].RenderTargetWriteMask =
+ in_blend->pAttachments[i].colorWriteMask;
+ if (in_blend->logicOpEnable) {
+ out->BlendState.RenderTarget[i].LogicOpEnable = true;
+ out->BlendState.RenderTarget[i].LogicOp = logicop;
+ } else {
+ out->BlendState.RenderTarget[i].SrcBlend =
+ translate_blend_factor(in_blend->pAttachments[i].srcColorBlendFactor);
+ out->BlendState.RenderTarget[i].DestBlend =
+ translate_blend_factor(in_blend->pAttachments[i].dstColorBlendFactor);
+ out->BlendState.RenderTarget[i].BlendOp =
+ translate_blend_op(in_blend->pAttachments[i].colorBlendOp);
+ out->BlendState.RenderTarget[i].SrcBlendAlpha =
+ translate_blend_factor(in_blend->pAttachments[i].srcAlphaBlendFactor);
+ out->BlendState.RenderTarget[i].DestBlendAlpha =
+ translate_blend_factor(in_blend->pAttachments[i].dstAlphaBlendFactor);
+ out->BlendState.RenderTarget[i].BlendOpAlpha =
+ translate_blend_op(in_blend->pAttachments[i].alphaBlendOp);
+ }
+ }
+}
+
+static void
+dzn_pipeline_init(dzn_pipeline *pipeline,
+ dzn_device *device,
+ VkPipelineBindPoint type,
+ dzn_pipeline_layout *layout)
+{
+ pipeline->type = type;
+ pipeline->root.sets_param_count = layout->root.sets_param_count;
+ pipeline->root.sysval_cbv_param_idx = layout->root.sysval_cbv_param_idx;
+ pipeline->root.push_constant_cbv_param_idx = layout->root.push_constant_cbv_param_idx;
+ STATIC_ASSERT(sizeof(pipeline->root.type) == sizeof(layout->root.type));
+ memcpy(pipeline->root.type, layout->root.type, sizeof(pipeline->root.type));
+ pipeline->root.sig = layout->root.sig;
+ pipeline->root.sig->AddRef();
+
+ STATIC_ASSERT(sizeof(layout->desc_count) == sizeof(pipeline->desc_count));
+ memcpy(pipeline->desc_count, layout->desc_count, sizeof(pipeline->desc_count));
+
+ STATIC_ASSERT(sizeof(layout->sets) == sizeof(pipeline->sets));
+ memcpy(pipeline->sets, layout->sets, sizeof(pipeline->sets));
+ vk_object_base_init(&device->vk, &pipeline->base, VK_OBJECT_TYPE_PIPELINE);
+}
+
+static void
+dzn_pipeline_finish(dzn_pipeline *pipeline)
+{
+ if (pipeline->state)
+ pipeline->state->Release();
+ if (pipeline->root.sig)
+ pipeline->root.sig->Release();
+
+ vk_object_base_finish(&pipeline->base);
+}
+
+static void
+dzn_graphics_pipeline_destroy(dzn_graphics_pipeline *pipeline,
+ const VkAllocationCallbacks *alloc)
+{
+ if (!pipeline)
+ return;
+
+ for (uint32_t i = 0; i < ARRAY_SIZE(pipeline->indirect_cmd_sigs); i++) {
+ if (pipeline->indirect_cmd_sigs[i])
+ pipeline->indirect_cmd_sigs[i]->Release();
+ }
+
+ dzn_pipeline_finish(&pipeline->base);
+ vk_free2(&pipeline->base.base.device->alloc, alloc, pipeline);
+}
+
+static VkResult
+dzn_graphics_pipeline_create(dzn_device *device,
+ VkPipelineCache cache,
+ const VkGraphicsPipelineCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkPipeline *out)
+{
+ VK_FROM_HANDLE(dzn_render_pass, pass, pCreateInfo->renderPass);
+ VK_FROM_HANDLE(dzn_pipeline_layout, layout, pCreateInfo->layout);
+ const dzn_subpass *subpass = &pass->subpasses[pCreateInfo->subpass];
+ uint32_t stage_mask = 0;
+ VkResult ret;
+ HRESULT hres = 0;
+
+ dzn_graphics_pipeline *pipeline = (dzn_graphics_pipeline *)
+ vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (!pipeline)
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ dzn_pipeline_init(&pipeline->base, device,
+ VK_PIPELINE_BIND_POINT_GRAPHICS,
+ layout);
+ D3D12_INPUT_ELEMENT_DESC *inputs = NULL;
+ D3D12_GRAPHICS_PIPELINE_STATE_DESC desc = {
+ .pRootSignature = pipeline->base.root.sig,
+ .Flags = D3D12_PIPELINE_STATE_FLAG_NONE,
+ };
+
+ ret = dzn_graphics_pipeline_translate_vi(pipeline, pAllocator, &desc, pCreateInfo, &inputs);
+ if (ret != VK_SUCCESS)
+ goto out;
+
+ if (pCreateInfo->pDynamicState) {
+ for (uint32_t i = 0; i < pCreateInfo->pDynamicState->dynamicStateCount; i++) {
+ switch (pCreateInfo->pDynamicState->pDynamicStates[i]) {
+ case VK_DYNAMIC_STATE_VIEWPORT:
+ pipeline->vp.dynamic = true;
+ break;
+ case VK_DYNAMIC_STATE_SCISSOR:
+ pipeline->scissor.dynamic = true;
+ break;
+ case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
+ pipeline->zsa.stencil_test.dynamic_ref = true;
+ break;
+ case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
+ pipeline->zsa.stencil_test.dynamic_compare_mask = true;
+ break;
+ case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
+ pipeline->zsa.stencil_test.dynamic_write_mask = true;
+ break;
+ default: unreachable("Unsupported dynamic state");
+ }
+ }
+ }
+
+ dzn_graphics_pipeline_translate_ia(pipeline, &desc, pCreateInfo);
+ dzn_graphics_pipeline_translate_rast(pipeline, &desc, pCreateInfo);
+ dzn_graphics_pipeline_translate_ms(pipeline, &desc, pCreateInfo);
+ dzn_graphics_pipeline_translate_zsa(pipeline, &desc, pCreateInfo);
+ dzn_graphics_pipeline_translate_blend(pipeline, &desc, pCreateInfo);
+
+ desc.NumRenderTargets = subpass->color_count;
+ for (uint32_t i = 0; i < subpass->color_count; i++) {
+ uint32_t idx = subpass->colors[i].idx;
+
+ if (idx == VK_ATTACHMENT_UNUSED) continue;
+
+ const struct dzn_attachment *attachment = &pass->attachments[idx];
+
+ desc.RTVFormats[i] =
+ dzn_image_get_dxgi_format(attachment->format,
+ VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
+ VK_IMAGE_ASPECT_COLOR_BIT);
+ }
+
+ if (subpass->zs.idx != VK_ATTACHMENT_UNUSED) {
+ const struct dzn_attachment *attachment =
+ &pass->attachments[subpass->zs.idx];
+
+ desc.DSVFormat =
+ dzn_image_get_dxgi_format(attachment->format,
+ VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
+ VK_IMAGE_ASPECT_DEPTH_BIT |
+ VK_IMAGE_ASPECT_STENCIL_BIT);
+ }
+
+ for (uint32_t i = 0; i < pCreateInfo->stageCount; i++)
+ stage_mask |= pCreateInfo->pStages[i].stage;
+
+ for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
+ if (pCreateInfo->pStages[i].stage == VK_SHADER_STAGE_FRAGMENT_BIT &&
+ pCreateInfo->pRasterizationState &&
+ (pCreateInfo->pRasterizationState->rasterizerDiscardEnable ||
+ pCreateInfo->pRasterizationState->cullMode == VK_CULL_MODE_FRONT_AND_BACK)) {
+ /* Disable rasterization (AKA leave fragment shader NULL) when
+ * front+back culling or discard is set.
+ */
+ continue;
+ }
+
+ D3D12_SHADER_BYTECODE *slot =
+ dzn_pipeline_get_gfx_shader_slot(&desc, pCreateInfo->pStages[i].stage);
+ enum dxil_spirv_yz_flip_mode yz_flip_mode = DXIL_SPIRV_YZ_FLIP_NONE;
+ uint16_t y_flip_mask = 0, z_flip_mask = 0;
+
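+      /* Vulkan's clip space has Y pointing down and allows negative viewport
+       * heights and inverted depth ranges, which D3D12 doesn't. The flip is
+       * baked into the last pre-rasterization stage, or made conditional on
+       * runtime data when viewports are dynamic.
+       */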
+ if (pCreateInfo->pStages[i].stage == VK_SHADER_STAGE_GEOMETRY_BIT ||
+ (pCreateInfo->pStages[i].stage == VK_SHADER_STAGE_VERTEX_BIT &&
+ !(stage_mask & VK_SHADER_STAGE_GEOMETRY_BIT))) {
+ if (pipeline->vp.dynamic) {
+ yz_flip_mode = DXIL_SPIRV_YZ_FLIP_CONDITIONAL;
+ } else if (pCreateInfo->pViewportState) {
+ const VkPipelineViewportStateCreateInfo *vp_info =
+ pCreateInfo->pViewportState;
+
+ for (uint32_t i = 0; vp_info->pViewports && i < vp_info->viewportCount; i++) {
+ if (vp_info->pViewports[i].height > 0)
+ y_flip_mask |= BITFIELD_BIT(i);
+
+ if (vp_info->pViewports[i].minDepth > vp_info->pViewports[i].maxDepth)
+ z_flip_mask |= BITFIELD_BIT(i);
+ }
+
+ if (y_flip_mask && z_flip_mask)
+ yz_flip_mode = DXIL_SPIRV_YZ_FLIP_UNCONDITIONAL;
+ else if (z_flip_mask)
+ yz_flip_mode = DXIL_SPIRV_Z_FLIP_UNCONDITIONAL;
+ else if (y_flip_mask)
+ yz_flip_mode = DXIL_SPIRV_Y_FLIP_UNCONDITIONAL;
+ }
+ }
+
+ ret = dzn_pipeline_compile_shader(device, pAllocator,
+ layout, &pCreateInfo->pStages[i],
+ yz_flip_mode, y_flip_mask, z_flip_mask, slot);
+ if (ret != VK_SUCCESS)
+ goto out;
+ }
+
+ hres = device->dev->CreateGraphicsPipelineState(&desc,
+ IID_PPV_ARGS(&pipeline->base.state));
+ if (FAILED(hres)) {
+ ret = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+ goto out;
+ }
+
+ ret = VK_SUCCESS;
+
+out:
+ for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
+ D3D12_SHADER_BYTECODE *slot =
+ dzn_pipeline_get_gfx_shader_slot(&desc, pCreateInfo->pStages[i].stage);
+ free((void *)slot->pShaderBytecode);
+ }
+
+ vk_free2(&device->vk.alloc, pAllocator, inputs);
+ if (ret != VK_SUCCESS)
+ dzn_graphics_pipeline_destroy(pipeline, pAllocator);
+ else
+ *out = dzn_graphics_pipeline_to_handle(pipeline);
+
+ return ret;
+}
+
+#define DZN_INDIRECT_CMD_SIG_MAX_ARGS 3
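+/* At most three arguments per indirect command: an optional index-buffer
+ * view (triangle-fan rewrite), a root-constant update for the draw sysvals,
+ * and the draw/draw-indexed argument itself.
+ */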
+
+ID3D12CommandSignature *
+dzn_graphics_pipeline_get_indirect_cmd_sig(dzn_graphics_pipeline *pipeline,
+ enum dzn_indirect_draw_cmd_sig_type type)
+{
+ assert(type < DZN_NUM_INDIRECT_DRAW_CMD_SIGS);
+
+ dzn_device *device =
+ container_of(pipeline->base.base.device, dzn_device, vk);
+ ID3D12CommandSignature *cmdsig = pipeline->indirect_cmd_sigs[type];
+
+ if (cmdsig)
+ return cmdsig;
+
+ bool triangle_fan = type == DZN_INDIRECT_DRAW_TRIANGLE_FAN_CMD_SIG;
+ bool indexed = type == DZN_INDIRECT_INDEXED_DRAW_CMD_SIG || triangle_fan;
+
+ uint32_t cmd_arg_count = 0;
+ D3D12_INDIRECT_ARGUMENT_DESC cmd_args[DZN_INDIRECT_CMD_SIG_MAX_ARGS];
+
+ if (triangle_fan) {
+ cmd_args[cmd_arg_count++] = D3D12_INDIRECT_ARGUMENT_DESC {
+ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_INDEX_BUFFER_VIEW,
+ };
+ }
+
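+   /* firstVertex/firstInstance aren't available as system values on the
+    * D3D12 side, so they are pushed to the shader as root constants updated
+    * by the command signature.
+    */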
+ cmd_args[cmd_arg_count++] = D3D12_INDIRECT_ARGUMENT_DESC {
+ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT,
+ .Constant = {
+ .RootParameterIndex = pipeline->base.root.sysval_cbv_param_idx,
+ .DestOffsetIn32BitValues = offsetof(struct dxil_spirv_vertex_runtime_data, first_vertex) / 4,
+ .Num32BitValuesToSet = 2,
+ },
+ };
+
+ cmd_args[cmd_arg_count++] = D3D12_INDIRECT_ARGUMENT_DESC {
+ .Type = indexed ?
+ D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED :
+ D3D12_INDIRECT_ARGUMENT_TYPE_DRAW,
+ };
+
+ assert(cmd_arg_count <= ARRAY_SIZE(cmd_args));
+ assert(offsetof(struct dxil_spirv_vertex_runtime_data, first_vertex) == 0);
+
+ D3D12_COMMAND_SIGNATURE_DESC cmd_sig_desc = {
+ .ByteStride =
+ triangle_fan ?
+ sizeof(struct dzn_indirect_triangle_fan_draw_exec_params) :
+ sizeof(struct dzn_indirect_draw_exec_params),
+ .NumArgumentDescs = cmd_arg_count,
+ .pArgumentDescs = cmd_args,
+ };
+ HRESULT hres =
+ device->dev->CreateCommandSignature(&cmd_sig_desc,
+ pipeline->base.root.sig,
+ IID_PPV_ARGS(&cmdsig));
+ if (FAILED(hres))
+ return NULL;
+
+ pipeline->indirect_cmd_sigs[type] = cmdsig;
+ return cmdsig;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_CreateGraphicsPipelines(VkDevice dev,
+ VkPipelineCache pipelineCache,
+ uint32_t count,
+ const VkGraphicsPipelineCreateInfo *pCreateInfos,
+ const VkAllocationCallbacks *pAllocator,
+ VkPipeline *pPipelines)
+{
+ VK_FROM_HANDLE(dzn_device, device, dev);
+ VkResult result = VK_SUCCESS;
+
+ unsigned i;
+ for (i = 0; i < count; i++) {
+ result = dzn_graphics_pipeline_create(device,
+ pipelineCache,
+ &pCreateInfos[i],
+ pAllocator,
+ &pPipelines[i]);
+ if (result != VK_SUCCESS) {
+ pPipelines[i] = VK_NULL_HANDLE;
+
+         /* Bail out on the first error other than
+          * VK_PIPELINE_COMPILE_REQUIRED_EXT, as it is not obvious which
+          * error should be reported when two different failures occur.
+          */
+ if (result != VK_PIPELINE_COMPILE_REQUIRED_EXT)
+ break;
+
+ if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT)
+ break;
+ }
+ }
+
+ for (; i < count; i++)
+ pPipelines[i] = VK_NULL_HANDLE;
+
+ return result;
+}
+
+static void
+dzn_compute_pipeline_destroy(dzn_compute_pipeline *pipeline,
+ const VkAllocationCallbacks *alloc)
+{
+ if (!pipeline)
+ return;
+
+ if (pipeline->indirect_cmd_sig)
+ pipeline->indirect_cmd_sig->Release();
+
+ dzn_pipeline_finish(&pipeline->base);
+ vk_free2(&pipeline->base.base.device->alloc, alloc, pipeline);
+}
+
+static VkResult
+dzn_compute_pipeline_create(dzn_device *device,
+ VkPipelineCache cache,
+ const VkComputePipelineCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkPipeline *out)
+{
+ VK_FROM_HANDLE(dzn_pipeline_layout, layout, pCreateInfo->layout);
+
+ dzn_compute_pipeline *pipeline = (dzn_compute_pipeline *)
+ vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (!pipeline)
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ dzn_pipeline_init(&pipeline->base, device,
+ VK_PIPELINE_BIND_POINT_COMPUTE,
+ layout);
+
+ D3D12_COMPUTE_PIPELINE_STATE_DESC desc = {
+ .pRootSignature = pipeline->base.root.sig,
+ .Flags = D3D12_PIPELINE_STATE_FLAG_NONE,
+ };
+
+ VkResult ret =
+ dzn_pipeline_compile_shader(device, pAllocator, layout,
+ &pCreateInfo->stage,
+ DXIL_SPIRV_YZ_FLIP_NONE, 0, 0,
+ &desc.CS);
+ if (ret != VK_SUCCESS)
+ goto out;
+
+ if (FAILED(device->dev->CreateComputePipelineState(&desc,
+ IID_PPV_ARGS(&pipeline->base.state)))) {
+ ret = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+ goto out;
+ }
+
+out:
+ free((void *)desc.CS.pShaderBytecode);
+ if (ret != VK_SUCCESS)
+ dzn_compute_pipeline_destroy(pipeline, pAllocator);
+ else
+ *out = dzn_compute_pipeline_to_handle(pipeline);
+
+ return ret;
+}
+
+ID3D12CommandSignature *
+dzn_compute_pipeline_get_indirect_cmd_sig(dzn_compute_pipeline *pipeline)
+{
+ if (pipeline->indirect_cmd_sig)
+ return pipeline->indirect_cmd_sig;
+
+ dzn_device *device =
+ container_of(pipeline->base.base.device, dzn_device, vk);
+
+ D3D12_INDIRECT_ARGUMENT_DESC indirect_dispatch_args[] = {
+ {
+ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT,
+ .Constant = {
+ .RootParameterIndex = pipeline->base.root.sysval_cbv_param_idx,
+ .DestOffsetIn32BitValues = 0,
+ .Num32BitValuesToSet = 3,
+ },
+ },
+ {
+ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH,
+ },
+ };
+
+ D3D12_COMMAND_SIGNATURE_DESC indirect_dispatch_desc = {
+ .ByteStride = sizeof(D3D12_DISPATCH_ARGUMENTS) * 2,
+ .NumArgumentDescs = ARRAY_SIZE(indirect_dispatch_args),
+ .pArgumentDescs = indirect_dispatch_args,
+ };
+
+ HRESULT hres =
+ device->dev->CreateCommandSignature(&indirect_dispatch_desc,
+ pipeline->base.root.sig,
+ IID_PPV_ARGS(&pipeline->indirect_cmd_sig));
+ if (FAILED(hres))
+ return NULL;
+
+ return pipeline->indirect_cmd_sig;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_CreateComputePipelines(VkDevice dev,
+ VkPipelineCache pipelineCache,
+ uint32_t count,
+ const VkComputePipelineCreateInfo *pCreateInfos,
+ const VkAllocationCallbacks *pAllocator,
+ VkPipeline *pPipelines)
+{
+ VK_FROM_HANDLE(dzn_device, device, dev);
+ VkResult result = VK_SUCCESS;
+
+ unsigned i;
+ for (i = 0; i < count; i++) {
+ result = dzn_compute_pipeline_create(device,
+ pipelineCache,
+ &pCreateInfos[i],
+ pAllocator,
+ &pPipelines[i]);
+ if (result != VK_SUCCESS) {
+ pPipelines[i] = VK_NULL_HANDLE;
+
+         /* Bail out on the first error other than
+          * VK_PIPELINE_COMPILE_REQUIRED_EXT, as it is not obvious which
+          * error should be reported when two different failures occur.
+          */
+ if (result != VK_PIPELINE_COMPILE_REQUIRED_EXT)
+ break;
+
+ if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT)
+ break;
+ }
+ }
+
+ for (; i < count; i++)
+ pPipelines[i] = VK_NULL_HANDLE;
+
+ return result;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_DestroyPipeline(VkDevice device,
+ VkPipeline pipeline,
+ const VkAllocationCallbacks *pAllocator)
+{
+ VK_FROM_HANDLE(dzn_pipeline, pipe, pipeline);
+
+ if (!pipe)
+ return;
+
+ if (pipe->type == VK_PIPELINE_BIND_POINT_GRAPHICS) {
+ dzn_graphics_pipeline *gfx = container_of(pipe, dzn_graphics_pipeline, base);
+ dzn_graphics_pipeline_destroy(gfx, pAllocator);
+ } else {
+ assert(pipe->type == VK_PIPELINE_BIND_POINT_COMPUTE);
+ dzn_compute_pipeline *compute = container_of(pipe, dzn_compute_pipeline, base);
+ dzn_compute_pipeline_destroy(compute, pAllocator);
+ }
+}
diff --git a/src/microsoft/vulkan/dzn_pipeline_cache.cpp b/src/microsoft/vulkan/dzn_pipeline_cache.cpp
new file mode 100644
index 00000000000..321ff840553
--- /dev/null
+++ b/src/microsoft/vulkan/dzn_pipeline_cache.cpp
@@ -0,0 +1,99 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "dzn_private.h"
+
+#include "vk_alloc.h"
+
+static void
+dzn_pipeline_cache_destroy(dzn_pipeline_cache *pcache,
+ const VkAllocationCallbacks *pAllocator)
+{
+ if (!pcache)
+ return;
+
+ dzn_device *device = container_of(pcache->base.device, dzn_device, vk);
+
+ vk_object_base_finish(&pcache->base);
+ vk_free2(&device->vk.alloc, pAllocator, pcache);
+}
+
+static VkResult
+dzn_pipeline_cache_create(dzn_device *device,
+ const VkPipelineCacheCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkPipelineCache *out)
+{
+ dzn_pipeline_cache *pcache = (dzn_pipeline_cache *)
+ vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*pcache), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (!pcache)
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ vk_object_base_init(&device->vk, &pcache->base, VK_OBJECT_TYPE_PIPELINE_CACHE);
+
+ /* TODO: cache-ism! */
+
+ *out = dzn_pipeline_cache_to_handle(pcache);
+ return VK_SUCCESS;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_CreatePipelineCache(VkDevice device,
+ const VkPipelineCacheCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkPipelineCache *pPipelineCache)
+{
+ return dzn_pipeline_cache_create(dzn_device_from_handle(device),
+ pCreateInfo, pAllocator, pPipelineCache);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_DestroyPipelineCache(VkDevice device,
+ VkPipelineCache pipelineCache,
+ const VkAllocationCallbacks *pAllocator)
+{
+ return dzn_pipeline_cache_destroy(dzn_pipeline_cache_from_handle(pipelineCache),
+ pAllocator);
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_GetPipelineCacheData(VkDevice device,
+ VkPipelineCache pipelineCache,
+ size_t *pDataSize,
+ void *pData)
+{
+ // FIXME
+ *pDataSize = 0;
+ return VK_SUCCESS;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_MergePipelineCaches(VkDevice device,
+ VkPipelineCache dstCache,
+ uint32_t srcCacheCount,
+ const VkPipelineCache *pSrcCaches)
+{
+ // FIXME
+ return VK_SUCCESS;
+}
diff --git a/src/microsoft/vulkan/dzn_private.h b/src/microsoft/vulkan/dzn_private.h
new file mode 100644
index 00000000000..2b4ccfe0f01
--- /dev/null
+++ b/src/microsoft/vulkan/dzn_private.h
@@ -0,0 +1,1060 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef DZN_PRIVATE_H
+#define DZN_PRIVATE_H
+
+#include "vk_command_pool.h"
+#include "vk_command_buffer.h"
+#include "vk_cmd_queue.h"
+#include "vk_debug_report.h"
+#include "vk_device.h"
+#include "vk_image.h"
+#include "vk_log.h"
+#include "vk_physical_device.h"
+#include "vk_sync.h"
+#include "vk_queue.h"
+#include "vk_shader_module.h"
+#include "wsi_common.h"
+
+#include "util/bitset.h"
+#include "util/blob.h"
+#include "util/hash_table.h"
+#include "util/u_dynarray.h"
+#include "util/log.h"
+
+#include "shader_enums.h"
+
+#include "dzn_entrypoints.h"
+#include "dzn_nir.h"
+
+#include <vulkan/vulkan.h>
+#include <vulkan/vk_icd.h>
+
+#include <dxgi1_4.h>
+
+#define D3D12_IGNORE_SDK_LAYERS
+#include <directx/d3d12.h>
+#include <dxcapi.h>
+#include <wrl/client.h>
+
+#include "spirv_to_dxil.h"
+
+using Microsoft::WRL::ComPtr;
+
+#define DZN_SWAP(a, b) \
+ do { \
+ auto __tmp = a; \
+ a = b; \
+ b = __tmp; \
+ } while (0)
+
+#define dzn_stub() unreachable("Unsupported feature")
+
+struct dzn_instance;
+struct dzn_device;
+
+struct dzn_meta {
+ static void
+ compile_shader(struct dzn_device *pdev,
+ nir_shader *nir,
+ D3D12_SHADER_BYTECODE *slot);
+
+ struct dzn_device *device;
+};
+
+struct dzn_meta_indirect_draw {
+ ID3D12RootSignature *root_sig;
+ ID3D12PipelineState *pipeline_state;
+};
+
+enum dzn_index_type {
+ DZN_NO_INDEX,
+ DZN_INDEX_2B,
+ DZN_INDEX_4B,
+ DZN_NUM_INDEX_TYPE,
+};
+
+static inline enum dzn_index_type
+dzn_index_type_from_size(uint8_t index_size)
+{
+ switch (index_size) {
+ case 0: return DZN_NO_INDEX;
+ case 2: return DZN_INDEX_2B;
+ case 4: return DZN_INDEX_4B;
+ default: unreachable("Invalid index size");
+ }
+}
+
+static inline enum dzn_index_type
+dzn_index_type_from_dxgi_format(DXGI_FORMAT format)
+{
+ switch (format) {
+ case DXGI_FORMAT_UNKNOWN: return DZN_NO_INDEX;
+ case DXGI_FORMAT_R16_UINT: return DZN_INDEX_2B;
+ case DXGI_FORMAT_R32_UINT: return DZN_INDEX_4B;
+ default: unreachable("Invalid index format");
+ }
+}
+
+static inline uint8_t
+dzn_index_size(enum dzn_index_type type)
+{
+ switch (type) {
+ case DZN_NO_INDEX: return 0;
+ case DZN_INDEX_2B: return 2;
+ case DZN_INDEX_4B: return 4;
+ default: unreachable("Invalid index type");
+ }
+}
+
+struct dzn_meta_triangle_fan_rewrite_index {
+ ID3D12RootSignature *root_sig;
+ ID3D12PipelineState *pipeline_state;
+ ID3D12CommandSignature *cmd_sig;
+};
+
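+/* Blit parameters packed into a single uint64_t so keys can be hashed and
+ * compared directly.
+ */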
+struct dzn_meta_blit_key {
+ union {
+ struct {
+ DXGI_FORMAT out_format;
+ uint32_t samples : 6;
+ uint32_t loc : 4;
+ uint32_t out_type : 4;
+ uint32_t sampler_dim : 4;
+ uint32_t src_is_array : 1;
+ uint32_t resolve : 1;
+ uint32_t linear_filter : 1;
+ uint32_t padding : 11;
+ };
+ const uint64_t u64;
+ };
+};
+
+struct dzn_meta_blit {
+ ID3D12RootSignature *root_sig;
+ ID3D12PipelineState *pipeline_state;
+};
+
+struct dzn_meta_blits {
+ mtx_t shaders_lock;
+ D3D12_SHADER_BYTECODE vs;
+ struct hash_table *fs;
+ mtx_t contexts_lock;
+ struct hash_table_u64 *contexts;
+};
+
+const dzn_meta_blit *
+dzn_meta_blits_get_context(dzn_device *device, const dzn_meta_blit_key *key);
+
+#define MAX_SYNC_TYPES 2
+#define MAX_QUEUE_FAMILIES 3
+
+struct dzn_physical_device {
+ struct vk_physical_device vk;
+ struct list_head link;
+
+ struct vk_device_extension_table supported_extensions;
+ struct vk_physical_device_dispatch_table dispatch;
+
+ IDXGIAdapter1 *adapter;
+ DXGI_ADAPTER_DESC1 adapter_desc;
+
+ uint32_t queue_family_count;
+ struct {
+ VkQueueFamilyProperties props;
+ D3D12_COMMAND_QUEUE_DESC desc;
+ } queue_families[MAX_QUEUE_FAMILIES];
+
+ uint8_t pipeline_cache_uuid[VK_UUID_SIZE];
+ uint8_t device_uuid[VK_UUID_SIZE];
+ uint8_t driver_uuid[VK_UUID_SIZE];
+
+ struct wsi_device wsi_device;
+
+ mtx_t dev_lock;
+ ID3D12Device1 *dev;
+ D3D_FEATURE_LEVEL feature_level;
+ D3D12_FEATURE_DATA_ARCHITECTURE1 architecture;
+ D3D12_FEATURE_DATA_D3D12_OPTIONS options;
+ VkPhysicalDeviceMemoryProperties memory;
+ D3D12_HEAP_FLAGS heap_flags_for_mem_type[VK_MAX_MEMORY_TYPES];
+ const struct vk_sync_type *sync_types[MAX_SYNC_TYPES + 1];
+ float timestamp_period;
+};
+
+D3D12_FEATURE_DATA_FORMAT_SUPPORT
+dzn_physical_device_get_format_support(dzn_physical_device *pdev,
+ VkFormat format);
+
+uint32_t
+dzn_physical_device_get_mem_type_mask_for_resource(const dzn_physical_device *pdev,
+ const D3D12_RESOURCE_DESC *desc);
+
+#define dzn_debug_ignored_stype(sType) \
+ mesa_logd("%s: ignored VkStructureType %u\n", __func__, (sType))
+
+IDXGIFactory4 *
+dxgi_get_factory(bool debug);
+
+IDxcValidator *
+dxil_get_validator(void);
+
+IDxcLibrary *
+dxc_get_library(void);
+
+IDxcCompiler *
+dxc_get_compiler(void);
+
+PFN_D3D12_SERIALIZE_VERSIONED_ROOT_SIGNATURE
+d3d12_get_serialize_root_sig(void);
+
+void
+d3d12_enable_debug_layer();
+
+void
+d3d12_enable_gpu_validation();
+
+ID3D12Device1 *
+d3d12_create_device(IUnknown *adapter, bool experimental_features);
+
+struct dzn_queue {
+ struct vk_queue vk;
+
+ ID3D12CommandQueue *cmdqueue;
+ ID3D12Fence *fence;
+ uint64_t fence_point = 0;
+};
+
+struct dzn_device {
+ struct vk_device vk;
+ struct vk_device_extension_table enabled_extensions;
+ struct vk_device_dispatch_table cmd_dispatch;
+
+ ID3D12Device1 *dev;
+
+ struct dzn_meta_indirect_draw indirect_draws[DZN_NUM_INDIRECT_DRAW_TYPES];
+ struct dzn_meta_triangle_fan_rewrite_index triangle_fan[DZN_NUM_INDEX_TYPE];
+ struct dzn_meta_blits blits;
+
+ struct {
+#define DZN_QUERY_REFS_SECTION_SIZE 4096
+#define DZN_QUERY_REFS_ALL_ONES_OFFSET 0
+#define DZN_QUERY_REFS_ALL_ZEROS_OFFSET (DZN_QUERY_REFS_ALL_ONES_OFFSET + DZN_QUERY_REFS_SECTION_SIZE)
+#define DZN_QUERY_REFS_RES_SIZE (DZN_QUERY_REFS_ALL_ZEROS_OFFSET + DZN_QUERY_REFS_SECTION_SIZE)
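+      /* Reference buffer containing an all-ones section followed by an
+       * all-zeros section, used as a copy source when setting/clearing
+       * query availability.
+       */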
+ ID3D12Resource *refs;
+ } queries;
+};
+
+void dzn_meta_finish(dzn_device *device);
+
+VkResult dzn_meta_init(dzn_device *device);
+
+ID3D12RootSignature *
+dzn_device_create_root_sig(dzn_device *device,
+ const D3D12_VERSIONED_ROOT_SIGNATURE_DESC *desc);
+
+struct dzn_device_memory {
+ struct vk_object_base base;
+
+ struct list_head link;
+
+ ID3D12Heap *heap;
+ VkDeviceSize size;
+ D3D12_RESOURCE_STATES initial_state; /* initial state for this memory type */
+
+ /* A buffer-resource spanning the entire heap, used for mapping memory */
+ ID3D12Resource *map_res;
+
+ VkDeviceSize map_size;
+ void *map;
+};
+
+enum dzn_cmd_bindpoint_dirty {
+ DZN_CMD_BINDPOINT_DIRTY_PIPELINE = 1 << 0,
+ DZN_CMD_BINDPOINT_DIRTY_HEAPS = 1 << 1,
+ DZN_CMD_BINDPOINT_DIRTY_SYSVALS = 1 << 2,
+};
+
+enum dzn_cmd_dirty {
+ DZN_CMD_DIRTY_VIEWPORTS = 1 << 0,
+ DZN_CMD_DIRTY_SCISSORS = 1 << 1,
+ DZN_CMD_DIRTY_IB = 1 << 2,
+ DZN_CMD_DIRTY_STENCIL_REF = 1 << 3,
+ DZN_CMD_DIRTY_STENCIL_COMPARE_MASK = 1 << 4,
+ DZN_CMD_DIRTY_STENCIL_WRITE_MASK = 1 << 5,
+};
+
+#define MAX_VBS 16
+#define MAX_VP 16
+#define MAX_SCISSOR 16
+#define MAX_SETS 4
+#define MAX_DYNAMIC_UNIFORM_BUFFERS 8
+#define MAX_DYNAMIC_STORAGE_BUFFERS 4
+#define MAX_DYNAMIC_BUFFERS \
+ (MAX_DYNAMIC_UNIFORM_BUFFERS + MAX_DYNAMIC_STORAGE_BUFFERS)
+#define MAX_PUSH_CONSTANT_DWORDS 32
+
+#define NUM_BIND_POINT (VK_PIPELINE_BIND_POINT_COMPUTE + 1)
+#define NUM_POOL_TYPES (D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER + 1)
+
+#define dzn_foreach_pool_type(type) \
+ for (D3D12_DESCRIPTOR_HEAP_TYPE type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; \
+ type <= D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER; \
+ type = (D3D12_DESCRIPTOR_HEAP_TYPE)(type + 1))
+
+struct dzn_cmd_event_signal {
+ struct dzn_event *event;
+ bool value;
+};
+
+struct dzn_cmd_buffer;
+
+struct dzn_attachment {
+ uint32_t idx;
+ VkFormat format;
+ uint32_t samples;
+ union {
+ bool color;
+ struct {
+ bool depth;
+ bool stencil;
+ };
+ } clear;
+ D3D12_RESOURCE_STATES before, last, after;
+};
+
+struct dzn_attachment_ref {
+ uint32_t idx;
+ D3D12_RESOURCE_STATES before, during;
+};
+
+struct dzn_descriptor_state {
+ struct {
+ const struct dzn_descriptor_set *set;
+ uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
+ } sets[MAX_SETS];
+ struct dzn_descriptor_heap *heaps[NUM_POOL_TYPES];
+};
+
+struct dzn_sampler;
+struct dzn_image_view;
+
+struct dzn_buffer_desc {
+ VkDescriptorType type;
+ const struct dzn_buffer *buffer;
+ VkDeviceSize range;
+ VkDeviceSize offset;
+};
+
+struct dzn_descriptor_heap {
+ ID3D12Device *dev;
+ ID3D12DescriptorHeap *heap;
+ D3D12_DESCRIPTOR_HEAP_TYPE type;
+ SIZE_T cpu_base;
+ uint64_t gpu_base;
+ uint32_t desc_count;
+ uint32_t desc_sz;
+};
+
+bool
+dzn_descriptor_type_depends_on_shader_usage(VkDescriptorType type);
+
+D3D12_CPU_DESCRIPTOR_HANDLE
+dzn_descriptor_heap_get_cpu_handle(const dzn_descriptor_heap *heap, uint32_t slot);
+
+D3D12_GPU_DESCRIPTOR_HANDLE
+dzn_descriptor_heap_get_gpu_handle(const dzn_descriptor_heap *heap, uint32_t slot);
+
+void
+dzn_descriptor_heap_write_image_view_desc(dzn_descriptor_heap *heap,
+ uint32_t heap_offset,
+ bool writeable,
+ bool cube_as_2darray,
+ const dzn_image_view *iview);
+
+void
+dzn_descriptor_heap_write_buffer_desc(dzn_descriptor_heap *heap,
+ uint32_t heap_offset,
+ bool writeable,
+ const dzn_buffer_desc *bdesc);
+
+void
+dzn_descriptor_heap_copy(dzn_descriptor_heap *dst_heap, uint32_t dst_heap_offset,
+ const dzn_descriptor_heap *src_heap, uint32_t src_heap_offset,
+ uint32_t desc_count);
+
+VkResult
+dzn_descriptor_heap_init(dzn_descriptor_heap *heap, dzn_device *device,
+ D3D12_DESCRIPTOR_HEAP_TYPE type, uint32_t count,
+ bool shader_visible);
+
+void
+dzn_descriptor_heap_finish(dzn_descriptor_heap *heap);
+
+struct dzn_descriptor_heap_pool_entry {
+ struct list_head link;
+ dzn_descriptor_heap heap;
+};
+
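+/* Pool of descriptor heaps: slots are allocated linearly from the most
+ * recent active heap, and active heaps are recycled through the free list
+ * when the pool is reset.
+ */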
+struct dzn_descriptor_heap_pool {
+ const VkAllocationCallbacks *alloc;
+ D3D12_DESCRIPTOR_HEAP_TYPE type;
+ bool shader_visible;
+ struct list_head active_heaps, free_heaps;
+ uint32_t offset;
+ uint32_t desc_sz;
+};
+
+void
+dzn_descriptor_heap_pool_init(dzn_descriptor_heap_pool *pool,
+ dzn_device *device,
+ D3D12_DESCRIPTOR_HEAP_TYPE type,
+ bool shader_visible,
+ const VkAllocationCallbacks *alloc);
+
+void
+dzn_descriptor_heap_pool_finish(dzn_descriptor_heap_pool *pool);
+
+void
+dzn_descriptor_heap_pool_reset(dzn_descriptor_heap_pool *pool);
+
+VkResult
+dzn_descriptor_heap_pool_alloc_slots(dzn_descriptor_heap_pool *pool,
+ dzn_device *device,
+ uint32_t num_slots,
+ dzn_descriptor_heap **heap,
+ uint32_t *first_slot);
+
+struct dzn_cmd_buffer_query_range {
+ struct dzn_query_pool *qpool;
+ uint32_t start, count;
+};
+
+struct dzn_cmd_buffer_query_pool_state {
+ struct util_dynarray reset, collect, wait, signal;
+};
+
+struct dzn_internal_resource {
+ struct list_head link;
+ ID3D12Resource *res;
+};
+
+enum dzn_event_state {
+ DZN_EVENT_STATE_EXTERNAL_WAIT = -1,
+ DZN_EVENT_STATE_RESET = 0,
+ DZN_EVENT_STATE_SET = 1,
+};
+
+struct dzn_cmd_buffer_push_constant_state {
+ uint32_t offset;
+ uint32_t end;
+ uint32_t values[MAX_PUSH_CONSTANT_DWORDS];
+};
+
+struct dzn_cmd_buffer_state {
+ struct dzn_framebuffer *framebuffer;
+ D3D12_RECT render_area;
+ const struct dzn_pipeline *pipeline;
+ dzn_descriptor_heap *heaps[NUM_POOL_TYPES];
+ struct dzn_render_pass *pass;
+ struct {
+ BITSET_DECLARE(dirty, MAX_VBS);
+ D3D12_VERTEX_BUFFER_VIEW views[MAX_VBS];
+ } vb;
+ struct {
+ D3D12_INDEX_BUFFER_VIEW view;
+ } ib;
+ struct {
+ struct {
+ struct {
+ uint32_t ref, compare_mask, write_mask;
+ } front, back;
+ } stencil_test;
+ } zsa;
+ D3D12_VIEWPORT viewports[MAX_VP];
+ D3D12_RECT scissors[MAX_SCISSOR];
+ struct {
+ struct dzn_cmd_buffer_push_constant_state gfx, compute;
+ } push_constant;
+ uint32_t dirty;
+ uint32_t subpass;
+ struct {
+ struct dzn_pipeline *pipeline;
+ struct dzn_descriptor_state desc_state;
+ uint32_t dirty;
+ } bindpoint[NUM_BIND_POINT];
+ union {
+ struct dxil_spirv_vertex_runtime_data gfx;
+ struct dxil_spirv_compute_runtime_data compute;
+ } sysvals;
+};
+
+struct dzn_cmd_buffer_rtv_key {
+ const struct dzn_image *image;
+ D3D12_RENDER_TARGET_VIEW_DESC desc;
+};
+
+struct dzn_cmd_buffer_rtv_entry {
+ struct dzn_cmd_buffer_rtv_key key;
+ D3D12_CPU_DESCRIPTOR_HANDLE handle;
+};
+
+struct dzn_cmd_buffer_dsv_key {
+ const struct dzn_image *image;
+ D3D12_DEPTH_STENCIL_VIEW_DESC desc;
+};
+
+struct dzn_cmd_buffer_dsv_entry {
+ struct dzn_cmd_buffer_dsv_key key;
+ D3D12_CPU_DESCRIPTOR_HANDLE handle;
+};
+
+struct dzn_cmd_buffer {
+ struct vk_command_buffer vk;
+ VkResult error;
+ struct dzn_cmd_buffer_state state;
+
+ struct {
+ struct hash_table *ht;
+ struct util_dynarray reset;
+ struct util_dynarray wait;
+ struct util_dynarray signal;
+ } queries;
+
+ struct {
+ struct hash_table *ht;
+ struct util_dynarray wait;
+ struct util_dynarray signal;
+ } events;
+
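+   /* RTV/DSV descriptors are created lazily and cached by
+    * (image, view-desc) key.
+    */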
+ struct {
+ struct hash_table *ht;
+ dzn_descriptor_heap_pool pool;
+ } rtvs, dsvs;
+
+ dzn_descriptor_heap_pool cbv_srv_uav_pool, sampler_pool;
+
+ struct list_head internal_bufs;
+
+ ID3D12CommandAllocator *cmdalloc;
+ ID3D12GraphicsCommandList1 *cmdlist;
+};
+
+struct dzn_descriptor_pool {
+ struct vk_object_base base;
+ VkAllocationCallbacks alloc;
+
+ uint32_t set_count;
+ uint32_t used_set_count;
+ dzn_descriptor_set *sets;
+ dzn_descriptor_heap heaps[NUM_POOL_TYPES];
+ uint32_t desc_count[NUM_POOL_TYPES];
+ uint32_t used_desc_count[NUM_POOL_TYPES];
+ uint32_t free_offset[NUM_POOL_TYPES];
+ mtx_t defragment_lock;
+};
+
+#define MAX_SHADER_VISIBILITIES (D3D12_SHADER_VISIBILITY_PIXEL + 1)
+
+struct dzn_descriptor_set_layout_binding {
+ VkDescriptorType type;
+ D3D12_SHADER_VISIBILITY visibility;
+ uint32_t base_shader_register;
+ uint32_t range_idx[NUM_POOL_TYPES];
+ union {
+ struct {
+ uint32_t static_sampler_idx;
+ uint32_t immutable_sampler_idx;
+ };
+ uint32_t dynamic_buffer_idx;
+ };
+};
+
+struct dzn_descriptor_set_layout {
+ struct vk_object_base base;
+ uint32_t range_count[MAX_SHADER_VISIBILITIES][NUM_POOL_TYPES];
+ const D3D12_DESCRIPTOR_RANGE1 *ranges[MAX_SHADER_VISIBILITIES][NUM_POOL_TYPES];
+ uint32_t range_desc_count[NUM_POOL_TYPES];
+ uint32_t static_sampler_count;
+ const D3D12_STATIC_SAMPLER_DESC *static_samplers;
+ uint32_t immutable_sampler_count;
+ const dzn_sampler **immutable_samplers;
+ struct {
+ uint32_t bindings[MAX_DYNAMIC_BUFFERS];
+ uint32_t count;
+ uint32_t desc_count;
+ uint32_t range_offset;
+ } dynamic_buffers;
+ uint32_t binding_count;
+ const struct dzn_descriptor_set_layout_binding *bindings;
+};
+
+struct dzn_descriptor_set {
+ struct vk_object_base base;
+ struct dzn_buffer_desc dynamic_buffers[MAX_DYNAMIC_BUFFERS];
+ dzn_descriptor_pool *pool;
+ uint32_t heap_offsets[NUM_POOL_TYPES];
+ uint32_t heap_sizes[NUM_POOL_TYPES];
+ const struct dzn_descriptor_set_layout *layout;
+};
+
+struct dzn_pipeline_layout {
+ struct vk_object_base base;
+ int32_t refcount;
+ struct {
+ uint32_t heap_offsets[NUM_POOL_TYPES];
+ struct {
+ uint32_t srv, uav;
+ } dynamic_buffer_heap_offsets[MAX_DYNAMIC_BUFFERS];
+ uint32_t dynamic_buffer_count;
+ uint32_t range_desc_count[NUM_POOL_TYPES];
+ } sets[MAX_SETS];
+ dxil_spirv_vulkan_descriptor_set binding_translation[MAX_SETS];
+ uint32_t set_count;
+ uint32_t desc_count[NUM_POOL_TYPES];
+ struct {
+ uint32_t param_count;
+ uint32_t sets_param_count;
+ uint32_t sysval_cbv_param_idx;
+ uint32_t push_constant_cbv_param_idx;
+ D3D12_DESCRIPTOR_HEAP_TYPE type[MAX_SHADER_VISIBILITIES];
+ ID3D12RootSignature *sig;
+ } root;
+};
+
+dzn_pipeline_layout *
+dzn_pipeline_layout_ref(dzn_pipeline_layout *layout);
+
+void
+dzn_pipeline_layout_unref(dzn_pipeline_layout *layout);
+
+#define MAX_RTS 8
+#define MAX_INPUT_ATTACHMENTS 4
+
+struct dzn_subpass {
+ uint32_t color_count;
+ struct dzn_attachment_ref colors[MAX_RTS];
+ struct dzn_attachment_ref resolve[MAX_RTS];
+ struct dzn_attachment_ref zs;
+ uint32_t input_count;
+ struct dzn_attachment_ref inputs[MAX_INPUT_ATTACHMENTS];
+};
+
+struct dzn_render_pass {
+ struct vk_object_base base;
+ uint32_t attachment_count;
+ struct dzn_attachment *attachments;
+ uint32_t subpass_count;
+ struct dzn_subpass *subpasses;
+};
+
+struct dzn_pipeline_cache {
+ struct vk_object_base base;
+};
+
+enum dzn_register_space {
+ DZN_REGISTER_SPACE_SYSVALS = MAX_SETS,
+ DZN_REGISTER_SPACE_PUSH_CONSTANT,
+};
+
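+/* Minimal non-owning IDxcBlob wrapper: ref-counting is stubbed out because
+ * the underlying buffer is owned and freed by the caller.
+ */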
+class dzn_shader_blob : public IDxcBlob {
+public:
+ dzn_shader_blob(void *buf, size_t sz) : data(buf), size(sz) {}
+
+ LPVOID STDMETHODCALLTYPE GetBufferPointer(void) override { return data; }
+
+ SIZE_T STDMETHODCALLTYPE GetBufferSize() override { return size; }
+
+ HRESULT STDMETHODCALLTYPE QueryInterface(REFIID, void**) override { return E_NOINTERFACE; }
+
+ ULONG STDMETHODCALLTYPE AddRef() override { return 1; }
+
+ ULONG STDMETHODCALLTYPE Release() override { return 0; }
+
+ void *data;
+ size_t size;
+};
+
+struct dzn_pipeline {
+ struct vk_object_base base;
+ VkPipelineBindPoint type;
+ dzn_device *device;
+ struct {
+ uint32_t sets_param_count;
+ uint32_t sysval_cbv_param_idx;
+ uint32_t push_constant_cbv_param_idx;
+ D3D12_DESCRIPTOR_HEAP_TYPE type[MAX_SHADER_VISIBILITIES];
+ ID3D12RootSignature *sig;
+ } root;
+ struct {
+ uint32_t heap_offsets[NUM_POOL_TYPES];
+ struct {
+ uint32_t srv, uav;
+ } dynamic_buffer_heap_offsets[MAX_DYNAMIC_BUFFERS];
+ uint32_t dynamic_buffer_count;
+ uint32_t range_desc_count[NUM_POOL_TYPES];
+ } sets[MAX_SETS];
+ uint32_t desc_count[NUM_POOL_TYPES];
+ ID3D12PipelineState *state;
+};
+
+enum dzn_indirect_draw_cmd_sig_type {
+ DZN_INDIRECT_DRAW_CMD_SIG,
+ DZN_INDIRECT_INDEXED_DRAW_CMD_SIG,
+ DZN_INDIRECT_DRAW_TRIANGLE_FAN_CMD_SIG,
+ DZN_NUM_INDIRECT_DRAW_CMD_SIGS,
+};
+
+struct dzn_graphics_pipeline {
+ dzn_pipeline base;
+ struct {
+ unsigned count;
+ uint32_t strides[MAX_VBS];
+ } vb;
+
+ struct {
+ bool triangle_fan;
+ D3D_PRIMITIVE_TOPOLOGY topology;
+ } ia;
+
+ struct {
+ unsigned count;
+ bool dynamic;
+ D3D12_VIEWPORT desc[MAX_VP];
+ } vp;
+
+ struct {
+ unsigned count;
+ bool dynamic;
+ D3D12_RECT desc[MAX_SCISSOR];
+ } scissor;
+
+ struct {
+ struct {
+ bool enable;
+ bool independent_front_back;
+ bool dynamic_ref;
+ bool dynamic_write_mask;
+ bool dynamic_compare_mask;
+ struct {
+ uint32_t ref;
+ uint32_t write_mask;
+ uint32_t compare_mask;
+ bool uses_ref;
+ } front, back;
+ } stencil_test;
+ } zsa;
+
+ ID3D12CommandSignature *indirect_cmd_sigs[DZN_NUM_INDIRECT_DRAW_CMD_SIGS];
+};
+
+ID3D12CommandSignature *
+dzn_graphics_pipeline_get_indirect_cmd_sig(dzn_graphics_pipeline *pipeline,
+ enum dzn_indirect_draw_cmd_sig_type cmd_sig_type);
+
+struct dzn_compute_pipeline {
+ dzn_pipeline base;
+ struct {
+ uint32_t x, y, z;
+ } local_size;
+
+ ID3D12CommandSignature *indirect_cmd_sig;
+};
+
+ID3D12CommandSignature *
+dzn_compute_pipeline_get_indirect_cmd_sig(dzn_compute_pipeline *pipeline);
+
+#define MAX_MIP_LEVELS 14
+
+struct dzn_image {
+ struct vk_image vk;
+
+ struct {
+ uint32_t row_stride = 0;
+ uint32_t size = 0;
+ } linear;
+ D3D12_RESOURCE_DESC desc;
+ ID3D12Resource *res;
+ dzn_device_memory *mem;
+ VkDeviceSize mem_offset;
+};
+
+DXGI_FORMAT
+dzn_image_get_dxgi_format(VkFormat format,
+ VkImageUsageFlags usage,
+ VkImageAspectFlags aspects);
+
+VkFormat
+dzn_image_get_plane_format(VkFormat fmt, VkImageAspectFlags aspect);
+
+DXGI_FORMAT
+dzn_image_get_placed_footprint_format(VkFormat fmt, VkImageAspectFlags aspect);
+
+D3D12_DEPTH_STENCIL_VIEW_DESC
+dzn_image_get_dsv_desc(const dzn_image *image,
+ const VkImageSubresourceRange *range,
+ uint32_t level);
+
+D3D12_RENDER_TARGET_VIEW_DESC
+dzn_image_get_rtv_desc(const dzn_image *image,
+ const VkImageSubresourceRange *range,
+ uint32_t level);
+
+D3D12_RESOURCE_STATES
+dzn_image_layout_to_state(VkImageLayout layout);
+
+uint32_t
+dzn_image_layers_get_subresource_index(const dzn_image *image,
+ const VkImageSubresourceLayers *subres,
+ VkImageAspectFlagBits aspect,
+ uint32_t layer);
+uint32_t
+dzn_image_range_get_subresource_index(const dzn_image *image,
+ const VkImageSubresourceRange *range,
+ VkImageAspectFlagBits aspect,
+ uint32_t level, uint32_t layer);
+
+uint32_t
+dzn_image_get_subresource_index(const dzn_image *image,
+ const VkImageSubresource *subres,
+ VkImageAspectFlagBits aspect);
+
+D3D12_TEXTURE_COPY_LOCATION
+dzn_image_get_copy_loc(const dzn_image *image,
+ const VkImageSubresourceLayers *layers,
+ VkImageAspectFlagBits aspect,
+ uint32_t layer);
+
+struct dzn_image_view {
+ struct vk_image_view vk;
+ D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc;
+ D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc;
+ D3D12_RENDER_TARGET_VIEW_DESC rtv_desc;
+ D3D12_DEPTH_STENCIL_VIEW_DESC dsv_desc;
+};
+
+void
+dzn_image_view_init(dzn_device *device,
+ dzn_image_view *iview,
+ const VkImageViewCreateInfo *info);
+
+void
+dzn_image_view_finish(dzn_image_view *iview);
+
+struct dzn_buffer {
+ struct vk_object_base base;
+
+ VkDeviceSize size;
+
+ D3D12_RESOURCE_DESC desc;
+ ID3D12Resource *res;
+
+ VkBufferCreateFlags create_flags;
+ VkBufferUsageFlags usage;
+};
+
+DXGI_FORMAT
+dzn_buffer_get_dxgi_format(VkFormat format);
+
+D3D12_TEXTURE_COPY_LOCATION
+dzn_buffer_get_copy_loc(const dzn_buffer *buf, VkFormat format,
+ const VkBufferImageCopy2KHR *info,
+ VkImageAspectFlagBits aspect,
+ uint32_t layer);
+
+D3D12_TEXTURE_COPY_LOCATION
+dzn_buffer_get_line_copy_loc(const dzn_buffer *buf, VkFormat format,
+ const VkBufferImageCopy2KHR *region,
+ const D3D12_TEXTURE_COPY_LOCATION *loc,
+ uint32_t y, uint32_t z, uint32_t *start_x);
+
+bool
+dzn_buffer_supports_region_copy(const D3D12_TEXTURE_COPY_LOCATION *loc);
+
+struct dzn_buffer_view {
+ struct vk_object_base base;
+
+ const dzn_buffer *buffer;
+
+ D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc;
+ D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc;
+};
+
+struct dzn_framebuffer {
+ struct vk_object_base base;
+
+ uint32_t width, height, layers;
+
+ uint32_t attachment_count;
+ struct dzn_image_view **attachments;
+};
+
+struct dzn_sampler {
+ struct vk_object_base base;
+ D3D12_SAMPLER_DESC desc;
+ D3D12_STATIC_BORDER_COLOR static_border_color = D3D12_STATIC_BORDER_COLOR_OPAQUE_BLACK;
+};
+
+/* This is defined as a macro so that it works for both
+ * VkImageSubresourceRange and VkImageSubresourceLayers
+ */
+#define dzn_get_layer_count(_image, _range) \
+ ((_range)->layerCount == VK_REMAINING_ARRAY_LAYERS ? \
+ (_image)->vk.array_layers - (_range)->baseArrayLayer : (_range)->layerCount)
+
+#define dzn_get_level_count(_image, _range) \
+ ((_range)->levelCount == VK_REMAINING_MIP_LEVELS ? \
+ (_image)->vk.mip_levels - (_range)->baseMipLevel : (_range)->levelCount)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+DXGI_FORMAT dzn_pipe_to_dxgi_format(enum pipe_format in);
+D3D12_FILTER dzn_translate_sampler_filter(const VkSamplerCreateInfo *create_info);
+D3D12_COMPARISON_FUNC dzn_translate_compare_op(VkCompareOp in);
+void dzn_translate_viewport(D3D12_VIEWPORT *out, const VkViewport *in);
+void dzn_translate_rect(D3D12_RECT *out, const VkRect2D *in);
+#ifdef __cplusplus
+}
+#endif
+
+#define dzn_foreach_aspect(aspect, mask) \
+ for (VkImageAspectFlagBits aspect = VK_IMAGE_ASPECT_COLOR_BIT; \
+ aspect <= VK_IMAGE_ASPECT_STENCIL_BIT; \
+ aspect = (VkImageAspectFlagBits)(aspect << 1)) \
+      if ((mask) & aspect)
+
+VkResult dzn_wsi_init(struct dzn_physical_device *physical_device);
+void dzn_wsi_finish(struct dzn_physical_device *physical_device);
+
+struct dzn_app_info {
+ const char *app_name;
+ uint32_t app_version;
+ const char *engine_name;
+ uint32_t engine_version;
+ uint32_t api_version;
+};
+
+enum dzn_debug_flags {
+ DZN_DEBUG_SYNC = 1 << 0,
+ DZN_DEBUG_NIR = 1 << 1,
+ DZN_DEBUG_DXIL = 1 << 2,
+ DZN_DEBUG_WARP = 1 << 3,
+ DZN_DEBUG_INTERNAL = 1 << 4,
+ DZN_DEBUG_SIG = 1 << 5,
+ DZN_DEBUG_GBV = 1 << 6,
+};
+
+struct dzn_instance {
+ struct vk_instance vk;
+
+ struct {
+ IDxcValidator *validator;
+ IDxcLibrary *library;
+ IDxcCompiler *compiler;
+ } dxc;
+ struct {
+ PFN_D3D12_SERIALIZE_VERSIONED_ROOT_SIGNATURE serialize_root_sig;
+ } d3d12;
+ bool physical_devices_enumerated;
+ uint32_t debug_flags;
+
+ struct list_head physical_devices;
+};
+
+struct dzn_event {
+ struct vk_object_base base;
+ ID3D12Fence *fence;
+};
+
+struct dzn_sync {
+ struct vk_sync vk;
+ ID3D12Fence *fence;
+};
+
+extern const struct vk_sync_type dzn_sync_type;
+
+struct dzn_query {
+ D3D12_QUERY_TYPE type;
+ ID3D12Fence *fence;
+ uint64_t fence_value;
+};
+
+struct dzn_query_pool {
+ struct vk_object_base base;
+
+ D3D12_QUERY_HEAP_TYPE heap_type;
+ ID3D12QueryHeap *heap;
+ uint32_t query_count;
+ struct dzn_query *queries;
+ mtx_t queries_lock;
+ ID3D12Resource *resolve_buffer;
+ ID3D12Resource *collect_buffer;
+ VkQueryPipelineStatisticFlags pipeline_statistics;
+ uint32_t query_size;
+ uint64_t *collect_map;
+};
+
+D3D12_QUERY_TYPE
+dzn_query_pool_get_query_type(const dzn_query_pool *qpool, VkQueryControlFlags flag);
+
+uint32_t
+dzn_query_pool_get_result_offset(const dzn_query_pool *qpool, uint32_t query);
+
+uint32_t
+dzn_query_pool_get_availability_offset(const dzn_query_pool *qpool, uint32_t query);
+
+uint32_t
+dzn_query_pool_get_result_size(const dzn_query_pool *qpool, uint32_t count);
+
+VK_DEFINE_HANDLE_CASTS(dzn_cmd_buffer, vk.base, VkCommandBuffer, VK_OBJECT_TYPE_COMMAND_BUFFER)
+VK_DEFINE_HANDLE_CASTS(dzn_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
+VK_DEFINE_HANDLE_CASTS(dzn_instance, vk.base, VkInstance, VK_OBJECT_TYPE_INSTANCE)
+VK_DEFINE_HANDLE_CASTS(dzn_physical_device, vk.base, VkPhysicalDevice, VK_OBJECT_TYPE_PHYSICAL_DEVICE)
+VK_DEFINE_HANDLE_CASTS(dzn_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)
+
+VK_DEFINE_NONDISP_HANDLE_CASTS(dzn_buffer, base, VkBuffer, VK_OBJECT_TYPE_BUFFER)
+VK_DEFINE_NONDISP_HANDLE_CASTS(dzn_buffer_view, base, VkBufferView, VK_OBJECT_TYPE_BUFFER_VIEW)
+VK_DEFINE_NONDISP_HANDLE_CASTS(dzn_device_memory, base, VkDeviceMemory, VK_OBJECT_TYPE_DEVICE_MEMORY)
+VK_DEFINE_NONDISP_HANDLE_CASTS(dzn_descriptor_pool, base, VkDescriptorPool, VK_OBJECT_TYPE_DESCRIPTOR_POOL)
+VK_DEFINE_NONDISP_HANDLE_CASTS(dzn_descriptor_set, base, VkDescriptorSet, VK_OBJECT_TYPE_DESCRIPTOR_SET)
+VK_DEFINE_NONDISP_HANDLE_CASTS(dzn_descriptor_set_layout, base, VkDescriptorSetLayout, VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
+VK_DEFINE_NONDISP_HANDLE_CASTS(dzn_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
+VK_DEFINE_NONDISP_HANDLE_CASTS(dzn_framebuffer, base, VkFramebuffer, VK_OBJECT_TYPE_FRAMEBUFFER)
+VK_DEFINE_NONDISP_HANDLE_CASTS(dzn_image, vk.base, VkImage, VK_OBJECT_TYPE_IMAGE)
+VK_DEFINE_NONDISP_HANDLE_CASTS(dzn_image_view, vk.base, VkImageView, VK_OBJECT_TYPE_IMAGE_VIEW)
+VK_DEFINE_NONDISP_HANDLE_CASTS(dzn_pipeline, base, VkPipeline, VK_OBJECT_TYPE_PIPELINE)
+VK_DEFINE_NONDISP_HANDLE_CASTS(dzn_graphics_pipeline, base.base, VkPipeline, VK_OBJECT_TYPE_PIPELINE)
+VK_DEFINE_NONDISP_HANDLE_CASTS(dzn_compute_pipeline, base.base, VkPipeline, VK_OBJECT_TYPE_PIPELINE)
+VK_DEFINE_NONDISP_HANDLE_CASTS(dzn_pipeline_cache, base, VkPipelineCache, VK_OBJECT_TYPE_PIPELINE_CACHE)
+VK_DEFINE_NONDISP_HANDLE_CASTS(dzn_pipeline_layout, base, VkPipelineLayout, VK_OBJECT_TYPE_PIPELINE_LAYOUT)
+VK_DEFINE_NONDISP_HANDLE_CASTS(dzn_query_pool, base, VkQueryPool, VK_OBJECT_TYPE_QUERY_POOL)
+VK_DEFINE_NONDISP_HANDLE_CASTS(dzn_render_pass, base, VkRenderPass, VK_OBJECT_TYPE_RENDER_PASS)
+VK_DEFINE_NONDISP_HANDLE_CASTS(dzn_sampler, base, VkSampler, VK_OBJECT_TYPE_SAMPLER)
+
+#endif /* DZN_PRIVATE_H */
diff --git a/src/microsoft/vulkan/dzn_query.cpp b/src/microsoft/vulkan/dzn_query.cpp
new file mode 100644
index 00000000000..c16ae0abe39
--- /dev/null
+++ b/src/microsoft/vulkan/dzn_query.cpp
@@ -0,0 +1,327 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "dzn_private.h"
+
+#include "vk_alloc.h"
+#include "vk_debug_report.h"
+#include "vk_util.h"
+
+static D3D12_QUERY_HEAP_TYPE
+dzn_query_pool_get_heap_type(VkQueryType in)
+{
+ switch (in) {
+ case VK_QUERY_TYPE_OCCLUSION: return D3D12_QUERY_HEAP_TYPE_OCCLUSION;
+ case VK_QUERY_TYPE_PIPELINE_STATISTICS: return D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS;
+ case VK_QUERY_TYPE_TIMESTAMP: return D3D12_QUERY_HEAP_TYPE_TIMESTAMP;
+ default: unreachable("Unsupported query type");
+ }
+}
+
+D3D12_QUERY_TYPE
+dzn_query_pool_get_query_type(const dzn_query_pool *qpool,
+ VkQueryControlFlags flags)
+{
+ switch (qpool->heap_type) {
+ case D3D12_QUERY_HEAP_TYPE_OCCLUSION:
+ return flags & VK_QUERY_CONTROL_PRECISE_BIT ?
+ D3D12_QUERY_TYPE_OCCLUSION : D3D12_QUERY_TYPE_BINARY_OCCLUSION;
+ case D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS: return D3D12_QUERY_TYPE_PIPELINE_STATISTICS;
+ case D3D12_QUERY_HEAP_TYPE_TIMESTAMP: return D3D12_QUERY_TYPE_TIMESTAMP;
+ default: unreachable("Unsupported query type");
+ }
+}
+
+static void
+dzn_query_pool_destroy(dzn_query_pool *qpool,
+ const VkAllocationCallbacks *alloc)
+{
+ if (!qpool)
+ return;
+
+ dzn_device *device = container_of(qpool->base.device, dzn_device, vk);
+
+ if (qpool->collect_map)
+ qpool->collect_buffer->Unmap(0, NULL);
+
+ if (qpool->collect_buffer)
+ qpool->collect_buffer->Release();
+
+ if (qpool->resolve_buffer)
+ qpool->resolve_buffer->Release();
+
+ if (qpool->heap)
+ qpool->heap->Release();
+
+ for (uint32_t q = 0; q < qpool->query_count; q++) {
+ if (qpool->queries[q].fence)
+ qpool->queries[q].fence->Release();
+ }
+
+ mtx_destroy(&qpool->queries_lock);
+ vk_object_base_finish(&qpool->base);
+ vk_free2(&device->vk.alloc, alloc, qpool);
+}
+
+static VkResult
+dzn_query_pool_create(dzn_device *device,
+ const VkQueryPoolCreateInfo *info,
+ const VkAllocationCallbacks *alloc,
+ VkQueryPool *out)
+{
+ VK_MULTIALLOC(ma);
+ VK_MULTIALLOC_DECL(&ma, dzn_query_pool, qpool, 1);
+ VK_MULTIALLOC_DECL(&ma, dzn_query, queries, info->queryCount);
+
+ if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, alloc,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT))
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ vk_object_base_init(&device->vk, &qpool->base, VK_OBJECT_TYPE_QUERY_POOL);
+
+ mtx_init(&qpool->queries_lock, mtx_plain);
+ qpool->query_count = info->queryCount;
+ qpool->queries = queries;
+
+ D3D12_QUERY_HEAP_DESC desc = { 0 };
+ qpool->heap_type = desc.Type = dzn_query_pool_get_heap_type(info->queryType);
+ desc.Count = info->queryCount;
+ desc.NodeMask = 0;
+
+ HRESULT hres =
+ device->dev->CreateQueryHeap(&desc, IID_PPV_ARGS(&qpool->heap));
+ if (FAILED(hres)) {
+ dzn_query_pool_destroy(qpool, alloc);
+ return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ }
+
+ switch (info->queryType) {
+ case VK_QUERY_TYPE_OCCLUSION:
+ case VK_QUERY_TYPE_TIMESTAMP:
+ qpool->query_size = sizeof(uint64_t);
+ break;
+ case VK_QUERY_TYPE_PIPELINE_STATISTICS:
+ qpool->pipeline_statistics = info->pipelineStatistics;
+ qpool->query_size = sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS);
+ break;
+ default: unreachable("Unsupported query type");
+ }
+
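+   /* Query results are first resolved into a DEFAULT-heap buffer, then
+    * copied into a READBACK-heap buffer that stays mapped for CPU access.
+    * The collect buffer also stores one availability qword per query after
+    * the results.
+    */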
+ D3D12_HEAP_PROPERTIES hprops =
+ device->dev->GetCustomHeapProperties(0, D3D12_HEAP_TYPE_DEFAULT);
+ D3D12_RESOURCE_DESC rdesc = {
+ .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
+ .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT,
+ .Width = info->queryCount * qpool->query_size,
+ .Height = 1,
+ .DepthOrArraySize = 1,
+ .MipLevels = 1,
+ .Format = DXGI_FORMAT_UNKNOWN,
+ .SampleDesc = { .Count = 1, .Quality = 0 },
+ .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
+ .Flags = D3D12_RESOURCE_FLAG_NONE,
+ };
+
+ hres = device->dev->CreateCommittedResource(&hprops,
+ D3D12_HEAP_FLAG_NONE,
+ &rdesc,
+ D3D12_RESOURCE_STATE_COPY_DEST,
+ NULL, IID_PPV_ARGS(&qpool->resolve_buffer));
+ if (FAILED(hres)) {
+ dzn_query_pool_destroy(qpool, alloc);
+ return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ }
+
+ hprops = device->dev->GetCustomHeapProperties(0, D3D12_HEAP_TYPE_READBACK);
+ rdesc.Width = info->queryCount * (qpool->query_size + sizeof(uint64_t));
+ hres = device->dev->CreateCommittedResource(&hprops,
+ D3D12_HEAP_FLAG_NONE,
+ &rdesc,
+ D3D12_RESOURCE_STATE_COPY_DEST,
+ NULL, IID_PPV_ARGS(&qpool->collect_buffer));
+ if (FAILED(hres)) {
+ dzn_query_pool_destroy(qpool, alloc);
+ return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ }
+
+ hres = qpool->collect_buffer->Map(0, NULL, (void **)&qpool->collect_map);
+ if (FAILED(hres)) {
+ dzn_query_pool_destroy(qpool, alloc);
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+
+ memset(qpool->collect_map, 0, rdesc.Width);
+
+ *out = dzn_query_pool_to_handle(qpool);
+ return VK_SUCCESS;
+}
+
+uint32_t
+dzn_query_pool_get_result_offset(const dzn_query_pool *qpool, uint32_t query)
+{
+ return query * qpool->query_size;
+}
+
+uint32_t
+dzn_query_pool_get_result_size(const dzn_query_pool *qpool, uint32_t query_count)
+{
+ return query_count * qpool->query_size;
+}
+
+uint32_t
+dzn_query_pool_get_availability_offset(const dzn_query_pool *qpool, uint32_t query)
+{
+ return (qpool->query_count * qpool->query_size) + (sizeof(uint64_t) * query);
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_CreateQueryPool(VkDevice device,
+ const VkQueryPoolCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkQueryPool *pQueryPool)
+{
+ return dzn_query_pool_create(dzn_device_from_handle(device),
+ pCreateInfo, pAllocator, pQueryPool);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_DestroyQueryPool(VkDevice device,
+ VkQueryPool queryPool,
+ const VkAllocationCallbacks *pAllocator)
+{
+ dzn_query_pool_destroy(dzn_query_pool_from_handle(queryPool), pAllocator);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_ResetQueryPool(VkDevice device,
+ VkQueryPool queryPool,
+ uint32_t firstQuery,
+ uint32_t queryCount)
+{
+ VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
+
+ mtx_lock(&qpool->queries_lock);
+ for (uint32_t q = 0; q < queryCount; q++) {
+ dzn_query *query = &qpool->queries[firstQuery + q];
+
+ query->fence_value = 0;
+ if (query->fence) {
+ query->fence->Release();
+ query->fence = NULL;
+ }
+ }
+   mtx_unlock(&qpool->queries_lock);
+
+ memset((uint8_t *)qpool->collect_map + dzn_query_pool_get_result_offset(qpool, firstQuery),
+ 0, queryCount * qpool->query_size);
+ memset((uint8_t *)qpool->collect_map + dzn_query_pool_get_availability_offset(qpool, firstQuery),
+ 0, queryCount * sizeof(uint64_t));
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_GetQueryPoolResults(VkDevice device,
+ VkQueryPool queryPool,
+ uint32_t firstQuery,
+ uint32_t queryCount,
+ size_t dataSize,
+ void *pData,
+ VkDeviceSize stride,
+ VkQueryResultFlags flags)
+{
+ VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
+
+ uint32_t step = (flags & VK_QUERY_RESULT_64_BIT) ?
+ sizeof(uint64_t) : sizeof(uint32_t);
+ VkResult result = VK_SUCCESS;
+
+ for (uint32_t q = 0; q < queryCount; q++) {
+ dzn_query *query = &qpool->queries[q + firstQuery];
+
+ uint8_t *dst_ptr = (uint8_t *)pData + (stride * q);
+ uint8_t *src_ptr =
+ (uint8_t *)qpool->collect_map +
+ dzn_query_pool_get_result_offset(qpool, firstQuery + q);
+ uint64_t available = 0;
+
+ if (flags & VK_QUERY_RESULT_WAIT_BIT) {
+ ComPtr<ID3D12Fence> query_fence(NULL);
+ uint64_t query_fence_val = 0;
+
+ while (true) {
+ mtx_lock(&qpool->queries_lock);
+ query_fence = ComPtr<ID3D12Fence>(query->fence);
+ query_fence_val = query->fence_value;
+ mtx_unlock(&qpool->queries_lock);
+
+ if (query_fence.Get())
+ break;
+
+ /* Check again in 10ms.
+ * FIXME: decrease the polling period if it happens to hurt latency.
+ */
+ Sleep(10);
+ }
+
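+      /* Passing a NULL event handle makes SetEventOnCompletion() block the
+       * calling thread until the fence reaches the requested value.
+       */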
+ query_fence->SetEventOnCompletion(query_fence_val, NULL);
+ available = UINT64_MAX;
+ } else {
+ mtx_lock(&qpool->queries_lock);
+ ComPtr<ID3D12Fence> query_fence(query->fence);
+ uint64_t query_fence_val = query->fence_value;
+ mtx_unlock(&qpool->queries_lock);
+
+ if (query_fence.Get() &&
+ query_fence->GetCompletedValue() >= query_fence_val)
+ available = UINT64_MAX;
+ }
+
+ if (qpool->heap_type != D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS) {
+ if (available)
+ memcpy(dst_ptr, src_ptr, step);
+ else if (flags & VK_QUERY_RESULT_PARTIAL_BIT)
+ memset(dst_ptr, 0, step);
+
+ dst_ptr += step;
+ } else {
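+ /* D3D12 pipeline-statistics queries always return the full
+ * D3D12_QUERY_DATA_PIPELINE_STATISTICS struct; copy out only the
+ * counters that were requested at pool creation, in bit order. */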
+ for (uint32_t c = 0; c < sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS) / sizeof(uint64_t); c++) {
+ if (!(BITFIELD_BIT(c) & qpool->pipeline_statistics))
+ continue;
+
+ if (available)
+ memcpy(dst_ptr, src_ptr + (c * sizeof(uint64_t)), step);
+ else if (flags & VK_QUERY_RESULT_PARTIAL_BIT)
+ memset(dst_ptr, 0, step);
+
+ dst_ptr += step;
+ }
+ }
+
+ if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
+ memcpy(dst_ptr, &available, step);
+
+ if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT))
+ result = VK_NOT_READY;
+ }
+
+ return result;
+}
diff --git a/src/microsoft/vulkan/dzn_sync.cpp b/src/microsoft/vulkan/dzn_sync.cpp
new file mode 100644
index 00000000000..583543f72d7
--- /dev/null
+++ b/src/microsoft/vulkan/dzn_sync.cpp
@@ -0,0 +1,203 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "dzn_private.h"
+
+#include "vk_alloc.h"
+#include "vk_debug_report.h"
+#include "vk_util.h"
+
+#include "util/macros.h"
+#include "util/os_time.h"
+
+static VkResult
+dzn_sync_init(struct vk_device *device,
+ struct vk_sync *sync,
+ uint64_t initial_value)
+{
+ dzn_sync *dsync = container_of(sync, dzn_sync, vk);
+ dzn_device *ddev = container_of(device, dzn_device, vk);
+
+ assert(!(sync->flags & VK_SYNC_IS_SHAREABLE));
+
+ if (FAILED(ddev->dev->CreateFence(initial_value, D3D12_FENCE_FLAG_NONE,
+ IID_PPV_ARGS(&dsync->fence))))
+ return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+
+ return VK_SUCCESS;
+}
+
+static void
+dzn_sync_finish(struct vk_device *device,
+ struct vk_sync *sync)
+{
+ dzn_sync *dsync = container_of(sync, dzn_sync, vk);
+
+ dsync->fence->Release();
+}
+
+static VkResult
+dzn_sync_signal(struct vk_device *device,
+ struct vk_sync *sync,
+ uint64_t value)
+{
+ dzn_sync *dsync = container_of(sync, dzn_sync, vk);
+
+ if (!(sync->flags & VK_SYNC_IS_TIMELINE))
+ value = 1;
+
+ if (FAILED(dsync->fence->Signal(value)))
+ return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+
+ return VK_SUCCESS;
+}
+
+static VkResult
+dzn_sync_get_value(struct vk_device *device,
+ struct vk_sync *sync,
+ uint64_t *value)
+{
+ dzn_sync *dsync = container_of(sync, dzn_sync, vk);
+
+ *value = dsync->fence->GetCompletedValue();
+ return VK_SUCCESS;
+}
+
+static VkResult
+dzn_sync_reset(struct vk_device *device,
+ struct vk_sync *sync)
+{
+ dzn_sync *dsync = container_of(sync, dzn_sync, vk);
+
+ if (FAILED(dsync->fence->Signal(0)))
+ return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+
+ return VK_SUCCESS;
+}
+
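+/* Move the payload from src to dst: dst adopts src's fence and src is
+ * re-armed with a fresh fence created at value 0. */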
+static VkResult
+dzn_sync_move(struct vk_device *device,
+ struct vk_sync *dst,
+ struct vk_sync *src)
+{
+ dzn_device *ddev = container_of(device, dzn_device, vk);
+ dzn_sync *ddst = container_of(dst, dzn_sync, vk);
+ dzn_sync *dsrc = container_of(src, dzn_sync, vk);
+ ID3D12Fence *new_fence;
+
+ if (FAILED(ddev->dev->CreateFence(0, D3D12_FENCE_FLAG_NONE,
+ IID_PPV_ARGS(&new_fence))))
+ return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+
+ ddst->fence->Release();
+ ddst->fence = dsrc->fence;
+ dsrc->fence = new_fence;
+ return VK_SUCCESS;
+}
+
+static VkResult
+dzn_sync_wait(struct vk_device *device,
+ uint32_t wait_count,
+ const struct vk_sync_wait *waits,
+ enum vk_sync_wait_flags wait_flags,
+ uint64_t abs_timeout_ns)
+{
+ dzn_device *ddev = container_of(device, dzn_device, vk);
+
+ HANDLE event = CreateEventA(NULL, FALSE, FALSE, NULL);
+ if (event == NULL)
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ STACK_ARRAY(ID3D12Fence *, fences, wait_count);
+ STACK_ARRAY(uint64_t, values, wait_count);
+
+ for (uint32_t i = 0; i < wait_count; i++) {
+ dzn_sync *sync = container_of(waits[i].sync, dzn_sync, vk);
+
+ fences[i] = sync->fence;
+ values[i] = (sync->vk.flags & VK_SYNC_IS_TIMELINE) ? waits[i].wait_value : 1;
+ }
+
+ D3D12_MULTIPLE_FENCE_WAIT_FLAGS flags =
+ (wait_flags & VK_SYNC_WAIT_ANY) ?
+ D3D12_MULTIPLE_FENCE_WAIT_FLAG_ANY :
+ D3D12_MULTIPLE_FENCE_WAIT_FLAG_ALL;
+
+ if (FAILED(ddev->dev->SetEventOnMultipleFenceCompletion(fences, values,
+ wait_count, flags,
+ event))) {
+ STACK_ARRAY_FINISH(fences);
+ STACK_ARRAY_FINISH(values);
+ CloseHandle(event);
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+
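+ /* vk_sync passes an absolute timeout in nanoseconds, but
+ * WaitForSingleObject() takes a relative timeout in milliseconds, so
+ * convert and round up to avoid waking up too early. */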
+ DWORD timeout_ms;
+
+ if (abs_timeout_ns == OS_TIMEOUT_INFINITE) {
+ timeout_ms = INFINITE;
+ } else {
+ uint64_t cur_time = os_time_get_nano();
+ uint64_t rel_timeout_ns =
+ abs_timeout_ns > cur_time ? abs_timeout_ns - cur_time : 0;
+
+ timeout_ms = (rel_timeout_ns / 1000000) + (rel_timeout_ns % 1000000 ? 1 : 0);
+ }
+
+ DWORD res =
+ WaitForSingleObject(event, timeout_ms);
+
+ CloseHandle(event);
+
+ STACK_ARRAY_FINISH(fences);
+ STACK_ARRAY_FINISH(values);
+
+ if (res == WAIT_TIMEOUT)
+ return VK_TIMEOUT;
+ else if (res != WAIT_OBJECT_0)
+ return vk_error(device, VK_ERROR_UNKNOWN);
+
+ return VK_SUCCESS;
+}
+
+const struct vk_sync_type dzn_sync_type = {
+ .size = sizeof(dzn_sync),
+ .features = (enum vk_sync_features)
+ (VK_SYNC_FEATURE_BINARY |
+ VK_SYNC_FEATURE_TIMELINE |
+ VK_SYNC_FEATURE_GPU_WAIT |
+ VK_SYNC_FEATURE_GPU_MULTI_WAIT |
+ VK_SYNC_FEATURE_CPU_WAIT |
+ VK_SYNC_FEATURE_CPU_RESET |
+ VK_SYNC_FEATURE_CPU_SIGNAL |
+ VK_SYNC_FEATURE_WAIT_ANY |
+ VK_SYNC_FEATURE_WAIT_BEFORE_SIGNAL),
+
+ .init = dzn_sync_init,
+ .finish = dzn_sync_finish,
+ .signal = dzn_sync_signal,
+ .get_value = dzn_sync_get_value,
+ .reset = dzn_sync_reset,
+ .move = dzn_sync_move,
+ .wait_many = dzn_sync_wait,
+};
diff --git a/src/microsoft/vulkan/dzn_util.c b/src/microsoft/vulkan/dzn_util.c
new file mode 100644
index 00000000000..dd811396f91
--- /dev/null
+++ b/src/microsoft/vulkan/dzn_util.c
@@ -0,0 +1,234 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <math.h>
+
+#include <directx/d3d12.h>
+#include <vulkan/vulkan.h>
+
+#include "util/format/u_format.h"
+
+static const DXGI_FORMAT formats[PIPE_FORMAT_COUNT] = {
+#define MAP_FORMAT_NORM(FMT) \
+ [PIPE_FORMAT_ ## FMT ## _UNORM] = DXGI_FORMAT_ ## FMT ## _UNORM, \
+ [PIPE_FORMAT_ ## FMT ## _SNORM] = DXGI_FORMAT_ ## FMT ## _SNORM,
+
+#define MAP_FORMAT_INT(FMT) \
+ [PIPE_FORMAT_ ## FMT ## _UINT] = DXGI_FORMAT_ ## FMT ## _UINT, \
+ [PIPE_FORMAT_ ## FMT ## _SINT] = DXGI_FORMAT_ ## FMT ## _SINT,
+
+#define MAP_FORMAT_SRGB(FMT) \
+ [PIPE_FORMAT_ ## FMT ## _SRGB] = DXGI_FORMAT_ ## FMT ## _UNORM_SRGB,
+
+#define MAP_FORMAT_FLOAT(FMT) \
+ [PIPE_FORMAT_ ## FMT ## _FLOAT] = DXGI_FORMAT_ ## FMT ## _FLOAT,
+
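+/* Luminance, intensity and alpha formats have no direct DXGI equivalent;
+ * map them to R/RG formats of the same width, with the swizzling expected
+ * to be handled when views are created. */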
+#define MAP_EMU_FORMAT_NO_ALPHA(BITS, TYPE) \
+ [PIPE_FORMAT_L ## BITS ## _ ## TYPE] = DXGI_FORMAT_R ## BITS ## _ ## TYPE, \
+ [PIPE_FORMAT_I ## BITS ## _ ## TYPE] = DXGI_FORMAT_R ## BITS ## _ ## TYPE, \
+ [PIPE_FORMAT_L ## BITS ## A ## BITS ## _ ## TYPE] = \
+ DXGI_FORMAT_R ## BITS ## G ## BITS ## _ ## TYPE,
+
+#define MAP_EMU_FORMAT(BITS, TYPE) \
+ [PIPE_FORMAT_A ## BITS ## _ ## TYPE] = DXGI_FORMAT_R ## BITS ## _ ## TYPE, \
+ MAP_EMU_FORMAT_NO_ALPHA(BITS, TYPE)
+
+ MAP_FORMAT_NORM(R8)
+ MAP_FORMAT_INT(R8)
+
+ MAP_FORMAT_NORM(R8G8)
+ MAP_FORMAT_INT(R8G8)
+
+ MAP_FORMAT_NORM(R8G8B8A8)
+ MAP_FORMAT_INT(R8G8B8A8)
+ MAP_FORMAT_SRGB(R8G8B8A8)
+
+ [PIPE_FORMAT_B8G8R8X8_UNORM] = DXGI_FORMAT_B8G8R8X8_UNORM,
+ [PIPE_FORMAT_B8G8R8A8_UNORM] = DXGI_FORMAT_B8G8R8A8_UNORM,
+ [PIPE_FORMAT_B4G4R4A4_UNORM] = DXGI_FORMAT_B4G4R4A4_UNORM,
+ [PIPE_FORMAT_A4R4G4B4_UNORM] = DXGI_FORMAT_B4G4R4A4_UNORM,
+ [PIPE_FORMAT_B5G6R5_UNORM] = DXGI_FORMAT_B5G6R5_UNORM,
+ [PIPE_FORMAT_B5G5R5A1_UNORM] = DXGI_FORMAT_B5G5R5A1_UNORM,
+
+ MAP_FORMAT_SRGB(B8G8R8A8)
+
+ MAP_FORMAT_INT(R32)
+ MAP_FORMAT_FLOAT(R32)
+ MAP_FORMAT_INT(R32G32)
+ MAP_FORMAT_FLOAT(R32G32)
+ MAP_FORMAT_INT(R32G32B32)
+ MAP_FORMAT_FLOAT(R32G32B32)
+ MAP_FORMAT_INT(R32G32B32A32)
+ MAP_FORMAT_FLOAT(R32G32B32A32)
+
+ MAP_FORMAT_NORM(R16)
+ MAP_FORMAT_INT(R16)
+ MAP_FORMAT_FLOAT(R16)
+
+ MAP_FORMAT_NORM(R16G16)
+ MAP_FORMAT_INT(R16G16)
+ MAP_FORMAT_FLOAT(R16G16)
+
+ MAP_FORMAT_NORM(R16G16B16A16)
+ MAP_FORMAT_INT(R16G16B16A16)
+ MAP_FORMAT_FLOAT(R16G16B16A16)
+
+ [PIPE_FORMAT_A8_UNORM] = DXGI_FORMAT_A8_UNORM,
+ MAP_EMU_FORMAT_NO_ALPHA(8, UNORM)
+ MAP_EMU_FORMAT(8, SNORM)
+ MAP_EMU_FORMAT(8, SINT)
+ MAP_EMU_FORMAT(8, UINT)
+ MAP_EMU_FORMAT(16, UNORM)
+ MAP_EMU_FORMAT(16, SNORM)
+ MAP_EMU_FORMAT(16, SINT)
+ MAP_EMU_FORMAT(16, UINT)
+ MAP_EMU_FORMAT(16, FLOAT)
+ MAP_EMU_FORMAT(32, SINT)
+ MAP_EMU_FORMAT(32, UINT)
+ MAP_EMU_FORMAT(32, FLOAT)
+
+ [PIPE_FORMAT_R9G9B9E5_FLOAT] = DXGI_FORMAT_R9G9B9E5_SHAREDEXP,
+ [PIPE_FORMAT_R11G11B10_FLOAT] = DXGI_FORMAT_R11G11B10_FLOAT,
+ [PIPE_FORMAT_R10G10B10A2_UINT] = DXGI_FORMAT_R10G10B10A2_UINT,
+ [PIPE_FORMAT_R10G10B10A2_UNORM] = DXGI_FORMAT_R10G10B10A2_UNORM,
+
+ [PIPE_FORMAT_DXT1_RGB] = DXGI_FORMAT_BC1_UNORM,
+ [PIPE_FORMAT_DXT1_RGBA] = DXGI_FORMAT_BC1_UNORM,
+ [PIPE_FORMAT_DXT3_RGBA] = DXGI_FORMAT_BC2_UNORM,
+ [PIPE_FORMAT_DXT5_RGBA] = DXGI_FORMAT_BC3_UNORM,
+
+ [PIPE_FORMAT_DXT1_SRGB] = DXGI_FORMAT_BC1_UNORM_SRGB,
+ [PIPE_FORMAT_DXT1_SRGBA] = DXGI_FORMAT_BC1_UNORM_SRGB,
+ [PIPE_FORMAT_DXT3_SRGBA] = DXGI_FORMAT_BC2_UNORM_SRGB,
+ [PIPE_FORMAT_DXT5_SRGBA] = DXGI_FORMAT_BC3_UNORM_SRGB,
+
+ [PIPE_FORMAT_RGTC1_UNORM] = DXGI_FORMAT_BC4_UNORM,
+ [PIPE_FORMAT_RGTC1_SNORM] = DXGI_FORMAT_BC4_SNORM,
+ [PIPE_FORMAT_RGTC2_UNORM] = DXGI_FORMAT_BC5_UNORM,
+ [PIPE_FORMAT_RGTC2_SNORM] = DXGI_FORMAT_BC5_SNORM,
+
+ [PIPE_FORMAT_BPTC_RGB_UFLOAT] = DXGI_FORMAT_BC6H_UF16,
+ [PIPE_FORMAT_BPTC_RGB_FLOAT] = DXGI_FORMAT_BC6H_SF16,
+ [PIPE_FORMAT_BPTC_RGBA_UNORM] = DXGI_FORMAT_BC7_UNORM,
+ [PIPE_FORMAT_BPTC_SRGBA] = DXGI_FORMAT_BC7_UNORM_SRGB,
+
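+ /* Depth/stencil formats map to TYPELESS variants so the same resource
+ * can back both depth/stencil and shader-resource views. */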
+ [PIPE_FORMAT_Z32_FLOAT] = DXGI_FORMAT_R32_TYPELESS,
+ [PIPE_FORMAT_Z16_UNORM] = DXGI_FORMAT_R16_TYPELESS,
+ [PIPE_FORMAT_Z24X8_UNORM] = DXGI_FORMAT_R24G8_TYPELESS,
+ [PIPE_FORMAT_X24S8_UINT] = DXGI_FORMAT_R24G8_TYPELESS,
+
+ [PIPE_FORMAT_Z24_UNORM_S8_UINT] = DXGI_FORMAT_R24G8_TYPELESS,
+ [PIPE_FORMAT_Z32_FLOAT_S8X24_UINT] = DXGI_FORMAT_R32G8X24_TYPELESS,
+ [PIPE_FORMAT_X32_S8X24_UINT] = DXGI_FORMAT_R32G8X24_TYPELESS,
+};
+
+DXGI_FORMAT
+dzn_pipe_to_dxgi_format(enum pipe_format in)
+{
+ return formats[in];
+}
+
+struct dzn_sampler_filter_info {
+ VkFilter min, mag;
+ VkSamplerMipmapMode mipmap;
+};
+
+#define FILTER(__min, __mag, __mipmap) \
+{ \
+ .min = VK_FILTER_ ## __min, \
+ .mag = VK_FILTER_ ## __mag, \
+ .mipmap = VK_SAMPLER_MIPMAP_MODE_ ## __mipmap, \
+}
+
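+/* The first eight D3D12_FILTER values cover every point/linear min/mag/mip
+ * combination, so the enum value doubles as a table index; comparison
+ * variants are the base value plus a fixed offset (see
+ * dzn_translate_sampler_filter() below). */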
+static const struct dzn_sampler_filter_info filter_table[] = {
+ [D3D12_FILTER_MIN_MAG_MIP_POINT] = FILTER(NEAREST, NEAREST, NEAREST),
+ [D3D12_FILTER_MIN_MAG_POINT_MIP_LINEAR] = FILTER(NEAREST, NEAREST, LINEAR),
+ [D3D12_FILTER_MIN_POINT_MAG_LINEAR_MIP_POINT] = FILTER(NEAREST, LINEAR, NEAREST),
+ [D3D12_FILTER_MIN_POINT_MAG_MIP_LINEAR] = FILTER(NEAREST, LINEAR, LINEAR),
+ [D3D12_FILTER_MIN_LINEAR_MAG_MIP_POINT] = FILTER(LINEAR, NEAREST, NEAREST),
+ [D3D12_FILTER_MIN_LINEAR_MAG_POINT_MIP_LINEAR] = FILTER(LINEAR, NEAREST, LINEAR),
+ [D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT] = FILTER(LINEAR, LINEAR, NEAREST),
+ [D3D12_FILTER_MIN_MAG_MIP_LINEAR] = FILTER(LINEAR, LINEAR, LINEAR),
+};
+
+D3D12_FILTER
+dzn_translate_sampler_filter(const VkSamplerCreateInfo *create_info)
+{
+ /* Point sampling by default; the table lookup below overwrites this for
+ * every valid filter combination (asserted in debug builds). */
+ D3D12_FILTER filter = D3D12_FILTER_MIN_MAG_MIP_POINT;
+
+ if (!create_info->anisotropyEnable) {
+ unsigned i;
+ for (i = 0; i < ARRAY_SIZE(filter_table); i++) {
+ if (create_info->minFilter == filter_table[i].min &&
+ create_info->magFilter == filter_table[i].mag &&
+ create_info->mipmapMode == filter_table[i].mipmap) {
+ filter = (D3D12_FILTER)i;
+ break;
+ }
+ }
+
+ assert(i < ARRAY_SIZE(filter_table));
+ } else {
+ filter = D3D12_FILTER_ANISOTROPIC;
+ }
+
+ if (create_info->compareEnable)
+ filter = (D3D12_FILTER)(filter + D3D12_FILTER_COMPARISON_MIN_MAG_MIP_POINT);
+
+ return filter;
+}
+
+D3D12_COMPARISON_FUNC
+dzn_translate_compare_op(VkCompareOp in)
+{
+ switch (in) {
+ case VK_COMPARE_OP_NEVER: return D3D12_COMPARISON_FUNC_NEVER;
+ case VK_COMPARE_OP_LESS: return D3D12_COMPARISON_FUNC_LESS;
+ case VK_COMPARE_OP_EQUAL: return D3D12_COMPARISON_FUNC_EQUAL;
+ case VK_COMPARE_OP_LESS_OR_EQUAL: return D3D12_COMPARISON_FUNC_LESS_EQUAL;
+ case VK_COMPARE_OP_GREATER: return D3D12_COMPARISON_FUNC_GREATER;
+ case VK_COMPARE_OP_NOT_EQUAL: return D3D12_COMPARISON_FUNC_NOT_EQUAL;
+ case VK_COMPARE_OP_GREATER_OR_EQUAL: return D3D12_COMPARISON_FUNC_GREATER_EQUAL;
+ case VK_COMPARE_OP_ALWAYS: return D3D12_COMPARISON_FUNC_ALWAYS;
+ default: unreachable("Invalid compare op");
+ }
+}
+
+void
+dzn_translate_viewport(D3D12_VIEWPORT *out,
+ const VkViewport *in)
+{
+ out->TopLeftX = in->x;
+ out->TopLeftY = in->y;
+ out->Width = in->width;
+ out->Height = fabsf(in->height);
+ out->MinDepth = MIN2(in->minDepth, in->maxDepth);
+ out->MaxDepth = MAX2(in->maxDepth, in->minDepth);
+}
+
+void
+dzn_translate_rect(D3D12_RECT *out,
+ const VkRect2D *in)
+{
+ out->left = in->offset.x;
+ out->top = in->offset.y;
+ out->right = in->offset.x + in->extent.width;
+ out->bottom = in->offset.y + in->extent.height;
+}
diff --git a/src/microsoft/vulkan/dzn_util.cpp b/src/microsoft/vulkan/dzn_util.cpp
new file mode 100644
index 00000000000..9c1740d8b97
--- /dev/null
+++ b/src/microsoft/vulkan/dzn_util.cpp
@@ -0,0 +1,226 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "dzn_private.h"
+
+#include "vk_enum_to_str.h"
+
+#include <stdarg.h>
+#include <stdio.h>
+
+#include <directx/d3d12sdklayers.h>
+
+IDXGIFactory4 *
+dxgi_get_factory(bool debug)
+{
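+ /* IID_IDXGIFactory4 spelled out inline to avoid a link-time dependency
+ * on dxguid. */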
+ static const GUID IID_IDXGIFactory4 = {
+ 0x1bc6ea02, 0xef36, 0x464f,
+ { 0xbf, 0x0c, 0x21, 0xca, 0x39, 0xe5, 0x16, 0x8a }
+ };
+
+ HMODULE dxgi_mod = LoadLibraryA("DXGI.DLL");
+ if (!dxgi_mod) {
+ mesa_loge("failed to load DXGI.DLL\n");
+ return NULL;
+ }
+
+ typedef HRESULT(WINAPI *PFN_CREATE_DXGI_FACTORY2)(UINT flags, REFIID riid, void **ppFactory);
+ PFN_CREATE_DXGI_FACTORY2 CreateDXGIFactory2;
+
+ CreateDXGIFactory2 = (PFN_CREATE_DXGI_FACTORY2)GetProcAddress(dxgi_mod, "CreateDXGIFactory2");
+ if (!CreateDXGIFactory2) {
+ mesa_loge("failed to load CreateDXGIFactory2 from DXGI.DLL\n");
+ return NULL;
+ }
+
+ UINT flags = 0;
+ if (debug)
+ flags |= DXGI_CREATE_FACTORY_DEBUG;
+
+ IDXGIFactory4 *factory;
+ HRESULT hr = CreateDXGIFactory2(flags, IID_IDXGIFactory4, (void **)&factory);
+ if (FAILED(hr)) {
+ mesa_loge("CreateDXGIFactory2 failed: %08x\n", hr);
+ return NULL;
+ }
+
+ return factory;
+}
+
+static ComPtr<ID3D12Debug>
+get_debug_interface()
+{
+ typedef HRESULT(WINAPI *PFN_D3D12_GET_DEBUG_INTERFACE)(REFIID riid, void **ppFactory);
+ PFN_D3D12_GET_DEBUG_INTERFACE D3D12GetDebugInterface;
+
+ HMODULE d3d12_mod = LoadLibraryA("D3D12.DLL");
+ if (!d3d12_mod) {
+ mesa_loge("failed to load D3D12.DLL\n");
+ return NULL;
+ }
+
+ D3D12GetDebugInterface = (PFN_D3D12_GET_DEBUG_INTERFACE)GetProcAddress(d3d12_mod, "D3D12GetDebugInterface");
+ if (!D3D12GetDebugInterface) {
+ mesa_loge("failed to load D3D12GetDebugInterface from D3D12.DLL\n");
+ return NULL;
+ }
+
+ ComPtr<ID3D12Debug> debug;
+ if (FAILED(D3D12GetDebugInterface(IID_PPV_ARGS(&debug)))) {
+ mesa_loge("D3D12GetDebugInterface failed\n");
+ return NULL;
+ }
+
+ return debug;
+}
+
+void
+d3d12_enable_debug_layer()
+{
+ ComPtr<ID3D12Debug> debug = get_debug_interface();
+ if (debug)
+ debug->EnableDebugLayer();
+}
+
+void
+d3d12_enable_gpu_validation()
+{
+ ComPtr<ID3D12Debug> debug = get_debug_interface();
+ ComPtr<ID3D12Debug3> debug3;
+ if (debug &&
+ SUCCEEDED(debug->QueryInterface(IID_PPV_ARGS(&debug3))))
+ debug3->SetEnableGPUBasedValidation(true);
+}
+
+ID3D12Device1 *
+d3d12_create_device(IUnknown *adapter, bool experimental_features)
+{
+ typedef HRESULT(WINAPI *PFN_D3D12CREATEDEVICE)(IUnknown*, D3D_FEATURE_LEVEL, REFIID, void**);
+ PFN_D3D12CREATEDEVICE D3D12CreateDevice;
+
+ HMODULE d3d12_mod = LoadLibraryA("D3D12.DLL");
+ if (!d3d12_mod) {
+ mesa_loge("failed to load D3D12.DLL\n");
+ return NULL;
+ }
+
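+ /* Outside of Win32 builds the experimental-features path is always taken;
+ * presumably DXIL signing isn't available there, so experimental shader
+ * models must be enabled for unsigned shaders to run. */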
+#ifdef _WIN32
+ if (experimental_features)
+#endif
+ {
+ typedef HRESULT(WINAPI *PFN_D3D12ENABLEEXPERIMENTALFEATURES)(UINT, const IID*, void*, UINT*);
+ PFN_D3D12ENABLEEXPERIMENTALFEATURES D3D12EnableExperimentalFeatures =
+ (PFN_D3D12ENABLEEXPERIMENTALFEATURES)GetProcAddress(d3d12_mod, "D3D12EnableExperimentalFeatures");
+ if (!D3D12EnableExperimentalFeatures ||
+ FAILED(D3D12EnableExperimentalFeatures(1, &D3D12ExperimentalShaderModels, NULL, NULL))) {
+ mesa_loge("failed to enable experimental shader models\n");
+ return nullptr;
+ }
+ }
+
+ D3D12CreateDevice = (PFN_D3D12CREATEDEVICE)GetProcAddress(d3d12_mod, "D3D12CreateDevice");
+ if (!D3D12CreateDevice) {
+ mesa_loge("failed to load D3D12CreateDevice from D3D12.DLL\n");
+ return NULL;
+ }
+
+ ID3D12Device1 *dev;
+ if (SUCCEEDED(D3D12CreateDevice(adapter, D3D_FEATURE_LEVEL_11_0,
+ IID_PPV_ARGS(&dev))))
+ return dev;
+
+ mesa_loge("D3D12CreateDevice failed\n");
+ return NULL;
+}
+
+IDxcValidator *
+dxil_get_validator(void)
+{
+ IDxcValidator *ret = NULL;
+
+ HMODULE dxil_mod = LoadLibraryA("dxil.dll");
+ if (!dxil_mod) {
+ mesa_loge("failed to load dxil.dll\n");
+ return ret;
+ }
+
+ DxcCreateInstanceProc CreateInstance = (DxcCreateInstanceProc)
+ GetProcAddress(dxil_mod, "DxcCreateInstance");
+ HRESULT hr = CreateInstance(CLSID_DxcValidator, IID_PPV_ARGS(&ret));
+ if (FAILED(hr))
+ mesa_loge("DxcCreateInstance failed: %08x\n", hr);
+
+ return ret;
+}
+
+PFN_D3D12_SERIALIZE_VERSIONED_ROOT_SIGNATURE
+d3d12_get_serialize_root_sig(void)
+{
+ HMODULE d3d12_mod = LoadLibraryA("d3d12.dll");
+ if (!d3d12_mod) {
+ mesa_loge("failed to load d3d12.dll\n");
+ return NULL;
+ }
+
+ return (PFN_D3D12_SERIALIZE_VERSIONED_ROOT_SIGNATURE)
+ GetProcAddress(d3d12_mod, "D3D12SerializeVersionedRootSignature");
+}
+
+IDxcLibrary *
+dxc_get_library(void)
+{
+ IDxcLibrary *ret = NULL;
+
+ HMODULE dxil_mod = LoadLibraryA("dxcompiler.dll");
+ if (!dxil_mod) {
+ mesa_loge("failed to load dxcompiler.dll\n");
+ return ret;
+ }
+
+ DxcCreateInstanceProc CreateInstance = (DxcCreateInstanceProc)
+ GetProcAddress(dxil_mod, "DxcCreateInstance");
+ HRESULT hr = CreateInstance(CLSID_DxcLibrary, IID_PPV_ARGS(&ret));
+ if (FAILED(hr))
+ mesa_loge("DxcCreateInstance failed: %08x\n", hr);
+
+ return ret;
+}
+
+IDxcCompiler *
+dxc_get_compiler(void)
+{
+ IDxcCompiler *ret = NULL;
+
+ HMODULE dxil_mod = LoadLibraryA("dxcompiler.dll");
+ if (!dxil_mod) {
+ mesa_loge("failed to load dxcompiler.dll\n");
+ return ret;
+ }
+
+ DxcCreateInstanceProc CreateInstance = (DxcCreateInstanceProc)
+ GetProcAddress(dxil_mod, "DxcCreateInstance");
+ HRESULT hr = CreateInstance(CLSID_DxcCompiler, IID_PPV_ARGS(&ret));
+ if (FAILED(hr))
+ mesa_loge("DxcCreateInstance failed: %08x\n", hr);
+
+ return ret;
+}
diff --git a/src/microsoft/vulkan/dzn_wsi.cpp b/src/microsoft/vulkan/dzn_wsi.cpp
new file mode 100644
index 00000000000..1cb95ef0a00
--- /dev/null
+++ b/src/microsoft/vulkan/dzn_wsi.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "dzn_private.h"
+#include "vk_util.h"
+
+static PFN_vkVoidFunction VKAPI_PTR
+dzn_wsi_proc_addr(VkPhysicalDevice physicalDevice, const char *pName)
+{
+ VK_FROM_HANDLE(dzn_physical_device, pdevice, physicalDevice);
+ return vk_instance_get_proc_addr_unchecked(pdevice->vk.instance, pName);
+}
+
+void
+dzn_wsi_finish(struct dzn_physical_device *physical_device)
+{
+ wsi_device_finish(&physical_device->wsi_device,
+ &physical_device->vk.instance->alloc);
+}
+
+VkResult
+dzn_wsi_init(struct dzn_physical_device *physical_device)
+{
+ VkResult result;
+
+ /* TODO: implement a proper, non-sw winsys for D3D12 */
+ bool sw_device = true;
+
+ result = wsi_device_init(&physical_device->wsi_device,
+ dzn_physical_device_to_handle(physical_device),
+ dzn_wsi_proc_addr,
+ &physical_device->vk.instance->alloc,
+ -1, NULL, sw_device);
+
+ if (result != VK_SUCCESS)
+ return result;
+
+ physical_device->wsi_device.supports_modifiers = false;
+ physical_device->vk.wsi_device = &physical_device->wsi_device;
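+ /* D3D12 presentation synchronizes through memory, so let the common WSI
+ * code signal the acquire semaphore/fence with a memory-based signal. */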
+ physical_device->wsi_device.signal_semaphore_with_memory = true;
+ physical_device->wsi_device.signal_fence_with_memory = true;
+
+ return VK_SUCCESS;
+}
diff --git a/src/microsoft/vulkan/meson.build b/src/microsoft/vulkan/meson.build
new file mode 100644
index 00000000000..fb859d97a9d
--- /dev/null
+++ b/src/microsoft/vulkan/meson.build
@@ -0,0 +1,123 @@
+# Copyright © Microsoft Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+
+dzn_entrypoints = custom_target(
+ 'dzn_entrypoints',
+ input : [vk_entrypoints_gen, vk_api_xml],
+ output : ['dzn_entrypoints.h', 'dzn_entrypoints.c'],
+ command : [
+ prog_python, '@INPUT0@', '--xml', '@INPUT1@', '--proto', '--weak',
+ '--out-h', '@OUTPUT0@', '--out-c', '@OUTPUT1@', '--prefix', 'dzn'
+ ],
+ depend_files : vk_entrypoints_gen_depend_files,
+)
+
+libdzn_files = files(
+ 'dzn_cmd_buffer.cpp',
+ 'dzn_descriptor_set.cpp',
+ 'dzn_device.cpp',
+ 'dzn_image.cpp',
+ 'dzn_meta.cpp',
+ 'dzn_nir.c',
+ 'dzn_pass.cpp',
+ 'dzn_pipeline_cache.cpp',
+ 'dzn_pipeline.cpp',
+ 'dzn_query.cpp',
+ 'dzn_sync.cpp',
+ 'dzn_util.cpp',
+ 'dzn_util.c',
+ 'dzn_wsi.cpp',
+)
+
+dzn_deps = [
+ idep_libdxil_compiler,
+ idep_libspirv_to_dxil,
+ idep_nir,
+ idep_nir_headers,
+ idep_vulkan_util,
+ idep_vulkan_runtime,
+ idep_vulkan_wsi,
+ dep_dxheaders,
+]
+
+dzn_flags = [ ]
+
+if with_platform_windows
+ dzn_flags += '-DVK_USE_PLATFORM_WIN32_KHR'
+endif
+
+libvulkan_dzn = shared_library(
+ 'vulkan_dzn',
+ [libdzn_files, dzn_entrypoints, sha1_h],
+ vs_module_defs : 'vulkan_dzn.def',
+ include_directories : [
+ inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux,
+ inc_compiler, inc_util
+ ],
+ dependencies : [dzn_deps, idep_vulkan_wsi],
+ c_args : dzn_flags,
+ cpp_args : dzn_flags,
+ gnu_symbol_visibility : 'hidden',
+ link_args : [ld_args_bsymbolic, ld_args_gc_sections],
+ name_prefix : host_machine.system() == 'windows' ? '' : 'lib',
+ install : true,
+ override_options: ['cpp_std=c++latest']
+)
+
+icd_file_name = 'libvulkan_dzn.so'
+module_dir = join_paths(get_option('prefix'), get_option('libdir'))
+if with_platform_windows
+ module_dir = join_paths(get_option('prefix'), get_option('bindir'))
+ icd_file_name = 'vulkan_dzn.dll'
+endif
+
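+# Generate the ICD manifest the Vulkan loader reads to locate the installed
+# driver library.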
+dzn_icd = custom_target(
+ 'dzn_icd',
+ input : [vk_icd_gen, vk_api_xml],
+ output : 'dzn_icd.@0@.json'.format(host_machine.cpu()),
+ command : [
+ prog_python, '@INPUT0@',
+ '--api-version', '1.2', '--xml', '@INPUT1@',
+ '--lib-path', join_paths(module_dir, icd_file_name),
+ '--out', '@OUTPUT@',
+ ],
+ build_by_default : true,
+ install_dir : with_vulkan_icd_dir,
+ install : true,
+)
+
+if meson.version().version_compare('>= 0.58')
+ _dev_icdname = 'dzn_devenv_icd.@0@.json'.format(host_machine.cpu())
+ custom_target(
+ 'dzn_devenv_icd',
+ input : [vk_icd_gen, vk_api_xml],
+ output : _dev_icdname,
+ command : [
+ prog_python, '@INPUT0@',
+ '--api-version', '1.2', '--xml', '@INPUT1@',
+ '--lib-path', meson.current_build_dir() / icd_file_name,
+ '--out', '@OUTPUT@',
+ ],
+ build_by_default : true,
+ )
+
+ devenv.append('VK_ICD_FILENAMES', meson.current_build_dir() / _dev_icdname)
+endif
diff --git a/src/microsoft/vulkan/vulkan_dzn.def b/src/microsoft/vulkan/vulkan_dzn.def
new file mode 100644
index 00000000000..64a9caae593
--- /dev/null
+++ b/src/microsoft/vulkan/vulkan_dzn.def
@@ -0,0 +1,4 @@
+EXPORTS
+vk_icdNegotiateLoaderICDInterfaceVersion
+vk_icdGetInstanceProcAddr
+vk_icdGetPhysicalDeviceProcAddr