From 7bef19201822ab2aebfd244142ff3a23535019a7 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Wed, 9 Jan 2019 14:40:00 +0100 Subject: radv: add support for VK_EXT_memory_budget A simple Vulkan extension that allows apps to query size and usage of all exposed memory heaps. The different usage values are not really accurate because they are per drm-fd, but they should be close enough. Signed-off-by: Samuel Pitoiset Reviewed-by: Alex Smith Reviewed-by: Bas Nieuwenhuizen --- src/amd/vulkan/radv_device.c | 72 +++++++++++++++++++++++ src/amd/vulkan/radv_extensions.py | 1 + src/amd/vulkan/radv_radeon_winsys.h | 4 ++ src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c | 38 +++++++++++- src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c | 6 ++ src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h | 4 ++ 6 files changed, 124 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 6b5fd10c83b..6cadbe722ae 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -1350,12 +1350,84 @@ void radv_GetPhysicalDeviceMemoryProperties( *pMemoryProperties = physical_device->memory_properties; } +static void +radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice, + VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget) +{ + RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice); + VkPhysicalDeviceMemoryProperties *memory_properties = &device->memory_properties; + uint64_t visible_vram_size = radv_get_visible_vram_size(device); + uint64_t vram_size = radv_get_vram_size(device); + uint64_t gtt_size = device->rad_info.gart_size; + uint64_t heap_budget, heap_usage; + + /* For all memory heaps, the computation of budget is as follows: + * heap_budget = heap_size - global_heap_usage + app_heap_usage + * + * The Vulkan spec 1.1.97 says that the budget should include any + * currently allocated device memory. + * + * Note that the application heap usages are not really accurate (e.g. 
+ * in presence of shared buffers). + */ + if (vram_size) { + heap_usage = device->ws->query_value(device->ws, + RADEON_ALLOCATED_VRAM); + + heap_budget = vram_size - + device->ws->query_value(device->ws, RADEON_VRAM_USAGE) + + heap_usage; + + memoryBudget->heapBudget[RADV_MEM_HEAP_VRAM] = heap_budget; + memoryBudget->heapUsage[RADV_MEM_HEAP_VRAM] = heap_usage; + } + + if (visible_vram_size) { + heap_usage = device->ws->query_value(device->ws, + RADEON_ALLOCATED_VRAM_VIS); + + heap_budget = visible_vram_size - + device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE) + + heap_usage; + + memoryBudget->heapBudget[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = heap_budget; + memoryBudget->heapUsage[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = heap_usage; + } + + if (gtt_size) { + heap_usage = device->ws->query_value(device->ws, + RADEON_ALLOCATED_GTT); + + heap_budget = gtt_size - + device->ws->query_value(device->ws, RADEON_GTT_USAGE) + + heap_usage; + + memoryBudget->heapBudget[RADV_MEM_HEAP_GTT] = heap_budget; + memoryBudget->heapUsage[RADV_MEM_HEAP_GTT] = heap_usage; + } + + /* The heapBudget and heapUsage values must be zero for array elements + * greater than or equal to + * VkPhysicalDeviceMemoryProperties::memoryHeapCount. 
+ */ + for (uint32_t i = memory_properties->memoryHeapCount; i < VK_MAX_MEMORY_HEAPS; i++) { + memoryBudget->heapBudget[i] = 0; + memoryBudget->heapUsage[i] = 0; + } +} + void radv_GetPhysicalDeviceMemoryProperties2( VkPhysicalDevice physicalDevice, VkPhysicalDeviceMemoryProperties2 *pMemoryProperties) { radv_GetPhysicalDeviceMemoryProperties(physicalDevice, &pMemoryProperties->memoryProperties); + + VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget = + vk_find_struct(pMemoryProperties->pNext, + PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT); + if (memory_budget) + radv_get_memory_budget_properties(physicalDevice, memory_budget); } VkResult radv_GetMemoryHostPointerPropertiesEXT( diff --git a/src/amd/vulkan/radv_extensions.py b/src/amd/vulkan/radv_extensions.py index 9952bb9c1c6..491ed9d94c3 100644 --- a/src/amd/vulkan/radv_extensions.py +++ b/src/amd/vulkan/radv_extensions.py @@ -105,6 +105,7 @@ EXTENSIONS = [ Extension('VK_EXT_external_memory_dma_buf', 1, True), Extension('VK_EXT_external_memory_host', 1, 'device->rad_info.has_userptr'), Extension('VK_EXT_global_priority', 1, 'device->rad_info.has_ctx_priority'), + Extension('VK_EXT_memory_budget', 1, True), Extension('VK_EXT_pci_bus_info', 2, True), Extension('VK_EXT_sampler_filter_minmax', 1, 'device->rad_info.chip_class >= CIK'), Extension('VK_EXT_scalar_block_layout', 1, 'device->rad_info.chip_class >= CIK'), diff --git a/src/amd/vulkan/radv_radeon_winsys.h b/src/amd/vulkan/radv_radeon_winsys.h index e9d541ab150..d9b46d89cf3 100644 --- a/src/amd/vulkan/radv_radeon_winsys.h +++ b/src/amd/vulkan/radv_radeon_winsys.h @@ -84,6 +84,9 @@ enum radeon_ctx_priority { }; enum radeon_value_id { + RADEON_ALLOCATED_VRAM, + RADEON_ALLOCATED_VRAM_VIS, + RADEON_ALLOCATED_GTT, RADEON_TIMESTAMP, RADEON_NUM_BYTES_MOVED, RADEON_NUM_EVICTIONS, @@ -164,6 +167,7 @@ struct radeon_winsys_fence; struct radeon_winsys_bo { uint64_t va; bool is_local; + bool vram_cpu_access; }; struct radv_winsys_sem_counts { uint32_t 
syncobj_count; diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c index a9bd55eac8f..7194d5a3236 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c @@ -249,6 +249,7 @@ radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys_bo *_parent, static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo) { struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo); + struct radv_amdgpu_winsys *ws = bo->ws; if (p_atomic_dec_return(&bo->ref_count)) return; @@ -269,6 +270,17 @@ static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo) 0, AMDGPU_VA_OP_UNMAP); amdgpu_bo_free(bo->bo); } + + if (bo->initial_domain & RADEON_DOMAIN_VRAM) + p_atomic_add(&ws->allocated_vram, + -align64(bo->size, ws->info.gart_page_size)); + if (bo->base.vram_cpu_access) + p_atomic_add(&ws->allocated_vram_vis, + -align64(bo->size, ws->info.gart_page_size)); + if (bo->initial_domain & RADEON_DOMAIN_GTT) + p_atomic_add(&ws->allocated_gtt, + -align64(bo->size, ws->info.gart_page_size)); + amdgpu_va_range_free(bo->va_handle); FREE(bo); } @@ -344,8 +356,10 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws, if (initial_domain & RADEON_DOMAIN_GTT) request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT; - if (flags & RADEON_FLAG_CPU_ACCESS) + if (flags & RADEON_FLAG_CPU_ACCESS) { + bo->base.vram_cpu_access = initial_domain & RADEON_DOMAIN_VRAM; request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + } if (flags & RADEON_FLAG_NO_CPU_ACCESS) request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS; if (flags & RADEON_FLAG_GTT_WC) @@ -378,6 +392,17 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws, bo->bo = buf_handle; bo->initial_domain = initial_domain; bo->is_shared = false; + + if (initial_domain & RADEON_DOMAIN_VRAM) + p_atomic_add(&ws->allocated_vram, + align64(bo->size, ws->info.gart_page_size)); + if (bo->base.vram_cpu_access) + 
p_atomic_add(&ws->allocated_vram_vis, + align64(bo->size, ws->info.gart_page_size)); + if (initial_domain & RADEON_DOMAIN_GTT) + p_atomic_add(&ws->allocated_gtt, + align64(bo->size, ws->info.gart_page_size)); + radv_amdgpu_add_buffer_to_global_list(bo); return (struct radeon_winsys_bo *)bo; error_va_map: @@ -474,6 +499,9 @@ radv_amdgpu_winsys_bo_from_ptr(struct radeon_winsys *_ws, bo->bo = buf_handle; bo->initial_domain = RADEON_DOMAIN_GTT; + p_atomic_add(&ws->allocated_gtt, + align64(bo->size, ws->info.gart_page_size)); + radv_amdgpu_add_buffer_to_global_list(bo); return (struct radeon_winsys_bo *)bo; @@ -538,6 +566,14 @@ radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws, bo->is_shared = true; bo->ws = ws; bo->ref_count = 1; + + if (bo->initial_domain & RADEON_DOMAIN_VRAM) + p_atomic_add(&ws->allocated_vram, + align64(bo->size, ws->info.gart_page_size)); + if (bo->initial_domain & RADEON_DOMAIN_GTT) + p_atomic_add(&ws->allocated_gtt, + align64(bo->size, ws->info.gart_page_size)); + radv_amdgpu_add_buffer_to_global_list(bo); return (struct radeon_winsys_bo *)bo; error_va_map: diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c index 9706c04e8cd..d3a57f6b4f3 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c @@ -72,6 +72,12 @@ static uint64_t radv_amdgpu_winsys_query_value(struct radeon_winsys *rws, uint64_t retval = 0; switch (value) { + case RADEON_ALLOCATED_VRAM: + return ws->allocated_vram; + case RADEON_ALLOCATED_VRAM_VIS: + return ws->allocated_vram_vis; + case RADEON_ALLOCATED_GTT: + return ws->allocated_gtt; case RADEON_TIMESTAMP: amdgpu_query_info(ws->dev, AMDGPU_INFO_TIMESTAMP, 8, &retval); return retval; diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h index 80a1c6f2926..edec0a1ed78 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h +++ 
b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h @@ -52,6 +52,10 @@ struct radv_amdgpu_winsys { pthread_mutex_t global_bo_list_lock; struct list_head global_bo_list; + + uint64_t allocated_vram; + uint64_t allocated_vram_vis; + uint64_t allocated_gtt; }; static inline struct radv_amdgpu_winsys * -- cgit v1.2.3