diff options
author | Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> | 2020-11-10 13:47:35 +0100 |
---|---|---|
committer | Marge Bot <eric+marge@anholt.net> | 2020-11-19 12:44:40 +0000 |
commit | 2be8cebd0b062aa91a6198ba8ae99cc8749b2e8d (patch) | |
tree | 4dac152007b7dfa7cd073c7f15bb2f9a3ebcfcf3 | |
parent | 111a1b2e1c073b34917d4720dca11fb1ba7e6b46 (diff) |
amdgpu_bo: make cache_entry an extensible array
Improves performance in SPECviewperf13 snx.
e.g.: test10 frame rate improves from 270 to 280 fps.
"pahole radeonsi_dri.so -C amdgpu_winsys_bo" after:
struct amdgpu_winsys_bo {
struct pb_buffer base; /* 0 32 */
union {
struct {
amdgpu_va_handle va_handle; /* 32 8 */
uint32_t kms_handle; /* 40 4 */
int map_count; /* 44 4 */
} real; /* 32 16 */
[...]
} u; /* 32 40 */
/* --- cacheline 1 boundary (64 bytes) was 8 bytes ago --- */
[...]
struct pb_cache_entry cache_entry[]; /* 144 0 */
/* size: 144, cachelines: 3, members: 17 */
};
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7532>
-rw-r--r-- | src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 18 | ||||
-rw-r--r-- | src/gallium/winsys/amdgpu/drm/amdgpu_bo.h | 8 |
2 files changed, 15 insertions, 11 deletions
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c index d17606eef73..1d037b96874 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c @@ -230,8 +230,8 @@ static void amdgpu_bo_destroy_or_cache(struct pb_buffer *_buf) assert(bo->bo); /* slab buffers have a separate vtbl */ - if (bo->u.real.use_reusable_pool) - pb_cache_add_buffer(&bo->u.real.cache_entry); + if (bo->use_reusable_pool) + pb_cache_add_buffer(bo->cache_entry); else amdgpu_bo_destroy(_buf); } @@ -476,6 +476,7 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws, struct amdgpu_winsys_bo *bo; amdgpu_va_handle va_handle = NULL; int r; + bool init_pb_cache; /* VRAM or GTT must be specified, but not both at the same time. */ assert(util_bitcount(initial_domain & (RADEON_DOMAIN_VRAM_GTT | @@ -484,13 +485,17 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws, alignment = amdgpu_get_optimal_alignment(ws, size, alignment); - bo = CALLOC_STRUCT(amdgpu_winsys_bo); + init_pb_cache = heap >= 0 && (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING); + + bo = CALLOC(1, sizeof(struct amdgpu_winsys_bo) + + init_pb_cache * sizeof(struct pb_cache_entry)); if (!bo) { return NULL; } - if (heap >= 0) { - pb_cache_init_entry(&ws->bo_cache, &bo->u.real.cache_entry, &bo->base, + if (init_pb_cache) { + bo->use_reusable_pool = true; + pb_cache_init_entry(&ws->bo_cache, bo->cache_entry, &bo->base, heap); } request.alloc_size = size; @@ -1378,7 +1383,6 @@ no_slab: return NULL; } - bo->u.real.use_reusable_pool = use_reusable_pool; return &bo->base; } @@ -1533,7 +1537,7 @@ static bool amdgpu_bo_get_handle(struct radeon_winsys *rws, if (!bo->bo) return false; - bo->u.real.use_reusable_pool = false; + bo->use_reusable_pool = false; switch (whandle->type) { case WINSYS_HANDLE_TYPE_SHARED: diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h index 
2190794102c..d33c141e90f 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h @@ -59,15 +59,12 @@ struct amdgpu_winsys_bo { struct pb_buffer base; union { struct { - struct pb_cache_entry cache_entry; - amdgpu_va_handle va_handle; - int map_count; - bool use_reusable_pool; #if DEBUG struct list_head global_list_item; #endif uint32_t kms_handle; + int map_count; } real; struct { struct pb_slab_entry entry; @@ -91,6 +88,7 @@ struct amdgpu_winsys_bo { amdgpu_bo_handle bo; /* NULL for slab entries and sparse buffers */ bool is_user_ptr; + bool use_reusable_pool; uint32_t unique_id; uint64_t va; simple_mtx_t lock; @@ -111,6 +109,8 @@ struct amdgpu_winsys_bo { unsigned num_fences; unsigned max_fences; struct pipe_fence_handle **fences; + + struct pb_cache_entry cache_entry[]; }; struct amdgpu_slab { |