summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> 2020-11-10 13:47:35 +0100
committer: Marge Bot <eric+marge@anholt.net> 2020-11-19 12:44:40 +0000
commit: 2be8cebd0b062aa91a6198ba8ae99cc8749b2e8d (patch)
tree: 4dac152007b7dfa7cd073c7f15bb2f9a3ebcfcf3
parent: 111a1b2e1c073b34917d4720dca11fb1ba7e6b46 (diff)
amdgpu_bo: make cache_entry an extensible array
Improves performance in SPECviewperf13 snx, e.g. test10 fps evolution: 270 -> 280.

"pahole radeonsi_dri.so -C amdgpu_winsys_bo" after:

struct amdgpu_winsys_bo {
    struct pb_buffer base;                       /*     0    32 */
    union {
        struct {
            amdgpu_va_handle va_handle;          /*    32     8 */
            uint32_t kms_handle;                 /*    40     4 */
            int map_count;                       /*    44     4 */
        } real;                                  /*    32    16 */
        [...]
    } u;                                         /*    32    40 */
    /* --- cacheline 1 boundary (64 bytes) was 8 bytes ago --- */
    [...]
    struct pb_cache_entry cache_entry[];         /*   144     0 */

    /* size: 144, cachelines: 3, members: 17 */
};

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7532>
-rw-r--r--  src/gallium/winsys/amdgpu/drm/amdgpu_bo.c  18
-rw-r--r--  src/gallium/winsys/amdgpu/drm/amdgpu_bo.h  8
2 files changed, 15 insertions, 11 deletions
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index d17606eef73..1d037b96874 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -230,8 +230,8 @@ static void amdgpu_bo_destroy_or_cache(struct pb_buffer *_buf)
assert(bo->bo); /* slab buffers have a separate vtbl */
- if (bo->u.real.use_reusable_pool)
- pb_cache_add_buffer(&bo->u.real.cache_entry);
+ if (bo->use_reusable_pool)
+ pb_cache_add_buffer(bo->cache_entry);
else
amdgpu_bo_destroy(_buf);
}
@@ -476,6 +476,7 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
struct amdgpu_winsys_bo *bo;
amdgpu_va_handle va_handle = NULL;
int r;
+ bool init_pb_cache;
/* VRAM or GTT must be specified, but not both at the same time. */
assert(util_bitcount(initial_domain & (RADEON_DOMAIN_VRAM_GTT |
@@ -484,13 +485,17 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
alignment = amdgpu_get_optimal_alignment(ws, size, alignment);
- bo = CALLOC_STRUCT(amdgpu_winsys_bo);
+ init_pb_cache = heap >= 0 && (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING);
+
+ bo = CALLOC(1, sizeof(struct amdgpu_winsys_bo) +
+ init_pb_cache * sizeof(struct pb_cache_entry));
if (!bo) {
return NULL;
}
- if (heap >= 0) {
- pb_cache_init_entry(&ws->bo_cache, &bo->u.real.cache_entry, &bo->base,
+ if (init_pb_cache) {
+ bo->use_reusable_pool = true;
+ pb_cache_init_entry(&ws->bo_cache, bo->cache_entry, &bo->base,
heap);
}
request.alloc_size = size;
@@ -1378,7 +1383,6 @@ no_slab:
return NULL;
}
- bo->u.real.use_reusable_pool = use_reusable_pool;
return &bo->base;
}
@@ -1533,7 +1537,7 @@ static bool amdgpu_bo_get_handle(struct radeon_winsys *rws,
if (!bo->bo)
return false;
- bo->u.real.use_reusable_pool = false;
+ bo->use_reusable_pool = false;
switch (whandle->type) {
case WINSYS_HANDLE_TYPE_SHARED:
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
index 2190794102c..d33c141e90f 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
@@ -59,15 +59,12 @@ struct amdgpu_winsys_bo {
struct pb_buffer base;
union {
struct {
- struct pb_cache_entry cache_entry;
-
amdgpu_va_handle va_handle;
- int map_count;
- bool use_reusable_pool;
#if DEBUG
struct list_head global_list_item;
#endif
uint32_t kms_handle;
+ int map_count;
} real;
struct {
struct pb_slab_entry entry;
@@ -91,6 +88,7 @@ struct amdgpu_winsys_bo {
amdgpu_bo_handle bo; /* NULL for slab entries and sparse buffers */
bool is_user_ptr;
+ bool use_reusable_pool;
uint32_t unique_id;
uint64_t va;
simple_mtx_t lock;
@@ -111,6 +109,8 @@ struct amdgpu_winsys_bo {
unsigned num_fences;
unsigned max_fences;
struct pipe_fence_handle **fences;
+
+ struct pb_cache_entry cache_entry[];
};
struct amdgpu_slab {