summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Nicolai Hähnle <nicolai.haehnle@amd.com> 2016-09-12 12:19:47 +0200
committer: Nicolai Hähnle <nicolai.haehnle@amd.com> 2016-09-27 16:45:37 +0200
commit: fb827c055cb1bdd2b18d0687c06c56b537d805f3 (patch)
tree: c863814a1cb94876760fb51e6bdf3e5c2f0e4bcd
parent: a1e391e39df2b1d8169e773a30153167ab8e13e8 (diff)
winsys/radeon: enable buffer allocation from slabs
Only enable for chips with GPUVM, because older driver paths do not take the required offset into account.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
-rw-r--r-- src/gallium/winsys/radeon/drm/radeon_drm_bo.c 170
-rw-r--r-- src/gallium/winsys/radeon/drm/radeon_drm_bo.h 12
-rw-r--r-- src/gallium/winsys/radeon/drm/radeon_drm_winsys.c 24
-rw-r--r-- src/gallium/winsys/radeon/drm/radeon_drm_winsys.h 5
4 files changed, 209 insertions, 2 deletions
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index f9cf2e0d2cd..3af01f82fb6 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -42,6 +42,13 @@
#include <stdio.h>
#include <inttypes.h>
+static struct pb_buffer *
+radeon_winsys_bo_create(struct radeon_winsys *rws,
+ uint64_t size,
+ unsigned alignment,
+ enum radeon_bo_domain domain,
+ enum radeon_bo_flag flags);
+
static inline struct radeon_bo *radeon_bo(struct pb_buffer *bo)
{
return (struct radeon_bo *)bo;
@@ -700,6 +707,120 @@ bool radeon_bo_can_reclaim(struct pb_buffer *_buf)
return radeon_bo_wait(_buf, 0, RADEON_USAGE_READWRITE);
}
+bool radeon_bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry)
+{
+ struct radeon_bo *bo = NULL; /* fix container_of */
+ bo = container_of(entry, bo, u.slab.entry);
+
+ return radeon_bo_can_reclaim(&bo->base);
+}
+
+static void radeon_bo_slab_destroy(struct pb_buffer *_buf)
+{
+ struct radeon_bo *bo = radeon_bo(_buf);
+
+ assert(!bo->handle);
+
+ pb_slab_free(&bo->rws->bo_slabs, &bo->u.slab.entry);
+}
+
+static const struct pb_vtbl radeon_winsys_bo_slab_vtbl = {
+ radeon_bo_slab_destroy
+ /* other functions are never called */
+};
+
+struct pb_slab *radeon_bo_slab_alloc(void *priv, unsigned heap,
+ unsigned entry_size,
+ unsigned group_index)
+{
+ struct radeon_drm_winsys *ws = priv;
+ struct radeon_slab *slab = CALLOC_STRUCT(radeon_slab);
+ enum radeon_bo_domain domains;
+ enum radeon_bo_flag flags = 0;
+ unsigned base_hash;
+
+ if (!slab)
+ return NULL;
+
+ if (heap & 1)
+ flags |= RADEON_FLAG_GTT_WC;
+ if (heap & 2)
+ flags |= RADEON_FLAG_CPU_ACCESS;
+
+ switch (heap >> 2) {
+ case 0:
+ domains = RADEON_DOMAIN_VRAM;
+ break;
+ default:
+ case 1:
+ domains = RADEON_DOMAIN_VRAM_GTT;
+ break;
+ case 2:
+ domains = RADEON_DOMAIN_GTT;
+ break;
+ }
+
+ slab->buffer = radeon_bo(radeon_winsys_bo_create(&ws->base,
+ 64 * 1024, 64 * 1024,
+ domains, flags));
+ if (!slab->buffer)
+ goto fail;
+
+ assert(slab->buffer->handle);
+
+ slab->base.num_entries = slab->buffer->base.size / entry_size;
+ slab->base.num_free = slab->base.num_entries;
+ slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries));
+ if (!slab->entries)
+ goto fail_buffer;
+
+ LIST_INITHEAD(&slab->base.free);
+
+ base_hash = __sync_fetch_and_add(&ws->next_bo_hash, slab->base.num_entries);
+
+ for (unsigned i = 0; i < slab->base.num_entries; ++i) {
+ struct radeon_bo *bo = &slab->entries[i];
+
+ bo->base.alignment = entry_size;
+ bo->base.usage = slab->buffer->base.usage;
+ bo->base.size = entry_size;
+ bo->base.vtbl = &radeon_winsys_bo_slab_vtbl;
+ bo->rws = ws;
+ bo->va = slab->buffer->va + i * entry_size;
+ bo->initial_domain = domains;
+ bo->hash = base_hash + i;
+ bo->u.slab.entry.slab = &slab->base;
+ bo->u.slab.entry.group_index = group_index;
+ bo->u.slab.real = slab->buffer;
+
+ LIST_ADDTAIL(&bo->u.slab.entry.head, &slab->base.free);
+ }
+
+ return &slab->base;
+
+fail_buffer:
+ radeon_bo_reference(&slab->buffer, NULL);
+fail:
+ FREE(slab);
+ return NULL;
+}
+
+void radeon_bo_slab_free(void *priv, struct pb_slab *pslab)
+{
+ struct radeon_slab *slab = (struct radeon_slab *)pslab;
+
+ for (unsigned i = 0; i < slab->base.num_entries; ++i) {
+ struct radeon_bo *bo = &slab->entries[i];
+ for (unsigned j = 0; j < bo->u.slab.num_fences; ++j)
+ radeon_bo_reference(&bo->u.slab.fences[j], NULL);
+ FREE(bo->u.slab.fences);
+ }
+
+ FREE(slab->entries);
+ radeon_bo_reference(&slab->buffer, NULL);
+ FREE(slab);
+}
+
static unsigned eg_tile_split(unsigned tile_split)
{
switch (tile_split) {
@@ -823,6 +944,54 @@ radeon_winsys_bo_create(struct radeon_winsys *rws,
if (size > UINT_MAX)
return NULL;
+ /* Sub-allocate small buffers from slabs. */
+ if (!(flags & RADEON_FLAG_HANDLE) &&
+ size <= (1 << RADEON_SLAB_MAX_SIZE_LOG2) &&
+ ws->info.has_virtual_memory &&
+ alignment <= MAX2(1 << RADEON_SLAB_MIN_SIZE_LOG2, util_next_power_of_two(size))) {
+ struct pb_slab_entry *entry;
+ unsigned heap = 0;
+
+ if (flags & RADEON_FLAG_GTT_WC)
+ heap |= 1;
+ if (flags & RADEON_FLAG_CPU_ACCESS)
+ heap |= 2;
+ if (flags & ~(RADEON_FLAG_GTT_WC | RADEON_FLAG_CPU_ACCESS))
+ goto no_slab;
+
+ switch (domain) {
+ case RADEON_DOMAIN_VRAM:
+ heap |= 0 * 4;
+ break;
+ case RADEON_DOMAIN_VRAM_GTT:
+ heap |= 1 * 4;
+ break;
+ case RADEON_DOMAIN_GTT:
+ heap |= 2 * 4;
+ break;
+ default:
+ goto no_slab;
+ }
+
+ entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
+ if (!entry) {
+ /* Clear the cache and try again. */
+ pb_cache_release_all_buffers(&ws->bo_cache);
+
+ entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
+ }
+ if (!entry)
+ return NULL;
+
+ bo = NULL;
+ bo = container_of(entry, bo, u.slab.entry);
+
+ pipe_reference_init(&bo->base.reference, 1);
+
+ return &bo->base;
+ }
+no_slab:
+
/* This flag is irrelevant for the cache. */
flags &= ~RADEON_FLAG_HANDLE;
@@ -862,6 +1031,7 @@ radeon_winsys_bo_create(struct radeon_winsys *rws,
pb_cache_bucket);
if (!bo) {
/* Clear the cache and try again. */
+ pb_slabs_reclaim(&ws->bo_slabs);
pb_cache_release_all_buffers(&ws->bo_cache);
bo = radeon_create_bo(ws, size, alignment, usage, domain, flags,
pb_cache_bucket);
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
index 8f767fd2c73..236e94cbbff 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
@@ -74,10 +74,22 @@ struct radeon_bo {
int num_active_ioctls;
};
+struct radeon_slab {
+ struct pb_slab base;
+ struct radeon_bo *buffer;
+ struct radeon_bo *entries;
+};
+
void radeon_bo_destroy(struct pb_buffer *_buf);
bool radeon_bo_can_reclaim(struct pb_buffer *_buf);
void radeon_drm_bo_init_functions(struct radeon_drm_winsys *ws);
+bool radeon_bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry);
+struct pb_slab *radeon_bo_slab_alloc(void *priv, unsigned heap,
+ unsigned entry_size,
+ unsigned group_index);
+void radeon_bo_slab_free(void *priv, struct pb_slab *slab);
+
static inline
void radeon_bo_reference(struct radeon_bo **dst, struct radeon_bo *src)
{
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index e02f286b0c2..ae55746654b 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -545,6 +545,8 @@ static void radeon_winsys_destroy(struct radeon_winsys *rws)
pipe_mutex_destroy(ws->hyperz_owner_mutex);
pipe_mutex_destroy(ws->cmask_owner_mutex);
+ if (ws->info.has_virtual_memory)
+ pb_slabs_deinit(&ws->bo_slabs);
pb_cache_deinit(&ws->bo_cache);
if (ws->gen >= DRV_R600) {
@@ -759,10 +761,25 @@ radeon_drm_winsys_create(int fd, radeon_screen_create_t screen_create)
radeon_bo_destroy,
radeon_bo_can_reclaim);
+ if (ws->info.has_virtual_memory) {
+ /* There is no fundamental obstacle to using slab buffer allocation
+ * without GPUVM, but enabling it requires making sure that the drivers
+ * honor the address offset.
+ */
+ if (!pb_slabs_init(&ws->bo_slabs,
+ RADEON_SLAB_MIN_SIZE_LOG2, RADEON_SLAB_MAX_SIZE_LOG2,
+ 12,
+ ws,
+ radeon_bo_can_reclaim_slab,
+ radeon_bo_slab_alloc,
+ radeon_bo_slab_free))
+ goto fail_cache;
+ }
+
if (ws->gen >= DRV_R600) {
ws->surf_man = radeon_surface_manager_new(ws->fd);
if (!ws->surf_man)
- goto fail;
+ goto fail_slab;
}
/* init reference */
@@ -819,7 +836,10 @@ radeon_drm_winsys_create(int fd, radeon_screen_create_t screen_create)
return &ws->base;
-fail:
+fail_slab:
+ if (ws->info.has_virtual_memory)
+ pb_slabs_deinit(&ws->bo_slabs);
+fail_cache:
pb_cache_deinit(&ws->bo_cache);
fail1:
pipe_mutex_unlock(fd_tab_mutex);
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
index b30055cf976..934cd584f86 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
@@ -32,6 +32,7 @@
#include "gallium/drivers/radeon/radeon_winsys.h"
#include "pipebuffer/pb_cache.h"
+#include "pipebuffer/pb_slab.h"
#include "util/u_queue.h"
#include "util/list.h"
#include <radeon_drm.h>
@@ -62,10 +63,14 @@ enum radeon_generation {
DRV_SI
};
+#define RADEON_SLAB_MIN_SIZE_LOG2 9
+#define RADEON_SLAB_MAX_SIZE_LOG2 14
+
struct radeon_drm_winsys {
struct radeon_winsys base;
struct pipe_reference reference;
struct pb_cache bo_cache;
+ struct pb_slabs bo_slabs;
int fd; /* DRM file descriptor */
int num_cs; /* The number of command streams created. */