summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Marek Olšák <maraeo@gmail.com> 2011-12-24 08:15:40 +0100
committer Marek Olšák <maraeo@gmail.com> 2011-12-24 21:28:43 +0100
commit 93f4e3cb6c1ca303ee1f5c2a2491a8eff33f2633 (patch)
tree c6515ad448336db16756adba362412b025ce8cde
parent e6e9becd5016df649d3c19a3e81e85bd63b895b7 (diff)
winsys/radeon: move managing GEM domains back to drivers
This partially reverts commit 363ff844753c46ac9c13866627e096b091ea81f8. It caused severe performance drops in Nexuiz. Reported by Phoronix. Tested by me on r300g and by IRC people on r600g.
-rw-r--r-- src/gallium/drivers/r300/r300_context.h 3
-rw-r--r-- src/gallium/drivers/r300/r300_emit.c 20
-rw-r--r-- src/gallium/drivers/r300/r300_flush.c 4
-rw-r--r-- src/gallium/drivers/r300/r300_query.c 2
-rw-r--r-- src/gallium/drivers/r300/r300_screen_buffer.c 10
-rw-r--r-- src/gallium/drivers/r300/r300_texture.c 10
-rw-r--r-- src/gallium/drivers/r600/r600.h 3
-rw-r--r-- src/gallium/drivers/r600/r600_buffer.c 30
-rw-r--r-- src/gallium/drivers/r600/r600_hw_context_priv.h 2
-rw-r--r-- src/gallium/drivers/r600/r600_texture.c 2
-rw-r--r-- src/gallium/winsys/radeon/drm/radeon_drm_bo.c 35
-rw-r--r-- src/gallium/winsys/radeon/drm/radeon_drm_bo.h 2
-rw-r--r-- src/gallium/winsys/radeon/drm/radeon_drm_cs.c 43
-rw-r--r-- src/gallium/winsys/radeon/drm/radeon_winsys.h 16
14 files changed, 106 insertions, 76 deletions
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index e1c12d9c516..5c0f53e9aad 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -304,2 +304,4 @@ struct r300_surface {
+ enum radeon_bo_domain domain;
+
uint32_t offset; /* COLOROFFSET or DEPTHOFFSET. */
@@ -387,2 +389,3 @@ struct r300_resource
struct radeon_winsys_cs_handle *cs_buf;
+ enum radeon_bo_domain domain;
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index d93a5786ff8..3897e990b1c 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -1192,3 +1192,4 @@ validate:
r300->rws->cs_add_reloc(r300->cs, tex->cs_buf,
- RADEON_USAGE_READWRITE);
+ RADEON_USAGE_READWRITE,
+ r300_surface(fb->cbufs[i])->domain);
}
@@ -1199,3 +1200,4 @@ validate:
r300->rws->cs_add_reloc(r300->cs, tex->cs_buf,
- RADEON_USAGE_READWRITE);
+ RADEON_USAGE_READWRITE,
+ r300_surface(fb->zsbuf)->domain);
}
@@ -1210,3 +1212,4 @@ validate:
tex = r300_resource(texstate->sampler_views[i]->base.texture);
- r300->rws->cs_add_reloc(r300->cs, tex->cs_buf, RADEON_USAGE_READ);
+ r300->rws->cs_add_reloc(r300->cs, tex->cs_buf, RADEON_USAGE_READ,
+ tex->domain);
}
@@ -1216,3 +1219,3 @@ validate:
r300->rws->cs_add_reloc(r300->cs, r300->query_current->cs_buf,
- RADEON_USAGE_WRITE);
+ RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT);
/* ...vertex buffer for SWTCL path... */
@@ -1220,3 +1223,4 @@ validate:
r300->rws->cs_add_reloc(r300->cs, r300_resource(r300->vbo)->cs_buf,
- RADEON_USAGE_READ);
+ RADEON_USAGE_READ,
+ r300_resource(r300->vbo)->domain);
/* ...vertex buffers for HWTCL path... */
@@ -1233,3 +1237,4 @@ validate:
r300->rws->cs_add_reloc(r300->cs, r300_resource(buf)->cs_buf,
- RADEON_USAGE_READ);
+ RADEON_USAGE_READ,
+ r300_resource(buf)->domain);
}
@@ -1239,3 +1244,4 @@ validate:
r300->rws->cs_add_reloc(r300->cs, r300_resource(index_buffer)->cs_buf,
- RADEON_USAGE_READ);
+ RADEON_USAGE_READ,
+ r300_resource(index_buffer)->domain);
diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c
index 9459a95cd73..f8546443692 100644
--- a/src/gallium/drivers/r300/r300_flush.c
+++ b/src/gallium/drivers/r300/r300_flush.c
@@ -82,3 +82,3 @@ void r300_flush(struct pipe_context *pipe,
PIPE_BIND_CUSTOM,
- PIPE_USAGE_IMMUTABLE);
+ RADEON_DOMAIN_GTT);
/* Add the fence as a dummy relocation. */
@@ -86,3 +86,3 @@ void r300_flush(struct pipe_context *pipe,
r300->rws->buffer_get_cs_handle(*rfence),
- RADEON_USAGE_READWRITE);
+ RADEON_USAGE_READWRITE, RADEON_DOMAIN_GTT);
}
diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c
index 8f7de79538d..bcf6d0eb475 100644
--- a/src/gallium/drivers/r300/r300_query.c
+++ b/src/gallium/drivers/r300/r300_query.c
@@ -60,3 +60,3 @@ static struct pipe_query *r300_create_query(struct pipe_context *pipe,
q->buf = r300->rws->buffer_create(r300->rws, 4096, 4096,
- PIPE_BIND_CUSTOM, PIPE_USAGE_STAGING);
+ PIPE_BIND_CUSTOM, RADEON_DOMAIN_GTT);
if (!q->buf) {
diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c
index a5ec8ef9656..a8392d2dc52 100644
--- a/src/gallium/drivers/r300/r300_screen_buffer.c
+++ b/src/gallium/drivers/r300/r300_screen_buffer.c
@@ -189,2 +189,3 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen,
rbuf->b.user_ptr = NULL;
+ rbuf->domain = RADEON_DOMAIN_GTT;
rbuf->buf = NULL;
@@ -198,8 +199,2 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen,
-#ifdef PIPE_ARCH_BIG_ENDIAN
- /* Force buffer placement to GTT on big endian machines, because
- * the vertex fetcher can't swap bytes from VRAM. */
- rbuf->b.b.b.usage = PIPE_USAGE_STAGING;
-#endif
-
rbuf->buf =
@@ -207,3 +202,3 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen,
rbuf->b.b.b.width0, alignment,
- rbuf->b.b.b.bind, rbuf->b.b.b.usage);
+ rbuf->b.b.b.bind, rbuf->domain);
if (!rbuf->buf) {
@@ -241,2 +236,3 @@ struct pipe_resource *r300_user_buffer_create(struct pipe_screen *screen,
rbuf->b.user_ptr = ptr;
+ rbuf->domain = RADEON_DOMAIN_GTT;
rbuf->buf = NULL;
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index 2738f582f69..6fc60fb60d6 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -903,2 +903,5 @@ r300_texture_create_object(struct r300_screen *rscreen,
tex->tex.stride_in_bytes_override = stride_in_bytes_override;
+ tex->domain = base->flags & R300_RESOURCE_FLAG_TRANSFER ?
+ RADEON_DOMAIN_GTT :
+ RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT;
tex->buf = buffer;
@@ -910,3 +913,3 @@ r300_texture_create_object(struct r300_screen *rscreen,
tex->buf = rws->buffer_create(rws, tex->tex.size_in_bytes, 2048,
- base->bind, base->usage);
+ base->bind, tex->domain);
@@ -1021,2 +1024,7 @@ struct pipe_surface* r300_create_surface(struct pipe_context * ctx,
+ /* Prefer VRAM if there are multiple domains to choose from. */
+ surface->domain = tex->domain;
+ if (surface->domain & RADEON_DOMAIN_VRAM)
+ surface->domain &= ~RADEON_DOMAIN_GTT;
+
surface->offset = r300_texture_get_offset(tex, level,
diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index fbd12fbe1b7..4bfb5a980f1 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -90,2 +90,5 @@ struct r600_resource {
struct radeon_winsys_cs_handle *cs_buf;
+
+ /* Resource state. */
+ unsigned domains;
};
diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c
index f4388867a92..a0386fe4b26 100644
--- a/src/gallium/drivers/r600/r600_buffer.c
+++ b/src/gallium/drivers/r600/r600_buffer.c
@@ -153,3 +153,30 @@ bool r600_init_resource(struct r600_screen *rscreen,
{
- res->buf = rscreen->ws->buffer_create(rscreen->ws, size, alignment, bind, usage);
+ uint32_t initial_domain, domains;
+
+ /* Staging resources participate in transfers and blits only
+ * and are used for uploads and downloads from regular
+ * resources. We generate them internally for some transfers.
+ */
+ if (usage == PIPE_USAGE_STAGING) {
+ domains = RADEON_DOMAIN_GTT;
+ initial_domain = RADEON_DOMAIN_GTT;
+ } else {
+ domains = RADEON_DOMAIN_GTT | RADEON_DOMAIN_VRAM;
+
+ switch(usage) {
+ case PIPE_USAGE_DYNAMIC:
+ case PIPE_USAGE_STREAM:
+ case PIPE_USAGE_STAGING:
+ initial_domain = RADEON_DOMAIN_GTT;
+ break;
+ case PIPE_USAGE_DEFAULT:
+ case PIPE_USAGE_STATIC:
+ case PIPE_USAGE_IMMUTABLE:
+ default:
+ initial_domain = RADEON_DOMAIN_VRAM;
+ break;
+ }
+ }
+
+ res->buf = rscreen->ws->buffer_create(rscreen->ws, size, alignment, bind, initial_domain);
if (!res->buf) {
@@ -159,2 +186,3 @@ bool r600_init_resource(struct r600_screen *rscreen,
res->cs_buf = rscreen->ws->buffer_get_cs_handle(res->buf);
+ res->domains = domains;
return true;
diff --git a/src/gallium/drivers/r600/r600_hw_context_priv.h b/src/gallium/drivers/r600/r600_hw_context_priv.h
index 206de7e819f..2ad56242059 100644
--- a/src/gallium/drivers/r600/r600_hw_context_priv.h
+++ b/src/gallium/drivers/r600/r600_hw_context_priv.h
@@ -92,3 +92,3 @@ static INLINE unsigned r600_context_bo_reloc(struct r600_context *ctx, struct r6
- reloc_index = ctx->ws->cs_add_reloc(ctx->cs, rbo->cs_buf, usage);
+ reloc_index = ctx->ws->cs_add_reloc(ctx->cs, rbo->cs_buf, usage, rbo->domains);
if (reloc_index >= ctx->creloc)
diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index 2d041b04e49..8fe54c8a539 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -471,2 +471,3 @@ r600_texture_create_object(struct pipe_screen *screen,
resource->cs_buf = rscreen->ws->buffer_get_cs_handle(buf);
+ resource->domains = RADEON_DOMAIN_GTT | RADEON_DOMAIN_VRAM;
}
@@ -476,2 +477,3 @@ r600_texture_create_object(struct pipe_screen *screen,
rtex->stencil->resource.cs_buf = rtex->resource.cs_buf;
+ rtex->stencil->resource.domains = rtex->resource.domains;
}
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index ccf9c4f5dc6..d4746ffc535 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -348,7 +348,5 @@ static struct pb_buffer *radeon_bomgr_create_bo(struct pb_manager *_mgr,
- assert(rdesc->initial_domains && rdesc->reloc_domains);
+ assert(rdesc->initial_domains);
assert((rdesc->initial_domains &
~(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) == 0);
- assert((rdesc->reloc_domains &
- ~(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) == 0);
@@ -379,3 +377,2 @@ static struct pb_buffer *radeon_bomgr_create_bo(struct pb_manager *_mgr,
bo->handle = args.handle;
- bo->reloc_domains = rdesc->reloc_domains;
pipe_mutex_init(bo->map_mutex);
@@ -528,3 +525,4 @@ radeon_winsys_bo_create(struct radeon_winsys *rws,
unsigned alignment,
- unsigned bind, unsigned usage)
+ unsigned bind,
+ enum radeon_bo_domain domain)
{
@@ -538,27 +536,5 @@ radeon_winsys_bo_create(struct radeon_winsys *rws,
- /* Determine the memory domains. */
- switch (usage) {
- case PIPE_USAGE_STAGING:
- case PIPE_USAGE_STREAM:
- case PIPE_USAGE_DYNAMIC:
- desc.initial_domains = RADEON_GEM_DOMAIN_GTT;
- desc.reloc_domains = RADEON_GEM_DOMAIN_GTT;
- break;
- case PIPE_USAGE_IMMUTABLE:
- case PIPE_USAGE_STATIC:
- desc.initial_domains = RADEON_GEM_DOMAIN_VRAM;
- desc.reloc_domains = RADEON_GEM_DOMAIN_VRAM;
- break;
- default:
- if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER |
- PIPE_BIND_CONSTANT_BUFFER)) {
- desc.initial_domains = RADEON_GEM_DOMAIN_GTT;
- } else {
- desc.initial_domains = RADEON_GEM_DOMAIN_VRAM;
- }
- desc.reloc_domains = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM;
- }
-
/* Additional criteria for the cache manager. */
- desc.base.usage = desc.initial_domains;
+ desc.base.usage = domain;
+ desc.initial_domains = domain;
@@ -620,3 +596,2 @@ static struct pb_buffer *radeon_winsys_bo_from_handle(struct radeon_winsys *rws,
bo->name = whandle->handle;
- bo->reloc_domains = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM;
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
index ba71cfb3440..35d25e87eb3 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
@@ -44,3 +44,2 @@ struct radeon_bo_desc {
unsigned initial_domains;
- unsigned reloc_domains;
};
@@ -60,3 +59,2 @@ struct radeon_bo {
- uint32_t reloc_domains;
uint32_t handle;
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
index 2239059cc53..e6109afd7ea 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -183,9 +183,10 @@ static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws)
static INLINE void update_reloc_domains(struct drm_radeon_cs_reloc *reloc,
- enum radeon_bo_usage usage,
- unsigned domains)
+ enum radeon_bo_domain rd,
+ enum radeon_bo_domain wd,
+ enum radeon_bo_domain *added_domains)
{
- if (usage & RADEON_USAGE_READ)
- reloc->read_domains |= domains;
- if (usage & RADEON_USAGE_WRITE)
- reloc->write_domain |= domains;
+ *added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain);
+
+ reloc->read_domains |= rd;
+ reloc->write_domain |= wd;
}
@@ -211,3 +212,3 @@ int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo)
* This will prevent additional hash collisions if there are
- * several subsequent get_reloc calls of the same buffer.
+ * several consecutive get_reloc calls for the same buffer.
*
@@ -232,3 +233,4 @@ static unsigned radeon_add_reloc(struct radeon_cs_context *csc,
enum radeon_bo_usage usage,
- unsigned *added_domains)
+ enum radeon_bo_domain domains,
+ enum radeon_bo_domain *added_domains)
{
@@ -237,2 +239,4 @@ static unsigned radeon_add_reloc(struct radeon_cs_context *csc,
unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1);
+ enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0;
+ enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
@@ -241,3 +245,3 @@ static unsigned radeon_add_reloc(struct radeon_cs_context *csc,
if (reloc->handle == bo->handle) {
- update_reloc_domains(reloc, usage, bo->reloc_domains);
+ update_reloc_domains(reloc, rd, wd, added_domains);
return csc->reloc_indices_hashlist[hash];
@@ -250,3 +254,3 @@ static unsigned radeon_add_reloc(struct radeon_cs_context *csc,
if (reloc->handle == bo->handle) {
- update_reloc_domains(reloc, usage, bo->reloc_domains);
+ update_reloc_domains(reloc, rd, wd, added_domains);
@@ -280,6 +284,4 @@ static unsigned radeon_add_reloc(struct radeon_cs_context *csc,
reloc->handle = bo->handle;
- if (usage & RADEON_USAGE_READ)
- reloc->read_domains = bo->reloc_domains;
- if (usage & RADEON_USAGE_WRITE)
- reloc->write_domain = bo->reloc_domains;
+ reloc->read_domains = rd;
+ reloc->write_domain = wd;
reloc->flags = 0;
@@ -292,3 +294,3 @@ static unsigned radeon_add_reloc(struct radeon_cs_context *csc,
- *added_domains = bo->reloc_domains;
+ *added_domains = rd | wd;
return csc->crelocs++;
@@ -298,3 +300,4 @@ static unsigned radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs,
struct radeon_winsys_cs_handle *buf,
- enum radeon_bo_usage usage)
+ enum radeon_bo_usage usage,
+ enum radeon_bo_domain domains)
{
@@ -302,9 +305,9 @@ static unsigned radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs,
struct radeon_bo *bo = (struct radeon_bo*)buf;
- unsigned added_domains = 0;
+ enum radeon_bo_domain added_domains;
- unsigned index = radeon_add_reloc(cs->csc, bo, usage, &added_domains);
+ unsigned index = radeon_add_reloc(cs->csc, bo, usage, domains, &added_domains);
- if (added_domains & RADEON_GEM_DOMAIN_GTT)
+ if (added_domains & RADEON_DOMAIN_GTT)
cs->csc->used_gart += bo->base.size;
- if (added_domains & RADEON_GEM_DOMAIN_VRAM)
+ if (added_domains & RADEON_DOMAIN_VRAM)
cs->csc->used_vram += bo->base.size;
diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h
index ea335d87113..59c1aad3308 100644
--- a/src/gallium/winsys/radeon/drm/radeon_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h
@@ -60,2 +60,7 @@ enum radeon_bo_layout {
+enum radeon_bo_domain { /* bitfield */
+ RADEON_DOMAIN_GTT = 2,
+ RADEON_DOMAIN_VRAM = 4
+};
+
enum radeon_bo_usage { /* bitfield */
@@ -139,3 +144,3 @@ struct radeon_winsys {
* \param bind A bitmask of the PIPE_BIND_* flags.
- * \param usage A bitmask of the PIPE_USAGE_* flags.
+ * \param domain A bitmask of the RADEON_DOMAIN_* flags.
* \return The created buffer object.
@@ -145,3 +150,4 @@ struct radeon_winsys {
unsigned alignment,
- unsigned bind, unsigned usage);
+ unsigned bind,
+ enum radeon_bo_domain domain);
@@ -273,3 +279,4 @@ struct radeon_winsys {
* \param buf A winsys buffer to validate.
- * \param usage Whether the buffer is used for read and/or write.
+ * \param usage Whether the buffer is used for read and/or write.
+ * \param domain Bitmask of the RADEON_DOMAIN_* flags.
* \return Relocation index.
@@ -278,3 +285,4 @@ struct radeon_winsys {
struct radeon_winsys_cs_handle *buf,
- enum radeon_bo_usage usage);
+ enum radeon_bo_usage usage,
+ enum radeon_bo_domain domain);