summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/radeon
diff options
context:
space:
mode:
authorMarek Olšák <marek.olsak@amd.com>2020-05-06 14:46:13 -0400
committerMarge Bot <eric+marge@anholt.net>2020-05-15 22:12:35 +0000
commit36c01248049abba6d79b5ff1ddfe38306e0ddbff (patch)
treee8b4491b949c8e44d682edcfe47b9bf5664e1fd8 /src/gallium/drivers/radeon
parentcbbc18bc678a826508ed0b904ab485c44b7348c8 (diff)
winsys/amdgpu: add RADEON_FLAG_UNCACHED for faster blits over PCIe
Small blits benefit more. Good access pattern is required. Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4935>
Diffstat (limited to 'src/gallium/drivers/radeon')
-rw-r--r--src/gallium/drivers/radeon/radeon_winsys.h89
1 files changed, 69 insertions, 20 deletions
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h
index f183fe34cbb..9aba2a9d95f 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -67,7 +67,8 @@ enum radeon_bo_flag
RADEON_FLAG_NO_INTERPROCESS_SHARING = (1 << 4),
RADEON_FLAG_READ_ONLY = (1 << 5),
RADEON_FLAG_32BIT = (1 << 6),
- RADEON_FLAG_ENCRYPTED = (1 << 7)
+ RADEON_FLAG_ENCRYPTED = (1 << 7),
+ RADEON_FLAG_UNCACHED = (1 << 8), /* only gfx9 and newer */
};
enum radeon_dependency_flag
@@ -712,6 +713,11 @@ enum radeon_heap
RADEON_HEAP_GTT_WC_READ_ONLY_32BIT,
RADEON_HEAP_GTT_WC_32BIT,
RADEON_HEAP_GTT,
+ RADEON_HEAP_GTT_UNCACHED_WC,
+ RADEON_HEAP_GTT_UNCACHED_WC_READ_ONLY,
+ RADEON_HEAP_GTT_UNCACHED_WC_READ_ONLY_32BIT,
+ RADEON_HEAP_GTT_UNCACHED_WC_32BIT,
+ RADEON_HEAP_GTT_UNCACHED,
RADEON_MAX_SLAB_HEAPS,
RADEON_MAX_CACHED_HEAPS = RADEON_MAX_SLAB_HEAPS,
};
@@ -730,6 +736,11 @@ static inline enum radeon_bo_domain radeon_domain_from_heap(enum radeon_heap hea
case RADEON_HEAP_GTT_WC_READ_ONLY_32BIT:
case RADEON_HEAP_GTT_WC_32BIT:
case RADEON_HEAP_GTT:
+ case RADEON_HEAP_GTT_UNCACHED_WC:
+ case RADEON_HEAP_GTT_UNCACHED_WC_READ_ONLY:
+ case RADEON_HEAP_GTT_UNCACHED_WC_READ_ONLY_32BIT:
+ case RADEON_HEAP_GTT_UNCACHED_WC_32BIT:
+ case RADEON_HEAP_GTT_UNCACHED:
return RADEON_DOMAIN_GTT;
default:
assert(0);
@@ -739,36 +750,69 @@ static inline enum radeon_bo_domain radeon_domain_from_heap(enum radeon_heap hea
static inline unsigned radeon_flags_from_heap(enum radeon_heap heap)
{
- unsigned flags =
- RADEON_FLAG_NO_INTERPROCESS_SHARING | (heap != RADEON_HEAP_GTT ? RADEON_FLAG_GTT_WC : 0);
+ unsigned flags = RADEON_FLAG_NO_INTERPROCESS_SHARING;
switch (heap) {
- case RADEON_HEAP_VRAM_NO_CPU_ACCESS:
- return flags | RADEON_FLAG_NO_CPU_ACCESS;
+ case RADEON_HEAP_GTT:
+ case RADEON_HEAP_GTT_UNCACHED:
+ break;
+ default:
+ flags |= RADEON_FLAG_GTT_WC;
+ }
- case RADEON_HEAP_VRAM_READ_ONLY:
- case RADEON_HEAP_GTT_WC_READ_ONLY:
- return flags | RADEON_FLAG_READ_ONLY;
+ switch (heap) {
+ case RADEON_HEAP_GTT_UNCACHED_WC:
+ case RADEON_HEAP_GTT_UNCACHED_WC_READ_ONLY:
+ case RADEON_HEAP_GTT_UNCACHED_WC_READ_ONLY_32BIT:
+ case RADEON_HEAP_GTT_UNCACHED_WC_32BIT:
+ case RADEON_HEAP_GTT_UNCACHED:
+ flags |= RADEON_FLAG_UNCACHED;
+ break;
+ default:
+ break;
+ }
+ switch (heap) {
+ case RADEON_HEAP_VRAM_READ_ONLY:
case RADEON_HEAP_VRAM_READ_ONLY_32BIT:
+ case RADEON_HEAP_GTT_WC_READ_ONLY:
case RADEON_HEAP_GTT_WC_READ_ONLY_32BIT:
- return flags | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT;
+ case RADEON_HEAP_GTT_UNCACHED_WC_READ_ONLY:
+ case RADEON_HEAP_GTT_UNCACHED_WC_READ_ONLY_32BIT:
+ flags |= RADEON_FLAG_READ_ONLY;
+ break;
+ default:
+ break;
+ }
+ switch (heap) {
+ case RADEON_HEAP_VRAM_READ_ONLY_32BIT:
case RADEON_HEAP_VRAM_32BIT:
+ case RADEON_HEAP_GTT_WC_READ_ONLY_32BIT:
case RADEON_HEAP_GTT_WC_32BIT:
- return flags | RADEON_FLAG_32BIT;
+ case RADEON_HEAP_GTT_UNCACHED_WC_READ_ONLY_32BIT:
+ case RADEON_HEAP_GTT_UNCACHED_WC_32BIT:
+ flags |= RADEON_FLAG_32BIT;
+ default:
+ break;
+ }
- case RADEON_HEAP_VRAM:
- case RADEON_HEAP_GTT_WC:
- case RADEON_HEAP_GTT:
+ switch (heap) {
+ case RADEON_HEAP_VRAM_NO_CPU_ACCESS:
+ flags |= RADEON_FLAG_NO_CPU_ACCESS;
+ break;
default:
- return flags;
+ break;
}
+
+ return flags;
}
/* Return the heap index for winsys allocators, or -1 on failure. */
static inline int radeon_get_heap_index(enum radeon_bo_domain domain, enum radeon_bo_flag flags)
{
+ bool uncached;
+
/* VRAM implies WC (write combining) */
assert(!(domain & RADEON_DOMAIN_VRAM) || flags & RADEON_FLAG_GTT_WC);
/* NO_CPU_ACCESS implies VRAM only. */
@@ -779,7 +823,7 @@ static inline int radeon_get_heap_index(enum radeon_bo_domain domain, enum radeo
return -1;
/* Unsupported flags: NO_SUBALLOC, SPARSE. */
- if (flags & ~(RADEON_FLAG_GTT_WC | RADEON_FLAG_NO_CPU_ACCESS |
+ if (flags & ~(RADEON_FLAG_GTT_WC | RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_UNCACHED |
RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT))
return -1;
@@ -806,15 +850,20 @@ static inline int radeon_get_heap_index(enum radeon_bo_domain domain, enum radeo
}
break;
case RADEON_DOMAIN_GTT:
+ uncached = flags & RADEON_FLAG_UNCACHED;
+
switch (flags & (RADEON_FLAG_GTT_WC | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT)) {
case RADEON_FLAG_GTT_WC | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT:
- return RADEON_HEAP_GTT_WC_READ_ONLY_32BIT;
+ return uncached ? RADEON_HEAP_GTT_UNCACHED_WC_READ_ONLY_32BIT
+ : RADEON_HEAP_GTT_WC_READ_ONLY_32BIT;
case RADEON_FLAG_GTT_WC | RADEON_FLAG_READ_ONLY:
- return RADEON_HEAP_GTT_WC_READ_ONLY;
+ return uncached ? RADEON_HEAP_GTT_UNCACHED_WC_READ_ONLY
+ : RADEON_HEAP_GTT_WC_READ_ONLY;
case RADEON_FLAG_GTT_WC | RADEON_FLAG_32BIT:
- return RADEON_HEAP_GTT_WC_32BIT;
+ return uncached ? RADEON_HEAP_GTT_UNCACHED_WC_32BIT
+ : RADEON_HEAP_GTT_WC_32BIT;
case RADEON_FLAG_GTT_WC:
- return RADEON_HEAP_GTT_WC;
+ return uncached ? RADEON_HEAP_GTT_UNCACHED_WC : RADEON_HEAP_GTT_WC;
case RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT:
case RADEON_FLAG_READ_ONLY:
assert(!"READ_ONLY without WC is disallowed");
@@ -823,7 +872,7 @@ static inline int radeon_get_heap_index(enum radeon_bo_domain domain, enum radeo
assert(!"32BIT without WC is disallowed");
return -1;
case 0:
- return RADEON_HEAP_GTT;
+ return uncached ? RADEON_HEAP_GTT_UNCACHED : RADEON_HEAP_GTT;
}
break;
default: