author     Chris Wilson <chris@chris-wilson.co.uk>  2011-12-12 14:10:57 +0000
committer  Chris Wilson <chris@chris-wilson.co.uk>  2011-12-13 01:38:09 +0000
commit     c481bec356b2e40e66a000dbaaf261bf7aae930d (patch)
tree       ff2d9b5a1a0bdf07e9942d3ff81db58d00db3755
parent     6c9aa6f9cf8e59ca6aa1866b83690a1de8cfb757 (diff)
sna: Experiment with creating the CPU pixmap using an LLC BO
A poor cousin to vmap is to instead allocate a snooped bo and use a CPU mapping for zero-copy uploads into GPU-resident memory. For maximum performance we still need tiled GPU buffers, so CPU bo are only useful in situations where we are frequently migrating data.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-rw-r--r--  src/sna/kgem.c       162
-rw-r--r--  src/sna/kgem.h         2
-rw-r--r--  src/sna/sna_accel.c   89
3 files changed, 183 insertions, 70 deletions
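For context, the core of the new upload path is the DRM_IOCTL_I915_GEM_MMAP ioctl used by kgem_bo_map__cpu() in the diff below: it returns a plain CPU mapping of the bo's backing pages rather than a GTT mapping, which on LLC parts (gen >= 6) is cache-coherent with the GPU and therefore usable for zero-copy uploads. The following is a minimal, self-contained sketch of that path; the bo_create_and_map_cpu() helper name and the bare-bones error handling are illustrative only and not part of kgem.

/* Sketch: create a GEM bo and obtain a CPU (non-GTT) mapping of it via
 * DRM_IOCTL_I915_GEM_MMAP, the same ioctl kgem_bo_map__cpu() uses below.
 * On LLC parts the CPU cache is shared with the GPU, so writes through
 * this mapping serve as zero-copy uploads into GPU-resident memory.
 */
#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <i915_drm.h>   /* from libdrm */

static void *bo_create_and_map_cpu(int fd, uint32_t size, uint32_t *handle)
{
	struct drm_i915_gem_create create;
	struct drm_i915_gem_mmap mmap_arg;

	/* Allocate the buffer object. */
	memset(&create, 0, sizeof(create));
	create.size = size;
	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create))
		return NULL;
	*handle = create.handle;

	/* Map its backing pages directly into the CPU address space. */
	memset(&mmap_arg, 0, sizeof(mmap_arg));
	mmap_arg.handle = create.handle;
	mmap_arg.offset = 0;
	mmap_arg.size = size;
	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg))
		return NULL;

	return (void *)(uintptr_t)mmap_arg.addr_ptr;
}

The trade-off the commit message notes still applies: such buffers are linear (I915_TILING_NONE), so the GPU pays for untiled access, which is why the patch only uses them where data migrates between CPU and GPU frequently.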
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index fac9c0ef..6a17bfe5 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -86,7 +86,11 @@ static inline void list_replace(struct list *old,
#endif
#define PAGE_SIZE 4096
-#define MAX_VMA_CACHE 128
+#define MAX_VMA_CACHE 256
+
+#define IS_CPU_MAP(ptr) ((uintptr_t)(ptr) & 1)
+#define CPU_MAP(ptr) ((void*)((uintptr_t)(ptr) & ~1))
+#define MAKE_CPU_MAP(ptr) ((void*)((uintptr_t)(ptr) | 1))
struct kgem_partial_bo {
struct kgem_bo base;
@@ -618,9 +622,10 @@ static void kgem_bo_free(struct kgem *kgem, struct kgem_bo *bo)
}
if (bo->map) {
- DBG(("%s: releasing vma for handle=%d, count=%d\n",
- __FUNCTION__, bo->handle, kgem->vma_count-1));
- munmap(bo->map, bo->size);
+ DBG(("%s: releasing %s vma for handle=%d, count=%d\n",
+ __FUNCTION__, IS_CPU_MAP(bo->map) ? "CPU" : "GTT",
+ bo->handle, kgem->vma_count-1));
+ munmap(CPU_MAP(bo->map), bo->size);
list_del(&bo->vma);
kgem->vma_count--;
}
@@ -657,34 +662,39 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
}
}
- if (!bo->reusable)
+ if (!bo->reusable) {
+ DBG(("%s: handle=%d, not reusable\n",
+ __FUNCTION__, bo->handle));
goto destroy;
-
- if (!bo->rq && !bo->needs_flush) {
- assert(!bo->purged);
-
- DBG(("%s: handle=%d, purged\n", __FUNCTION__, bo->handle));
-
- if (!gem_madvise(kgem->fd, bo->handle, I915_MADV_DONTNEED)) {
- kgem->need_purge |= bo->gpu;
- goto destroy;
- }
-
- bo->purged = true;
}
kgem->need_expire = true;
if (bo->rq) {
DBG(("%s: handle=%d -> active\n", __FUNCTION__, bo->handle));
list_move(&bo->list, active(kgem, bo->size));
- } else if (bo->purged) {
- DBG(("%s: handle=%d -> inactive\n", __FUNCTION__, bo->handle));
- list_move(&bo->list, inactive(kgem, bo->size));
- } else {
+ } else if (bo->needs_flush) {
DBG(("%s: handle=%d -> flushing\n", __FUNCTION__, bo->handle));
assert(list_is_empty(&bo->request));
list_add(&bo->request, &kgem->flushing);
list_move(&bo->list, active(kgem, bo->size));
+ } else {
+ if (!IS_CPU_MAP(bo->map)) {
+ assert(!bo->purged);
+
+ DBG(("%s: handle=%d, purged\n",
+ __FUNCTION__, bo->handle));
+
+ if (!gem_madvise(kgem->fd, bo->handle,
+ I915_MADV_DONTNEED)) {
+ kgem->need_purge |= bo->gpu;
+ goto destroy;
+ }
+
+ bo->purged = true;
+ }
+
+ DBG(("%s: handle=%d -> inactive\n", __FUNCTION__, bo->handle));
+ list_move(&bo->list, inactive(kgem, bo->size));
}
return;
@@ -1188,7 +1198,6 @@ bool kgem_expire_cache(struct kgem *kgem)
for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
idle &= list_is_empty(&kgem->inactive[i]);
list_for_each_entry(bo, &kgem->inactive[i], list) {
- assert(bo->purged);
if (bo->delta) {
expire = now - MAX_INACTIVE_TIME;
break;
@@ -1213,8 +1222,9 @@ bool kgem_expire_cache(struct kgem *kgem)
bo = list_last_entry(&kgem->inactive[i],
struct kgem_bo, list);
- if (gem_madvise(kgem->fd, bo->handle,
- I915_MADV_DONTNEED) &&
+ if ((!bo->purged ||
+ gem_madvise(kgem->fd, bo->handle,
+ I915_MADV_DONTNEED)) &&
bo->delta > expire) {
idle = false;
break;
@@ -1844,32 +1854,47 @@ uint32_t kgem_add_reloc(struct kgem *kgem,
return delta;
}
+static void kgem_trim_vma_cache(struct kgem *kgem)
+{
+ /* vma are limited on a per-process basis to around 64k.
+ * This includes all malloc arenas as well as other file
+ * mappings. In order to be fair and not hog the cache,
+ * and more importantly not to exhaust that limit and to
+ * start failing mappings, we keep our own number of open
+ * vma to within a conservative value.
+ */
+ while (kgem->vma_count > MAX_VMA_CACHE) {
+ struct kgem_bo *old;
+
+ old = list_first_entry(&kgem->vma_cache,
+ struct kgem_bo,
+ vma);
+ DBG(("%s: discarding %s vma cache for %d\n",
+ __FUNCTION__, IS_CPU_MAP(old->map) ? "CPU" : "GTT",
+ old->handle));
+ munmap(CPU_MAP(old->map), old->size);
+ old->map = NULL;
+ list_del(&old->vma);
+ kgem->vma_count--;
+ }
+}
+
void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo, int prot)
{
void *ptr;
+ if (IS_CPU_MAP(bo->map)) {
+ DBG(("%s: discarding CPU vma cache for %d\n",
+ __FUNCTION__, bo->handle));
+ munmap(CPU_MAP(bo->map), bo->size);
+ bo->map = NULL;
+ list_del(&bo->vma);
+ kgem->vma_count--;
+ }
+
ptr = bo->map;
if (ptr == NULL) {
- /* vma are limited on a per-process basis to around 64k.
- * This includes all malloc arenas as well as other file
- * mappings. In order to be fair and not hog the cache,
- * and more importantly not to exhaust that limit and to
- * start failing mappings, we keep our own number of open
- * vma to within a conservative value.
- */
- while (kgem->vma_count > MAX_VMA_CACHE) {
- struct kgem_bo *old;
-
- old = list_first_entry(&kgem->vma_cache,
- struct kgem_bo,
- vma);
- DBG(("%s: discarding vma cache for %d\n",
- __FUNCTION__, old->handle));
- munmap(old->map, old->size);
- old->map = NULL;
- list_del(&old->vma);
- kgem->vma_count--;
- }
+ kgem_trim_vma_cache(kgem);
ptr = gem_mmap(kgem->fd, bo->handle, bo->size,
PROT_READ | PROT_WRITE);
@@ -1907,6 +1932,53 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo, int prot)
return ptr;
}
+void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo)
+{
+ struct drm_i915_gem_mmap mmap_arg;
+
+ DBG(("%s(handle=%d, size=%d)\n", __FUNCTION__, bo->handle, bo->size));
+
+ if (IS_CPU_MAP(bo->map)) {
+ void *ptr = CPU_MAP(bo->map);
+ list_del(&bo->vma);
+ kgem->vma_count--;
+ bo->map = NULL;
+ return ptr;
+ }
+
+ if (bo->map) {
+ DBG(("%s: discarding GTT vma cache for %d\n",
+ __FUNCTION__, bo->handle));
+ munmap(CPU_MAP(bo->map), bo->size);
+ bo->map = NULL;
+ list_del(&bo->vma);
+ kgem->vma_count--;
+ }
+
+ kgem_trim_vma_cache(kgem);
+
+ VG_CLEAR(mmap_arg);
+ mmap_arg.handle = bo->handle;
+ mmap_arg.offset = 0;
+ mmap_arg.size = bo->size;
+ if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg)) {
+ assert(0);
+ return NULL;
+ }
+
+ VG(VALGRIND_MAKE_MEM_DEFINED(mmap_arg.addr_ptr, bo->size));
+ return (void *)(uintptr_t)mmap_arg.addr_ptr;
+}
+
+void kgem_bo_unmap__cpu(struct kgem *kgem, struct kgem_bo *bo, void *ptr)
+{
+ assert(bo->map == NULL);
+
+ bo->map = MAKE_CPU_MAP(ptr);
+ list_move(&bo->vma, &kgem->vma_cache);
+ kgem->vma_count++;
+}
+
void kgem_bo_unmap(struct kgem *kgem, struct kgem_bo *bo)
{
if (bo->map == NULL)
@@ -1915,7 +1987,7 @@ void kgem_bo_unmap(struct kgem *kgem, struct kgem_bo *bo)
DBG(("%s: (debug) releasing vma for handle=%d, count=%d\n",
__FUNCTION__, bo->handle, kgem->vma_count-1));
- munmap(bo->map, bo->size);
+ munmap(CPU_MAP(bo->map), bo->size);
bo->map = NULL;
list_del(&bo->vma);
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index 0d85f643..2fd5a551 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -319,6 +319,8 @@ uint32_t kgem_add_reloc(struct kgem *kgem,
void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo, int prot);
void kgem_bo_unmap(struct kgem *kgem, struct kgem_bo *bo);
+void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo);
+void kgem_bo_unmap__cpu(struct kgem *kgem, struct kgem_bo *bo, void *ptr);
uint32_t kgem_bo_flink(struct kgem *kgem, struct kgem_bo *bo);
Bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo,
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index c39b45ea..dc0fad50 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -61,6 +61,7 @@
#define USE_SPANS 0
#define USE_ZERO_SPANS 1
#define USE_BO_FOR_SCRATCH_PIXMAP 1
+#define USE_LLC_CPU_BO 1
static int sna_font_key;
@@ -177,6 +178,54 @@ static void sna_pixmap_destroy_gpu_bo(struct sna *sna, struct sna_pixmap *priv)
priv->source_count = SOURCE_BIAS;
}
+static void sna_pixmap_alloc_cpu(struct sna *sna,
+ PixmapPtr pixmap,
+ struct sna_pixmap *priv)
+{
+ if (USE_LLC_CPU_BO && sna->kgem.gen >= 60) {
+ DBG(("%s: allocating CPU buffer (%dx%d)\n", __FUNCTION__,
+ pixmap->drawable.width, pixmap->drawable.height));
+
+ priv->cpu_bo = kgem_create_2d(&sna->kgem,
+ pixmap->drawable.width,
+ pixmap->drawable.height,
+ pixmap->drawable.bitsPerPixel,
+ I915_TILING_NONE,
+ CREATE_INACTIVE);
+ DBG(("%s: allocated CPU handle=%d\n", __FUNCTION__,
+ priv->cpu_bo->handle));
+
+ if (priv->cpu_bo) {
+ priv->ptr = kgem_bo_map__cpu(&sna->kgem, priv->cpu_bo);
+ if (priv->ptr == NULL) {
+ kgem_bo_destroy(&sna->kgem, priv->cpu_bo);
+ priv->cpu_bo = NULL;
+ }
+ }
+ }
+
+ if (priv->ptr == NULL)
+ priv->ptr = malloc(pixmap->devKind * pixmap->drawable.height);
+
+ assert(priv->ptr);
+ pixmap->devPrivate.ptr = priv->ptr;
+}
+
+static void sna_pixmap_free_cpu(struct sna *sna, struct sna_pixmap *priv)
+{
+ DBG(("%s: discarding CPU buffer, handle=%d, size=%d\n",
+ __FUNCTION__, priv->cpu_bo->handle, priv->cpu_bo->size));
+
+ if (priv->cpu_bo) {
+ kgem_bo_unmap__cpu(&sna->kgem, priv->cpu_bo, priv->ptr);
+ kgem_bo_destroy(&sna->kgem, priv->cpu_bo);
+
+ priv->cpu_bo = NULL;
+ } else
+ free(priv->ptr);
+ priv->pixmap->devPrivate.ptr = priv->ptr = NULL;
+}
+
static Bool sna_destroy_private(PixmapPtr pixmap, struct sna_pixmap *priv)
{
struct sna *sna = to_sna_from_pixmap(pixmap);
@@ -191,6 +240,9 @@ static Bool sna_destroy_private(PixmapPtr pixmap, struct sna_pixmap *priv)
if (priv->gpu_bo)
kgem_bo_destroy(&sna->kgem, priv->gpu_bo);
+ if (priv->ptr)
+ sna_pixmap_free_cpu(sna, priv);
+
if (priv->cpu_bo) {
if (kgem_bo_is_busy(priv->cpu_bo)) {
list_add_tail(&priv->list, &sna->deferred_free);
@@ -208,7 +260,6 @@ static Bool sna_destroy_private(PixmapPtr pixmap, struct sna_pixmap *priv)
return false;
}
- free(priv->ptr);
free(priv);
return true;
}
@@ -531,12 +582,10 @@ sna_pixmap_move_to_cpu(PixmapPtr pixmap, bool write)
__FUNCTION__, priv->gpu_bo, priv->gpu_damage, priv->gpu_only));
if (pixmap->devPrivate.ptr == NULL) {
- DBG(("%s: allocating CPU buffer\n", __FUNCTION__));
assert(priv->ptr == NULL);
assert(pixmap->devKind);
assert(priv->cpu_damage == NULL);
- priv->ptr = malloc(pixmap->devKind * pixmap->drawable.height);
- pixmap->devPrivate.ptr = priv->ptr;
+ sna_pixmap_alloc_cpu(sna, pixmap, priv);
}
if (priv->gpu_bo == NULL) {
@@ -644,12 +693,10 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
}
if (pixmap->devPrivate.ptr == NULL) {
- DBG(("%s: allocating CPU buffer\n", __FUNCTION__));
assert(priv->ptr == NULL);
assert(pixmap->devKind);
assert(priv->cpu_damage == NULL);
- priv->ptr = malloc(pixmap->devKind * pixmap->drawable.height);
- pixmap->devPrivate.ptr = priv->ptr;
+ sna_pixmap_alloc_cpu(sna, pixmap, priv);
}
if (priv->gpu_bo == NULL)
@@ -1397,13 +1444,6 @@ sna_put_zpixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
if (!priv)
return false;
- if (pixmap->devPrivate.ptr == NULL) {
- if (priv->gpu_bo == NULL)
- return false;
- return sna_put_image_upload_blt(drawable, gc, region,
- x, y, w, h, bits, stride);
- }
-
if (gc->alu != GXcopy)
return false;
@@ -1432,6 +1472,9 @@ sna_put_zpixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
if (priv->cpu_bo)
kgem_bo_sync(&sna->kgem, priv->cpu_bo, true);
+ if (pixmap->devPrivate.ptr == NULL)
+ sna_pixmap_alloc_cpu(sna, pixmap, priv);
+
if (region_subsumes_drawable(region, &pixmap->drawable)) {
DBG(("%s: replacing entire pixmap\n", __FUNCTION__));
sna_damage_all(&priv->cpu_damage,
@@ -2216,11 +2259,8 @@ fallback:
&sna->dirty_pixmaps);
}
- if (dst_pixmap->devPrivate.ptr == NULL) {
- DBG(("%s: allocating CPU buffer\n", __FUNCTION__));
- dst_priv->ptr = malloc(dst_pixmap->devKind * dst_pixmap->drawable.height);
- dst_pixmap->devPrivate.ptr = dst_priv->ptr;
- }
+ if (dst_pixmap->devPrivate.ptr == NULL)
+ sna_pixmap_alloc_cpu(sna, dst_pixmap, dst_priv);
} else
sna_drawable_move_region_to_cpu(&dst_pixmap->drawable,
&region, true);
@@ -8610,12 +8650,11 @@ static void sna_accel_inactive(struct sna *sna)
list_init(&preserve);
list_for_each_entry_safe(priv, next, &sna->active_pixmaps, inactive) {
- if (priv->ptr && sna_damage_is_all(&priv->gpu_damage,
- priv->pixmap->drawable.width,
- priv->pixmap->drawable.height)) {
- DBG(("%s: discarding CPU buffer\n", __FUNCTION__));
- free(priv->ptr);
- priv->pixmap->devPrivate.ptr = priv->ptr = NULL;
+ if (priv->ptr &&
+ sna_damage_is_all(&priv->gpu_damage,
+ priv->pixmap->drawable.width,
+ priv->pixmap->drawable.height)) {
+ sna_pixmap_free_cpu(sna, priv);
list_move(&priv->inactive, &preserve);
}
}
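A note on the IS_CPU_MAP()/CPU_MAP()/MAKE_CPU_MAP() macros introduced at the top of kgem.c: since mmap() only returns page-aligned addresses, bit 0 of the cached pointer is always zero and can be borrowed to record whether bo->map holds a CPU (snooped) or a GTT mapping, letting a single field and a single vma cache track both kinds. The standalone sketch below demonstrates the tagging; it is illustrative only, not driver code.

/* Demonstration of low-bit pointer tagging as used by MAKE_CPU_MAP(),
 * IS_CPU_MAP() and CPU_MAP(): the tag rides in bit 0 of a page-aligned
 * mapping and is masked out again before the pointer is dereferenced or
 * handed to munmap().
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	void *ptr = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	void *tagged;

	assert(((uintptr_t)ptr & 1) == 0);            /* page-aligned, bit 0 free */

	tagged = (void *)((uintptr_t)ptr | 1);        /* MAKE_CPU_MAP */
	printf("is CPU map: %d\n", (int)((uintptr_t)tagged & 1));              /* IS_CPU_MAP */
	printf("same page:  %d\n",
	       (void *)((uintptr_t)tagged & ~(uintptr_t)1) == ptr);            /* CPU_MAP */

	munmap(ptr, 4096);
	return 0;
}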