summary refs log tree commit diff
diff options
context:
space:
mode:
author	Chris Wilson <chris@chris-wilson.co.uk>	2011-12-17 16:28:04 +0000
committer	Chris Wilson <chris@chris-wilson.co.uk>	2011-12-17 21:26:35 +0000
commit	d20d167a753d8e4fe581950e1bc49f29e0ec9f1f (patch)
tree	70eb00a45d1c9c23528581cbeef6b947804f76bb
parent	dd8fd6c90612ada39eb32b98adc5acc97e7902aa (diff)
sna: Upload to large pixmaps inplace
When the pixmap is large, larger than the L2 cache size, we are unlikely to benefit from first copying the data to a shadow buffer -- as that shadow buffer itself will mostly reside in main memory. In such circumstances we may as well perform the write to the GTT mapping of the GPU bo. As such, it is a fragile heuristic that may require further tuning. Avoiding that extra copy gives a 30% boost to putimage500/shmput500 at ~10% cost to putimage10/shmput10 on Atom (945gm/PineView), without any noticeable impact upon cairo. Reported-by: Michael Larabel <Michael@phoronix.com> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-rw-r--r--src/sna/kgem.c25
-rw-r--r--src/sna/kgem.h1
-rw-r--r--src/sna/sna.h2
-rw-r--r--src/sna/sna_accel.c109
4 files changed, 124 insertions(+), 13 deletions(-)
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 17eca52f..5dde9a61 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -362,6 +362,29 @@ agp_aperture_size(struct pci_device *dev, int gen)
return dev->regions[gen < 30 ? 0 : 2].size;
}
+static size_t
+cpu_cache_size(void)
+{
+ FILE *file = fopen("/proc/cpuinfo", "r");
+ size_t size = -1;
+ if (file) {
+ size_t len = 0;
+ char *line = NULL;
+ while (getline(&line, &len, file) != -1) {
+ int mb;
+ if (sscanf(line, "cache size : %d KB", &mb) == 1) {
+ size = mb * 1024;
+ break;
+ }
+ }
+ free(line);
+ fclose(file);
+ }
+ if (size == -1)
+ ErrorF("Unknown CPU cache size\n");
+ return size;
+}
+
static int gem_param(struct kgem *kgem, int name)
{
drm_i915_getparam_t gp;
@@ -388,6 +411,8 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
kgem->wedged = drmCommandNone(kgem->fd, DRM_I915_GEM_THROTTLE) == -EIO;
kgem->wedged |= DBG_NO_HW;
+ kgem->cpu_cache_pages = cpu_cache_size() >> 12;
+
list_init(&kgem->partial);
list_init(&kgem->requests);
list_init(&kgem->flushing);
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index b6455686..ae6ea47d 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -127,6 +127,7 @@ struct kgem {
uint32_t has_relaxed_fencing :1;
uint16_t fence_max;
+ uint16_t cpu_cache_pages;
uint32_t aperture_high, aperture_low, aperture;
uint32_t aperture_fenced, aperture_mappable;
uint32_t max_object_size;
diff --git a/src/sna/sna.h b/src/sna/sna.h
index a112fe39..6c64d64e 100644
--- a/src/sna/sna.h
+++ b/src/sna/sna.h
@@ -142,6 +142,8 @@ struct sna_pixmap {
#define SOURCE_BIAS 4
uint16_t source_count;
uint8_t pinned :1;
+ uint8_t inplace :1;
+ uint8_t mapped :1;
uint8_t flush :1;
uint8_t gpu :1;
uint8_t freed :1;
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 76ac89dd..fbecabb2 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -55,12 +55,14 @@
#endif
#define FORCE_GPU_ONLY 0
+#define FORCE_INPLACE 0
#define FORCE_FALLBACK 0
#define FORCE_FLUSH 0
#define USE_SPANS 0
#define USE_ZERO_SPANS 1
#define USE_BO_FOR_SCRATCH_PIXMAP 1
+#define INPLACE_MAP 1
static int sna_font_key;
@@ -171,6 +173,11 @@ static void sna_pixmap_destroy_gpu_bo(struct sna *sna, struct sna_pixmap *priv)
priv->gpu_bo = NULL;
}
+ if (priv->mapped) {
+ priv->pixmap->devPrivate.ptr = NULL;
+ priv->mapped = 0;
+ }
+
list_del(&priv->inactive);
/* and reset the upload counter */
@@ -358,7 +365,8 @@ static inline void sna_set_pixmap(PixmapPtr pixmap, struct sna_pixmap *sna)
dixSetPrivate(&pixmap->devPrivates, &sna_pixmap_index, sna);
}
-static struct sna_pixmap *_sna_pixmap_attach(PixmapPtr pixmap)
+static struct sna_pixmap *_sna_pixmap_attach(struct sna *sna,
+ PixmapPtr pixmap)
{
struct sna_pixmap *priv;
@@ -366,6 +374,18 @@ static struct sna_pixmap *_sna_pixmap_attach(PixmapPtr pixmap)
if (!priv)
return NULL;
+#if FORCE_INPLACE > 0
+ priv->inplace = 1;
+#elif FORCE_INPLACE < 0
+ priv->inplace = 0;
+#else
+ /* If the pixmap is larger than 2x the L2 cache, we presume that
+ * it will always be quicker to upload directly than to copy via
+ * the shadow.
+ */
+ priv->inplace =
+ (pixmap->devKind * pixmap->drawable.height >> 13) > sna->kgem.cpu_cache_pages;
+#endif
list_init(&priv->list);
list_init(&priv->inactive);
priv->pixmap = pixmap;
@@ -383,6 +403,7 @@ struct sna_pixmap *sna_pixmap_attach(PixmapPtr pixmap)
if (priv)
return priv;
+ sna = to_sna_from_pixmap(pixmap);
switch (pixmap->usage_hint) {
case CREATE_PIXMAP_USAGE_GLYPH_PICTURE:
#if FAKE_CREATE_PIXMAP_USAGE_SCRATCH_HEADER
@@ -395,7 +416,6 @@ struct sna_pixmap *sna_pixmap_attach(PixmapPtr pixmap)
break;
default:
- sna = to_sna_from_pixmap(pixmap);
if (!kgem_can_create_2d(&sna->kgem,
pixmap->drawable.width,
pixmap->drawable.height,
@@ -405,7 +425,7 @@ struct sna_pixmap *sna_pixmap_attach(PixmapPtr pixmap)
break;
}
- return _sna_pixmap_attach(pixmap);
+ return _sna_pixmap_attach(sna, pixmap);
}
static inline PixmapPtr
@@ -461,7 +481,7 @@ sna_pixmap_create_scratch(ScreenPtr screen,
if (!pixmap)
return NullPixmap;
- priv = _sna_pixmap_attach(pixmap);
+ priv = _sna_pixmap_attach(sna, pixmap);
if (!priv) {
fbDestroyPixmap(pixmap);
return NullPixmap;
@@ -478,6 +498,7 @@ sna_pixmap_create_scratch(ScreenPtr screen,
}
priv->freed = 1;
+ priv->inplace = 1;
sna_damage_all(&priv->gpu_damage, width, height);
miModifyPixmapHeader(pixmap,
@@ -595,6 +616,32 @@ sna_pixmap_move_to_cpu(PixmapPtr pixmap, unsigned int flags)
__FUNCTION__, priv->gpu_bo, priv->gpu_damage));
if ((flags & MOVE_READ) == 0) {
+ assert(flags == MOVE_WRITE);
+
+ if (priv->inplace && priv->gpu_bo && INPLACE_MAP) {
+ if (priv->gpu_bo->gpu) {
+ sna_pixmap_destroy_gpu_bo(sna, priv);
+ if (!sna_pixmap_move_to_gpu(pixmap))
+ goto skip_inplace_map;
+ }
+
+ pixmap->devPrivate.ptr =
+ kgem_bo_map(&sna->kgem, priv->gpu_bo,
+ PROT_WRITE);
+ priv->mapped = 1;
+
+ sna_damage_all(&priv->gpu_damage,
+ pixmap->drawable.width,
+ pixmap->drawable.height);
+ sna_damage_destroy(&priv->cpu_damage);
+ if (priv->cpu_bo)
+ sna_pixmap_free_cpu(sna, priv);
+
+ priv->gpu = true;
+ return true;
+ }
+
+skip_inplace_map:
if (priv->cpu_bo && priv->cpu_bo->gpu) {
if (priv->cpu_bo->exec == NULL)
kgem_retire(&sna->kgem);
@@ -608,6 +655,11 @@ sna_pixmap_move_to_cpu(PixmapPtr pixmap, unsigned int flags)
sna_damage_destroy(&priv->gpu_damage);
}
+ if (priv->mapped) {
+ pixmap->devPrivate.ptr = NULL;
+ priv->mapped = 0;
+ }
+
if (pixmap->devPrivate.ptr == NULL &&
!sna_pixmap_alloc_cpu(sna, pixmap, priv))
return false;
@@ -722,18 +774,46 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
return sna_pixmap_move_to_cpu(pixmap, flags);
}
- if ((flags & MOVE_READ) == 0 && priv->cpu_bo && !priv->cpu_bo->vmap) {
- if (sync_will_stall(priv->cpu_bo) && priv->cpu_bo->exec == NULL)
- kgem_retire(&sna->kgem);
- if (sync_will_stall(priv->cpu_bo)) {
- sna_damage_subtract(&priv->cpu_damage, region);
- if (!sna_pixmap_move_to_gpu(pixmap))
- return false;
+ if ((flags & MOVE_READ) == 0) {
+ assert(flags == MOVE_WRITE);
- sna_pixmap_free_cpu(sna, priv);
+ if (priv->inplace && priv->gpu_bo && INPLACE_MAP) {
+ if (sync_will_stall(priv->gpu_bo) &&
+ priv->gpu_bo->exec == NULL)
+ kgem_retire(&sna->kgem);
+
+ if (!sync_will_stall(priv->gpu_bo)) {
+ pixmap->devPrivate.ptr =
+ kgem_bo_map(&sna->kgem, priv->gpu_bo,
+ PROT_WRITE);
+ priv->mapped = 1;
+
+ sna_damage_subtract(&priv->cpu_damage, region);
+ sna_damage_add(&priv->gpu_damage, region);
+
+ priv->gpu = true;
+ return true;
+ }
+ }
+
+ if (priv->cpu_bo && !priv->cpu_bo->vmap) {
+ if (sync_will_stall(priv->cpu_bo) && priv->cpu_bo->exec == NULL)
+ kgem_retire(&sna->kgem);
+ if (sync_will_stall(priv->cpu_bo)) {
+ sna_damage_subtract(&priv->cpu_damage, region);
+ if (!sna_pixmap_move_to_gpu(pixmap))
+ return false;
+
+ sna_pixmap_free_cpu(sna, priv);
+ }
}
}
+ if (priv->mapped) {
+ pixmap->devPrivate.ptr = NULL;
+ priv->mapped = 0;
+ }
+
if (pixmap->devPrivate.ptr == NULL &&
!sna_pixmap_alloc_cpu(sna, pixmap, priv))
return false;
@@ -1501,7 +1581,7 @@ sna_put_zpixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
* So we try again with vma caching and only for pixmaps who will be
* immediately flushed...
*/
- if (priv->flush &&
+ if ((priv->flush || (priv->inplace && priv->gpu_bo)) &&
sna_put_image_upload_blt(drawable, gc, region,
x, y, w, h, bits, stride)) {
if (region_subsumes_drawable(region, &pixmap->drawable)) {
@@ -2097,6 +2177,9 @@ fallback:
static bool copy_use_gpu_bo(struct sna *sna,
struct sna_pixmap *priv)
{
+ if (priv->inplace)
+ return true;
+
if (!priv->cpu_bo)
return false;