summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Chris Wilson <chris@chris-wilson.co.uk> 2010-03-31 11:50:27 +0100
committer: Chris Wilson <chris@chris-wilson.co.uk> 2010-04-15 21:37:32 +0100
commit: c374c94e41d6e7d677334171e3255778d77cbe18 (patch)
tree: db45edc9c88579664b30e292f6c52db8b8c4fade
parent: 96aa7a236ac0605324a94f7b7d10413cb219f071 (diff)
uxa: Reuse in-flight bo
When we need to allocate a new bo for use as a gpu target, first check if
we can reuse a pixmap that has already been relocated into the aperture as
a temporary target, for instance a glyph mask or a clip mask.

Before:
  backend  test                   min(s)  median(s)  stddev.
  xlib     firefox-planet-gnome   50.568  50.873     0.30%
  xcb      firefox-planet-gnome   49.686  53.003     3.92%
  xlib     evolution              40.115  40.131     0.86%
  xcb      evolution              28.241  28.285     0.18%

After:
  backend  test                   min(s)  median(s)  stddev.
  xlib     firefox-planet-gnome   47.759  48.233     0.80%
  xcb      firefox-planet-gnome   48.611  48.657     0.87%
  xlib     evolution              38.954  38.991     0.05%
  xcb      evolution              26.561  26.654     0.19%

And even more dramatic improvements when using a font size larger than the
maximum size of the glyph cache:
  xcb  firefox-36-20090611: 1.79x speedup
  xlib firefox-36-20090611: 1.74x speedup
  xcb  firefox-36-20090609: 1.62x speedup
  xlib firefox-36-20090609: 1.59x speedup

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-rw-r--r-- src/i830.h 13
-rw-r--r-- src/i830_batchbuffer.c 12
-rw-r--r-- src/i830_driver.c 1
-rw-r--r-- src/i830_uxa.c 88
4 files changed, 82 insertions, 32 deletions
diff --git a/src/i830.h b/src/i830.h
index 43c58875..88949f6f 100644
--- a/src/i830.h
+++ b/src/i830.h
@@ -136,14 +136,22 @@ list_is_empty(struct list *head)
#define list_first_entry(ptr, type, member) \
list_entry((ptr)->next, type, member)
+#define list_foreach(pos, head) \
+ for (pos = (head)->next; pos != (head); pos = pos->next)
+
+#define list_foreach_entry(pos, type, head, member) \
+ for (pos = list_entry((head)->next, type, member);\
+ &pos->member != (head); \
+ pos = list_entry(pos->member.next, type, member))
+
struct intel_pixmap {
dri_bo *bo;
- uint32_t tiling;
+ uint32_t tiling, stride;
uint32_t flush_write_domain;
uint32_t flush_read_domains;
uint32_t batch_write_domain;
uint32_t batch_read_domains;
- struct list flush, batch;
+ struct list flush, batch, in_flight;
};
struct intel_pixmap *i830_get_pixmap_intel(PixmapPtr pixmap);
@@ -252,6 +260,7 @@ typedef struct intel_screen_private {
int batch_atomic_limit;
struct list batch_pixmaps;
struct list flush_pixmaps;
+ struct list in_flight;
/* For Xvideo */
Bool use_drmmode_overlay;
diff --git a/src/i830_batchbuffer.c b/src/i830_batchbuffer.c
index a02e9808..492472e7 100644
--- a/src/i830_batchbuffer.c
+++ b/src/i830_batchbuffer.c
@@ -195,6 +195,18 @@ void intel_batch_submit(ScrnInfoPtr scrn)
list_del(&entry->flush);
}
+ while (!list_is_empty(&intel->in_flight)) {
+ struct intel_pixmap *entry;
+
+ entry = list_first_entry(&intel->in_flight,
+ struct intel_pixmap,
+ in_flight);
+
+ dri_bo_unreference(entry->bo);
+ list_del(&entry->in_flight);
+ xfree(entry);
+ }
+
/* Save a ref to the last batch emitted, which we use for syncing
* in debug code.
*/
diff --git a/src/i830_driver.c b/src/i830_driver.c
index d0ce552d..db8af063 100644
--- a/src/i830_driver.c
+++ b/src/i830_driver.c
@@ -1054,6 +1054,7 @@ void i830_init_bufmgr(ScrnInfoPtr scrn)
list_init(&intel->batch_pixmaps);
list_init(&intel->flush_pixmaps);
+ list_init(&intel->in_flight);
}
Bool i830_crtc_on(xf86CrtcPtr crtc)
diff --git a/src/i830_uxa.c b/src/i830_uxa.c
index f3f0f651..984069e7 100644
--- a/src/i830_uxa.c
+++ b/src/i830_uxa.c
@@ -127,7 +127,7 @@ i830_uxa_pixmap_compute_size(PixmapPtr pixmap,
{
ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum];
intel_screen_private *intel = intel_get_screen_private(scrn);
- int pitch_align;
+ int pitch, pitch_align;
int size;
if (*tiling != I915_TILING_NONE) {
@@ -151,6 +151,9 @@ i830_uxa_pixmap_compute_size(PixmapPtr pixmap,
}
}
+ pitch = (w * pixmap->drawable.bitsPerPixel + 7) / 8;
+ if (pitch <= 256)
+ *tiling = I915_TILING_NONE;
repeat:
if (*tiling == I915_TILING_NONE) {
pitch_align = intel->accel_pixmap_pitch_alignment;
@@ -158,8 +161,7 @@ i830_uxa_pixmap_compute_size(PixmapPtr pixmap,
pitch_align = 512;
}
- *stride = ROUND_TO((w * pixmap->drawable.bitsPerPixel + 7) / 8,
- pitch_align);
+ *stride = ROUND_TO(pitch, pitch_align);
if (*tiling == I915_TILING_NONE) {
/* Round the height up so that the GPU's access to a 2x2 aligned
@@ -548,17 +550,19 @@ dri_bo *i830_get_pixmap_bo(PixmapPtr pixmap)
void i830_set_pixmap_bo(PixmapPtr pixmap, dri_bo * bo)
{
+ ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum];
+ intel_screen_private *intel = intel_get_screen_private(scrn);
struct intel_pixmap *priv;
priv = i830_get_pixmap_intel(pixmap);
if (priv != NULL) {
- dri_bo_unreference(priv->bo);
-
- priv->flush_read_domains = priv->flush_write_domain = 0;
- priv->batch_read_domains = priv->batch_write_domain = 0;
- list_del(&priv->batch);
- list_del(&priv->flush);
+ if (list_is_empty(&priv->batch)) {
+ dri_bo_unreference(priv->bo);
+ } else {
+ list_add(&priv->in_flight, &intel->in_flight);
+ priv = NULL;
+ }
}
if (bo != NULL) {
@@ -576,6 +580,7 @@ void i830_set_pixmap_bo(PixmapPtr pixmap, dri_bo * bo)
dri_bo_reference(bo);
priv->bo = bo;
+ priv->stride = i830_pixmap_pitch(pixmap);
ret = drm_intel_bo_get_tiling(bo,
&priv->tiling,
@@ -883,35 +888,26 @@ i830_uxa_create_pixmap(ScreenPtr screen, int w, int h, int depth,
if (w && h) {
struct intel_pixmap *priv;
- unsigned int size;
+ unsigned int size, tiling;
int stride;
- priv = xcalloc(1, sizeof (struct intel_pixmap));
- if (priv == NULL) {
- fbDestroyPixmap(pixmap);
- return NullPixmap;
- }
-
/* Always attempt to tile, compute_size() will remove the
* tiling for pixmaps that are either too large or too small
* to be effectively tiled.
*/
- priv->tiling = I915_TILING_X;
+ tiling = I915_TILING_X;
if (usage == INTEL_CREATE_PIXMAP_TILING_Y)
- priv->tiling = I915_TILING_Y;
+ tiling = I915_TILING_Y;
if (usage == UXA_CREATE_PIXMAP_FOR_MAP)
- priv->tiling = I915_TILING_NONE;
-
- if (priv->tiling != I915_TILING_NONE) {
- if (w < 256)
- priv->tiling = I915_TILING_NONE;
- if (h < 8)
- priv->tiling = I915_TILING_NONE;
- if (h < 32 && priv->tiling == I915_TILING_Y)
- priv->tiling = I915_TILING_X;
+ tiling = I915_TILING_NONE;
+
+ if (tiling != I915_TILING_NONE) {
+ if (h <= 4)
+ tiling = I915_TILING_NONE;
+ if (h <= 16 && tiling == I915_TILING_Y)
+ tiling = I915_TILING_X;
}
- size = i830_uxa_pixmap_compute_size(pixmap, w, h,
- &priv->tiling, &stride);
+ size = i830_uxa_pixmap_compute_size(pixmap, w, h, &tiling, &stride);
/* Fail very large allocations on 32-bit systems. Large BOs will
* tend to hit SW fallbacks frequently, and also will tend to fail
@@ -923,7 +919,37 @@ i830_uxa_create_pixmap(ScreenPtr screen, int w, int h, int depth,
*/
if (sizeof(unsigned long) == 4 &&
size > (unsigned int)(1024 * 1024 * 1024)) {
- xfree(priv);
+ fbDestroyPixmap(pixmap);
+ return NullPixmap;
+ }
+
+ /* Perform a preliminary search for an in-flight bo */
+ if (usage != UXA_CREATE_PIXMAP_FOR_MAP) {
+ int aligned_h;
+
+ if (tiling == I915_TILING_X)
+ aligned_h = ALIGN(h, 8);
+ else if (tiling == I915_TILING_Y)
+ aligned_h = ALIGN(h, 32);
+ else
+ aligned_h = ALIGN(h, 2);
+
+ list_foreach_entry(priv, struct intel_pixmap,
+ &intel->in_flight,
+ in_flight) {
+ if (priv->tiling == tiling &&
+ priv->stride >= stride &&
+ priv->bo->size >= priv->stride * aligned_h) {
+ list_del(&priv->in_flight);
+ screen->ModifyPixmapHeader(pixmap, w, h, 0, 0, priv->stride, NULL);
+ i830_uxa_set_pixmap_intel(pixmap, priv);
+ return pixmap;
+ }
+ }
+ }
+
+ priv = xcalloc(1, sizeof (struct intel_pixmap));
+ if (priv == NULL) {
fbDestroyPixmap(pixmap);
return NullPixmap;
}
@@ -941,6 +967,8 @@ i830_uxa_create_pixmap(ScreenPtr screen, int w, int h, int depth,
return NullPixmap;
}
+ priv->stride = stride;
+ priv->tiling = tiling;
if (priv->tiling != I915_TILING_NONE)
drm_intel_bo_set_tiling(priv->bo,
&priv->tiling,