summary refs log tree commit diff
diff options
context:
space:
mode:
author Chris Wilson <chris@chris-wilson.co.uk> 2011-11-01 17:02:17 +0000
committer Chris Wilson <chris@chris-wilson.co.uk> 2011-11-01 21:12:02 +0000
commit e0fd07bc251296784bf70f02877765171a053cc6 (patch)
tree 0d0dba54a0cc85cffb90060d0e09448ee9c5288c
parent 22c43efe6b9b5f669593aa9f3af6ee437426c5d2 (diff)
sna: Accelerate XYPixmap upload when using GXcopy
Mostly for the lols. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-rw-r--r-- src/sna/sna_accel.c 187
-rw-r--r-- src/sna/sna_blt.c 6
-rw-r--r-- src/sna/sna_reg.h 3
3 files changed, 167 insertions, 29 deletions
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index b2d0c33f..29ea66ce 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -140,6 +140,28 @@ static void _assert_pixmap_contains_box(PixmapPtr pixmap, BoxPtr box, const char
#define assert_pixmap_contains_box(p, b)
#endif
+/*
+ * Prepare a fill operation on @bo (backing @pixmap) by handing the request
+ * to the render backend's fill hook. Returns whatever the hook reports.
+ */
+inline static bool
+sna_fill_init_blt(struct sna_fill_op *fill,
+		  struct sna *sna,
+		  PixmapPtr pixmap,
+		  struct kgem_bo *bo,
+		  uint8_t alu,
+		  uint32_t pixel)
+{
+	const bool ready = sna->render.fill(sna, alu, pixmap, bo, pixel, fill);
+
+	return ready;
+}
+
+/*
+ * Prepare a copy operation from @src_bo to @dst_bo: the op structure is
+ * cleared first and then filled in by the render backend's copy hook.
+ * Returns whatever the hook reports.
+ */
+static Bool
+sna_copy_init_blt(struct sna_copy_op *copy,
+		  struct sna *sna,
+		  PixmapPtr src, struct kgem_bo *src_bo,
+		  PixmapPtr dst, struct kgem_bo *dst_bo,
+		  uint8_t alu)
+{
+	Bool ready;
+
+	/* Start from a zeroed op before the backend populates it. */
+	memset(copy, 0, sizeof *copy);
+
+	ready = sna->render.copy(sna, alu, src, src_bo, dst, dst_bo, copy);
+	return ready;
+}
+
static void sna_pixmap_destroy_gpu_bo(struct sna *sna, struct sna_pixmap *priv)
{
kgem_bo_destroy(&sna->kgem, priv->gpu_bo);
@@ -1446,6 +1468,133 @@ sna_put_xybitmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
return true;
}
+/*
+ * Upload an XYPixmap image into the pixmap's GPU bo with the blitter,
+ * one bit-plane at a time.
+ *
+ * @drawable: destination drawable
+ * @gc:       supplies alu, depth and planemask
+ * @region:   clipped destination boxes, already in pixmap space
+ * @x, @y:    image origin relative to the drawable
+ * @w, @h:    image size in pixels
+ * @left:     left padding of each scanline, in bits
+ * @bits:     plane data, most significant plane first
+ *
+ * Returns false (nothing emitted) when the alu is not GXcopy, when
+ * sna_drawable_use_gpu_bo() declines the GPU path, or when the target bo
+ * is Y-tiled; the caller is then expected to fall back. Returns true
+ * otherwise.
+ *
+ * For every plane selected by gc->planemask and every box of @region the
+ * source rows are copied bit-reversed into a temporary upload buffer and
+ * a 12-dword XY_FULL_MONO_PATTERN_MONO_SRC_BLT is appended to the batch.
+ */
+static Bool
+sna_put_xypixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
+		     int x, int y, int w, int h, int left, char *bits)
+{
+	struct sna *sna = to_sna_from_drawable(drawable);
+	PixmapPtr pixmap = get_drawable_pixmap(drawable);
+	struct sna_pixmap *priv = sna_pixmap(pixmap);
+	struct kgem_bo *bo = priv->gpu_bo;
+	int16_t dx, dy;
+	unsigned i, skip;
+
+	if (gc->alu != GXcopy)
+		return false;
+
+	if (!sna_drawable_use_gpu_bo(&pixmap->drawable, &region->extents))
+		return false;
+
+	if (bo->tiling == I915_TILING_Y)
+		return false;
+
+	assert_pixmap_contains_box(pixmap, RegionExtents(region));
+	if (!priv->gpu_only)
+		sna_damage_add(&priv->gpu_damage, region);
+
+	DBG(("%s: upload(%d, %d, %d, %d)\n", __FUNCTION__, x, y, w, h));
+
+	/* Move the image origin into pixmap space to match the region. */
+	get_drawable_deltas(drawable, pixmap, &dx, &dy);
+	x += dx + drawable->x;
+	y += dy + drawable->y;
+
+	kgem_set_mode(&sna->kgem, KGEM_BLT);
+
+	/*
+	 * Size of one plane in the client data.
+	 * NOTE(review): the plane size accounts for 'left' but the row
+	 * stride below uses BitmapBytePad(w) - confirm which is intended
+	 * when left != 0.
+	 */
+	skip = h * BitmapBytePad(w + left);
+
+	/*
+	 * Walk planes from the top bit down; 'bits' advances for skipped
+	 * planes too. The unsigned constant keeps the shift defined when
+	 * depth is 32.
+	 */
+	for (i = 1u << (gc->depth-1); i; i >>= 1, bits += skip) {
+		const BoxRec *box = REGION_RECTS(region);
+		int n = REGION_NUM_RECTS(region);
+
+		if ((gc->planemask & i) == 0)
+			continue;
+
+		/* Region is pre-clipped and translated into pixmap space */
+		do {
+			/* Source span rounded outwards to whole bytes. */
+			int bx1 = (box->x1 - x) & ~7;
+			int bx2 = (box->x2 - x + 7) & ~7;
+			int bw = (bx2 - bx1)/8;
+			int bh = box->y2 - box->y1;
+			int bstride = ALIGN(bw, 2);
+			int src_stride;
+			uint8_t *dst, *src;
+			uint32_t *b;
+			struct kgem_bo *upload;
+			void *ptr;
+
+			/* Flush first if this blit would not fit in the batch. */
+			if (!kgem_check_batch(&sna->kgem, 12) ||
+			    !kgem_check_bo_fenced(&sna->kgem, bo, NULL) ||
+			    !kgem_check_reloc(&sna->kgem, 2)) {
+				_kgem_submit(&sna->kgem);
+				_kgem_set_mode(&sna->kgem, KGEM_BLT);
+			}
+
+			upload = kgem_create_buffer(&sna->kgem,
+						    bstride*bh,
+						    KGEM_BUFFER_WRITE,
+						    &ptr);
+			/*
+			 * NOTE(review): on failure the remaining boxes of this
+			 * plane are dropped, yet the function still returns
+			 * true - confirm this best-effort behaviour is wanted.
+			 */
+			if (!upload)
+				break;
+
+			dst = ptr;
+			bstride -= bw;	/* now the per-row padding in the upload */
+
+			src_stride = BitmapBytePad(w);
+			src = (uint8_t*)bits + (box->y1 - y) * src_stride + bx1/8;
+			src_stride -= bw;	/* now the per-row remainder in the source */
+			do {
+				int count = bw;
+				do {
+					*dst++ = byte_reverse(*src++);
+				} while (--count);
+				dst += bstride;
+				src += src_stride;
+			} while (--bh);
+
+			b = sna->kgem.batch + sna->kgem.nbatch;
+			b[0] = XY_FULL_MONO_PATTERN_MONO_SRC_BLT;
+			/* Bits 20-21 are only set for 32bpp destinations. */
+			if (drawable->bitsPerPixel == 32)
+				b[0] |= 3 << 20;
+			/* Bit offset of the box within the first source byte. */
+			b[0] |= ((box->x1 - x) & 7) << 17;
+			b[1] = bo->pitch;
+			if (sna->kgem.gen >= 40) {
+				if (bo->tiling)
+					b[0] |= BLT_DST_TILED;
+				b[1] >>= 2;
+			}
+			b[1] |= 1u << 31; /* solid pattern */
+			b[1] |= blt_depth(drawable->depth) << 24;
+			b[1] |= 0xce << 16; /* S or (D and !P) */
+			b[2] = box->y1 << 16 | box->x1;
+			b[3] = box->y2 << 16 | box->x2;
+			b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4,
+					      bo,
+					      I915_GEM_DOMAIN_RENDER << 16 |
+					      I915_GEM_DOMAIN_RENDER |
+					      KGEM_RELOC_FENCED,
+					      0);
+			b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5,
+					      upload,
+					      I915_GEM_DOMAIN_RENDER << 16 |
+					      KGEM_RELOC_FENCED,
+					      0);
+			/* The current plane bit is passed as the colour operands. */
+			b[6] = 0;
+			b[7] = i;
+			b[8] = i;
+			b[9] = i;
+			b[10] = -1;
+			b[11] = -1;
+
+			sna->kgem.nbatch += 12;
+			kgem_bo_destroy(&sna->kgem, upload);
+
+			box++;
+		} while (--n);
+	}
+
+	sna->blt_state.fill_bo = 0;
+	return true;
+}
+
static void
sna_put_image(DrawablePtr drawable, GCPtr gc, int depth,
int x, int y, int w, int h, int left, int format,
@@ -1491,11 +1640,11 @@ sna_put_image(DrawablePtr drawable, GCPtr gc, int depth,
if (!RegionNotEmpty(&region))
return;
- if (!PM_IS_SOLID(drawable, gc->planemask))
- goto fallback;
-
switch (format) {
case ZPixmap:
+ if (!PM_IS_SOLID(drawable, gc->planemask))
+ goto fallback;
+
if (sna_put_zpixmap_blt(drawable, gc, &region,
x, y, w, h,
bits, PixmapBytePad(w, depth)))
@@ -1503,12 +1652,22 @@ sna_put_image(DrawablePtr drawable, GCPtr gc, int depth,
break;
case XYBitmap:
+ if (!PM_IS_SOLID(drawable, gc->planemask))
+ goto fallback;
+
if (sna_put_xybitmap_blt(drawable, gc, &region,
x, y, w, h,
bits))
return;
break;
+ case XYPixmap:
+ if (sna_put_xypixmap_blt(drawable, gc, &region,
+ x, y, w, h, left,
+ bits))
+ return;
+ break;
+
default:
break;
}
@@ -2032,28 +2191,6 @@ box_intersect(BoxPtr a, const BoxRec *b)
return a->x1 < a->x2 && a->y1 < a->y2;
}
-inline static bool
-sna_fill_init_blt(struct sna_fill_op *fill,
- struct sna *sna,
- PixmapPtr pixmap,
- struct kgem_bo *bo,
- uint8_t alu,
- uint32_t pixel)
-{
- return sna->render.fill(sna, alu, pixmap, bo, pixel, fill);
-}
-
-static Bool
-sna_copy_init_blt(struct sna_copy_op *copy,
- struct sna *sna,
- PixmapPtr src, struct kgem_bo *src_bo,
- PixmapPtr dst, struct kgem_bo *dst_bo,
- uint8_t alu)
-{
- memset(copy, 0, sizeof(*copy));
- return sna->render.copy(sna, alu, src, src_bo, dst, dst_bo, copy);
-}
-
static const BoxRec *
find_clip_box_for_y(const BoxRec *begin, const BoxRec *end, int16_t y)
{
diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c
index ae24e625..12dd2a04 100644
--- a/src/sna/sna_blt.c
+++ b/src/sna/sna_blt.c
@@ -330,7 +330,7 @@ static void sna_blt_copy_one(struct sna *sna,
/* Compare against a previous fill */
if (kgem->nbatch >= 6 &&
blt->overwrites &&
- kgem->batch[kgem->nbatch-6] == ((blt->cmd & ~XY_SRC_COPY_BLT_CMD) | XY_COLOR_BLT_CMD) &&
+ kgem->batch[kgem->nbatch-6] == ((blt->cmd & ~XY_SRC_COPY_BLT_CMD) | XY_COLOR_BLT) &&
kgem->batch[kgem->nbatch-4] == ((uint32_t)dst_y << 16 | (uint16_t)dst_x) &&
kgem->batch[kgem->nbatch-3] == ((uint32_t)(dst_y+height) << 16 | (uint16_t)(dst_x+width)) &&
kgem->reloc[kgem->nreloc-1].target_handle == blt->bo[1]->handle) {
@@ -1515,7 +1515,7 @@ static Bool sna_blt_fill_box(struct sna *sna, uint8_t alu,
assert(box->x1 >= 0);
assert(box->y1 >= 0);
- cmd = XY_COLOR_BLT_CMD;
+ cmd = XY_COLOR_BLT;
if (bpp == 32)
cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
@@ -1776,7 +1776,7 @@ Bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu,
if (kgem->nbatch >= 6 &&
(alu == GXcopy || alu == GXclear) &&
kgem->reloc[kgem->nreloc-1].target_handle == dst_bo->handle &&
- kgem->batch[kgem->nbatch-6] == ((cmd & ~XY_SRC_COPY_BLT_CMD) | XY_COLOR_BLT_CMD) &&
+ kgem->batch[kgem->nbatch-6] == ((cmd & ~XY_SRC_COPY_BLT_CMD) | XY_COLOR_BLT) &&
kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) &&
kgem->batch[kgem->nbatch-3] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) {
DBG(("%s: deleting last fill\n", __FUNCTION__));
diff --git a/src/sna/sna_reg.h b/src/sna/sna_reg.h
index f1fbd8b7..ff2ff3b7 100644
--- a/src/sna/sna_reg.h
+++ b/src/sna/sna_reg.h
@@ -43,7 +43,7 @@
#define BLT_DST_TILED (1<<11)
#define COLOR_BLT_CMD ((2<<29)|(0x40<<22)|(0x3))
-#define XY_COLOR_BLT_CMD ((2<<29)|(0x50<<22)|(0x4))
+#define XY_COLOR_BLT ((2<<29)|(0x50<<22)|(0x4))
#define XY_SETUP_BLT ((2<<29)|(1<<22)|6)
#define XY_SETUP_MONO_PATTERN_SL_BLT ((2<<29)|(0x11<<22)|7)
#define XY_SETUP_CLIP ((2<<29)|(3<<22)|1)
@@ -55,6 +55,7 @@
#define XY_MONO_PAT ((0x2<<29)|(0x52<<22)|0x7)
#define XY_MONO_SRC_COPY ((0x2<<29)|(0x54<<22)|(0x6))
#define XY_MONO_SRC_COPY_IMM ((0x2<<29)|(0x71<<22))
+#define XY_FULL_MONO_PATTERN_MONO_SRC_BLT ((0x2<<29)|(0x58<<22)|0xa)
/* FLUSH commands */
#define BRW_3D(Pipeline,Opcode,Subopcode) \