author    Chris Wilson <chris@chris-wilson.co.uk>    2012-01-06 15:26:11 +0000
committer Chris Wilson <chris@chris-wilson.co.uk>    2012-01-06 17:50:01 +0000
commit    9f1935bb4e894264053d94e53c99d5ad607700fb (patch)
tree      689fdc95dda28ba227cf26d9ee2347a303962bb8
parent    141001df6c9c3485c500ed531a214c09b46c1d3b (diff)
sna: Support performing alpha-fixup on the source
By inlining the swizzling of the alpha-channel we can support BLT copies from an alpha-less pixmap to an alpha-destination.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-rw-r--r--  src/sna/blt.c                 | 105
-rw-r--r--  src/sna/sna.h                 |  13
-rw-r--r--  src/sna/sna_blt.c             | 356
-rw-r--r--  src/sna/sna_io.c              | 241
-rw-r--r--  src/sna/sna_reg.h             |   1
-rw-r--r--  test/.gitignore               |   1
-rw-r--r--  test/Makefile.am              |   1
-rw-r--r--  test/render-copy-alphaless.c  | 285
8 files changed, 969 insertions(+), 34 deletions(-)
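For illustration: the fixup described in the commit message amounts to rewriting each pixel as (src & and) | or while it is copied, so that e.g. an x8r8g8b8 source lands in an a8r8g8b8 destination with its alpha byte forced to 0xff. A minimal standalone sketch of that swizzle, assuming packed 32bpp pixels and an invented helper name (this is not code from the patch, which also handles 8/16bpp, strides and tiling):

#include <stdint.h>
#include <stddef.h>

/*
 * Sketch of the alpha-fixup swizzle: copy packed 32bpp pixels while
 * masking each one, e.g. and = 0xffffffff, or = 0xff000000 to turn
 * x8r8g8b8 into a8r8g8b8.  Hypothetical helper for illustration only.
 */
static void
copy_with_alpha_fixup(const uint32_t *src, uint32_t *dst,
                      size_t npixels, uint32_t and, uint32_t or)
{
        size_t i;

        for (i = 0; i < npixels; i++)
                dst[i] = (src[i] & and) | or;
}

The patch expresses this same per-pixel form in three places: memcpy_xor() for CPU writes, sna_write_boxes__xor() for uploads staged through a temporary buffer, and XY_FULL_MONO_PATTERN_BLT with ROP 0xfc (source OR pattern) and the fixup value as the pattern colour for GPU-to-GPU copies.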
diff --git a/src/sna/blt.c b/src/sna/blt.c
index 7a77fa49..d28ad985 100644
--- a/src/sna/blt.c
+++ b/src/sna/blt.c
@@ -106,3 +106,108 @@ memcpy_blt(const void *src, void *dst, int bpp,
break;
}
}
+
+void
+memcpy_xor(const void *src, void *dst, int bpp,
+ int32_t src_stride, int32_t dst_stride,
+ int16_t src_x, int16_t src_y,
+ int16_t dst_x, int16_t dst_y,
+ uint16_t width, uint16_t height,
+ uint32_t and, uint32_t or)
+{
+ uint8_t *src_bytes;
+ uint8_t *dst_bytes;
+ int i;
+
+ assert(width && height);
+ assert(bpp >= 8);
+
+ DBG(("%s: src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d, bpp=%d, and=%x, xor=%x\n",
+ __FUNCTION__,
+ src_x, src_y, dst_x, dst_y,
+ width, height,
+ src_stride, dst_stride,
+ bpp, and, or));
+
+ bpp /= 8;
+ src_bytes = (uint8_t *)src + src_stride * src_y + src_x * bpp;
+ dst_bytes = (uint8_t *)dst + dst_stride * dst_y + dst_x * bpp;
+
+ if (and == 0xffffffff) {
+ switch (bpp) {
+ case 1:
+ do {
+ for (i = 0; i < width; i++)
+ dst_bytes[i] = src_bytes[i] | or;
+
+ src_bytes += src_stride;
+ dst_bytes += dst_stride;
+ } while (--height);
+ break;
+
+ case 2:
+ do {
+ uint16_t *d = (uint16_t *)dst_bytes;
+ uint16_t *s = (uint16_t *)src_bytes;
+
+ for (i = 0; i < width; i++)
+ d[i] = s[i] | or;
+
+ src_bytes += src_stride;
+ dst_bytes += dst_stride;
+ } while (--height);
+ break;
+
+ case 4:
+ do {
+ uint32_t *d = (uint32_t *)dst_bytes;
+ uint32_t *s = (uint32_t *)src_bytes;
+
+ for (i = 0; i < width; i++)
+ d[i] = s[i] | or;
+
+ src_bytes += src_stride;
+ dst_bytes += dst_stride;
+ } while (--height);
+ break;
+ }
+ } else {
+ switch (bpp) {
+ case 1:
+ do {
+ for (i = 0; i < width; i++)
+ dst_bytes[i] = (src_bytes[i] & and) | or;
+
+ src_bytes += src_stride;
+ dst_bytes += dst_stride;
+ } while (--height);
+ break;
+
+ case 2:
+ do {
+ uint16_t *d = (uint16_t *)dst_bytes;
+ uint16_t *s = (uint16_t *)src_bytes;
+
+ for (i = 0; i < width; i++)
+ d[i] = (s[i] & and) | or;
+
+ src_bytes += src_stride;
+ dst_bytes += dst_stride;
+ } while (--height);
+ break;
+
+ case 4:
+ do {
+ uint32_t *d = (uint32_t *)dst_bytes;
+ uint32_t *s = (uint32_t *)src_bytes;
+
+ for (i = 0; i < width; i++)
+ d[i] = (s[i] & and) | or;
+
+ src_bytes += src_stride;
+ dst_bytes += dst_stride;
+ } while (--height);
+ break;
+ }
+ }
+}
diff --git a/src/sna/sna.h b/src/sna/sna.h
index f16324e0..de4de5c8 100644
--- a/src/sna/sna.h
+++ b/src/sna/sna.h
@@ -685,6 +685,11 @@ void sna_write_boxes(struct sna *sna, PixmapPtr dst,
struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
const void *src, int stride, int16_t src_dx, int16_t src_dy,
const BoxRec *box, int n);
+void sna_write_boxes__xor(struct sna *sna, PixmapPtr dst,
+ struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
+ const void *src, int stride, int16_t src_dx, int16_t src_dy,
+ const BoxRec *box, int nbox,
+ uint32_t and, uint32_t or);
struct kgem_bo *sna_replace(struct sna *sna,
PixmapPtr pixmap,
@@ -713,6 +718,14 @@ memcpy_blt(const void *src, void *dst, int bpp,
int16_t dst_x, int16_t dst_y,
uint16_t width, uint16_t height);
+void
+memcpy_xor(const void *src, void *dst, int bpp,
+ int32_t src_stride, int32_t dst_stride,
+ int16_t src_x, int16_t src_y,
+ int16_t dst_x, int16_t dst_y,
+ uint16_t width, uint16_t height,
+ uint32_t and, uint32_t or);
+
#define SNA_CREATE_FB 0x10
#define SNA_CREATE_SCRATCH 0x11
#define SNA_CREATE_GLYPH 0x12
diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c
index 5879e973..07771a90 100644
--- a/src/sna/sna_blt.c
+++ b/src/sna/sna_blt.c
@@ -307,6 +307,104 @@ static Bool sna_blt_copy_init(struct sna *sna,
return TRUE;
}
+static Bool sna_blt_alpha_fixup_init(struct sna *sna,
+ struct sna_blt_state *blt,
+ struct kgem_bo *src,
+ struct kgem_bo *dst,
+ int bpp, uint32_t alpha)
+{
+ struct kgem *kgem = &sna->kgem;
+
+ blt->bo[0] = src;
+ blt->bo[1] = dst;
+
+ blt->cmd = XY_FULL_MONO_PATTERN_BLT;
+ blt->pitch[0] = src->pitch;
+ if (kgem->gen >= 40 && src->tiling) {
+ blt->cmd |= BLT_SRC_TILED;
+ blt->pitch[0] >>= 2;
+ }
+ assert(blt->pitch[0] < MAXSHORT);
+
+ blt->pitch[1] = dst->pitch;
+ if (kgem->gen >= 40 && dst->tiling) {
+ blt->cmd |= BLT_DST_TILED;
+ blt->pitch[1] >>= 2;
+ }
+ assert(blt->pitch[1] < MAXSHORT);
+
+ blt->overwrites = 1;
+ blt->br13 = (0xfc << 16) | blt->pitch[1];
+ switch (bpp) {
+ default: assert(0);
+ case 32: blt->cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
+ blt->br13 |= 1 << 25; /* RGB8888 */
+ case 16: blt->br13 |= 1 << 24; /* RGB565 */
+ case 8: break;
+ }
+ blt->pixel = alpha;
+
+ kgem_set_mode(kgem, KGEM_BLT);
+ if (!kgem_check_bo_fenced(kgem, src, dst, NULL)) {
+ _kgem_submit(kgem);
+ _kgem_set_mode(kgem, KGEM_BLT);
+ }
+
+ sna->blt_state.fill_bo = 0;
+ return TRUE;
+}
+
+static void sna_blt_alpha_fixup_one(struct sna *sna,
+ const struct sna_blt_state *blt,
+ int src_x, int src_y,
+ int width, int height,
+ int dst_x, int dst_y)
+{
+ struct kgem *kgem = &sna->kgem;
+ uint32_t *b;
+
+ DBG(("%s: (%d, %d) -> (%d, %d) x (%d, %d)\n",
+ __FUNCTION__, src_x, src_y, dst_x, dst_y, width, height));
+
+ assert(src_x >= 0);
+ assert(src_y >= 0);
+ assert((src_y + height) * blt->bo[0]->pitch <= blt->bo[0]->size);
+ assert(dst_x >= 0);
+ assert(dst_y >= 0);
+ assert((dst_y + height) * blt->bo[1]->pitch <= blt->bo[1]->size);
+ assert(width > 0);
+ assert(height > 0);
+
+ if (!kgem_check_batch(kgem, 12) || !kgem_check_reloc(kgem, 2)) {
+ _kgem_submit(kgem);
+ _kgem_set_mode(kgem, KGEM_BLT);
+ }
+
+ b = kgem->batch + kgem->nbatch;
+ b[0] = blt->cmd;
+ b[1] = blt->br13;
+ b[2] = (dst_y << 16) | dst_x;
+ b[3] = ((dst_y + height) << 16) | (dst_x + width);
+ b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4,
+ blt->bo[1],
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = blt->pitch[0];
+ b[6] = (src_y << 16) | src_x;
+ b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7,
+ blt->bo[0],
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[8] = blt->pixel;
+ b[9] = blt->pixel;
+ b[10] = 0;
+ b[11] = 0;
+ kgem->nbatch += 12;
+}
+
static void sna_blt_copy_one(struct sna *sna,
const struct sna_blt_state *blt,
int src_x, int src_y,
@@ -930,9 +1028,90 @@ static void blt_composite_copy_boxes(struct sna *sna,
} while(--nbox);
}
+fastcall static void
+blt_composite_copy_with_alpha(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ int x1, x2, y1, y2;
+ int src_x, src_y;
+
+ DBG(("%s: src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n",
+ __FUNCTION__,
+ r->src.x, r->src.y,
+ r->dst.x, r->dst.y,
+ r->width, r->height));
+
+ /* XXX higher layer should have clipped? */
+
+ x1 = r->dst.x + op->dst.x;
+ y1 = r->dst.y + op->dst.y;
+ x2 = x1 + r->width;
+ y2 = y1 + r->height;
+
+ src_x = r->src.x - x1;
+ src_y = r->src.y - y1;
+
+ /* clip against dst */
+ if (x1 < 0)
+ x1 = 0;
+ if (y1 < 0)
+ y1 = 0;
+
+ if (x2 > op->dst.width)
+ x2 = op->dst.width;
+
+ if (y2 > op->dst.height)
+ y2 = op->dst.height;
+
+ DBG(("%s: box=(%d, %d), (%d, %d)\n", __FUNCTION__, x1, y1, x2, y2));
+
+ if (x2 <= x1 || y2 <= y1)
+ return;
+
+ sna_blt_alpha_fixup_one(sna, &op->u.blt,
+ x1 + src_x, y1 + src_y,
+ x2 - x1, y2 - y1,
+ x1, y1);
+}
+
+fastcall static void
+blt_composite_copy_box_with_alpha(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box)
+{
+ DBG(("%s: box (%d, %d), (%d, %d)\n",
+ __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
+ sna_blt_alpha_fixup_one(sna, &op->u.blt,
+ box->x1 + op->u.blt.sx,
+ box->y1 + op->u.blt.sy,
+ box->x2 - box->x1,
+ box->y2 - box->y1,
+ box->x1 + op->dst.x,
+ box->y1 + op->dst.y);
+}
+
+static void
+blt_composite_copy_boxes_with_alpha(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+ DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
+ do {
+ DBG(("%s: box (%d, %d), (%d, %d)\n",
+ __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
+ sna_blt_alpha_fixup_one(sna, &op->u.blt,
+ box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
+ box->x2 - box->x1, box->y2 - box->y1,
+ box->x1 + op->dst.x, box->y1 + op->dst.y);
+ box++;
+ } while(--nbox);
+}
+
static Bool
prepare_blt_copy(struct sna *sna,
- struct sna_composite_op *op)
+ struct sna_composite_op *op,
+ uint32_t alpha_fixup)
{
PixmapPtr src = op->u.blt.src_pixmap;
struct sna_pixmap *priv = sna_pixmap(src);
@@ -947,19 +1126,32 @@ prepare_blt_copy(struct sna *sna,
DBG(("%s\n", __FUNCTION__));
- op->blt = blt_composite_copy;
- op->box = blt_composite_copy_box;
- op->boxes = blt_composite_copy_boxes;
if (sna->kgem.gen >= 60)
op->done = gen6_blt_copy_done;
else
op->done = blt_done;
- return sna_blt_copy_init(sna, &op->u.blt,
- priv->gpu_bo,
- op->dst.bo,
- src->drawable.bitsPerPixel,
- GXcopy);
+ if (alpha_fixup) {
+ op->blt = blt_composite_copy_with_alpha;
+ op->box = blt_composite_copy_box_with_alpha;
+ op->boxes = blt_composite_copy_boxes_with_alpha;
+
+ return sna_blt_alpha_fixup_init(sna, &op->u.blt,
+ priv->gpu_bo,
+ op->dst.bo,
+ src->drawable.bitsPerPixel,
+ alpha_fixup);
+ } else {
+ op->blt = blt_composite_copy;
+ op->box = blt_composite_copy_box;
+ op->boxes = blt_composite_copy_boxes;
+
+ return sna_blt_copy_init(sna, &op->u.blt,
+ priv->gpu_bo,
+ op->dst.bo,
+ src->drawable.bitsPerPixel,
+ GXcopy);
+ }
}
static void blt_vmap_done(struct sna *sna, const struct sna_composite_op *op)
@@ -1082,9 +1274,80 @@ static void blt_put_composite_boxes(struct sna *sna,
}
}
+fastcall static void
+blt_put_composite_with_alpha(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ PixmapPtr dst = op->dst.pixmap;
+ PixmapPtr src = op->u.blt.src_pixmap;
+ struct sna_pixmap *dst_priv = sna_pixmap(dst);
+ int pitch = src->devKind;
+ char *data = src->devPrivate.ptr;
+
+ int16_t dst_x = r->dst.x + op->dst.x;
+ int16_t dst_y = r->dst.y + op->dst.y;
+ int16_t src_x = r->src.x + op->u.blt.sx;
+ int16_t src_y = r->src.y + op->u.blt.sy;
+ BoxRec box;
+
+ box.x1 = dst_x;
+ box.y1 = dst_y;
+ box.x2 = dst_x + r->width;
+ box.y2 = dst_y + r->height;
+
+ sna_write_boxes__xor(sna, dst,
+ dst_priv->gpu_bo, 0, 0,
+ data, pitch, src_x, src_y,
+ &box, 1,
+ 0xffffffff, op->u.blt.pixel);
+}
+
+fastcall static void
+blt_put_composite_box_with_alpha(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box)
+{
+ PixmapPtr src = op->u.blt.src_pixmap;
+
+ DBG(("%s: src=(%d, %d), dst=(%d, %d)\n", __FUNCTION__,
+ op->u.blt.sx, op->u.blt.sy,
+ op->dst.x, op->dst.y));
+
+ sna_write_boxes__xor(sna, op->dst.pixmap,
+ op->dst.bo, op->dst.x, op->dst.y,
+ src->devPrivate.ptr,
+ src->devKind,
+ op->u.blt.sx, op->u.blt.sy,
+ box, 1,
+ 0xffffffff, op->u.blt.pixel);
+}
+
+static void
+blt_put_composite_boxes_with_alpha(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int n)
+{
+ PixmapPtr src = op->u.blt.src_pixmap;
+
+ DBG(("%s: src=(%d, %d), dst=(%d, %d), [(%d, %d), (%d, %d) x %d]\n", __FUNCTION__,
+ op->u.blt.sx, op->u.blt.sy,
+ op->dst.x, op->dst.y,
+ box->x1, box->y1, box->x2, box->y2, n));
+
+ sna_write_boxes__xor(sna, op->dst.pixmap,
+ op->dst.bo, op->dst.x, op->dst.y,
+ src->devPrivate.ptr,
+ src->devKind,
+ op->u.blt.sx, op->u.blt.sy,
+ box, n,
+ 0xffffffff, op->u.blt.pixel);
+}
+
static Bool
prepare_blt_put(struct sna *sna,
- struct sna_composite_op *op)
+ struct sna_composite_op *op,
+ uint32_t alpha_fixup)
{
PixmapPtr src = op->u.blt.src_pixmap;
struct sna_pixmap *priv = sna_pixmap(src);
@@ -1105,26 +1368,43 @@ prepare_blt_put(struct sna *sna,
free_bo = src_bo;
}
if (src_bo) {
- op->blt = blt_composite_copy;
- op->box = blt_composite_copy_box;
- op->boxes = blt_composite_copy_boxes;
-
op->u.blt.src_pixmap = (void *)free_bo;
op->done = blt_vmap_done;
src_bo->pitch = src->devKind;
- if (!sna_blt_copy_init(sna, &op->u.blt,
- src_bo, op->dst.bo,
- op->dst.pixmap->drawable.bitsPerPixel,
- GXcopy))
- return FALSE;
+ if (alpha_fixup) {
+ op->blt = blt_composite_copy_with_alpha;
+ op->box = blt_composite_copy_box_with_alpha;
+ op->boxes = blt_composite_copy_boxes_with_alpha;
+
+ return sna_blt_alpha_fixup_init(sna, &op->u.blt,
+ src_bo, op->dst.bo,
+ op->dst.pixmap->drawable.bitsPerPixel,
+ alpha_fixup);
+ } else {
+ op->blt = blt_composite_copy;
+ op->box = blt_composite_copy_box;
+ op->boxes = blt_composite_copy_boxes;
+
+ return sna_blt_copy_init(sna, &op->u.blt,
+ src_bo, op->dst.bo,
+ op->dst.pixmap->drawable.bitsPerPixel,
+ GXcopy);
+ }
} else {
if (!sna_pixmap_move_to_cpu(src, MOVE_READ))
return FALSE;
- op->blt = blt_put_composite;
- op->box = blt_put_composite_box;
- op->boxes = blt_put_composite_boxes;
+ if (alpha_fixup) {
+ op->u.blt.pixel = alpha_fixup;
+ op->blt = blt_put_composite_with_alpha;
+ op->box = blt_put_composite_box_with_alpha;
+ op->boxes = blt_put_composite_boxes_with_alpha;
+ } else {
+ op->blt = blt_put_composite;
+ op->box = blt_put_composite_box;
+ op->boxes = blt_put_composite_boxes;
+ }
op->done = nop_done;
}
@@ -1209,6 +1489,13 @@ reduce_damage(struct sna_composite_op *op,
op->damage = NULL;
}
+#define alphaless(format) PICT_FORMAT(PICT_FORMAT_BPP(format), \
+ PICT_FORMAT_TYPE(format), \
+ 0, \
+ PICT_FORMAT_R(format), \
+ PICT_FORMAT_G(format), \
+ PICT_FORMAT_B(format))
+
Bool
sna_blt_composite(struct sna *sna,
uint32_t op,
@@ -1223,6 +1510,7 @@ sna_blt_composite(struct sna *sna,
PictFormat src_format = src->format;
struct sna_pixmap *priv;
int16_t tx, ty;
+ uint32_t alpha_fixup;
Bool ret;
#if DEBUG_NO_BLT || NO_BLT_COMPOSITE
@@ -1309,13 +1597,13 @@ sna_blt_composite(struct sna *sna,
return FALSE;
}
+ alpha_fixup = 0;
if (!(dst->format == src_format ||
- dst->format == PICT_FORMAT(PICT_FORMAT_BPP(src_format),
- PICT_FORMAT_TYPE(src_format),
- 0,
- PICT_FORMAT_R(src_format),
- PICT_FORMAT_G(src_format),
- PICT_FORMAT_B(src_format)))) {
+ dst->format == alphaless(src_format) ||
+ (alphaless(dst->format) == alphaless(src_format) &&
+ sna_get_pixel_from_rgba(&alpha_fixup,
+ 0, 0, 0, 0xffff,
+ dst->format)))) {
DBG(("%s: incompatible src/dst formats src=%08x, dst=%08x\n",
__FUNCTION__, (unsigned)src_format, dst->format));
return FALSE;
@@ -1349,18 +1637,18 @@ sna_blt_composite(struct sna *sna,
tmp->u.blt.sx = x - dst_x;
tmp->u.blt.sy = y - dst_y;
- DBG(("%s: blt dst offset (%d, %d), source offset (%d, %d)\n",
+ DBG(("%s: blt dst offset (%d, %d), source offset (%d, %d), with alpha fixup? %x\n",
__FUNCTION__,
- tmp->dst.x, tmp->dst.y, tmp->u.blt.sx, tmp->u.blt.sy));
+ tmp->dst.x, tmp->dst.y, tmp->u.blt.sx, tmp->u.blt.sy, alpha_fixup));
if (has_gpu_area(blt->src_pixmap, x, y, width, height))
- ret = prepare_blt_copy(sna, tmp);
+ ret = prepare_blt_copy(sna, tmp, alpha_fixup);
else if (has_cpu_area(blt->src_pixmap, x, y, width, height))
- ret = prepare_blt_put(sna, tmp);
+ ret = prepare_blt_put(sna, tmp, alpha_fixup);
else if (sna_pixmap_move_to_gpu(blt->src_pixmap, MOVE_READ))
- ret = prepare_blt_copy(sna, tmp);
+ ret = prepare_blt_copy(sna, tmp, alpha_fixup);
else
- ret = prepare_blt_put(sna, tmp);
+ ret = prepare_blt_put(sna, tmp, alpha_fixup);
return ret;
}
diff --git a/src/sna/sna_io.c b/src/sna/sna_io.c
index c5e66f14..aef3f509 100644
--- a/src/sna/sna_io.c
+++ b/src/sna/sna_io.c
@@ -504,6 +504,247 @@ fallback:
sna->blt_state.fill_bo = 0;
}
+static void
+write_boxes_inplace__xor(struct kgem *kgem,
+ const void *src, int stride, int bpp, int16_t src_dx, int16_t src_dy,
+ struct kgem_bo *bo, int16_t dst_dx, int16_t dst_dy,
+ const BoxRec *box, int n,
+ uint32_t and, uint32_t or)
+{
+ int dst_pitch = bo->pitch;
+ int src_pitch = stride;
+ void *dst;
+
+ DBG(("%s x %d, tiling=%d\n", __FUNCTION__, n, bo->tiling));
+
+ kgem_bo_submit(kgem, bo);
+
+ dst = kgem_bo_map(kgem, bo, PROT_READ | PROT_WRITE);
+ if (dst == NULL)
+ return;
+
+ do {
+ DBG(("%s: (%d, %d) -> (%d, %d) x (%d, %d) [bpp=%d, src_pitch=%d, dst_pitch=%d]\n", __FUNCTION__,
+ box->x1 + src_dx, box->y1 + src_dy,
+ box->x1 + dst_dx, box->y1 + dst_dy,
+ box->x2 - box->x1, box->y2 - box->y1,
+ bpp, src_pitch, dst_pitch));
+
+ memcpy_xor(src, dst, bpp,
+ src_pitch, dst_pitch,
+ box->x1 + src_dx, box->y1 + src_dy,
+ box->x1 + dst_dx, box->y1 + dst_dy,
+ box->x2 - box->x1, box->y2 - box->y1,
+ and, or);
+ box++;
+ } while (--n);
+}
+
+void sna_write_boxes__xor(struct sna *sna, PixmapPtr dst,
+ struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
+ const void *src, int stride, int16_t src_dx, int16_t src_dy,
+ const BoxRec *box, int nbox,
+ uint32_t and, uint32_t or)
+{
+ struct kgem *kgem = &sna->kgem;
+ struct kgem_bo *src_bo;
+ void *ptr;
+ int offset;
+ int n, cmd, br13;
+
+ DBG(("%s x %d\n", __FUNCTION__, nbox));
+
+ if (DEBUG_NO_IO || kgem->wedged ||
+ !kgem_bo_map_will_stall(kgem, dst_bo)) {
+fallback:
+ write_boxes_inplace__xor(kgem,
+ src, stride, dst->drawable.bitsPerPixel, src_dx, src_dy,
+ dst_bo, dst_dx, dst_dy,
+ box, nbox,
+ and, or);
+ return;
+ }
+
+ /* Try to avoid switching rings... */
+ if (dst_bo->tiling == I915_TILING_Y || kgem->ring == KGEM_RENDER) {
+ PixmapRec tmp;
+ BoxRec extents;
+
+ /* XXX Composite? Not that we should ever reach here! */
+
+ extents = box[0];
+ for (n = 1; n < nbox; n++) {
+ if (box[n].x1 < extents.x1)
+ extents.x1 = box[n].x1;
+ if (box[n].x2 > extents.x2)
+ extents.x2 = box[n].x2;
+
+ if (box[n].y1 < extents.y1)
+ extents.y1 = box[n].y1;
+ if (box[n].y2 > extents.y2)
+ extents.y2 = box[n].y2;
+ }
+
+ tmp.drawable.width = extents.x2 - extents.x1;
+ tmp.drawable.height = extents.y2 - extents.y1;
+ tmp.drawable.depth = dst->drawable.depth;
+ tmp.drawable.bitsPerPixel = dst->drawable.bitsPerPixel;
+ tmp.devPrivate.ptr = NULL;
+
+ assert(tmp.drawable.width);
+ assert(tmp.drawable.height);
+
+ tmp.devKind = tmp.drawable.width * tmp.drawable.bitsPerPixel / 8;
+ tmp.devKind = ALIGN(tmp.devKind, 4);
+
+ src_bo = kgem_create_buffer(kgem,
+ tmp.drawable.height * tmp.devKind,
+ KGEM_BUFFER_WRITE,
+ &ptr);
+ if (!src_bo)
+ goto fallback;
+
+ src_bo->pitch = tmp.devKind;
+
+ for (n = 0; n < nbox; n++) {
+ memcpy_xor(src, ptr, tmp.drawable.bitsPerPixel,
+ stride, tmp.devKind,
+ box[n].x1 + src_dx,
+ box[n].y1 + src_dy,
+ box[n].x1 - extents.x1,
+ box[n].y1 - extents.y1,
+ box[n].x2 - box[n].x1,
+ box[n].y2 - box[n].y1,
+ and, or);
+ }
+
+ n = sna->render.copy_boxes(sna, GXcopy,
+ &tmp, src_bo, -extents.x1, -extents.y1,
+ dst, dst_bo, dst_dx, dst_dy,
+ box, nbox);
+
+ kgem_bo_destroy(&sna->kgem, src_bo);
+
+ if (!n)
+ goto fallback;
+
+ return;
+ }
+
+ cmd = XY_SRC_COPY_BLT_CMD;
+ br13 = dst_bo->pitch;
+ if (kgem->gen >= 40 && dst_bo->tiling) {
+ cmd |= BLT_DST_TILED;
+ br13 >>= 2;
+ }
+ br13 |= 0xcc << 16;
+ switch (dst->drawable.bitsPerPixel) {
+ default:
+ case 32: cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
+ br13 |= 1 << 25; /* RGB8888 */
+ case 16: br13 |= 1 << 24; /* RGB565 */
+ case 8: break;
+ }
+
+ kgem_set_mode(kgem, KGEM_BLT);
+ if (kgem->nexec + 2 > KGEM_EXEC_SIZE(kgem) ||
+ kgem->nreloc + 2 > KGEM_RELOC_SIZE(kgem) ||
+ !kgem_check_batch(kgem, 8) ||
+ !kgem_check_bo_fenced(kgem, dst_bo, NULL)) {
+ _kgem_submit(kgem);
+ _kgem_set_mode(kgem, KGEM_BLT);
+ }
+
+ do {
+ int nbox_this_time;
+
+ nbox_this_time = nbox;
+ if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
+ nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8;
+ if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
+ nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2;
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ /* Count the total number of bytes to be read and allocate a
+ * single buffer large enough. Or if it is very small, combine
+ * with other allocations. */
+ offset = 0;
+ for (n = 0; n < nbox_this_time; n++) {
+ int height = box[n].y2 - box[n].y1;
+ int width = box[n].x2 - box[n].x1;
+ offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height;
+ }
+
+ src_bo = kgem_create_buffer(kgem, offset,
+ KGEM_BUFFER_WRITE | (nbox ? KGEM_BUFFER_LAST : 0),
+ &ptr);
+ if (!src_bo)
+ break;
+
+ offset = 0;
+ do {
+ int height = box->y2 - box->y1;
+ int width = box->x2 - box->x1;
+ int pitch = PITCH(width, dst->drawable.bitsPerPixel >> 3);
+ uint32_t *b;
+
+ DBG((" %s: box src=(%d, %d), dst=(%d, %d) size=(%d, %d), dst offset=%d, dst pitch=%d\n",
+ __FUNCTION__,
+ box->x1 + src_dx, box->y1 + src_dy,
+ box->x1 + dst_dx, box->y1 + dst_dy,
+ width, height,
+ offset, pitch));
+
+ assert(box->x1 + src_dx >= 0);
+ assert((box->x2 + src_dx)*dst->drawable.bitsPerPixel <= 8*stride);
+ assert(box->y1 + src_dy >= 0);
+
+ assert(box->x1 + dst_dx >= 0);
+ assert(box->y1 + dst_dy >= 0);
+
+ memcpy_xor(src, (char *)ptr + offset,
+ dst->drawable.bitsPerPixel,
+ stride, pitch,
+ box->x1 + src_dx, box->y1 + src_dy,
+ 0, 0,
+ width, height,
+ and, or);
+
+ b = kgem->batch + kgem->nbatch;
+ b[0] = cmd;
+ b[1] = br13;
+ b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx);
+ b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx);
+ b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = 0;
+ b[6] = pitch;
+ b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ offset);
+ kgem->nbatch += 8;
+
+ box++;
+ offset += pitch * height;
+ } while (--nbox_this_time);
+ assert(offset == src_bo->size);
+
+ if (nbox) {
+ _kgem_submit(kgem);
+ _kgem_set_mode(kgem, KGEM_BLT);
+ }
+
+ kgem_bo_destroy(kgem, src_bo);
+ } while (nbox);
+
+ sna->blt_state.fill_bo = 0;
+}
+
struct kgem_bo *sna_replace(struct sna *sna,
PixmapPtr pixmap,
struct kgem_bo *bo,
diff --git a/src/sna/sna_reg.h b/src/sna/sna_reg.h
index ff2ff3b7..551d64b0 100644
--- a/src/sna/sna_reg.h
+++ b/src/sna/sna_reg.h
@@ -55,6 +55,7 @@
#define XY_MONO_PAT ((0x2<<29)|(0x52<<22)|0x7)
#define XY_MONO_SRC_COPY ((0x2<<29)|(0x54<<22)|(0x6))
#define XY_MONO_SRC_COPY_IMM ((0x2<<29)|(0x71<<22))
+#define XY_FULL_MONO_PATTERN_BLT ((0x2<<29)|(0x57<<22)|0xa)
#define XY_FULL_MONO_PATTERN_MONO_SRC_BLT ((0x2<<29)|(0x58<<22)|0xa)
/* FLUSH commands */
diff --git a/test/.gitignore b/test/.gitignore
index 4bfc70db..e24e3fd0 100644
--- a/test/.gitignore
+++ b/test/.gitignore
@@ -11,4 +11,5 @@ render-fill-copy
render-composite-solid
render-copyarea
render-copyarea-size
+render-copy-alphaless
mixed-stress
diff --git a/test/Makefile.am b/test/Makefile.am
index dc35f9f4..a14396e5 100644
--- a/test/Makefile.am
+++ b/test/Makefile.am
@@ -12,6 +12,7 @@ stress_TESTS = \
render-composite-solid \
render-copyarea \
render-copyarea-size \
+ render-copy-alphaless \
mixed-stress \
$(NULL)
diff --git a/test/render-copy-alphaless.c b/test/render-copy-alphaless.c
new file mode 100644
index 00000000..b9687049
--- /dev/null
+++ b/test/render-copy-alphaless.c
@@ -0,0 +1,285 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <X11/Xutil.h> /* for XDestroyImage */
+#include <pixman.h> /* for pixman blt functions */
+
+#include "test.h"
+
+static void
+show_cells(char *buf,
+ const uint32_t *real, const uint32_t *ref,
+ int x, int y, int w, int h)
+{
+ int i, j, len = 0;
+
+ for (j = y - 2; j <= y + 2; j++) {
+ if (j < 0 || j >= h)
+ continue;
+
+ for (i = x - 2; i <= x + 2; i++) {
+ if (i < 0 || i >= w)
+ continue;
+
+ len += sprintf(buf+len, "%08x ", real[j*w+i]);
+ }
+
+ len += sprintf(buf+len, "\t");
+
+ for (i = x - 2; i <= x + 2; i++) {
+ if (i < 0 || i >= w)
+ continue;
+
+ len += sprintf(buf+len, "%08x ", ref[j*w+i]);
+ }
+
+ len += sprintf(buf+len, "\n");
+ }
+}
+
+static void fill_rect(struct test_display *t, Picture p,
+ int x, int y, int w, int h,
+ uint8_t red, uint8_t green, uint8_t blue)
+{
+ Drawable tmp;
+ XRenderColor c;
+ Picture src;
+ XRenderPictFormat *format;
+
+ format = XRenderFindStandardFormat(t->dpy, PictStandardRGB24);
+
+ tmp = XCreatePixmap(t->dpy, DefaultRootWindow(t->dpy),
+ w, h, format->depth);
+
+ src = XRenderCreatePicture(t->dpy, tmp, format, 0, NULL);
+ c.red = (int)red << 8 | red;
+ c.green = (int)green << 8 | green;
+ c.blue = (int)blue << 8 | blue;
+ c.alpha = 0xffff;
+ XRenderFillRectangle(t->dpy, PictOpSrc, src, &c, 0, 0, w, h);
+ XRenderComposite(t->dpy, PictOpOver, src, 0, p, 0, 0, 0, 0, x, y, w, h);
+
+ XRenderFreePicture(t->dpy, src);
+ XFreePixmap(t->dpy, tmp);
+}
+
+static void pixel_tests(struct test *t, int reps, int sets, enum target target)
+{
+ struct test_target tt;
+ XImage image;
+ uint32_t *cells = malloc(t->real.width*t->real.height*4);
+ struct {
+ uint16_t x, y;
+ } *pixels = malloc(reps*sizeof(*pixels));
+ int r, s;
+
+ test_target_create_render(&t->real, target, &tt);
+
+ printf("Testing setting of single pixels (%s): ",
+ test_target_name(target));
+ fflush(stdout);
+
+ for (s = 0; s < sets; s++) {
+ for (r = 0; r < reps; r++) {
+ int x = rand() % (tt.width - 1);
+ int y = rand() % (tt.height - 1);
+ uint8_t red = rand();
+ uint8_t green = rand();
+ uint8_t blue = rand();
+
+ fill_rect(&t->real, tt.picture,
+ x, y, 1, 1,
+ red, green, blue);
+
+ pixels[r].x = x;
+ pixels[r].y = y;
+ cells[y*tt.width+x] = color(red, green, blue, 0xff);
+ }
+
+ test_init_image(&image, &t->real.shm, tt.format, 1, 1);
+
+ for (r = 0; r < reps; r++) {
+ uint32_t x = pixels[r].x;
+ uint32_t y = pixels[r].y;
+ uint32_t result;
+
+ XShmGetImage(t->real.dpy, tt.draw, &image,
+ x, y, AllPlanes);
+
+ result = *(uint32_t *)image.data;
+ if (!pixel_equal(image.depth, result,
+ cells[y*tt.width+x])) {
+ uint32_t mask = depth_mask(image.depth);
+
+ die("failed to set pixel (%d,%d) to %08x [%08x], found %08x [%08x] instead\n",
+ x, y,
+ cells[y*tt.width+x] & mask,
+ cells[y*tt.width+x],
+ result & mask,
+ result);
+ }
+ }
+ }
+ printf("passed [%d iterations x %d]\n", reps, sets);
+
+ test_target_destroy_render(&t->real, &tt);
+ free(pixels);
+ free(cells);
+}
+
+static void clear(struct test_display *dpy, struct test_target *tt)
+{
+ XRenderColor render_color = {0};
+ XRenderFillRectangle(dpy->dpy, PictOpClear, tt->picture, &render_color,
+ 0, 0, tt->width, tt->height);
+}
+
+static void area_tests(struct test *t, int reps, int sets, enum target target)
+{
+ struct test_target tt;
+ XImage image;
+ uint32_t *cells = calloc(sizeof(uint32_t), t->real.width*t->real.height);
+ int r, s, x, y;
+
+ printf("Testing area sets (%s): ", test_target_name(target));
+ fflush(stdout);
+
+ test_target_create_render(&t->real, target, &tt);
+ clear(&t->real, &tt);
+
+ test_init_image(&image, &t->real.shm, tt.format, tt.width, tt.height);
+
+ for (s = 0; s < sets; s++) {
+ for (r = 0; r < reps; r++) {
+ int w = 1 + rand() % (tt.width - 1);
+ int h = 1 + rand() % (tt.height - 1);
+ uint8_t red = rand();
+ uint8_t green = rand();
+ uint8_t blue = rand();
+
+ x = rand() % (2*tt.width) - tt.width;
+ y = rand() % (2*tt.height) - tt.height;
+
+ fill_rect(&t->real, tt.picture,
+ x, y, w, h,
+ red, green, blue);
+
+ if (x < 0)
+ w += x, x = 0;
+ if (y < 0)
+ h += y, y = 0;
+ if (x >= tt.width || y >= tt.height)
+ continue;
+
+ if (x + w > tt.width)
+ w = tt.width - x;
+ if (y + h > tt.height)
+ h = tt.height - y;
+ if (w <= 0 || h <= 0)
+ continue;
+
+ pixman_fill(cells, tt.width, 32, x, y, w, h,
+ color(red, green, blue, 0xff));
+ }
+
+ XShmGetImage(t->real.dpy, tt.draw, &image, 0, 0, AllPlanes);
+
+ for (y = 0; y < tt.height; y++) {
+ for (x = 0; x < tt.width; x++) {
+ uint32_t result = *(uint32_t *)
+ (image.data +
+ y*image.bytes_per_line +
+ x*image.bits_per_pixel/8);
+ if (!pixel_equal(image.depth, result, cells[y*tt.width+x])) {
+ char buf[600];
+ uint32_t mask = depth_mask(image.depth);
+ show_cells(buf,
+ (uint32_t*)image.data, cells,
+ x, y, tt.width, tt.height);
+
+ die("failed to set pixel (%d,%d) to %08x [%08x], found %08x [%08x] instead (set %d, reps %d)\n%s",
+ x, y,
+ cells[y*tt.width+x] & mask,
+ cells[y*tt.width+x],
+ result & mask,
+ result, s, reps, buf);
+ }
+ }
+ }
+ }
+
+ printf("passed [%d iterations x %d]\n", reps, sets);
+
+ test_target_destroy_render(&t->real, &tt);
+ free(cells);
+}
+
+static void rect_tests(struct test *t, int reps, int sets, enum target target, int use_window)
+{
+ struct test_target real, ref;
+ int r, s;
+ printf("Testing area fills (%s, using %s source): ",
+ test_target_name(target), use_window ? "window" : "pixmap");
+ fflush(stdout);
+
+ test_target_create_render(&t->real, target, &real);
+ clear(&t->real, &real);
+
+ test_target_create_render(&t->ref, target, &ref);
+ clear(&t->ref, &ref);
+
+ for (s = 0; s < sets; s++) {
+ for (r = 0; r < reps; r++) {
+ int x, y, w, h;
+ uint8_t red = rand();
+ uint8_t green = rand();
+ uint8_t blue = rand();
+
+ x = rand() % (real.width - 1);
+ y = rand() % (real.height - 1);
+ w = 1 + rand() % (real.width - x - 1);
+ h = 1 + rand() % (real.height - y - 1);
+
+ fill_rect(&t->real, real.picture,
+ x, y, w, h,
+ red, green, blue);
+ fill_rect(&t->ref, ref.picture,
+ x, y, w, h,
+ red, green, blue);
+ }
+
+ test_compare(t,
+ real.draw, real.format,
+ ref.draw, ref.format,
+ 0, 0, real.width, real.height,
+ "");
+ }
+
+ printf("passed [%d iterations x %d]\n", reps, sets);
+
+ test_target_destroy_render(&t->real, &real);
+ test_target_destroy_render(&t->ref, &ref);
+}
+
+int main(int argc, char **argv)
+{
+ struct test test;
+ int i;
+
+ test_init(&test, argc, argv);
+
+ for (i = 0; i <= DEFAULT_ITERATIONS; i++) {
+ int reps = 1 << i;
+ int sets = 1 << (12 - i);
+
+ if (sets < 2)
+ sets = 2;
+
+ pixel_tests(&test, reps, sets, PIXMAP);
+ area_tests(&test, reps, sets, PIXMAP);
+ rect_tests(&test, reps, sets, PIXMAP, 0);
+ }
+
+ return 0;
+}