summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChris Wilson <chris@chris-wilson.co.uk>2012-03-09 20:02:44 +0000
committerChris Wilson <chris@chris-wilson.co.uk>2012-03-09 21:31:52 +0000
commit552e4fbd2c25eb5ab0ae77e11f5f8ba2fdb29daa (patch)
treec15f48ad0c565a50ef7ef974d7564934a0a718a8
parent494edfaaacaae13adfa5e727c66a83cb2294d330 (diff)
sna/traps: Add a fast path for narrow masks
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-rw-r--r--src/sna/kgem.c6
-rw-r--r--src/sna/kgem.h1
-rw-r--r--src/sna/sna_accel.c21
-rw-r--r--src/sna/sna_trapezoids.c390
4 files changed, 381 insertions, 37 deletions
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 5773d66d..db579d09 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -3785,6 +3785,12 @@ done:
return kgem_create_proxy(&bo->base, offset, size);
}
+bool kgem_buffer_is_inplace(struct kgem_bo *_bo)
+{
+ struct kgem_partial_bo *bo = (struct kgem_partial_bo *)_bo->proxy;
+ return bo->write & KGEM_BUFFER_WRITE_INPLACE;
+}
+
struct kgem_bo *kgem_create_buffer_2d(struct kgem *kgem,
int width, int height, int bpp,
uint32_t flags,
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index 6c31f335..dff8bb28 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -503,6 +503,7 @@ struct kgem_bo *kgem_create_buffer_2d(struct kgem *kgem,
int width, int height, int bpp,
uint32_t flags,
void **ret);
+bool kgem_buffer_is_inplace(struct kgem_bo *bo);
void kgem_buffer_read_sync(struct kgem *kgem, struct kgem_bo *bo);
void kgem_bo_clear_scanout(struct kgem *kgem, struct kgem_bo *bo);
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 3429438d..36191012 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -2007,17 +2007,10 @@ sna_pixmap_create_upload(ScreenPtr screen,
pixmap = sna->freed_pixmap;
sna->freed_pixmap = NULL;
- pixmap->usage_hint = CREATE_PIXMAP_USAGE_SCRATCH;
pixmap->drawable.serialNumber = NEXT_SERIAL_NUMBER;
pixmap->refcnt = 1;
-
- DBG(("%s: serial=%ld, usage=%d\n",
- __FUNCTION__,
- pixmap->drawable.serialNumber,
- pixmap->usage_hint));
} else {
- pixmap = create_pixmap(sna, screen, 0, 0, depth,
- CREATE_PIXMAP_USAGE_SCRATCH);
+ pixmap = create_pixmap(sna, screen, 0, 0, depth, 0);
if (!pixmap)
return NullPixmap;
@@ -2035,8 +2028,7 @@ sna_pixmap_create_upload(ScreenPtr screen,
priv->gpu_bo = kgem_create_buffer_2d(&sna->kgem,
width, height, bpp,
- flags,
- &ptr);
+ flags, &ptr);
if (!priv->gpu_bo) {
free(priv);
fbDestroyPixmap(pixmap);
@@ -2058,6 +2050,15 @@ sna_pixmap_create_upload(ScreenPtr screen,
pixmap->devKind = priv->gpu_bo->pitch;
pixmap->devPrivate.ptr = ptr;
+ pixmap->usage_hint = 0;
+ if (!kgem_buffer_is_inplace(priv->gpu_bo))
+ pixmap->usage_hint = 1;
+
+ DBG(("%s: serial=%ld, usage=%d\n",
+ __FUNCTION__,
+ pixmap->drawable.serialNumber,
+ pixmap->usage_hint));
+
return pixmap;
}
diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index 8c6cf346..44933315 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -1409,6 +1409,342 @@ tor_render(struct sna *sna,
}
}
+static void
+inplace_row(struct active_list *active, uint8_t *row, int width)
+{
+ struct edge *left = active->head.next;
+
+ assert(active->is_vertical);
+
+ while (&active->tail != left) {
+ struct edge *right;
+ int winding = left->dir;
+ grid_scaled_x_t lfx, rfx;
+ int lix, rix;
+
+ left->height_left -= FAST_SAMPLES_Y;
+ if (!left->height_left) {
+ left->prev->next = left->next;
+ left->next->prev = left->prev;
+ }
+
+ right = left->next;
+ do {
+ right->height_left -= FAST_SAMPLES_Y;
+ if (!right->height_left) {
+ right->prev->next = right->next;
+ right->next->prev = right->prev;
+ }
+
+ winding += right->dir;
+ if (0 == winding)
+ break;
+
+ right = right->next;
+ } while (1);
+
+ if (left->x.quo < 0) {
+ lix = lfx = 0;
+ } else if (left->x.quo > width * FAST_SAMPLES_X) {
+ lix = width;
+ lfx = 0;
+ } else
+ FAST_SAMPLES_X_TO_INT_FRAC(left->x.quo, lix, lfx);
+
+ if (right->x.quo < 0) {
+ rix = rfx = 0;
+ } else if (right->x.quo > width * FAST_SAMPLES_X) {
+ rix = width;
+ rfx = 0;
+ } else
+ FAST_SAMPLES_X_TO_INT_FRAC(right->x.quo, rix, rfx);
+ if (lix == rix) {
+ if (rfx != lfx)
+ row[lix] += (rfx-lfx) * 256 / FAST_SAMPLES_X;
+ } else {
+ if (lfx == 0)
+ row[lix] = 0xff;
+ else
+ row[lix] += 256 - lfx * 256 / FAST_SAMPLES_X;
+
+ if (rfx)
+ row[rix] += rfx * 256 / FAST_SAMPLES_X;
+
+ if (rix > ++lix) {
+ rix -= lix;
+#if 0
+ if (rix == 1)
+ row[lix] = 0xff;
+ else
+ memset(row+lix, 0xff, rix);
+#else
+ while (rix && lix & 3)
+ row[lix++] = 0xff, rix--;
+ while (rix > 4) {
+ *(uint32_t *)(row+lix) = 0xffffffff;
+ lix += 4;
+ rix -= 4;
+ }
+ if (rix & 2) {
+ *(uint16_t *)(row+lix) = 0xffff;
+ lix += 2;
+ }
+ if (rix & 1)
+ row[lix] = 0xff;
+#endif
+ }
+ }
+
+ left = right->next;
+ }
+}
+
+static inline uint8_t clip255(int x)
+{
+ if (x > 255)
+ return 255;
+
+ return x;
+}
+
+inline static void
+inplace_subrow(struct active_list *active, int8_t *row,
+ int width, int *min, int *max)
+{
+ struct edge *edge = active->head.next;
+ grid_scaled_x_t prev_x = INT_MIN;
+ int winding = 0, xstart = INT_MIN;
+
+ while (&active->tail != edge) {
+ struct edge *next = edge->next;
+
+ winding += edge->dir;
+ if (0 == winding) {
+ if (edge->x.quo >= FAST_SAMPLES_X * width) {
+ *max = width;
+ } else if (edge->next->x.quo != edge->x.quo) {
+ grid_scaled_x_t fx;
+ int ix;
+
+ xstart = edge->x.quo;
+ FAST_SAMPLES_X_TO_INT_FRAC(xstart, ix, fx);
+ row[ix++] -= FAST_SAMPLES_X - fx;
+ if (ix < width)
+ row[ix] -= fx;
+
+ if (ix > *max)
+ *max = ix;
+
+ xstart = INT_MIN;
+ }
+ } else if (xstart < 0) {
+ grid_scaled_x_t fx;
+ int ix;
+
+ xstart = MAX(edge->x.quo, 0);
+ FAST_SAMPLES_X_TO_INT_FRAC(xstart, ix, fx);
+ if (ix < *min)
+ *min = ix;
+
+ row[ix++] += FAST_SAMPLES_X - fx;
+ row[ix] += fx;
+ }
+
+ if (--edge->height_left) {
+ if (!edge->vertical) {
+ edge->x.quo += edge->dxdy.quo;
+ edge->x.rem += edge->dxdy.rem;
+ if (edge->x.rem >= 0) {
+ ++edge->x.quo;
+ edge->x.rem -= edge->dy;
+ }
+ }
+
+ if (edge->x.quo < prev_x) {
+ struct edge *pos = edge->prev;
+ pos->next = next;
+ next->prev = pos;
+ do {
+ pos = pos->prev;
+ } while (edge->x.quo < pos->x.quo);
+ pos->next->prev = edge;
+ edge->next = pos->next;
+ edge->prev = pos;
+ pos->next = edge;
+ } else
+ prev_x = edge->x.quo;
+ } else {
+ edge->prev->next = next;
+ next->prev = edge->prev;
+ }
+
+ edge = next;
+ }
+}
+
+inline static void
+inplace_end_subrows(struct active_list *active, uint8_t *row,
+ int8_t *buf, int width)
+{
+ int cover = 0;
+
+ while (width > 4) {
+ uint32_t dw;
+ int v;
+
+ dw = *(uint32_t *)buf;
+ buf += 4;
+
+ if (dw == 0){
+ v = cover * 256 / (FAST_SAMPLES_X * FAST_SAMPLES_Y);
+ v -= v >> 8;
+ v |= v << 8;
+ dw = v | v << 16;
+ } else if (dw) {
+ cover += (int8_t)(dw & 0xff);
+ assert(cover >= 0);
+ v = cover * 256 / (FAST_SAMPLES_X * FAST_SAMPLES_Y);
+ v -= v >> 8;
+ dw >>= 8;
+ dw |= v << 24;
+
+ cover += (int8_t)(dw & 0xff);
+ assert(cover >= 0);
+ v = cover * 256 / (FAST_SAMPLES_X * FAST_SAMPLES_Y);
+ v -= v >> 8;
+ dw >>= 8;
+ dw |= v << 24;
+
+ cover += (int8_t)(dw & 0xff);
+ assert(cover >= 0);
+ v = cover * 256 / (FAST_SAMPLES_X * FAST_SAMPLES_Y);
+ v -= v >> 8;
+ dw >>= 8;
+ dw |= v << 24;
+
+ cover += (int8_t)(dw & 0xff);
+ assert(cover >= 0);
+ v = cover * 256 / (FAST_SAMPLES_X * FAST_SAMPLES_Y);
+ v -= v >> 8;
+ dw >>= 8;
+ dw |= v << 24;
+ }
+
+ *(uint32_t *)row = dw;
+ row += 4;
+
+ width -= 4;
+ }
+
+ while (width--) {
+ int v;
+
+ cover += *buf++;
+ assert(cover >= 0);
+
+ v = cover * 256 / (FAST_SAMPLES_X * FAST_SAMPLES_Y);
+ v -= v >> 8;
+ *row++ = v;
+ }
+}
+
+#define TOR_INPLACE_SIZE 128
+static void
+tor_inplace(struct tor *converter, PixmapPtr scratch, int mono, uint8_t *buf)
+{
+ int i, j, h = converter->ymax;
+ struct polygon *polygon = converter->polygon;
+ struct active_list *active = converter->active;
+ struct edge *buckets[FAST_SAMPLES_Y] = { 0 };
+ uint8_t *row = scratch->devPrivate.ptr;
+ int stride = scratch->devKind;
+ int width = scratch->drawable.width;
+
+ __DBG(("%s: mono=%d, buf=%d\n", __FUNCTION__, mono, buf));
+ assert(!mono);
+
+ /* Render each pixel row. */
+ for (i = 0; i < h; i = j) {
+ int do_full_step = 0;
+ void *ptr = buf ?: row;
+
+ j = i + 1;
+
+ /* Determine if we can ignore this row or use the full pixel
+ * stepper. */
+ if (!polygon->y_buckets[i]) {
+ if (active->head.next == &active->tail) {
+ active->min_height = INT_MAX;
+ active->is_vertical = 1;
+ for (; j < h && !polygon->y_buckets[j]; j++)
+ ;
+ __DBG(("%s: no new edges and no exisiting edges, skipping, %d -> %d\n",
+ __FUNCTION__, i, j));
+
+ memset(row, 0, stride*(j-i));
+ row += stride*(j-i);
+ continue;
+ }
+
+ do_full_step = can_full_step(active);
+ }
+
+ __DBG(("%s: y=%d [%d], do_full_step=%d, new edges=%d, min_height=%d, vertical=%d\n",
+ __FUNCTION__,
+ i, i+ymin, do_full_step,
+ polygon->y_buckets[i] != NULL,
+ active->min_height,
+ active->is_vertical));
+ if (do_full_step) {
+ memset(ptr, 0, width);
+ inplace_row(active, ptr, width);
+ if (row != ptr)
+ memcpy(row, ptr, width);
+
+ if (active->is_vertical) {
+ while (j < h &&
+ polygon->y_buckets[j] == NULL &&
+ active->min_height >= 2*FAST_SAMPLES_Y)
+ {
+ active->min_height -= FAST_SAMPLES_Y;
+ row += stride;
+ memcpy(row, ptr, width);
+ j++;
+ }
+ if (j != i + 1)
+ step_edges(active, j - (i + 1));
+
+ __DBG(("%s: vertical edges, full step (%d, %d)\n",
+ __FUNCTION__, i, j));
+ }
+ } else {
+ grid_scaled_y_t suby;
+ int min = width, max = 0;
+
+ fill_buckets(active, polygon->y_buckets[i], buckets);
+
+ /* Subsample this row. */
+ memset(ptr, 0, width);
+ for (suby = 0; suby < FAST_SAMPLES_Y; suby++) {
+ if (buckets[suby]) {
+ merge_edges(active, buckets[suby]);
+ buckets[suby] = NULL;
+ }
+
+ inplace_subrow(active, ptr, width, &min, &max);
+ }
+ memset(row, 0, min);
+ if (max > min)
+ inplace_end_subrows(active, row+min, (int8_t*)ptr+min, max-min);
+ if (max < width)
+ memset(row+max, 0, width-max);
+ }
+
+ active->min_height -= FAST_SAMPLES_Y;
+ row += stride;
+ }
+}
+
struct mono_edge {
struct mono_edge *next, *prev;
@@ -1936,7 +2272,7 @@ trapezoids_bounds(int n, const xTrapezoid *t, BoxPtr box)
if (((x2 - t->right.p1.x) | (x2 - t->right.p2.x)) < 0) {
if (pixman_fixed_floor(t->right.p1.x) == pixman_fixed_floor(t->right.p2.x)) {
x2 = pixman_fixed_ceil(t->right.p1.x);
- } else {
+ } else {
if (t->right.p1.y == t->top)
fx1 = t->right.p1.x;
else
@@ -3007,7 +3343,6 @@ trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
int ntrap, xTrapezoid *traps)
{
struct tor tor;
- span_func_t span;
ScreenPtr screen = dst->pDrawable->pScreen;
PixmapPtr scratch;
PicturePtr mask;
@@ -3041,8 +3376,8 @@ trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
if (extents.y1 >= extents.y2 || extents.x1 >= extents.x2)
return true;
- DBG(("%s: extents (%d, %d), (%d, %d)\n",
- __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));
+ DBG(("%s: ntraps=%d, extents (%d, %d), (%d, %d)\n",
+ __FUNCTION__, ntrap, extents.x1, extents.y1, extents.x2, extents.y2));
if (!sna_compute_composite_extents(&extents,
src, NULL, dst,
@@ -3096,15 +3431,18 @@ trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
tor_add_edge(&tor, &t, &t.right, -1);
}
- if (maskFormat ? maskFormat->depth < 8 : dst->polyEdge == PolyEdgeSharp)
- span = tor_blt_mask_mono;
- else
- span = tor_blt_mask;
-
- tor_render(NULL, &tor,
- scratch->devPrivate.ptr,
- (void *)(intptr_t)scratch->devKind,
- span, true);
+ if (extents.x2 <= TOR_INPLACE_SIZE) {
+ uint8_t buf[TOR_INPLACE_SIZE];
+ tor_inplace(&tor, scratch, is_mono(dst, maskFormat),
+ scratch->usage_hint ? NULL : buf);
+ } else {
+ tor_render(NULL, &tor,
+ scratch->devPrivate.ptr,
+ (void *)(intptr_t)scratch->devKind,
+ is_mono(dst, maskFormat) ? tor_blt_mask_mono : tor_blt_mask,
+ true);
+ }
+ tor_fini(&tor);
mask = CreatePicture(0, &scratch->drawable,
PictureMatchFormat(screen, 8, PICT_a8),
@@ -3119,7 +3457,6 @@ trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
extents.x2, extents.y2);
FreePicture(mask, 0);
}
- tor_fini(&tor);
return true;
}
@@ -3535,7 +3872,6 @@ trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
int ntrap, xTrapezoid *traps)
{
struct tor tor;
- span_func_t span;
ScreenPtr screen = dst->pDrawable->pScreen;
PixmapPtr scratch;
PicturePtr mask;
@@ -3569,8 +3905,8 @@ trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
if (extents.y1 >= extents.y2 || extents.x1 >= extents.x2)
return true;
- DBG(("%s: extents (%d, %d), (%d, %d)\n",
- __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));
+ DBG(("%s: ntraps=%d, extents (%d, %d), (%d, %d)\n",
+ __FUNCTION__, ntrap, extents.x1, extents.y1, extents.x2, extents.y2));
if (!sna_compute_composite_extents(&extents,
src, NULL, dst,
@@ -3624,15 +3960,16 @@ trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
tor_add_edge(&tor, &t, &t.right, -1);
}
- if (maskFormat ? maskFormat->depth < 8 : dst->polyEdge == PolyEdgeSharp)
- span = tor_blt_mask_mono;
- else
- span = tor_blt_mask;
-
- tor_render(NULL, &tor,
- scratch->devPrivate.ptr,
- (void *)(intptr_t)scratch->devKind,
- span, true);
+ if (extents.x2 <= TOR_INPLACE_SIZE) {
+ tor_inplace(&tor, scratch, is_mono(dst, maskFormat), NULL);
+ } else {
+ tor_render(NULL, &tor,
+ scratch->devPrivate.ptr,
+ (void *)(intptr_t)scratch->devKind,
+ is_mono(dst, maskFormat) ? tor_blt_mask_mono : tor_blt_mask,
+ true);
+ }
+ tor_fini(&tor);
mask = CreatePicture(0, &scratch->drawable,
PictureMatchFormat(screen, 8, PICT_a8),
@@ -3675,7 +4012,6 @@ trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
done:
FreePicture(mask, 0);
}
- tor_fini(&tor);
return true;
}