| author | Søren Sandmann <sandmann@redhat.com> | 2008-11-21 17:28:27 -0500 |
|---|---|---|
| committer | Søren Sandmann <sandmann@redhat.com> | 2008-11-21 17:28:27 -0500 |
| commit | f9fc7b6ea5aafec8b480236d92550a1ab8af09d6 | |
| tree | 765ef810fdf0b7592047cb7e918f996f389e4874 | |
| parent | e66a6f0fdb8ac68e4054d031601bfe8485330f7e | |
| parent | 9ca1d92a07296bd933ae202065280d5fc1f48e58 | |
Merge git+ssh://sandmann@freedesktop.org/home/sandmann/genrender into graph
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | TODO | 45 |
| -rwxr-xr-x | build.sh | 3 |
| -rw-r--r-- | composite.c | 216 |
3 files changed, 261 insertions, 3 deletions
diff --git a/TODO b/TODO
@@ -1,3 +1,37 @@
+Useful optimizations:
+
+ - Peephole optimizations to eliminate redundant shuffles etc.
+
+ - Dead code elimination - in some cases we will likely end up
+   computing stuff that is not used
+
+ - Move-from-dead-register. Basically,
+
+       x = y
+
+   where y is dead should be eliminated. We are going to generate
+   a number of these.
+
+ - Constant propagation could make generation of intermediate code
+   simpler
+
+ - Invariant code motion. Solids could then be generated in the
+   loop itself, rather than being special cased.
+
+Component alpha:
+
+Normal and component alpha can be treated largely the same way by
+having the combiner function take (src, alpha, dest), and generating
+alpha differently in the two cases.
+
+if (component_alpha)
+    alpha = src_alpha * mask;
+    src = src * mask;
+else
+    alpha = src_alpha x 4
+    src = src;
+
+
 The vector size for an operation is determined by
 
 - the intermediate format
@@ -306,6 +340,13 @@
 An intelligent instruction selector on ARM would probably be a big
 benefit.
 
+- Three-register instructions
+
+It is what future x86s will have, it is what ARM wants, it is much
+easier to generate code for, and it is easier to turn three-register
+code into two-register code than the other way around.
+
+
 Older notes:
 
 - The generated ops should have a simpler prototype than the normal one. Maybe
@@ -327,8 +368,8 @@ Older notes:
   code generation and to reduce the memory overhead of the code generation.
 
   For ops where source or mask is solid, src/mask_start should point to an 8888
-  pixel arranged similar to the dest format. Ie., unpacking should happen before the
-  op is called.
+  pixel arranged similarly to the dest format. Ie., unpacking should
+  happen before the op is called.
 
   If we add transformations and filters, they can be added at the end of the
   argument list - that way the code won't have to change too much.
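
Aside (not part of the commit): the component-alpha note above amounts to choosing, per channel, between a mask-scaled alpha and a replicated source alpha before the combiner runs. Below is a minimal scalar sketch of that choice for 8-bit premultiplied channels; mul_un8() and prepare_channel() are hypothetical helpers invented for the sketch, not names from this tree.

#include <stdint.h>

/* Rounded 8-bit multiply: roughly (a * b) / 255 */
static uint8_t
mul_un8 (uint8_t a, uint8_t b)
{
    uint16_t t = (uint16_t)a * b + 0x80;
    return (uint8_t)((t + (t >> 8)) >> 8);
}

/* Produce the (src, alpha) pair the combiner consumes for one channel.
 * With component alpha the mask channel scales both the source channel
 * and the source alpha; in the unified case, per the note above, the
 * source alpha is simply replicated into every channel ("src_alpha x 4")
 * and src is left alone (any unified mask handling would happen elsewhere).
 */
void
prepare_channel (int      component_alpha,
                 uint8_t  src_chan,
                 uint8_t  src_alpha,
                 uint8_t  mask_chan,
                 uint8_t *src_out,
                 uint8_t *alpha_out)
{
    if (component_alpha)
    {
        *alpha_out = mul_un8 (src_alpha, mask_chan);
        *src_out   = mul_un8 (src_chan, mask_chan);
    }
    else
    {
        *alpha_out = src_alpha;
        *src_out   = src_chan;
    }
}

Either way the combiner only ever sees a (src, alpha, dest) triple, which is what lets the normal and component-alpha paths share the same generated combine step.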
diff --git a/build.sh b/build.sh
@@ -3,6 +3,7 @@
 #gcc -Wall -g codex86.c regalloc.c testjit.c `pkg-config --cflags --libs pixman-1`
-gcc -Wall -g codex86.c testir.c ir.c `pkg-config --cflags --libs pixman-1 glib-2.0`
+# gcc -Wall -g codex86.c testir.c ir.c `pkg-config --cflags --libs pixman-1 glib-2.0`
+gcc -Wall -g composite.c `pkg-config --cflags --libs pixman-1 glib-2.0` -o composite
diff --git a/composite.c b/composite.c
new file mode 100644
index 0000000..8c76260
--- /dev/null
+++ b/composite.c
@@ -0,0 +1,216 @@
+#include <glib.h>
+#include <pixman.h>
+#include "pixman-private.h"
+#include "ir.h"
+
+/* Describes a compositing operation */
+#define PIXMAN_null PIXMAN_FORMAT(0,0,0,0,0,0)
+
+/* For now, we don't support
+ *
+ * - transformations
+ * - filters
+ * - repeats
+ * - solid/gradients
+ */
+struct info
+{
+    pixman_format_code_t source_fmt;
+    pixman_format_code_t mask_fmt;
+    pixman_format_code_t dest_fmt;
+};
+
+static gboolean
+verify_image_types (pixman_image_t *source,
+                    pixman_image_t *mask,
+                    pixman_image_t *dest)
+{
+    if (source->common.transform                       ||
+        source->common.filter != PIXMAN_FILTER_NEAREST ||
+        source->common.repeat != PIXMAN_REPEAT_NONE    ||
+        source->common.type != BITS)
+    {
+        return FALSE;
+    }
+
+    if (mask)
+    {
+        if (mask->common.transform                       ||
+            mask->common.filter != PIXMAN_FILTER_NEAREST ||
+            mask->common.repeat != PIXMAN_REPEAT_NONE    ||
+            mask->common.type != BITS)
+        {
+            return FALSE;
+        }
+    }
+
+    return TRUE;
+}
+
+#define PIXMAN_a16r16g16b16 PIXMAN_FORMAT(64,PIXMAN_TYPE_ARGB,16,16,16,16)
+
+static pixman_format_code_t
+determine_intermediate_fmt (pixman_op_t          op,
+                            pixman_format_code_t sfmt,
+                            pixman_format_code_t mfmt,
+                            pixman_format_code_t dfmt)
+{
+    if (op == PIXMAN_OP_ADD &&
+        mfmt == PIXMAN_null &&
+        sfmt == PIXMAN_a8   &&
+        dfmt == PIXMAN_a8)
+    {
+        return PIXMAN_a8;
+    }
+    else if (PIXMAN_FORMAT_16BPC (sfmt) ||
+             PIXMAN_FORMAT_16BPC (mfmt) ||
+             PIXMAN_FORMAT_16BPC (dfmt))
+    {
+        return PIXMAN_a16r16g16b16;
+    }
+    else
+    {
+        return PIXMAN_a8r8g8b8;
+    }
+}
+
+static IR *
+generate_intermediate ()
+{
+#if 0
+    void (* CompositeOp) (uint32_t *src_start,
+                          uint32_t  src_skip,
+
+                          uint32_t *mask_start,
+                          uint32_t  mask_skip,
+
+                          uint32_t *dest_start,
+                          uint32_t  dest_skip,
+
+                          uint16_t  width,
+                          uint16_t  height);
+#endif
+
+    /*
+     * Generate the intermediate code
+     */
+
+    /* Generate loops */
+
+    /* Note: this can almost deal with the case where n_pixels,
+     * expanded to the intermediate format, fits in a register
+     * completely. In that case, unpacking may still be necessary,
+     * but we don't need the extra register. One way to deal with
+     * this would be to not do any packing and let a dead code
+     * eliminator do its job.
+     */
+
+    /* src1 = read (src, n_pixels); */
+    /* src1 = convert (src1, sfmt, intermediate_fmt); */
+    /* if (need_unpack) src1, src2 = unpack (src1); */
+
+    /* mask1 = read (mask, n_pixels); */
+    /* mask1 = convert (mask1, mfmt, intermediate_fmt); */
+    /* if (need_unpack) mask1, mask2 = unpack (mask1); */
+
+    /* If component alpha,
+     *     src1, alpha1 = combine_mask (src1, mask1);
+     *     if (need_unpack) { src2, alpha2 = combine_mask (src2, mask2); }
+     * else
+     *     alpha1 = expand (src1 alpha);
+     *     if (need_unpack) { alpha2 = expand (src2 alpha); }
+     */
+
+    /* dest1 = read (dest, n_pixels); */
+    /* dest1 = convert (dest1, dfmt, intermediate_fmt); */
+    /* if (need_unpack) dest1, dest2 = unpack (dest1); */
+
+    /* res1 = combine (src1, alpha1, dest1) */
+
+    /* if (need_unpack) { res2 = combine (src2, alpha2, dest2); } */
+
+    /* if (need_unpack) { res1 = pack (res1, res2); } */
+
+    /* write (dst, res1, n_pixels) */
+
+    return NULL;
+}
+
+static IR *
+make_ir (int             pref_vsize,
+         pixman_op_t     op,
+         pixman_image_t *source,
+         pixman_image_t *mask,
+         pixman_image_t *dest)
+{
+    pixman_format_code_t sfmt;
+    pixman_format_code_t mfmt;
+    pixman_format_code_t dfmt;
+    pixman_format_code_t intermediate_fmt;
+    int intermediate_size;
+    gboolean need_unpack;
+    int n_pixels;
+    int vsize;
+    int mult;
+
+    if (!verify_image_types (source, mask, dest))
+        return NULL;
+
+    /* Determine the intermediate format */
+    sfmt = source->bits.format;
+    mfmt = mask? mask->bits.format : PIXMAN_null;
+    dfmt = dest->bits.format;
+
+    intermediate_fmt = determine_intermediate_fmt (op, sfmt, mfmt, dfmt);
+
+    /* Decide whether we will need multiplications (if so, the
+     * pixels will have to be unpacked, otherwise they won't)
+     */
+    if (mfmt == PIXMAN_null &&
+        ((op == PIXMAN_OP_ADD)                                 ||
+         (op == PIXMAN_OP_OVER && PIXMAN_FORMAT_A (sfmt) == 0) ||
+         (op == PIXMAN_OP_SRC)))
+    {
+        need_unpack = FALSE;
+    }
+    else
+    {
+        need_unpack = TRUE;
+    }
+
+    mult = need_unpack ? 2 : 1;
+
+    intermediate_size = PIXMAN_FORMAT_BPP (intermediate_fmt) / 8;
+    intermediate_size = (intermediate_size + 3) & ~0x3;
+
+    /* Now determine the vector size we will use, based on the
+     * backend's preferred vector size. We need to make sure that
+     * there is enough room for at least one complete intermediate,
+     * taking into account whether we need multiplications.
+     */
+    if (pref_vsize < mult * intermediate_size)
+    {
+        n_pixels = 1;
+
+        /* In this case, backends will need to deal with getting
+         * vectors that are bigger than they'd like
+         */
+        vsize = mult * intermediate_size;
+    }
+    else
+    {
+        /* We don't take mult into account here because the unpacking
+         * will make use of an additional register.
+         */
+        n_pixels = pref_vsize / intermediate_size;
+
+        vsize = pref_vsize;
+    }
+
+    return generate_intermediate ();
+}
+
+int
+main ()
+{
+    IR *ir = make_ir (16, PIXMAN_OP_OVER, NULL, NULL, NULL);
+}
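
Aside (not part of the commit): the commented-out read/convert/unpack/combine/pack/write sequence in generate_intermediate() is aiming at something like the following scalar reference, shown here for PIXMAN_OP_OVER on premultiplied a8r8g8b8 with a unified alpha and no mask. mul_un8(), over_un8() and composite_over_8888_8888() are hypothetical helpers written for this sketch, not part of the tree.

#include <stdint.h>

/* Rounded 8-bit multiply: roughly (a * b) / 255 */
static uint8_t
mul_un8 (uint8_t a, uint8_t b)
{
    uint16_t t = (uint16_t)a * b + 0x80;
    return (uint8_t)((t + (t >> 8)) >> 8);
}

/* res = src + (1 - src_alpha) * dest, applied per 8-bit channel of an
 * a8r8g8b8 pixel. This is the "combine (src1, alpha1, dest1)" step;
 * the read/convert/write steps around it are memory traffic and
 * format conversion.
 */
static uint32_t
over_un8 (uint32_t src, uint32_t dest)
{
    uint8_t  srca = src >> 24;
    uint32_t res  = 0;
    int      shift;

    for (shift = 0; shift < 32; shift += 8)
    {
        uint8_t s = (src >> shift) & 0xff;
        uint8_t d = (dest >> shift) & 0xff;

        res |= (uint32_t)(s + mul_un8 (d, 255 - srca)) << shift;
    }

    return res;
}

/* The loop that the generated code is meant to replace, one pixel at a time */
void
composite_over_8888_8888 (const uint32_t *src, uint32_t *dest, int n_pixels)
{
    int i;

    for (i = 0; i < n_pixels; i++)
        dest[i] = over_un8 (src[i], dest[i]);
}

In make_ir() terms this is a need_unpack = TRUE case: the multiplications by (255 - src_alpha) are what set mult = 2, presumably so the 8-bit channels can be widened before multiplying in the vectorized version.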
