author     Søren Sandmann <sandmann@redhat.com>   2008-11-21 17:28:27 -0500
committer  Søren Sandmann <sandmann@redhat.com>   2008-11-21 17:28:27 -0500
commit     f9fc7b6ea5aafec8b480236d92550a1ab8af09d6 (patch)
tree       765ef810fdf0b7592047cb7e918f996f389e4874
parent     e66a6f0fdb8ac68e4054d031601bfe8485330f7e (diff)
parent     9ca1d92a07296bd933ae202065280d5fc1f48e58 (diff)
Merge git+ssh://sandmann@freedesktop.org/home/sandmann/genrender into graph
-rw-r--r--   TODO          45
-rwxr-xr-x   build.sh       3
-rw-r--r--   composite.c  216
3 files changed, 261 insertions, 3 deletions
diff --git a/TODO b/TODO
index 9d21ef2..9292703 100644
--- a/TODO
+++ b/TODO
@@ -1,3 +1,37 @@
+Useful optimizations:
+
+ - Peephole optimizations to eliminate redundant shuffles etc.
+
+ - Dead code elimination - in some cases we will likely end up
+ computing stuff that is not used
+
+ - Move-from-dead-register. Basically,
+
+ x = y
+
+   where y is dead should be eliminated. We are going to generate
+   a number of these (a small sketch follows this list).
+
+ - Constant propagation could make generation of intermediate code
+   simpler
+
+ - Invariant code motion. Solids could then be generated in the
+ loop itself, rather than being special cased.
+
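+As a sketch of the move-from-dead-register case, in the (purely
+illustrative) IR sequence
+
+    t1 = load (src)
+    t2 = t1
+    t3 = mul (t2, mask)
+
+t1 is dead after the copy, so uses of t2 can be renamed to t1 and the
+move dropped.
+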
+Component alpha:
+
+Normal and component alpha can be treated largely the same way by
+having the combiner function take (src, alpha, dest), and generating
+alpha differently in the two cases.
+
+if (component_alpha)
+{
+    alpha = src_alpha * mask;
+    src = src * mask;
+}
+else
+{
+    alpha = src_alpha x 4;      (source alpha replicated into all four channels)
+    src = src;
+}
+
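+A rough per-channel sketch of such a combiner for OVER (plain C for
+illustration only; div_255 is a hypothetical rounding helper, and the
+real generated code would work on vectors):
+
+    static uint8_t
+    combine_over (uint8_t src, uint8_t alpha, uint8_t dest)
+    {
+        return src + div_255 ((255 - alpha) * dest);
+    }
+
+Because alpha arrives as a separate argument, the same combiner works
+whether it came from the component-alpha path or from replicating
+src_alpha.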
+
The vector size for an operation is determined by
- the intermediate format
@@ -306,6 +340,13 @@ An intelligent instruction selector on ARM would probably be a big
benefit.
+- Three-register instructions
+
+It is what future x86s will have, it is what ARM wants, it is much
+easier to generate code for, and it is easier to turn three-register
+code into two-register code than the other way around.
+
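+For example, the three-register form (register names are placeholders)
+
+    add  d, a, b
+
+can be lowered mechanically to the two-register form
+
+    mov  d, a
+    add  d, b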
+
Older notes:
- The generated ops should have a simpler prototype than the normal one. Maybe
@@ -327,8 +368,8 @@ Older notes:
code generation and to reduce the memory overhead of the code generation.
For ops where source or mask is solid, src/mask_start should point to an 8888
- pixel arranged similar to the dest format. Ie., unpacking should happen before the
- op is called.
+ pixel arranged similarly to the dest format. Ie., unpacking should
+ happen before the op is called.
If we add transformations and filters, they can be added at the end of the
argument list - that way the code won't have to change too much.
diff --git a/build.sh b/build.sh
index 27843d1..f911a7d 100755
--- a/build.sh
+++ b/build.sh
@@ -3,6 +3,7 @@
#gcc -Wall -g codex86.c regalloc.c testjit.c `pkg-config --cflags --libs pixman-1`
-gcc -Wall -g codex86.c testir.c ir.c `pkg-config --cflags --libs pixman-1 glib-2.0`
+# gcc -Wall -g codex86.c testir.c ir.c `pkg-config --cflags --libs pixman-1 glib-2.0`
+gcc -Wall -g composite.c `pkg-config --cflags --libs pixman-1 glib-2.0` -o composite
diff --git a/composite.c b/composite.c
new file mode 100644
index 0000000..8c76260
--- /dev/null
+++ b/composite.c
@@ -0,0 +1,216 @@
+#include <glib.h>
+#include <pixman.h>
+#include "pixman-private.h"
+#include "ir.h"
+
+/* Describes a compositing operation */
+#define PIXMAN_null PIXMAN_FORMAT(0,0,0,0,0,0)
+
+/* For now, we don't support
+ *
+ * - transformations
+ * - filters
+ * - repeats
+ * - solid/gradients
+ */
+struct info
+{
+ pixman_format_code_t source_fmt;
+ pixman_format_code_t mask_fmt;
+ pixman_format_code_t dest_fmt;
+};
+
+static gboolean
+verify_image_types (pixman_image_t *source,
+ pixman_image_t *mask,
+ pixman_image_t *dest)
+{
+ if (source->common.transform ||
+ source->common.filter != PIXMAN_FILTER_NEAREST ||
+ source->common.repeat != PIXMAN_REPEAT_NONE ||
+ source->common.type != BITS)
+ {
+ return FALSE;
+ }
+
+ if (mask)
+ {
+ if (mask->common.transform ||
+ mask->common.filter != PIXMAN_FILTER_NEAREST ||
+ mask->common.repeat != PIXMAN_REPEAT_NONE ||
+ mask->common.type != BITS)
+ {
+ return FALSE;
+ }
+ }
+
+ return TRUE;
+}
+
+#define PIXMAN_a16r16g16b16 PIXMAN_FORMAT(64,PIXMAN_TYPE_ARGB,16,16,16,16)
+
+static pixman_format_code_t
+determine_intermediate_fmt (pixman_op_t op,
+ pixman_format_code_t sfmt,
+ pixman_format_code_t mfmt,
+ pixman_format_code_t dfmt)
+{
+ if (op == PIXMAN_OP_ADD &&
+ mfmt == PIXMAN_null &&
+ sfmt == PIXMAN_a8 &&
+ dfmt == PIXMAN_a8)
+ {
+ return PIXMAN_a8;
+ }
+ else if (PIXMAN_FORMAT_16BPC (sfmt) ||
+ PIXMAN_FORMAT_16BPC (mfmt) ||
+ PIXMAN_FORMAT_16BPC (dfmt))
+ {
+ return PIXMAN_a16r16g16b16;
+ }
+ else
+ {
+ return PIXMAN_a8r8g8b8;
+ }
+}
+
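+/* For example (formats per the rules above):
+ *
+ *     determine_intermediate_fmt (PIXMAN_OP_OVER,
+ *                                 PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5)
+ *
+ * returns PIXMAN_a8r8g8b8, while any source, mask or dest format with
+ * 16 bits per channel selects the wide PIXMAN_a16r16g16b16 intermediate.
+ */
+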
+static IR *
+generate_intermediate (void)
+{
+#if 0
+ void (* CompositeOp) (uint32_t *src_start,
+ uint32_t src_skip,
+
+ uint32_t *mask_start,
+ uint32_t mask_skip,
+
+ uint32_t *dest_start,
+ uint32_t dest_skip,
+
+ uint16_t width,
+ uint16_t height);
+#endif
+
+ /*
+ * Generate the intermediate code
+ */
+
+ /* Generate loops */
+
+    /* Note: this can almost deal with the case where n_pixels pixels,
+     * expanded to the intermediate format, fit in a register completely.
+     * In that case, unpacking may still be necessary, but we don't need
+     * the extra register. One way to deal with this would be to not do
+     * any packing and let a dead code eliminator do its job.
+     */
+
+ /* src1 = read (src, n_pixels); */
+ /* src1 = convert (src1, sfmt, intermediate_fmt); */
+ /* if (need_unpack) src1, src2 = unpack (src1); */
+
+ /* mask1 = read (mask, n_pixels); */
+ /* mask1 = convert (mask1, mfmt, intermediate_fmt); */
+    /* if (need_unpack) mask1, mask2 = unpack (mask1); */
+
+ /* If component alpha,
+ * src1, alpha1 = combine_mask (src1, mask1);
+ * if (need_unpack) { src2, alpha2 = combine (src2, mask2); }
+ * else
+ * alpha1 = expand (src1 alpha);
+ * if (need_unpack) { alpha2 = expand (src2 alpha); }
+ */
+
+ /* dest1 = read (dest1, n_pixels); */
+ /* dest1 = convert (dest1, dfmt, intermediate_fmt); */
+ /* if (need_unpack) dest1, dest2 = unpack (dest1); */
+
+ /* res1 = combine (src1, alpha1, dest1) */
+
+ /* if (need_unpack) { res2 = combine (src2, alpha2, dest2); } */
+
+ /* if (need_unpack) { res1 = pack (res1, res2); } */
+
+ /* write (dst, res1, n_pixels) */
+
+ return NULL;
+}
+
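+/* A rough scalar sketch, kept under #if 0 like the prototype above, of
+ * the pipeline the comments describe.  None of these helpers
+ * (read_pixel, convert, unpack, expand_alpha, combine_mask, combine,
+ * pack, write_pixel) exist yet; they only name the steps the generated
+ * IR is meant to perform for each vector of pixels.
+ */
+#if 0
+static void
+composite_one (uint32_t *src, uint32_t *mask, uint32_t *dest)
+{
+    vec_t s, m, a, d;
+
+    s = convert (read_pixel (src), sfmt, intermediate_fmt);
+    m = convert (read_pixel (mask), mfmt, intermediate_fmt);
+
+    if (component_alpha)
+        combine_mask (&s, &a, m);   /* s = s * m; a = alpha (s) * m */
+    else
+        a = expand_alpha (s);       /* source alpha in every channel */
+
+    d = convert (read_pixel (dest), dfmt, intermediate_fmt);
+
+    /* when multiplications are needed, unpack () to 16 bits per
+     * channel happens here, and pack () after the combine */
+    d = combine (s, a, d);
+
+    write_pixel (dest, convert (d, intermediate_fmt, dfmt));
+}
+#endif
+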
+static IR *
+make_ir (int pref_vsize,
+ pixman_op_t op,
+ pixman_image_t *source,
+ pixman_image_t *mask,
+ pixman_image_t *dest)
+{
+ pixman_format_code_t sfmt;
+ pixman_format_code_t mfmt;
+ pixman_format_code_t dfmt;
+ pixman_format_code_t intermediate_fmt;
+ int intermediate_size;
+ gboolean need_unpack;
+ int n_pixels;
+ int vsize;
+ int mult;
+
+ if (!verify_image_types (source, mask, dest))
+ return NULL;
+
+ /* Determine the intermediate format */
+ sfmt = source->bits.format;
+ mfmt = mask? mask->bits.format : PIXMAN_null;
+ dfmt = dest->bits.format;
+
+ intermediate_fmt = determine_intermediate_fmt (op, sfmt, mfmt, dfmt);
+
+ /* Decide whether we will need multiplications (if so, the
+ * pixels will have to be unpacked, otherwise they won't)
+ */
+ if (mfmt == PIXMAN_null &&
+ ((op == PIXMAN_OP_ADD) ||
+ (op == PIXMAN_OP_OVER && PIXMAN_FORMAT_A (sfmt) == 0) ||
+ (op == PIXMAN_OP_SRC)))
+ {
+ need_unpack = FALSE;
+ }
+ else
+ {
+ need_unpack = TRUE;
+ }
+
+ mult = need_unpack ? 2 : 1;
+
+ intermediate_size = PIXMAN_FORMAT_BPP (intermediate_fmt) / 8;
+ intermediate_size = (intermediate_size + 3) & ~0x3;
+
+ /* Now determine the vector size we will use, based on the
+ * backend's preferred vector size. We need to make sure that
+ * there is enough room for at least one complete intermediate,
+ * taking into account whether we need multiplications.
+ */
+ if (pref_vsize < mult * intermediate_size)
+ {
+ n_pixels = 1;
+
+ /* In this case, backends will need to deal with getting
+ * vectors that are bigger than they'd like
+ */
+ vsize = mult * intermediate_size;
+ }
+ else
+ {
+ /* We don't take mult into account here because the unpacking
+ * will make use of an additional register.
+ */
+ n_pixels = pref_vsize / intermediate_size;
+
+ vsize = pref_vsize;
+ }
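+
+    /* Worked examples (illustrative numbers only): with pref_vsize = 16
+     * (one 128-bit register), an a8r8g8b8 intermediate (size 4) and
+     * need_unpack set (mult = 2), 16 >= 8, so n_pixels = 16 / 4 = 4 and
+     * vsize = 16.  With an a16r16g16b16 intermediate (size 8) and
+     * mult = 2, 16 >= 16, so n_pixels = 2 and vsize = 16.  With
+     * pref_vsize = 4 (a single 32-bit register), size 4 and mult = 2,
+     * 4 < 8, so n_pixels = 1 and vsize = 8. */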
+
+ return generate_intermediate ();
+}
+
+int
+main ()
+{
+    /* a dummy image, so make_ir () doesn't dereference a NULL source/dest */
+    static uint32_t bits[16 * 16];
+    pixman_image_t *img = pixman_image_create_bits (PIXMAN_a8r8g8b8,
+                                                    16, 16, bits, 16 * 4);
+    IR *ir = make_ir (16, PIXMAN_OP_OVER, img, NULL, img);
+    return ir == NULL;
+}