author     Søren Sandmann <sandmann@redhat.com>   2008-11-21 17:28:27 -0500
committer  Søren Sandmann <sandmann@redhat.com>   2008-11-21 17:28:27 -0500
commit     f9fc7b6ea5aafec8b480236d92550a1ab8af09d6 (patch)
tree       765ef810fdf0b7592047cb7e918f996f389e4874
parent     e66a6f0fdb8ac68e4054d031601bfe8485330f7e (diff)
parent     9ca1d92a07296bd933ae202065280d5fc1f48e58 (diff)
Merge git+ssh://sandmann@freedesktop.org/home/sandmann/genrender into graph
-rw-r--r--   TODO          45
-rwxr-xr-x   build.sh       3
-rw-r--r--   composite.c  216
3 files changed, 261 insertions, 3 deletions
diff --git a/TODO b/TODO
index 9d21ef2..9292703 100644
--- a/TODO
+++ b/TODO
@@ -1,3 +1,37 @@
+Useful optimizations:
+
+ - Peephole optimizations to eliminate redundant shuffles etc.
+
+ - Dead code elimination - in some cases we will likely end up
+ computing stuff that is not used
+
+ - Move-from-dead-register. Basically,
+
+ x = y
+
+   where y is dead should be eliminated. We are going to generate
+   a number of these (a small sketch follows this list).
+
+ - Constant propagation could make generation of intermediate code
+   simpler
+
+ - Invariant code motion. Solids could then be generated in the
+ loop itself, rather than being special cased.
+
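+As a sketch of the move-from-dead-register case, in the (purely
+illustrative) IR sequence
+
+    t1 = load (src)
+    t2 = t1
+    t3 = mul (t2, mask)
+
+t1 is dead after the copy, so uses of t2 can be renamed to t1 and the
+move dropped.
+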
+Component alpha:
+
+Normal and component alpha can be treated largely the same way by
+having the combiner function take (src, alpha, dest), and generating
+alpha differently in the two cases.
+
+if (component_alpha)
+{
+    alpha = src_alpha * mask;
+    src = src * mask;
+}
+else
+{
+    alpha = src_alpha x 4;      (source alpha replicated into all four channels)
+    src = src;
+}
+
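+A rough per-channel sketch of such a combiner for OVER (plain C for
+illustration only; div_255 is a hypothetical rounding helper, and the
+real generated code would work on vectors):
+
+    static uint8_t
+    combine_over (uint8_t src, uint8_t alpha, uint8_t dest)
+    {
+        return src + div_255 ((255 - alpha) * dest);
+    }
+
+Because alpha arrives as a separate argument, the same combiner works
+whether it came from the component-alpha path or from replicating
+src_alpha.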
+
The vector size for an operation is determined by
- the intermediate format
@@ -306,6 +340,13 @@ An intelligent instruction selector on ARM would probably be a big
benefit.
+- Three-register instructions
+
+It is what future x86s will have, it is what ARM wants, it is much
+easier to generate code for, and it is easier to turn three-register
+code into two-register code than the other way around.
+
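+For example, the three-register form (register names are placeholders)
+
+    add  d, a, b
+
+can be lowered mechanically to the two-register form
+
+    mov  d, a
+    add  d, b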
+
Older notes:
- The generated ops should have a simpler prototype than the normal one. Maybe
@@ -327,8 +368,8 @@ Older notes:
code generation and to reduce the memory overhead of the code generation.
For ops where source or mask is solid, src/mask_start should point to an 8888
- pixel arranged similar to the dest format. Ie., unpacking should happen before the
- op is called.
+ pixel arranged similarly to the dest format. Ie., unpacking should
+ happen before the op is called.
If we add transformations and filters, they can be added at the end of the
argument list - that way the code won't have to change too much.
diff --git a/build.sh b/build.sh
index 27843d1..f911a7d 100755
--- a/build.sh
+++ b/build.sh
@@ -3,6 +3,7 @@
#gcc -Wall -g codex86.c regalloc.c testjit.c `pkg-config --cflags --libs pixman-1`
-gcc -Wall -g codex86.c testir.c ir.c `pkg-config --cflags --libs pixman-1 glib-2.0`
+# gcc -Wall -g codex86.c testir.c ir.c `pkg-config --cflags --libs pixman-1 glib-2.0`
+gcc -Wall -g composite.c `pkg-config --cflags --libs pixman-1 glib-2.0` -o composite
diff --git a/composite.c b/composite.c
new file mode 100644
index 0000000..8c76260
--- /dev/null
+++ b/composite.c
@@ -0,0 +1,216 @@
+#include <glib.h>
+#include <pixman.h>
+#include "pixman-private.h"
+#include "ir.h"
+
+/* Describes a compositing operation */
+#define PIXMAN_null PIXMAN_FORMAT(0,0,0,0,0,0)
+
+/* For now, we don't support
+ *
+ * - transformations
+ * - filters
+ * - repeats
+ * - solid/gradients
+ */
+struct info
+{
+ pixman_format_code_t source_fmt;
+ pixman_format_code_t mask_fmt;
+ pixman_format_code_t dest_fmt;
+};
+
+static gboolean
+verify_image_types (pixman_image_t *source,
+ pixman_image_t *mask,
+ pixman_image_t *dest)
+{
+ if (source->common.transform ||
+ source->common.filter != PIXMAN_FILTER_NEAREST ||
+ source->common.repeat != PIXMAN_REPEAT_NONE ||
+ source->common.type != BITS)
+ {
+ return FALSE;
+ }
+
+ if (mask)
+ {
+ if (mask->common.transform ||
+ mask->common.filter != PIXMAN_FILTER_NEAREST ||
+ mask->common.repeat != PIXMAN_REPEAT_NONE ||
+ mask->common.type != BITS)
+ {
+ return FALSE;
+ }
+ }
+
+ return TRUE;
+}
+
+#define PIXMAN_a16r16g16b16 PIXMAN_FORMAT(64,PIXMAN_TYPE_ARGB,16,16,16,16)
+
+static pixman_format_code_t
+determine_intermediate_fmt (pixman_op_t op,
+ pixman_format_code_t sfmt,
+ pixman_format_code_t mfmt,
+ pixman_format_code_t dfmt)
+{
+ if (op == PIXMAN_OP_ADD &&
+ mfmt == PIXMAN_null &&
+ sfmt == PIXMAN_a8 &&
+ dfmt == PIXMAN_a8)
+ {
+ return PIXMAN_a8;
+ }
+ else if (PIXMAN_FORMAT_16BPC (sfmt) ||
+ PIXMAN_FORMAT_16BPC (mfmt) ||
+ PIXMAN_FORMAT_16BPC (dfmt))
+ {
+ return PIXMAN_a16r16g16b16;
+ }
+ else
+ {
+ return PIXMAN_a8r8g8b8;
+ }
+}
+
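+/* For example (formats per the rules above):
+ *
+ *     determine_intermediate_fmt (PIXMAN_OP_OVER,
+ *                                 PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5)
+ *
+ * returns PIXMAN_a8r8g8b8, while any source, mask or dest format with
+ * 16 bits per channel selects the wide PIXMAN_a16r16g16b16 intermediate.
+ */
+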
+static IR *
+generate_intermediate (void)
+{
+#if 0
+ void (* CompositeOp) (uint32_t *src_start,
+ uint32_t src_skip,
+
+ uint32_t *mask_start,
+ uint32_t mask_skip,
+
+ uint32_t *dest_start,
+ uint32_t dest_skip,
+
+ uint16_t width,
+ uint16_t height);
+#endif
+
+ /*
+ * Generate the intermediate code
+ */
+
+ /* Generate loops */
+
+    /* Note: this can almost deal with the case where n_pixels pixels,
+     * expanded to the intermediate format, fit in a register completely.
+     * In that case, unpacking may still be necessary, but we don't need
+     * the extra register. One way to deal with this would be to not do
+     * any packing and let a dead code eliminator do its job.
+     */
+
+ /* src1 = read (src, n_pixels); */
+ /* src1 = convert (src1, sfmt, intermediate_fmt); */
+ /* if (need_unpack) src1, src2 = unpack (src1); */
+
+ /* mask1 = read (mask, n_pixels); */
+ /* mask1 = convert (mask1, mfmt, intermediate_fmt); */
+    /* if (need_unpack) mask1, mask2 = unpack (mask1); */
+
+ /* If component alpha,
+ * src1, alpha1 = combine_mask (src1, mask1);
+ * if (need_unpack) { src2, alpha2 = combine (src2, mask2); }
+ * else
+ * alpha1 = expand (src1 alpha);
+ * if (need_unpack) { alpha2 = expand (src2 alpha); }
+ */
+
+ /* dest1 = read (dest1, n_pixels); */
+ /* dest1 = convert (dest1, dfmt, intermediate_fmt); */
+ /* if (need_unpack) dest1, dest2 = unpack (dest1); */
+
+ /* res1 = combine (src1, alpha1, dest1) */
+
+ /* if (need_unpack) { res2 = combine (src2, alpha2, dest2); } */
+
+ /* if (need_unpack) { res1 = pack (res1, res2); } */
+
+ /* write (dst, res1, n_pixels) */
+
+ return NULL;
+}
+
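+/* A rough scalar sketch, kept under #if 0 like the prototype above, of
+ * the pipeline the comments describe.  None of these helpers
+ * (read_pixel, convert, unpack, expand_alpha, combine_mask, combine,
+ * pack, write_pixel) exist yet; they only name the steps the generated
+ * IR is meant to perform for each vector of pixels.
+ */
+#if 0
+static void
+composite_one (uint32_t *src, uint32_t *mask, uint32_t *dest)
+{
+    vec_t s, m, a, d;
+
+    s = convert (read_pixel (src), sfmt, intermediate_fmt);
+    m = convert (read_pixel (mask), mfmt, intermediate_fmt);
+
+    if (component_alpha)
+        combine_mask (&s, &a, m);   /* s = s * m; a = alpha (s) * m */
+    else
+        a = expand_alpha (s);       /* source alpha in every channel */
+
+    d = convert (read_pixel (dest), dfmt, intermediate_fmt);
+
+    /* when multiplications are needed, unpack () to 16 bits per
+     * channel happens here, and pack () after the combine */
+    d = combine (s, a, d);
+
+    write_pixel (dest, convert (d, intermediate_fmt, dfmt));
+}
+#endif
+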
+static IR *
+make_ir (int pref_vsize,
+ pixman_op_t op,
+ pixman_image_t *source,
+ pixman_image_t *mask,
+ pixman_image_t *dest)
+{
+ pixman_format_code_t sfmt;
+ pixman_format_code_t mfmt;
+ pixman_format_code_t dfmt;
+ pixman_format_code_t intermediate_fmt;
+ int intermediate_size;
+ gboolean need_unpack;
+ int n_pixels;
+ int vsize;
+ int mult;
+
+ if (!verify_image_types (source, mask, dest))
+ return NULL;
+
+ /* Determine the intermediate format */
+ sfmt = source->bits.format;
+ mfmt = mask? mask->bits.format : PIXMAN_null;
+ dfmt = dest->bits.format;
+
+ intermediate_fmt = determine_intermediate_fmt (op, sfmt, mfmt, dfmt);
+
+ /* Decide whether we will need multiplications (if so, the
+ * pixels will have to be unpacked, otherwise they won't)
+ */
+ if (mfmt == PIXMAN_null &&
+ ((op == PIXMAN_OP_ADD) ||
+ (op == PIXMAN_OP_OVER && PIXMAN_FORMAT_A (sfmt) == 0) ||
+ (op == PIXMAN_OP_SRC)))
+ {
+ need_unpack = FALSE;
+ }
+ else
+ {
+ need_unpack = TRUE;
+ }
+
+ mult = need_unpack ? 2 : 1;
+
+ intermediate_size = PIXMAN_FORMAT_BPP (intermediate_fmt) / 8;
+ intermediate_size = (intermediate_size + 3) & ~0x3;
+
+ /* Now determine the vector size we will use, based on the
+ * backend's preferred vector size. We need to make sure that
+ * there is enough room for at least one complete intermediate,
+ * taking into account whether we need multiplications.
+ */
+ if (pref_vsize < mult * intermediate_size)
+ {
+ n_pixels = 1;
+
+ /* In this case, backends will need to deal with getting
+ * vectors that are bigger than they'd like
+ */
+ vsize = mult * intermediate_size;
+ }
+ else
+ {
+ /* We don't take mult into account here because the unpacking
+ * will make use of an additional register.
+ */
+ n_pixels = pref_vsize / intermediate_size;
+
+ vsize = pref_vsize;
+ }
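+
+    /* Worked examples (illustrative numbers only): with pref_vsize = 16
+     * (one 128-bit register), an a8r8g8b8 intermediate (size 4) and
+     * need_unpack set (mult = 2), 16 >= 8, so n_pixels = 16 / 4 = 4 and
+     * vsize = 16.  With an a16r16g16b16 intermediate (size 8) and
+     * mult = 2, 16 >= 16, so n_pixels = 2 and vsize = 16.  With
+     * pref_vsize = 4 (a single 32-bit register), size 4 and mult = 2,
+     * 4 < 8, so n_pixels = 1 and vsize = 8. */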
+
+ return generate_intermediate ();
+}
+
+int
+main ()
+{
+    /* a dummy image, so make_ir () doesn't dereference a NULL source/dest */
+    static uint32_t bits[16 * 16];
+    pixman_image_t *img = pixman_image_create_bits (PIXMAN_a8r8g8b8,
+                                                    16, 16, bits, 16 * 4);
+    IR *ir = make_ir (16, PIXMAN_OP_OVER, img, NULL, img);
+    return ir == NULL;
+}