From 7c9dbc980b760e0053d83ca2d7cb147613285680 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 29 Jul 2012 09:50:39 +0100 Subject: sna: Assemble SF and WM kernels using brw Signed-off-by: Chris Wilson --- .gitignore | 1 + src/sna/brw/Makefile.am | 17 ++ src/sna/brw/brw.h | 14 ++ src/sna/brw/brw_sf.c | 51 +++++ src/sna/brw/brw_test.c | 60 +++++ src/sna/brw/brw_test.h | 46 ++++ src/sna/brw/brw_test_gen4.c | 199 ++++++++++++++++ src/sna/brw/brw_test_gen5.c | 208 +++++++++++++++++ src/sna/brw/brw_test_gen6.c | 209 +++++++++++++++++ src/sna/brw/brw_test_gen7.c | 178 +++++++++++++++ src/sna/brw/brw_wm.c | 542 ++++++++++++++++++++++++++++++++++++++++++++ src/sna/gen6_render.c | 6 +- src/sna/gen7_render.c | 6 +- src/sna/sna_render.h | 17 +- src/sna/sna_stream.c | 38 ++++ 15 files changed, 1582 insertions(+), 10 deletions(-) create mode 100644 src/sna/brw/brw.h create mode 100644 src/sna/brw/brw_sf.c create mode 100644 src/sna/brw/brw_test.c create mode 100644 src/sna/brw/brw_test.h create mode 100644 src/sna/brw/brw_test_gen4.c create mode 100644 src/sna/brw/brw_test_gen5.c create mode 100644 src/sna/brw/brw_test_gen6.c create mode 100644 src/sna/brw/brw_test_gen7.c create mode 100644 src/sna/brw/brw_wm.c diff --git a/.gitignore b/.gitignore index 579fe4be..f7799e52 100644 --- a/.gitignore +++ b/.gitignore @@ -44,6 +44,7 @@ symlink-tree texinfo.tex ylwrap src/sna/git_version.h +src/sna/brw/brw_test # Do not edit the following section # Edit Compile Debug Document Distribute diff --git a/src/sna/brw/Makefile.am b/src/sna/brw/Makefile.am index edb3db4f..b3513cf9 100644 --- a/src/sna/brw/Makefile.am +++ b/src/sna/brw/Makefile.am @@ -20,6 +20,7 @@ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
noinst_LTLIBRARIES = libbrw.la
+noinst_PROGRAMS = brw_test
 
 AM_CFLAGS = \
 	@CWARNFLAGS@ \
@@ -35,8 +36,24 @@ AM_CFLAGS += @VALGRIND_CFLAGS@
 endif
 
 libbrw_la_SOURCES = \
+	brw.h \
 	brw_disasm.c \
 	brw_eu.h \
 	brw_eu.c \
 	brw_eu_emit.c \
+	brw_sf.c \
+	brw_wm.c \
+	$(NULL)
+
+brw_test_SOURCES = \
+	brw_test.c \
+	brw_test.h \
+	brw_test_gen4.c \
+	brw_test_gen5.c \
+	brw_test_gen6.c \
+	brw_test_gen7.c \
+	$(NULL)
+
+brw_test_LDADD = \
+	libbrw.la \
 	$(NULL)
diff --git a/src/sna/brw/brw.h b/src/sna/brw/brw.h
new file mode 100644
index 00000000..a39b253a
--- /dev/null
+++ b/src/sna/brw/brw.h
@@ -0,0 +1,14 @@
+#include "brw_eu.h"
+/* SF kernel assemblers, defined in brw_sf.c */
+void brw_sf_kernel__nomask(struct brw_compile *p);
+void brw_sf_kernel__mask(struct brw_compile *p);
+/* WM kernel assemblers, affine variants (the tests pass 8 or 16 as dispatch_width) */
+void brw_wm_kernel__affine(struct brw_compile *p, int dispatch_width);
+void brw_wm_kernel__affine_mask(struct brw_compile *p, int dispatch_width);
+void brw_wm_kernel__affine_mask_ca(struct brw_compile *p, int dispatch_width);
+void brw_wm_kernel__affine_mask_sa(struct brw_compile *p, int dispatch_width);
+/* WM kernel assemblers, projective variants */
+void brw_wm_kernel__projective(struct brw_compile *p, int dispatch_width);
+void brw_wm_kernel__projective_mask(struct brw_compile *p, int dispatch_width);
+void brw_wm_kernel__projective_mask_ca(struct brw_compile *p, int dispatch_width);
+void brw_wm_kernel__projective_mask_sa(struct brw_compile *p, int dispatch_width);
diff --git a/src/sna/brw/brw_sf.c b/src/sna/brw/brw_sf.c
new file mode 100644
index 00000000..0c69433d
--- /dev/null
+++ b/src/sna/brw/brw_sf.c
@@ -0,0 +1,51 @@
+#include "brw.h"
+
+/* Assemble the "nomask" SF kernel into p; its inputs are the vec4 regions of GRF 3-5. */
+void brw_sf_kernel__nomask(struct brw_compile *p)
+{
+	struct brw_reg in0 = brw_vec4_grf(3, 0);
+	struct brw_reg in1 = brw_vec4_grf(4, 0);
+	struct brw_reg in2 = brw_vec4_grf(5, 0);
+	struct brw_reg tmp = brw_vec8_grf(7, 0);
+	struct brw_reg inv = brw_vec4_grf(6, 0);
+
+	brw_math_invert(p, inv, brw_vec4_grf(1, 11));
+	brw_MOV(p, brw_message_reg(3), in0);
+
+	/* message reg 1 = (in1 - in2) * GRF 6, sub-register 0 */
+	brw_ADD(p, tmp, in1, brw_negate(in2));
+	brw_MUL(p, brw_message_reg(1), tmp, brw_vec1_grf(6,0));
+
+	/* message reg 2 = (in2 - in0) * GRF 6, sub-register 2 */
+	brw_ADD(p, tmp, in2, brw_negate(in0));
+	brw_MUL(p, brw_message_reg(2), tmp, brw_vec1_grf(6,2));
+
+	brw_urb_WRITE(p, brw_null_reg(), 0, brw_vec8_grf(0 ,0),
+		      false, true, 4, 0, true, true, 0,
+		      BRW_URB_SWIZZLE_TRANSPOSE);
+}
+
+/* Assemble the "mask" SF kernel into p; same sequence, but the inputs are vec8 regions. */
+void brw_sf_kernel__mask(struct brw_compile *p)
+{
+	struct brw_reg in0 = brw_vec8_grf(3, 0);
+	struct brw_reg in1 = brw_vec8_grf(4, 0);
+	struct brw_reg in2 = brw_vec8_grf(5, 0);
+	struct brw_reg tmp = brw_vec8_grf(7, 0);
+	struct brw_reg inv = brw_vec4_grf(6, 0);
+
+	brw_math_invert(p, inv, brw_vec4_grf(1, 11));
+	brw_MOV(p, brw_message_reg(3), in0);
+
+	/* message reg 1 = (in1 - in2) * GRF 6, sub-register 0 */
+	brw_ADD(p, tmp, in1, brw_negate(in2));
+	brw_MUL(p, brw_message_reg(1), tmp, brw_vec1_grf(6,0));
+
+	/* message reg 2 = (in2 - in0) * GRF 6, sub-register 2 */
+	brw_ADD(p, tmp, in2, brw_negate(in0));
+	brw_MUL(p, brw_message_reg(2), tmp, brw_vec1_grf(6,2));
+
+	brw_urb_WRITE(p, brw_null_reg(), 0, brw_vec8_grf(0 ,0),
+		      false, true, 4, 0, true, true, 0,
+		      BRW_URB_SWIZZLE_TRANSPOSE);
+}
diff --git a/src/sna/brw/brw_test.c b/src/sna/brw/brw_test.c
new file mode 100644
index 00000000..4f038584
--- /dev/null
+++ b/src/sna/brw/brw_test.c
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Chris Wilson
+ *
+ */
+
+#include "brw_test.h"
+#include <string.h>
+/* Print both disassemblies when the new program differs from the old one in length or content. */
+void brw_test_compare(const char *function, int gen,
+		      const struct brw_instruction *new, int num_new,
+		      const struct brw_instruction *old, int num_old)
+{
+	int n;
+
+	if (num_new != num_old ||
+	    memcmp(new, old, num_new * sizeof(struct brw_instruction))) {
+		printf ("%s: new\n", function);
+		for (n = 0; n < num_new; n++)
+			brw_disasm(stdout, &new[n], gen);
+
+		printf ("%s: old\n", function);
+		for (n = 0; n < num_old; n++)
+			brw_disasm(stdout, &old[n], gen);
+		printf ("\n");
+	}
+}
+
+
+/* Check that we can recreate all the existing programs using the assembler */
+int main(void)
+{
+	brw_test_gen4();
+	brw_test_gen5();
+	brw_test_gen6();
+	brw_test_gen7();
+
+	return 0;
+}
diff --git a/src/sna/brw/brw_test.h b/src/sna/brw/brw_test.h
new file mode 100644
index 00000000..41f4ca6b
--- /dev/null
+++ b/src/sna/brw/brw_test.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Chris Wilson
+ *
+ */
+
+#ifndef BRW_TEST_H
+#define BRW_TEST_H
+
+#include "brw.h"
+/* Element count of a true array (not a pointer); the argument is parenthesised on every use. */
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0]))
+#endif
+/* Prints both disassemblies, labelled with `function`, when new and old differ. */
+void brw_test_compare(const char *function, int gen,
+		      const struct brw_instruction *new, int num_new,
+		      const struct brw_instruction *old, int num_old);
+/* Per-generation test entry points, called in this order by main() in brw_test.c. */
+void brw_test_gen4(void);
+void brw_test_gen5(void);
+void brw_test_gen6(void);
+void brw_test_gen7(void);
+
+#endif /* BRW_TEST_H */
diff --git a/src/sna/brw/brw_test_gen4.c b/src/sna/brw/brw_test_gen4.c
new file mode 100644
index 00000000..742c7c24
--- /dev/null
+++ b/src/sna/brw/brw_test_gen4.c
@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Chris Wilson
+ *
+ */
+
+#include "brw_test.h"
+
+#include <string.h>
+/* Reference programs: each array concatenates the listed .g4b fragments. */
+static const uint32_t sf_kernel[][4] = {
+#include "exa_sf.g4b"
+};
+
+static const uint32_t sf_kernel_mask[][4] = {
+#include "exa_sf_mask.g4b"
+};
+
+static const uint32_t ps_kernel_nomask_affine[][4] = {
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_affine.g4b"
+#include "exa_wm_src_sample_argb.g4b"
+#include "exa_wm_write.g4b"
+};
+
+static const uint32_t ps_kernel_nomask_projective[][4] = {
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_projective.g4b"
+#include "exa_wm_src_sample_argb.g4b"
+#include "exa_wm_write.g4b"
+};
+
+static const uint32_t ps_kernel_maskca_affine[][4] = {
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_affine.g4b"
+#include "exa_wm_src_sample_argb.g4b"
+#include "exa_wm_mask_affine.g4b"
+#include "exa_wm_mask_sample_argb.g4b"
+#include "exa_wm_ca.g4b"
+#include "exa_wm_write.g4b"
+};
+
+static const uint32_t ps_kernel_maskca_projective[][4] = {
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_projective.g4b"
+#include "exa_wm_src_sample_argb.g4b"
+#include "exa_wm_mask_projective.g4b"
+#include "exa_wm_mask_sample_argb.g4b"
+#include "exa_wm_ca.g4b"
+#include "exa_wm_write.g4b"
+};
+
+static const uint32_t ps_kernel_maskca_srcalpha_affine[][4] = {
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_affine.g4b"
+#include "exa_wm_src_sample_a.g4b"
+#include "exa_wm_mask_affine.g4b"
+#include "exa_wm_mask_sample_argb.g4b"
+#include "exa_wm_ca_srcalpha.g4b"
+#include "exa_wm_write.g4b"
+};
+
+static const uint32_t ps_kernel_maskca_srcalpha_projective[][4] = {
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_projective.g4b"
+#include "exa_wm_src_sample_a.g4b"
+#include "exa_wm_mask_projective.g4b"
+#include "exa_wm_mask_sample_argb.g4b"
+#include "exa_wm_ca_srcalpha.g4b"
+#include "exa_wm_write.g4b"
+};
+
+static const uint32_t ps_kernel_masknoca_affine[][4] = {
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_affine.g4b"
+#include "exa_wm_src_sample_argb.g4b"
+#include "exa_wm_mask_affine.g4b"
+#include "exa_wm_mask_sample_a.g4b"
+#include "exa_wm_noca.g4b"
+#include "exa_wm_write.g4b"
+};
+
+static const uint32_t ps_kernel_masknoca_projective[][4] = {
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_projective.g4b"
+#include "exa_wm_src_sample_argb.g4b"
+#include "exa_wm_mask_projective.g4b"
+#include "exa_wm_mask_sample_a.g4b"
+#include "exa_wm_noca.g4b"
+#include "exa_wm_write.g4b"
+};
+
+static const uint32_t ps_kernel_packed_static[][4] = {
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_affine.g4b"
+#include "exa_wm_src_sample_argb.g4b"
+#include "exa_wm_yuv_rgb.g4b"
+#include "exa_wm_write.g4b"
+};
+
+static const uint32_t ps_kernel_planar_static[][4] = {
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_affine.g4b"
+#include "exa_wm_src_sample_planar.g4b"
+#include "exa_wm_yuv_rgb.g4b"
+#include "exa_wm_write.g4b"
+};
+/* The last 8 entries of each reference array are not compared (presumably padding; confirm). */
+#define compare(old) brw_test_compare(__FUNCTION__, p.gen, p.store, p.nr_insn, (struct brw_instruction *)old, ARRAY_SIZE(old)-8)
+/* Each test assembles one kernel into a 1024-dword store, the size the gen6/gen7 tests use. */
+static void gen4_sf__nomask(void)
+{
+	uint32_t store[1024];
+	struct brw_compile p;
+
+	brw_compile_init(&p, 40, store);
+	brw_sf_kernel__nomask(&p);
+
+	compare(sf_kernel);
+}
+
+static void gen4_sf__mask(void)
+{
+	uint32_t store[1024];
+	struct brw_compile p;
+
+	brw_compile_init(&p, 40, store);
+	brw_sf_kernel__mask(&p);
+
+	compare(sf_kernel_mask);
+}
+
+static void
+gen4_wm_kernel__affine_nomask(void)
+{
+	uint32_t store[1024];
+	struct brw_compile p;
+
+	brw_compile_init(&p, 40, store);
+	brw_wm_kernel__affine(&p, 16);
+
+	compare(ps_kernel_nomask_affine);
+}
+
+static void
+gen4_wm_kernel__affine_mask_noca(void)
+{
+	uint32_t store[1024];
+	struct brw_compile p;
+
+	brw_compile_init(&p, 40, store);
+	brw_wm_kernel__affine_mask(&p, 16);
+
+	compare(ps_kernel_masknoca_affine);
+}
+
+static void
+gen4_wm_kernel__projective_nomask(void)
+{
+	uint32_t store[1024];
+	struct brw_compile p;
+
+	brw_compile_init(&p, 40, store);
+	brw_wm_kernel__projective(&p, 16);
+
+	compare(ps_kernel_nomask_projective);
+}
+
+void brw_test_gen4(void)
+{
+	gen4_sf__nomask();
+	gen4_sf__mask();
+
+	gen4_wm_kernel__affine_nomask();
+	gen4_wm_kernel__affine_mask_noca();
+
+	gen4_wm_kernel__projective_nomask();
+}
diff --git a/src/sna/brw/brw_test_gen5.c b/src/sna/brw/brw_test_gen5.c
new file mode 100644
index 00000000..62a999e1
--- /dev/null
+++ b/src/sna/brw/brw_test_gen5.c
@@ -0,0 +1,208 @@
+/*
+ * Copyright (c) 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Chris Wilson
+ *
+ */
+
+#include "brw_test.h"
+
+#include <string.h>
+/* Reference programs: each array concatenates the listed .g5b fragments. */
+static const uint32_t sf_kernel[][4] = {
+#include "exa_sf.g5b"
+};
+
+static const uint32_t sf_kernel_mask[][4] = {
+#include "exa_sf_mask.g5b"
+};
+
+static const uint32_t ps_kernel_nomask_affine[][4] = {
+#include "exa_wm_xy.g5b"
+#include "exa_wm_src_affine.g5b"
+#include "exa_wm_src_sample_argb.g5b"
+#include "exa_wm_write.g5b"
+};
+
+static const uint32_t ps_kernel_nomask_projective[][4] = {
+#include "exa_wm_xy.g5b"
+#include "exa_wm_src_projective.g5b"
+#include "exa_wm_src_sample_argb.g5b"
+#include "exa_wm_write.g5b"
+};
+
+static const uint32_t ps_kernel_maskca_affine[][4] = {
+#include "exa_wm_xy.g5b"
+#include "exa_wm_src_affine.g5b"
+#include "exa_wm_src_sample_argb.g5b"
+#include "exa_wm_mask_affine.g5b"
+#include "exa_wm_mask_sample_argb.g5b"
+#include "exa_wm_ca.g5b"
+#include "exa_wm_write.g5b"
+};
+
+static const uint32_t ps_kernel_maskca_projective[][4] = {
+#include "exa_wm_xy.g5b"
+#include "exa_wm_src_projective.g5b"
+#include "exa_wm_src_sample_argb.g5b"
+#include "exa_wm_mask_projective.g5b"
+#include "exa_wm_mask_sample_argb.g5b"
+#include "exa_wm_ca.g5b"
+#include "exa_wm_write.g5b"
+};
+
+static const uint32_t ps_kernel_maskca_srcalpha_affine[][4] = {
+#include "exa_wm_xy.g5b"
+#include "exa_wm_src_affine.g5b"
+#include "exa_wm_src_sample_a.g5b"
+#include "exa_wm_mask_affine.g5b"
+#include "exa_wm_mask_sample_argb.g5b"
+#include "exa_wm_ca_srcalpha.g5b"
+#include "exa_wm_write.g5b"
+};
+
+static const uint32_t ps_kernel_maskca_srcalpha_projective[][4] = {
+#include "exa_wm_xy.g5b"
+#include "exa_wm_src_projective.g5b"
+#include "exa_wm_src_sample_a.g5b"
+#include "exa_wm_mask_projective.g5b"
+#include "exa_wm_mask_sample_argb.g5b"
+#include "exa_wm_ca_srcalpha.g5b"
+#include "exa_wm_write.g5b"
+};
+
+static const uint32_t ps_kernel_masknoca_affine[][4] = {
+#include "exa_wm_xy.g5b"
+#include "exa_wm_src_affine.g5b"
+#include "exa_wm_src_sample_argb.g5b"
+#include "exa_wm_mask_affine.g5b"
+#include "exa_wm_mask_sample_a.g5b"
+#include "exa_wm_noca.g5b"
+#include "exa_wm_write.g5b"
+};
+
+static const uint32_t ps_kernel_masknoca_projective[][4] = {
+#include "exa_wm_xy.g5b"
+#include "exa_wm_src_projective.g5b"
+#include "exa_wm_src_sample_argb.g5b"
+#include "exa_wm_mask_projective.g5b"
+#include "exa_wm_mask_sample_a.g5b"
+#include "exa_wm_noca.g5b"
+#include "exa_wm_write.g5b"
+};
+
+static const uint32_t ps_kernel_packed_static[][4] = {
+#include "exa_wm_xy.g5b"
+#include "exa_wm_src_affine.g5b"
+#include "exa_wm_src_sample_argb.g5b"
+#include "exa_wm_yuv_rgb.g5b"
+#include "exa_wm_write.g5b"
+};
+
+static const uint32_t ps_kernel_planar_static[][4] = {
+#include "exa_wm_xy.g5b"
+#include "exa_wm_src_affine.g5b"
+#include "exa_wm_src_sample_planar.g5b"
+#include "exa_wm_yuv_rgb.g5b"
+#include "exa_wm_write.g5b"
+};
+/* Compare the program assembled in the local `p` against the whole reference array `old`. */
+#define compare(old) brw_test_compare(__FUNCTION__, p.gen, p.store, p.nr_insn, (struct brw_instruction *)old, ARRAY_SIZE(old))
+/* Each test assembles one kernel into a 1024-dword store, the size the gen6/gen7 tests use. */
+static void gen5_sf(void)
+{
+	uint32_t store[1024];
+	struct brw_compile p;
+
+	brw_compile_init(&p, 50, store);
+	brw_sf_kernel__nomask(&p);
+
+	compare(sf_kernel);
+}
+
+static void gen5_sf_mask(void)
+{
+	uint32_t store[1024];
+	struct brw_compile p;
+
+	brw_compile_init(&p, 50, store);
+	brw_sf_kernel__mask(&p);
+
+	compare(sf_kernel_mask);
+}
+
+static void gen5_wm_affine_nomask(void)
+{
+	uint32_t store[1024];
+	struct brw_compile p;
+
+	brw_compile_init(&p, 50, store);
+	brw_wm_kernel__affine(&p, 16);
+
+	compare(ps_kernel_nomask_affine);
+}
+
+static void gen5_wm_affine_mask_noca(void)
+{
+	uint32_t store[1024];
+	struct brw_compile p;
+
+	brw_compile_init(&p, 50, store);
+	brw_wm_kernel__affine_mask(&p, 16);
+
+	compare(ps_kernel_masknoca_affine);
+}
+
+static void gen5_wm_affine_mask_ca(void)
+{
+	uint32_t store[1024];
+	struct brw_compile p;
+
+	brw_compile_init(&p, 50, store);
+	brw_wm_kernel__affine_mask_ca(&p, 16);
+
+	compare(ps_kernel_maskca_affine);
+}
+
+static void gen5_wm_projective_nomask(void)
+{
+	uint32_t store[1024];
+	struct brw_compile p;
+
+	brw_compile_init(&p, 50, store);
+	brw_wm_kernel__projective(&p, 16);
+
+	compare(ps_kernel_nomask_projective);
+}
+
+void brw_test_gen5(void)
+{
+	gen5_sf();
+	gen5_sf_mask();
+
+	gen5_wm_affine_nomask();
+	gen5_wm_affine_mask_noca();
+	gen5_wm_affine_mask_ca();
+
+	gen5_wm_projective_nomask();
+}
diff --git a/src/sna/brw/brw_test_gen6.c b/src/sna/brw/brw_test_gen6.c
new file mode 100644
index 00000000..64bc2fb1
--- /dev/null
+++ b/src/sna/brw/brw_test_gen6.c
@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Chris Wilson
+ *
+ */
+
+#include "brw_test.h"
+
+#include <string.h>
+/* Reference programs: each array concatenates the listed .g6b fragments. */
+static const uint32_t ps_kernel_nomask_affine[][4] = {
+#include "exa_wm_src_affine.g6b"
+#include "exa_wm_src_sample_argb.g6b"
+#include "exa_wm_write.g6b"
+};
+
+static const uint32_t ps_kernel_nomask_projective[][4] = {
+#include "exa_wm_src_projective.g6b"
+#include "exa_wm_src_sample_argb.g6b"
+#include "exa_wm_write.g6b"
+};
+
+static const uint32_t ps_kernel_maskca_affine[][4] = {
+#include "exa_wm_src_affine.g6b"
+#include "exa_wm_src_sample_argb.g6b"
+#include "exa_wm_mask_affine.g6b"
+#include "exa_wm_mask_sample_argb.g6b"
+#include "exa_wm_ca.g6b"
+#include "exa_wm_write.g6b"
+};
+
+static const uint32_t ps_kernel_maskca_projective[][4] = {
+#include "exa_wm_src_projective.g6b"
+#include "exa_wm_src_sample_argb.g6b"
+#include "exa_wm_mask_projective.g6b"
+#include "exa_wm_mask_sample_argb.g6b"
+#include "exa_wm_ca.g6b"
+#include "exa_wm_write.g6b"
+};
+
+static const uint32_t ps_kernel_maskca_srcalpha_affine[][4] = {
+#include "exa_wm_src_affine.g6b"
+#include "exa_wm_src_sample_a.g6b"
+#include "exa_wm_mask_affine.g6b"
+#include "exa_wm_mask_sample_argb.g6b"
+#include "exa_wm_ca_srcalpha.g6b"
+#include "exa_wm_write.g6b"
+};
+
+static const uint32_t ps_kernel_maskca_srcalpha_projective[][4] = {
+#include "exa_wm_src_projective.g6b"
+#include "exa_wm_src_sample_a.g6b"
+#include "exa_wm_mask_projective.g6b"
+#include "exa_wm_mask_sample_argb.g6b"
+#include "exa_wm_ca_srcalpha.g6b"
+#include "exa_wm_write.g6b"
+};
+
+static const uint32_t ps_kernel_masknoca_affine[][4] = {
+#include "exa_wm_src_affine.g6b"
+#include "exa_wm_src_sample_argb.g6b"
+#include "exa_wm_mask_affine.g6b"
+#include "exa_wm_mask_sample_a.g6b"
+#include "exa_wm_noca.g6b"
+#include "exa_wm_write.g6b"
+};
+
+static const uint32_t ps_kernel_masknoca_projective[][4] = {
+#include "exa_wm_src_projective.g6b"
+#include "exa_wm_src_sample_argb.g6b"
+#include "exa_wm_mask_projective.g6b"
+#include "exa_wm_mask_sample_a.g6b"
+#include "exa_wm_noca.g6b"
+#include "exa_wm_write.g6b"
+};
+
+static const uint32_t ps_kernel_packed[][4] = {
+#include "exa_wm_src_affine.g6b"
+#include "exa_wm_src_sample_argb.g6b"
+#include "exa_wm_yuv_rgb.g6b"
+#include "exa_wm_write.g6b"
+};
+
+static const uint32_t ps_kernel_planar[][4] = {
+#include "exa_wm_src_affine.g6b"
+#include "exa_wm_src_sample_planar.g6b"
+#include "exa_wm_yuv_rgb.g6b"
+#include "exa_wm_write.g6b"
+};
+/* Compare the program assembled in the local `p` against the whole reference array `old`. */
+#define compare(old) brw_test_compare(__FUNCTION__, p.gen, p.store, p.nr_insn, (struct brw_instruction *)old, ARRAY_SIZE(old))
+
+#if 0
+static void wm_src_affine(struct brw_compile *p)
+{
+	brw_PLN(p, brw_message_reg(2), brw_vec1_grf(6,0), brw_vec8_grf(2,0));
+	brw_PLN(p, brw_message_reg(3), brw_vec1_grf(6,0), brw_vec8_grf(4,0));
+	brw_PLN(p, brw_message_reg(4), brw_vec1_grf(6,4), brw_vec8_grf(2,0));
+	brw_PLN(p, brw_message_reg(5), brw_vec1_grf(6,4), brw_vec8_grf(4,0));
+}
+
+static void wm_src_sample_argb(struct brw_compile *p)
+{
+	static const uint32_t fragment[][4] = {
+#include "exa_wm_src_affine.g6b"
+#include "exa_wm_src_sample_argb.g6b"
+#include "exa_wm_write.g6b"
+	};
+	int n;
+
+	brw_push_insn_state(p);
+	brw_set_mask_control(p, BRW_MASK_DISABLE);
+	brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+	brw_MOV(p,
+		retype(brw_vec1_grf(0,2), BRW_REGISTER_TYPE_UD),
+		brw_imm_ud(0));
+	brw_pop_insn_state(p);
+
+	brw_SAMPLE(p,
+		   retype(vec16(brw_vec8_grf(14, 0)), BRW_REGISTER_TYPE_UW),
+		   1,
+		   retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD),
+		   1, 0,
+		   WRITEMASK_XYZW,
+		   GEN5_SAMPLER_MESSAGE_SAMPLE,
+		   8,
+		   5,
+		   true,
+		   BRW_SAMPLER_SIMD_MODE_SIMD16);
+
+
+	for (n = 0; n < p->nr_insn; n++) {
+		brw_disasm(stdout, &p->store[n], 60);
+	}
+
+	printf("\n\n");
+	for (n = 0; n < ARRAY_SIZE(fragment); n++) {
+		brw_disasm(stdout,
+			   (const struct brw_instruction *)&fragment[n][0],
+			   60);
+	}
+}
+
+static void wm_write(struct brw_compile *p)
+{
+}
+#endif
+/* Each test assembles one kernel into a local store and checks it against its reference. */
+static void gen6_ps_nomask_affine(void)
+{
+	uint32_t store[1024];
+	struct brw_compile p;
+
+	brw_compile_init(&p, 60, store);
+	brw_wm_kernel__affine(&p, 16);
+
+	compare(ps_kernel_nomask_affine);
+}
+
+static void gen6_ps_mask_affine(void)
+{
+	uint32_t store[1024];
+	struct brw_compile p;
+
+	brw_compile_init(&p, 60, store);
+	brw_wm_kernel__affine_mask(&p, 16);
+
+	compare(ps_kernel_masknoca_affine);
+}
+
+static void gen6_ps_nomask_projective(void)
+{
+	uint32_t store[1024];
+	struct brw_compile p;
+
+	brw_compile_init(&p, 60, store);
+	brw_wm_kernel__projective(&p, 16);
+
+	compare(ps_kernel_nomask_projective);
+}
+
+void brw_test_gen6(void)
+{
+	gen6_ps_nomask_affine();
+	gen6_ps_mask_affine();
+
+	gen6_ps_nomask_projective();
+}
diff --git a/src/sna/brw/brw_test_gen7.c b/src/sna/brw/brw_test_gen7.c
new file mode 100644
index 00000000..c3f0e231
--- /dev/null
+++ b/src/sna/brw/brw_test_gen7.c
@@ -0,0 +1,178 @@
+/*
+ * Copyright (c) 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Chris Wilson
+ *
+ */
+
+#include "brw_test.h"
+
+#include <string.h>
+/* Reference programs: each array concatenates the listed .g7b (and some .g6b) fragments. */
+static const uint32_t ps_kernel_nomask_affine[][4] = {
+#include "exa_wm_src_affine.g7b"
+#include "exa_wm_src_sample_argb.g7b"
+#include "exa_wm_write.g7b"
+};
+
+static const uint32_t ps_kernel_nomask_projective[][4] = {
+#include "exa_wm_src_projective.g7b"
+#include "exa_wm_src_sample_argb.g7b"
+#include "exa_wm_write.g7b"
+};
+
+static const uint32_t ps_kernel_maskca_affine[][4] = {
+#include "exa_wm_src_affine.g7b"
+#include "exa_wm_src_sample_argb.g7b"
+#include "exa_wm_mask_affine.g7b"
+#include "exa_wm_mask_sample_argb.g7b"
+#include "exa_wm_ca.g6b"
+#include "exa_wm_write.g7b"
+};
+
+static const uint32_t ps_kernel_maskca_projective[][4] = {
+#include "exa_wm_src_projective.g7b"
+#include "exa_wm_src_sample_argb.g7b"
+#include "exa_wm_mask_projective.g7b"
+#include "exa_wm_mask_sample_argb.g7b"
+#include "exa_wm_ca.g6b"
+#include "exa_wm_write.g7b"
+};
+
+static const uint32_t ps_kernel_maskca_srcalpha_affine[][4] = {
+#include "exa_wm_src_affine.g7b"
+#include "exa_wm_src_sample_a.g7b"
+#include "exa_wm_mask_affine.g7b"
+#include "exa_wm_mask_sample_argb.g7b"
+#include "exa_wm_ca_srcalpha.g6b"
+#include "exa_wm_write.g7b"
+};
+
+static const uint32_t ps_kernel_maskca_srcalpha_projective[][4] = {
+#include "exa_wm_src_projective.g7b"
+#include "exa_wm_src_sample_a.g7b"
+#include "exa_wm_mask_projective.g7b"
+#include "exa_wm_mask_sample_argb.g7b"
+#include "exa_wm_ca_srcalpha.g6b"
+#include "exa_wm_write.g7b"
+};
+
+static const uint32_t ps_kernel_masknoca_affine[][4] = {
+#include "exa_wm_src_affine.g7b"
+#include "exa_wm_src_sample_argb.g7b"
+#include "exa_wm_mask_affine.g7b"
+#include "exa_wm_mask_sample_a.g7b"
+#include "exa_wm_noca.g6b"
+#include "exa_wm_write.g7b"
+};
+
+static const uint32_t ps_kernel_masknoca_projective[][4] = {
+#include "exa_wm_src_projective.g7b"
+#include "exa_wm_src_sample_argb.g7b"
+#include "exa_wm_mask_projective.g7b"
+#include "exa_wm_mask_sample_a.g7b"
+#include "exa_wm_noca.g6b"
+#include "exa_wm_write.g7b"
+};
+
+static const uint32_t ps_kernel_packed[][4] = {
+#include "exa_wm_src_affine.g7b"
+#include "exa_wm_src_sample_argb.g7b"
+#include "exa_wm_yuv_rgb.g7b"
+#include "exa_wm_write.g7b"
+};
+
+static const uint32_t ps_kernel_planar[][4] = {
+#include "exa_wm_src_affine.g7b"
+#include "exa_wm_src_sample_planar.g7b"
+#include "exa_wm_yuv_rgb.g7b"
+#include "exa_wm_write.g7b"
+};
+/* Compare the program assembled in the local `p` against the whole reference array `old`. */
+#define compare(old) brw_test_compare(__FUNCTION__, p.gen, p.store, p.nr_insn, (struct brw_instruction *)old, ARRAY_SIZE(old))
+#define GEN 70
+/* Each test assembles one kernel (dispatch width 8) and checks it against its reference. */
+static void gen7_ps_nomask_affine(void)
+{
+	uint32_t store[1024];
+	struct brw_compile p;
+
+	brw_compile_init(&p, GEN, store);
+	brw_wm_kernel__affine(&p, 8);
+
+	compare(ps_kernel_nomask_affine);
+}
+
+static void gen7_ps_mask_affine(void)
+{
+	uint32_t store[1024];
+	struct brw_compile p;
+
+	brw_compile_init(&p, GEN, store);
+	brw_wm_kernel__affine_mask(&p, 8);
+
+	compare(ps_kernel_masknoca_affine);
+}
+
+static void gen7_ps_maskca_affine(void)
+{
+	uint32_t store[1024];
+	struct brw_compile p;
+
+	brw_compile_init(&p, GEN, store);
+	brw_wm_kernel__affine_mask_ca(&p, 8);
+
+	compare(ps_kernel_maskca_affine);
+}
+
+static void gen7_ps_masksa_affine(void)
+{
+	uint32_t store[1024];
+	struct brw_compile p;
+
+	brw_compile_init(&p, GEN, store);
+	brw_wm_kernel__affine_mask_sa(&p, 8);
+
+	compare(ps_kernel_maskca_srcalpha_affine);
+}
+
+static void gen7_ps_nomask_projective(void)
+{
+	uint32_t store[1024];
+	struct brw_compile p;
+
+	brw_compile_init(&p, GEN, store);
+	brw_wm_kernel__projective(&p, 8);
+
+	compare(ps_kernel_nomask_projective);
+}
+
+void brw_test_gen7(void)
+{
+	gen7_ps_nomask_affine();
+	gen7_ps_mask_affine();
+	gen7_ps_maskca_affine();
+	gen7_ps_masksa_affine();
+
+	gen7_ps_nomask_projective();
+}
diff --git a/src/sna/brw/brw_wm.c b/src/sna/brw/brw_wm.c
new file mode 100644
index 00000000..9a8af5f4
--- /dev/null
+++ b/src/sna/brw/brw_wm.c
@@ -0,0 +1,542 @@
+#include "brw.h"
+/* GRF numbers written by brw_wm_xy() and read by the pre-gen6 path of brw_wm_affine_st() */
+#define X16 8
+#define Y16 10
+/* Emit the ADDs that fill GRF X16 and GRF Y16 from GRF 1 for the dispatch width dw. */
+static void brw_wm_xy(struct brw_compile *p, int dw)
+{
+	struct brw_reg r1 = brw_vec1_grf(1, 0);
+	struct brw_reg r1_uw = __retype_uw(r1);
+	struct brw_reg x_uw, y_uw;
+
+	brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+	if (dw == 16) {
+		x_uw = brw_uw16_grf(30, 0);
+		y_uw = brw_uw16_grf(28, 0);
+	} else {
+		x_uw = brw_uw8_grf(30, 0);
+		y_uw = brw_uw8_grf(28, 0);
+	}
+
+	brw_ADD(p,
+		x_uw,
+		__stride(__suboffset(r1_uw, 4), 2, 4, 0),
+		brw_imm_v(0x10101010));
+	brw_ADD(p,
+		y_uw,
+		__stride(__suboffset(r1_uw, 5), 2, 4, 0),
+		brw_imm_v(0x11001100));
+
+	brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+
+	brw_ADD(p, brw_vec8_grf(X16, 0), vec8(x_uw), brw_negate(r1));
+	brw_ADD(p, brw_vec8_grf(Y16, 0), vec8(y_uw), brw_negate(__suboffset(r1, 1)));
+}
+/* Emit two values for `channel` into message regs msg+1 and msg+1+dw/8 (PLN on gen6+, else LINE+MAC). */
+static void brw_wm_affine_st(struct brw_compile *p, int dw, int channel, int msg)
+{
+	int uv;
+
+	if (dw == 16) {
+		brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+		uv = p->gen >= 60 ? 6 : 3;
+		uv += 2*channel;
+	} else {
+		brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+		uv = p->gen >= 60 ? 4 : 3;
+		uv += channel;
+	}
+
+	msg++;
+	if (p->gen >= 60) {
+		brw_PLN(p,
+			brw_message_reg(msg),
+			brw_vec1_grf(uv, 0),
+			brw_vec8_grf(2, 0));
+		msg += dw/8;
+
+		brw_PLN(p,
+			brw_message_reg(msg),
+			brw_vec1_grf(uv, 4),
+			brw_vec8_grf(2, 0));
+	} else {
+		struct brw_reg r = brw_vec1_grf(uv, 0);
+
+		brw_LINE(p, brw_null_reg(), __suboffset(r, 0), brw_vec8_grf(X16, 0));
+		brw_MAC(p, brw_message_reg(msg), __suboffset(r, 1), brw_vec8_grf(Y16, 0));
+		msg += dw/8;
+
+		brw_LINE(p, brw_null_reg(), __suboffset(r, 4), brw_vec8_grf(X16, 0));
+		brw_MAC(p, brw_message_reg(msg), __suboffset(r, 5), brw_vec8_grf(Y16, 0));
+	}
+}
+/* Sampler SIMD mode matching the dispatch width (SIMD16 for 16, SIMD8 otherwise). */
+static inline unsigned simd(int dw)
+{
+	return dw == 16 ? BRW_SAMPLER_SIMD_MODE_SIMD16 : BRW_SAMPLER_SIMD_MODE_SIMD8;
+}
+/* UW-typed GRF region starting at register `result`, width 16 or 8 according to dw. */
+static inline struct brw_reg sample_result(int dw, int result)
+{
+	return brw_reg(BRW_GENERAL_REGISTER_FILE, result, 0,
+		       BRW_REGISTER_TYPE_UW,
+		       dw == 16 ? BRW_VERTICAL_STRIDE_16 : BRW_VERTICAL_STRIDE_8,
+		       dw == 16 ? BRW_WIDTH_16 : BRW_WIDTH_8,
+		       BRW_HORIZONTAL_STRIDE_1,
+		       BRW_SWIZZLE_XYZW,
+		       WRITEMASK_XYZW);
+}
+/* Emit a SAMPLE with WRITEMASK_XYZW into `result`; gen6+ sends no header and sources from msg+1. */
+static void brw_wm_sample(struct brw_compile *p, int dw,
+			  int channel, int msg, int result)
+{
+	struct brw_reg src0;
+	bool header;
+	int len;
+
+	len = dw == 16 ? 4 : 2;
+	if (p->gen >= 60) {
+		header = false;
+		src0 = brw_message_reg(++msg);
+	} else {
+		header = true;
+		src0 = brw_vec8_grf(0, 0);
+	}
+
+	brw_SAMPLE(p, sample_result(dw, result), msg, src0,
+		   channel+1, channel, WRITEMASK_XYZW, 0,
+		   2*len, len+header, header, simd(dw));
+}
+/* Emit a SAMPLE with WRITEMASK_W only into `result`; the header flag is always set here. */
+static void brw_wm_sample__alpha(struct brw_compile *p, int dw,
+				 int channel, int msg, int result)
+{
+	struct brw_reg src0;
+	int len;
+
+	len = dw == 16 ? 4 : 2;
+	if (p->gen >= 60)
+		src0 = brw_message_reg(msg);
+	else
+		src0 = brw_vec8_grf(0, 0);
+
+	brw_SAMPLE(p, sample_result(dw, result), msg, src0,
+		   channel+1, channel, WRITEMASK_W, 0,
+		   len/2, len+1, true, simd(dw));
+}
+/* Emit brw_wm_affine_st() followed by brw_wm_sample() for the same channel and message reg. */
+static void brw_wm_affine(struct brw_compile *p, int dw,
+			  int channel, int msg, int result)
+{
+	brw_wm_affine_st(p, dw, channel, msg);
+	brw_wm_sample(p, dw, channel, msg, result);
+}
+/* Emit brw_wm_affine_st() followed by brw_wm_sample__alpha() for the same channel and message reg. */
+static void brw_wm_affine__alpha(struct brw_compile *p, int dw,
+				 int channel, int msg, int result)
+{
+	brw_wm_affine_st(p, dw, channel, msg);
+	brw_wm_sample__alpha(p, dw, channel, msg, result);
+}
+/* UW-typed null register (BRW_ARF_NULL) region, width 16 or 8 according to dw. */
+static inline struct brw_reg null_result(int dw)
+{
+	return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+		       BRW_ARF_NULL, 0,
+		       BRW_REGISTER_TYPE_UW,
+		       dw == 16 ? BRW_VERTICAL_STRIDE_16 : BRW_VERTICAL_STRIDE_8,
+		       dw == 16 ? BRW_WIDTH_16 : BRW_WIDTH_8,
+		       BRW_HORIZONTAL_STRIDE_1,
+		       BRW_SWIZZLE_XYZW,
+		       WRITEMASK_XYZW);
+}
+
+static void brw_fb_write(struct brw_compile *p, int dw)
+{
+	struct brw_instruction *insn;
+	unsigned msg_control, msg_type, msg_len;
+	struct brw_reg src0;
+	bool header;
+
+	if (dw == 16) {
+		brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+		msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
+		msg_len = 8;
+	} else {
+		brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+		msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
+		msg_len = 4;
+	}
+
+	if (p->gen < 60) {
+		brw_push_insn_state(p);
+		brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+		brw_set_mask_control(p, BRW_MASK_DISABLE);
+		brw_MOV(p, brw_message_reg(1), brw_vec8_grf(1, 0));
+		brw_pop_insn_state(p);
+
+		msg_len += 2;
+	}
+
+	/* The execution mask is ignored for render target writes.
*/ + insn = brw_next_insn(p, BRW_OPCODE_SEND); + insn->header.predicate_control = 0; + insn->header.compression_control = BRW_COMPRESSION_NONE; + + if (p->gen >= 60) { + src0 = brw_message_reg(2); + msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; + header = false; + } else { + insn->header.destreg__conditionalmod = 0; + msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; + src0 = __retype_uw(brw_vec8_grf(0, 0)); + header = true; + } + + brw_set_dest(p, insn, null_result(dw)); + brw_set_src0(p, insn, src0); + brw_set_dp_write_message(p, insn, 0, + msg_control, msg_type, msg_len, + header, true, 0, true, false); +} + +static void brw_wm_write(struct brw_compile *p, int dw, int src) +{ + int n; + + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + + if (dw == 8 && p->gen >= 60) { + brw_MOV(p, brw_message_reg(2), brw_vec8_grf(src, 0)); + brw_MOV(p, brw_message_reg(4), brw_vec8_grf(src+2, 0)); + goto done; + } + + for (n = 0; n < 4; n++) { + if (p->gen >= 60) { + brw_MOV(p, + brw_message_reg(2 + 2*n), + brw_vec8_grf(src + 2*n, 0)); + } else if (p->gen >= 45 && dw == 16) { + brw_MOV(p, + brw_message_reg(2 + n + BRW_MRF_COMPR4), + brw_vec8_grf(src + 2*n, 0)); + } else { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MOV(p, + brw_message_reg(2 + n), + brw_vec8_grf(src + 2*n, 0)); + + if (dw == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_MOV(p, + brw_message_reg(2 + n + 4), + brw_vec8_grf(src + 2*n+1, 0)); + } + } + } + +done: + brw_fb_write(p, dw); +} + +static inline struct brw_reg mask_a8(int nr) +{ + return brw_reg(BRW_GENERAL_REGISTER_FILE, + nr, 0, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_8, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + WRITEMASK_XYZW); +} + +static void brw_wm_write__mask(struct brw_compile *p, + int dw, + int src, int mask) +{ + int n; + + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + + if (dw == 8 && p->gen >= 60) { + brw_MUL(p, + 
brw_message_reg(2), + brw_vec8_grf(src, 0), + mask_a8(mask)); + brw_MUL(p, + brw_message_reg(4), + brw_vec8_grf(src+2, 0), + mask_a8(mask)); + goto done; + } + + for (n = 0; n < 4; n++) { + if (p->gen >= 60) { + brw_MUL(p, + brw_message_reg(2 + 2*n), + brw_vec8_grf(src + 2*n, 0), + brw_vec8_grf(mask, 0)); + } else if (p->gen >= 45 && dw == 16) { + brw_MUL(p, + brw_message_reg(2 + n + BRW_MRF_COMPR4), + brw_vec8_grf(src + 2*n, 0), + brw_vec8_grf(mask, 0)); + } else { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MUL(p, + brw_message_reg(2 + n), + brw_vec8_grf(src + 2*n, 0), + brw_vec8_grf(mask, 0)); + + if (dw == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_MUL(p, + brw_message_reg(2 + n + 4), + brw_vec8_grf(src + 2*n+1, 0), + brw_vec8_grf(mask+1, 0)); + } + } + } + +done: + brw_fb_write(p, dw); +} + +static void brw_wm_write__mask_ca(struct brw_compile *p, + int dw, int src, int mask) +{ + int n; + + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + + if (dw == 8 && p->gen >= 60) { + brw_MUL(p, + brw_message_reg(2), + brw_vec8_grf(src, 0), + brw_vec8_grf(mask, 0)); + brw_MUL(p, + brw_message_reg(4), + brw_vec8_grf(src + 2, 0), + brw_vec8_grf(mask + 2, 0)); + goto done; + } + + for (n = 0; n < 4; n++) { + if (p->gen >= 60) { + brw_MUL(p, + brw_message_reg(2 + 2*n), + brw_vec8_grf(src + 2*n, 0), + brw_vec8_grf(mask + 2*n, 0)); + } else if (p->gen >= 45 && dw == 16) { + brw_MUL(p, + brw_message_reg(2 + n + BRW_MRF_COMPR4), + brw_vec8_grf(src + 2*n, 0), + brw_vec8_grf(mask + 2*n, 0)); + } else { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MUL(p, + brw_message_reg(2 + n), + brw_vec8_grf(src + 2*n, 0), + brw_vec8_grf(mask + 2*n, 0)); + + if (dw == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_MUL(p, + brw_message_reg(2 + n + 4), + brw_vec8_grf(src + 2*n + 1, 0), + brw_vec8_grf(mask + 2*n + 1, 0)); + } + } + } + +done: + brw_fb_write(p, dw); +} + +void 
+brw_wm_kernel__affine(struct brw_compile *p, int dispatch) +{ + int src = 12; + + if (p->gen < 60) + brw_wm_xy(p, dispatch); + brw_wm_affine(p, dispatch, 0, 1, src); + brw_wm_write(p, dispatch, src); +} + +void +brw_wm_kernel__affine_mask(struct brw_compile *p, int dispatch) +{ + int src = 12, mask = 20; + + if (p->gen < 60) + brw_wm_xy(p, dispatch); + brw_wm_affine(p, dispatch, 0, 1, src); + brw_wm_affine__alpha(p, dispatch, 1, 7, mask); + brw_wm_write__mask(p, dispatch, src, mask); +} + +void +brw_wm_kernel__affine_mask_ca(struct brw_compile *p, int dispatch) +{ + int src = 12, mask = 20; + + if (p->gen < 60) + brw_wm_xy(p, dispatch); + brw_wm_affine(p, dispatch, 0, 1, src); + brw_wm_affine(p, dispatch, 1, 7, mask); + brw_wm_write__mask_ca(p, dispatch, src, mask); +} + +void +brw_wm_kernel__affine_mask_sa(struct brw_compile *p, int dispatch) +{ + int src = 12, mask = 14; + + if (p->gen < 60) + brw_wm_xy(p, dispatch); + brw_wm_affine__alpha(p, dispatch, 0, 1, src); + brw_wm_affine(p, dispatch, 1, 7, mask); + brw_wm_write__mask(p, dispatch, mask, src); +} + +/* Projective variants */ + +static void brw_wm_projective_st(struct brw_compile *p, int dw, int channel, int msg) +{ + int uv; + + if (dw == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + uv = p->gen >= 60 ? 6 : 3; + uv += 2*channel; + } else { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + uv = p->gen >= 60 ? 
4 : 3; + uv += channel; + } + + msg++; + if (p->gen >= 60) { + /* First compute 1/z */ + brw_PLN(p, + brw_message_reg(msg), + brw_vec1_grf(uv+1, 0), + brw_vec8_grf(2, 0)); + + if (dw == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0)); + brw_math_invert(p, brw_vec8_grf(31, 0), brw_vec8_grf(31, 0)); + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + } else + brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0)); + brw_PLN(p, + brw_vec8_grf(28, 0), + brw_vec1_grf(uv, 0), + brw_vec8_grf(2, 0)); + brw_MUL(p, + brw_message_reg(msg), + brw_vec8_grf(28, 0), + brw_vec8_grf(30, 0)); + msg += dw/8; + + brw_PLN(p, + brw_vec8_grf(28, 0), + brw_vec1_grf(uv, 0), + brw_vec8_grf(4, 0)); + brw_MUL(p, + brw_message_reg(msg), + brw_vec8_grf(28, 0), + brw_vec8_grf(30, 0)); + } else { + struct brw_reg r = brw_vec1_grf(uv, 0); + + /* First compute 1/z */ + brw_LINE(p, brw_null_reg(), brw_vec1_grf(uv+1, 0), brw_vec8_grf(X16, 0)); + brw_MAC(p, brw_vec8_grf(30, 0), brw_vec1_grf(uv+1, 1), brw_vec8_grf(Y16, 0)); + + if (dw == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0)); + brw_math_invert(p, brw_vec8_grf(31, 0), brw_vec8_grf(31, 0)); + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + } else + brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0)); + + /* Now compute the output s,t values */ + brw_LINE(p, brw_null_reg(), __suboffset(r, 0), brw_vec8_grf(X16, 0)); + brw_MAC(p, brw_vec8_grf(28, 0), __suboffset(r, 1), brw_vec8_grf(Y16, 0)); + brw_MUL(p, brw_message_reg(msg), brw_vec8_grf(28, 0), brw_vec8_grf(30, 0)); + msg += dw/8; + + brw_LINE(p, brw_null_reg(), __suboffset(r, 4), brw_vec8_grf(X16, 0)); + brw_MAC(p, brw_vec8_grf(28, 0), __suboffset(r, 5), brw_vec8_grf(Y16, 0)); + brw_MUL(p, brw_message_reg(msg), brw_vec8_grf(28, 0), brw_vec8_grf(30, 0)); + } +} + +static void brw_wm_projective(struct 
brw_compile *p, int dw, + int channel, int msg, int result) +{ + brw_wm_projective_st(p, dw, channel, msg); + brw_wm_sample(p, dw, channel, msg, result); +} + +static void brw_wm_projective__alpha(struct brw_compile *p, int dw, + int channel, int msg, int result) +{ + brw_wm_projective_st(p, dw, channel, msg); + brw_wm_sample__alpha(p, dw, channel, msg, result); +} + +void +brw_wm_kernel__projective(struct brw_compile *p, int dispatch) +{ + int src = 12; + + if (p->gen < 60) + brw_wm_xy(p, dispatch); + brw_wm_projective(p, dispatch, 0, 1, src); + brw_wm_write(p, dispatch, src); +} + +void +brw_wm_kernel__projective_mask(struct brw_compile *p, int dispatch) +{ + int src = 12, mask = 20; + + if (p->gen < 60) + brw_wm_xy(p, dispatch); + brw_wm_projective(p, dispatch, 0, 1, src); + brw_wm_projective__alpha(p, dispatch, 1, 7, mask); + brw_wm_write__mask(p, dispatch, src, mask); +} + +void +brw_wm_kernel__projective_mask_ca(struct brw_compile *p, int dispatch) +{ + int src = 12, mask = 20; + + if (p->gen < 60) + brw_wm_xy(p, dispatch); + brw_wm_projective(p, dispatch, 0, 1, src); + brw_wm_projective(p, dispatch, 1,7, mask); + brw_wm_write__mask_ca(p, dispatch, src, mask); +} + +void +brw_wm_kernel__projective_mask_sa(struct brw_compile *p, int dispatch) +{ + int src = 12, mask = 14; + + if (p->gen < 60) + brw_wm_xy(p, dispatch); + brw_wm_projective__alpha(p, dispatch, 0, 1, src); + brw_wm_projective(p, dispatch, 1, 7, mask); + brw_wm_write__mask(p, dispatch, mask, src); +} diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c index c292da1e..5b64efa1 100644 --- a/src/sna/gen6_render.c +++ b/src/sna/gen6_render.c @@ -179,8 +179,8 @@ static const struct wm_kernel_info { KERNEL(MASKCA, ps_kernel_maskca_affine, 3, 2), KERNEL(MASKCA_PROJECTIVE, ps_kernel_maskca_projective, 3, 2), - KERNEL(MASKCA_SRCALPHA, ps_kernel_maskca_srcalpha_affine, 3, 2), - KERNEL(MASKCA_SRCALPHA_PROJECTIVE, ps_kernel_maskca_srcalpha_projective, 3, 2), + KERNEL(MASKSA, 
ps_kernel_maskca_srcalpha_affine, 3, 2), + KERNEL(MASKSA_PROJECTIVE, ps_kernel_maskca_srcalpha_projective, 3, 2), KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7, 1), KERNEL(VIDEO_PACKED, ps_kernel_packed, 2, 1), @@ -431,7 +431,7 @@ gen6_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine) if (has_mask) { if (is_ca) { if (gen6_blend_op[op].src_alpha) - base = GEN6_WM_KERNEL_MASKCA_SRCALPHA; + base = GEN6_WM_KERNEL_MASKSA; else base = GEN6_WM_KERNEL_MASKCA; } else diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c index d06b791d..ded22d5f 100644 --- a/src/sna/gen7_render.c +++ b/src/sna/gen7_render.c @@ -185,8 +185,8 @@ static const struct wm_kernel_info { KERNEL(MASKCA, ps_kernel_maskca_affine, 3), KERNEL(MASKCA_PROJECTIVE, ps_kernel_maskca_projective, 3), - KERNEL(MASKCA_SRCALPHA, ps_kernel_maskca_srcalpha_affine, 3), - KERNEL(MASKCA_SRCALPHA_PROJECTIVE, ps_kernel_maskca_srcalpha_projective, 3), + KERNEL(MASKSA, ps_kernel_maskca_srcalpha_affine, 3), + KERNEL(MASKSA_PROJECTIVE, ps_kernel_maskca_srcalpha_projective, 3), KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7), KERNEL(VIDEO_PACKED, ps_kernel_packed, 2), @@ -437,7 +437,7 @@ gen7_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine) if (has_mask) { if (is_ca) { if (gen7_blend_op[op].src_alpha) - base = GEN7_WM_KERNEL_MASKCA_SRCALPHA; + base = GEN7_WM_KERNEL_MASKSA; else base = GEN7_WM_KERNEL_MASKCA; } else diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h index e676b6a7..64e24122 100644 --- a/src/sna/sna_render.h +++ b/src/sna/sna_render.h @@ -13,6 +13,7 @@ struct sna; struct sna_glyph; struct sna_video; struct sna_video_frame; +struct brw_compile; struct sna_composite_rectangles { struct sna_coordinate { @@ -379,8 +380,8 @@ enum { GEN6_WM_KERNEL_MASKCA, GEN6_WM_KERNEL_MASKCA_PROJECTIVE, - GEN6_WM_KERNEL_MASKCA_SRCALPHA, - GEN6_WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE, + GEN6_WM_KERNEL_MASKSA, + GEN6_WM_KERNEL_MASKSA_PROJECTIVE, GEN6_WM_KERNEL_VIDEO_PLANAR, 
GEN6_WM_KERNEL_VIDEO_PACKED, @@ -428,8 +429,8 @@ enum { GEN7_WM_KERNEL_MASKCA, GEN7_WM_KERNEL_MASKCA_PROJECTIVE, - GEN7_WM_KERNEL_MASKCA_SRCALPHA, - GEN7_WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE, + GEN7_WM_KERNEL_MASKSA, + GEN7_WM_KERNEL_MASKSA_PROJECTIVE, GEN7_WM_KERNEL_VIDEO_PLANAR, GEN7_WM_KERNEL_VIDEO_PACKED, @@ -479,6 +480,14 @@ void *sna_static_stream_map(struct sna_static_stream *stream, uint32_t len, uint32_t align); uint32_t sna_static_stream_offsetof(struct sna_static_stream *stream, void *ptr); +unsigned sna_static_stream_compile_sf(struct sna *sna, + struct sna_static_stream *stream, + void (*compile)(struct brw_compile *)); + +unsigned sna_static_stream_compile_wm(struct sna *sna, + struct sna_static_stream *stream, + void (*compile)(struct brw_compile *, int), + int width); struct kgem_bo *sna_static_stream_fini(struct sna *sna, struct sna_static_stream *stream); diff --git a/src/sna/sna_stream.c b/src/sna/sna_stream.c index aab15498..66a8c461 100644 --- a/src/sna/sna_stream.c +++ b/src/sna/sna_stream.c @@ -27,6 +27,7 @@ #include "sna.h" #include "sna_render.h" +#include "brw/brw.h" int sna_static_stream_init(struct sna_static_stream *stream) { @@ -92,3 +93,40 @@ struct kgem_bo *sna_static_stream_fini(struct sna *sna, return bo; } + +unsigned +sna_static_stream_compile_sf(struct sna *sna, + struct sna_static_stream *stream, + void (*compile)(struct brw_compile *)) +{ + struct brw_compile p; + + brw_compile_init(&p, sna->kgem.gen, + sna_static_stream_map(stream, + 64*sizeof(uint32_t), 64)); + + compile(&p); + assert(p.nr_insn*sizeof(struct brw_instruction) <= 64*sizeof(uint32_t)); + + stream->used -= 64*sizeof(uint32_t) - p.nr_insn*sizeof(struct brw_instruction); + return sna_static_stream_offsetof(stream, p.store); +} + +unsigned +sna_static_stream_compile_wm(struct sna *sna, + struct sna_static_stream *stream, + void (*compile)(struct brw_compile *, int), + int dispatch_width) +{ + struct brw_compile p; + + brw_compile_init(&p, sna->kgem.gen, + 
sna_static_stream_map(stream, + 256*sizeof(uint32_t), 64)); + + compile(&p, dispatch_width); + assert(p.nr_insn*sizeof(struct brw_instruction) <= 256*sizeof(uint32_t)); + + stream->used -= 256*sizeof(uint32_t) - p.nr_insn*sizeof(struct brw_instruction); + return sna_static_stream_offsetof(stream, p.store); +} -- cgit v1.2.3