summaryrefslogtreecommitdiff
authorChris Wilson <chris@chris-wilson.co.uk>2011-04-08 06:17:14 (GMT)
committer Chris Wilson <chris@chris-wilson.co.uk>2011-06-04 08:19:46 (GMT)
commitbcef98af561939aa48d9236b2dfa2c5626adf4cb (patch) (side-by-side diff)
tree9d05558947a97595a6fdece968b50eeae45bbfb1
parent340cfb7f5271fd1df4c8948e5c9336f5b69a6e6c (diff)
downloadxf86-video-intel-bcef98af561939aa48d9236b2dfa2c5626adf4cb.zip
xf86-video-intel-bcef98af561939aa48d9236b2dfa2c5626adf4cb.tar.gz
sna: Introduce a new acceleration model.
The premise is that switching between rings (i.e. the BLT and RENDER rings) on SandyBridge imposes a large latency overhead whilst rendering. The cause is that in order to switch rings, we need to split the batch earlier than is desired and to add serialisation between the rings. Both of which incur large overhead. By switching to using a pure 3D blit engine (ok, not so pure as the BLT engine still has uses for the core drawing model which can not be easily represented without a combinatorial explosion of shaders) we can take advantage of additional efficiencies, such as relative relocations, that have been incorporated into recent hardware advances. However, even older hardware performs better from avoiding the implicit context switches and from the batching efficiency of the 3D pipeline... But this is X, and PolyGlyphBlt still exists and remains in use. So for the operations that are not worth accelerating in hardware, we introduce a shadow buffer mechanism through out and reintroduce pixmap migration. Doing this efficiently is the cornerstone of ensuring that we do exploit the increased potential of recent hardware for running old applications and environments (i.e. so that the latest and greatest chip is actually faster than gen2!) For the curious, sna is SandyBridge's New Acceleration. If you are running older chipsets and welcome the performance increase offered by this patch, then you may choose to call it Snazzy instead. Speedups ======== gen3 firefox-fishtank 1203584.56 (1203842.75 0.01%) -> 85561.71 (125146.44 14.87%): 14.07x speedup gen5 grads-heat-map 3385.42 (3489.73 1.44%) -> 350.29 (350.75 0.18%): 9.66x speedup gen3 xfce4-terminal-a1 4179.02 (4180.09 0.06%) -> 503.90 (531.88 4.48%): 8.29x speedup gen4 grads-heat-map 2458.66 (2826.34 4.64%) -> 348.82 (349.20 0.29%): 7.05x speedup gen3 grads-heat-map 1443.33 (1445.32 0.09%) -> 298.55 (298.76 0.05%): 4.83x speedup gen3 swfdec-youtube 3836.14 (3894.14 0.95%) -> 889.84 (979.56 5.99%): 4.31x speedup gen6 grads-heat-map 742.11 (744.44 0.15%) -> 172.51 (172.93 0.20%): 4.30x speedup gen3 firefox-talos-svg 71740.44 (72370.13 0.59%) -> 21959.29 (21995.09 0.68%): 3.27x speedup gen5 gvim 8045.51 (8071.47 0.17%) -> 2589.38 (3246.78 10.74%): 3.11x speedup gen6 poppler 3800.78 (3817.92 0.24%) -> 1227.36 (1230.12 0.30%): 3.10x speedup gen6 gnome-terminal-vim 9106.84 (9111.56 0.03%) -> 3459.49 (3478.52 0.25%): 2.63x speedup gen5 midori-zoomed 9564.53 (9586.58 0.17%) -> 3677.73 (3837.02 2.02%): 2.60x speedup gen5 gnome-terminal-vim 38167.25 (38215.82 0.08%) -> 14901.09 (14902.28 0.01%): 2.56x speedup gen5 poppler 13575.66 (13605.04 0.16%) -> 5554.27 (5555.84 0.01%): 2.44x speedup gen5 swfdec-giant-steps 8941.61 (8988.72 0.52%) -> 3851.98 (3871.01 0.93%): 2.32x speedup gen5 xfce4-terminal-a1 18956.60 (18986.90 0.07%) -> 8362.75 (8365.70 0.01%): 2.27x speedup gen5 firefox-fishtank 88750.31 (88858.23 0.14%) -> 39164.57 (39835.54 0.80%): 2.27x speedup gen3 midori-zoomed 2392.13 (2397.82 0.14%) -> 1109.96 (1303.10 30.35%): 2.16x speedup gen6 gvim 2510.34 (2513.34 0.20%) -> 1200.76 (1204.30 0.22%): 2.09x speedup gen5 firefox-planet-gnome 40478.16 (40565.68 0.09%) -> 19606.22 (19648.79 0.16%): 2.06x speedup gen5 gnome-system-monitor 10344.47 (10385.62 0.29%) -> 5136.69 (5256.85 1.15%): 2.01x speedup gen3 poppler 2595.23 (2603.10 0.17%) -> 1297.56 (1302.42 0.61%): 2.00x speedup gen6 firefox-talos-gfx 7184.03 (7194.97 0.13%) -> 3806.31 (3811.66 0.06%): 1.89x speedup gen5 evolution 8739.25 (8766.12 0.27%) -> 4817.54 (5050.96 1.54%): 1.81x speedup gen3 evolution 1684.06 (1696.88 0.35%) -> 1004.99 (1008.55 0.85%): 1.68x speedup gen3 gnome-terminal-vim 4285.13 (4287.68 0.04%) -> 2715.97 (3202.17 13.52%): 1.58x speedup gen5 swfdec-youtube 5843.94 (5951.07 0.91%) -> 3810.86 (3826.04 1.32%): 1.53x speedup gen4 poppler 7496.72 (7558.83 0.58%) -> 5125.08 (5247.65 1.44%): 1.46x speedup gen4 gnome-terminal-vim 21126.24 (21292.08 0.85%) -> 14590.25 (15066.33 1.80%): 1.45x speedup gen5 firefox-talos-svg 99873.69 (100300.95 0.37%) -> 70745.66 (70818.86 0.05%): 1.41x speedup gen4 firefox-planet-gnome 28205.10 (28304.45 0.27%) -> 19996.11 (20081.44 0.56%): 1.41x speedup gen5 firefox-talos-gfx 93070.85 (93194.72 0.10%) -> 67687.93 (70374.37 1.30%): 1.37x speedup gen4 evolution 6696.25 (6854.14 0.85%) -> 4958.62 (5027.73 0.85%): 1.35x speedup gen3 swfdec-giant-steps 2538.03 (2539.30 0.04%) -> 1895.71 (2050.62 62.43%): 1.34x speedup gen4 gvim 4356.18 (4422.78 0.70%) -> 3276.31 (3281.69 0.13%): 1.33x speedup gen6 evolution 1242.13 (1245.44 0.72%) -> 953.76 (954.54 0.07%): 1.30x speedup gen6 firefox-planet-gnome 4554.23 (4560.69 0.08%) -> 3758.76 (3768.97 0.28%): 1.21x speedup gen3 firefox-talos-gfx 6264.13 (6284.65 0.30%) -> 5261.56 (5370.87 1.28%): 1.19x speedup gen4 midori-zoomed 4771.13 (4809.90 0.73%) -> 4037.03 (4118.93 0.85%): 1.18x speedup gen6 swfdec-giant-steps 1557.06 (1560.13 0.12%) -> 1336.34 (1341.29 0.32%): 1.17x speedup gen4 firefox-talos-gfx 80767.28 (80986.31 0.17%) -> 69629.08 (69721.71 0.06%): 1.16x speedup gen6 midori-zoomed 1463.70 (1463.76 0.08%) -> 1331.45 (1336.56 0.22%): 1.10x speedup Slowdowns ========= gen6 xfce4-terminal-a1 2030.25 (2036.23 0.25%) -> 2144.60 (2240.31 4.29%): 1.06x slowdown gen4 swfdec-youtube 3580.00 (3597.23 3.92%) -> 3826.90 (3862.24 0.91%): 1.07x slowdown gen4 firefox-talos-svg 66112.25 (66256.51 0.11%) -> 71433.40 (71584.31 0.14%): 1.08x slowdown gen4 gnome-system-monitor 5691.60 (5724.03 0.56%) -> 6707.56 (6747.83 0.33%): 1.18x slowdown gen3 ocitysmap 3494.05 (3502.44 0.20%) -> 4321.99 (4524.42 2.78%): 1.24x slowdown gen4 ocitysmap 3628.42 (3641.66 9.37%) -> 5177.16 (5828.74 8.38%): 1.43x slowdown gen5 ocitysmap 4027.77 (4068.11 0.80%) -> 5748.26 (6282.25 7.38%): 1.43x slowdown gen6 ocitysmap 1401.61 (1402.24 0.40%) -> 2365.74 (2379.14 4.12%): 1.69x slowdown [Note the performance regression for ocitysmap comes from that we now attempt to support rendering to and (more importantly) from large surfaces. By enabling such operations is the only way to one day be faster than purely using the CPU, in the meantime we suffer regression due to the increased migration and aperture thrashing. The other couple of regressions will be eliminated with improved span and shader support, now that the framework for such is in place.] The performance increase for Cairo completely overlooks the other critical aspects of the architecture: World of Padman: gen3 (800x600): 57.5 -> 96.2 gen4 (800x600): 47.8 -> 74.6 gen6 (1366x768): 100.4 -> 140.3 [F15] 144.3 -> 146.4 [drm-intel-next] x11perf (gen6); aa10text: 3.47 -> 14.3 Mglyphs/s [unthrottled!] copywinwin10: 1.66 -> 1.99 Mops/s copywinpix10: 2.28 -> 2.98 Mops/s And we do not have a good measure for how much improvement the reworking of the fallback paths give, except that xterm is now over 4x faster... PS: This depends upon the Xorg patchset "Remove the cacheing of the last scratch PixmapRec" for correct invalidations of scratch Pixmaps (used by the dix to implement SHM operations, used by chromium and gtk+ pixbufs. PPS: ./configure --enable-sna Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Diffstat (more/less context) (ignore whitespace changes)
-rw-r--r--Makefile.am4
-rw-r--r--configure.ac70
-rw-r--r--man/intel.man7
-rw-r--r--src/Makefile.am5
-rw-r--r--src/intel_module.c70
-rw-r--r--src/sna/Makefile.am115
-rw-r--r--src/sna/README30
-rw-r--r--src/sna/blt.c73
-rw-r--r--src/sna/gen2_render.c1237
-rw-r--r--src/sna/gen2_render.h785
-rw-r--r--src/sna/gen3_render.c3694
-rw-r--r--src/sna/gen3_render.h1479
-rw-r--r--src/sna/gen4_render.c2762
-rw-r--r--src/sna/gen4_render.h2643
-rw-r--r--src/sna/gen5_render.c2841
-rw-r--r--src/sna/gen5_render.h2730
-rw-r--r--src/sna/gen6_render.c2860
-rw-r--r--src/sna/gen6_render.h1598
-rw-r--r--src/sna/kgem.c1775
-rw-r--r--src/sna/kgem.h332
-rw-r--r--src/sna/kgem_debug.c390
-rw-r--r--src/sna/kgem_debug.h28
-rw-r--r--src/sna/kgem_debug_gen3.c1615
-rw-r--r--src/sna/kgem_debug_gen4.c711
-rw-r--r--src/sna/kgem_debug_gen5.c687
-rw-r--r--src/sna/kgem_debug_gen6.c1099
-rw-r--r--src/sna/sna.h596
-rw-r--r--src/sna/sna_accel.c3306
-rw-r--r--src/sna/sna_blt.c1339
-rw-r--r--src/sna/sna_composite.c722
-rw-r--r--src/sna/sna_damage.c944
-rw-r--r--src/sna/sna_damage.h94
-rw-r--r--src/sna/sna_display.c1656
-rw-r--r--src/sna/sna_dri.c1446
-rw-r--r--src/sna/sna_driver.c925
-rw-r--r--src/sna/sna_glyphs.c1145
-rw-r--r--src/sna/sna_gradient.c335
-rw-r--r--src/sna/sna_io.c452
-rw-r--r--src/sna/sna_module.h3
-rw-r--r--src/sna/sna_reg.h108
-rw-r--r--src/sna/sna_render.c888
-rw-r--r--src/sna/sna_render.h511
-rw-r--r--src/sna/sna_render_inline.h102
-rw-r--r--src/sna/sna_stream.c99
-rw-r--r--src/sna/sna_tiling.c264
-rw-r--r--src/sna/sna_transform.c139
-rw-r--r--src/sna/sna_trapezoids.c2375
-rw-r--r--src/sna/sna_video.c737
-rw-r--r--src/sna/sna_video.h130
-rw-r--r--src/sna/sna_video_hwmc.c252
-rw-r--r--src/sna/sna_video_hwmc.h74
-rw-r--r--src/sna/sna_video_overlay.c731
-rw-r--r--src/sna/sna_video_textured.c428
-rw-r--r--test/.gitignore13
-rw-r--r--test/Makefile.am30
-rw-r--r--test/README3
-rw-r--r--test/basic-copyarea-size.c102
-rw-r--r--test/basic-copyarea.c301
-rw-r--r--test/basic-fillrect.c263
-rw-r--r--test/basic-putimage.c283
-rw-r--r--test/basic-stress.c155
-rw-r--r--test/mixed-stress.c208
-rw-r--r--test/render-composite-solid.c255
-rw-r--r--test/render-copyarea-size.c115
-rw-r--r--test/render-copyarea.c324
-rw-r--r--test/render-fill-copy.c279
-rw-r--r--test/render-fill.c247
-rw-r--r--test/render-trapezoid-image.c615
-rw-r--r--test/render-trapezoid.c434
-rw-r--r--test/test.h118
-rw-r--r--test/test_display.c150
-rw-r--r--test/test_image.c217
-rw-r--r--test/test_log.c17
-rw-r--r--test/test_render.c149
74 files changed, 53709 insertions, 10 deletions
diff --git a/Makefile.am b/Makefile.am
index 83948ab..29c04fd 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -23,6 +23,10 @@ ACLOCAL_AMFLAGS = -I m4 ${ACLOCAL_FLAGS}
SUBDIRS = uxa src man
MAINTAINERCLEANFILES = ChangeLog INSTALL
+if HAVE_X11
+SUBDIRS += test
+endif
+
.PHONY: ChangeLog INSTALL
INSTALL:
diff --git a/configure.ac b/configure.ac
index 9449e56..b4b693e 100644
--- a/configure.ac
+++ b/configure.ac
@@ -67,6 +67,9 @@ if test x"$udev" = xyes; then
AC_DEFINE(HAVE_UDEV,1,[Enable udev-based monitor hotplug detection])
fi
+PKG_CHECK_MODULES(X11, [x11 xrender xext pixman-1], [x11=yes], [x11=no])
+AM_CONDITIONAL(HAVE_X11, test x$x11 = xyes)
+
AH_TOP([#include "xorg-server.h"])
# Define a configure option for an alternate module directory
@@ -89,11 +92,23 @@ AC_ARG_ENABLE(kms-only, AS_HELP_STRING([--enable-kms-only],
[KMS_ONLY="$enableval"],
[KMS_ONLY=no])
+AC_ARG_ENABLE(sna,
+ AS_HELP_STRING([--enable-sna],
+ [Enable SandyBridge's New Acceleration (SNA) [options=default|gen2|gen3|ge4|gen5|gen6]]),
+ [SNA="$enableval"],
+ [SNA=no])
+
+AC_ARG_ENABLE(vmap,
+ AS_HELP_STRING([--enable-vmap],
+ [Enable use of vmap [default=no]]),
+ [VMAP="$enableval"],
+ [VMAP=no])
+
AC_ARG_ENABLE(debug,
AS_HELP_STRING([--enable-debug],
- [Enables internal debugging [[default=yes]]]),
+ [Enables internal debugging [default=no]]),
[DEBUG="$enableval"],
- [DEBUG=yes])
+ [DEBUG=no])
# Store the list of server defined optional extensions in REQUIRED_MODULES
XORG_DRIVER_CHECK_EXT(RANDR, randrproto)
@@ -165,9 +180,58 @@ if test "x$KMS_ONLY" = xyes; then
AC_DEFINE(KMS_ONLY,1,[Assume KMS support])
fi
+AM_CONDITIONAL(USE_VMAP, test x$VMAP = xyes)
+if test "x$VMAP" = xyes; then
+ AC_DEFINE(USE_VMAP,1,[Assume VMAP support])
+fi
+
+AM_CONDITIONAL(SNA, test x$SNA != xno)
+AM_CONDITIONAL(SNA_GEN2, [echo $SNA | grep -E -qsi '(yes)|(all)|(gen2)'])
+AM_CONDITIONAL(SNA_GEN3, [echo $SNA | grep -E -qsi '(yes)|(all)|(pnv)|(gen3)'])
+AM_CONDITIONAL(SNA_GEN4, [echo $SNA | grep -E -qsi '(yes)|(all)|(brw)|(gen4)'])
+AM_CONDITIONAL(SNA_GEN5, [echo $SNA | grep -E -qsi '(yes)|(all)|(ilk)|(gen5)'])
+AM_CONDITIONAL(SNA_GEN6, [echo $SNA | grep -E -qsi '(yes)|(all)|(snb)|(gen6)'])
+AC_MSG_CHECKING([whether to include SNA support])
+sna_drivers="no"
+if test "x$SNA" != xno; then
+ sna_drivers=""
+ AC_DEFINE(SNA,1,[Enable SandyBridge's New Architecture])
+ if echo $SNA | grep -E -qsi '(yes)|(default)'; then
+ AC_DEFINE(SNA_DEFAULT,1,[Enable SandyBridge's New Architecture by default])
+ sna_drivers="default $sna_drivers"
+ fi
+ if echo $SNA | grep -E -qsi '(yes)|(all)|(gen2)'; then
+ AC_DEFINE(SNA_GEN2,1,[Enable SandyBridge's New Architecture for GEN2])
+ sna_drivers="i8xx $sna_drivers"
+ fi
+ if echo $SNA | grep -E -qsi '(yes)|(all)|(pnv)|(gen3)'; then
+ AC_DEFINE(SNA_GEN3,1,[Enable SandyBridge's New Architecture for PNV])
+ sna_drivers="pnv $sna_drivers"
+ fi
+ if echo $SNA | grep -E -qsi '(yes)|(all)|(brw)|(gen4)'; then
+ AC_DEFINE(SNA_GEN4,1,[Enable SandyBridge's New Architecture for BRW])
+ sna_drivers="brw $sna_drivers"
+ fi
+ if echo $SNA | grep -E -qsi '(yes)|(all)|(ilk)|(gen5)'; then
+ AC_DEFINE(SNA_GEN5,1,[Enable SandyBridge's New Architecture for ILK])
+ sna_drivers="ilk $sna_drivers"
+ fi
+ if echo $SNA | grep -E -qsi '(yes)|(all)|(snb)|(gen6)'; then
+ AC_DEFINE(SNA_GEN6,1,[Enable SandyBridge's New Architecture for SNB])
+ sna_drivers="snb $sna_drivers"
+ fi
+fi
+AC_MSG_RESULT([$sna_drivers])
+
+AM_CONDITIONAL(DEBUG, test x$DEBUG = xyes)
if test "x$DEBUG" = xno; then
AC_DEFINE(NDEBUG,1,[Disable internal debugging])
fi
+if test "x$DEBUG" = xyes; then
+ AC_DEFINE(HAS_EXTRA_DEBUG,1,[Enable additional debugging])
+fi
+
+AC_CHECK_HEADERS([sys/timerfd.h])
DRIVER_NAME=intel
AC_SUBST([DRIVER_NAME])
@@ -184,7 +248,9 @@ AC_CONFIG_FILES([
src/legacy/Makefile
src/legacy/i810/Makefile
src/legacy/i810/xvmc/Makefile
+ src/sna/Makefile
man/Makefile
src/render_program/Makefile
+ test/Makefile
])
AC_OUTPUT
diff --git a/man/intel.man b/man/intel.man
index 85e2b2e..ab46db2 100644
--- a/man/intel.man
+++ b/man/intel.man
@@ -210,6 +210,13 @@ User should provide absolute path to libIntelXvMC.so in XvMCConfig file.
.IP
Default: Disabled.
.TP
+.BI "Option \*qThrottle\*q \*q" boolean \*q
+This option controls whether the driver periodically waits for pending
+drawing operations to complete. Throttling ensures that the GPU does not
+lag too far behind the CPU and thus noticeable delays in user responsible at
+the cost of throughput performance.
+.IP
+Default: enabled.
.BI "Option \*qHotPlug\*q \*q" boolean \*q
This option controls whether the driver automatically notifies
applications when monitors are connected or disconnected.
diff --git a/src/Makefile.am b/src/Makefile.am
index abb03c3..a7f219c 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -35,6 +35,11 @@ intel_drv_ladir = @moduledir@/drivers
intel_drv_la_LIBADD = @UDEV_LIBS@ -lm @DRM_LIBS@ -ldrm_intel ../uxa/libuxa.la legacy/liblegacy.la
intel_drv_la_LIBADD += @PCIACCESS_LIBS@
+if SNA
+SUBDIRS += sna
+intel_drv_la_LIBADD += sna/libsna.la
+endif
+
NULL:=#
intel_drv_la_SOURCES = \
diff --git a/src/intel_module.c b/src/intel_module.c
index 9468e72..9b1da49 100644
--- a/src/intel_module.c
+++ b/src/intel_module.c
@@ -36,6 +36,7 @@
#include "intel.h"
#include "intel_driver.h"
#include "legacy/legacy.h"
+#include "sna/sna_module.h"
#include <xf86drmMode.h>
@@ -320,22 +321,49 @@ static Bool intel_pci_probe(DriverPtr driver,
scrn->name = INTEL_NAME;
scrn->Probe = NULL;
-#if KMS_ONLY
- intel_init_scrn(scrn);
-#else
switch (DEVICE_ID(device)) {
+#if !KMS_ONLY
case PCI_CHIP_I810:
case PCI_CHIP_I810_DC100:
case PCI_CHIP_I810_E:
case PCI_CHIP_I815:
lg_i810_init(scrn);
break;
+#endif
+#if SNA
+ case 0:
+#if SNA_GEN3
+ case PCI_CHIP_PINEVIEW_M:
+ case PCI_CHIP_PINEVIEW_G:
+ case PCI_CHIP_G33_G:
+ case PCI_CHIP_Q35_G:
+ case PCI_CHIP_Q33_G:
+#endif
+#if SNA_GEN5
+ case PCI_CHIP_IRONLAKE_D_G:
+ case PCI_CHIP_IRONLAKE_M_G:
+#endif
+#if SNA_GEN6
+ case PCI_CHIP_SANDYBRIDGE_GT1:
+ case PCI_CHIP_SANDYBRIDGE_GT2:
+ case PCI_CHIP_SANDYBRIDGE_GT2_PLUS:
+ case PCI_CHIP_SANDYBRIDGE_M_GT1:
+ case PCI_CHIP_SANDYBRIDGE_M_GT2:
+ case PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS:
+ case PCI_CHIP_SANDYBRIDGE_S_GT:
+#endif
+ sna_init_scrn(scrn);
+ break;
+#endif
default:
+#if SNA_DEFAULT
+ sna_init_scrn(scrn);
+#else
intel_init_scrn(scrn);
+#endif
break;
}
-#endif
}
return scrn != NULL;
}
@@ -360,20 +388,46 @@ static XF86ModuleVersionInfo intel_version = {
static const OptionInfoRec *
intel_available_options(int chipid, int busid)
{
-#if KMS_ONLY
- return intel_uxa_available_options(chipid, busid);
-#else
switch (chipid) {
+#if !KMS_ONLY
case PCI_CHIP_I810:
case PCI_CHIP_I810_DC100:
case PCI_CHIP_I810_E:
case PCI_CHIP_I815:
return lg_i810_available_options(chipid, busid);
+#endif
+#if SNA
+ case 0:
+#if SNA_GEN3
+ case PCI_CHIP_PINEVIEW_M:
+ case PCI_CHIP_PINEVIEW_G:
+ case PCI_CHIP_G33_G:
+ case PCI_CHIP_Q35_G:
+ case PCI_CHIP_Q33_G:
+#endif
+#if SNA_GEN5
+ case PCI_CHIP_IRONLAKE_D_G:
+ case PCI_CHIP_IRONLAKE_M_G:
+#endif
+#if SNA_GEN6
+ case PCI_CHIP_SANDYBRIDGE_GT1:
+ case PCI_CHIP_SANDYBRIDGE_GT2:
+ case PCI_CHIP_SANDYBRIDGE_GT2_PLUS:
+ case PCI_CHIP_SANDYBRIDGE_M_GT1:
+ case PCI_CHIP_SANDYBRIDGE_M_GT2:
+ case PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS:
+ case PCI_CHIP_SANDYBRIDGE_S_GT:
+#endif
+ return sna_available_options(chipid, busid);
+#endif
default:
+#if SNA_DEFAULT
+ return sna_available_options(chipid, busid);
+#else
return intel_uxa_available_options(chipid, busid);
- }
#endif
+ }
}
static DriverRec intel = {
diff --git a/src/sna/Makefile.am b/src/sna/Makefile.am
new file mode 100644
index 0000000..f65b281
--- a/dev/null
+++ b/src/sna/Makefile.am
@@ -0,0 +1,115 @@
+# Copyright 2005 Adam Jackson.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# on the rights to use, copy, modify, merge, publish, distribute, sub
+# license, and/or sell copies of the Software, and to permit persons to whom
+# the Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+# ADAM JACKSON BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+AM_CFLAGS = @CWARNFLAGS@ @XORG_CFLAGS@ @UDEV_CFLAGS@ @DRM_CFLAGS@ @DRI_CFLAGS@ \
+ -I$(top_srcdir)/src -I$(top_srcdir)/uxa -I$(top_srcdir)/src/render_program
+
+noinst_LTLIBRARIES = libsna.la
+libsna_la_LIBADD = @UDEV_LIBS@ -lm @DRM_LIBS@
+
+NULL:=#
+
+libsna_la_SOURCES = \
+ blt.c \
+ kgem.c \
+ kgem.h \
+ sna.h \
+ sna_accel.c \
+ sna_blt.c \
+ sna_composite.c \
+ sna_damage.c \
+ snd_damage.h \
+ sna_display.c \
+ sna_driver.c \
+ sna_driver.h \
+ sna_glyphs.c \
+ sna_gradient.c \
+ sna_io.c \
+ sna_render.c \
+ sna_render.h \
+ sna_render_inline.h \
+ sna_reg.h \
+ sna_stream.c \
+ sna_trapezoids.c \
+ sna_tiling.c \
+ sna_transform.c \
+ sna_video.c \
+ sna_video.h \
+ sna_video_overlay.c \
+ sna_video_textured.c \
+ $(NULL)
+
+if SNA_GEN2
+libsna_la_SOURCES += \
+ gen2_render.c \
+ gen2_render.h \
+ $(NULL)
+endif
+if SNA_GEN3
+libsna_la_SOURCES += \
+ gen3_render.c \
+ gen3_render.h \
+ $(NULL)
+endif
+if SNA_GEN4
+libsna_la_SOURCES += \
+ gen4_render.c \
+ gen4_render.h \
+ $(NULL)
+endif
+if SNA_GEN5
+libsna_la_SOURCES += \
+ gen5_render.c \
+ gen5_render.h \
+ $(NULL)
+endif
+if SNA_GEN6
+libsna_la_SOURCES += \
+ gen6_render.c \
+ gen6_render.h \
+ $(NULL)
+endif
+
+if DRI
+libsna_la_SOURCES += \
+ sna_dri.c \
+ $(NULL)
+libsna_la_LIBADD += \
+ $(DRI_LIBS) \
+ $(NULL)
+endif
+
+if XVMC
+libsna_la_SOURCES += \
+ sna_video_hwmc.h \
+ sna_video_hwmc.c \
+ $(NULL)
+endif
+
+if DEBUG
+libsna_la_SOURCES += \
+ kgem_debug.c \
+ kgem_debug.h \
+ kgem_debug_gen3.c \
+ kgem_debug_gen4.c \
+ kgem_debug_gen5.c \
+ kgem_debug_gen6.c \
+ $(NULL)
+endif
diff --git a/src/sna/README b/src/sna/README
new file mode 100644
index 0000000..fd847de
--- a/dev/null
+++ b/src/sna/README
@@ -0,0 +1,30 @@
+SandyBridge's New Acceleration
+------------------------------
+
+The guiding principle behind the design is to avoid GPU context switches.
+On SandyBridge (and beyond), these are especially pernicious because the
+RENDER and BLT engine are now on different rings and require
+synchronisation of the various execution units when switching contexts.
+They were not cheap on early generation, but with the increasing
+complexity of the GPU, avoiding such serialisations is important.
+
+Furthermore, we try very hard to avoid migrating between the CPU and GPU.
+Every pixmap (apart from temporary "scratch" surfaces which we intend to
+use on the GPU) is created in system memory. All operations are then done
+upon this shadow copy until we are forced to move it onto the GPU. Such
+migration can only be first triggered by: setting the pixmap as the
+scanout (we obviously need a GPU buffer here), using the pixmap as a DRI
+buffer (the client expects to perform hardware acceleration and we do not
+want to disappoint) and lastly using the pixmap as a RENDER target. This
+last is chosen because when we know we are going to perform hardware
+acceleration and will continue to do so without fallbacks, using the GPU
+is much, much faster than the CPU. The heuristic I chose therefore was
+that if the application uses RENDER, i.e. cairo, then it will only be
+using those paths and not intermixing core drawing operations and so
+unlikely to trigger a fallback.
+
+The complicating case is front-buffer rendering. So in order to accommodate
+using RENDER on an application whilst running xterm without a composite
+manager redirecting all the pixmaps to backing surfaces, we have to
+perform damage tracking to avoid excess migration of portions of the
+buffer.
diff --git a/src/sna/blt.c b/src/sna/blt.c
new file mode 100644
index 0000000..ac20372
--- a/dev/null
+++ b/src/sna/blt.c
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "sna.h"
+
+#if DEBUG_BLT
+#undef DBG
+#define DBG(x) ErrorF x
+#else
+#define NDEBUG 1
+#endif
+
+void
+memcpy_blt(const void *src, void *dst, int bpp,
+ uint16_t src_stride, uint16_t dst_stride,
+ int16_t src_x, int16_t src_y,
+ int16_t dst_x, int16_t dst_y,
+ uint16_t width, uint16_t height)
+{
+ uint8_t *src_bytes;
+ uint8_t *dst_bytes;
+
+ assert(width && height);
+ assert(bpp >= 8);
+
+ DBG(("%s: src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n",
+ __FUNCTION__, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride));
+
+ bpp /= 8;
+ width *= bpp;
+
+ src_bytes = (uint8_t *)src + src_stride * src_y + src_x * bpp;
+ dst_bytes = (uint8_t *)dst + dst_stride * dst_y + dst_x * bpp;
+
+ if (width == src_stride && width == dst_stride) {
+ memcpy(dst_bytes, src_bytes, width * height);
+ return;
+ }
+
+ do {
+ memcpy(dst_bytes, src_bytes, width);
+ src_bytes += src_stride;
+ dst_bytes += dst_stride;
+ } while (--height);
+}
diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c
new file mode 100644
index 0000000..896f730
--- a/dev/null
+++ b/src/sna/gen2_render.c
@@ -0,0 +1,1237 @@
+/*
+ * Copyright © 2006,2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Wang Zhenyu <zhenyu.z.wang@intel.com>
+ * Eric Anholt <eric@anholt.net>
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "sna.h"
+#include "sna_reg.h"
+#include "sna_render.h"
+#include "sna_render_inline.h"
+
+#include "gen2_render.h"
+
+#if DEBUG_RENDER
+#undef DBG
+#define DBG(x) ErrorF x
+#else
+#define NDEBUG 1
+#endif
+
+#define OUT_BATCH(v) batch_emit(sna, v)
+#define OUT_BATCH_F(v) batch_emit_float(sna, v)
+#define OUT_VERTEX(v) batch_emit_float(sna, v)
+
+static const struct blendinfo {
+ Bool dst_alpha;
+ Bool src_alpha;
+ uint32_t src_blend;
+ uint32_t dst_blend;
+} gen2_blend_op[] = {
+ /* Clear */
+ {0, 0, BLENDFACTOR_ZERO, BLENDFACTOR_ZERO},
+ /* Src */
+ {0, 0, BLENDFACTOR_ONE, BLENDFACTOR_ZERO},
+ /* Dst */
+ {0, 0, BLENDFACTOR_ZERO, BLENDFACTOR_ONE},
+ /* Over */
+ {0, 1, BLENDFACTOR_ONE, BLENDFACTOR_INV_SRC_ALPHA},
+ /* OverReverse */
+ {1, 0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ONE},
+ /* In */
+ {1, 0, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_ZERO},
+ /* InReverse */
+ {0, 1, BLENDFACTOR_ZERO, BLENDFACTOR_SRC_ALPHA},
+ /* Out */
+ {1, 0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ZERO},
+ /* OutReverse */
+ {0, 1, BLENDFACTOR_ZERO, BLENDFACTOR_INV_SRC_ALPHA},
+ /* Atop */
+ {1, 1, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA},
+ /* AtopReverse */
+ {1, 1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_SRC_ALPHA},
+ /* Xor */
+ {1, 1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA},
+ /* Add */
+ {0, 0, BLENDFACTOR_ONE, BLENDFACTOR_ONE},
+};
+
+static const struct formatinfo {
+ int fmt;
+ uint32_t card_fmt;
+} i8xx_tex_formats[] = {
+ {PICT_a8, MAPSURF_8BIT | MT_8BIT_A8},
+ {PICT_a8r8g8b8, MAPSURF_32BIT | MT_32BIT_ARGB8888},
+ {PICT_a8b8g8r8, MAPSURF_32BIT | MT_32BIT_ABGR8888},
+ {PICT_r5g6b5, MAPSURF_16BIT | MT_16BIT_RGB565},
+ {PICT_a1r5g5b5, MAPSURF_16BIT | MT_16BIT_ARGB1555},
+ {PICT_a4r4g4b4, MAPSURF_16BIT | MT_16BIT_ARGB4444},
+}, i85x_tex_formats[] = {
+ {PICT_x8r8g8b8, MAPSURF_32BIT | MT_32BIT_XRGB8888},
+ {PICT_x8b8g8r8, MAPSURF_32BIT | MT_32BIT_XBGR8888},
+};
+
+static inline uint32_t
+gen2_buf_tiling(uint32_t tiling)
+{
+ uint32_t v = 0;
+ switch (tiling) {
+ case I915_TILING_Y: v |= BUF_3D_TILE_WALK_Y;
+ case I915_TILING_X: v |= BUF_3D_TILED_SURFACE;
+ case I915_TILING_NONE: break;
+ }
+ return v;
+}
+
+static uint32_t
+gen2_get_dst_format(uint32_t format)
+{
+#define BIAS DSTORG_HORT_BIAS(0x8) | DSTORG_VERT_BIAS(0x8)
+ switch (format) {
+ default:
+ assert(0);
+ case PICT_a8r8g8b8:
+ case PICT_x8r8g8b8:
+ return COLR_BUF_ARGB8888 | BIAS;
+ case PICT_r5g6b5:
+ return COLR_BUF_RGB565 | BIAS;
+ case PICT_a1r5g5b5:
+ case PICT_x1r5g5b5:
+ return COLR_BUF_ARGB1555 | BIAS;
+ case PICT_a8:
+ return COLR_BUF_8BIT | BIAS;
+ case PICT_a4r4g4b4:
+ case PICT_x4r4g4b4:
+ return COLR_BUF_ARGB4444 | BIAS;
+ }
+#undef BIAS
+}
+
+static Bool
+gen2_check_dst_format(uint32_t format)
+{
+ switch (format) {
+ case PICT_a8r8g8b8:
+ case PICT_x8r8g8b8:
+ case PICT_r5g6b5:
+ case PICT_a1r5g5b5:
+ case PICT_x1r5g5b5:
+ case PICT_a8:
+ case PICT_a4r4g4b4:
+ case PICT_x4r4g4b4:
+ return TRUE;
+ default:
+ return FALSE;
+ }
+}
+
+static uint32_t
+gen2_get_card_format(struct sna *sna, uint32_t format)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(i8xx_tex_formats); i++) {
+ if (i8xx_tex_formats[i].fmt == format)
+ return i8xx_tex_formats[i].card_fmt;
+ }
+
+ if (!(IS_I830(sna) || IS_845G(sna))) {
+ for (i = 0; i < ARRAY_SIZE(i85x_tex_formats); i++) {
+ if (i85x_tex_formats[i].fmt == format)
+ return i85x_tex_formats[i].card_fmt;
+ }
+ }
+
+ assert(0);
+ return 0;
+}
+
+static Bool
+gen2_check_card_format(struct sna *sna, uint32_t format)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(i8xx_tex_formats); i++) {
+ if (i8xx_tex_formats[i].fmt == format)
+ return TRUE;
+ }
+
+ if (!(IS_I830(sna) || IS_845G(sna))) {
+ for (i = 0; i < ARRAY_SIZE(i85x_tex_formats); i++) {
+ if (i85x_tex_formats[i].fmt == format)
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+static uint32_t
+gen2_sampler_tiling_bits(uint32_t tiling)
+{
+ uint32_t bits = 0;
+ switch (tiling) {
+ default:
+ assert(0);
+ case I915_TILING_Y:
+ bits |= TM0S1_TILE_WALK;
+ case I915_TILING_X:
+ bits |= TM0S1_TILED_SURFACE;
+ case I915_TILING_NONE:
+ break;
+ }
+ return bits;
+}
+
+static Bool
+gen2_check_filter(PicturePtr picture)
+{
+ switch (picture->filter) {
+ case PictFilterNearest:
+ case PictFilterBilinear:
+ return TRUE;
+ default:
+ return FALSE;
+ }
+}
+
+static Bool
+gen2_check_repeat(PicturePtr picture)
+{
+ if (!picture->repeat)
+ return TRUE;
+
+ switch (picture->repeatType) {
+ case RepeatNone:
+ case RepeatNormal:
+ case RepeatPad:
+ case RepeatReflect:
+ return TRUE;
+ default:
+ return FALSE;
+ }
+}
+
+static void
+gen2_emit_texture(struct sna *sna,
+ const struct sna_composite_channel *channel,
+ int unit)
+{
+ uint32_t filter;
+ uint32_t wrap_mode;
+ uint32_t texcoordtype;
+
+ if (channel->is_affine)
+ texcoordtype = TEXCOORDTYPE_CARTESIAN;
+ else
+ texcoordtype = TEXCOORDTYPE_HOMOGENEOUS;
+
+ switch (channel->repeat) {
+ default:
+ assert(0);
+ case RepeatNone:
+ wrap_mode = TEXCOORDMODE_CLAMP_BORDER;
+ break;
+ case RepeatNormal:
+ wrap_mode = TEXCOORDMODE_WRAP;
+ break;
+ case RepeatPad:
+ wrap_mode = TEXCOORDMODE_CLAMP;
+ break;
+ case RepeatReflect:
+ wrap_mode = TEXCOORDMODE_MIRROR;
+ break;
+ }
+
+ switch (channel->filter) {
+ default:
+ assert(0);
+ case PictFilterNearest:
+ filter = (FILTER_NEAREST << TM0S3_MAG_FILTER_SHIFT |
+ FILTER_NEAREST << TM0S3_MIN_FILTER_SHIFT);
+ break;
+ case PictFilterBilinear:
+ filter = (FILTER_LINEAR << TM0S3_MAG_FILTER_SHIFT |
+ FILTER_LINEAR << TM0S3_MIN_FILTER_SHIFT);
+ break;
+ }
+ filter |= MIPFILTER_NONE << TM0S3_MIP_FILTER_SHIFT;
+
+ OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
+ LOAD_TEXTURE_MAP(unit) | 4);
+ OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
+ channel->bo,
+ I915_GEM_DOMAIN_SAMPLER << 16,
+ 0));
+ OUT_BATCH(((channel->height - 1) << TM0S1_HEIGHT_SHIFT) |
+ ((channel->width - 1) << TM0S1_WIDTH_SHIFT) |
+ gen2_get_card_format(sna, channel->pict_format) |
+ gen2_sampler_tiling_bits(channel->bo->tiling));
+ OUT_BATCH((channel->bo->pitch / 4 - 1) << TM0S2_PITCH_SHIFT | TM0S2_MAP_2D);
+ OUT_BATCH(filter);
+ OUT_BATCH(0); /* default color */
+ OUT_BATCH(_3DSTATE_MAP_COORD_SET_CMD | TEXCOORD_SET(unit) |
+ ENABLE_TEXCOORD_PARAMS | TEXCOORDS_ARE_NORMAL |
+ texcoordtype |
+ ENABLE_ADDR_V_CNTL | TEXCOORD_ADDR_V_MODE(wrap_mode) |
+ ENABLE_ADDR_U_CNTL | TEXCOORD_ADDR_U_MODE(wrap_mode));
+ /* map texel stream */
+ OUT_BATCH(_3DSTATE_MAP_COORD_SETBIND_CMD);
+ if (unit == 0)
+ OUT_BATCH(TEXBIND_SET0(TEXCOORDSRC_VTXSET_0) |
+ TEXBIND_SET1(TEXCOORDSRC_KEEP) |
+ TEXBIND_SET2(TEXCOORDSRC_KEEP) |
+ TEXBIND_SET3(TEXCOORDSRC_KEEP));
+ else
+ OUT_BATCH(TEXBIND_SET0(TEXCOORDSRC_VTXSET_0) |
+ TEXBIND_SET1(TEXCOORDSRC_VTXSET_1) |
+ TEXBIND_SET2(TEXCOORDSRC_KEEP) |
+ TEXBIND_SET3(TEXCOORDSRC_KEEP));
+ OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD |
+ (unit << 16) |
+ DISABLE_TEX_STREAM_BUMP |
+ ENABLE_TEX_STREAM_COORD_SET |
+ TEX_STREAM_COORD_SET(unit) |
+ ENABLE_TEX_STREAM_MAP_IDX |
+ TEX_STREAM_MAP_IDX(unit));
+}
+
+static void
+gen2_get_blend_factors(const struct sna_composite_op *op,
+ uint32_t *c_out,
+ uint32_t *a_out)
+{
+ uint32_t cblend, ablend;
+
+ /* If component alpha is active in the mask and the blend operation
+ * uses the source alpha, then we know we don't need the source
+ * value (otherwise we would have hit a fallback earlier), so we
+ * provide the source alpha (src.A * mask.X) as output color.
+ * Conversely, if CA is set and we don't need the source alpha, then
+ * we produce the source value (src.X * mask.X) and the source alpha
+ * is unused.. Otherwise, we provide the non-CA source value
+ * (src.X * mask.A).
+ *
+ * The PICT_FORMAT_RGB(pict) == 0 fixups are not needed on 855+'s a8
+ * pictures, but we need to implement it for 830/845 and there's no
+ * harm done in leaving it in.
+ */
+ cblend =
+ TB0C_LAST_STAGE | TB0C_RESULT_SCALE_1X | TB0C_OP_MODULE |
+ TB0C_OUTPUT_WRITE_CURRENT;
+ ablend =
+ TB0A_RESULT_SCALE_1X | TB0A_OP_MODULE |
+ TB0A_OUTPUT_WRITE_CURRENT;
+
+ /* Get the source picture's channels into TBx_ARG1 */
+ if ((op->has_component_alpha && gen2_blend_op[op->op].src_alpha) ||
+ op->dst.format == PICT_a8) {
+ /* Producing source alpha value, so the first set of channels
+ * is src.A instead of src.X. We also do this if the destination
+ * is a8, in which case src.G is what's written, and the other
+ * channels are ignored.
+ */
+ ablend |= TB0A_ARG1_SEL_TEXEL0;
+ cblend |= TB0C_ARG1_SEL_TEXEL0 | TB0C_ARG1_REPLICATE_ALPHA;
+ } else {
+ if (PICT_FORMAT_RGB(op->src.pict_format) != 0)
+ cblend |= TB0C_ARG1_SEL_TEXEL0;
+ else
+ cblend |= TB0C_ARG1_SEL_ONE | TB0C_ARG1_INVERT; /* 0.0 */
+ ablend |= TB0A_ARG1_SEL_TEXEL0;
+ }
+
+ if (op->mask.bo) {
+ cblend |= TB0C_ARG2_SEL_TEXEL1;
+ if (op->dst.format == PICT_a8 || op->has_component_alpha)
+ cblend |= TB0C_ARG2_REPLICATE_ALPHA;
+ ablend |= TB0A_ARG2_SEL_TEXEL1;
+ } else {
+ cblend |= TB0C_ARG2_SEL_ONE;
+ ablend |= TB0A_ARG2_SEL_ONE;
+ }
+
+ *c_out = cblend;
+ *a_out = ablend;
+}
+
+static uint32_t gen2_get_blend_cntl(int op,
+ Bool has_component_alpha,
+ uint32_t dst_format)
+{
+ uint32_t sblend, dblend;
+
+ sblend = gen2_blend_op[op].src_blend;
+ dblend = gen2_blend_op[op].dst_blend;
+
+ /* If there's no dst alpha channel, adjust the blend op so that
+ * we'll treat it as always 1.
+ */
+ if (PICT_FORMAT_A(dst_format) == 0 && gen2_blend_op[op].dst_alpha) {
+ if (sblend == BLENDFACTOR_DST_ALPHA)
+ sblend = BLENDFACTOR_ONE;
+ else if (sblend == BLENDFACTOR_INV_DST_ALPHA)
+ sblend = BLENDFACTOR_ZERO;
+ }
+
+ /* If the source alpha is being used, then we should only be in a case
+ * where the source blend factor is 0, and the source blend value is
+ * the mask channels multiplied by the source picture's alpha.
+ */
+ if (has_component_alpha && gen2_blend_op[op].src_alpha) {
+ if (dblend == BLENDFACTOR_SRC_ALPHA)
+ dblend = BLENDFACTOR_SRC_COLR;
+ else if (dblend == BLENDFACTOR_INV_SRC_ALPHA)
+ dblend = BLENDFACTOR_INV_SRC_COLR;
+ }
+
+ return (sblend << S8_SRC_BLEND_FACTOR_SHIFT |
+ dblend << S8_DST_BLEND_FACTOR_SHIFT);
+}
+
+static void gen2_emit_invariant(struct sna *sna)
+{
+ OUT_BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(0));
+ OUT_BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(1));
+ OUT_BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(2));
+ OUT_BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(3));
+
+ OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
+ OUT_BATCH(0);
+
+ OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD);
+ OUT_BATCH(0);
+
+ OUT_BATCH(_3DSTATE_DFLT_Z_CMD);
+ OUT_BATCH(0);
+
+ OUT_BATCH(_3DSTATE_FOG_MODE_CMD);
+ OUT_BATCH(FOGFUNC_ENABLE |
+ FOG_LINEAR_CONST | FOGSRC_INDEX_Z | ENABLE_FOG_DENSITY);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+
+ OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD |
+ MAP_UNIT(0) |
+ DISABLE_TEX_STREAM_BUMP |
+ ENABLE_TEX_STREAM_COORD_SET |
+ TEX_STREAM_COORD_SET(0) |
+ ENABLE_TEX_STREAM_MAP_IDX |
+ TEX_STREAM_MAP_IDX(0));
+ OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD |
+ MAP_UNIT(1) |
+ DISABLE_TEX_STREAM_BUMP |
+ ENABLE_TEX_STREAM_COORD_SET |
+ TEX_STREAM_COORD_SET(1) |
+ ENABLE_TEX_STREAM_MAP_IDX |
+ TEX_STREAM_MAP_IDX(1));
+ OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD |
+ MAP_UNIT(2) |
+ DISABLE_TEX_STREAM_BUMP |
+ ENABLE_TEX_STREAM_COORD_SET |
+ TEX_STREAM_COORD_SET(2) |
+ ENABLE_TEX_STREAM_MAP_IDX |
+ TEX_STREAM_MAP_IDX(2));
+ OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD |
+ MAP_UNIT(3) |
+ DISABLE_TEX_STREAM_BUMP |
+ ENABLE_TEX_STREAM_COORD_SET |
+ TEX_STREAM_COORD_SET(3) |
+ ENABLE_TEX_STREAM_MAP_IDX |
+ TEX_STREAM_MAP_IDX(3));
+
+ OUT_BATCH(_3DSTATE_MAP_COORD_TRANSFORM);
+ OUT_BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(0));
+ OUT_BATCH(_3DSTATE_MAP_COORD_TRANSFORM);
+ OUT_BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(1));
+ OUT_BATCH(_3DSTATE_MAP_COORD_TRANSFORM);
+ OUT_BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(2));
+ OUT_BATCH(_3DSTATE_MAP_COORD_TRANSFORM);
+ OUT_BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(3));
+
+ OUT_BATCH(_3DSTATE_RASTER_RULES_CMD |
+ ENABLE_POINT_RASTER_RULE |
+ OGL_POINT_RASTER_RULE |
+ ENABLE_LINE_STRIP_PROVOKE_VRTX |
+ ENABLE_TRI_FAN_PROVOKE_VRTX |
+ ENABLE_TRI_STRIP_PROVOKE_VRTX |
+ LINE_STRIP_PROVOKE_VRTX(1) |
+ TRI_FAN_PROVOKE_VRTX(2) | TRI_STRIP_PROVOKE_VRTX(2));
+
+ OUT_BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
+ OUT_BATCH(_3DSTATE_SCISSOR_RECT_0_CMD);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+
+ OUT_BATCH(_3DSTATE_VERTEX_TRANSFORM);
+ OUT_BATCH(DISABLE_VIEWPORT_TRANSFORM | DISABLE_PERSPECTIVE_DIVIDE);
+
+ OUT_BATCH(_3DSTATE_W_STATE_CMD);
+ OUT_BATCH(MAGIC_W_STATE_DWORD1);
+ OUT_BATCH_F(1.0);
+
+ OUT_BATCH(_3DSTATE_COLOR_FACTOR_CMD);
+ OUT_BATCH(0x80808080); /* .5 required in alpha for GL_DOT3_RGBA_EXT */
+
+ OUT_BATCH(_3DSTATE_MAP_COORD_SETBIND_CMD);
+ OUT_BATCH(TEXBIND_SET3(TEXCOORDSRC_VTXSET_3) |
+ TEXBIND_SET2(TEXCOORDSRC_VTXSET_2) |
+ TEXBIND_SET1(TEXCOORDSRC_VTXSET_1) |
+ TEXBIND_SET0(TEXCOORDSRC_VTXSET_0));
+
+ /* copy from mesa */
+ OUT_BATCH(_3DSTATE_INDPT_ALPHA_BLEND_CMD |
+ DISABLE_INDPT_ALPHA_BLEND |
+ ENABLE_ALPHA_BLENDFUNC | ABLENDFUNC_ADD);
+
+ OUT_BATCH(_3DSTATE_FOG_COLOR_CMD |
+ FOG_COLOR_RED(0) | FOG_COLOR_GREEN(0) | FOG_COLOR_BLUE(0));
+
+ OUT_BATCH(_3DSTATE_CONST_BLEND_COLOR_CMD);
+ OUT_BATCH(0);
+
+ OUT_BATCH(_3DSTATE_MODES_1_CMD |
+ ENABLE_COLR_BLND_FUNC |
+ BLENDFUNC_ADD |
+ ENABLE_SRC_BLND_FACTOR |
+ SRC_BLND_FACT(BLENDFACTOR_ONE) |
+ ENABLE_DST_BLND_FACTOR | DST_BLND_FACT(BLENDFACTOR_ZERO));
+ OUT_BATCH(_3DSTATE_MODES_2_CMD |
+ ENABLE_GLOBAL_DEPTH_BIAS |
+ GLOBAL_DEPTH_BIAS(0) |
+ ENABLE_ALPHA_TEST_FUNC |
+ ALPHA_TEST_FUNC(0) | /* always */
+ ALPHA_REF_VALUE(0));
+ OUT_BATCH(_3DSTATE_MODES_3_CMD |
+ ENABLE_DEPTH_TEST_FUNC |
+ DEPTH_TEST_FUNC(0x2) | /* COMPAREFUNC_LESS */
+ ENABLE_ALPHA_SHADE_MODE | ALPHA_SHADE_MODE(SHADE_MODE_LINEAR) |
+ ENABLE_FOG_SHADE_MODE | FOG_SHADE_MODE(SHADE_MODE_LINEAR) |
+ ENABLE_SPEC_SHADE_MODE | SPEC_SHADE_MODE(SHADE_MODE_LINEAR) |
+ ENABLE_COLOR_SHADE_MODE | COLOR_SHADE_MODE(SHADE_MODE_LINEAR) |
+ ENABLE_CULL_MODE | CULLMODE_NONE);
+
+ OUT_BATCH(_3DSTATE_MODES_4_CMD |
+ ENABLE_LOGIC_OP_FUNC | LOGIC_OP_FUNC(LOGICOP_COPY) |
+ ENABLE_STENCIL_TEST_MASK | STENCIL_TEST_MASK(0xff) |
+ ENABLE_STENCIL_WRITE_MASK | STENCIL_WRITE_MASK(0xff));
+
+ OUT_BATCH(_3DSTATE_STENCIL_TEST_CMD |
+ ENABLE_STENCIL_PARMS |
+ STENCIL_FAIL_OP(0) | /* STENCILOP_KEEP */
+ STENCIL_PASS_DEPTH_FAIL_OP(0) | /* STENCILOP_KEEP */
+ STENCIL_PASS_DEPTH_PASS_OP(0) | /* STENCILOP_KEEP */
+ ENABLE_STENCIL_TEST_FUNC | STENCIL_TEST_FUNC(0) | /* COMPAREFUNC_ALWAYS */
+ ENABLE_STENCIL_REF_VALUE | STENCIL_REF_VALUE(0));
+
+ OUT_BATCH(_3DSTATE_MODES_5_CMD |
+ FLUSH_TEXTURE_CACHE |
+ ENABLE_SPRITE_POINT_TEX | SPRITE_POINT_TEX_OFF |
+ ENABLE_FIXED_LINE_WIDTH | FIXED_LINE_WIDTH(0x2) | /* 1.0 */
+ ENABLE_FIXED_POINT_WIDTH | FIXED_POINT_WIDTH(1));
+
+ OUT_BATCH(_3DSTATE_ENABLES_1_CMD |
+ DISABLE_LOGIC_OP |
+ DISABLE_STENCIL_TEST |
+ DISABLE_DEPTH_BIAS |
+ DISABLE_SPEC_ADD |
+ DISABLE_FOG |
+ DISABLE_ALPHA_TEST |
+ ENABLE_COLOR_BLEND |
+ DISABLE_DEPTH_TEST);
+ OUT_BATCH(_3DSTATE_ENABLES_2_CMD |
+ DISABLE_STENCIL_WRITE |
+ ENABLE_TEX_CACHE |
+ DISABLE_DITHER |
+ ENABLE_COLOR_MASK |
+ ENABLE_COLOR_WRITE |
+ DISABLE_DEPTH_WRITE);
+
+ OUT_BATCH(_3DSTATE_STIPPLE);
+
+ /* Set default blend state */
+ OUT_BATCH(_3DSTATE_MAP_BLEND_OP_CMD(0) |
+ TEXPIPE_COLOR |
+ ENABLE_TEXOUTPUT_WRT_SEL | TEXOP_OUTPUT_CURRENT |
+ DISABLE_TEX_CNTRL_STAGE |
+ TEXOP_SCALE_1X | TEXOP_MODIFY_PARMS |
+ TEXOP_LAST_STAGE | TEXBLENDOP_ARG1);
+ OUT_BATCH(_3DSTATE_MAP_BLEND_OP_CMD(0) |
+ TEXPIPE_ALPHA |
+ ENABLE_TEXOUTPUT_WRT_SEL | TEXOP_OUTPUT_CURRENT |
+ TEXOP_SCALE_1X | TEXOP_MODIFY_PARMS | TEXBLENDOP_ARG1);
+ OUT_BATCH(_3DSTATE_MAP_BLEND_ARG_CMD(0) |
+ TEXPIPE_COLOR |
+ TEXBLEND_ARG1 |
+ TEXBLENDARG_MODIFY_PARMS | TEXBLENDARG_DIFFUSE);
+ OUT_BATCH(_3DSTATE_MAP_BLEND_ARG_CMD(0) |
+ TEXPIPE_ALPHA |
+ TEXBLEND_ARG1 |
+ TEXBLENDARG_MODIFY_PARMS | TEXBLENDARG_DIFFUSE);
+
+ OUT_BATCH(_3DSTATE_AA_CMD |
+ AA_LINE_ECAAR_WIDTH_ENABLE |
+ AA_LINE_ECAAR_WIDTH_1_0 |
+ AA_LINE_REGION_WIDTH_ENABLE |
+ AA_LINE_REGION_WIDTH_1_0 | AA_LINE_DISABLE);
+
+ sna->render_state.gen2.need_invariant = FALSE;
+}
+
+static void
+gen2_get_batch(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ kgem_set_mode(&sna->kgem, KGEM_RENDER);
+
+ if (!kgem_check_batch(&sna->kgem, 50)) {
+ DBG(("%s: flushing batch: size %d > %d\n",
+ __FUNCTION__, 50,
+ sna->kgem.surface-sna->kgem.nbatch));
+ kgem_submit(&sna->kgem);
+ }
+
+ if (sna->kgem.nreloc + 3 > KGEM_RELOC_SIZE(&sna->kgem)) {
+ DBG(("%s: flushing batch: reloc %d >= %d\n",
+ __FUNCTION__,
+ sna->kgem.nreloc + 3,
+ (int)KGEM_RELOC_SIZE(&sna->kgem)));
+ kgem_submit(&sna->kgem);
+ }
+
+ if (sna->kgem.nexec + 3 > KGEM_EXEC_SIZE(&sna->kgem)) {
+ DBG(("%s: flushing batch: exec %d >= %d\n",
+ __FUNCTION__,
+ sna->kgem.nexec + 1,
+ (int)KGEM_EXEC_SIZE(&sna->kgem)));
+ kgem_submit(&sna->kgem);
+ }
+
+ if (sna->render_state.gen2.need_invariant)
+ gen2_emit_invariant(sna);
+}
+
+static void gen2_emit_composite_state(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ uint32_t texcoordfmt;
+ uint32_t cblend, ablend;
+
+ gen2_get_batch(sna, op);
+
+ OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
+ OUT_BATCH(BUF_3D_ID_COLOR_BACK |
+ gen2_buf_tiling(op->dst.bo->tiling) |
+ BUF_3D_PITCH(op->dst.bo->pitch));
+ OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
+ op->dst.bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER,
+ 0));
+
+ OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
+ OUT_BATCH(gen2_get_dst_format(op->dst.format));
+
+ OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
+ OUT_BATCH(0);
+ OUT_BATCH(0); /* ymin, xmin */
+ OUT_BATCH(DRAW_YMAX(op->dst.height - 1) |
+ DRAW_XMAX(op->dst.width - 1));
+ OUT_BATCH(0); /* yorig, xorig */
+
+ OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
+ I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2);
+ OUT_BATCH((1 + (op->mask.bo != NULL)) << 12);
+ OUT_BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY);
+ OUT_BATCH(S8_ENABLE_COLOR_BLEND | S8_BLENDFUNC_ADD |
+ gen2_get_blend_cntl(op->op,
+ op->has_component_alpha,
+ op->dst.format) |
+ S8_ENABLE_COLOR_BUFFER_WRITE);
+
+ OUT_BATCH(_3DSTATE_INDPT_ALPHA_BLEND_CMD | DISABLE_INDPT_ALPHA_BLEND);
+
+ gen2_get_blend_factors(op, &cblend, &ablend);
+ OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
+ LOAD_TEXTURE_BLEND_STAGE(0) | 1);
+ OUT_BATCH(cblend);
+ OUT_BATCH(ablend);
+
+ OUT_BATCH(_3DSTATE_ENABLES_1_CMD | DISABLE_LOGIC_OP |
+ DISABLE_STENCIL_TEST | DISABLE_DEPTH_BIAS |
+ DISABLE_SPEC_ADD | DISABLE_FOG | DISABLE_ALPHA_TEST |
+ ENABLE_COLOR_BLEND | DISABLE_DEPTH_TEST);
+ /* We have to explicitly say we don't want write disabled */
+ OUT_BATCH(_3DSTATE_ENABLES_2_CMD | ENABLE_COLOR_MASK |
+ DISABLE_STENCIL_WRITE | ENABLE_TEX_CACHE |
+ DISABLE_DITHER | ENABLE_COLOR_WRITE | DISABLE_DEPTH_WRITE);
+
+ texcoordfmt = 0;
+ if (op->src.is_affine)
+ texcoordfmt |= TEXCOORDFMT_2D << 0;
+ else
+ texcoordfmt |= TEXCOORDFMT_3D << 0;
+ if (op->mask.bo) {
+ if (op->mask.is_affine)
+ texcoordfmt |= TEXCOORDFMT_2D << 2;
+ else
+ texcoordfmt |= TEXCOORDFMT_3D << 2;
+ }
+ OUT_BATCH(_3DSTATE_VERTEX_FORMAT_2_CMD | texcoordfmt);
+
+ gen2_emit_texture(sna, &op->src, 0);
+ if (op->mask.bo)
+ gen2_emit_texture(sna, &op->mask, 1);
+}
+
+static inline void
+gen2_emit_composite_dstcoord(struct sna *sna, int dstX, int dstY)
+{
+ OUT_VERTEX(dstX);
+ OUT_VERTEX(dstY);
+}
+
+static void
+gen2_emit_composite_texcoord(struct sna *sna,
+ const struct sna_composite_channel *channel,
+ int16_t x, int16_t y)
+{
+ float s = 0, t = 0, w = 1;
+
+ x += channel->offset[0];
+ y += channel->offset[1];
+
+ if (channel->is_affine) {
+ sna_get_transformed_coordinates(x, y,
+ channel->transform,
+ &s, &t);
+ OUT_VERTEX(s * channel->scale[0]);
+ OUT_VERTEX(t * channel->scale[1]);
+ } else {
+ sna_get_transformed_coordinates_3d(x, y,
+ channel->transform,
+ &s, &t, &w);
+ OUT_VERTEX(s * channel->scale[0]);
+ OUT_VERTEX(t * channel->scale[1]);
+ OUT_VERTEX(w);
+ }
+}
+
+static void
+gen2_emit_composite_vertex(struct sna *sna,
+ const struct sna_composite_op *op,
+ int16_t srcX, int16_t srcY,
+ int16_t mskX, int16_t mskY,
+ int16_t dstX, int16_t dstY)
+{
+ gen2_emit_composite_dstcoord(sna, dstX, dstY);
+ gen2_emit_composite_texcoord(sna, &op->src, srcX, srcY);
+ gen2_emit_composite_texcoord(sna, &op->mask, mskX, mskY);
+}
+
+static void
+gen2_emit_composite_primitive(struct sna *sna,
+ const struct sna_composite_op *op,
+ int16_t srcX, int16_t srcY,
+ int16_t mskX, int16_t mskY,
+ int16_t dstX, int16_t dstY,
+ int16_t w, int16_t h)
+{
+ dstX += op->dst.x;
+ dstY += op->dst.y;
+
+ gen2_emit_composite_vertex(sna, op,
+ srcX + w, srcY + h,
+ mskX + w, mskY + h,
+ dstX + w, dstY + h);
+ gen2_emit_composite_vertex(sna, op,
+ srcX, srcY + h,
+ mskX, mskY + h,
+ dstX, dstY + h);
+ gen2_emit_composite_vertex(sna, op,
+ srcX, srcY,
+ mskX, mskY,
+ dstX, dstY);
+}
+
+static void gen2_magic_ca_pass(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ uint32_t ablend, cblend;
+
+ if (!op->need_magic_ca_pass)
+ return;
+
+ OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(8) | 2);
+ OUT_BATCH(S8_ENABLE_COLOR_BLEND | S8_BLENDFUNC_ADD |
+ gen2_get_blend_cntl(PictOpAdd,
+ op->has_component_alpha,
+ op->dst.format) |
+ S8_ENABLE_COLOR_BUFFER_WRITE);
+
+ gen2_get_blend_factors(op, &cblend, &ablend);
+ OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
+ LOAD_TEXTURE_BLEND_STAGE(0) | 1);
+ OUT_BATCH(cblend);
+ OUT_BATCH(ablend);
+
+ memcpy(sna->kgem.batch + sna->kgem.nbatch,
+ sna->kgem.batch + sna->render_state.gen2.vertex_offset,
+ (1 + 3*sna->render.vertex_index)*sizeof(uint32_t));
+ sna->kgem.nbatch += 1 + 3*sna->render.vertex_index;
+}
+
+static void gen2_vertex_flush(struct sna *sna)
+{
+ if (sna->render.vertex_index == 0)
+ return;
+
+ sna->kgem.batch[sna->render_state.gen2.vertex_offset] |=
+ sna->render.vertex_index - 1;
+
+ if (sna->render.op)
+ gen2_magic_ca_pass(sna, sna->render.op);
+
+ sna->render_state.gen2.vertex_offset = 0;
+ sna->render.vertex_index = 0;
+}
+
+inline static int gen2_get_rectangles(struct sna *sna,
+ const const struct sna_composite_op *op,
+ int want)
+{
+ struct gen2_render_state *state = &sna->render_state.gen2;
+ int rem = batch_space(sna), size, need;
+
+ need = 0;
+ size = 3*op->floats_per_vertex;
+ if (op->need_magic_ca_pass)
+ need += 5, size *= 2;
+
+ need += size;
+ if (state->vertex_offset == 0)
+ need += 2;
+
+ if (rem < need)
+ return 0;
+
+ if (state->vertex_offset == 0) {
+ state->vertex_offset = sna->kgem.nbatch;
+ OUT_BATCH(PRIM3D_INLINE | PRIM3D_RECTLIST);
+ rem--;
+ }
+
+ if (want * size > rem)
+ want = rem / size;
+
+ sna->render.vertex_index += 3*want;
+ return want;
+}
+
+fastcall static void
+gen2_render_composite_blt(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ if (!gen2_get_rectangles(sna, op, 1)) {
+ gen2_emit_composite_state(sna, op);
+ gen2_get_rectangles(sna, op, 1);
+ }
+
+ gen2_emit_composite_primitive(sna, op,
+ r->src.x, r->src.y,
+ r->mask.x, r->mask.y,
+ r->dst.x, r->dst.y,
+ r->width, r->height);
+}
+
+static void
+gen2_render_composite_boxes(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+ do {
+ int nbox_this_time;
+
+ nbox_this_time = gen2_get_rectangles(sna, op, nbox);
+ if (nbox_this_time == 0) {
+ gen2_emit_composite_state(sna, op);
+ nbox_this_time = gen2_get_rectangles(sna, op, nbox);
+ }
+ nbox -= nbox_this_time;
+
+ do {
+ gen2_emit_composite_primitive(sna, op,
+ box->x1, box->y1,
+ box->x1, box->y1,
+ box->x1, box->y1,
+ box->x2 - box->x1,
+ box->y2 - box->y1);
+ box++;
+ } while (--nbox_this_time);
+ } while (nbox);
+}
+
+static void gen2_render_composite_done(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ gen2_vertex_flush(sna);
+ sna->render.op = NULL;
+ _kgem_set_mode(&sna->kgem, KGEM_RENDER);
+
+ sna_render_composite_redirect_done(sna, op);
+
+ if (op->src.bo)
+ kgem_bo_destroy(&sna->kgem, op->src.bo);
+ if (op->mask.bo)
+ kgem_bo_destroy(&sna->kgem, op->mask.bo);
+}
+
+static Bool
+gen2_composite_solid_init(struct sna *sna,
+ struct sna_composite_channel *channel,
+ uint32_t color)
+{
+ channel->filter = PictFilterNearest;
+ channel->repeat = RepeatNormal;
+ channel->is_affine = TRUE;
+ channel->is_solid = TRUE;
+ channel->transform = NULL;
+ channel->width = 1;
+ channel->height = 1;
+ channel->pict_format = PICT_a8r8g8b8;
+
+ channel->bo = sna_render_get_solid(sna, color);
+
+ channel->scale[0] = channel->scale[1] = 1;
+ channel->offset[0] = channel->offset[1] = 0;
+ return channel->bo != NULL;
+}
+
+static int
+gen2_composite_picture(struct sna *sna,
+ PicturePtr picture,
+ struct sna_composite_channel *channel,
+ int x, int y,
+ int w, int h,
+ int dst_x, int dst_y)
+{
+ PixmapPtr pixmap;
+ uint32_t color;
+ int16_t dx, dy;
+
+ DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
+ __FUNCTION__, x, y, w, h, dst_x, dst_y));
+
+ channel->is_solid = FALSE;
+ channel->card_format = -1;
+
+ if (sna_picture_is_solid(picture, &color))
+ return gen2_composite_solid_init(sna, channel, color);
+
+ if (picture->pDrawable == NULL)
+ return sna_render_picture_fixup(sna, picture, channel,
+ x, y, w, h, dst_x, dst_y);
+
+ if (!gen2_check_repeat(picture))
+ return sna_render_picture_fixup(sna, picture, channel,
+ x, y, w, h, dst_x, dst_y);
+
+ if (!gen2_check_filter(picture))
+ return sna_render_picture_fixup(sna, picture, channel,
+ x, y, w, h, dst_x, dst_y);
+
+ channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
+ channel->filter = picture->filter;
+
+ pixmap = get_drawable_pixmap(picture->pDrawable);
+ get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);
+
+ x += dx + picture->pDrawable->x;
+ y += dy + picture->pDrawable->y;
+
+ channel->is_affine = sna_transform_is_affine(picture->transform);
+ if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
+ DBG(("%s: integer translation (%d, %d), removing\n",
+ __FUNCTION__, dx, dy));
+ x += dx;
+ y += dy;
+ channel->transform = NULL;
+ channel->filter = PictFilterNearest;
+ } else
+ channel->transform = picture->transform;
+
+ if (!gen2_check_card_format(sna, picture->format))
+ return sna_render_picture_convert(sna, picture, channel, pixmap,
+ x, y, w, h, dst_x, dst_y);
+
+ channel->pict_format = picture->format;
+ if (pixmap->drawable.width > 8192 || pixmap->drawable.height > 8192)
+ return sna_render_picture_extract(sna, picture, channel,
+ x, y, w, h, dst_x, dst_y);
+
+ return sna_render_pixmap_bo(sna, channel, pixmap,
+ x, y, w, h, dst_x, dst_y);
+}
+
+static Bool
+gen2_composite_set_target(struct sna *sna,
+ struct sna_composite_op *op,
+ PicturePtr dst)
+{
+ struct sna_pixmap *priv;
+
+ op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
+ op->dst.format = dst->format;
+ op->dst.width = op->dst.pixmap->drawable.width;
+ op->dst.height = op->dst.pixmap->drawable.height;
+
+ priv = sna_pixmap_force_to_gpu(op->dst.pixmap);
+ if (priv == NULL)
+ return FALSE;
+
+ op->dst.bo = priv->gpu_bo;
+ if (!priv->gpu_only)
+ op->damage = &priv->gpu_damage;
+
+ get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
+ &op->dst.x, &op->dst.y);
+ return TRUE;
+}
+
+static Bool
+try_blt(struct sna *sna,
+ PicturePtr dst,
+ PicturePtr source,
+ int width, int height)
+{
+ uint32_t color;
+
+ if (sna->kgem.mode == KGEM_BLT) {
+ DBG(("%s: already performing BLT\n", __FUNCTION__));
+ return TRUE;
+ }
+
+ if (width > 2048 || height > 2048) {
+ DBG(("%s: operation too large for 3D pipe (%d, %d)\n",
+ __FUNCTION__, width, height));
+ return TRUE;
+ }
+
+ /* If it is a solid, try to use the BLT paths */
+ if (sna_picture_is_solid(source, &color))
+ return TRUE;
+
+ if (!source->pDrawable)
+ return FALSE;
+
+ return is_cpu(source->pDrawable);
+}
+
+static Bool
+gen2_render_composite(struct sna *sna,
+ uint8_t op,
+ PicturePtr src,
+ PicturePtr mask,
+ PicturePtr dst,
+ int16_t src_x, int16_t src_y,
+ int16_t mask_x, int16_t mask_y,
+ int16_t dst_x, int16_t dst_y,
+ int16_t width, int16_t height,
+ struct sna_composite_op *tmp)
+{
+ DBG(("%s()\n", __FUNCTION__));
+
+ /* Try to use the BLT engine unless it implies a
+ * 3D -> 2D context switch.
+ */
+ if (mask == NULL &&
+ try_blt(sna, dst, src, width, height) &&
+ sna_blt_composite(sna,
+ op, src, dst,
+ src_x, src_y,
+ dst_x, dst_y,
+ width, height,
+ tmp))
+ return TRUE;
+
+ if (op >= ARRAY_SIZE(gen2_blend_op)) {
+ DBG(("%s: fallback due to unhandled blend op: %d\n",
+ __FUNCTION__, op));
+ return FALSE;
+ }
+
+ if (!gen2_check_dst_format(dst->format)) {
+ DBG(("%s: fallback due to unhandled dst format: %x\n",
+ __FUNCTION__, dst->format));
+ return FALSE;
+ }
+
+ if (need_tiling(sna, width, height))
+ return sna_tiling_composite(sna,
+ op, src, mask, dst,
+ src_x, src_y,
+ mask_x, mask_y,
+ dst_x, dst_y,
+ width, height,
+ tmp);
+
+ memset(&tmp->u.gen2, 0, sizeof(tmp->u.gen2));
+
+ if (!gen2_composite_set_target(sna, tmp, dst)) {
+ DBG(("%s: unable to set render target\n",
+ __FUNCTION__));
+ return FALSE;
+ }
+
+ tmp->op = op;
+ if (tmp->dst.width > 2048 ||
+ tmp->dst.height > 2048 ||
+ tmp->dst.bo->pitch > 8192) {
+ if (!sna_render_composite_redirect(sna, tmp,
+ dst_x, dst_y, width, height))
+ return FALSE;
+ }
+
+ switch (gen2_composite_picture(sna, src, &tmp->src,
+ src_x, src_y,
+ width, height,
+ dst_x, dst_y)) {
+ case -1:
+ goto cleanup_dst;
+ case 0:
+ gen2_composite_solid_init(sna, &tmp->src, 0);
+ case 1:
+ break;
+ }
+
+ if (mask) {
+ switch (gen2_composite_picture(sna, mask, &tmp->mask,
+ mask_x, mask_y,
+ width, height,
+ dst_x, dst_y)) {
+ case -1:
+ goto cleanup_src;
+ case 0:
+ gen2_composite_solid_init(sna, &tmp->mask, 0);
+ case 1:
+ break;
+ }
+
+ if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
+ /* Check if it's component alpha that relies on a source alpha
+ * and on the source value. We can only get one of those
+ * into the single source value that we get to blend with.
+ */
+ tmp->has_component_alpha = TRUE;
+ if (gen2_blend_op[op].src_alpha &&
+ (gen2_blend_op[op].src_blend != BLENDFACTOR_ZERO)) {
+ if (op != PictOpOver)
+ return FALSE;
+
+ tmp->need_magic_ca_pass = TRUE;
+ tmp->op = PictOpOutReverse;
+ }
+ }
+ }
+
+ tmp->blt = gen2_render_composite_blt;
+ tmp->boxes = gen2_render_composite_boxes;
+ tmp->done = gen2_render_composite_done;
+
+ if (!kgem_check_bo(&sna->kgem, tmp->dst.bo))
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo(&sna->kgem, tmp->src.bo))
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo(&sna->kgem, tmp->mask.bo))
+ kgem_submit(&sna->kgem);
+
+ if (kgem_bo_is_dirty(tmp->src.bo) || kgem_bo_is_dirty(tmp->mask.bo))
+ kgem_emit_flush(&sna->kgem);
+
+ gen2_emit_composite_state(sna, tmp);
+
+ sna->render.op = tmp;
+ return TRUE;
+
+cleanup_src:
+ if (tmp->src.bo)
+ kgem_bo_destroy(&sna->kgem, tmp->src.bo);
+cleanup_dst:
+ if (tmp->redirect.real_bo)
+ kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
+ return FALSE;
+}
+
+static void
+gen2_render_reset(struct sna *sna)
+{
+ sna->render_state.gen2.need_invariant = TRUE;
+ sna->render_state.gen2.vertex_offset = 0;
+}
+
+static void
+gen2_render_flush(struct sna *sna)
+{
+ gen2_vertex_flush(sna);
+}
+
+static void
+gen2_render_context_switch(struct sna *sna,
+ int new_mode)
+{
+}
+
+static void
+gen2_render_fini(struct sna *sna)
+{
+}
+
+Bool gen2_render_init(struct sna *sna)
+{
+ struct sna_render *render = &sna->render;
+
+ gen2_render_reset(sna);
+
+ /* Use the BLT (and overlay) for everything except when forced to
+ * use the texture combiners.
+ */
+ render->composite = gen2_render_composite;
+
+ /* XXX Y-tiling copies */
+
+ render->reset = gen2_render_reset;
+ render->flush = gen2_render_flush;
+ render->context_switch = gen2_render_context_switch;
+ render->fini = gen2_render_fini;
+
+ render->max_3d_size = 2048;
+ return TRUE;
+}
diff --git a/src/sna/gen2_render.h b/src/sna/gen2_render.h
new file mode 100644
index 0000000..945cd84
--- a/dev/null
+++ b/src/sna/gen2_render.h
@@ -0,0 +1,785 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef GEN2_RENDER_H
+#define GEN2_RENDER_H
+
+#define CMD_3D (0x3<<29)
+
+#define PRIM3D_INLINE (CMD_3D | (0x1f<<24))
+#define PRIM3D_TRILIST (0x0<<18)
+#define PRIM3D_TRISTRIP (0x1<<18)
+#define PRIM3D_TRISTRIP_RVRSE (0x2<<18)
+#define PRIM3D_TRIFAN (0x3<<18)
+#define PRIM3D_POLY (0x4<<18)
+#define PRIM3D_LINELIST (0x5<<18)
+#define PRIM3D_LINESTRIP (0x6<<18)
+#define PRIM3D_RECTLIST (0x7<<18)
+#define PRIM3D_POINTLIST (0x8<<18)
+#define PRIM3D_DIB (0x9<<18)
+#define PRIM3D_CLEAR_RECT (0xa<<18)
+#define PRIM3D_ZONE_INIT (0xd<<18)
+#define PRIM3D_MASK (0x1f<<18)
+
+#define _3DSTATE_AA_CMD (CMD_3D | (0x06<<24))
+#define AA_LINE_ECAAR_WIDTH_ENABLE (1<<16)
+#define AA_LINE_ECAAR_WIDTH_0_5 0
+#define AA_LINE_ECAAR_WIDTH_1_0 (1<<14)
+#define AA_LINE_ECAAR_WIDTH_2_0 (2<<14)
+#define AA_LINE_ECAAR_WIDTH_4_0 (3<<14)
+#define AA_LINE_REGION_WIDTH_ENABLE (1<<8)
+#define AA_LINE_REGION_WIDTH_0_5 0
+#define AA_LINE_REGION_WIDTH_1_0 (1<<6)
+#define AA_LINE_REGION_WIDTH_2_0 (2<<6)
+#define AA_LINE_REGION_WIDTH_4_0 (3<<6)
+#define AA_LINE_ENABLE ((1<<1) | 1)
+#define AA_LINE_DISABLE (1<<1)
+
+#define _3DSTATE_BUF_INFO_CMD (CMD_3D | (0x1d<<24) | (0x8e<<16) | 1)
+/* Dword 1 */
+#define BUF_3D_ID_COLOR_BACK (0x3<<24)
+#define BUF_3D_ID_DEPTH (0x7<<24)
+#define BUF_3D_USE_FENCE (1<<23)
+#define BUF_3D_TILED_SURFACE (1<<22)
+#define BUF_3D_TILE_WALK_X 0
+#define BUF_3D_TILE_WALK_Y (1<<21)
+#define BUF_3D_PITCH(x) (((x)/4)<<2)
+/* Dword 2 */
+#define BUF_3D_ADDR(x) ((x) & ~0x3)
+
+#define _3DSTATE_COLOR_FACTOR_CMD (CMD_3D | (0x1d<<24) | (0x1<<16))
+
+#define _3DSTATE_COLOR_FACTOR_N_CMD(stage) (CMD_3D | (0x1d<<24) | \
+ ((0x90+(stage))<<16))
+
+#define _3DSTATE_CONST_BLEND_COLOR_CMD (CMD_3D | (0x1d<<24) | (0x88<<16))
+
+#define _3DSTATE_DFLT_DIFFUSE_CMD (CMD_3D | (0x1d<<24) | (0x99<<16))
+
+#define _3DSTATE_DFLT_SPEC_CMD (CMD_3D | (0x1d<<24) | (0x9a<<16))
+
+#define _3DSTATE_DFLT_Z_CMD (CMD_3D | (0x1d<<24) | (0x98<<16))
+
+#define _3DSTATE_DST_BUF_VARS_CMD (CMD_3D | (0x1d<<24) | (0x85<<16))
+/* Dword 1 */
+#define DSTORG_HORT_BIAS(x) ((x)<<20)
+#define DSTORG_VERT_BIAS(x) ((x)<<16)
+#define COLOR_4_2_2_CHNL_WRT_ALL 0
+#define COLOR_4_2_2_CHNL_WRT_Y (1<<12)
+#define COLOR_4_2_2_CHNL_WRT_CR (2<<12)
+#define COLOR_4_2_2_CHNL_WRT_CB (3<<12)
+#define COLOR_4_2_2_CHNL_WRT_CRCB (4<<12)
+#define COLR_BUF_8BIT 0
+#define COLR_BUF_RGB555 (1<<8)
+#define COLR_BUF_RGB565 (2<<8)
+#define COLR_BUF_ARGB8888 (3<<8)
+#define COLR_BUF_ARGB4444 (8<<8)
+#define COLR_BUF_ARGB1555 (9<<8)
+#define DEPTH_IS_Z 0
+#define DEPTH_IS_W (1<<6)
+#define DEPTH_FRMT_16_FIXED 0
+#define DEPTH_FRMT_16_FLOAT (1<<2)
+#define DEPTH_FRMT_24_FIXED_8_OTHER (2<<2)
+#define DEPTH_FRMT_24_FLOAT_8_OTHER (3<<2)
+#define VERT_LINE_STRIDE_1 (1<<1)
+#define VERT_LINE_STRIDE_0 0
+#define VERT_LINE_STRIDE_OFS_1 1
+#define VERT_LINE_STRIDE_OFS_0 0
+
+#define _3DSTATE_DRAW_RECT_CMD (CMD_3D|(0x1d<<24)|(0x80<<16)|3)
+/* Dword 1 */
+#define DRAW_RECT_DIS_DEPTH_OFS (1<<30)
+#define DRAW_DITHER_OFS_X(x) ((x)<<26)
+#define DRAW_DITHER_OFS_Y(x) ((x)<<24)
+/* Dword 2 */
+#define DRAW_YMIN(x) ((x)<<16)
+#define DRAW_XMIN(x) (x)
+/* Dword 3 */
+#define DRAW_YMAX(x) ((x)<<16)
+#define DRAW_XMAX(x) (x)
+/* Dword 4 */
+#define DRAW_YORG(x) ((x)<<16)
+#define DRAW_XORG(x) (x)
+
+#define _3DSTATE_ENABLES_1_CMD (CMD_3D|(0x3<<24))
+#define ENABLE_LOGIC_OP_MASK ((1<<23)|(1<<22))
+#define ENABLE_LOGIC_OP ((1<<23)|(1<<22))
+#define DISABLE_LOGIC_OP (1<<23)
+#define ENABLE_STENCIL_TEST ((1<<21)|(1<<20))
+#define DISABLE_STENCIL_TEST (1<<21)
+#define ENABLE_DEPTH_BIAS ((1<<11)|(1<<10))
+#define DISABLE_DEPTH_BIAS (1<<11)
+#define ENABLE_SPEC_ADD_MASK ((1<<9)|(1<<8))
+#define ENABLE_SPEC_ADD ((1<<9)|(1<<8))
+#define DISABLE_SPEC_ADD (1<<9)
+#define ENABLE_DIS_FOG_MASK ((1<<7)|(1<<6))
+#define ENABLE_FOG ((1<<7)|(1<<6))
+#define DISABLE_FOG (1<<7)
+#define ENABLE_DIS_ALPHA_TEST_MASK ((1<<5)|(1<<4))
+#define ENABLE_ALPHA_TEST ((1<<5)|(1<<4))
+#define DISABLE_ALPHA_TEST (1<<5)
+#define ENABLE_DIS_CBLEND_MASK ((1<<3)|(1<<2))
+#define ENABLE_COLOR_BLEND ((1<<3)|(1<<2))
+#define DISABLE_COLOR_BLEND (1<<3)
+#define ENABLE_DIS_DEPTH_TEST_MASK ((1<<1)|1)
+#define ENABLE_DEPTH_TEST ((1<<1)|1)
+#define DISABLE_DEPTH_TEST (1<<1)
+
+/* _3DSTATE_ENABLES_2, p138 */
+#define _3DSTATE_ENABLES_2_CMD (CMD_3D|(0x4<<24))
+#define ENABLE_STENCIL_WRITE ((1<<21)|(1<<20))
+#define DISABLE_STENCIL_WRITE (1<<21)
+#define ENABLE_TEX_CACHE ((1<<17)|(1<<16))
+#define DISABLE_TEX_CACHE (1<<17)
+#define ENABLE_DITHER ((1<<9)|(1<<8))
+#define DISABLE_DITHER (1<<9)
+#define ENABLE_COLOR_MASK (1<<10)
+#define WRITEMASK_ALPHA (1<<7)
+#define WRITEMASK_ALPHA_SHIFT 7
+#define WRITEMASK_RED (1<<6)
+#define WRITEMASK_RED_SHIFT 6
+#define WRITEMASK_GREEN (1<<5)
+#define WRITEMASK_GREEN_SHIFT 5
+#define WRITEMASK_BLUE (1<<4)
+#define WRITEMASK_BLUE_SHIFT 4
+#define WRITEMASK_MASK ((1<<4)|(1<<5)|(1<<6)|(1<<7))
+#define ENABLE_COLOR_WRITE ((1<<3)|(1<<2))
+#define DISABLE_COLOR_WRITE (1<<3)
+#define ENABLE_DIS_DEPTH_WRITE_MASK 0x3
+#define ENABLE_DEPTH_WRITE ((1<<1)|1)
+#define DISABLE_DEPTH_WRITE (1<<1)
+
+/* _3DSTATE_FOG_COLOR, p139 */
+#define _3DSTATE_FOG_COLOR_CMD (CMD_3D|(0x15<<24))
+#define FOG_COLOR_RED(x) ((x)<<16)
+#define FOG_COLOR_GREEN(x) ((x)<<8)
+#define FOG_COLOR_BLUE(x) (x)
+
+/* _3DSTATE_FOG_MODE, p140 */
+#define _3DSTATE_FOG_MODE_CMD (CMD_3D|(0x1d<<24)|(0x89<<16)|2)
+/* Dword 1 */
+#define FOGFUNC_ENABLE (1<<31)
+#define FOGFUNC_VERTEX 0
+#define FOGFUNC_PIXEL_EXP (1<<28)
+#define FOGFUNC_PIXEL_EXP2 (2<<28)
+#define FOGFUNC_PIXEL_LINEAR (3<<28)
+#define FOGSRC_INDEX_Z (1<<27)
+#define FOGSRC_INDEX_W ((1<<27)|(1<<25))
+#define FOG_LINEAR_CONST (1<<24)
+#define FOG_CONST_1(x) ((x)<<4)
+#define ENABLE_FOG_DENSITY (1<<23)
+/* Dword 2 */
+#define FOG_CONST_2(x) (x)
+/* Dword 3 */
+#define FOG_DENSITY(x) (x)
+
+/* _3DSTATE_INDEPENDENT_ALPHA_BLEND, p142 */
+#define _3DSTATE_INDPT_ALPHA_BLEND_CMD (CMD_3D|(0x0b<<24))
+#define ENABLE_INDPT_ALPHA_BLEND ((1<<23)|(1<<22))
+#define DISABLE_INDPT_ALPHA_BLEND (1<<23)
+#define ALPHA_BLENDFUNC_MASK 0x3f0000
+#define ENABLE_ALPHA_BLENDFUNC (1<<21)
+#define ABLENDFUNC_ADD 0
+#define ABLENDFUNC_SUB (1<<16)
+#define ABLENDFUNC_RVSE_SUB (2<<16)
+#define ABLENDFUNC_MIN (3<<16)
+#define ABLENDFUNC_MAX (4<<16)
+#define SRC_DST_ABLEND_MASK 0xfff
+#define ENABLE_SRC_ABLEND_FACTOR (1<<11)
+#define SRC_ABLEND_FACT(x) ((x)<<6)
+#define ENABLE_DST_ABLEND_FACTOR (1<<5)
+#define DST_ABLEND_FACT(x) (x)
+
+#define BLENDFACTOR_ZERO 0x01
+#define BLENDFACTOR_ONE 0x02
+#define BLENDFACTOR_SRC_COLR 0x03
+#define BLENDFACTOR_INV_SRC_COLR 0x04
+#define BLENDFACTOR_SRC_ALPHA 0x05
+#define BLENDFACTOR_INV_SRC_ALPHA 0x06
+#define BLENDFACTOR_DST_ALPHA 0x07
+#define BLENDFACTOR_INV_DST_ALPHA 0x08
+#define BLENDFACTOR_DST_COLR 0x09
+#define BLENDFACTOR_INV_DST_COLR 0x0a
+#define BLENDFACTOR_SRC_ALPHA_SATURATE 0x0b
+#define BLENDFACTOR_CONST_COLOR 0x0c
+#define BLENDFACTOR_INV_CONST_COLOR 0x0d
+#define BLENDFACTOR_CONST_ALPHA 0x0e
+#define BLENDFACTOR_INV_CONST_ALPHA 0x0f
+#define BLENDFACTOR_MASK 0x0f
+
+/* _3DSTATE_MAP_BLEND_ARG, p152 */
+#define _3DSTATE_MAP_BLEND_ARG_CMD(stage) (CMD_3D|(0x0e<<24)|((stage)<<20))
+
+#define TEXPIPE_COLOR 0
+#define TEXPIPE_ALPHA (1<<18)
+#define TEXPIPE_KILL (2<<18)
+#define TEXBLEND_ARG0 0
+#define TEXBLEND_ARG1 (1<<15)
+#define TEXBLEND_ARG2 (2<<15)
+#define TEXBLEND_ARG3 (3<<15)
+#define TEXBLENDARG_MODIFY_PARMS (1<<6)
+#define TEXBLENDARG_REPLICATE_ALPHA (1<<5)
+#define TEXBLENDARG_INV_ARG (1<<4)
+#define TEXBLENDARG_ONE 0
+#define TEXBLENDARG_FACTOR 0x01
+#define TEXBLENDARG_ACCUM 0x02
+#define TEXBLENDARG_DIFFUSE 0x03
+#define TEXBLENDARG_SPEC 0x04
+#define TEXBLENDARG_CURRENT 0x05
+#define TEXBLENDARG_TEXEL0 0x06
+#define TEXBLENDARG_TEXEL1 0x07
+#define TEXBLENDARG_TEXEL2 0x08
+#define TEXBLENDARG_TEXEL3 0x09
+#define TEXBLENDARG_FACTOR_N 0x0e
+
+/* _3DSTATE_MAP_BLEND_OP, p155 */
+#define _3DSTATE_MAP_BLEND_OP_CMD(stage) (CMD_3D|(0x0d<<24)|((stage)<<20))
+#if 0
+# define TEXPIPE_COLOR 0
+# define TEXPIPE_ALPHA (1<<18)
+# define TEXPIPE_KILL (2<<18)
+#endif
+#define ENABLE_TEXOUTPUT_WRT_SEL (1<<17)
+#define TEXOP_OUTPUT_CURRENT 0
+#define TEXOP_OUTPUT_ACCUM (1<<15)
+#define ENABLE_TEX_CNTRL_STAGE ((1<<12)|(1<<11))
+#define DISABLE_TEX_CNTRL_STAGE (1<<12)
+#define TEXOP_SCALE_SHIFT 9
+#define TEXOP_SCALE_1X (0 << TEXOP_SCALE_SHIFT)
+#define TEXOP_SCALE_2X (1 << TEXOP_SCALE_SHIFT)
+#define TEXOP_SCALE_4X (2 << TEXOP_SCALE_SHIFT)
+#define TEXOP_MODIFY_PARMS (1<<8)
+#define TEXOP_LAST_STAGE (1<<7)
+#define TEXBLENDOP_KILLPIXEL 0x02
+#define TEXBLENDOP_ARG1 0x01
+#define TEXBLENDOP_ARG2 0x02
+#define TEXBLENDOP_MODULATE 0x03
+#define TEXBLENDOP_ADD 0x06
+#define TEXBLENDOP_ADDSIGNED 0x07
+#define TEXBLENDOP_BLEND 0x08
+#define TEXBLENDOP_BLEND_AND_ADD 0x09
+#define TEXBLENDOP_SUBTRACT 0x0a
+#define TEXBLENDOP_DOT3 0x0b
+#define TEXBLENDOP_DOT4 0x0c
+#define TEXBLENDOP_MODULATE_AND_ADD 0x0d
+#define TEXBLENDOP_MODULATE_2X_AND_ADD 0x0e
+#define TEXBLENDOP_MODULATE_4X_AND_ADD 0x0f
+
+/* _3DSTATE_MAP_BUMP_TABLE, p160 TODO */
+/* _3DSTATE_MAP_COLOR_CHROMA_KEY, p161 TODO */
+
+#define _3DSTATE_MAP_COORD_TRANSFORM ((3<<29)|(0x1d<<24)|(0x8c<<16))
+#define DISABLE_TEX_TRANSFORM (1<<28)
+#define TEXTURE_SET(x) (x<<29)
+
+#define _3DSTATE_VERTEX_TRANSFORM ((3<<29)|(0x1d<<24)|(0x8b<<16))
+#define DISABLE_VIEWPORT_TRANSFORM (1<<31)
+#define DISABLE_PERSPECTIVE_DIVIDE (1<<29)
+
+/* _3DSTATE_MAP_COORD_SET_BINDINGS, p162 */
+#define _3DSTATE_MAP_COORD_SETBIND_CMD (CMD_3D|(0x1d<<24)|(0x02<<16))
+#define TEXBIND_MASK3 ((1<<15)|(1<<14)|(1<<13)|(1<<12))
+#define TEXBIND_MASK2 ((1<<11)|(1<<10)|(1<<9)|(1<<8))
+#define TEXBIND_MASK1 ((1<<7)|(1<<6)|(1<<5)|(1<<4))
+#define TEXBIND_MASK0 ((1<<3)|(1<<2)|(1<<1)|1)
+
+#define TEXBIND_SET3(x) ((x)<<12)
+#define TEXBIND_SET2(x) ((x)<<8)
+#define TEXBIND_SET1(x) ((x)<<4)
+#define TEXBIND_SET0(x) (x)
+
+#define TEXCOORDSRC_KEEP 0
+#define TEXCOORDSRC_DEFAULT 0x01
+#define TEXCOORDSRC_VTXSET_0 0x08
+#define TEXCOORDSRC_VTXSET_1 0x09
+#define TEXCOORDSRC_VTXSET_2 0x0a
+#define TEXCOORDSRC_VTXSET_3 0x0b
+#define TEXCOORDSRC_VTXSET_4 0x0c
+#define TEXCOORDSRC_VTXSET_5 0x0d
+#define TEXCOORDSRC_VTXSET_6 0x0e
+#define TEXCOORDSRC_VTXSET_7 0x0f
+
+#define MAP_UNIT(unit) ((unit)<<16)
+#define MAP_UNIT_MASK (0x7<<16)
+
+/* _3DSTATE_MAP_COORD_SETS, p164 */
+#define _3DSTATE_MAP_COORD_SET_CMD (CMD_3D|(0x1c<<24)|(0x01<<19))
+#define TEXCOORD_SET(n) ((n)<<16)
+#define ENABLE_TEXCOORD_PARAMS (1<<15)
+#define TEXCOORDS_ARE_NORMAL (1<<14)
+#define TEXCOORDS_ARE_IN_TEXELUNITS 0
+#define TEXCOORDTYPE_CARTESIAN 0
+#define TEXCOORDTYPE_HOMOGENEOUS (1<<11)
+#define TEXCOORDTYPE_VECTOR (2<<11)
+#define TEXCOORDTYPE_MASK (0x7<<11)
+#define ENABLE_ADDR_V_CNTL (1<<7)
+#define ENABLE_ADDR_U_CNTL (1<<3)
+#define TEXCOORD_ADDR_V_MODE(x) ((x)<<4)
+#define TEXCOORD_ADDR_U_MODE(x) (x)
+#define TEXCOORDMODE_WRAP 0
+#define TEXCOORDMODE_MIRROR 1
+#define TEXCOORDMODE_CLAMP 2
+#define TEXCOORDMODE_WRAP_SHORTEST 3
+#define TEXCOORDMODE_CLAMP_BORDER 4
+#define TEXCOORD_ADDR_V_MASK 0x70
+#define TEXCOORD_ADDR_U_MASK 0x7
+
+/* _3DSTATE_MAP_CUBE, p168 TODO */
+#define _3DSTATE_MAP_CUBE (CMD_3D|(0x1c<<24)|(0x0a<<19))
+#define CUBE_NEGX_ENABLE (1<<5)
+#define CUBE_POSX_ENABLE (1<<4)
+#define CUBE_NEGY_ENABLE (1<<3)
+#define CUBE_POSY_ENABLE (1<<2)
+#define CUBE_NEGZ_ENABLE (1<<1)
+#define CUBE_POSZ_ENABLE (1<<0)
+
+#define _3DSTATE_MAP_INFO_CMD (CMD_3D|(0x1d<<24)|(0x0<<16)|3)
+#define TEXMAP_INDEX(x) ((x)<<28)
+#define MAP_SURFACE_8BIT (1<<24)
+#define MAP_SURFACE_16BIT (2<<24)
+#define MAP_SURFACE_32BIT (3<<24)
+#define MAP_FORMAT_2D (0)
+#define MAP_FORMAT_3D_CUBE (1<<11)
+
+/* _3DSTATE_MODES_1, p190 */
+#define _3DSTATE_MODES_1_CMD (CMD_3D|(0x08<<24))
+#define BLENDFUNC_MASK 0x3f0000
+#define ENABLE_COLR_BLND_FUNC (1<<21)
+#define BLENDFUNC_ADD 0
+#define BLENDFUNC_SUB (1<<16)
+#define BLENDFUNC_RVRSE_SUB (2<<16)
+#define BLENDFUNC_MIN (3<<16)
+#define BLENDFUNC_MAX (4<<16)
+#define SRC_DST_BLND_MASK 0xfff
+#define ENABLE_SRC_BLND_FACTOR (1<<11)
+#define ENABLE_DST_BLND_FACTOR (1<<5)
+#define SRC_BLND_FACT(x) ((x)<<6)
+#define DST_BLND_FACT(x) (x)
+
+/* _3DSTATE_MODES_2, p192 */
+#define _3DSTATE_MODES_2_CMD (CMD_3D|(0x0f<<24))
+#define ENABLE_GLOBAL_DEPTH_BIAS (1<<22)
+#define GLOBAL_DEPTH_BIAS(x) ((x)<<14)
+#define ENABLE_ALPHA_TEST_FUNC (1<<13)
+#define ENABLE_ALPHA_REF_VALUE (1<<8)
+#define ALPHA_TEST_FUNC(x) ((x)<<9)
+#define ALPHA_REF_VALUE(x) (x)
+
+#define ALPHA_TEST_REF_MASK 0x3fff
+
+/* _3DSTATE_MODES_3, p193 */
+#define _3DSTATE_MODES_3_CMD (CMD_3D|(0x02<<24))
+#define DEPTH_TEST_FUNC_MASK 0x1f0000
+#define ENABLE_DEPTH_TEST_FUNC (1<<20)
+/* Uses COMPAREFUNC */
+#define DEPTH_TEST_FUNC(x) ((x)<<16)
+#define ENABLE_ALPHA_SHADE_MODE (1<<11)
+#define ENABLE_FOG_SHADE_MODE (1<<9)
+#define ENABLE_SPEC_SHADE_MODE (1<<7)
+#define ENABLE_COLOR_SHADE_MODE (1<<5)
+#define ALPHA_SHADE_MODE(x) ((x)<<10)
+#define FOG_SHADE_MODE(x) ((x)<<8)
+#define SPEC_SHADE_MODE(x) ((x)<<6)
+#define COLOR_SHADE_MODE(x) ((x)<<4)
+#define CULLMODE_MASK 0xf
+#define ENABLE_CULL_MODE (1<<3)
+#define CULLMODE_BOTH 0
+#define CULLMODE_NONE 1
+#define CULLMODE_CW 2
+#define CULLMODE_CCW 3
+
+#define SHADE_MODE_LINEAR 0
+#define SHADE_MODE_FLAT 0x1
+
+/* _3DSTATE_MODES_4, p195 */
+#define _3DSTATE_MODES_4_CMD (CMD_3D|(0x16<<24))
+#define ENABLE_LOGIC_OP_FUNC (1<<23)
+#define LOGIC_OP_FUNC(x) ((x)<<18)
+#define LOGICOP_MASK ((1<<18)|(1<<19)|(1<<20)|(1<<21))
+#define LOGICOP_CLEAR 0
+#define LOGICOP_NOR 0x1
+#define LOGICOP_AND_INV 0x2
+#define LOGICOP_COPY_INV 0x3
+#define LOGICOP_AND_RVRSE 0x4
+#define LOGICOP_INV 0x5
+#define LOGICOP_XOR 0x6
+#define LOGICOP_NAND 0x7
+#define LOGICOP_AND 0x8
+#define LOGICOP_EQUIV 0x9
+#define LOGICOP_NOOP 0xa
+#define LOGICOP_OR_INV 0xb
+#define LOGICOP_COPY 0xc
+#define LOGICOP_OR_RVRSE 0xd
+#define LOGICOP_OR 0xe
+#define LOGICOP_SET 0xf
+#define MODE4_ENABLE_STENCIL_TEST_MASK ((1<<17)|(0xff00))
+#define ENABLE_STENCIL_TEST_MASK (1<<17)
+#define STENCIL_TEST_MASK(x) ((x)<<8)
+#define MODE4_ENABLE_STENCIL_WRITE_MASK ((1<<16)|(0x00ff))
+#define ENABLE_STENCIL_WRITE_MASK (1<<16)
+#define STENCIL_WRITE_MASK(x) ((x)&0xff)
+
+/* _3DSTATE_MODES_5, p196 */
+#define _3DSTATE_MODES_5_CMD (CMD_3D|(0x0c<<24))
+#define ENABLE_SPRITE_POINT_TEX (1<<23)
+#define SPRITE_POINT_TEX_ON (1<<22)
+#define SPRITE_POINT_TEX_OFF 0
+#define FLUSH_RENDER_CACHE (1<<18)
+#define FLUSH_TEXTURE_CACHE (1<<16)
+#define FIXED_LINE_WIDTH_MASK 0xfc00
+#define ENABLE_FIXED_LINE_WIDTH (1<<15)
+#define FIXED_LINE_WIDTH(x) ((x)<<10)
+#define FIXED_POINT_WIDTH_MASK 0x3ff
+#define ENABLE_FIXED_POINT_WIDTH (1<<9)
+#define FIXED_POINT_WIDTH(x) (x)
+
+/* _3DSTATE_RASTERIZATION_RULES, p198 */
+#define _3DSTATE_RASTER_RULES_CMD (CMD_3D|(0x07<<24))
+#define ENABLE_POINT_RASTER_RULE (1<<15)
+#define OGL_POINT_RASTER_RULE (1<<13)
+#define ENABLE_LINE_STRIP_PROVOKE_VRTX (1<<8)
+#define ENABLE_TRI_FAN_PROVOKE_VRTX (1<<5)
+#define ENABLE_TRI_STRIP_PROVOKE_VRTX (1<<2)
+#define LINE_STRIP_PROVOKE_VRTX(x) ((x)<<6)
+#define TRI_FAN_PROVOKE_VRTX(x) ((x)<<3)
+#define TRI_STRIP_PROVOKE_VRTX(x) (x)
+
+/* _3DSTATE_SCISSOR_ENABLE, p200 */
+#define _3DSTATE_SCISSOR_ENABLE_CMD (CMD_3D|(0x1c<<24)|(0x10<<19))
+#define ENABLE_SCISSOR_RECT ((1<<1) | 1)
+#define DISABLE_SCISSOR_RECT (1<<1)
+
+/* _3DSTATE_SCISSOR_RECTANGLE_0, p201 */
+#define _3DSTATE_SCISSOR_RECT_0_CMD (CMD_3D|(0x1d<<24)|(0x81<<16)|1)
+/* Dword 1 */
+#define SCISSOR_RECT_0_YMIN(x) ((x)<<16)
+#define SCISSOR_RECT_0_XMIN(x) (x)
+/* Dword 2 */
+#define SCISSOR_RECT_0_YMAX(x) ((x)<<16)
+#define SCISSOR_RECT_0_XMAX(x) (x)
+
+/* _3DSTATE_STENCIL_TEST, p202 */
+#define _3DSTATE_STENCIL_TEST_CMD (CMD_3D|(0x09<<24))
+#define ENABLE_STENCIL_PARMS (1<<23)
+#define STENCIL_OPS_MASK (0xffc000)
+#define STENCIL_FAIL_OP(x) ((x)<<20)
+#define STENCIL_PASS_DEPTH_FAIL_OP(x) ((x)<<17)
+#define STENCIL_PASS_DEPTH_PASS_OP(x) ((x)<<14)
+
+#define ENABLE_STENCIL_TEST_FUNC_MASK ((1<<13)|(1<<12)|(1<<11)|(1<<10)|(1<<9))
+#define ENABLE_STENCIL_TEST_FUNC (1<<13)
+/* Uses COMPAREFUNC */
+#define STENCIL_TEST_FUNC(x) ((x)<<9)
+#define STENCIL_REF_VALUE_MASK ((1<<8)|0xff)
+#define ENABLE_STENCIL_REF_VALUE (1<<8)
+#define STENCIL_REF_VALUE(x) (x)
+
+/* _3DSTATE_VERTEX_FORMAT, p204 */
+#define _3DSTATE_VFT0_CMD (CMD_3D|(0x05<<24))
+#define VFT0_POINT_WIDTH (1<<12)
+#define VFT0_TEX_COUNT_MASK (7<<8)
+#define VFT0_TEX_COUNT_SHIFT 8
+#define VFT0_TEX_COUNT(x) ((x)<<8)
+#define VFT0_SPEC (1<<7)
+#define VFT0_DIFFUSE (1<<6)
+#define VFT0_DEPTH_OFFSET (1<<5)
+#define VFT0_XYZ (1<<1)
+#define VFT0_XYZW (2<<1)
+#define VFT0_XY (3<<1)
+#define VFT0_XYW (4<<1)
+#define VFT0_XYZW_MASK (7<<1)
+
+/* _3DSTATE_VERTEX_FORMAT_2, p206 */
+#define _3DSTATE_VERTEX_FORMAT_2_CMD (CMD_3D|(0x0a<<24))
+#define VFT1_TEX7_FMT(x) ((x)<<14)
+#define VFT1_TEX6_FMT(x) ((x)<<12)
+#define VFT1_TEX5_FMT(x) ((x)<<10)
+#define VFT1_TEX4_FMT(x) ((x)<<8)
+#define VFT1_TEX3_FMT(x) ((x)<<6)
+#define VFT1_TEX2_FMT(x) ((x)<<4)
+#define VFT1_TEX1_FMT(x) ((x)<<2)
+#define VFT1_TEX0_FMT(x) (x)
+#define VFT1_TEX0_MASK 3
+#define VFT1_TEX1_SHIFT 2
+#define TEXCOORDFMT_2D 0
+#define TEXCOORDFMT_3D 1
+#define TEXCOORDFMT_4D 2
+#define TEXCOORDFMT_1D 3
+
+/*New stuff picked up along the way */
+
+#define MLC_LOD_BIAS_MASK ((1<<7)-1)
+
+/* _3DSTATE_VERTEX_TRANSFORM, p207 */
+#define _3DSTATE_VERTEX_TRANS_CMD (CMD_3D|(0x1d<<24)|(0x8b<<16)|0)
+#define _3DSTATE_VERTEX_TRANS_MTX_CMD (CMD_3D|(0x1d<<24)|(0x8b<<16)|6)
+/* Dword 1 */
+#define ENABLE_VIEWPORT_TRANSFORM ((1<<31)|(1<<30))
+#define DISABLE_VIEWPORT_TRANSFORM (1<<31)
+#define ENABLE_PERSP_DIVIDE ((1<<29)|(1<<28))
+#define DISABLE_PERSP_DIVIDE (1<<29)
+#define VRTX_TRANS_LOAD_MATRICES 0x7421
+#define VRTX_TRANS_NO_LOAD_MATRICES 0x0000
+/* Dword 2 -> 7 are matrix elements */
+
+/* _3DSTATE_W_STATE, p209 */
+#define _3DSTATE_W_STATE_CMD (CMD_3D|(0x1d<<24)|(0x8d<<16)|1)
+/* Dword 1 */
+#define MAGIC_W_STATE_DWORD1 0x00000008
+/* Dword 2 */
+#define WFAR_VALUE(x) (x)
+
+/* Stipple command, carried over from the i810, apparently:
+ */
+#define _3DSTATE_STIPPLE (CMD_3D|(0x1d<<24)|(0x83<<16))
+#define ST1_ENABLE (1<<16)
+#define ST1_MASK (0xffff)
+
+#define _3DSTATE_LOAD_STATE_IMMEDIATE_1 (CMD_3D|(0x1d<<24)|(0x04<<16))
+#define I1_LOAD_S(n) (1<<((n)+4))
+#define S3_POINT_WIDTH_SHIFT 23
+#define S3_LINE_WIDTH_SHIFT 19
+#define S3_ALPHA_SHADE_MODE_SHIFT 18
+#define S3_FOG_SHADE_MODE_SHIFT 17
+#define S3_SPEC_SHADE_MODE_SHIFT 16
+#define S3_COLOR_SHADE_MODE_SHIFT 15
+#define S3_CULL_MODE_SHIFT 13
+#define S3_CULLMODE_BOTH (0)
+#define S3_CULLMODE_NONE (1<<13)
+#define S3_CULLMODE_CW (2<<13)
+#define S3_CULLMODE_CCW (3<<13)
+#define S3_POINT_WIDTH_PRESENT (1<<12)
+#define S3_SPEC_FOG_PRESENT (1<<11)
+#define S3_DIFFUSE_PRESENT (1<<10)
+#define S3_DEPTH_OFFSET_PRESENT (1<<9)
+#define S3_POSITION_SHIFT 6
+#define S3_VERTEXHAS_XYZ (1<<6)
+#define S3_VERTEXHAS_XYZW (2<<6)
+#define S3_VERTEXHAS_XY (3<<6)
+#define S3_VERTEXHAS_XYW (4<<6)
+#define S3_ENABLE_SPEC_ADD (1<<5)
+#define S3_ENABLE_FOG (1<<4)
+#define S3_ENABLE_LOCAL_DEPTH_BIAS (1<<3)
+#define S3_ENABLE_SPRITE_POINT (1<<1)
+#define S3_ENABLE_ANTIALIASING 1
+#define S8_ENABLE_ALPHA_TEST (1<<31)
+#define S8_ALPHA_TEST_FUNC_SHIFT 28
+#define S8_ALPHA_REFVALUE_SHIFT 20
+#define S8_ENABLE_DEPTH_TEST (1<<19)
+#define S8_DEPTH_TEST_FUNC_SHIFT 16
+#define S8_ENABLE_COLOR_BLEND (1<<15)
+#define S8_COLOR_BLEND_FUNC_SHIFT 12
+#define S8_BLENDFUNC_ADD (0)
+#define S8_BLENDFUNC_SUB (1<<12)
+#define S8_BLENDFUNC_RVRSE_SUB (2<<12)
+#define S8_BLENDFUNC_MIN (3<<12)
+#define S8_BLENDFUNC_MAX (4<<12)
+#define S8_SRC_BLEND_FACTOR_SHIFT 8
+#define S8_DST_BLEND_FACTOR_SHIFT 4
+#define S8_ENABLE_DEPTH_BUFFER_WRITE (1<<3)
+#define S8_ENABLE_COLOR_BUFFER_WRITE (1<<2)
+
+#define _3DSTATE_LOAD_STATE_IMMEDIATE_2 (CMD_3D|(0x1d<<24)|(0x03<<16))
+#define LOAD_TEXTURE_MAP(x) (1<<((x)+11))
+#define LOAD_TEXTURE_BLEND_STAGE(x) (1<<((x)+7))
+#define LOAD_GLOBAL_COLOR_FACTOR (1<<6)
+
+#define TM0S0_ADDRESS_MASK 0xfffffffc
+#define TM0S0_USE_FENCE (1<<1)
+
+#define TM0S1_HEIGHT_SHIFT 21
+#define TM0S1_WIDTH_SHIFT 10
+#define TM0S1_PALETTE_SELECT (1<<9)
+#define TM0S1_MAPSURF_FORMAT_MASK (0x7 << 6)
+#define TM0S1_MAPSURF_FORMAT_SHIFT 6
+#define MAPSURF_8BIT_INDEXED (0<<6)
+#define MAPSURF_8BIT (1<<6)
+#define MAPSURF_16BIT (2<<6)
+#define MAPSURF_32BIT (3<<6)
+#define MAPSURF_411 (4<<6)
+#define MAPSURF_422 (5<<6)
+#define MAPSURF_COMPRESSED (6<<6)
+#define MAPSURF_4BIT_INDEXED (7<<6)
+#define TM0S1_MT_FORMAT_MASK (0x7 << 3)
+#define TM0S1_MT_FORMAT_SHIFT 3
+#define MT_4BIT_IDX_ARGB8888 (7<<3) /* SURFACE_4BIT_INDEXED */
+#define MT_8BIT_IDX_RGB565 (0<<3) /* SURFACE_8BIT_INDEXED */
+#define MT_8BIT_IDX_ARGB1555 (1<<3)
+#define MT_8BIT_IDX_ARGB4444 (2<<3)
+#define MT_8BIT_IDX_AY88 (3<<3)
+#define MT_8BIT_IDX_ABGR8888 (4<<3)
+#define MT_8BIT_IDX_BUMP_88DVDU (5<<3)
+#define MT_8BIT_IDX_BUMP_655LDVDU (6<<3)
+#define MT_8BIT_IDX_ARGB8888 (7<<3)
+#define MT_8BIT_I8 (0<<3) /* SURFACE_8BIT */
+#define MT_8BIT_L8 (1<<3)
+#define MT_8BIT_A8 (4<<3)
+#define MT_16BIT_RGB565 (0<<3) /* SURFACE_16BIT */
+#define MT_16BIT_ARGB1555 (1<<3)
+#define MT_16BIT_ARGB4444 (2<<3)
+#define MT_16BIT_AY88 (3<<3)
+#define MT_16BIT_DIB_ARGB1555_8888 (4<<3)
+#define MT_16BIT_BUMP_88DVDU (5<<3)
+#define MT_16BIT_BUMP_655LDVDU (6<<3)
+#define MT_16BIT_DIB_RGB565_8888 (7<<3)
+#define MT_32BIT_ARGB8888 (0<<3) /* SURFACE_32BIT */
+#define MT_32BIT_ABGR8888 (1<<3)
+#define MT_32BIT_XRGB8888 (2<<3)
+#define MT_32BIT_XBGR8888 (3<<3)
+#define MT_32BIT_BUMP_XLDVDU_8888 (6<<3)
+#define MT_32BIT_DIB_8888 (7<<3)
+#define MT_411_YUV411 (0<<3) /* SURFACE_411 */
+#define MT_422_YCRCB_SWAPY (0<<3) /* SURFACE_422 */
+#define MT_422_YCRCB_NORMAL (1<<3)
+#define MT_422_YCRCB_SWAPUV (2<<3)
+#define MT_422_YCRCB_SWAPUVY (3<<3)
+#define MT_COMPRESS_DXT1 (0<<3) /* SURFACE_COMPRESSED */
+#define MT_COMPRESS_DXT2_3 (1<<3)
+#define MT_COMPRESS_DXT4_5 (2<<3)
+#define MT_COMPRESS_FXT1 (3<<3)
+#define TM0S1_COLORSPACE_CONVERSION (1 << 2)
+#define TM0S1_TILED_SURFACE (1 << 1)
+#define TM0S1_TILE_WALK (1 << 0)
+
+#define TM0S2_PITCH_SHIFT 21
+#define TM0S2_CUBE_FACE_ENA_SHIFT 15
+#define TM0S2_CUBE_FACE_ENA_MASK (1<<15)
+#define TM0S2_MAP_FORMAT (1<<14)
+#define TM0S2_MAP_2D (0<<14)
+#define TM0S2_MAP_3D_CUBE (1<<14)
+#define TM0S2_VERTICAL_LINE_STRIDE (1<<13)
+#define TM0S2_VERITCAL_LINE_STRIDE_OFF (1<<12)
+#define TM0S2_OUTPUT_CHAN_SHIFT 10
+#define TM0S2_OUTPUT_CHAN_MASK (3<<10)
+
+#define TM0S3_MIP_FILTER_MASK (0x3<<30)
+#define TM0S3_MIP_FILTER_SHIFT 30
+#define MIPFILTER_NONE 0
+#define MIPFILTER_NEAREST 1
+#define MIPFILTER_LINEAR 3
+#define TM0S3_MAG_FILTER_MASK (0x3<<28)
+#define TM0S3_MAG_FILTER_SHIFT 28
+#define TM0S3_MIN_FILTER_MASK (0x3<<26)
+#define TM0S3_MIN_FILTER_SHIFT 26
+#define FILTER_NEAREST 0
+#define FILTER_LINEAR 1
+#define FILTER_ANISOTROPIC 2
+
+#define TM0S3_LOD_BIAS_SHIFT 17
+#define TM0S3_LOD_BIAS_MASK (0x1ff<<17)
+#define TM0S3_MAX_MIP_SHIFT 9
+#define TM0S3_MAX_MIP_MASK (0xff<<9)
+#define TM0S3_MIN_MIP_SHIFT 3
+#define TM0S3_MIN_MIP_MASK (0x3f<<3)
+#define TM0S3_KILL_PIXEL (1<<2)
+#define TM0S3_KEYED_FILTER (1<<1)
+#define TM0S3_CHROMA_KEY (1<<0)
+
+/* _3DSTATE_MAP_TEXEL_STREAM, p188 */
+#define _3DSTATE_MAP_TEX_STREAM_CMD (CMD_3D|(0x1c<<24)|(0x05<<19))
+#define DISABLE_TEX_STREAM_BUMP (1<<12)
+#define ENABLE_TEX_STREAM_BUMP ((1<<12)|(1<<11))
+#define TEX_MODIFY_UNIT_0 0
+#define TEX_MODIFY_UNIT_1 (1<<8)
+#define ENABLE_TEX_STREAM_COORD_SET (1<<7)
+#define TEX_STREAM_COORD_SET(x) ((x)<<4)
+#define ENABLE_TEX_STREAM_MAP_IDX (1<<3)
+#define TEX_STREAM_MAP_IDX(x) (x)
+
+#define FLUSH_MAP_CACHE (1<<0)
+
+#define _3DSTATE_MAP_FILTER_CMD (CMD_3D|(0x1c<<24)|(0x02<<19))
+#define FILTER_TEXMAP_INDEX(x) ((x) << 16)
+#define MAG_MODE_FILTER_ENABLE (1 << 5)
+#define MIN_MODE_FILTER_ENABLE (1 << 2)
+#define MAG_MAPFILTER_NEAREST (0 << 3)
+#define MAG_MAPFILTER_LINEAR (1 << 3)
+#define MAG_MAPFILTER_ANISOTROPIC (2 << 3)
+#define MIN_MAPFILTER_NEAREST (0)
+#define MIN_MAPFILTER_LINEAR (1)
+#define MIN_MAPFILTER_ANISOTROPIC (2)
+#define ENABLE_KEYS (1<<15)
+#define DISABLE_COLOR_KEY 0
+#define DISABLE_CHROMA_KEY 0
+#define DISABLE_KILL_PIXEL 0
+#define ENABLE_MIP_MODE_FILTER (1 << 9)
+#define MIPFILTER_NONE 0
+#define MIPFILTER_NEAREST 1
+#define MIPFILTER_LINEAR 3
+
+#define TB0C_LAST_STAGE (1 << 31)
+#define TB0C_RESULT_SCALE_1X (0 << 29)
+#define TB0C_RESULT_SCALE_2X (1 << 29)
+#define TB0C_RESULT_SCALE_4X (2 << 29)
+#define TB0C_OP_MODULE (3 << 25)
+#define TB0C_OUTPUT_WRITE_CURRENT (0 << 24)
+#define TB0C_OUTPUT_WRITE_ACCUM (1 << 24)
+#define TB0C_ARG3_REPLICATE_ALPHA (1<<23)
+#define TB0C_ARG3_INVERT (1<<22)
+#define TB0C_ARG3_SEL_XXX
+#define TB0C_ARG2_REPLICATE_ALPHA (1<<17)
+#define TB0C_ARG2_INVERT (1<<16)
+#define TB0C_ARG2_SEL_ONE (0 << 12)
+#define TB0C_ARG2_SEL_FACTOR (1 << 12)
+#define TB0C_ARG2_SEL_TEXEL0 (6 << 12)
+#define TB0C_ARG2_SEL_TEXEL1 (7 << 12)
+#define TB0C_ARG2_SEL_TEXEL2 (8 << 12)
+#define TB0C_ARG2_SEL_TEXEL3 (9 << 12)
+#define TB0C_ARG1_REPLICATE_ALPHA (1<<11)
+#define TB0C_ARG1_INVERT (1<<10)
+#define TB0C_ARG1_SEL_ONE (0 << 6)
+#define TB0C_ARG1_SEL_TEXEL0 (6 << 6)
+#define TB0C_ARG1_SEL_TEXEL1 (7 << 6)
+#define TB0C_ARG1_SEL_TEXEL2 (8 << 6)
+#define TB0C_ARG1_SEL_TEXEL3 (9 << 6)
+#define TB0C_ARG0_REPLICATE_ALPHA (1<<5)
+#define TB0C_ARG0_SEL_XXX
+
+#define TB0A_CTR_STAGE_ENABLE (1<<31)
+#define TB0A_RESULT_SCALE_1X (0 << 29)
+#define TB0A_RESULT_SCALE_2X (1 << 29)
+#define TB0A_RESULT_SCALE_4X (2 << 29)
+#define TB0A_OP_MODULE (3 << 25)
+#define TB0A_OUTPUT_WRITE_CURRENT (0<<24)
+#define TB0A_OUTPUT_WRITE_ACCUM (1<<24)
+#define TB0A_CTR_STAGE_SEL_BITS_XXX
+#define TB0A_ARG3_SEL_XXX
+#define TB0A_ARG3_INVERT (1<<17)
+#define TB0A_ARG2_INVERT (1<<16)
+#define TB0A_ARG2_SEL_ONE (0 << 12)
+#define TB0A_ARG2_SEL_TEXEL0 (6 << 12)
+#define TB0A_ARG2_SEL_TEXEL1 (7 << 12)
+#define TB0A_ARG2_SEL_TEXEL2 (8 << 12)
+#define TB0A_ARG2_SEL_TEXEL3 (9 << 12)
+#define TB0A_ARG1_INVERT (1<<10)
+#define TB0A_ARG1_SEL_ONE (0 << 6)
+#define TB0A_ARG1_SEL_TEXEL0 (6 << 6)
+#define TB0A_ARG1_SEL_TEXEL1 (7 << 6)
+#define TB0A_ARG1_SEL_TEXEL2 (8 << 6)
+#define TB0A_ARG1_SEL_TEXEL3 (9 << 6)
+
+#endif /* GEN2_RENDER_H */
diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
new file mode 100644
index 0000000..203de08
--- a/dev/null
+++ b/src/sna/gen3_render.c
@@ -0,0 +1,3694 @@
+/*
+ * Copyright © 2010-2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "sna.h"
+#include "sna_render.h"
+#include "sna_render_inline.h"
+#include "sna_reg.h"
+#include "sna_video.h"
+
+#include "gen3_render.h"
+
+#if DEBUG_RENDER
+#undef DBG
+#define DBG(x) ErrorF x
+#else
+#define NDEBUG 1
+#endif
+
+#define NO_COMPOSITE 0
+#define NO_COMPOSITE_SPANS 0
+#define NO_COPY 0
+#define NO_COPY_BOXES 0
+#define NO_FILL 0
+#define NO_FILL_BOXES 0
+
+enum {
+ SHADER_NONE = 0,
+ SHADER_ZERO,
+ SHADER_CONSTANT,
+ SHADER_LINEAR,
+ SHADER_RADIAL,
+ SHADER_TEXTURE,
+ SHADER_OPACITY,
+};
+
+#define OUT_BATCH(v) batch_emit(sna, v)
+#define OUT_BATCH_F(v) batch_emit_float(sna, v)
+#define OUT_VERTEX(v) vertex_emit(sna, v)
+
+enum gen3_radial_mode {
+ RADIAL_ONE,
+ RADIAL_TWO
+};
+
+static const struct blendinfo {
+ Bool dst_alpha;
+ Bool src_alpha;
+ uint32_t src_blend;
+ uint32_t dst_blend;
+} gen3_blend_op[] = {
+ /* Clear */ {0, 0, BLENDFACT_ZERO, BLENDFACT_ZERO},
+ /* Src */ {0, 0, BLENDFACT_ONE, BLENDFACT_ZERO},
+ /* Dst */ {0, 0, BLENDFACT_ZERO, BLENDFACT_ONE},
+ /* Over */ {0, 1, BLENDFACT_ONE, BLENDFACT_INV_SRC_ALPHA},
+ /* OverReverse */ {1, 0, BLENDFACT_INV_DST_ALPHA, BLENDFACT_ONE},
+ /* In */ {1, 0, BLENDFACT_DST_ALPHA, BLENDFACT_ZERO},
+ /* InReverse */ {0, 1, BLENDFACT_ZERO, BLENDFACT_SRC_ALPHA},
+ /* Out */ {1, 0, BLENDFACT_INV_DST_ALPHA, BLENDFACT_ZERO},
+ /* OutReverse */ {0, 1, BLENDFACT_ZERO, BLENDFACT_INV_SRC_ALPHA},
+ /* Atop */ {1, 1, BLENDFACT_DST_ALPHA, BLENDFACT_INV_SRC_ALPHA},
+ /* AtopReverse */ {1, 1, BLENDFACT_INV_DST_ALPHA, BLENDFACT_SRC_ALPHA},
+ /* Xor */ {1, 1, BLENDFACT_INV_DST_ALPHA, BLENDFACT_INV_SRC_ALPHA},
+ /* Add */ {0, 0, BLENDFACT_ONE, BLENDFACT_ONE},
+};
+
+static const struct formatinfo {
+ int fmt, xfmt;
+ uint32_t card_fmt;
+ Bool rb_reversed;
+} gen3_tex_formats[] = {
+ {PICT_a8, 0, MAPSURF_8BIT | MT_8BIT_A8, FALSE},
+ {PICT_a8r8g8b8, 0, MAPSURF_32BIT | MT_32BIT_ARGB8888, FALSE},
+ {PICT_x8r8g8b8, 0, MAPSURF_32BIT | MT_32BIT_XRGB8888, FALSE},
+ {PICT_a8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_ABGR8888, FALSE},
+ {PICT_x8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_XBGR8888, FALSE},
+ {PICT_a2r10g10b10, PICT_x2r10g10b10, MAPSURF_32BIT | MT_32BIT_ARGB2101010, FALSE},
+ {PICT_a2b10g10r10, PICT_x2b10g10r10, MAPSURF_32BIT | MT_32BIT_ABGR2101010, FALSE},
+ {PICT_r5g6b5, 0, MAPSURF_16BIT | MT_16BIT_RGB565, FALSE},
+ {PICT_b5g6r5, 0, MAPSURF_16BIT | MT_16BIT_RGB565, TRUE},
+ {PICT_a1r5g5b5, PICT_x1r5g5b5, MAPSURF_16BIT | MT_16BIT_ARGB1555, FALSE},
+ {PICT_a1b5g5r5, PICT_x1b5g5r5, MAPSURF_16BIT | MT_16BIT_ARGB1555, TRUE},
+ {PICT_a4r4g4b4, PICT_x4r4g4b4, MAPSURF_16BIT | MT_16BIT_ARGB4444, FALSE},
+ {PICT_a4b4g4r4, PICT_x4b4g4r4, MAPSURF_16BIT | MT_16BIT_ARGB4444, TRUE},
+};
+
+#define xFixedToDouble(f) pixman_fixed_to_double(f)
+
+static inline uint32_t gen3_buf_tiling(uint32_t tiling)
+{
+ uint32_t v = 0;
+ switch (tiling) {
+ case I915_TILING_Y: v |= BUF_3D_TILE_WALK_Y;
+ case I915_TILING_X: v |= BUF_3D_TILED_SURFACE;
+ case I915_TILING_NONE: break;
+ }
+ return v;
+}
+
+static inline Bool
+gen3_check_pitch_3d(struct kgem_bo *bo)
+{
+ return bo->pitch <= 8192;
+}
+
+static uint32_t gen3_get_blend_cntl(int op,
+ Bool has_component_alpha,
+ uint32_t dst_format)
+{
+ uint32_t sblend = gen3_blend_op[op].src_blend;
+ uint32_t dblend = gen3_blend_op[op].dst_blend;
+
+ /* If there's no dst alpha channel, adjust the blend op so that we'll
+ * treat it as always 1.
+ */
+ if (gen3_blend_op[op].dst_alpha) {
+ if (PICT_FORMAT_A(dst_format) == 0) {
+ if (sblend == BLENDFACT_DST_ALPHA)
+ sblend = BLENDFACT_ONE;
+ else if (sblend == BLENDFACT_INV_DST_ALPHA)
+ sblend = BLENDFACT_ZERO;
+ }
+
+ /* gen3 engine reads 8bit color buffer into green channel
+ * in cases like color buffer blending etc., and also writes
+ * back green channel. So with dst_alpha blend we should use
+ * color factor. See spec on "8-bit rendering".
+ */
+ if (dst_format == PICT_a8) {
+ if (sblend == BLENDFACT_DST_ALPHA)
+ sblend = BLENDFACT_DST_COLR;
+ else if (sblend == BLENDFACT_INV_DST_ALPHA)
+ sblend = BLENDFACT_INV_DST_COLR;
+ }
+ }
+
+ /* If the source alpha is being used, then we should only be in a case
+ * where the source blend factor is 0, and the source blend value is the
+ * mask channels multiplied by the source picture's alpha.
+ */
+ if (has_component_alpha && gen3_blend_op[op].src_alpha) {
+ if (dblend == BLENDFACT_SRC_ALPHA)
+ dblend = BLENDFACT_SRC_COLR;
+ else if (dblend == BLENDFACT_INV_SRC_ALPHA)
+ dblend = BLENDFACT_INV_SRC_COLR;
+ }
+
+ return (S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE |
+ BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT |
+ sblend << S6_CBUF_SRC_BLEND_FACT_SHIFT |
+ dblend << S6_CBUF_DST_BLEND_FACT_SHIFT);
+}
+
+static Bool gen3_check_dst_format(uint32_t format)
+{
+ switch (format) {
+ case PICT_a8r8g8b8:
+ case PICT_x8r8g8b8:
+ case PICT_a8b8g8r8:
+ case PICT_x8b8g8r8:
+ case PICT_r5g6b5:
+ case PICT_b5g6r5:
+ case PICT_a1r5g5b5:
+ case PICT_x1r5g5b5:
+ case PICT_a1b5g5r5:
+ case PICT_x1b5g5r5:
+ case PICT_a2r10g10b10:
+ case PICT_x2r10g10b10:
+ case PICT_a2b10g10r10:
+ case PICT_x2b10g10r10:
+ case PICT_a8:
+ case PICT_a4r4g4b4:
+ case PICT_x4r4g4b4:
+ case PICT_a4b4g4r4:
+ case PICT_x4b4g4r4:
+ return TRUE;
+ default:
+ return FALSE;
+ }
+}
+
+static Bool gen3_dst_rb_reversed(uint32_t format)
+{
+ switch (format) {
+ case PICT_a8r8g8b8:
+ case PICT_x8r8g8b8:
+ case PICT_r5g6b5:
+ case PICT_a1r5g5b5:
+ case PICT_x1r5g5b5:
+ case PICT_a2r10g10b10:
+ case PICT_x2r10g10b10:
+ case PICT_a8:
+ case PICT_a4r4g4b4:
+ case PICT_x4r4g4b4:
+ return FALSE;
+ default:
+ return TRUE;
+ }
+}
+
+#define DSTORG_HORT_BIAS(x) ((x)<<20)
+#define DSTORG_VERT_BIAS(x) ((x)<<16)
+
+static uint32_t gen3_get_dst_format(uint32_t format)
+{
+#define BIAS (DSTORG_HORT_BIAS(0x8) | DSTORG_VERT_BIAS(0x8))
+ switch (format) {
+ default:
+ case PICT_a8r8g8b8:
+ case PICT_x8r8g8b8:
+ case PICT_a8b8g8r8:
+ case PICT_x8b8g8r8:
+ return BIAS | COLR_BUF_ARGB8888;
+ case PICT_r5g6b5:
+ case PICT_b5g6r5:
+ return BIAS | COLR_BUF_RGB565;
+ case PICT_a1r5g5b5:
+ case PICT_x1r5g5b5:
+ case PICT_a1b5g5r5:
+ case PICT_x1b5g5r5:
+ return BIAS | COLR_BUF_ARGB1555;
+ case PICT_a2r10g10b10:
+ case PICT_x2r10g10b10:
+ case PICT_a2b10g10r10:
+ case PICT_x2b10g10r10:
+ return BIAS | COLR_BUF_ARGB2AAA;
+ case PICT_a8:
+ return BIAS | COLR_BUF_8BIT;
+ case PICT_a4r4g4b4:
+ case PICT_x4r4g4b4:
+ case PICT_a4b4g4r4:
+ case PICT_x4b4g4r4:
+ return BIAS | COLR_BUF_ARGB4444;
+ }
+#undef BIAS
+}
+
+static uint32_t gen3_texture_repeat(uint32_t repeat)
+{
+#define REPEAT(x) \
+ (SS3_NORMALIZED_COORDS | \
+ TEXCOORDMODE_##x << SS3_TCX_ADDR_MODE_SHIFT | \
+ TEXCOORDMODE_##x << SS3_TCY_ADDR_MODE_SHIFT)
+ switch (repeat) {
+ default:
+ case RepeatNone:
+ return REPEAT(CLAMP_BORDER);
+ case RepeatNormal:
+ return REPEAT(WRAP);
+ case RepeatPad:
+ return REPEAT(CLAMP_EDGE);
+ case RepeatReflect:
+ return REPEAT(MIRROR);
+ }
+#undef REPEAT
+}
+
+static uint32_t gen3_gradient_repeat(uint32_t repeat)
+{
+#define REPEAT(x) \
+ (SS3_NORMALIZED_COORDS | \
+ TEXCOORDMODE_##x << SS3_TCX_ADDR_MODE_SHIFT | \
+ TEXCOORDMODE_WRAP << SS3_TCY_ADDR_MODE_SHIFT)
+ switch (repeat) {
+ default:
+ case RepeatNone:
+ return REPEAT(CLAMP_BORDER);
+ case RepeatNormal:
+ return REPEAT(WRAP);
+ case RepeatPad:
+ return REPEAT(CLAMP_EDGE);
+ case RepeatReflect:
+ return REPEAT(MIRROR);
+ }
+#undef REPEAT
+}
+
+static Bool gen3_check_repeat(uint32_t repeat)
+{
+ switch (repeat) {
+ case RepeatNone:
+ case RepeatNormal:
+ case RepeatPad:
+ case RepeatReflect:
+ return TRUE;
+ default:
+ return FALSE;
+ }
+}
+
+static uint32_t gen3_filter(uint32_t filter)
+{
+ switch (filter) {
+ default:
+ assert(0);
+ case PictFilterNearest:
+ return (FILTER_NEAREST << SS2_MAG_FILTER_SHIFT |
+ FILTER_NEAREST << SS2_MIN_FILTER_SHIFT |
+ MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT);
+ case PictFilterBilinear:
+ return (FILTER_LINEAR << SS2_MAG_FILTER_SHIFT |
+ FILTER_LINEAR << SS2_MIN_FILTER_SHIFT |
+ MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT);
+ }
+}
+
+static bool gen3_check_filter(uint32_t filter)
+{
+ switch (filter) {
+ case PictFilterNearest:
+ case PictFilterBilinear:
+ return TRUE;
+ default:
+ return FALSE;
+ }
+}
+
+static inline void
+gen3_emit_composite_dstcoord(struct sna *sna, int16_t dstX, int16_t dstY)
+{
+ OUT_VERTEX(dstX);
+ OUT_VERTEX(dstY);
+}
+
+fastcall static void
+gen3_emit_composite_primitive_constant(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ int16_t dst_x = r->dst.x + op->dst.x;
+ int16_t dst_y = r->dst.y + op->dst.y;
+
+ gen3_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
+ gen3_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
+ gen3_emit_composite_dstcoord(sna, dst_x, dst_y);
+}
+
+fastcall static void
+gen3_emit_composite_primitive_identity_gradient(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ int16_t dst_x, dst_y;
+ int16_t src_x, src_y;
+
+ dst_x = r->dst.x + op->dst.x;
+ dst_y = r->dst.y + op->dst.y;
+ src_x = r->src.x + op->src.offset[0];
+ src_y = r->src.y + op->src.offset[1];
+
+ gen3_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
+ OUT_VERTEX(src_x + r->width);
+ OUT_VERTEX(src_y + r->height);
+
+ gen3_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
+ OUT_VERTEX(src_x);
+ OUT_VERTEX(src_y + r->height);
+
+ gen3_emit_composite_dstcoord(sna, dst_x, dst_y);
+ OUT_VERTEX(src_x);
+ OUT_VERTEX(src_y);
+}
+
+fastcall static void
+gen3_emit_composite_primitive_affine_gradient(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ PictTransform *transform = op->src.transform;
+ int16_t dst_x, dst_y;
+ int16_t src_x, src_y;
+ float sx, sy;
+
+ dst_x = r->dst.x + op->dst.x;
+ dst_y = r->dst.y + op->dst.y;
+ src_x = r->src.x + op->src.offset[0];
+ src_y = r->src.y + op->src.offset[1];
+
+ sna_get_transformed_coordinates(src_x + r->width, src_y + r->height,
+ transform,
+ &sx, &sy);
+ gen3_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
+ OUT_VERTEX(sx);
+ OUT_VERTEX(sy);
+
+ sna_get_transformed_coordinates(src_x, src_y + r->height,
+ transform,
+ &sx, &sy);
+ gen3_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
+ OUT_VERTEX(sx);
+ OUT_VERTEX(sy);
+
+ sna_get_transformed_coordinates(src_x, src_y,
+ transform,
+ &sx, &sy);
+ gen3_emit_composite_dstcoord(sna, dst_x, dst_y);
+ OUT_VERTEX(sx);
+ OUT_VERTEX(sy);
+}
+
+fastcall static void
+gen3_emit_composite_primitive_identity_source(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ float w = r->width;
+ float h = r->height;
+ float *v;
+
+ v = sna->render.vertex_data + sna->render.vertex_used;
+ sna->render.vertex_used += 12;
+
+ v[8] = v[4] = r->dst.x + op->dst.x;
+ v[0] = v[4] + w;
+
+ v[9] = r->dst.y + op->dst.y;
+ v[5] = v[1] = v[9] + h;
+
+ v[10] = v[6] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
+ v[2] = v[6] + w * op->src.scale[0];
+
+ v[11] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
+ v[7] = v[3] = v[11] + h * op->src.scale[1];
+}
+
+fastcall static void
+gen3_emit_composite_primitive_affine_source(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ PictTransform *transform = op->src.transform;
+ int16_t dst_x = r->dst.x + op->dst.x;
+ int16_t dst_y = r->dst.y + op->dst.y;
+ int src_x = r->src.x + (int)op->src.offset[0];
+ int src_y = r->src.y + (int)op->src.offset[1];
+ float sx, sy;
+
+ _sna_get_transformed_coordinates(src_x + r->width, src_y + r->height,
+ transform,
+ &sx, &sy);
+
+ gen3_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
+ OUT_VERTEX(sx * op->src.scale[0]);
+ OUT_VERTEX(sy * op->src.scale[1]);
+
+ _sna_get_transformed_coordinates(src_x, src_y + r->height,
+ transform,
+ &sx, &sy);
+ gen3_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
+ OUT_VERTEX(sx * op->src.scale[0]);
+ OUT_VERTEX(sy * op->src.scale[1]);
+
+ _sna_get_transformed_coordinates(src_x, src_y,
+ transform,
+ &sx, &sy);
+ gen3_emit_composite_dstcoord(sna, dst_x, dst_y);
+ OUT_VERTEX(sx * op->src.scale[0]);
+ OUT_VERTEX(sy * op->src.scale[1]);
+}
+
+fastcall static void
+gen3_emit_composite_primitive_constant_identity_mask(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ float w = r->width;
+ float h = r->height;
+ float *v;
+
+ v = sna->render.vertex_data + sna->render.vertex_used;
+ sna->render.vertex_used += 12;
+
+ v[8] = v[4] = r->dst.x + op->dst.x;
+ v[0] = v[4] + w;
+
+ v[9] = r->dst.y + op->dst.y;
+ v[5] = v[1] = v[9] + h;
+
+ v[10] = v[6] = (r->mask.x + op->mask.offset[0]) * op->mask.scale[0];
+ v[2] = v[6] + w * op->mask.scale[0];
+
+ v[11] = (r->mask.y + op->mask.offset[1]) * op->mask.scale[1];
+ v[7] = v[3] = v[11] + h * op->mask.scale[1];
+}
+
+fastcall static void
+gen3_emit_composite_primitive_identity_source_mask(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ float dst_x, dst_y;
+ float src_x, src_y;
+ float msk_x, msk_y;
+ float w, h;
+ float *v;
+
+ dst_x = r->dst.x + op->dst.x;
+ dst_y = r->dst.y + op->dst.y;
+ src_x = r->src.x + op->src.offset[0];
+ src_y = r->src.y + op->src.offset[1];
+ msk_x = r->mask.x + op->mask.offset[0];
+ msk_y = r->mask.y + op->mask.offset[1];
+ w = r->width;
+ h = r->height;
+
+ v = sna->render.vertex_data + sna->render.vertex_used;
+ sna->render.vertex_used += 18;
+
+ v[0] = dst_x + w;
+ v[1] = dst_y + h;
+ v[2] = (src_x + w) * op->src.scale[0];
+ v[3] = (src_y + h) * op->src.scale[1];
+ v[4] = (msk_x + w) * op->mask.scale[0];
+ v[5] = (msk_y + h) * op->mask.scale[1];
+
+ v[6] = dst_x;
+ v[7] = v[1];
+ v[8] = src_x * op->src.scale[0];
+ v[9] = v[3];
+ v[10] = msk_x * op->mask.scale[0];
+ v[11] =v[5];
+
+ v[12] = v[6];
+ v[13] = dst_y;
+ v[14] = v[8];
+ v[15] = src_y * op->src.scale[1];
+ v[16] = v[10];
+ v[17] = msk_y * op->mask.scale[1];
+}
+
+fastcall static void
+gen3_emit_composite_primitive_affine_source_mask(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ int16_t src_x, src_y;
+ float dst_x, dst_y;
+ float msk_x, msk_y;
+ float w, h;
+ float *v;
+
+ dst_x = r->dst.x + op->dst.x;
+ dst_y = r->dst.y + op->dst.y;
+ src_x = r->src.x + op->src.offset[0];
+ src_y = r->src.y + op->src.offset[1];
+ msk_x = r->mask.x + op->mask.offset[0];
+ msk_y = r->mask.y + op->mask.offset[1];
+ w = r->width;
+ h = r->height;
+
+ v = sna->render.vertex_data + sna->render.vertex_used;
+ sna->render.vertex_used += 18;
+
+ v[0] = dst_x + w;
+ v[1] = dst_y + h;
+ sna_get_transformed_coordinates(src_x + r->width, src_y + r->height,
+ op->src.transform,
+ &v[2], &v[3]);
+ v[2] *= op->src.scale[0];
+ v[3] *= op->src.scale[1];
+ v[4] = (msk_x + w) * op->mask.scale[0];
+ v[5] = (msk_y + h) * op->mask.scale[1];
+
+ v[6] = dst_x;
+ v[7] = v[1];
+ sna_get_transformed_coordinates(src_x, src_y + r->height,
+ op->src.transform,
+ &v[8], &v[9]);
+ v[8] *= op->src.scale[0];
+ v[9] *= op->src.scale[1];
+ v[10] = msk_x * op->mask.scale[0];
+ v[11] =v[5];
+
+ v[12] = v[6];
+ v[13] = dst_y;
+ sna_get_transformed_coordinates(src_x, src_y,
+ op->src.transform,
+ &v[14], &v[15]);
+ v[14] *= op->src.scale[0];
+ v[15] *= op->src.scale[1];
+ v[16] = v[10];
+ v[17] = msk_y * op->mask.scale[1];
+}
+
+static void
+gen3_emit_composite_texcoord(struct sna *sna,
+ const struct sna_composite_channel *channel,
+ int16_t x, int16_t y)
+{
+ float s = 0, t = 0, w = 1;
+
+ switch (channel->gen3.type) {
+ case SHADER_OPACITY:
+ case SHADER_NONE:
+ case SHADER_ZERO:
+ case SHADER_CONSTANT:
+ break;
+
+ case SHADER_LINEAR:
+ case SHADER_RADIAL:
+ case SHADER_TEXTURE:
+ x += channel->offset[0];
+ y += channel->offset[1];
+ if (channel->is_affine) {
+ sna_get_transformed_coordinates(x, y,
+ channel->transform,
+ &s, &t);
+ OUT_VERTEX(s * channel->scale[0]);
+ OUT_VERTEX(t * channel->scale[1]);
+ } else {
+ sna_get_transformed_coordinates_3d(x, y,
+ channel->transform,
+ &s, &t, &w);
+ OUT_VERTEX(s * channel->scale[0]);
+ OUT_VERTEX(t * channel->scale[1]);
+ OUT_VERTEX(0);
+ OUT_VERTEX(w);
+ }
+ break;
+ }
+}
+
+static void
+gen3_emit_composite_vertex(struct sna *sna,
+ const struct sna_composite_op *op,
+ int16_t srcX, int16_t srcY,
+ int16_t maskX, int16_t maskY,
+ int16_t dstX, int16_t dstY)
+{
+ gen3_emit_composite_dstcoord(sna, dstX, dstY);
+ gen3_emit_composite_texcoord(sna, &op->src, srcX, srcY);
+ gen3_emit_composite_texcoord(sna, &op->mask, maskX, maskY);
+}
+
+fastcall static void
+gen3_emit_composite_primitive(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ gen3_emit_composite_vertex(sna, op,
+ r->src.x + r->width,
+ r->src.y + r->height,
+ r->mask.x + r->width,
+ r->mask.y + r->height,
+ op->dst.x + r->dst.x + r->width,
+ op->dst.y + r->dst.y + r->height);
+ gen3_emit_composite_vertex(sna, op,
+ r->src.x,
+ r->src.y + r->height,
+ r->mask.x,
+ r->mask.y + r->height,
+ op->dst.x + r->dst.x,
+ op->dst.y + r->dst.y + r->height);
+ gen3_emit_composite_vertex(sna, op,
+ r->src.x,
+ r->src.y,
+ r->mask.x,
+ r->mask.y,
+ op->dst.x + r->dst.x,
+ op->dst.y + r->dst.y);
+}
+
+static inline void
+gen3_2d_perspective(struct sna *sna, int in, int out)
+{
+ gen3_fs_rcp(out, 0, gen3_fs_operand(in, W, W, W, W));
+ gen3_fs_mul(out,
+ gen3_fs_operand(in, X, Y, ZERO, ONE),
+ gen3_fs_operand_reg(out));
+}
+
+static inline void
+gen3_linear_coord(struct sna *sna,
+ const struct sna_composite_channel *channel,
+ int in, int out)
+{
+ int c = channel->gen3.constants;
+
+ if (!channel->is_affine) {
+ gen3_2d_perspective(sna, in, FS_U0);
+ in = FS_U0;
+ }
+
+ gen3_fs_mov(out, gen3_fs_operand_zero());
+ gen3_fs_dp3(out, MASK_X,
+ gen3_fs_operand(in, X, Y, ONE, ZERO),
+ gen3_fs_operand_reg(c));
+}
+
+static void
+gen3_radial_coord(struct sna *sna,
+ const struct sna_composite_channel *channel,
+ int in, int out)
+{
+ int c = channel->gen3.constants;
+
+ if (!channel->is_affine) {
+ gen3_2d_perspective(sna, in, FS_U0);
+ in = FS_U0;
+ }
+
+ switch (channel->gen3.mode) {
+ case RADIAL_ONE:
+ /*
+ pdx = (x - c1x) / dr, pdy = (y - c1y) / dr;
+ r² = pdx*pdx + pdy*pdy
+ t = r²/sqrt(r²) - r1/dr;
+ */
+ gen3_fs_mad(FS_U0, MASK_X | MASK_Y,
+ gen3_fs_operand(in, X, Y, ZERO, ZERO),
+ gen3_fs_operand(c, Z, Z, ZERO, ZERO),
+ gen3_fs_operand(c, NEG_X, NEG_Y, ZERO, ZERO));
+ gen3_fs_dp2add(FS_U0, MASK_X,
+ gen3_fs_operand(FS_U0, X, Y, ZERO, ZERO),
+ gen3_fs_operand(FS_U0, X, Y, ZERO, ZERO),
+ gen3_fs_operand_zero());
+ gen3_fs_rsq(out, MASK_X, gen3_fs_operand(FS_U0, X, X, X, X));
+ gen3_fs_mad(out, 0,
+ gen3_fs_operand(FS_U0, X, ZERO, ZERO, ZERO),
+ gen3_fs_operand(out, X, ZERO, ZERO, ZERO),
+ gen3_fs_operand(c, W, ZERO, ZERO, ZERO));
+ break;
+
+ case RADIAL_TWO:
+ /*
+ pdx = x - c1x, pdy = y - c1y;
+ A = dx² + dy² - dr²
+ B = -2*(pdx*dx + pdy*dy + r1*dr);
+ C = pdx² + pdy² - r1²;
+ det = B*B - 4*A*C;
+ t = (-B + sqrt (det)) / (2 * A)
+ */
+
+ /* u0.x = pdx, u0.y = pdy, u[0].z = r1; */
+ gen3_fs_add(FS_U0,
+ gen3_fs_operand(in, X, Y, ZERO, ZERO),
+ gen3_fs_operand(c, X, Y, Z, ZERO));
+ /* u0.x = pdx, u0.y = pdy, u[0].z = r1, u[0].w = B; */
+ gen3_fs_dp3(FS_U0, MASK_W,
+ gen3_fs_operand(FS_U0, X, Y, ONE, ZERO),
+ gen3_fs_operand(c+1, X, Y, Z, ZERO));
+ /* u1.x = pdx² + pdy² - r1²; [C] */
+ gen3_fs_dp3(FS_U1, MASK_X,
+ gen3_fs_operand(FS_U0, X, Y, Z, ZERO),
+ gen3_fs_operand(FS_U0, X, Y, NEG_Z, ZERO));
+ /* u1.x = C, u1.y = B, u1.z=-4*A; */
+ gen3_fs_mov_masked(FS_U1, MASK_Y, gen3_fs_operand(FS_U0, W, W, W, W));
+ gen3_fs_mov_masked(FS_U1, MASK_Z, gen3_fs_operand(c, W, W, W, W));
+ /* u1.x = B² - 4*A*C */
+ gen3_fs_dp2add(FS_U1, MASK_X,
+ gen3_fs_operand(FS_U1, X, Y, ZERO, ZERO),
+ gen3_fs_operand(FS_U1, Z, Y, ZERO, ZERO),
+ gen3_fs_operand_zero());
+ /* out.x = -B + sqrt (B² - 4*A*C), */
+ gen3_fs_rsq(out, MASK_X, gen3_fs_operand(FS_U1, X, X, X, X));
+ gen3_fs_mad(out, MASK_X,
+ gen3_fs_operand(out, X, ZERO, ZERO, ZERO),
+ gen3_fs_operand(FS_U1, X, ZERO, ZERO, ZERO),
+ gen3_fs_operand(FS_U0, NEG_W, ZERO, ZERO, ZERO));
+ /* out.x = (-B + sqrt (B² - 4*A*C)) / (2 * A), */
+ gen3_fs_mul(out,
+ gen3_fs_operand(out, X, ZERO, ZERO, ZERO),
+ gen3_fs_operand(c+1, W, ZERO, ZERO, ZERO));
+ break;
+ }
+}
+
+static void
+gen3_composite_emit_shader(struct sna *sna,
+ const struct sna_composite_op *op,
+ uint8_t blend)
+{
+ Bool dst_is_alpha = PIXMAN_FORMAT_RGB(op->dst.format) == 0;
+ const struct sna_composite_channel *src, *mask;
+ struct gen3_render_state *state = &sna->render_state.gen3;
+ uint32_t shader_offset, id;
+ int src_reg, mask_reg;
+ int t, length;
+
+ src = &op->src;
+ mask = &op->mask;
+ if (mask->gen3.type == SHADER_NONE)
+ mask = NULL;
+
+ if (mask && src->is_opaque &&
+ gen3_blend_op[blend].src_alpha &&
+ op->has_component_alpha) {
+ src = mask;
+ mask = NULL;
+ }
+
+ id = (src->gen3.type |
+ src->is_affine << 4 |
+ src->alpha_fixup << 5 |
+ src->rb_reversed << 6);
+ if (mask) {
+ id |= (mask->gen3.type << 8 |
+ mask->is_affine << 12 |
+ gen3_blend_op[blend].src_alpha << 13 |
+ op->has_component_alpha << 14 |
+ mask->alpha_fixup << 15 |
+ mask->rb_reversed << 16);
+ }
+ id |= dst_is_alpha << 24;
+ id |= op->rb_reversed << 25;
+
+ if (id == state->last_shader)
+ return;
+
+ state->last_shader = id;
+
+ shader_offset = sna->kgem.nbatch++;
+ t = 0;
+ switch (src->gen3.type) {
+ case SHADER_NONE:
+ case SHADER_OPACITY:
+ assert(0);
+ case SHADER_ZERO:
+ break;
+ case SHADER_CONSTANT:
+ gen3_fs_dcl(FS_T8);
+ src_reg = FS_T8;
+ break;
+ case SHADER_TEXTURE:
+ case SHADER_RADIAL:
+ case SHADER_LINEAR:
+ gen3_fs_dcl(FS_S0);
+ gen3_fs_dcl(FS_T0);
+ t++;
+ break;
+ }
+
+ if (mask == NULL) {
+ if (src->gen3.type == SHADER_ZERO) {
+ gen3_fs_mov(FS_OC, gen3_fs_operand_zero());
+ goto done;
+ }
+ if (src->alpha_fixup && dst_is_alpha) {
+ gen3_fs_mov(FS_OC, gen3_fs_operand_one());
+ goto done;
+ }
+ /* No mask, so load directly to output color */
+ if (src->gen3.type != SHADER_CONSTANT) {
+ if (dst_is_alpha || src->rb_reversed ^ op->rb_reversed)
+ src_reg = FS_R0;
+ else
+ src_reg = FS_OC;
+ }
+ switch (src->gen3.type) {
+ case SHADER_LINEAR:
+ gen3_linear_coord(sna, src, FS_T0, FS_R0);
+ gen3_fs_texld(src_reg, FS_S0, FS_R0);
+ break;
+
+ case SHADER_RADIAL:
+ gen3_radial_coord(sna, src, FS_T0, FS_R0);
+ gen3_fs_texld(src_reg, FS_S0, FS_R0);
+ break;
+
+ case SHADER_TEXTURE:
+ if (src->is_affine)
+ gen3_fs_texld(src_reg, FS_S0, FS_T0);
+ else
+ gen3_fs_texldp(src_reg, FS_S0, FS_T0);
+ break;
+
+ case SHADER_NONE:
+ case SHADER_CONSTANT:
+ case SHADER_ZERO:
+ break;
+ }
+
+ if (src_reg != FS_OC) {
+ if (src->alpha_fixup)
+ gen3_fs_mov(FS_OC,
+ src->rb_reversed ^ op->rb_reversed ?
+ gen3_fs_operand(src_reg, Z, Y, X, ONE) :
+ gen3_fs_operand(src_reg, X, Y, Z, ONE));
+ else if (dst_is_alpha)
+ gen3_fs_mov(FS_OC, gen3_fs_operand(src_reg, W, W, W, W));
+ else if (src->rb_reversed ^ op->rb_reversed)
+ gen3_fs_mov(FS_OC, gen3_fs_operand(src_reg, Z, Y, X, W));
+ else
+ gen3_fs_mov(FS_OC, gen3_fs_operand_reg(src_reg));
+ } else if (src->alpha_fixup)
+ gen3_fs_mov_masked(FS_OC, MASK_W, gen3_fs_operand_one());
+ } else {
+ int out_reg = FS_OC;
+ if (op->rb_reversed)
+ out_reg = FS_U0;
+
+ switch (mask->gen3.type) {
+ case SHADER_CONSTANT:
+ gen3_fs_dcl(FS_T9);
+ mask_reg = FS_T9;
+ break;
+ case SHADER_TEXTURE:
+ case SHADER_LINEAR:
+ case SHADER_RADIAL:
+ gen3_fs_dcl(FS_S0 + t);
+ case SHADER_OPACITY:
+ gen3_fs_dcl(FS_T0 + t);
+ break;
+ case SHADER_NONE:
+ case SHADER_ZERO:
+ assert(0);
+ break;
+ }
+
+ t = 0;
+ switch (src->gen3.type) {
+ case SHADER_LINEAR:
+ gen3_linear_coord(sna, src, FS_T0, FS_R0);
+ gen3_fs_texld(FS_R0, FS_S0, FS_R0);
+ src_reg = FS_R0;
+ t++;
+ break;
+
+ case SHADER_RADIAL:
+ gen3_radial_coord(sna, src, FS_T0, FS_R0);
+ gen3_fs_texld(FS_R0, FS_S0, FS_R0);
+ src_reg = FS_R0;
+ t++;
+ break;
+
+ case SHADER_TEXTURE:
+ if (src->is_affine)
+ gen3_fs_texld(FS_R0, FS_S0, FS_T0);
+ else
+ gen3_fs_texldp(FS_R0, FS_S0, FS_T0);
+ src_reg = FS_R0;
+ t++;
+ break;
+
+ case SHADER_CONSTANT:
+ case SHADER_NONE:
+ case SHADER_ZERO:
+ break;
+ }
+ if (src->alpha_fixup)
+ gen3_fs_mov_masked(src_reg, MASK_W, gen3_fs_operand_one());
+ if (src->rb_reversed)
+ gen3_fs_mov(src_reg, gen3_fs_operand(src_reg, Z, Y, X, W));
+
+ switch (mask->gen3.type) {
+ case SHADER_LINEAR:
+ gen3_linear_coord(sna, mask, FS_T0 + t, FS_R1);
+ gen3_fs_texld(FS_R1, FS_S0 + t, FS_R1);
+ mask_reg = FS_R1;
+ break;
+
+ case SHADER_RADIAL:
+ gen3_radial_coord(sna, mask, FS_T0 + t, FS_R1);
+ gen3_fs_texld(FS_R1, FS_S0 + t, FS_R1);
+ mask_reg = FS_R1;
+ break;
+
+ case SHADER_TEXTURE:
+ if (mask->is_affine)
+ gen3_fs_texld(FS_R1, FS_S0 + t, FS_T0 + t);
+ else
+ gen3_fs_texldp(FS_R1, FS_S0 + t, FS_T0 + t);
+ mask_reg = FS_R1;
+ break;
+
+ case SHADER_OPACITY:
+ if (dst_is_alpha) {
+ gen3_fs_mul(out_reg,
+ gen3_fs_operand(src_reg, W, W, W, W),
+ gen3_fs_operand(FS_T0 + t, X, X, X, X));
+ } else {
+ gen3_fs_mul(out_reg,
+ gen3_fs_operand(src_reg, X, Y, Z, W),
+ gen3_fs_operand(FS_T0 + t, X, X, X, X));
+ }
+ goto mask_done;
+
+ case SHADER_CONSTANT:
+ case SHADER_NONE:
+ case SHADER_ZERO:
+ break;
+ }
+ if (mask->alpha_fixup)
+ gen3_fs_mov_masked(mask_reg, MASK_W, gen3_fs_operand_one());
+ if (mask->rb_reversed)
+ gen3_fs_mov(mask_reg, gen3_fs_operand(mask_reg, Z, Y, X, W));
+
+ if (dst_is_alpha) {
+ gen3_fs_mul(out_reg,
+ gen3_fs_operand(src_reg, W, W, W, W),
+ gen3_fs_operand(mask_reg, W, W, W, W));
+ } else {
+ /* If component alpha is active in the mask and the blend
+ * operation uses the source alpha, then we know we don't
+ * need the source value (otherwise we would have hit a
+ * fallback earlier), so we provide the source alpha (src.A *
+ * mask.X) as output color.
+ * Conversely, if CA is set and we don't need the source alpha,
+ * then we produce the source value (src.X * mask.X) and the
+ * source alpha is unused. Otherwise, we provide the non-CA
+ * source value (src.X * mask.A).
+ */
+ if (op->has_component_alpha) {
+ if (gen3_blend_op[blend].src_alpha)
+ gen3_fs_mul(out_reg,
+ gen3_fs_operand(src_reg, W, W, W, W),
+ gen3_fs_operand_reg(mask_reg));
+ else
+ gen3_fs_mul(out_reg,
+ gen3_fs_operand_reg(src_reg),
+ gen3_fs_operand_reg(mask_reg));
+ } else {
+ gen3_fs_mul(out_reg,
+ gen3_fs_operand_reg(src_reg),
+ gen3_fs_operand(mask_reg, W, W, W, W));
+ }
+ }
+mask_done:
+ if (op->rb_reversed)
+ gen3_fs_mov(FS_OC, gen3_fs_operand(FS_U0, Z, Y, X, W));
+ }
+
+done:
+ length = sna->kgem.nbatch - shader_offset;
+ sna->kgem.batch[shader_offset] =
+ _3DSTATE_PIXEL_SHADER_PROGRAM | (length - 2);
+}
+
+static uint32_t gen3_ms_tiling(uint32_t tiling)
+{
+ uint32_t v = 0;
+ switch (tiling) {
+ case I915_TILING_Y: v |= MS3_TILE_WALK;
+ case I915_TILING_X: v |= MS3_TILED_SURFACE;
+ case I915_TILING_NONE: break;
+ }
+ return v;
+}
+
+static void gen3_emit_invariant(struct sna *sna)
+{
+ /* Disable independent alpha blend */
+ OUT_BATCH(_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD | IAB_MODIFY_ENABLE |
+ IAB_MODIFY_FUNC | BLENDFUNC_ADD << IAB_FUNC_SHIFT |
+ IAB_MODIFY_SRC_FACTOR | BLENDFACT_ONE << IAB_SRC_FACTOR_SHIFT |
+ IAB_MODIFY_DST_FACTOR | BLENDFACT_ZERO << IAB_DST_FACTOR_SHIFT);
+
+ OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS |
+ CSB_TCB(0, 0) |
+ CSB_TCB(1, 1) |
+ CSB_TCB(2, 2) |
+ CSB_TCB(3, 3) |
+ CSB_TCB(4, 4) |
+ CSB_TCB(5, 5) |
+ CSB_TCB(6, 6) |
+ CSB_TCB(7, 7));
+
+ OUT_BATCH(_3DSTATE_MODES_4_CMD |
+ ENABLE_LOGIC_OP_FUNC |
+ LOGIC_OP_FUNC(LOGICOP_COPY));
+
+ OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | I1_LOAD_S(4) | I1_LOAD_S(5) | 2);
+ OUT_BATCH(0x00000000); /* Disable texture coordinate wrap-shortest */
+ OUT_BATCH((1 << S4_POINT_WIDTH_SHIFT) |
+ S4_LINE_WIDTH_ONE |
+ S4_CULLMODE_NONE |
+ S4_VFMT_XY);
+ OUT_BATCH(0x00000000); /* Stencil. */
+
+ OUT_BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
+ OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE);
+
+ OUT_BATCH(_3DSTATE_LOAD_INDIRECT);
+ OUT_BATCH(0x00000000);
+
+ OUT_BATCH(_3DSTATE_STIPPLE);
+ OUT_BATCH(0x00000000);
+
+ sna->render_state.gen3.need_invariant = FALSE;
+}
+
+static void
+gen3_get_batch(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+#define MAX_OBJECTS 3 /* worst case: dst + src + mask */
+
+ kgem_set_mode(&sna->kgem, KGEM_RENDER);
+
+ if (!kgem_check_batch(&sna->kgem, 200)) {
+ DBG(("%s: flushing batch: size %d > %d\n",
+ __FUNCTION__, 200,
+ sna->kgem.surface-sna->kgem.nbatch));
+ kgem_submit(&sna->kgem);
+ }
+
+ if (sna->kgem.nreloc > KGEM_RELOC_SIZE(&sna->kgem) - MAX_OBJECTS) {
+ DBG(("%s: flushing batch: reloc %d >= %d\n",
+ __FUNCTION__,
+ sna->kgem.nreloc,
+ (int)KGEM_RELOC_SIZE(&sna->kgem) - MAX_OBJECTS));
+ kgem_submit(&sna->kgem);
+ }
+
+ if (sna->kgem.nexec > KGEM_EXEC_SIZE(&sna->kgem) - MAX_OBJECTS - 1) {
+ DBG(("%s: flushing batch: exec %d >= %d\n",
+ __FUNCTION__,
+ sna->kgem.nexec,
+ (int)KGEM_EXEC_SIZE(&sna->kgem) - MAX_OBJECTS - 1));
+ kgem_submit(&sna->kgem);
+ }
+
+ if (sna->render_state.gen3.need_invariant)
+ gen3_emit_invariant(sna);
+#undef MAX_OBJECTS
+}
+
+static void gen3_emit_composite_state(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ struct gen3_render_state *state = &sna->render_state.gen3;
+ uint32_t map[4];
+ uint32_t sampler[4];
+ struct kgem_bo *bo[2];
+ int tex_count, n;
+ uint32_t ss2;
+
+ gen3_get_batch(sna, op);
+
+ /* BUF_INFO is an implicit flush, so skip if the target is unchanged. */
+ if (op->dst.bo->unique_id != state->current_dst) {
+ uint32_t v;
+
+ OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
+ OUT_BATCH(BUF_3D_ID_COLOR_BACK |
+ gen3_buf_tiling(op->dst.bo->tiling) |
+ op->dst.bo->pitch);
+ OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
+ op->dst.bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER,
+ 0));
+
+ OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
+ OUT_BATCH(gen3_get_dst_format(op->dst.format));
+
+ v = (DRAW_YMAX(op->dst.height - 1) |
+ DRAW_XMAX(op->dst.width - 1));
+ if (v != state->last_drawrect_limit) {
+ OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(v);
+ OUT_BATCH(0);
+ state->last_drawrect_limit = v;
+ }
+
+ state->current_dst = op->dst.bo->unique_id;
+ }
+ kgem_bo_mark_dirty(op->dst.bo);
+
+ ss2 = ~0;
+ tex_count = 0;
+ switch (op->src.gen3.type) {
+ case SHADER_OPACITY:
+ case SHADER_NONE:
+ assert(0);
+ case SHADER_ZERO:
+ break;
+ case SHADER_CONSTANT:
+ if (op->src.gen3.mode != state->last_diffuse) {
+ OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
+ OUT_BATCH(op->src.gen3.mode);
+ state->last_diffuse = op->src.gen3.mode;
+ }
+ break;
+ case SHADER_LINEAR:
+ case SHADER_RADIAL:
+ case SHADER_TEXTURE:
+ ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
+ ss2 |= S2_TEXCOORD_FMT(tex_count,
+ op->src.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_4D);
+ map[tex_count * 2 + 0] =
+ op->src.card_format |
+ gen3_ms_tiling(op->src.bo->tiling) |
+ (op->src.height - 1) << MS3_HEIGHT_SHIFT |
+ (op->src.width - 1) << MS3_WIDTH_SHIFT;
+ map[tex_count * 2 + 1] =
+ (op->src.bo->pitch / 4 - 1) << MS4_PITCH_SHIFT;
+
+ sampler[tex_count * 2 + 0] = op->src.filter;
+ sampler[tex_count * 2 + 1] =
+ op->src.repeat |
+ tex_count << SS3_TEXTUREMAP_INDEX_SHIFT;
+ bo[tex_count] = op->src.bo;
+ tex_count++;
+ break;
+ }
+ switch (op->mask.gen3.type) {
+ case SHADER_NONE:
+ case SHADER_ZERO:
+ break;
+ case SHADER_CONSTANT:
+ if (op->mask.gen3.mode != state->last_specular) {
+ OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD);
+ OUT_BATCH(op->mask.gen3.mode);
+ state->last_specular = op->mask.gen3.mode;
+ }
+ break;
+ case SHADER_LINEAR:
+ case SHADER_RADIAL:
+ case SHADER_TEXTURE:
+ ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
+ ss2 |= S2_TEXCOORD_FMT(tex_count,
+ op->mask.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_4D);
+ map[tex_count * 2 + 0] =
+ op->mask.card_format |
+ gen3_ms_tiling(op->mask.bo->tiling) |
+ (op->mask.height - 1) << MS3_HEIGHT_SHIFT |
+ (op->mask.width - 1) << MS3_WIDTH_SHIFT;
+ map[tex_count * 2 + 1] =
+ (op->mask.bo->pitch / 4 - 1) << MS4_PITCH_SHIFT;
+
+ sampler[tex_count * 2 + 0] = op->mask.filter;
+ sampler[tex_count * 2 + 1] =
+ op->mask.repeat |
+ tex_count << SS3_TEXTUREMAP_INDEX_SHIFT;
+ bo[tex_count] = op->mask.bo;
+ tex_count++;
+ break;
+ case SHADER_OPACITY:
+ ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
+ ss2 |= S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_1D);
+ break;
+ }
+
+ {
+ uint32_t blend_offset = sna->kgem.nbatch;
+
+ OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) | I1_LOAD_S(6) | 1);
+ OUT_BATCH(ss2);
+ OUT_BATCH(gen3_get_blend_cntl(op->op,
+ op->has_component_alpha,
+ op->dst.format));
+
+ if (memcmp(sna->kgem.batch + state->last_blend + 1,
+ sna->kgem.batch + blend_offset + 1,
+ 2 * 4) == 0)
+ sna->kgem.nbatch = blend_offset;
+ else
+ state->last_blend = blend_offset;
+ }
+
+ if (op->u.gen3.num_constants) {
+ int count = op->u.gen3.num_constants;
+ if (state->last_constants) {
+ int last = sna->kgem.batch[state->last_constants+1];
+ if (last == (1 << (count >> 2)) - 1 &&
+ memcmp(&sna->kgem.batch[state->last_constants+2],
+ op->u.gen3.constants,
+ count * sizeof(uint32_t)) == 0)
+ count = 0;
+ }
+ if (count) {
+ state->last_constants = sna->kgem.nbatch;
+ OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | count);
+ OUT_BATCH((1 << (count >> 2)) - 1);
+
+ memcpy(sna->kgem.batch + sna->kgem.nbatch,
+ op->u.gen3.constants,
+ count * sizeof(uint32_t));
+ sna->kgem.nbatch += count;
+ }
+ }
+
+ if (tex_count != 0) {
+ uint32_t rewind;
+
+ n = 0;
+ if (tex_count == state->tex_count) {
+ for (; n < tex_count; n++) {
+ if (map[2*n+0] != state->tex_map[2*n+0] ||
+ map[2*n+1] != state->tex_map[2*n+1] ||
+ state->tex_handle[n] != bo[n]->handle ||
+ state->tex_delta[n] != bo[n]->delta)
+ break;
+ }
+ }
+ if (n < tex_count) {
+ OUT_BATCH(_3DSTATE_MAP_STATE | (3 * tex_count));
+ OUT_BATCH((1 << tex_count) - 1);
+ for (n = 0; n < tex_count; n++) {
+ OUT_BATCH(kgem_add_reloc(&sna->kgem,
+ sna->kgem.nbatch,
+ bo[n],
+ I915_GEM_DOMAIN_SAMPLER<< 16,
+ 0));
+ OUT_BATCH(map[2*n + 0]);
+ OUT_BATCH(map[2*n + 1]);
+
+ state->tex_map[2*n+0] = map[2*n+0];
+ state->tex_map[2*n+1] = map[2*n+1];
+ state->tex_handle[n] = bo[n]->handle;
+ state->tex_delta[n] = bo[n]->delta;
+ }
+ state->tex_count = n;
+ }
+
+ rewind = sna->kgem.nbatch;
+ OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * tex_count));
+ OUT_BATCH((1 << tex_count) - 1);
+ for (n = 0; n < tex_count; n++) {
+ OUT_BATCH(sampler[2*n + 0]);
+ OUT_BATCH(sampler[2*n + 1]);
+ OUT_BATCH(0);
+ }
+ if (state->last_sampler &&
+ memcmp(&sna->kgem.batch[state->last_sampler+1],
+ &sna->kgem.batch[rewind + 1],
+ (3*tex_count + 1)*sizeof(uint32_t)) == 0)
+ sna->kgem.nbatch = rewind;
+ else
+ state->last_sampler = rewind;
+ }
+
+ gen3_composite_emit_shader(sna, op, op->op);
+}
+
+static void gen3_magic_ca_pass(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ if (!op->need_magic_ca_pass)
+ return;
+
+ DBG(("%s(%d)\n", __FUNCTION__,
+ sna->render.vertex_index - sna->render.vertex_start));
+
+ OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(6) | 0);
+ OUT_BATCH(gen3_get_blend_cntl(PictOpAdd, TRUE, op->dst.format));
+ gen3_composite_emit_shader(sna, op, PictOpAdd);
+
+ OUT_BATCH(PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL |
+ (sna->render.vertex_index - sna->render.vertex_start));
+ OUT_BATCH(sna->render.vertex_start);
+}
+
+static void gen3_vertex_flush(struct sna *sna)
+{
+ if (sna->render_state.gen3.vertex_offset == 0 ||
+ sna->render.vertex_index == sna->render.vertex_start)
+ return;
+
+ DBG(("%s[%x] = %d\n", __FUNCTION__,
+ 4*sna->render_state.gen3.vertex_offset,
+ sna->render.vertex_index - sna->render.vertex_start));
+
+ sna->kgem.batch[sna->render_state.gen3.vertex_offset] =
+ PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL |
+ (sna->render.vertex_index - sna->render.vertex_start);
+ sna->kgem.batch[sna->render_state.gen3.vertex_offset + 1] =
+ sna->render.vertex_start;
+
+ if (sna->render.op)
+ gen3_magic_ca_pass(sna, sna->render.op);
+
+ sna->render_state.gen3.vertex_offset = 0;
+}
+
+static void gen3_vertex_finish(struct sna *sna, Bool last)
+{
+ struct kgem_bo *bo;
+ int delta;
+
+ DBG(("%s: last? %d\n", __FUNCTION__, last));
+
+ gen3_vertex_flush(sna);
+ if (!sna->render.vertex_used)
+ return;
+
+ if (last && sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) {
+ DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__,
+ sna->render.vertex_used, sna->kgem.nbatch));
+ memcpy(sna->kgem.batch + sna->kgem.nbatch,
+ sna->render.vertex_data,
+ sna->render.vertex_used * 4);
+ delta = sna->kgem.nbatch * 4;
+ bo = NULL;
+ sna->kgem.nbatch += sna->render.vertex_used;
+ } else {
+ bo = kgem_create_linear(&sna->kgem, 4*sna->render.vertex_used);
+ if (bo && !kgem_bo_write(&sna->kgem, bo,
+ sna->render.vertex_data,
+ 4*sna->render.vertex_used)) {
+ kgem_bo_destroy(&sna->kgem, bo);
+ return;
+ }
+ delta = 0;
+ DBG(("%s: new vbo: %d\n", __FUNCTION__,
+ sna->render.vertex_used));
+ }
+
+ DBG(("%s: reloc = %d\n", __FUNCTION__,
+ sna->render.vertex_reloc[0]));
+
+ sna->kgem.batch[sna->render.vertex_reloc[0]] =
+ kgem_add_reloc(&sna->kgem,
+ sna->render.vertex_reloc[0],
+ bo,
+ I915_GEM_DOMAIN_VERTEX << 16,
+ delta);
+ sna->render.vertex_reloc[0] = 0;
+ sna->render.vertex_used = 0;
+ sna->render.vertex_index = 0;
+
+ if (bo)
+ kgem_bo_destroy(&sna->kgem, bo);
+}
+
+static bool gen3_rectangle_begin(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ int ndwords, i1_cmd = 0, i1_len = 0;
+ struct gen3_render_state *state = &sna->render_state.gen3;
+
+ ndwords = 0;
+ if (state->vertex_offset == 0) {
+ ndwords += 2;
+ if (op->need_magic_ca_pass)
+ ndwords += 100;
+ }
+ if (sna->render.vertex_reloc[0] == 0)
+ i1_len++, i1_cmd |= I1_LOAD_S(0), ndwords++;
+ if (state->floats_per_vertex != op->floats_per_vertex)
+ i1_len++, i1_cmd |= I1_LOAD_S(1), ndwords++;
+ if (ndwords == 0)
+ return true;
+
+ if (!kgem_check_batch(&sna->kgem, ndwords+1))
+ return false;
+
+ if (i1_cmd) {
+ OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | i1_cmd | (i1_len - 1));
+ if (sna->render.vertex_reloc[0] == 0)
+ sna->render.vertex_reloc[0] = sna->kgem.nbatch++;
+ if (state->floats_per_vertex != op->floats_per_vertex) {
+ state->floats_per_vertex = op->floats_per_vertex;
+ OUT_BATCH(state->floats_per_vertex << S1_VERTEX_WIDTH_SHIFT |
+ state->floats_per_vertex << S1_VERTEX_PITCH_SHIFT);
+ }
+ }
+
+ if (state->vertex_offset == 0) {
+ if (sna->kgem.nbatch == 2 + state->last_vertex_offset) {
+ state->vertex_offset = state->last_vertex_offset;
+ } else {
+ state->vertex_offset = sna->kgem.nbatch;
+ OUT_BATCH(MI_NOOP); /* to be filled later */
+ OUT_BATCH(MI_NOOP);
+ sna->render.vertex_start = sna->render.vertex_index;
+ state->last_vertex_offset = state->vertex_offset;
+ }
+ }
+
+ return true;
+}
+
+static int gen3_get_rectangles__flush(struct sna *sna, bool ca)
+{
+ if (!kgem_check_batch(&sna->kgem, ca ? 105: 5))
+ return 0;
+ if (sna->kgem.nexec > KGEM_EXEC_SIZE(&sna->kgem) - 2)
+ return 0;
+ if (sna->kgem.nreloc > KGEM_RELOC_SIZE(&sna->kgem) - 1)
+ return 0;
+
+ gen3_vertex_finish(sna, FALSE);
+ assert(sna->render.vertex_index == 0);
+ assert(sna->render.vertex_used == 0);
+ return ARRAY_SIZE(sna->render.vertex_data);
+}
+
+inline static int gen3_get_rectangles(struct sna *sna,
+ const struct sna_composite_op *op,
+ int want)
+{
+ int rem = vertex_space(sna);
+
+ DBG(("%s: want=%d, rem=%d\n",
+ __FUNCTION__, 3*want*op->floats_per_vertex, rem));
+
+ assert(sna->render.vertex_index * op->floats_per_vertex == sna->render.vertex_used);
+ if (op->floats_per_vertex*3 > rem) {
+ DBG(("flushing vbo for %s: %d < %d\n",
+ __FUNCTION__, rem, 3*op->floats_per_vertex));
+ rem = gen3_get_rectangles__flush(sna, op->need_magic_ca_pass);
+ if (rem == 0)
+ return 0;
+ }
+
+ if (!gen3_rectangle_begin(sna, op)) {
+ DBG(("%s: flushing batch\n", __FUNCTION__));
+ return 0;
+ }
+
+ if (want > 1 && want * op->floats_per_vertex*3 > rem)
+ want = rem / (3*op->floats_per_vertex);
+ sna->render.vertex_index += 3*want;
+
+ assert(want);
+ assert(sna->render.vertex_index * op->floats_per_vertex <= ARRAY_SIZE(sna->render.vertex_data));
+ return want;
+}
+
+fastcall static void
+gen3_render_composite_blt(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ DBG(("%s: src=(%d, %d)+(%d, %d), mask=(%d, %d)+(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n", __FUNCTION__,
+ r->src.x, r->src.y, op->src.offset[0], op->src.offset[1],
+ r->mask.x, r->mask.y, op->mask.offset[0], op->mask.offset[1],
+ r->dst.x, r->dst.y, op->dst.x, op->dst.y,
+ r->width, r->height));
+
+ if (!gen3_get_rectangles(sna, op, 1)) {
+ gen3_emit_composite_state(sna, op);
+ gen3_get_rectangles(sna, op, 1);
+ }
+
+ op->prim_emit(sna, op, r);
+}
+
+static void
+gen3_render_composite_boxes(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+ DBG(("%s: nbox=%d, src=+(%d, %d), mask=+(%d, %d), dst=+(%d, %d)\n",
+ __FUNCTION__, nbox,
+ op->src.offset[0], op->src.offset[1],
+ op->mask.offset[0], op->mask.offset[1],
+ op->dst.x, op->dst.y));
+
+ do {
+ int nbox_this_time;
+
+ nbox_this_time = gen3_get_rectangles(sna, op, nbox);
+ if (nbox_this_time == 0) {
+ gen3_emit_composite_state(sna, op);
+ nbox_this_time = gen3_get_rectangles(sna, op, nbox);
+ }
+ nbox -= nbox_this_time;
+
+ do {
+ struct sna_composite_rectangles r;
+
+ DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
+ box->x1, box->y1,
+ box->x2 - box->x1,
+ box->y2 - box->y1));
+
+ r.dst.x = box->x1; r.dst.y = box->y1;
+ r.width = box->x2 - box->x1;
+ r.height = box->y2 - box->y1;
+ r.src = r.mask = r.dst;
+
+ op->prim_emit(sna, op, &r);
+ box++;
+ } while (--nbox_this_time);
+ } while (nbox);
+}
+
+static void
+gen3_render_composite_done(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ assert(sna->render.op == op);
+
+ gen3_vertex_flush(sna);
+ sna->render.op = NULL;
+ _kgem_set_mode(&sna->kgem, KGEM_RENDER);
+
+ DBG(("%s()\n", __FUNCTION__));
+
+ sna_render_composite_redirect_done(sna, op);
+
+ if (op->src.bo)
+ kgem_bo_destroy(&sna->kgem, op->src.bo);
+ if (op->mask.bo)
+ kgem_bo_destroy(&sna->kgem, op->mask.bo);
+}
+
+static void
+gen3_render_reset(struct sna *sna)
+{
+ struct gen3_render_state *state = &sna->render_state.gen3;
+
+ state->need_invariant = TRUE;
+ state->current_dst = 0;
+ state->tex_count = 0;
+ state->last_drawrect_limit = ~0U;
+ state->last_target = 0;
+ state->last_blend = 0;
+ state->last_constants = 0;
+ state->last_sampler = 0;
+ state->last_shader = 0;
+ state->last_diffuse = 0xcc00ffee;
+ state->last_specular = 0xcc00ffee;
+
+ state->floats_per_vertex = 0;
+ state->last_floats_per_vertex = 0;
+ state->last_vertex_offset = 0;
+ state->vertex_offset = 0;
+
+ assert(sna->render.vertex_used == 0);
+ assert(sna->render.vertex_index == 0);
+ assert(sna->render.vertex_reloc[0] == 0);
+}
+
+static Bool gen3_composite_channel_set_format(struct sna_composite_channel *channel,
+ CARD32 format)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(gen3_tex_formats); i++) {
+ if (gen3_tex_formats[i].fmt == format) {
+ channel->card_format = gen3_tex_formats[i].card_fmt;
+ channel->rb_reversed = gen3_tex_formats[i].rb_reversed;
+ return TRUE;
+ }
+ }
+ return FALSE;
+}
+
+static Bool source_is_covered(PicturePtr picture,
+ int x, int y,
+ int width, int height)
+{
+ int x1, y1, x2, y2;
+
+ if (picture->repeat && picture->repeatType != RepeatNone)
+ return TRUE;
+
+ if (picture->pDrawable == NULL)
+ return FALSE;
+
+ if (picture->transform) {
+ pixman_box16_t sample;
+
+ sample.x1 = x;
+ sample.y1 = y;
+ sample.x2 = x + width;
+ sample.y2 = y + height;
+
+ pixman_transform_bounds(picture->transform, &sample);
+
+ x1 = sample.x1;
+ x2 = sample.x2;
+ y1 = sample.y1;
+ y2 = sample.y2;
+ } else {
+ x1 = x;
+ y1 = y;
+ x2 = x + width;
+ y2 = y + height;
+ }
+
+ return
+ x1 >= 0 && y1 >= 0 &&
+ x2 <= picture->pDrawable->width &&
+ y2 <= picture->pDrawable->height;
+}
+
+static Bool gen3_composite_channel_set_xformat(PicturePtr picture,
+ struct sna_composite_channel *channel,
+ int x, int y,
+ int width, int height)
+{
+ int i;
+
+ if (PICT_FORMAT_A(picture->format) != 0)
+ return FALSE;
+
+ if (width == 0 || height == 0)
+ return FALSE;
+
+ if (!source_is_covered(picture, x, y, width, height))
+ return FALSE;
+
+ for (i = 0; i < ARRAY_SIZE(gen3_tex_formats); i++) {
+ if (gen3_tex_formats[i].xfmt == picture->format) {
+ channel->card_format = gen3_tex_formats[i].card_fmt;
+ channel->rb_reversed = gen3_tex_formats[i].rb_reversed;
+ channel->alpha_fixup = true;
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+static int
+gen3_init_solid(struct sna *sna,
+ struct sna_composite_channel *channel,
+ uint32_t color)
+{
+ channel->gen3.mode = color;
+ channel->gen3.type = SHADER_CONSTANT;
+ if (color == 0)
+ channel->gen3.type = SHADER_ZERO;
+ if ((color & 0xff000000) == 0xff000000)
+ channel->is_opaque = true;
+
+ /* for consistency */
+ channel->repeat = RepeatNormal;
+ channel->filter = PictFilterNearest;
+ channel->pict_format = PICT_a8r8g8b8;
+ channel->card_format = MAPSURF_32BIT | MT_32BIT_ARGB8888;
+
+ return 1;
+}
+
+static void gen3_composite_channel_convert(struct sna_composite_channel *channel)
+{
+ if (channel->gen3.type == SHADER_TEXTURE)
+ channel->repeat = gen3_texture_repeat(channel->repeat);
+ else
+ channel->repeat = gen3_gradient_repeat(channel->repeat);
+
+ channel->filter = gen3_filter(channel->filter);
+ if (channel->card_format == 0)
+ gen3_composite_channel_set_format(channel, channel->pict_format);
+}
+
+static Bool gen3_gradient_setup(struct sna *sna,
+ PicturePtr picture,
+ struct sna_composite_channel *channel,
+ int16_t ox, int16_t oy)
+{
+ int16_t dx, dy;
+
+ if (picture->repeat == 0) {
+ channel->repeat = RepeatNone;
+ } else switch (picture->repeatType) {
+ case RepeatNone:
+ case RepeatNormal:
+ case RepeatPad:
+ case RepeatReflect:
+ channel->repeat = picture->repeatType;
+ break;
+ default:
+ return FALSE;
+ }
+
+ channel->bo =
+ sna_render_get_gradient(sna,
+ (PictGradient *)picture->pSourcePict);
+ if (channel->bo == NULL)
+ return FALSE;
+
+ channel->pict_format = PICT_a8r8g8b8;
+ channel->card_format = MAPSURF_32BIT | MT_32BIT_ARGB8888;
+ channel->filter = PictFilterBilinear;
+ channel->is_affine = sna_transform_is_affine(picture->transform);
+ if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
+ DBG(("%s: integer translation (%d, %d), removing\n",
+ __FUNCTION__, dx, dy));
+ ox += dx;
+ oy += dy;
+ channel->transform = NULL;
+ } else
+ channel->transform = picture->transform;
+ channel->width = channel->bo->pitch / 4;
+ channel->height = 1;
+ channel->offset[0] = ox;
+ channel->offset[1] = oy;
+ channel->scale[0] = channel->scale[1] = 1;
+ return TRUE;
+}
+
+static int
+gen3_init_linear(struct sna *sna,
+ PicturePtr picture,
+ struct sna_composite_op *op,
+ struct sna_composite_channel *channel,
+ int ox, int oy)
+{
+ PictLinearGradient *linear =
+ (PictLinearGradient *)picture->pSourcePict;
+ float x0, y0, sf;
+ float dx, dy, offset;
+ int n;
+
+ DBG(("%s: p1=(%f, %f), p2=(%f, %f)\n",
+ __FUNCTION__,
+ xFixedToDouble(linear->p1.x), xFixedToDouble(linear->p1.y),
+ xFixedToDouble(linear->p2.x), xFixedToDouble(linear->p2.y)));
+
+ if (linear->p2.x == linear->p1.x && linear->p2.y == linear->p1.y)
+ return 0;
+
+ dx = xFixedToDouble(linear->p2.x - linear->p1.x);
+ dy = xFixedToDouble(linear->p2.y - linear->p1.y);
+ sf = dx*dx + dy*dy;
+ dx /= sf;
+ dy /= sf;
+
+ x0 = xFixedToDouble(linear->p1.x);
+ y0 = xFixedToDouble(linear->p1.y);
+ offset = dx*x0 + dy*y0;
+
+ n = op->u.gen3.num_constants;
+ channel->gen3.constants = FS_C0 + n / 4;
+ op->u.gen3.constants[n++] = dx;
+ op->u.gen3.constants[n++] = dy;
+ op->u.gen3.constants[n++] = -offset;
+ op->u.gen3.constants[n++] = 0;
+
+ if (!gen3_gradient_setup(sna, picture, channel, ox, oy))
+ return 0;
+
+ channel->gen3.type = SHADER_LINEAR;
+ op->u.gen3.num_constants = n;
+
+ DBG(("%s: dx=%f, dy=%f, offset=%f, constants=%d\n",
+ __FUNCTION__, dx, dy, -offset, channel->gen3.constants - FS_C0));
+ return 1;
+}
+
+static int
+gen3_init_radial(struct sna *sna,
+ PicturePtr picture,
+ struct sna_composite_op *op,
+ struct sna_composite_channel *channel,
+ int ox, int oy)
+{
+ PictRadialGradient *radial = (PictRadialGradient *)picture->pSourcePict;
+ double dx, dy, dr, r1;
+ int n;
+
+ dx = xFixedToDouble(radial->c2.x - radial->c1.x);
+ dy = xFixedToDouble(radial->c2.y - radial->c1.y);
+ dr = xFixedToDouble(radial->c2.radius - radial->c1.radius);
+
+ r1 = xFixedToDouble(radial->c1.radius);
+
+ n = op->u.gen3.num_constants;
+ channel->gen3.constants = FS_C0 + n / 4;
+ if (radial->c2.x == radial->c1.x && radial->c2.y == radial->c1.y) {
+ if (radial->c2.radius == radial->c1.radius)
+ return 0;
+
+ op->u.gen3.constants[n++] = xFixedToDouble(radial->c1.x) / dr;
+ op->u.gen3.constants[n++] = xFixedToDouble(radial->c1.y) / dr;
+ op->u.gen3.constants[n++] = 1. / dr;
+ op->u.gen3.constants[n++] = -r1 / dr;
+
+ channel->gen3.mode = RADIAL_ONE;
+ } else {
+ op->u.gen3.constants[n++] = -xFixedToDouble(radial->c1.x);
+ op->u.gen3.constants[n++] = -xFixedToDouble(radial->c1.y);
+ op->u.gen3.constants[n++] = r1;
+ op->u.gen3.constants[n++] = -4 * (dx*dx + dy*dy - dr*dr);
+
+ op->u.gen3.constants[n++] = -2 * dx;
+ op->u.gen3.constants[n++] = -2 * dy;
+ op->u.gen3.constants[n++] = -2 * r1 * dr;
+ op->u.gen3.constants[n++] = 1 / (2 * (dx*dx + dy*dy - dr*dr));
+
+ channel->gen3.mode = RADIAL_TWO;
+ }
+
+ if (!gen3_gradient_setup(sna, picture, channel, ox, oy))
+ return 0;
+
+ channel->gen3.type = SHADER_RADIAL;
+ op->u.gen3.num_constants = n;
+ return 1;
+}
+
+static Bool
+gen3_composite_picture(struct sna *sna,
+ PicturePtr picture,
+ struct sna_composite_op *op,
+ struct sna_composite_channel *channel,
+ int16_t x, int16_t y,
+ int16_t w, int16_t h,
+ int16_t dst_x, int16_t dst_y)
+{
+ PixmapPtr pixmap;
+ uint32_t color;
+ int16_t dx, dy;
+
+ DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
+ __FUNCTION__, x, y, w, h, dst_x, dst_y));
+
+ channel->card_format = 0;
+
+ if (picture->pDrawable == NULL) {
+ SourcePict *source = picture->pSourcePict;
+ int ret = 0;
+
+ switch (source->type) {
+ case SourcePictTypeSolidFill:
+ ret = gen3_init_solid(sna, channel,
+ source->solidFill.color);
+ break;
+
+ case SourcePictTypeLinear:
+ ret = gen3_init_linear(sna, picture, op, channel,
+ x - dst_x, y - dst_y);
+ break;
+
+ case SourcePictTypeRadial:
+ ret = gen3_init_radial(sna, picture, op, channel,
+ x - dst_x, y - dst_y);
+ break;
+ }
+
+ if (ret == 0)
+ ret = sna_render_picture_fixup(sna, picture, channel,
+ x, y, w, h, dst_x, dst_y);
+ return ret;
+ }
+
+ if (sna_picture_is_solid(picture, &color))
+ return gen3_init_solid(sna, channel, color);
+
+ if (!gen3_check_repeat(picture->repeat))
+ return sna_render_picture_fixup(sna, picture, channel,
+ x, y, w, h, dst_x, dst_y);
+
+ if (!gen3_check_filter(picture->filter))
+ return sna_render_picture_fixup(sna, picture, channel,
+ x, y, w, h, dst_x, dst_y);
+
+ channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
+ channel->filter = picture->filter;
+ channel->pict_format = picture->format;
+
+ pixmap = get_drawable_pixmap(picture->pDrawable);
+ get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);
+
+ x += dx + picture->pDrawable->x;
+ y += dy + picture->pDrawable->y;
+
+ channel->is_affine = sna_transform_is_affine(picture->transform);
+ if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
+ DBG(("%s: integer translation (%d, %d), removing\n",
+ __FUNCTION__, dx, dy));
+ x += dx;
+ y += dy;
+ channel->transform = NULL;
+ channel->filter = PictFilterNearest;
+ } else
+ channel->transform = picture->transform;
+
+ if (!gen3_composite_channel_set_format(channel, picture->format) &&
+ !gen3_composite_channel_set_xformat(picture, channel, x, y, w, h))
+ return sna_render_picture_convert(sna, picture, channel, pixmap,
+ x, y, w, h, dst_x, dst_y);
+
+ if (pixmap->drawable.width > 2048 || pixmap->drawable.height > 2048)
+ return sna_render_picture_extract(sna, picture, channel,
+ x, y, w, h, dst_x, dst_y);
+
+ return sna_render_pixmap_bo(sna, channel, pixmap,
+ x, y, w, h, dst_x, dst_y);
+}
+
+static inline Bool
+picture_is_cpu(PicturePtr picture)
+{
+ if (!picture->pDrawable)
+ return FALSE;
+
+ /* If it is a solid, try to use the render paths */
+ if (picture->pDrawable->width == 1 &&
+ picture->pDrawable->height == 1 &&
+ picture->repeat)
+ return FALSE;
+
+ return is_cpu(picture->pDrawable);
+}
+
+static Bool
+try_blt(struct sna *sna,
+ PicturePtr dst,
+ PicturePtr source,
+ int width, int height)
+{
+ if (sna->kgem.mode == KGEM_BLT) {
+ DBG(("%s: already performing BLT\n", __FUNCTION__));
+ return TRUE;
+ }
+
+ if (width > 2048 || height > 2048) {
+ DBG(("%s: operation too large for 3D pipe (%d, %d)\n",
+ __FUNCTION__, width, height));
+ return TRUE;
+ }
+
+ /* If we can sample directly from user-space, do so */
+ if (sna->kgem.has_vmap)
+ return FALSE;
+
+ /* is the source picture only in cpu memory e.g. a shm pixmap? */
+ return picture_is_cpu(source);
+}
+
+static void
+gen3_align_vertex(struct sna *sna,
+ struct sna_composite_op *op)
+{
+ if (op->floats_per_vertex != sna->render_state.gen3.last_floats_per_vertex) {
+ DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
+ sna->render_state.gen3.last_floats_per_vertex,
+ op->floats_per_vertex,
+ sna->render.vertex_index,
+ (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
+ sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
+ sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
+ sna->render_state.gen3.last_floats_per_vertex = op->floats_per_vertex;
+ }
+}
+
+static Bool
+gen3_composite_set_target(struct sna *sna,
+ struct sna_composite_op *op,
+ PicturePtr dst)
+{
+ struct sna_pixmap *priv;
+
+ op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
+ op->dst.format = dst->format;
+ op->dst.width = op->dst.pixmap->drawable.width;
+ op->dst.height = op->dst.pixmap->drawable.height;
+ priv = sna_pixmap(op->dst.pixmap);
+
+ op->dst.bo = NULL;
+ if (priv && priv->gpu_bo == NULL) {
+ op->dst.bo = priv->cpu_bo;
+ op->damage = &priv->cpu_damage;
+ }
+ if (op->dst.bo == NULL) {
+ priv = sna_pixmap_force_to_gpu(op->dst.pixmap);
+ if (priv == NULL)
+ return FALSE;
+
+ op->dst.bo = priv->gpu_bo;
+ if (!priv->gpu_only)
+ op->damage = &priv->gpu_damage;
+ }
+
+ get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
+ &op->dst.x, &op->dst.y);
+
+ DBG(("%s: pixmap=%p, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d)\n",
+ __FUNCTION__,
+ op->dst.pixmap, (int)op->dst.format,
+ op->dst.width, op->dst.height,
+ op->dst.bo->pitch,
+ op->dst.x, op->dst.y));
+
+ return TRUE;
+}
+
+static inline uint8_t mult(uint32_t s, uint32_t m, int shift)
+{
+ s = (s >> shift) & 0xff;
+ m = (m >> shift) & 0xff;
+ return (s * m) >> 8;
+}
+
+static Bool
+gen3_render_composite(struct sna *sna,
+ uint8_t op,
+ PicturePtr src,
+ PicturePtr mask,
+ PicturePtr dst,
+ int16_t src_x, int16_t src_y,
+ int16_t mask_x, int16_t mask_y,
+ int16_t dst_x, int16_t dst_y,
+ int16_t width, int16_t height,
+ struct sna_composite_op *tmp)
+{
+ DBG(("%s()\n", __FUNCTION__));
+
+#if NO_COMPOSITE
+ return sna_blt_composite(sna, op,
+ src, dst,
+ src_x, src_y,
+ dst_x, dst_y,
+ width, height, tmp);
+#endif
+
+ /* Try to use the BLT engine unless it implies a
+ * 3D -> 2D context switch.
+ */
+ if (mask == NULL &&
+ try_blt(sna, dst, src, width, height) &&
+ sna_blt_composite(sna,
+ op, src, dst,
+ src_x, src_y,
+ dst_x, dst_y,
+ width, height,
+ tmp))
+ return TRUE;
+
+ if (op >= ARRAY_SIZE(gen3_blend_op)) {
+ DBG(("%s: fallback due to unhandled blend op: %d\n",
+ __FUNCTION__, op));
+ return FALSE;
+ }
+
+ if (!gen3_check_dst_format(dst->format)) {
+ DBG(("%s: fallback due to unhandled dst format: %x\n",
+ __FUNCTION__, dst->format));
+ return FALSE;
+ }
+
+ if (need_tiling(sna, width, height))
+ return sna_tiling_composite(sna,
+ op, src, mask, dst,
+ src_x, src_y,
+ mask_x, mask_y,
+ dst_x, dst_y,
+ width, height,
+ tmp);
+
+ memset(&tmp->u.gen3, 0, sizeof(tmp->u.gen3));
+
+ if (!gen3_composite_set_target(sna, tmp, dst)) {
+ DBG(("%s: unable to set render target\n",
+ __FUNCTION__));
+ return FALSE;
+ }
+
+ tmp->op = op;
+ tmp->rb_reversed = gen3_dst_rb_reversed(tmp->dst.format);
+ if (tmp->dst.width > 2048 || tmp->dst.height > 2048 ||
+ !gen3_check_pitch_3d(tmp->dst.bo)) {
+ if (!sna_render_composite_redirect(sna, tmp,
+ dst_x, dst_y, width, height))
+ return FALSE;
+ }
+
+ tmp->src.gen3.type = SHADER_TEXTURE;
+ tmp->src.is_affine = TRUE;
+ DBG(("%s: preparing source\n", __FUNCTION__));
+ switch (gen3_composite_picture(sna, src, tmp, &tmp->src,
+ src_x, src_y,
+ width, height,
+ dst_x, dst_y)) {
+ case -1:
+ goto cleanup_dst;
+ case 0:
+ tmp->src.gen3.type = SHADER_ZERO;
+ break;
+ case 1:
+ gen3_composite_channel_convert(&tmp->src);
+ break;
+ }
+ DBG(("%s: source type=%d\n", __FUNCTION__, tmp->src.gen3.type));
+
+ tmp->mask.gen3.type = SHADER_NONE;
+ tmp->mask.is_affine = TRUE;
+ tmp->need_magic_ca_pass = FALSE;
+ tmp->has_component_alpha = FALSE;
+ if (mask && tmp->src.gen3.type != SHADER_ZERO) {
+ tmp->mask.gen3.type = SHADER_TEXTURE;
+ DBG(("%s: preparing mask\n", __FUNCTION__));
+ switch (gen3_composite_picture(sna, mask, tmp, &tmp->mask,
+ mask_x, mask_y,
+ width, height,
+ dst_x, dst_y)) {
+ case -1:
+ goto cleanup_src;
+ case 0:
+ tmp->mask.gen3.type = SHADER_ZERO;
+ break;
+ case 1:
+ gen3_composite_channel_convert(&tmp->mask);
+ break;
+ }
+ DBG(("%s: mask type=%d\n", __FUNCTION__, tmp->mask.gen3.type));
+
+ if (tmp->mask.gen3.type == SHADER_ZERO) {
+ if (tmp->src.bo) {
+ kgem_bo_destroy(&sna->kgem,
+ tmp->src.bo);
+ tmp->src.bo = NULL;
+ }
+ tmp->src.gen3.type = SHADER_ZERO;
+ tmp->mask.gen3.type = SHADER_NONE;
+ }
+
+ if (tmp->mask.gen3.type != SHADER_NONE &&
+ mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
+ /* Check if it's component alpha that relies on a source alpha
+ * and on the source value. We can only get one of those
+ * into the single source value that we get to blend with.
+ */
+ tmp->has_component_alpha = TRUE;
+ if (tmp->mask.gen3.type == SHADER_CONSTANT &&
+ tmp->mask.gen3.mode == 0xffffffff) {
+ tmp->mask.gen3.type = SHADER_NONE;
+ tmp->has_component_alpha = FALSE;
+ } else if (tmp->src.gen3.type == SHADER_CONSTANT &&
+ tmp->src.gen3.mode == 0xffffffff) {
+ tmp->src = tmp->mask;
+ tmp->mask.gen3.type = SHADER_NONE;
+ tmp->mask.bo = NULL;
+ tmp->has_component_alpha = FALSE;
+ } else if (tmp->src.gen3.type == SHADER_CONSTANT &&
+ tmp->mask.gen3.type == SHADER_CONSTANT) {
+ uint32_t a,r,g,b;
+
+ a = mult(tmp->src.gen3.mode,
+ tmp->mask.gen3.mode,
+ 24);
+ r = mult(tmp->src.gen3.mode,
+ tmp->mask.gen3.mode,
+ 16);
+ g = mult(tmp->src.gen3.mode,
+ tmp->mask.gen3.mode,
+ 8);
+ b = mult(tmp->src.gen3.mode,
+ tmp->mask.gen3.mode,
+ 0);
+
+ DBG(("%s: combining constant source/mask: %x x %x -> %x\n",
+ __FUNCTION__,
+ tmp->src.gen3.mode,
+ tmp->mask.gen3.mode,
+ a << 24 | r << 16 | g << 8 | b));
+
+ tmp->src.gen3.mode =
+ a << 24 | r << 16 | g << 8 | b;
+
+ tmp->mask.gen3.type = SHADER_NONE;
+ tmp->has_component_alpha = FALSE;
+ } else if (gen3_blend_op[op].src_alpha &&
+ (gen3_blend_op[op].src_blend != BLENDFACT_ZERO)) {
+ if (op != PictOpOver)
+ goto cleanup_mask;
+
+ tmp->need_magic_ca_pass = TRUE;
+ tmp->op = PictOpOutReverse;
+ sna->render.vertex_start = sna->render.vertex_index;
+ }
+ }
+ }
+ DBG(("%s: final src/mask type=%d/%d, affine=%d/%d\n", __FUNCTION__,
+ tmp->src.gen3.type, tmp->mask.gen3.type,
+ tmp->src.is_affine, tmp->mask.is_affine));
+
+ tmp->prim_emit = gen3_emit_composite_primitive;
+ if (tmp->mask.gen3.type == SHADER_NONE ||
+ tmp->mask.gen3.type == SHADER_CONSTANT) {
+ switch (tmp->src.gen3.type) {
+ case SHADER_NONE:
+ case SHADER_CONSTANT:
+ tmp->prim_emit = gen3_emit_composite_primitive_constant;
+ break;
+ case SHADER_LINEAR:
+ case SHADER_RADIAL:
+ if (tmp->src.transform == NULL)
+ tmp->prim_emit = gen3_emit_composite_primitive_identity_gradient;
+ else if (tmp->src.is_affine)
+ tmp->prim_emit = gen3_emit_composite_primitive_affine_gradient;
+ break;
+ case SHADER_TEXTURE:
+ if (tmp->src.transform == NULL)
+ tmp->prim_emit = gen3_emit_composite_primitive_identity_source;
+ else if (tmp->src.is_affine)
+ tmp->prim_emit = gen3_emit_composite_primitive_affine_source;
+ break;
+ }
+ } else if (tmp->mask.gen3.type == SHADER_TEXTURE) {
+ if (tmp->mask.transform == NULL) {
+ if (tmp->src.gen3.type == SHADER_CONSTANT)
+ tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask;
+ else if (tmp->src.transform == NULL)
+ tmp->prim_emit = gen3_emit_composite_primitive_identity_source_mask;
+ else if (tmp->src.is_affine)
+ tmp->prim_emit = gen3_emit_composite_primitive_affine_source_mask;
+ }
+ }
+
+ tmp->floats_per_vertex = 2;
+ if (tmp->src.gen3.type != SHADER_CONSTANT &&
+ tmp->src.gen3.type != SHADER_ZERO)
+ tmp->floats_per_vertex += tmp->src.is_affine ? 2 : 3;
+ if (tmp->mask.gen3.type != SHADER_NONE &&
+ tmp->mask.gen3.type != SHADER_CONSTANT)
+ tmp->floats_per_vertex += tmp->mask.is_affine ? 2 : 3;
+ DBG(("%s: floats_per_vertex = 2 + %d + %d = %d\n", __FUNCTION__,
+ (tmp->src.gen3.type != SHADER_CONSTANT &&
+ tmp->src.gen3.type != SHADER_ZERO) ?
+ tmp->src.is_affine ? 2 : 3 : 0,
+ (tmp->mask.gen3.type != SHADER_NONE &&
+ tmp->mask.gen3.type != SHADER_CONSTANT) ?
+ tmp->mask.is_affine ? 2 : 3 : 0,
+ tmp->floats_per_vertex));
+
+ tmp->blt = gen3_render_composite_blt;
+ tmp->boxes = gen3_render_composite_boxes;
+ tmp->done = gen3_render_composite_done;
+
+ if (!kgem_check_bo(&sna->kgem, tmp->dst.bo))
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo(&sna->kgem, tmp->src.bo))
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo(&sna->kgem, tmp->mask.bo))
+ kgem_submit(&sna->kgem);
+
+ if (kgem_bo_is_dirty(tmp->src.bo) || kgem_bo_is_dirty(tmp->mask.bo)) {
+ if (tmp->src.bo == tmp->dst.bo || tmp->mask.bo == tmp->dst.bo) {
+ kgem_emit_flush(&sna->kgem);
+ } else {
+ OUT_BATCH(_3DSTATE_MODES_5_CMD |
+ PIPELINE_FLUSH_RENDER_CACHE |
+ PIPELINE_FLUSH_TEXTURE_CACHE);
+ kgem_clear_dirty(&sna->kgem);
+ }
+ }
+
+ gen3_emit_composite_state(sna, tmp);
+ gen3_align_vertex(sna, tmp);
+
+ sna->render.op = tmp;
+ return TRUE;
+
+cleanup_mask:
+ if (tmp->mask.bo)
+ kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
+cleanup_src:
+ if (tmp->src.bo)
+ kgem_bo_destroy(&sna->kgem, tmp->src.bo);
+cleanup_dst:
+ if (tmp->redirect.real_bo)
+ kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
+ return FALSE;
+}
+
+static void
+gen3_emit_composite_spans_vertex(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ int16_t x, int16_t y,
+ float opacity)
+{
+ gen3_emit_composite_dstcoord(sna, x + op->base.dst.x, y + op->base.dst.y);
+ gen3_emit_composite_texcoord(sna, &op->base.src, x, y);
+ OUT_VERTEX(opacity);
+}
+
+static void
+gen3_emit_composite_spans_primitive_zero(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ const BoxRec *box,
+ float opacity)
+{
+ float *v = sna->render.vertex_data + sna->render.vertex_used;
+ sna->render.vertex_used += 6;
+
+ v[0] = op->base.dst.x + box->x2;
+ v[1] = op->base.dst.y + box->y2;
+
+ v[2] = op->base.dst.x + box->x1;
+ v[3] = v[1];
+
+ v[4] = v[2];
+ v[5] = op->base.dst.x + box->y1;
+}
+
+static void
+gen3_emit_composite_spans_primitive_constant(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ const BoxRec *box,
+ float opacity)
+{
+ float *v = sna->render.vertex_data + sna->render.vertex_used;
+ sna->render.vertex_used += 9;
+
+ v[0] = op->base.dst.x + box->x2;
+ v[1] = op->base.dst.y + box->y2;
+ v[2] = opacity;
+
+ v[3] = op->base.dst.x + box->x1;
+ v[4] = v[1];
+ v[5] = opacity;
+
+ v[6] = v[3];
+ v[7] = op->base.dst.y + box->y1;
+ v[8] = opacity;
+}
+
+static void
+gen3_emit_composite_spans_primitive_identity_source(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ const BoxRec *box,
+ float opacity)
+{
+ float *v = sna->render.vertex_data + sna->render.vertex_used;
+ sna->render.vertex_used += 15;
+
+ v[0] = op->base.dst.x + box->x2;
+ v[1] = op->base.dst.y + box->y2;
+ v[2] = (op->base.src.offset[0] + box->x2) * op->base.src.scale[0];
+ v[3] = (op->base.src.offset[1] + box->y2) * op->base.src.scale[1];
+ v[4] = opacity;
+
+ v[5] = op->base.dst.x + box->x1;
+ v[6] = v[1];
+ v[7] = (op->base.src.offset[0] + box->x1) * op->base.src.scale[0];
+ v[8] = v[3];
+ v[9] = opacity;
+
+ v[10] = v[5];
+ v[11] = op->base.dst.y + box->y1;
+ v[12] = v[7];
+ v[13] = (op->base.src.offset[1] + box->y1) * op->base.src.scale[1];
+ v[14] = opacity;
+}
+
+static void
+gen3_emit_composite_spans_primitive_affine_source(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ const BoxRec *box,
+ float opacity)
+{
+ PictTransform *transform = op->base.src.transform;
+ float x, y, *v;
+
+ v = sna->render.vertex_data + sna->render.vertex_used;
+ sna->render.vertex_used += 15;
+
+ v[0] = op->base.dst.x + box->x2;
+ v[6] = v[1] = op->base.dst.y + box->y2;
+ v[10] = v[5] = op->base.dst.x + box->x1;
+ v[11] = op->base.dst.y + box->y1;
+ v[4] = opacity;
+ v[9] = opacity;
+ v[14] = opacity;
+
+ _sna_get_transformed_coordinates((int)op->base.src.offset[0] + box->x2,
+ (int)op->base.src.offset[1] + box->y2,
+ transform,
+ &x, &y);
+ v[2] = x * op->base.src.scale[0];
+ v[3] = y * op->base.src.scale[1];
+
+ _sna_get_transformed_coordinates((int)op->base.src.offset[0] + box->x1,
+ (int)op->base.src.offset[1] + box->y2,
+ transform,
+ &x, &y);
+ v[7] = x * op->base.src.scale[0];
+ v[8] = y * op->base.src.scale[1];
+
+ _sna_get_transformed_coordinates((int)op->base.src.offset[0] + box->x1,
+ (int)op->base.src.offset[1] + box->y1,
+ transform,
+ &x, &y);
+ v[12] = x * op->base.src.scale[0];
+ v[13] = y * op->base.src.scale[1];
+}
+
+static void
+gen3_emit_composite_spans_primitive_identity_gradient(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ const BoxRec *box,
+ float opacity)
+{
+ float *v = sna->render.vertex_data + sna->render.vertex_used;
+ sna->render.vertex_used += 15;
+
+ v[0] = op->base.dst.x + box->x2;
+ v[1] = op->base.dst.y + box->y2;
+ v[2] = op->base.src.offset[0] + box->x2;
+ v[3] = op->base.src.offset[1] + box->y2;
+ v[4] = opacity;
+
+ v[5] = op->base.dst.x + box->x1;
+ v[6] = v[1];
+ v[7] = op->base.src.offset[0] + box->x1;
+ v[8] = v[3];
+ v[9] = opacity;
+
+ v[10] = v[5];
+ v[11] = op->base.dst.y + box->y1;
+ v[12] = v[7];
+ v[13] = op->base.src.offset[1] + box->y1;
+ v[14] = opacity;
+}
+
+static void
+gen3_emit_composite_spans_primitive_affine_gradient(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ const BoxRec *box,
+ float opacity)
+{
+ PictTransform *transform = op->base.src.transform;
+ float *v = sna->render.vertex_data + sna->render.vertex_used;
+ sna->render.vertex_used += 15;
+
+ v[0] = op->base.dst.x + box->x2;
+ v[1] = op->base.dst.y + box->y2;
+ _sna_get_transformed_coordinates((int)op->base.src.offset[0] + box->x2,
+ (int)op->base.src.offset[1] + box->y2,
+ transform,
+ &v[2], &v[3]);
+ v[4] = opacity;
+
+ v[5] = op->base.dst.x + box->x1;
+ v[6] = v[1];
+ _sna_get_transformed_coordinates((int)op->base.src.offset[0] + box->x1,
+ (int)op->base.src.offset[1] + box->y2,
+ transform,
+ &v[7], &v[8]);
+ v[9] = opacity;
+
+ v[10] = v[5];
+ v[11] = op->base.dst.y + box->y1;
+ _sna_get_transformed_coordinates((int)op->base.src.offset[0] + box->x1,
+ (int)op->base.src.offset[1] + box->y1,
+ transform,
+ &v[12], &v[13]);
+ v[14] = opacity;
+}
+
+static void
+gen3_emit_composite_spans_primitive(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ const BoxRec *box,
+ float opacity)
+{
+ gen3_emit_composite_spans_vertex(sna, op,
+ box->x2, box->y2,
+ opacity);
+ gen3_emit_composite_spans_vertex(sna, op,
+ box->x1, box->y2,
+ opacity);
+ gen3_emit_composite_spans_vertex(sna, op,
+ box->x1, box->y1,
+ opacity);
+}
+
+static void
+gen3_render_composite_spans_boxes(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ const BoxRec *box, int nbox,
+ float opacity)
+{
+ DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n",
+ __FUNCTION__, nbox,
+ op->base.src.offset[0], op->base.src.offset[1],
+ opacity,
+ op->base.dst.x, op->base.dst.y));
+
+ do {
+ int nbox_this_time;
+
+ nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
+ if (nbox_this_time == 0) {
+ gen3_emit_composite_state(sna, &op->base);
+ nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
+ }
+ nbox -= nbox_this_time;
+
+ do {
+ DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
+ box->x1, box->y1,
+ box->x2 - box->x1,
+ box->y2 - box->y1));
+
+ op->prim_emit(sna, op, box++, opacity);
+ } while (--nbox_this_time);
+ } while (nbox);
+}
+
+static void
+gen3_render_composite_spans_done(struct sna *sna,
+ const struct sna_composite_spans_op *op)
+{
+ gen3_vertex_flush(sna);
+ _kgem_set_mode(&sna->kgem, KGEM_RENDER);
+
+ DBG(("%s()\n", __FUNCTION__));
+
+ sna_render_composite_redirect_done(sna, &op->base);
+ if (op->base.src.bo)
+ kgem_bo_destroy(&sna->kgem, op->base.src.bo);
+}
+
+static Bool
+gen3_render_composite_spans(struct sna *sna,
+ uint8_t op,
+ PicturePtr src,
+ PicturePtr dst,
+ int16_t src_x, int16_t src_y,
+ int16_t dst_x, int16_t dst_y,
+ int16_t width, int16_t height,
+ struct sna_composite_spans_op *tmp)
+{
+ DBG(("%s(src=(%d, %d), dst=(%d, %d), size=(%d, %d))\n", __FUNCTION__,
+ src_x, src_y, dst_x, dst_y, width, height));
+
+#if NO_COMPOSITE_SPANS
+ return FALSE;
+#endif
+
+ if (op >= ARRAY_SIZE(gen3_blend_op)) {
+ DBG(("%s: fallback due to unhandled blend op: %d\n",
+ __FUNCTION__, op));
+ return FALSE;
+ }
+
+ if (!gen3_check_dst_format(dst->format)) {
+ DBG(("%s: fallback due to unhandled dst format: %x\n",
+ __FUNCTION__, dst->format));
+ return FALSE;
+ }
+
+ if (need_tiling(sna, width, height))
+ return FALSE;
+
+ if (!gen3_composite_set_target(sna, &tmp->base, dst)) {
+ DBG(("%s: unable to set render target\n",
+ __FUNCTION__));
+ return FALSE;
+ }
+
+ tmp->base.op = op;
+ tmp->base.rb_reversed = gen3_dst_rb_reversed(tmp->base.dst.format);
+ if (tmp->base.dst.width > 2048 || tmp->base.dst.height > 2048 ||
+ !gen3_check_pitch_3d(tmp->base.dst.bo)) {
+ if (!sna_render_composite_redirect(sna, &tmp->base,
+ dst_x, dst_y, width, height))
+ return FALSE;
+ }
+
+ tmp->base.src.gen3.type = SHADER_TEXTURE;
+ tmp->base.src.is_affine = TRUE;
+ DBG(("%s: preparing source\n", __FUNCTION__));
+ switch (gen3_composite_picture(sna, src, &tmp->base, &tmp->base.src,
+ src_x, src_y,
+ width, height,
+ dst_x, dst_y)) {
+ case -1:
+ goto cleanup_dst;
+ case 0:
+ tmp->base.src.gen3.type = SHADER_ZERO;
+ break;
+ case 1:
+ gen3_composite_channel_convert(&tmp->base.src);
+ break;
+ }
+ DBG(("%s: source type=%d\n", __FUNCTION__, tmp->base.src.gen3.type));
+
+ if (tmp->base.src.gen3.type != SHADER_ZERO)
+ tmp->base.mask.gen3.type = SHADER_OPACITY;
+
+ tmp->prim_emit = gen3_emit_composite_spans_primitive;
+ switch (tmp->base.src.gen3.type) {
+ case SHADER_NONE:
+ assert(0);
+ case SHADER_ZERO:
+ tmp->prim_emit = gen3_emit_composite_spans_primitive_zero;
+ break;
+ case SHADER_CONSTANT:
+ tmp->prim_emit = gen3_emit_composite_spans_primitive_constant;
+ break;
+ case SHADER_LINEAR:
+ case SHADER_RADIAL:
+ if (tmp->base.src.transform == NULL)
+ tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_gradient;
+ else if (tmp->base.src.is_affine)
+ tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_gradient;
+ break;
+ case SHADER_TEXTURE:
+ if (tmp->base.src.transform == NULL)
+ tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_source;
+ else if (tmp->base.src.is_affine)
+ tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_source;
+ break;
+ }
+
+ tmp->base.floats_per_vertex = 2;
+ if (tmp->base.src.gen3.type != SHADER_CONSTANT &&
+ tmp->base.src.gen3.type != SHADER_ZERO)
+ tmp->base.floats_per_vertex += tmp->base.src.is_affine ? 2 : 3;
+ tmp->base.floats_per_vertex +=
+ tmp->base.mask.gen3.type == SHADER_OPACITY;
+
+ tmp->boxes = gen3_render_composite_spans_boxes;
+ tmp->done = gen3_render_composite_spans_done;
+
+ if (!kgem_check_bo(&sna->kgem, tmp->base.dst.bo))
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo(&sna->kgem, tmp->base.src.bo))
+ kgem_submit(&sna->kgem);
+
+ if (kgem_bo_is_dirty(tmp->base.src.bo)) {
+ if (tmp->base.src.bo == tmp->base.dst.bo) {
+ kgem_emit_flush(&sna->kgem);
+ } else {
+ OUT_BATCH(_3DSTATE_MODES_5_CMD |
+ PIPELINE_FLUSH_RENDER_CACHE |
+ PIPELINE_FLUSH_TEXTURE_CACHE);
+ kgem_clear_dirty(&sna->kgem);
+ }
+ }
+
+ gen3_emit_composite_state(sna, &tmp->base);
+ gen3_align_vertex(sna, &tmp->base);
+ return TRUE;
+
+cleanup_dst:
+ if (tmp->base.redirect.real_bo)
+ kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
+ return FALSE;
+}
+
+static void
+gen3_emit_video_state(struct sna *sna,
+ struct sna_video *video,
+ struct sna_video_frame *frame,
+ PixmapPtr pixmap,
+ struct kgem_bo *dst_bo,
+ int width, int height)
+{
+ uint32_t shader_offset;
+ uint32_t ms3, s5;
+
+ /* draw rect -- just clipping */
+ OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
+ OUT_BATCH(DRAW_DITHER_OFS_X(pixmap->drawable.x & 3) |
+ DRAW_DITHER_OFS_Y(pixmap->drawable.y & 3));
+ OUT_BATCH(0x00000000); /* ymin, xmin */
+ /* ymax, xmax */
+ OUT_BATCH((width - 1) | (height - 1) << 16);
+ OUT_BATCH(0x00000000); /* yorigin, xorigin */
+
+ OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
+ I1_LOAD_S(1) | I1_LOAD_S(2) | I1_LOAD_S(5) | I1_LOAD_S(6) |
+ 3);
+ OUT_BATCH((4 << S1_VERTEX_WIDTH_SHIFT) | (4 << S1_VERTEX_PITCH_SHIFT));
+ OUT_BATCH(S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D) |
+ S2_TEXCOORD_FMT(1, TEXCOORDFMT_NOT_PRESENT) |
+ S2_TEXCOORD_FMT(2, TEXCOORDFMT_NOT_PRESENT) |
+ S2_TEXCOORD_FMT(3, TEXCOORDFMT_NOT_PRESENT) |
+ S2_TEXCOORD_FMT(4, TEXCOORDFMT_NOT_PRESENT) |
+ S2_TEXCOORD_FMT(5, TEXCOORDFMT_NOT_PRESENT) |
+ S2_TEXCOORD_FMT(6, TEXCOORDFMT_NOT_PRESENT) |
+ S2_TEXCOORD_FMT(7, TEXCOORDFMT_NOT_PRESENT));
+ s5 = 0x0;
+ if (pixmap->drawable.depth < 24)
+ s5 |= S5_COLOR_DITHER_ENABLE;
+ OUT_BATCH(s5);
+ OUT_BATCH((2 << S6_DEPTH_TEST_FUNC_SHIFT) |
+ (2 << S6_CBUF_SRC_BLEND_FACT_SHIFT) |
+ (1 << S6_CBUF_DST_BLEND_FACT_SHIFT) |
+ S6_COLOR_WRITE_ENABLE | (2 << S6_TRISTRIP_PV_SHIFT));
+
+ OUT_BATCH(_3DSTATE_CONST_BLEND_COLOR_CMD);
+ OUT_BATCH(0x00000000);
+
+ OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
+ OUT_BATCH(gen3_get_dst_format(sna_format_for_depth(pixmap->drawable.depth)));
+
+ /* front buffer, pitch, offset */
+ OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
+ OUT_BATCH(BUF_3D_ID_COLOR_BACK |
+ gen3_buf_tiling(dst_bo->tiling) |
+ dst_bo->pitch);
+ OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
+ dst_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER,
+ 0));
+
+ if (!is_planar_fourcc(frame->id)) {
+ OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | 4);
+ OUT_BATCH(0x0000001); /* constant 0 */
+ /* constant 0: brightness/contrast */
+ OUT_BATCH_F(video->brightness / 128.0);
+ OUT_BATCH_F(video->contrast / 255.0);
+ OUT_BATCH_F(0.0);
+ OUT_BATCH_F(0.0);
+
+ OUT_BATCH(_3DSTATE_SAMPLER_STATE | 3);
+ OUT_BATCH(0x00000001);
+ OUT_BATCH(SS2_COLORSPACE_CONVERSION |
+ (FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
+ (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
+ OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE <<
+ SS3_TCX_ADDR_MODE_SHIFT) |
+ (TEXCOORDMODE_CLAMP_EDGE <<
+ SS3_TCY_ADDR_MODE_SHIFT) |
+ (0 << SS3_TEXTUREMAP_INDEX_SHIFT) |
+ SS3_NORMALIZED_COORDS);
+ OUT_BATCH(0x00000000);
+
+ OUT_BATCH(_3DSTATE_MAP_STATE | 3);
+ OUT_BATCH(0x00000001); /* texture map #1 */
+ OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
+ frame->bo,
+ I915_GEM_DOMAIN_SAMPLER << 16,
+ frame->YBufOffset));
+
+ ms3 = MAPSURF_422;
+ switch (frame->id) {
+ case FOURCC_YUY2:
+ ms3 |= MT_422_YCRCB_NORMAL;
+ break;
+ case FOURCC_UYVY:
+ ms3 |= MT_422_YCRCB_SWAPY;
+ break;
+ }
+ ms3 |= (frame->height - 1) << MS3_HEIGHT_SHIFT;
+ ms3 |= (frame->width - 1) << MS3_WIDTH_SHIFT;
+ OUT_BATCH(ms3);
+ OUT_BATCH(((frame->pitch[0] / 4) - 1) << MS4_PITCH_SHIFT);
+
+ shader_offset = sna->kgem.nbatch++;
+
+ gen3_fs_dcl(FS_S0);
+ gen3_fs_dcl(FS_T0);
+ gen3_fs_texld(FS_OC, FS_S0, FS_T0);
+ if (video->brightness != 0) {
+ gen3_fs_add(FS_OC,
+ gen3_fs_operand_reg(FS_OC),
+ gen3_fs_operand(FS_C0, X, X, X, ZERO));
+ }
+ } else {
+ /* For the planar formats, we set up three samplers --
+ * one for each plane, in a Y8 format. Because I
+ * couldn't get the special PLANAR_TO_PACKED
+ * shader setup to work, I did the manual pixel shader:
+ *
+ * y' = y - .0625
+ * u' = u - .5
+ * v' = v - .5;
+ *
+ * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v'
+ * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v'
+ * b = 1.1643 * y' + 2.017 * u' + 0.0 * v'
+ *
+ * register assignment:
+ * r0 = (y',u',v',0)
+ * r1 = (y,y,y,y)
+ * r2 = (u,u,u,u)
+ * r3 = (v,v,v,v)
+ * OC = (r,g,b,1)
+ */
+ OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | (22 - 2));
+ OUT_BATCH(0x000001f); /* constants 0-4 */
+ /* constant 0: normalization offsets */
+ OUT_BATCH_F(-0.0625);
+ OUT_BATCH_F(-0.5);
+ OUT_BATCH_F(-0.5);
+ OUT_BATCH_F(0.0);
+ /* constant 1: r coefficients */
+ OUT_BATCH_F(1.1643);
+ OUT_BATCH_F(0.0);
+ OUT_BATCH_F(1.5958);
+ OUT_BATCH_F(0.0);
+ /* constant 2: g coefficients */
+ OUT_BATCH_F(1.1643);
+ OUT_BATCH_F(-0.39173);
+ OUT_BATCH_F(-0.81290);
+ OUT_BATCH_F(0.0);
+ /* constant 3: b coefficients */
+ OUT_BATCH_F(1.1643);
+ OUT_BATCH_F(2.017);
+ OUT_BATCH_F(0.0);
+ OUT_BATCH_F(0.0);
+ /* constant 4: brightness/contrast */
+ OUT_BATCH_F(video->brightness / 128.0);
+ OUT_BATCH_F(video->contrast / 255.0);
+ OUT_BATCH_F(0.0);
+ OUT_BATCH_F(0.0);
+
+ OUT_BATCH(_3DSTATE_SAMPLER_STATE | 9);
+ OUT_BATCH(0x00000007);
+ /* sampler 0 */
+ OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
+ (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
+ OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE <<
+ SS3_TCX_ADDR_MODE_SHIFT) |
+ (TEXCOORDMODE_CLAMP_EDGE <<
+ SS3_TCY_ADDR_MODE_SHIFT) |
+ (0 << SS3_TEXTUREMAP_INDEX_SHIFT) |
+ SS3_NORMALIZED_COORDS);
+ OUT_BATCH(0x00000000);
+ /* sampler 1 */
+ OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
+ (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
+ OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE <<
+ SS3_TCX_ADDR_MODE_SHIFT) |
+ (TEXCOORDMODE_CLAMP_EDGE <<
+ SS3_TCY_ADDR_MODE_SHIFT) |
+ (1 << SS3_TEXTUREMAP_INDEX_SHIFT) |
+ SS3_NORMALIZED_COORDS);
+ OUT_BATCH(0x00000000);
+ /* sampler 2 */
+ OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
+ (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
+ OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE <<
+ SS3_TCX_ADDR_MODE_SHIFT) |
+ (TEXCOORDMODE_CLAMP_EDGE <<
+ SS3_TCY_ADDR_MODE_SHIFT) |
+ (2 << SS3_TEXTUREMAP_INDEX_SHIFT) |
+ SS3_NORMALIZED_COORDS);
+ OUT_BATCH(0x00000000);
+
+ OUT_BATCH(_3DSTATE_MAP_STATE | 9);
+ OUT_BATCH(0x00000007);
+
+ OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
+ frame->bo,
+ I915_GEM_DOMAIN_SAMPLER << 16,
+ frame->YBufOffset));
+
+ ms3 = MAPSURF_8BIT | MT_8BIT_I8;
+ ms3 |= (frame->height - 1) << MS3_HEIGHT_SHIFT;
+ ms3 |= (frame->width - 1) << MS3_WIDTH_SHIFT;
+ OUT_BATCH(ms3);
+ /* check to see if Y has special pitch than normal
+ * double u/v pitch, e.g i915 XvMC hw requires at
+ * least 1K alignment, so Y pitch might
+ * be same as U/V's.*/
+ if (frame->pitch[1])
+ OUT_BATCH(((frame->pitch[1] / 4) - 1) << MS4_PITCH_SHIFT);
+ else
+ OUT_BATCH(((frame->pitch[0] * 2 / 4) - 1) << MS4_PITCH_SHIFT);
+
+ OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
+ frame->bo,
+ I915_GEM_DOMAIN_SAMPLER << 16,
+ frame->UBufOffset));
+
+ ms3 = MAPSURF_8BIT | MT_8BIT_I8;
+ ms3 |= (frame->height / 2 - 1) << MS3_HEIGHT_SHIFT;
+ ms3 |= (frame->width / 2 - 1) << MS3_WIDTH_SHIFT;
+ OUT_BATCH(ms3);
+ OUT_BATCH(((frame->pitch[0] / 4) - 1) << MS4_PITCH_SHIFT);
+
+ OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
+ frame->bo,
+ I915_GEM_DOMAIN_SAMPLER << 16,
+ frame->VBufOffset));
+
+ ms3 = MAPSURF_8BIT | MT_8BIT_I8;
+ ms3 |= (frame->height / 2 - 1) << MS3_HEIGHT_SHIFT;
+ ms3 |= (frame->width / 2 - 1) << MS3_WIDTH_SHIFT;
+ OUT_BATCH(ms3);
+ OUT_BATCH(((frame->pitch[0] / 4) - 1) << MS4_PITCH_SHIFT);
+
+ shader_offset = sna->kgem.nbatch++;
+
+ /* Declare samplers */
+ gen3_fs_dcl(FS_S0); /* Y */
+ gen3_fs_dcl(FS_S1); /* U */
+ gen3_fs_dcl(FS_S2); /* V */
+ gen3_fs_dcl(FS_T0); /* normalized coords */
+
+ /* Load samplers to temporaries. */
+ gen3_fs_texld(FS_R1, FS_S0, FS_T0);
+ gen3_fs_texld(FS_R2, FS_S1, FS_T0);
+ gen3_fs_texld(FS_R3, FS_S2, FS_T0);
+
+ /* Move the sampled YUV data in R[123] to the first
+ * 3 channels of R0.
+ */
+ gen3_fs_mov_masked(FS_R0, MASK_X,
+ gen3_fs_operand_reg(FS_R1));
+ gen3_fs_mov_masked(FS_R0, MASK_Y,
+ gen3_fs_operand_reg(FS_R2));
+ gen3_fs_mov_masked(FS_R0, MASK_Z,
+ gen3_fs_operand_reg(FS_R3));
+
+ /* Normalize the YUV data */
+ gen3_fs_add(FS_R0, gen3_fs_operand_reg(FS_R0),
+ gen3_fs_operand_reg(FS_C0));
+ /* dot-product the YUV data in R0 by the vectors of
+ * coefficients for calculating R, G, and B, storing
+ * the results in the R, G, or B channels of the output
+ * color. The OC results are implicitly clamped
+ * at the end of the program.
+ */
+ gen3_fs_dp3(FS_OC, MASK_X,
+ gen3_fs_operand_reg(FS_R0),
+ gen3_fs_operand_reg(FS_C1));
+ gen3_fs_dp3(FS_OC, MASK_Y,
+ gen3_fs_operand_reg(FS_R0),
+ gen3_fs_operand_reg(FS_C2));
+ gen3_fs_dp3(FS_OC, MASK_Z,
+ gen3_fs_operand_reg(FS_R0),
+ gen3_fs_operand_reg(FS_C3));
+ /* Set alpha of the output to 1.0, by wiring W to 1
+ * and not actually using the source.
+ */
+ gen3_fs_mov_masked(FS_OC, MASK_W,
+ gen3_fs_operand_one());
+
+ if (video->brightness != 0) {
+ gen3_fs_add(FS_OC,
+ gen3_fs_operand_reg(FS_OC),
+ gen3_fs_operand(FS_C4, X, X, X, ZERO));
+ }
+ }
+
+ sna->kgem.batch[shader_offset] =
+ _3DSTATE_PIXEL_SHADER_PROGRAM |
+ (sna->kgem.nbatch - shader_offset - 2);
+
+ /* video is the last operation in the batch, so state gets reset
+ * afterwards automatically
+ * gen3_reset();
+ */
+}
+
+static void
+gen3_video_get_batch(struct sna *sna)
+{
+ if (!kgem_check_batch(&sna->kgem, 120)) {
+ DBG(("%s: flushing batch: nbatch %d < %d\n",
+ __FUNCTION__,
+ batch_space(sna), 120));
+ kgem_submit(&sna->kgem);
+ }
+
+ if (sna->kgem.nreloc + 4 > KGEM_RELOC_SIZE(&sna->kgem)) {
+ DBG(("%s: flushing batch: reloc %d >= %d\n",
+ __FUNCTION__,
+ sna->kgem.nreloc + 4,
+ (int)KGEM_RELOC_SIZE(&sna->kgem)));
+ kgem_submit(&sna->kgem);
+ }
+
+ if (sna->kgem.nexec + 2 > KGEM_EXEC_SIZE(&sna->kgem)) {
+ DBG(("%s: flushing batch: exec %d >= %d\n",
+ __FUNCTION__,
+ sna->kgem.nexec + 2,
+ (int)KGEM_EXEC_SIZE(&sna->kgem)));
+ kgem_submit(&sna->kgem);
+ }
+
+ if (sna->render_state.gen3.need_invariant)
+ gen3_emit_invariant(sna);
+}
+
+static int
+gen3_get_inline_rectangles(struct sna *sna, int want, int floats_per_vertex)
+{
+ int size = floats_per_vertex * 3;
+ int rem = batch_space(sna) - 1;
+
+ if (size * want > rem)
+ want = rem / size;
+
+ return want;
+}
+
+static Bool
+gen3_render_video(struct sna *sna,
+ struct sna_video *video,
+ struct sna_video_frame *frame,
+ RegionPtr dstRegion,
+ short src_w, short src_h,
+ short drw_w, short drw_h,
+ PixmapPtr pixmap)
+{
+ BoxPtr pbox = REGION_RECTS(dstRegion);
+ int nbox = REGION_NUM_RECTS(dstRegion);
+ int dxo = dstRegion->extents.x1;
+ int dyo = dstRegion->extents.y1;
+ int width = dstRegion->extents.x2 - dxo;
+ int height = dstRegion->extents.y2 - dyo;
+ float src_scale_x, src_scale_y;
+ int pix_xoff, pix_yoff;
+ struct kgem_bo *dst_bo;
+ int copy = 0;
+
+ DBG(("%s: %dx%d -> %dx%d\n", __FUNCTION__, src_w, src_h, drw_w, drw_h));
+
+ if (pixmap->drawable.width > 2048 ||
+ pixmap->drawable.height > 2048 ||
+ !gen3_check_pitch_3d(sna_pixmap_get_bo(pixmap))) {
+ int bpp = pixmap->drawable.bitsPerPixel;
+
+ dst_bo = kgem_create_2d(&sna->kgem,
+ width, height, bpp,
+ kgem_choose_tiling(&sna->kgem,
+ I915_TILING_X,
+ width, height, bpp),
+ 0);
+ if (!dst_bo)
+ return FALSE;
+
+ pix_xoff = -dxo;
+ pix_yoff = -dyo;
+ copy = 1;
+ } else {
+ dst_bo = sna_pixmap_get_bo(pixmap);
+
+ width = pixmap->drawable.width;
+ height = pixmap->drawable.height;
+
+ /* Set up the offset for translating from the given region
+ * (in screen coordinates) to the backing pixmap.
+ */
+#ifdef COMPOSITE
+ pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
+ pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
+#else
+ pix_xoff = 0;
+ pix_yoff = 0;
+#endif
+ }
+
+ src_scale_x = ((float)src_w / frame->width) / drw_w;
+ src_scale_y = ((float)src_h / frame->height) / drw_h;
+
+ DBG(("%s: src offset=(%d, %d), scale=(%f, %f), dst offset=(%d, %d)\n",
+ __FUNCTION__,
+ dxo, dyo, src_scale_x, src_scale_y, pix_xoff, pix_yoff));
+
+ gen3_video_get_batch(sna);
+ gen3_emit_video_state(sna, video, frame, pixmap,
+ dst_bo, width, height);
+ do {
+ int nbox_this_time = gen3_get_inline_rectangles(sna, nbox, 4);
+ if (nbox_this_time == 0) {
+ gen3_video_get_batch(sna);
+ gen3_emit_video_state(sna, video, frame, pixmap,
+ dst_bo, width, height);
+ nbox_this_time = gen3_get_inline_rectangles(sna, nbox, 4);
+ }
+ nbox -= nbox_this_time;
+
+ OUT_BATCH(PRIM3D_RECTLIST | (12 * nbox_this_time - 1));
+ while (nbox_this_time--) {
+ int box_x1 = pbox->x1;
+ int box_y1 = pbox->y1;
+ int box_x2 = pbox->x2;
+ int box_y2 = pbox->y2;
+
+ pbox++;
+
+ DBG(("%s: box (%d, %d), (%d, %d)\n",
+ __FUNCTION__, box_x1, box_y1, box_x2, box_y2));
+
+ /* bottom right */
+ OUT_BATCH_F(box_x2 + pix_xoff);
+ OUT_BATCH_F(box_y2 + pix_yoff);
+ OUT_BATCH_F((box_x2 - dxo) * src_scale_x);
+ OUT_BATCH_F((box_y2 - dyo) * src_scale_y);
+
+ /* bottom left */
+ OUT_BATCH_F(box_x1 + pix_xoff);
+ OUT_BATCH_F(box_y2 + pix_yoff);
+ OUT_BATCH_F((box_x1 - dxo) * src_scale_x);
+ OUT_BATCH_F((box_y2 - dyo) * src_scale_y);
+
+ /* top left */
+ OUT_BATCH_F(box_x1 + pix_xoff);
+ OUT_BATCH_F(box_y1 + pix_yoff);
+ OUT_BATCH_F((box_x1 - dxo) * src_scale_x);
+ OUT_BATCH_F((box_y1 - dyo) * src_scale_y);
+ }
+ } while (nbox);
+
+ if (copy) {
+#ifdef COMPOSITE
+ pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
+ pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
+#else
+ pix_xoff = 0;
+ pix_yoff = 0;
+#endif
+ sna_blt_copy_boxes(sna, GXcopy,
+ dst_bo, -dxo, -dyo,
+ sna_pixmap_get_bo(pixmap), pix_xoff, pix_yoff,
+ pixmap->drawable.bitsPerPixel,
+ REGION_RECTS(dstRegion),
+ REGION_NUM_RECTS(dstRegion));
+
+ kgem_bo_destroy(&sna->kgem, dst_bo);
+ }
+
+ return TRUE;
+}
+
+static void
+gen3_render_copy_setup_source(struct sna *sna,
+ struct sna_composite_channel *channel,
+ PixmapPtr pixmap,
+ struct kgem_bo *bo)
+{
+ channel->gen3.type = SHADER_TEXTURE;
+ channel->filter = gen3_filter(PictFilterNearest);
+ channel->repeat = gen3_texture_repeat(RepeatNone);
+ channel->width = pixmap->drawable.width;
+ channel->height = pixmap->drawable.height;
+ channel->scale[0] = 1./pixmap->drawable.width;
+ channel->scale[1] = 1./pixmap->drawable.height;
+ channel->offset[0] = 0;
+ channel->offset[1] = 0;
+ gen3_composite_channel_set_format(channel,
+ sna_format_for_depth(pixmap->drawable.depth));
+ channel->bo = bo;
+ channel->is_affine = 1;
+}
+
+static Bool
+gen3_render_copy_boxes(struct sna *sna, uint8_t alu,
+ PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
+ PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
+ const BoxRec *box, int n)
+{
+ struct sna_composite_op tmp;
+
+#if NO_COPY_BOXES
+ return sna_blt_copy_boxes(sna, alu,
+ src_bo, src_dx, src_dy,
+ dst_bo, dst_dx, dst_dy,
+ dst->drawable.bitsPerPixel,
+ box, n);
+#endif
+
+ DBG(("%s (%d, %d)->(%d, %d) x %d\n",
+ __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n));
+
+ if (sna_blt_copy_boxes(sna, alu,
+ src_bo, src_dx, src_dy,
+ dst_bo, dst_dx, dst_dy,
+ dst->drawable.bitsPerPixel,
+ box, n))
+ return TRUE;
+
+ if (!(alu == GXcopy || alu == GXclear) ||
+ src_bo == dst_bo || /* XXX handle overlap using 3D ? */
+ src_bo->pitch > 8192 ||
+ src->drawable.width > 2048 ||
+ src->drawable.height > 2048 ||
+ dst_bo->pitch > 8192 ||
+ dst->drawable.width > 2048 ||
+ dst->drawable.height > 2048)
+ return sna_blt_copy_boxes(sna, alu,
+ src_bo, src_dx, src_dy,
+ dst_bo, dst_dx, dst_dy,
+ dst->drawable.bitsPerPixel,
+ box, n);
+
+ if (!kgem_check_bo(&sna->kgem, dst_bo))
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo(&sna->kgem, src_bo))
+ kgem_submit(&sna->kgem);
+
+ if (kgem_bo_is_dirty(src_bo))
+ kgem_emit_flush(&sna->kgem);
+
+ memset(&tmp, 0, sizeof(tmp));
+ tmp.op = alu == GXcopy ? PictOpSrc : PictOpClear;
+
+ tmp.dst.pixmap = dst;
+ tmp.dst.width = dst->drawable.width;
+ tmp.dst.height = dst->drawable.height;
+ tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
+ tmp.dst.bo = dst_bo;
+
+ gen3_render_copy_setup_source(sna, &tmp.src, src, src_bo);
+
+ tmp.floats_per_vertex = 4;
+ tmp.mask.gen3.type = SHADER_NONE;
+
+ gen3_emit_composite_state(sna, &tmp);
+ gen3_align_vertex(sna, &tmp);
+
+ do {
+ int n_this_time;
+
+ n_this_time = gen3_get_rectangles(sna, &tmp, n);
+ if (n_this_time == 0) {
+ gen3_emit_composite_state(sna, &tmp);
+ n_this_time = gen3_get_rectangles(sna, &tmp, n);
+ }
+ n -= n_this_time;
+
+ do {
+ DBG((" (%d, %d) -> (%d, %d) + (%d, %d)\n",
+ box->x1 + src_dx, box->y1 + src_dy,
+ box->x1 + dst_dx, box->y1 + dst_dy,
+ box->x2 - box->x1, box->y2 - box->y1));
+ OUT_VERTEX(box->x2 + dst_dx);
+ OUT_VERTEX(box->y2 + dst_dy);
+ OUT_VERTEX((box->x2 + src_dx) * tmp.src.scale[0]);
+ OUT_VERTEX((box->y2 + src_dy) * tmp.src.scale[1]);
+
+ OUT_VERTEX(box->x1 + dst_dx);
+ OUT_VERTEX(box->y2 + dst_dy);
+ OUT_VERTEX((box->x1 + src_dx) * tmp.src.scale[0]);
+ OUT_VERTEX((box->y2 + src_dy) * tmp.src.scale[1]);
+
+ OUT_VERTEX(box->x1 + dst_dx);
+ OUT_VERTEX(box->y1 + dst_dy);
+ OUT_VERTEX((box->x1 + src_dx) * tmp.src.scale[0]);
+ OUT_VERTEX((box->y1 + src_dy) * tmp.src.scale[1]);
+
+ box++;
+ } while (--n_this_time);
+ } while (n);
+
+ gen3_vertex_flush(sna);
+ _kgem_set_mode(&sna->kgem, KGEM_RENDER);
+ return TRUE;
+}
+
+static void
+gen3_render_copy_blt(struct sna *sna,
+ const struct sna_copy_op *op,
+ int16_t sx, int16_t sy,
+ int16_t w, int16_t h,
+ int16_t dx, int16_t dy)
+{
+ if (!gen3_get_rectangles(sna, &op->base, 1)) {
+ gen3_emit_composite_state(sna, &op->base);
+ gen3_get_rectangles(sna, &op->base, 1);
+ }
+
+ OUT_VERTEX(dx+w);
+ OUT_VERTEX(dy+h);
+ OUT_VERTEX((sx+w)*op->base.src.scale[0]);
+ OUT_VERTEX((sy+h)*op->base.src.scale[1]);
+
+ OUT_VERTEX(dx);
+ OUT_VERTEX(dy+h);
+ OUT_VERTEX(sx*op->base.src.scale[0]);
+ OUT_VERTEX((sy+h)*op->base.src.scale[1]);
+
+ OUT_VERTEX(dx);
+ OUT_VERTEX(dy);
+ OUT_VERTEX(sx*op->base.src.scale[0]);
+ OUT_VERTEX(sy*op->base.src.scale[1]);
+}
+
+static void
+gen3_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
+{
+ gen3_vertex_flush(sna);
+ _kgem_set_mode(&sna->kgem, KGEM_RENDER);
+}
+
+static Bool
+gen3_render_copy(struct sna *sna, uint8_t alu,
+ PixmapPtr src, struct kgem_bo *src_bo,
+ PixmapPtr dst, struct kgem_bo *dst_bo,
+ struct sna_copy_op *tmp)
+{
+#if NO_COPY
+ return sna_blt_copy(sna, alu,
+ src_bo, dst_bo,
+ dst->drawable.bitsPerPixel,
+ op);
+#endif
+
+ /* Prefer to use the BLT */
+ if (sna->kgem.mode == KGEM_BLT &&
+ src->drawable.bitsPerPixel == dst->drawable.bitsPerPixel &&
+ sna_blt_copy(sna, alu,
+ src_bo, dst_bo,
+ dst->drawable.bitsPerPixel,
+ tmp))
+ return TRUE;
+
+ /* Must use the BLT if we can't RENDER... */
+ if (!(alu == GXcopy || alu == GXclear) ||
+ src->drawable.width > 2048 || src->drawable.height > 2048 ||
+ dst->drawable.width > 2048 || dst->drawable.height > 2048 ||
+ src_bo->pitch > 8192 || dst_bo->pitch > 8192) {
+ if (src->drawable.bitsPerPixel != dst->drawable.bitsPerPixel)
+ return FALSE;
+
+ return sna_blt_copy(sna, alu, src_bo, dst_bo,
+ dst->drawable.bitsPerPixel,
+ tmp);
+ }
+
+ tmp->base.op = alu == GXcopy ? PictOpSrc : PictOpClear;
+
+ tmp->base.dst.pixmap = dst;
+ tmp->base.dst.width = dst->drawable.width;
+ tmp->base.dst.height = dst->drawable.height;
+ tmp->base.dst.format = sna_format_for_depth(dst->drawable.depth);
+ tmp->base.dst.bo = dst_bo;
+
+ gen3_render_copy_setup_source(sna, &tmp->base.src, src, src_bo);
+
+ tmp->base.floats_per_vertex = 4;
+ tmp->base.mask.gen3.type = SHADER_NONE;
+
+ if (!kgem_check_bo(&sna->kgem, dst_bo))
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo(&sna->kgem, src_bo))
+ kgem_submit(&sna->kgem);
+
+ tmp->blt = gen3_render_copy_blt;
+ tmp->done = gen3_render_copy_done;
+
+ gen3_emit_composite_state(sna, &tmp->base);
+ gen3_align_vertex(sna, &tmp->base);
+ return TRUE;
+}
+
+static Bool
+gen3_render_fill_boxes_try_blt(struct sna *sna,
+ CARD8 op, PictFormat format,
+ const xRenderColor *color,
+ PixmapPtr dst, struct kgem_bo *dst_bo,
+ const BoxRec *box, int n)
+{
+ uint8_t alu = GXcopy;
+ uint32_t pixel;
+
+ if (!sna_get_pixel_from_rgba(&pixel,
+ color->red,
+ color->green,
+ color->blue,
+ color->alpha,
+ format))
+ return FALSE;
+
+ if (op == PictOpClear) {
+ alu = GXclear;
+ pixel = 0;
+ op = PictOpSrc;
+ }
+
+ if (op == PictOpOver) {
+ if ((pixel & 0xff000000) == 0xff000000)
+ op = PictOpSrc;
+ }
+
+ if (op != PictOpSrc)
+ return FALSE;
+
+ return sna_blt_fill_boxes(sna, alu,
+ dst_bo, dst->drawable.bitsPerPixel,
+ pixel, box, n);
+}
+
+static Bool
+gen3_render_fill_boxes(struct sna *sna,
+ CARD8 op,
+ PictFormat format,
+ const xRenderColor *color,
+ PixmapPtr dst, struct kgem_bo *dst_bo,
+ const BoxRec *box, int n)
+{
+ struct sna_composite_op tmp;
+ uint32_t pixel;
+
+#if NO_FILL_BOXES
+ return gen3_render_fill_boxes_try_blt(sna, op, format, color,
+ dst, dst_bo,
+ box, n);
+#endif
+
+ DBG(("%s (op=%d, color=(%04x,%04x,%04x, %04x))\n",
+ __FUNCTION__, op,
+ color->red, color->green, color->blue, color->alpha));
+
+ if (op >= ARRAY_SIZE(gen3_blend_op)) {
+ DBG(("%s: fallback due to unhandled blend op: %d\n",
+ __FUNCTION__, op));
+ return FALSE;
+ }
+
+ if (dst->drawable.width > 2048 ||
+ dst->drawable.height > 2048 ||
+ dst_bo->pitch > 8192)
+ return gen3_render_fill_boxes_try_blt(sna, op, format, color,
+ dst, dst_bo,
+ box, n);
+
+ if (gen3_render_fill_boxes_try_blt(sna, op, format, color,
+ dst, dst_bo,
+ box, n))
+ return TRUE;
+
+ if (!sna_get_pixel_from_rgba(&pixel,
+ color->red,
+ color->green,
+ color->blue,
+ color->alpha,
+ PICT_a8r8g8b8))
+ return FALSE;
+
+ memset(&tmp, 0, sizeof(tmp));
+ tmp.op = op;
+ tmp.dst.pixmap = dst;
+ tmp.dst.width = dst->drawable.width;
+ tmp.dst.height = dst->drawable.height;
+ tmp.dst.format = format;
+ tmp.dst.bo = dst_bo;
+ tmp.floats_per_vertex = 2;
+
+ tmp.src.gen3.type = SHADER_CONSTANT;
+ tmp.src.gen3.mode = pixel;
+
+ if (!kgem_check_bo(&sna->kgem, dst_bo))
+ kgem_submit(&sna->kgem);
+
+ gen3_emit_composite_state(sna, &tmp);
+ gen3_align_vertex(sna, &tmp);
+
+ do {
+ int n_this_time = gen3_get_rectangles(sna, &tmp, n);
+ if (n_this_time == 0) {
+ gen3_emit_composite_state(sna, &tmp);
+ n_this_time = gen3_get_rectangles(sna, &tmp, n);
+ }
+ n -= n_this_time;
+
+ do {
+ DBG((" (%d, %d), (%d, %d)\n",
+ box->x1, box->y1, box->x2, box->y2));
+ OUT_VERTEX(box->x2);
+ OUT_VERTEX(box->y2);
+ OUT_VERTEX(box->x1);
+ OUT_VERTEX(box->y2);
+ OUT_VERTEX(box->x1);
+ OUT_VERTEX(box->y1);
+ box++;
+ } while (--n_this_time);
+ } while (n);
+
+ gen3_vertex_flush(sna);
+ _kgem_set_mode(&sna->kgem, KGEM_RENDER);
+ return TRUE;
+}
+
+static void
+gen3_render_fill_blt(struct sna *sna,
+ const struct sna_fill_op *op,
+ int16_t x, int16_t y, int16_t w, int16_t h)
+{
+ if (!gen3_get_rectangles(sna, &op->base, 1)) {
+ gen3_emit_composite_state(sna, &op->base);
+ gen3_get_rectangles(sna, &op->base, 1);
+ }
+
+ OUT_VERTEX(x+w);
+ OUT_VERTEX(y+h);
+ OUT_VERTEX(x);
+ OUT_VERTEX(y+h);
+ OUT_VERTEX(x);
+ OUT_VERTEX(y);
+}
+
+static void
+gen3_render_fill_done(struct sna *sna, const struct sna_fill_op *op)
+{
+ gen3_vertex_flush(sna);
+ _kgem_set_mode(&sna->kgem, KGEM_RENDER);
+}
+
+static Bool
+gen3_render_fill(struct sna *sna, uint8_t alu,
+ PixmapPtr dst, struct kgem_bo *dst_bo,
+ uint32_t color,
+ struct sna_fill_op *tmp)
+{
+#if NO_FILL
+ return sna_blt_fill(sna, alu,
+ dst_bo, dst->drawable.bitsPerPixel,
+ color,
+ op);
+#endif
+
+ /* Prefer to use the BLT if already engaged */
+ if (sna->kgem.mode == KGEM_BLT &&
+ sna_blt_fill(sna, alu,
+ dst_bo, dst->drawable.bitsPerPixel,
+ color,
+ tmp))
+ return TRUE;
+
+ /* Must use the BLT if we can't RENDER... */
+ if (!(alu == GXcopy || alu == GXclear) ||
+ dst->drawable.width > 2048 || dst->drawable.height > 2048 ||
+ dst_bo->pitch > 8192)
+ return sna_blt_fill(sna, alu,
+ dst_bo, dst->drawable.bitsPerPixel,
+ color,
+ tmp);
+
+ if (alu == GXclear)
+ color = 0;
+
+ tmp->base.op = color == 0 ? PictOpClear : PictOpSrc;
+ tmp->base.dst.pixmap = dst;
+ tmp->base.dst.width = dst->drawable.width;
+ tmp->base.dst.height = dst->drawable.height;
+ tmp->base.dst.format = sna_format_for_depth(dst->drawable.depth);
+ tmp->base.dst.bo = dst_bo;
+ tmp->base.floats_per_vertex = 2;
+
+ tmp->base.src.gen3.type = SHADER_CONSTANT;
+ tmp->base.src.gen3.mode =
+ sna_rgba_for_color(color, dst->drawable.depth);
+
+ if (!kgem_check_bo(&sna->kgem, dst_bo))
+ kgem_submit(&sna->kgem);
+
+ tmp->blt = gen3_render_fill_blt;
+ tmp->done = gen3_render_fill_done;
+
+ gen3_emit_composite_state(sna, &tmp->base);
+ gen3_align_vertex(sna, &tmp->base);
+ return TRUE;
+}
+
+static void gen3_render_flush(struct sna *sna)
+{
+ gen3_vertex_finish(sna, TRUE);
+}
+
+static void
+gen3_render_context_switch(struct sna *sna,
+ int new_mode)
+{
+}
+
+static void
+gen3_render_fini(struct sna *sna)
+{
+}
+
+Bool gen3_render_init(struct sna *sna)
+{
+ struct sna_render *render = &sna->render;
+
+ gen3_render_reset(sna);
+
+ render->composite = gen3_render_composite;
+ render->composite_spans = gen3_render_composite_spans;
+
+ render->video = gen3_render_video;
+
+ render->copy_boxes = gen3_render_copy_boxes;
+ render->copy = gen3_render_copy;
+
+ render->fill_boxes = gen3_render_fill_boxes;
+ render->fill = gen3_render_fill;
+
+ render->reset = gen3_render_reset;
+ render->flush = gen3_render_flush;
+ render->context_switch = gen3_render_context_switch;
+ render->fini = gen3_render_fini;
+
+ render->max_3d_size = 2048;
+ return TRUE;
+}
diff --git a/src/sna/gen3_render.h b/src/sna/gen3_render.h
new file mode 100644
index 0000000..3272d5c
--- a/dev/null
+++ b/src/sna/gen3_render.h
@@ -0,0 +1,1479 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef _I915_REG_H_
+#define _I915_REG_H_
+
+#define CMD_3D (3 << 29)
+
+#define I915_SET_FIELD( var, mask, value ) (var &= ~(mask), var |= value)
+
+#define PRIM3D (CMD_3D | (0x1f<<24))
+#define PRIM3D_INDIRECT_SEQUENTIAL ((1<<23) | (0<<17))
+#define PRIM3D_TRILIST (PRIM3D | (0x0<<18))
+#define PRIM3D_TRISTRIP (PRIM3D | (0x1<<18))
+#define PRIM3D_TRISTRIP_RVRSE (PRIM3D | (0x2<<18))
+#define PRIM3D_TRIFAN (PRIM3D | (0x3<<18))
+#define PRIM3D_POLY (PRIM3D | (0x4<<18))
+#define PRIM3D_LINELIST (PRIM3D | (0x5<<18))
+#define PRIM3D_LINESTRIP (PRIM3D | (0x6<<18))
+#define PRIM3D_RECTLIST (PRIM3D | (0x7<<18))
+#define PRIM3D_POINTLIST (PRIM3D | (0x8<<18))
+#define PRIM3D_DIB (PRIM3D | (0x9<<18))
+#define PRIM3D_CLEAR_RECT (PRIM3D | (0xa<<18))
+#define PRIM3D_ZONE_INIT (PRIM3D | (0xd<<18))
+#define PRIM3D_MASK (0x1f<<18)
+
+
+/* p137 */
+#define _3DSTATE_AA_CMD (CMD_3D | (0x06<<24))
+#define AA_LINE_ECAAR_WIDTH_ENABLE (1<<16)
+#define AA_LINE_ECAAR_WIDTH_0_5 0
+#define AA_LINE_ECAAR_WIDTH_1_0 (1<<14)
+#define AA_LINE_ECAAR_WIDTH_2_0 (2<<14)
+#define AA_LINE_ECAAR_WIDTH_4_0 (3<<14)
+#define AA_LINE_REGION_WIDTH_ENABLE (1<<8)
+#define AA_LINE_REGION_WIDTH_0_5 0
+#define AA_LINE_REGION_WIDTH_1_0 (1<<6)
+#define AA_LINE_REGION_WIDTH_2_0 (2<<6)
+#define AA_LINE_REGION_WIDTH_4_0 (3<<6)
+
+/* 3DSTATE_BACKFACE_STENCIL_OPS, p138*/
+#define _3DSTATE_BACKFACE_STENCIL_OPS (CMD_3D | (0x8<<24))
+#define BFO_ENABLE_STENCIL_REF (1<<23)
+#define BFO_STENCIL_REF_SHIFT 15
+#define BFO_STENCIL_REF_MASK (0xff<<15)
+#define BFO_ENABLE_STENCIL_FUNCS (1<<14)
+#define BFO_STENCIL_TEST_SHIFT 11
+#define BFO_STENCIL_TEST_MASK (0x7<<11)
+#define BFO_STENCIL_FAIL_SHIFT 8
+#define BFO_STENCIL_FAIL_MASK (0x7<<8)
+#define BFO_STENCIL_PASS_Z_FAIL_SHIFT 5
+#define BFO_STENCIL_PASS_Z_FAIL_MASK (0x7<<5)
+#define BFO_STENCIL_PASS_Z_PASS_SHIFT 2
+#define BFO_STENCIL_PASS_Z_PASS_MASK (0x7<<2)
+#define BFO_ENABLE_STENCIL_TWO_SIDE (1<<1)
+#define BFO_STENCIL_TWO_SIDE (1<<0)
+
+/* 3DSTATE_BACKFACE_STENCIL_MASKS, p140 */
+#define _3DSTATE_BACKFACE_STENCIL_MASKS (CMD_3D | (0x9<<24))
+#define BFM_ENABLE_STENCIL_TEST_MASK (1<<17)
+#define BFM_ENABLE_STENCIL_WRITE_MASK (1<<16)
+#define BFM_STENCIL_TEST_MASK_SHIFT 8
+#define BFM_STENCIL_TEST_MASK_MASK (0xff<<8)
+#define BFM_STENCIL_WRITE_MASK_SHIFT 0
+#define BFM_STENCIL_WRITE_MASK_MASK (0xff<<0)
+
+/* 3DSTATE_BIN_CONTROL p141 */
+
+/* p143 */
+#define _3DSTATE_BUF_INFO_CMD (CMD_3D | (0x1d<<24) | (0x8e<<16) | 1)
+/* Dword 1 */
+#define BUF_3D_ID_COLOR_BACK (0x3<<24)
+#define BUF_3D_ID_DEPTH (0x7<<24)
+#define BUF_3D_USE_FENCE (1<<23)
+#define BUF_3D_TILED_SURFACE (1<<22)
+#define BUF_3D_TILE_WALK_X 0
+#define BUF_3D_TILE_WALK_Y (1<<21)
+/* Dword 2 */
+#define BUF_3D_ADDR(x) ((x) & ~0x3)
+
+/* 3DSTATE_CHROMA_KEY */
+
+/* 3DSTATE_CLEAR_PARAMETERS, p150 */
+#define _3DSTATE_CLEAR_PARAMETERS (CMD_3D | (0x1d<<24) | (0x9c<<16) | 5)
+/* Dword 1 */
+#define CLEARPARAM_CLEAR_RECT (1 << 16)
+#define CLEARPARAM_ZONE_INIT (0 << 16)
+#define CLEARPARAM_WRITE_COLOR (1 << 2)
+#define CLEARPARAM_WRITE_DEPTH (1 << 1)
+#define CLEARPARAM_WRITE_STENCIL (1 << 0)
+
+/* 3DSTATE_CONSTANT_BLEND_COLOR, p153 */
+#define _3DSTATE_CONST_BLEND_COLOR_CMD (CMD_3D | (0x1d<<24) | (0x88<<16))
+
+/* 3DSTATE_COORD_SET_BINDINGS, p154 */
+#define _3DSTATE_COORD_SET_BINDINGS (CMD_3D | (0x16<<24))
+#define CSB_TCB(iunit, eunit) ((eunit)<<(iunit*3))
+
+/* p156 */
+#define _3DSTATE_DFLT_DIFFUSE_CMD (CMD_3D | (0x1d<<24) | (0x99<<16))
+
+/* p157 */
+#define _3DSTATE_DFLT_SPEC_CMD (CMD_3D | (0x1d<<24) | (0x9a<<16))
+
+/* p158 */
+#define _3DSTATE_DFLT_Z_CMD (CMD_3D | (0x1d<<24) | (0x98<<16))
+
+/* 3DSTATE_DEPTH_OFFSET_SCALE, p159 */
+#define _3DSTATE_DEPTH_OFFSET_SCALE (CMD_3D | (0x1d<<24) | (0x97<<16))
+/* scale in dword 1 */
+
+/* The depth subrectangle is not supported, but must be disabled. */
+/* 3DSTATE_DEPTH_SUBRECT_DISABLE, p160 */
+#define _3DSTATE_DEPTH_SUBRECT_DISABLE (CMD_3D | (0x1c<<24) | (0x11<<19) | (1 << 1) | (0 << 0))
+
+/* p161 */
+#define _3DSTATE_DST_BUF_VARS_CMD (CMD_3D | (0x1d<<24) | (0x85<<16))
+/* Dword 1 */
+#define TEX_DEFAULT_COLOR_OGL (0<<30)
+#define TEX_DEFAULT_COLOR_D3D (1<<30)
+#define ZR_EARLY_DEPTH (1<<29)
+#define LOD_PRECLAMP_OGL (1<<28)
+#define LOD_PRECLAMP_D3D (0<<28)
+#define DITHER_FULL_ALWAYS (0<<26)
+#define DITHER_FULL_ON_FB_BLEND (1<<26)
+#define DITHER_CLAMPED_ALWAYS (2<<26)
+#define LINEAR_GAMMA_BLEND_32BPP (1<<25)
+#define DEBUG_DISABLE_ENH_DITHER (1<<24)
+#define DSTORG_HORT_BIAS(x) ((x)<<20)
+#define DSTORG_VERT_BIAS(x) ((x)<<16)
+#define COLOR_4_2_2_CHNL_WRT_ALL 0
+#define COLOR_4_2_2_CHNL_WRT_Y (1<<12)
+#define COLOR_4_2_2_CHNL_WRT_CR (2<<12)
+#define COLOR_4_2_2_CHNL_WRT_CB (3<<12)
+#define COLOR_4_2_2_CHNL_WRT_CRCB (4<<12)
+#define COLR_BUF_8BIT 0
+#define COLR_BUF_RGB555 (1<<8)
+#define COLR_BUF_RGB565 (2<<8)
+#define COLR_BUF_ARGB8888 (3<<8)
+#define COLR_BUF_ARGB4444 (8<<8)
+#define COLR_BUF_ARGB1555 (9<<8)
+#define COLR_BUF_ARGB2AAA (0xa<<8)
+#define DEPTH_IS_Z 0
+#define DEPTH_IS_W (1<<6)
+#define DEPTH_FRMT_16_FIXED 0
+#define DEPTH_FRMT_16_FLOAT (1<<2)
+#define DEPTH_FRMT_24_FIXED_8_OTHER (2<<2)
+#define DEPTH_FRMT_24_FLOAT_8_OTHER (3<<2)
+#define VERT_LINE_STRIDE_1 (1<<1)
+#define VERT_LINE_STRIDE_0 0
+#define VERT_LINE_STRIDE_OFS_1 1
+#define VERT_LINE_STRIDE_OFS_0 0
+
+/* p166 */
+#define _3DSTATE_DRAW_RECT_CMD (CMD_3D|(0x1d<<24)|(0x80<<16)|3)
+/* Dword 1 */
+#define DRAW_RECT_DIS_DEPTH_OFS (1<<30)
+#define DRAW_DITHER_OFS_X(x) ((x)<<26)
+#define DRAW_DITHER_OFS_Y(x) ((x)<<24)
+/* Dword 2 */
+#define DRAW_YMIN(x) ((uint16_t)(x)<<16)
+#define DRAW_XMIN(x) ((uint16_t)(x))
+/* Dword 3 */
+#define DRAW_YMAX(x) ((uint16_t)(x)<<16)
+#define DRAW_XMAX(x) ((uint16_t)(x))
+/* Dword 4 */
+#define DRAW_YORG(x) ((uint16_t)(x)<<16)
+#define DRAW_XORG(x) ((uint16_t)(x))
+
+/* 3DSTATE_FILTER_COEFFICIENTS_4X4, p170 */
+
+/* 3DSTATE_FILTER_COEFFICIENTS_6X5, p172 */
+
+/* _3DSTATE_FOG_COLOR, p173 */
+#define _3DSTATE_FOG_COLOR_CMD (CMD_3D|(0x15<<24))
+#define FOG_COLOR_RED(x) ((x)<<16)
+#define FOG_COLOR_GREEN(x) ((x)<<8)
+#define FOG_COLOR_BLUE(x) (x)
+
+/* _3DSTATE_FOG_MODE, p174 */
+#define _3DSTATE_FOG_MODE_CMD (CMD_3D|(0x1d<<24)|(0x89<<16)|2)
+/* Dword 1 */
+#define FMC1_FOGFUNC_MODIFY_ENABLE (1<<31)
+#define FMC1_FOGFUNC_VERTEX (0<<28)
+#define FMC1_FOGFUNC_PIXEL_EXP (1<<28)
+#define FMC1_FOGFUNC_PIXEL_EXP2 (2<<28)
+#define FMC1_FOGFUNC_PIXEL_LINEAR (3<<28)
+#define FMC1_FOGFUNC_MASK (3<<28)
+#define FMC1_FOGINDEX_MODIFY_ENABLE (1<<27)
+#define FMC1_FOGINDEX_Z (0<<25)
+#define FMC1_FOGINDEX_W (1<<25)
+#define FMC1_C1_C2_MODIFY_ENABLE (1<<24)
+#define FMC1_DENSITY_MODIFY_ENABLE (1<<23)
+#define FMC1_C1_ONE (1<<13)
+#define FMC1_C1_MASK (0xffff<<4)
+/* Dword 2 */
+#define FMC2_C2_ONE (1<<16)
+/* Dword 3 */
+#define FMC3_D_ONE (1<<16)
+
+/* _3DSTATE_INDEPENDENT_ALPHA_BLEND, p177 */
+#define _3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD (CMD_3D|(0x0b<<24))
+#define IAB_MODIFY_ENABLE (1<<23)
+#define IAB_ENABLE (1<<22)
+#define IAB_MODIFY_FUNC (1<<21)
+#define IAB_FUNC_SHIFT 16
+#define IAB_MODIFY_SRC_FACTOR (1<<11)
+#define IAB_SRC_FACTOR_SHIFT 6
+#define IAB_SRC_FACTOR_MASK (BLENDFACT_MASK<<6)
+#define IAB_MODIFY_DST_FACTOR (1<<5)
+#define IAB_DST_FACTOR_SHIFT 0
+#define IAB_DST_FACTOR_MASK (BLENDFACT_MASK<<0)
+
+#define BLENDFACT_ZERO 0x01
+#define BLENDFACT_ONE 0x02
+#define BLENDFACT_SRC_COLR 0x03
+#define BLENDFACT_INV_SRC_COLR 0x04
+#define BLENDFACT_SRC_ALPHA 0x05
+#define BLENDFACT_INV_SRC_ALPHA 0x06
+#define BLENDFACT_DST_ALPHA 0x07
+#define BLENDFACT_INV_DST_ALPHA 0x08
+#define BLENDFACT_DST_COLR 0x09
+#define BLENDFACT_INV_DST_COLR 0x0a
+#define BLENDFACT_SRC_ALPHA_SATURATE 0x0b
+#define BLENDFACT_CONST_COLOR 0x0c
+#define BLENDFACT_INV_CONST_COLOR 0x0d
+#define BLENDFACT_CONST_ALPHA 0x0e
+#define BLENDFACT_INV_CONST_ALPHA 0x0f
+#define BLENDFACT_MASK 0x0f
+
+#define BLENDFUNC_ADD 0x0
+#define BLENDFUNC_SUBTRACT 0x1
+#define BLENDFUNC_REVERSE_SUBTRACT 0x2
+#define BLENDFUNC_MIN 0x3
+#define BLENDFUNC_MAX 0x4
+#define BLENDFUNC_MASK 0x7
+
+/* 3DSTATE_LOAD_INDIRECT, p180 */
+
+#define _3DSTATE_LOAD_INDIRECT (CMD_3D|(0x1d<<24)|(0x7<<16))
+#define LI0_STATE_STATIC_INDIRECT (0x01<<8)
+#define LI0_STATE_DYNAMIC_INDIRECT (0x02<<8)
+#define LI0_STATE_SAMPLER (0x04<<8)
+#define LI0_STATE_MAP (0x08<<8)
+#define LI0_STATE_PROGRAM (0x10<<8)
+#define LI0_STATE_CONSTANTS (0x20<<8)
+
+#define SIS0_BUFFER_ADDRESS(x) ((x)&~0x3)
+#define SIS0_FORCE_LOAD (1<<1)
+#define SIS0_BUFFER_VALID (1<<0)
+#define SIS1_BUFFER_LENGTH(x) ((x)&0xff)
+
+#define DIS0_BUFFER_ADDRESS(x) ((x)&~0x3)
+#define DIS0_BUFFER_RESET (1<<1)
+#define DIS0_BUFFER_VALID (1<<0)
+
+#define SSB0_BUFFER_ADDRESS(x) ((x)&~0x3)
+#define SSB0_FORCE_LOAD (1<<1)
+#define SSB0_BUFFER_VALID (1<<0)
+#define SSB1_BUFFER_LENGTH(x) ((x)&0xff)
+
+#define MSB0_BUFFER_ADDRESS(x) ((x)&~0x3)
+#define MSB0_FORCE_LOAD (1<<1)
+#define MSB0_BUFFER_VALID (1<<0)
+#define MSB1_BUFFER_LENGTH(x) ((x)&0xff)
+
+#define PSP0_BUFFER_ADDRESS(x) ((x)&~0x3)
+#define PSP0_FORCE_LOAD (1<<1)
+#define PSP0_BUFFER_VALID (1<<0)
+#define PSP1_BUFFER_LENGTH(x) ((x)&0xff)
+
+#define PSC0_BUFFER_ADDRESS(x) ((x)&~0x3)
+#define PSC0_FORCE_LOAD (1<<1)
+#define PSC0_BUFFER_VALID (1<<0)
+#define PSC1_BUFFER_LENGTH(x) ((x)&0xff)
+
+/* _3DSTATE_RASTERIZATION_RULES */
+#define _3DSTATE_RASTER_RULES_CMD (CMD_3D|(0x07<<24))
+#define ENABLE_POINT_RASTER_RULE (1<<15)
+#define OGL_POINT_RASTER_RULE (1<<13)
+#define ENABLE_TEXKILL_3D_4D (1<<10)
+#define TEXKILL_3D (0<<9)
+#define TEXKILL_4D (1<<9)
+#define ENABLE_LINE_STRIP_PROVOKE_VRTX (1<<8)
+#define ENABLE_TRI_FAN_PROVOKE_VRTX (1<<5)
+#define LINE_STRIP_PROVOKE_VRTX(x) ((x)<<6)
+#define TRI_FAN_PROVOKE_VRTX(x) ((x)<<3)
+
+/* _3DSTATE_SCISSOR_ENABLE, p256 */
+#define _3DSTATE_SCISSOR_ENABLE_CMD (CMD_3D|(0x1c<<24)|(0x10<<19))
+#define ENABLE_SCISSOR_RECT ((1<<1) | 1)
+#define DISABLE_SCISSOR_RECT (1<<1)
+
+/* _3DSTATE_SCISSOR_RECTANGLE_0, p257 */
+#define _3DSTATE_SCISSOR_RECT_0_CMD (CMD_3D|(0x1d<<24)|(0x81<<16)|1)
+/* Dword 1 */
+#define SCISSOR_RECT_0_YMIN(x) ((x)<<16)
+#define SCISSOR_RECT_0_XMIN(x) (x)
+/* Dword 2 */
+#define SCISSOR_RECT_0_YMAX(x) ((x)<<16)
+#define SCISSOR_RECT_0_XMAX(x) (x)
+
+/* p189 */
+#define _3DSTATE_LOAD_STATE_IMMEDIATE_1 ((0x3<<29)|(0x1d<<24)|(0x04<<16))
+#define I1_LOAD_S(n) (1<<(4+n))
+
+#define S0_VB_OFFSET_MASK 0xffffffc
+#define S0_AUTO_CACHE_INV_DISABLE (1<<0)
+
+#define S1_VERTEX_WIDTH_SHIFT 24
+#define S1_VERTEX_WIDTH_MASK (0x3f<<24)
+#define S1_VERTEX_PITCH_SHIFT 16
+#define S1_VERTEX_PITCH_MASK (0x3f<<16)
+
+#define TEXCOORDFMT_2D 0x0
+#define TEXCOORDFMT_3D 0x1
+#define TEXCOORDFMT_4D 0x2
+#define TEXCOORDFMT_1D 0x3
+#define TEXCOORDFMT_2D_16 0x4
+#define TEXCOORDFMT_4D_16 0x5
+#define TEXCOORDFMT_NOT_PRESENT 0xf
+#define S2_TEXCOORD_FMT0_MASK 0xf
+#define S2_TEXCOORD_FMT1_SHIFT 4
+#define S2_TEXCOORD_FMT(unit, type) ((type)<<(unit*4))
+#define S2_TEXCOORD_NONE (~0)
+
+#define TEXCOORD_WRAP_SHORTEST_TCX 8
+#define TEXCOORD_WRAP_SHORTEST_TCY 4
+#define TEXCOORD_WRAP_SHORTEST_TCZ 2
+#define TEXCOORD_PERSPECTIVE_DISABLE 1
+
+#define S3_WRAP_SHORTEST_TCX(unit) (TEXCOORD_WRAP_SHORTEST_TCX << ((unit) * 4))
+#define S3_WRAP_SHORTEST_TCY(unit) (TEXCOORD_WRAP_SHORTEST_TCY << ((unit) * 4))
+#define S3_WRAP_SHORTEST_TCZ(unit) (TEXCOORD_WRAP_SHORTEST_TCZ << ((unit) * 4))
+#define S3_PERSPECTIVE_DISABLE(unit) (TEXCOORD_PERSPECTIVE_DISABLE << ((unit) * 4))
+
+/* S3 not interesting */
+
+#define S4_POINT_WIDTH_SHIFT 23
+#define S4_POINT_WIDTH_MASK (0x1ff<<23)
+#define S4_LINE_WIDTH_SHIFT 19
+#define S4_LINE_WIDTH_ONE (0x2<<19)
+#define S4_LINE_WIDTH_MASK (0xf<<19)
+#define S4_FLATSHADE_ALPHA (1<<18)
+#define S4_FLATSHADE_FOG (1<<17)
+#define S4_FLATSHADE_SPECULAR (1<<16)
+#define S4_FLATSHADE_COLOR (1<<15)
+#define S4_CULLMODE_BOTH (0<<13)
+#define S4_CULLMODE_NONE (1<<13)
+#define S4_CULLMODE_CW (2<<13)
+#define S4_CULLMODE_CCW (3<<13)
+#define S4_CULLMODE_MASK (3<<13)
+#define S4_VFMT_POINT_WIDTH (1<<12)
+#define S4_VFMT_SPEC_FOG (1<<11)
+#define S4_VFMT_COLOR (1<<10)
+#define S4_VFMT_DEPTH_OFFSET (1<<9)
+#define S4_VFMT_XYZ (1<<6)
+#define S4_VFMT_XYZW (2<<6)
+#define S4_VFMT_XY (3<<6)
+#define S4_VFMT_XYW (4<<6)
+#define S4_VFMT_XYZW_MASK (7<<6)
+#define S4_FORCE_DEFAULT_DIFFUSE (1<<5)
+#define S4_FORCE_DEFAULT_SPECULAR (1<<4)
+#define S4_LOCAL_DEPTH_OFFSET_ENABLE (1<<3)
+#define S4_VFMT_FOG_PARAM (1<<2)
+#define S4_SPRITE_POINT_ENABLE (1<<1)
+#define S4_LINE_ANTIALIAS_ENABLE (1<<0)
+
+#define S4_VFMT_MASK (S4_VFMT_POINT_WIDTH | \
+ S4_VFMT_SPEC_FOG | \
+ S4_VFMT_COLOR | \
+ S4_VFMT_DEPTH_OFFSET | \
+ S4_VFMT_XYZW_MASK | \
+ S4_VFMT_FOG_PARAM)
+
+#define S5_WRITEDISABLE_ALPHA (1<<31)
+#define S5_WRITEDISABLE_RED (1<<30)
+#define S5_WRITEDISABLE_GREEN (1<<29)
+#define S5_WRITEDISABLE_BLUE (1<<28)
+#define S5_WRITEDISABLE_MASK (0xf<<28)
+#define S5_FORCE_DEFAULT_POINT_SIZE (1<<27)
+#define S5_LAST_PIXEL_ENABLE (1<<26)
+#define S5_GLOBAL_DEPTH_OFFSET_ENABLE (1<<25)
+#define S5_FOG_ENABLE (1<<24)
+#define S5_STENCIL_REF_SHIFT 16
+#define S5_STENCIL_REF_MASK (0xff<<16)
+#define S5_STENCIL_TEST_FUNC_SHIFT 13
+#define S5_STENCIL_TEST_FUNC_MASK (0x7<<13)
+#define S5_STENCIL_FAIL_SHIFT 10
+#define S5_STENCIL_FAIL_MASK (0x7<<10)
+#define S5_STENCIL_PASS_Z_FAIL_SHIFT 7
+#define S5_STENCIL_PASS_Z_FAIL_MASK (0x7<<7)
+#define S5_STENCIL_PASS_Z_PASS_SHIFT 4
+#define S5_STENCIL_PASS_Z_PASS_MASK (0x7<<4)
+#define S5_STENCIL_WRITE_ENABLE (1<<3)
+#define S5_STENCIL_TEST_ENABLE (1<<2)
+#define S5_COLOR_DITHER_ENABLE (1<<1)
+#define S5_LOGICOP_ENABLE (1<<0)
+
+#define S6_ALPHA_TEST_ENABLE (1<<31)
+#define S6_ALPHA_TEST_FUNC_SHIFT 28
+#define S6_ALPHA_TEST_FUNC_MASK (0x7<<28)
+#define S6_ALPHA_REF_SHIFT 20
+#define S6_ALPHA_REF_MASK (0xff<<20)
+#define S6_DEPTH_TEST_ENABLE (1<<19)
+#define S6_DEPTH_TEST_FUNC_SHIFT 16
+#define S6_DEPTH_TEST_FUNC_MASK (0x7<<16)
+#define S6_CBUF_BLEND_ENABLE (1<<15)
+#define S6_CBUF_BLEND_FUNC_SHIFT 12
+#define S6_CBUF_BLEND_FUNC_MASK (0x7<<12)
+#define S6_CBUF_SRC_BLEND_FACT_SHIFT 8
+#define S6_CBUF_SRC_BLEND_FACT_MASK (0xf<<8)
+#define S6_CBUF_DST_BLEND_FACT_SHIFT 4
+#define S6_CBUF_DST_BLEND_FACT_MASK (0xf<<4)
+#define S6_DEPTH_WRITE_ENABLE (1<<3)
+#define S6_COLOR_WRITE_ENABLE (1<<2)
+#define S6_TRISTRIP_PV_SHIFT 0
+#define S6_TRISTRIP_PV_MASK (0x3<<0)
+
+#define S7_DEPTH_OFFSET_CONST_MASK ~0
+
+/* 3DSTATE_MAP_DEINTERLACER_PARAMETERS */
+/* 3DSTATE_MAP_PALETTE_LOAD_32, p206 */
+
+/* _3DSTATE_MODES_4, p218 */
+#define _3DSTATE_MODES_4_CMD (CMD_3D|(0x0d<<24))
+#define ENABLE_LOGIC_OP_FUNC (1<<23)
+#define LOGIC_OP_FUNC(x) ((x)<<18)
+#define LOGICOP_MASK (0xf<<18)
+#define LOGICOP_COPY 0xc
+#define MODE4_ENABLE_STENCIL_TEST_MASK ((1<<17)|(0xff00))
+#define ENABLE_STENCIL_TEST_MASK (1<<17)
+#define STENCIL_TEST_MASK(x) ((x)<<8)
+#define MODE4_ENABLE_STENCIL_WRITE_MASK ((1<<16)|(0x00ff))
+#define ENABLE_STENCIL_WRITE_MASK (1<<16)
+#define STENCIL_WRITE_MASK(x) ((x)&0xff)
+
+/* _3DSTATE_MODES_5, p220 */
+#define _3DSTATE_MODES_5_CMD (CMD_3D|(0x0c<<24))
+#define PIPELINE_FLUSH_RENDER_CACHE (1<<18)
+#define PIPELINE_FLUSH_TEXTURE_CACHE (1<<16)
+
+/* p221 */
+#define _3DSTATE_PIXEL_SHADER_CONSTANTS (CMD_3D|(0x1d<<24)|(0x6<<16))
+#define PS1_REG(n) (1<<(n))
+#define PS2_CONST_X(n) (n)
+#define PS3_CONST_Y(n) (n)
+#define PS4_CONST_Z(n) (n)
+#define PS5_CONST_W(n) (n)
+
+/* p222 */
+
+#define I915_MAX_TEX_INDIRECT 4
+#define I915_MAX_TEX_INSN 32
+#define I915_MAX_ALU_INSN 64
+#define I915_MAX_DECL_INSN 27
+#define I915_MAX_TEMPORARY 16
+
+/* Each instruction is 3 dwords long, though most don't require all
+ * this space. Maximum of 123 instructions. Smaller maxes per insn
+ * type.
+ */
+#define _3DSTATE_PIXEL_SHADER_PROGRAM (CMD_3D|(0x1d<<24)|(0x5<<16))
+
+#define REG_TYPE_R 0 /* temporary regs, no need to
+ * dcl, must be written before
+ * read -- Preserved between
+ * phases.
+ */
+#define REG_TYPE_T 1 /* Interpolated values, must be
+ * dcl'ed before use.
+ *
+ * 0..7: texture coord,
+ * 8: diffuse spec,
+ * 9: specular color,
+ * 10: fog parameter in w.
+ */
+#define REG_TYPE_CONST 2 /* Restriction: only one const
+ * can be referenced per
+ * instruction, though it may be
+ * selected for multiple inputs.
+ * Constants not initialized
+ * default to zero.
+ */
+#define REG_TYPE_S 3 /* sampler */
+#define REG_TYPE_OC 4 /* output color (rgba) */
+#define REG_TYPE_OD 5 /* output depth (w), xyz are
+ * temporaries. If not written,
+ * interpolated depth is used?
+ */
+#define REG_TYPE_U 6 /* unpreserved temporaries */
+#define REG_TYPE_MASK 0x7
+#define REG_NR_MASK 0xf
+
+/* REG_TYPE_T:
+ */
+#define T_TEX0 0
+#define T_TEX1 1
+#define T_TEX2 2
+#define T_TEX3 3
+#define T_TEX4 4
+#define T_TEX5 5
+#define T_TEX6 6
+#define T_TEX7 7
+#define T_DIFFUSE 8
+#define T_SPECULAR 9
+#define T_FOG_W 10 /* interpolated fog is in W coord */
+
+/* Arithmetic instructions */
+
+/* .replicate_swizzle == selection and replication of a particular
+ * scalar channel, ie., .xxxx, .yyyy, .zzzz or .wwww
+ */
+#define A0_NOP (0x0<<24) /* no operation */
+#define A0_ADD (0x1<<24) /* dst = src0 + src1 */
+#define A0_MOV (0x2<<24) /* dst = src0 */
+#define A0_MUL (0x3<<24) /* dst = src0 * src1 */
+#define A0_MAD (0x4<<24) /* dst = src0 * src1 + src2 */
+#define A0_DP2ADD (0x5<<24) /* dst.xyzw = src0.xy dot src1.xy + src2.replicate_swizzle */
+#define A0_DP3 (0x6<<24) /* dst.xyzw = src0.xyz dot src1.xyz */
+#define A0_DP4 (0x7<<24) /* dst.xyzw = src0.xyzw dot src1.xyzw */
+#define A0_FRC (0x8<<24) /* dst = src0 - floor(src0) */
+#define A0_RCP (0x9<<24) /* dst.xyzw = 1/(src0.replicate_swizzle) */
+#define A0_RSQ (0xa<<24) /* dst.xyzw = 1/(sqrt(abs(src0.replicate_swizzle))) */
+#define A0_EXP (0xb<<24) /* dst.xyzw = exp2(src0.replicate_swizzle) */
+#define A0_LOG (0xc<<24) /* dst.xyzw = log2(abs(src0.replicate_swizzle)) */
+#define A0_CMP (0xd<<24) /* dst = (src0 >= 0.0) ? src1 : src2 */
+#define A0_MIN (0xe<<24) /* dst = (src0 < src1) ? src0 : src1 */
+#define A0_MAX (0xf<<24) /* dst = (src0 >= src1) ? src0 : src1 */
+#define A0_FLR (0x10<<24) /* dst = floor(src0) */
+#define A0_MOD (0x11<<24) /* dst = src0 fmod 1.0 */
+#define A0_TRC (0x12<<24) /* dst = int(src0) */
+#define A0_SGE (0x13<<24) /* dst = src0 >= src1 ? 1.0 : 0.0 */
+#define A0_SLT (0x14<<24) /* dst = src0 < src1 ? 1.0 : 0.0 */
+#define A0_DEST_SATURATE (1<<22)
+#define A0_DEST_TYPE_SHIFT 19
+/* Allow: R, OC, OD, U */
+#define A0_DEST_NR_SHIFT 14
+/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */
+#define A0_DEST_CHANNEL_X (1<<10)
+#define A0_DEST_CHANNEL_Y (2<<10)
+#define A0_DEST_CHANNEL_Z (4<<10)
+#define A0_DEST_CHANNEL_W (8<<10)
+#define A0_DEST_CHANNEL_ALL (0xf<<10)
+#define A0_DEST_CHANNEL_SHIFT 10
+#define A0_SRC0_TYPE_SHIFT 7
+#define A0_SRC0_NR_SHIFT 2
+
+#define A0_DEST_CHANNEL_XY (A0_DEST_CHANNEL_X|A0_DEST_CHANNEL_Y)
+#define A0_DEST_CHANNEL_XYZ (A0_DEST_CHANNEL_XY|A0_DEST_CHANNEL_Z)
+
+#define SRC_X 0
+#define SRC_Y 1
+#define SRC_Z 2
+#define SRC_W 3
+#define SRC_ZERO 4
+#define SRC_ONE 5
+
+#define A1_SRC0_CHANNEL_X_NEGATE (1<<31)
+#define A1_SRC0_CHANNEL_X_SHIFT 28
+#define A1_SRC0_CHANNEL_Y_NEGATE (1<<27)
+#define A1_SRC0_CHANNEL_Y_SHIFT 24
+#define A1_SRC0_CHANNEL_Z_NEGATE (1<<23)
+#define A1_SRC0_CHANNEL_Z_SHIFT 20
+#define A1_SRC0_CHANNEL_W_NEGATE (1<<19)
+#define A1_SRC0_CHANNEL_W_SHIFT 16
+#define A1_SRC1_TYPE_SHIFT 13
+#define A1_SRC1_NR_SHIFT 8
+#define A1_SRC1_CHANNEL_X_NEGATE (1<<7)
+#define A1_SRC1_CHANNEL_X_SHIFT 4
+#define A1_SRC1_CHANNEL_Y_NEGATE (1<<3)
+#define A1_SRC1_CHANNEL_Y_SHIFT 0
+
+#define A2_SRC1_CHANNEL_Z_NEGATE (1<<31)
+#define A2_SRC1_CHANNEL_Z_SHIFT 28
+#define A2_SRC1_CHANNEL_W_NEGATE (1<<27)
+#define A2_SRC1_CHANNEL_W_SHIFT 24
+#define A2_SRC2_TYPE_SHIFT 21
+#define A2_SRC2_NR_SHIFT 16
+#define A2_SRC2_CHANNEL_X_NEGATE (1<<15)
+#define A2_SRC2_CHANNEL_X_SHIFT 12
+#define A2_SRC2_CHANNEL_Y_NEGATE (1<<11)
+#define A2_SRC2_CHANNEL_Y_SHIFT 8
+#define A2_SRC2_CHANNEL_Z_NEGATE (1<<7)
+#define A2_SRC2_CHANNEL_Z_SHIFT 4
+#define A2_SRC2_CHANNEL_W_NEGATE (1<<3)
+#define A2_SRC2_CHANNEL_W_SHIFT 0
+
+/* Texture instructions */
+#define T0_TEXLD (0x15<<24) /* Sample texture using predeclared
+ * sampler and address, and output
+ * filtered texel data to destination
+ * register */
+#define T0_TEXLDP (0x16<<24) /* Same as texld but performs a
+ * perspective divide of the texture
+ * coordinate .xyz values by .w before
+ * sampling. */
+#define T0_TEXLDB (0x17<<24) /* Same as texld but biases the
+ * computed LOD by w. Only S4.6 two's
+ * comp is used. This implies that a
+ * float to fixed conversion is
+ * done. */
+#define T0_TEXKILL (0x18<<24) /* Does not perform a sampling
+ * operation. Simply kills the pixel
+ * if any channel of the address
+ * register is < 0.0. */
+#define T0_DEST_TYPE_SHIFT 19
+/* Allow: R, OC, OD, U */
+/* Note: U (unpreserved) regs do not retain their values between
+ * phases (cannot be used for feedback)
+ *
+ * Note: oC and OD registers can only be used as the destination of a
+ * texture instruction once per phase (this is an implementation
+ * restriction).
+ */
+#define T0_DEST_NR_SHIFT 14
+/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */
+#define T0_SAMPLER_NR_SHIFT 0 /* This field ignored for TEXKILL */
+#define T0_SAMPLER_NR_MASK (0xf<<0)
+
+#define T1_ADDRESS_REG_TYPE_SHIFT 24 /* Reg to use as texture coord */
+/* Allow R, T, OC, OD -- R, OC, OD are 'dependent' reads, new program phase */
+#define T1_ADDRESS_REG_NR_SHIFT 17
+#define T2_MBZ 0
+
+/* Declaration instructions */
+#define D0_DCL (0x19<<24) /* Declare a t (interpolated attrib)
+ * register or an s (sampler)
+ * register. */
+#define D0_SAMPLE_TYPE_SHIFT 22
+#define D0_SAMPLE_TYPE_2D (0x0<<22)
+#define D0_SAMPLE_TYPE_CUBE (0x1<<22)
+#define D0_SAMPLE_TYPE_VOLUME (0x2<<22)
+#define D0_SAMPLE_TYPE_MASK (0x3<<22)
+
+#define D0_TYPE_SHIFT 19
+/* Allow: T, S */
+#define D0_NR_SHIFT 14
+/* Allow T: 0..10, S: 0..15 */
+#define D0_CHANNEL_X (1<<10)
+#define D0_CHANNEL_Y (2<<10)
+#define D0_CHANNEL_Z (4<<10)
+#define D0_CHANNEL_W (8<<10)
+#define D0_CHANNEL_ALL (0xf<<10)
+#define D0_CHANNEL_NONE (0<<10)
+
+#define D0_CHANNEL_XY (D0_CHANNEL_X|D0_CHANNEL_Y)
+#define D0_CHANNEL_XYZ (D0_CHANNEL_XY|D0_CHANNEL_Z)
+
+/* I915 Errata: Do not allow (xz), (xw), (xzw) combinations for diffuse
+ * or specular declarations.
+ *
+ * For T dcls, only allow: (x), (xy), (xyz), (w), (xyzw)
+ *
+ * Must be zero for S (sampler) dcls
+ */
+#define D1_MBZ 0
+#define D2_MBZ 0
+
+/* p207.
+ * The DWORD count is 3 times the number of bits set in MS1_MAPMASK_MASK
+ */
+#define _3DSTATE_MAP_STATE (CMD_3D|(0x1d<<24)|(0x0<<16))
+
+#define MS1_MAPMASK_SHIFT 0
+#define MS1_MAPMASK_MASK (0x8fff<<0)
+
+#define MS2_UNTRUSTED_SURFACE (1<<31)
+#define MS2_ADDRESS_MASK 0xfffffffc
+#define MS2_VERTICAL_LINE_STRIDE (1<<1)
+#define MS2_VERTICAL_OFFSET (1<<1)
+
+#define MS3_HEIGHT_SHIFT 21
+#define MS3_WIDTH_SHIFT 10
+#define MS3_PALETTE_SELECT (1<<9)
+#define MS3_MAPSURF_FORMAT_SHIFT 7
+#define MS3_MAPSURF_FORMAT_MASK (0x7<<7)
+#define MAPSURF_8BIT (1<<7)
+#define MAPSURF_16BIT (2<<7)
+#define MAPSURF_32BIT (3<<7)
+#define MAPSURF_422 (5<<7)
+#define MAPSURF_COMPRESSED (6<<7)
+#define MAPSURF_4BIT_INDEXED (7<<7)
+#define MS3_MT_FORMAT_MASK (0x7 << 3)
+#define MS3_MT_FORMAT_SHIFT 3
+#define MT_4BIT_IDX_ARGB8888 (7<<3) /* SURFACE_4BIT_INDEXED */
+#define MT_8BIT_I8 (0<<3) /* SURFACE_8BIT */
+#define MT_8BIT_L8 (1<<3)
+#define MT_8BIT_A8 (4<<3)
+#define MT_8BIT_MONO8 (5<<3)
+#define MT_16BIT_RGB565 (0<<3) /* SURFACE_16BIT */
+#define MT_16BIT_ARGB1555 (1<<3)
+#define MT_16BIT_ARGB4444 (2<<3)
+#define MT_16BIT_AY88 (3<<3)
+#define MT_16BIT_88DVDU (5<<3)
+#define MT_16BIT_BUMP_655LDVDU (6<<3)
+#define MT_16BIT_I16 (7<<3)
+#define MT_16BIT_L16 (8<<3)
+#define MT_16BIT_A16 (9<<3)
+#define MT_32BIT_ARGB8888 (0<<3) /* SURFACE_32BIT */
+#define MT_32BIT_ABGR8888 (1<<3)
+#define MT_32BIT_XRGB8888 (2<<3)
+#define MT_32BIT_XBGR8888 (3<<3)
+#define MT_32BIT_QWVU8888 (4<<3)
+#define MT_32BIT_AXVU8888 (5<<3)
+#define MT_32BIT_LXVU8888 (6<<3)
+#define MT_32BIT_XLVU8888 (7<<3)
+#define MT_32BIT_ARGB2101010 (8<<3)
+#define MT_32BIT_ABGR2101010 (9<<3)
+#define MT_32BIT_AWVU2101010 (0xA<<3)
+#define MT_32BIT_GR1616 (0xB<<3)
+#define MT_32BIT_VU1616 (0xC<<3)
+#define MT_32BIT_xI824 (0xD<<3)
+#define MT_32BIT_xA824 (0xE<<3)
+#define MT_32BIT_xL824 (0xF<<3)
+#define MT_422_YCRCB_SWAPY (0<<3) /* SURFACE_422 */
+#define MT_422_YCRCB_NORMAL (1<<3)
+#define MT_422_YCRCB_SWAPUV (2<<3)
+#define MT_422_YCRCB_SWAPUVY (3<<3)
+#define MT_COMPRESS_DXT1 (0<<3) /* SURFACE_COMPRESSED */
+#define MT_COMPRESS_DXT2_3 (1<<3)
+#define MT_COMPRESS_DXT4_5 (2<<3)
+#define MT_COMPRESS_FXT1 (3<<3)
+#define MT_COMPRESS_DXT1_RGB (4<<3)
+#define MS3_USE_FENCE_REGS (1<<2)
+#define MS3_TILED_SURFACE (1<<1)
+#define MS3_TILE_WALK (1<<0)
+
+/* The pitch is the pitch measured in DWORDS, minus 1 */
+#define MS4_PITCH_SHIFT 21
+#define MS4_CUBE_FACE_ENA_NEGX (1<<20)
+#define MS4_CUBE_FACE_ENA_POSX (1<<19)
+#define MS4_CUBE_FACE_ENA_NEGY (1<<18)
+#define MS4_CUBE_FACE_ENA_POSY (1<<17)
+#define MS4_CUBE_FACE_ENA_NEGZ (1<<16)
+#define MS4_CUBE_FACE_ENA_POSZ (1<<15)
+#define MS4_CUBE_FACE_ENA_MASK (0x3f<<15)
+#define MS4_MAX_LOD_SHIFT 9
+#define MS4_MAX_LOD_MASK (0x3f<<9)
+#define MS4_MIP_LAYOUT_LEGACY (0<<8)
+#define MS4_MIP_LAYOUT_BELOW_LPT (0<<8)
+#define MS4_MIP_LAYOUT_RIGHT_LPT (1<<8)
+#define MS4_VOLUME_DEPTH_SHIFT 0
+#define MS4_VOLUME_DEPTH_MASK (0xff<<0)
+
+/* p244.
+ * The DWORD count is 3 times the number of bits set in SS1_MAPMASK_MASK.
+ */
+#define _3DSTATE_SAMPLER_STATE (CMD_3D|(0x1d<<24)|(0x1<<16))
+
+#define SS1_MAPMASK_SHIFT 0
+#define SS1_MAPMASK_MASK (0x8fff<<0)
+
+#define SS2_REVERSE_GAMMA_ENABLE (1<<31)
+#define SS2_PACKED_TO_PLANAR_ENABLE (1<<30)
+#define SS2_COLORSPACE_CONVERSION (1<<29)
+#define SS2_CHROMAKEY_SHIFT 27
+#define SS2_BASE_MIP_LEVEL_SHIFT 22
+#define SS2_BASE_MIP_LEVEL_MASK (0x1f<<22)
+#define SS2_MIP_FILTER_SHIFT 20
+#define SS2_MIP_FILTER_MASK (0x3<<20)
+#define MIPFILTER_NONE 0
+#define MIPFILTER_NEAREST 1
+#define MIPFILTER_LINEAR 3
+#define SS2_MAG_FILTER_SHIFT 17
+#define SS2_MAG_FILTER_MASK (0x7<<17)
+#define FILTER_NEAREST 0
+#define FILTER_LINEAR 1
+#define FILTER_ANISOTROPIC 2
+#define FILTER_4X4_1 3
+#define FILTER_4X4_2 4
+#define FILTER_4X4_FLAT 5
+#define FILTER_6X5_MONO 6 /* XXX - check */
+#define SS2_MIN_FILTER_SHIFT 14
+#define SS2_MIN_FILTER_MASK (0x7<<14)
+#define SS2_LOD_BIAS_SHIFT 5
+#define SS2_LOD_BIAS_ONE (0x10<<5)
+#define SS2_LOD_BIAS_MASK (0x1ff<<5)
+/* Shadow requires:
+ * MT_X8{I,L,A}24 or MT_{I,L,A}16 texture format
+ * FILTER_4X4_x MIN and MAG filters
+ */
+#define SS2_SHADOW_ENABLE (1<<4)
+#define SS2_MAX_ANISO_MASK (1<<3)
+#define SS2_MAX_ANISO_2 (0<<3)
+#define SS2_MAX_ANISO_4 (1<<3)
+#define SS2_SHADOW_FUNC_SHIFT 0
+#define SS2_SHADOW_FUNC_MASK (0x7<<0)
+/* SS2_SHADOW_FUNC values: see COMPAREFUNC_* */
+
+#define SS3_MIN_LOD_SHIFT 24
+#define SS3_MIN_LOD_ONE (0x10<<24)
+#define SS3_MIN_LOD_MASK (0xff<<24)
+#define SS3_KILL_PIXEL_ENABLE (1<<17)
+#define SS3_TCX_ADDR_MODE_SHIFT 12
+#define SS3_TCX_ADDR_MODE_MASK (0x7<<12)
+#define TEXCOORDMODE_WRAP 0
+#define TEXCOORDMODE_MIRROR 1
+#define TEXCOORDMODE_CLAMP_EDGE 2
+#define TEXCOORDMODE_CUBE 3
+#define TEXCOORDMODE_CLAMP_BORDER 4
+#define TEXCOORDMODE_MIRROR_ONCE 5
+#define SS3_TCY_ADDR_MODE_SHIFT 9
+#define SS3_TCY_ADDR_MODE_MASK (0x7<<9)
+#define SS3_TCZ_ADDR_MODE_SHIFT 6
+#define SS3_TCZ_ADDR_MODE_MASK (0x7<<6)
+#define SS3_NORMALIZED_COORDS (1<<5)
+#define SS3_TEXTUREMAP_INDEX_SHIFT 1
+#define SS3_TEXTUREMAP_INDEX_MASK (0xf<<1)
+#define SS3_DEINTERLACER_ENABLE (1<<0)
+
+#define SS4_BORDER_COLOR_MASK (~0)
+
+/* 3DSTATE_SPAN_STIPPLE, p258
+ */
+#define _3DSTATE_STIPPLE ((0x3<<29)|(0x1d<<24)|(0x83<<16))
+#define ST1_ENABLE (1<<16)
+#define ST1_MASK (0xffff)
+
+#define FLUSH_MAP_CACHE (1<<0)
+#define FLUSH_RENDER_CACHE (1<<1)
+
+#endif
+/* -*- c-basic-offset: 4 -*- */
+/*
+ * Copyright © 2006,2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+/* Each instruction is 3 dwords long, though most don't require all
+ * this space. Maximum of 123 instructions. Smaller maxes per insn
+ * type.
+ */
+#define _3DSTATE_PIXEL_SHADER_PROGRAM (CMD_3D|(0x1d<<24)|(0x5<<16))
+
+#define REG_TYPE_R 0 /* temporary regs, no need to
+ * dcl, must be written before
+ * read -- Preserved between
+ * phases.
+ */
+#define REG_TYPE_T 1 /* Interpolated values, must be
+ * dcl'ed before use.
+ *
+ * 0..7: texture coord,
+ * 8: diffuse spec,
+ * 9: specular color,
+ * 10: fog parameter in w.
+ */
+#define REG_TYPE_CONST 2 /* Restriction: only one const
+ * can be referenced per
+ * instruction, though it may be
+ * selected for multiple inputs.
+ * Constants not initialized
+ * default to zero.
+ */
+#define REG_TYPE_S 3 /* sampler */
+#define REG_TYPE_OC 4 /* output color (rgba) */
+#define REG_TYPE_OD 5 /* output depth (w), xyz are
+ * temporaries. If not written,
+ * interpolated depth is used?
+ */
+#define REG_TYPE_U 6 /* unpreserved temporaries */
+#define REG_TYPE_MASK 0x7
+#define REG_TYPE_SHIFT 4
+#define REG_NR_MASK 0xf
+
+/* REG_TYPE_T:
+*/
+#define T_TEX0 0
+#define T_TEX1 1
+#define T_TEX2 2
+#define T_TEX3 3
+#define T_TEX4 4
+#define T_TEX5 5
+#define T_TEX6 6
+#define T_TEX7 7
+#define T_DIFFUSE 8
+#define T_SPECULAR 9
+#define T_FOG_W 10 /* interpolated fog is in W coord */
+
+/* Arithmetic instructions */
+
+/* .replicate_swizzle == selection and replication of a particular
+ * scalar channel, ie., .xxxx, .yyyy, .zzzz or .wwww
+ */
+#define A0_NOP (0x0<<24) /* no operation */
+#define A0_ADD (0x1<<24) /* dst = src0 + src1 */
+#define A0_MOV (0x2<<24) /* dst = src0 */
+#define A0_MUL (0x3<<24) /* dst = src0 * src1 */
+#define A0_MAD (0x4<<24) /* dst = src0 * src1 + src2 */
+#define A0_DP2ADD (0x5<<24) /* dst.xyzw = src0.xy dot src1.xy + src2.replicate_swizzle */
+#define A0_DP3 (0x6<<24) /* dst.xyzw = src0.xyz dot src1.xyz */
+#define A0_DP4 (0x7<<24) /* dst.xyzw = src0.xyzw dot src1.xyzw */
+#define A0_FRC (0x8<<24) /* dst = src0 - floor(src0) */
+#define A0_RCP (0x9<<24) /* dst.xyzw = 1/(src0.replicate_swizzle) */
+#define A0_RSQ (0xa<<24) /* dst.xyzw = 1/(sqrt(abs(src0.replicate_swizzle))) */
+#define A0_EXP (0xb<<24) /* dst.xyzw = exp2(src0.replicate_swizzle) */
+#define A0_LOG (0xc<<24) /* dst.xyzw = log2(abs(src0.replicate_swizzle)) */
+#define A0_CMP (0xd<<24) /* dst = (src0 >= 0.0) ? src1 : src2 */
+#define A0_MIN (0xe<<24) /* dst = (src0 < src1) ? src0 : src1 */
+#define A0_MAX (0xf<<24) /* dst = (src0 >= src1) ? src0 : src1 */
+#define A0_FLR (0x10<<24) /* dst = floor(src0) */
+#define A0_MOD (0x11<<24) /* dst = src0 fmod 1.0 */
+#define A0_TRC (0x12<<24) /* dst = int(src0) */
+#define A0_SGE (0x13<<24) /* dst = src0 >= src1 ? 1.0 : 0.0 */
+#define A0_SLT (0x14<<24) /* dst = src0 < src1 ? 1.0 : 0.0 */
+#define A0_DEST_SATURATE (1<<22)
+#define A0_DEST_TYPE_SHIFT 19
+/* Allow: R, OC, OD, U */
+#define A0_DEST_NR_SHIFT 14
+/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */
+#define A0_DEST_CHANNEL_X (1<<10)
+#define A0_DEST_CHANNEL_Y (2<<10)
+#define A0_DEST_CHANNEL_Z (4<<10)
+#define A0_DEST_CHANNEL_W (8<<10)
+#define A0_DEST_CHANNEL_ALL (0xf<<10)
+#define A0_DEST_CHANNEL_SHIFT 10
+#define A0_SRC0_TYPE_SHIFT 7
+#define A0_SRC0_NR_SHIFT 2
+
+#define A0_DEST_CHANNEL_XY (A0_DEST_CHANNEL_X|A0_DEST_CHANNEL_Y)
+#define A0_DEST_CHANNEL_XYZ (A0_DEST_CHANNEL_XY|A0_DEST_CHANNEL_Z)
+
+#define SRC_X 0
+#define SRC_Y 1
+#define SRC_Z 2
+#define SRC_W 3
+#define SRC_ZERO 4
+#define SRC_ONE 5
+
+#define A1_SRC0_CHANNEL_X_NEGATE (1<<31)
+#define A1_SRC0_CHANNEL_X_SHIFT 28
+#define A1_SRC0_CHANNEL_Y_NEGATE (1<<27)
+#define A1_SRC0_CHANNEL_Y_SHIFT 24
+#define A1_SRC0_CHANNEL_Z_NEGATE (1<<23)
+#define A1_SRC0_CHANNEL_Z_SHIFT 20
+#define A1_SRC0_CHANNEL_W_NEGATE (1<<19)
+#define A1_SRC0_CHANNEL_W_SHIFT 16
+#define A1_SRC1_TYPE_SHIFT 13
+#define A1_SRC1_NR_SHIFT 8
+#define A1_SRC1_CHANNEL_X_NEGATE (1<<7)
+#define A1_SRC1_CHANNEL_X_SHIFT 4
+#define A1_SRC1_CHANNEL_Y_NEGATE (1<<3)
+#define A1_SRC1_CHANNEL_Y_SHIFT 0
+
+#define A2_SRC1_CHANNEL_Z_NEGATE (1<<31)
+#define A2_SRC1_CHANNEL_Z_SHIFT 28
+#define A2_SRC1_CHANNEL_W_NEGATE (1<<27)
+#define A2_SRC1_CHANNEL_W_SHIFT 24
+#define A2_SRC2_TYPE_SHIFT 21
+#define A2_SRC2_NR_SHIFT 16
+#define A2_SRC2_CHANNEL_X_NEGATE (1<<15)
+#define A2_SRC2_CHANNEL_X_SHIFT 12
+#define A2_SRC2_CHANNEL_Y_NEGATE (1<<11)
+#define A2_SRC2_CHANNEL_Y_SHIFT 8
+#define A2_SRC2_CHANNEL_Z_NEGATE (1<<7)
+#define A2_SRC2_CHANNEL_Z_SHIFT 4
+#define A2_SRC2_CHANNEL_W_NEGATE (1<<3)
+#define A2_SRC2_CHANNEL_W_SHIFT 0
+
+/* Texture instructions */
+#define T0_TEXLD (0x15<<24) /* Sample texture using predeclared
+ * sampler and address, and output
+ * filtered texel data to destination
+ * register */
+#define T0_TEXLDP (0x16<<24) /* Same as texld but performs a
+ * perspective divide of the texture
+ * coordinate .xyz values by .w before
+ * sampling. */
+#define T0_TEXLDB (0x17<<24) /* Same as texld but biases the
+ * computed LOD by w. Only S4.6 two's
+ * comp is used. This implies that a
+ * float to fixed conversion is
+ * done. */
+#define T0_TEXKILL (0x18<<24) /* Does not perform a sampling
+ * operation. Simply kills the pixel
+ * if any channel of the address
+ * register is < 0.0. */
+#define T0_DEST_TYPE_SHIFT 19
+/* Allow: R, OC, OD, U */
+/* Note: U (unpreserved) regs do not retain their values between
+ * phases (cannot be used for feedback)
+ *
+ * Note: oC and OD registers can only be used as the destination of a
+ * texture instruction once per phase (this is an implementation
+ * restriction).
+ */
+#define T0_DEST_NR_SHIFT 14
+/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */
+#define T0_SAMPLER_NR_SHIFT 0 /* This field ignored for TEXKILL */
+#define T0_SAMPLER_NR_MASK (0xf<<0)
+
+#define T1_ADDRESS_REG_TYPE_SHIFT 24 /* Reg to use as texture coord */
+/* Allow R, T, OC, OD -- R, OC, OD are 'dependent' reads, new program phase */
+#define T1_ADDRESS_REG_NR_SHIFT 17
+#define T2_MBZ 0
+
+/* Declaration instructions */
+#define D0_DCL (0x19<<24) /* Declare a t (interpolated attrib)
+ * register or an s (sampler)
+ * register. */
+#define D0_SAMPLE_TYPE_SHIFT 22
+#define D0_SAMPLE_TYPE_2D (0x0<<22)
+#define D0_SAMPLE_TYPE_CUBE (0x1<<22)
+#define D0_SAMPLE_TYPE_VOLUME (0x2<<22)
+#define D0_SAMPLE_TYPE_MASK (0x3<<22)
+
+#define D0_TYPE_SHIFT 19
+/* Allow: T, S */
+#define D0_NR_SHIFT 14
+/* Allow T: 0..10, S: 0..15 */
+#define D0_CHANNEL_X (1<<10)
+#define D0_CHANNEL_Y (2<<10)
+#define D0_CHANNEL_Z (4<<10)
+#define D0_CHANNEL_W (8<<10)
+#define D0_CHANNEL_ALL (0xf<<10)
+#define D0_CHANNEL_NONE (0<<10)
+
+#define D0_CHANNEL_XY (D0_CHANNEL_X|D0_CHANNEL_Y)
+#define D0_CHANNEL_XYZ (D0_CHANNEL_XY|D0_CHANNEL_Z)
+
+/* I915 Errata: Do not allow (xz), (xw), (xzw) combinations for diffuse
+ * or specular declarations.
+ *
+ * For T dcls, only allow: (x), (xy), (xyz), (w), (xyzw)
+ *
+ * Must be zero for S (sampler) dcls
+ */
+#define D1_MBZ 0
+#define D2_MBZ 0
+
+
+/* MASK_* are the unshifted bitmasks of the destination mask in arithmetic
+ * operations
+ */
+#define MASK_X 0x1
+#define MASK_Y 0x2
+#define MASK_Z 0x4
+#define MASK_W 0x8
+#define MASK_XYZ (MASK_X | MASK_Y | MASK_Z)
+#define MASK_XYZW (MASK_XYZ | MASK_W)
+#define MASK_SATURATE 0x10
+
+/* Temporary, undeclared regs. Preserved between phases */
+#define FS_R0 ((REG_TYPE_R << REG_TYPE_SHIFT) | 0)
+#define FS_R1 ((REG_TYPE_R << REG_TYPE_SHIFT) | 1)
+#define FS_R2 ((REG_TYPE_R << REG_TYPE_SHIFT) | 2)
+#define FS_R3 ((REG_TYPE_R << REG_TYPE_SHIFT) | 3)
+
+/* Texture coordinate regs. Must be declared. */
+#define FS_T0 ((REG_TYPE_T << REG_TYPE_SHIFT) | 0)
+#define FS_T1 ((REG_TYPE_T << REG_TYPE_SHIFT) | 1)
+#define FS_T2 ((REG_TYPE_T << REG_TYPE_SHIFT) | 2)
+#define FS_T3 ((REG_TYPE_T << REG_TYPE_SHIFT) | 3)
+#define FS_T4 ((REG_TYPE_T << REG_TYPE_SHIFT) | 4)
+#define FS_T5 ((REG_TYPE_T << REG_TYPE_SHIFT) | 5)
+#define FS_T6 ((REG_TYPE_T << REG_TYPE_SHIFT) | 6)
+#define FS_T7 ((REG_TYPE_T << REG_TYPE_SHIFT) | 7)
+#define FS_T8 ((REG_TYPE_T << REG_TYPE_SHIFT) | 8)
+#define FS_T9 ((REG_TYPE_T << REG_TYPE_SHIFT) | 9)
+#define FS_T10 ((REG_TYPE_T << REG_TYPE_SHIFT) | 10)
+
+/* Constant values */
+#define FS_C0 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 0)
+#define FS_C1 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 1)
+#define FS_C2 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 2)
+#define FS_C3 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 3)
+#define FS_C4 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 4)
+#define FS_C5 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 5)
+#define FS_C6 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 6)
+#define FS_C7 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 7)
+
+/* Sampler regs */
+#define FS_S0 ((REG_TYPE_S << REG_TYPE_SHIFT) | 0)
+#define FS_S1 ((REG_TYPE_S << REG_TYPE_SHIFT) | 1)
+#define FS_S2 ((REG_TYPE_S << REG_TYPE_SHIFT) | 2)
+#define FS_S3 ((REG_TYPE_S << REG_TYPE_SHIFT) | 3)
+
+/* Output color */
+#define FS_OC ((REG_TYPE_OC << REG_TYPE_SHIFT) | 0)
+
+/* Output depth */
+#define FS_OD ((REG_TYPE_OD << REG_TYPE_SHIFT) | 0)
+
+/* Unpreserved temporary regs */
+#define FS_U0 ((REG_TYPE_U << REG_TYPE_SHIFT) | 0)
+#define FS_U1 ((REG_TYPE_U << REG_TYPE_SHIFT) | 1)
+#define FS_U2 ((REG_TYPE_U << REG_TYPE_SHIFT) | 2)
+#define FS_U3 ((REG_TYPE_U << REG_TYPE_SHIFT) | 3)
+
+#define X_CHANNEL_SHIFT (REG_TYPE_SHIFT + 3)
+#define Y_CHANNEL_SHIFT (X_CHANNEL_SHIFT + 4)
+#define Z_CHANNEL_SHIFT (Y_CHANNEL_SHIFT + 4)
+#define W_CHANNEL_SHIFT (Z_CHANNEL_SHIFT + 4)
+
+#define REG_CHANNEL_MASK 0xf
+
+#define REG_NR(reg) ((reg) & REG_NR_MASK)
+#define REG_TYPE(reg) (((reg) >> REG_TYPE_SHIFT) & REG_TYPE_MASK)
+#define REG_X(reg) (((reg) >> X_CHANNEL_SHIFT) & REG_CHANNEL_MASK)
+#define REG_Y(reg) (((reg) >> Y_CHANNEL_SHIFT) & REG_CHANNEL_MASK)
+#define REG_Z(reg) (((reg) >> Z_CHANNEL_SHIFT) & REG_CHANNEL_MASK)
+#define REG_W(reg) (((reg) >> W_CHANNEL_SHIFT) & REG_CHANNEL_MASK)
+
+enum gen3_fs_channel {
+ X_CHANNEL_VAL = 0,
+ Y_CHANNEL_VAL,
+ Z_CHANNEL_VAL,
+ W_CHANNEL_VAL,
+ ZERO_CHANNEL_VAL,
+ ONE_CHANNEL_VAL,
+
+ NEG_X_CHANNEL_VAL = X_CHANNEL_VAL | 0x8,
+ NEG_Y_CHANNEL_VAL = Y_CHANNEL_VAL | 0x8,
+ NEG_Z_CHANNEL_VAL = Z_CHANNEL_VAL | 0x8,
+ NEG_W_CHANNEL_VAL = W_CHANNEL_VAL | 0x8,
+ NEG_ONE_CHANNEL_VAL = ONE_CHANNEL_VAL | 0x8
+};
+
+#define gen3_fs_operand(reg, x, y, z, w) \
+ (reg) | \
+(x##_CHANNEL_VAL << X_CHANNEL_SHIFT) | \
+(y##_CHANNEL_VAL << Y_CHANNEL_SHIFT) | \
+(z##_CHANNEL_VAL << Z_CHANNEL_SHIFT) | \
+(w##_CHANNEL_VAL << W_CHANNEL_SHIFT)
+
+/**
+ * Construct an operand description for using a register with no swizzling
+ */
+#define gen3_fs_operand_reg(reg) \
+ gen3_fs_operand(reg, X, Y, Z, W)
+
+#define gen3_fs_operand_reg_negate(reg) \
+ gen3_fs_operand(reg, NEG_X, NEG_Y, NEG_Z, NEG_W)
+
+/**
+ * Returns an operand containing (0.0, 0.0, 0.0, 0.0).
+ */
+#define gen3_fs_operand_zero() gen3_fs_operand(FS_R0, ZERO, ZERO, ZERO, ZERO)
+
+/**
+ * Returns an unused operand
+ */
+#define gen3_fs_operand_none() gen3_fs_operand_zero()
+
+/**
+ * Returns an operand containing (1.0, 1.0, 1.0, 1.0).
+ */
+#define gen3_fs_operand_one() gen3_fs_operand(FS_R0, ONE, ONE, ONE, ONE)
+
+#define gen3_get_hardware_channel_val(val, shift, negate) \
+ (((val & 0x7) << shift) | ((val & 0x8) ? negate : 0))
+
+/**
+ * Outputs a fragment shader command to declare a sampler or texture register.
+ */
+#define gen3_fs_dcl(reg) \
+ do { \
+ OUT_BATCH(D0_DCL | \
+ (REG_TYPE(reg) << D0_TYPE_SHIFT) | \
+ (REG_NR(reg) << D0_NR_SHIFT) | \
+ ((REG_TYPE(reg) != REG_TYPE_S) ? D0_CHANNEL_ALL : 0)); \
+ OUT_BATCH(0); \
+ OUT_BATCH(0); \
+ } while (0)
+
+#define gen3_fs_texld(dest_reg, sampler_reg, address_reg) \
+ do { \
+ OUT_BATCH(T0_TEXLD | \
+ (REG_TYPE(dest_reg) << T0_DEST_TYPE_SHIFT) | \
+ (REG_NR(dest_reg) << T0_DEST_NR_SHIFT) | \
+ (REG_NR(sampler_reg) << T0_SAMPLER_NR_SHIFT)); \
+ OUT_BATCH((REG_TYPE(address_reg) << T1_ADDRESS_REG_TYPE_SHIFT) | \
+ (REG_NR(address_reg) << T1_ADDRESS_REG_NR_SHIFT)); \
+ OUT_BATCH(0); \
+ } while (0)
+
+#define gen3_fs_texldp(dest_reg, sampler_reg, address_reg) \
+ do { \
+ OUT_BATCH(T0_TEXLDP | \
+ (REG_TYPE(dest_reg) << T0_DEST_TYPE_SHIFT) | \
+ (REG_NR(dest_reg) << T0_DEST_NR_SHIFT) | \
+ (REG_NR(sampler_reg) << T0_SAMPLER_NR_SHIFT)); \
+ OUT_BATCH((REG_TYPE(address_reg) << T1_ADDRESS_REG_TYPE_SHIFT) | \
+ (REG_NR(address_reg) << T1_ADDRESS_REG_NR_SHIFT)); \
+ OUT_BATCH(0); \
+ } while (0)
+
+#define gen3_fs_arith_masked(op, dest_reg, dest_mask, operand0, operand1, operand2) \
+ _gen3_fs_arith_masked(A0_##op, dest_reg, dest_mask, operand0, operand1, operand2)
+
+#define gen3_fs_arith(op, dest_reg, operand0, operand1, operand2) \
+ _gen3_fs_arith(A0_##op, dest_reg, operand0, operand1, operand2)
+
+#define _gen3_fs_arith_masked(cmd, dest_reg, dest_mask, operand0, operand1, operand2) \
+ do { \
+ /* Set up destination register and write mask */ \
+ OUT_BATCH(cmd | \
+ (REG_TYPE(dest_reg) << A0_DEST_TYPE_SHIFT) | \
+ (REG_NR(dest_reg) << A0_DEST_NR_SHIFT) | \
+ (((dest_mask) & ~MASK_SATURATE) << A0_DEST_CHANNEL_SHIFT) | \
+ (((dest_mask) & MASK_SATURATE) ? A0_DEST_SATURATE : 0) | \
+ /* Set up operand 0 */ \
+ (REG_TYPE(operand0) << A0_SRC0_TYPE_SHIFT) | \
+ (REG_NR(operand0) << A0_SRC0_NR_SHIFT)); \
+ OUT_BATCH(gen3_get_hardware_channel_val(REG_X(operand0), \
+ A1_SRC0_CHANNEL_X_SHIFT, \
+ A1_SRC0_CHANNEL_X_NEGATE) | \
+ gen3_get_hardware_channel_val(REG_Y(operand0), \
+ A1_SRC0_CHANNEL_Y_SHIFT, \
+ A1_SRC0_CHANNEL_Y_NEGATE) | \
+ gen3_get_hardware_channel_val(REG_Z(operand0), \
+ A1_SRC0_CHANNEL_Z_SHIFT, \
+ A1_SRC0_CHANNEL_Z_NEGATE) | \
+ gen3_get_hardware_channel_val(REG_W(operand0), \
+ A1_SRC0_CHANNEL_W_SHIFT, \
+ A1_SRC0_CHANNEL_W_NEGATE) | \
+ /* Set up operand 1 */ \
+ (REG_TYPE(operand1) << A1_SRC1_TYPE_SHIFT) | \
+ (REG_NR(operand1) << A1_SRC1_NR_SHIFT) | \
+ gen3_get_hardware_channel_val(REG_X(operand1), \
+ A1_SRC1_CHANNEL_X_SHIFT, \
+ A1_SRC1_CHANNEL_X_NEGATE) | \
+ gen3_get_hardware_channel_val(REG_Y(operand1), \
+ A1_SRC1_CHANNEL_Y_SHIFT, \
+ A1_SRC1_CHANNEL_Y_NEGATE)); \
+ OUT_BATCH(gen3_get_hardware_channel_val(REG_Z(operand1), \
+ A2_SRC1_CHANNEL_Z_SHIFT, \
+ A2_SRC1_CHANNEL_Z_NEGATE) | \
+ gen3_get_hardware_channel_val(REG_W(operand1), \
+ A2_SRC1_CHANNEL_W_SHIFT, \
+ A2_SRC1_CHANNEL_W_NEGATE) | \
+ /* Set up operand 2 */ \
+ (REG_TYPE(operand2) << A2_SRC2_TYPE_SHIFT) | \
+ (REG_NR(operand2) << A2_SRC2_NR_SHIFT) | \
+ gen3_get_hardware_channel_val(REG_X(operand2), \
+ A2_SRC2_CHANNEL_X_SHIFT, \
+ A2_SRC2_CHANNEL_X_NEGATE) | \
+ gen3_get_hardware_channel_val(REG_Y(operand2), \
+ A2_SRC2_CHANNEL_Y_SHIFT, \
+ A2_SRC2_CHANNEL_Y_NEGATE) | \
+ gen3_get_hardware_channel_val(REG_Z(operand2), \
+ A2_SRC2_CHANNEL_Z_SHIFT, \
+ A2_SRC2_CHANNEL_Z_NEGATE) | \
+ gen3_get_hardware_channel_val(REG_W(operand2), \
+ A2_SRC2_CHANNEL_W_SHIFT, \
+ A2_SRC2_CHANNEL_W_NEGATE)); \
+ } while (0)
+
+#define _gen3_fs_arith(cmd, dest_reg, operand0, operand1, operand2) do {\
+ /* Set up destination register and write mask */ \
+ OUT_BATCH(cmd | \
+ (REG_TYPE(dest_reg) << A0_DEST_TYPE_SHIFT) | \
+ (REG_NR(dest_reg) << A0_DEST_NR_SHIFT) | \
+ (A0_DEST_CHANNEL_ALL) | \
+ /* Set up operand 0 */ \
+ (REG_TYPE(operand0) << A0_SRC0_TYPE_SHIFT) | \
+ (REG_NR(operand0) << A0_SRC0_NR_SHIFT)); \
+ OUT_BATCH(gen3_get_hardware_channel_val(REG_X(operand0), \
+ A1_SRC0_CHANNEL_X_SHIFT, \
+ A1_SRC0_CHANNEL_X_NEGATE) | \
+ gen3_get_hardware_channel_val(REG_Y(operand0), \
+ A1_SRC0_CHANNEL_Y_SHIFT, \
+ A1_SRC0_CHANNEL_Y_NEGATE) | \
+ gen3_get_hardware_channel_val(REG_Z(operand0), \
+ A1_SRC0_CHANNEL_Z_SHIFT, \
+ A1_SRC0_CHANNEL_Z_NEGATE) | \
+ gen3_get_hardware_channel_val(REG_W(operand0), \
+ A1_SRC0_CHANNEL_W_SHIFT, \
+ A1_SRC0_CHANNEL_W_NEGATE) | \
+ /* Set up operand 1 */ \
+ (REG_TYPE(operand1) << A1_SRC1_TYPE_SHIFT) | \
+ (REG_NR(operand1) << A1_SRC1_NR_SHIFT) | \
+ gen3_get_hardware_channel_val(REG_X(operand1), \
+ A1_SRC1_CHANNEL_X_SHIFT, \
+ A1_SRC1_CHANNEL_X_NEGATE) | \
+ gen3_get_hardware_channel_val(REG_Y(operand1), \
+ A1_SRC1_CHANNEL_Y_SHIFT, \
+ A1_SRC1_CHANNEL_Y_NEGATE)); \
+ OUT_BATCH(gen3_get_hardware_channel_val(REG_Z(operand1), \
+ A2_SRC1_CHANNEL_Z_SHIFT, \
+ A2_SRC1_CHANNEL_Z_NEGATE) | \
+ gen3_get_hardware_channel_val(REG_W(operand1), \
+ A2_SRC1_CHANNEL_W_SHIFT, \
+ A2_SRC1_CHANNEL_W_NEGATE) | \
+ /* Set up operand 2 */ \
+ (REG_TYPE(operand2) << A2_SRC2_TYPE_SHIFT) | \
+ (REG_NR(operand2) << A2_SRC2_NR_SHIFT) | \
+ gen3_get_hardware_channel_val(REG_X(operand2), \
+ A2_SRC2_CHANNEL_X_SHIFT, \
+ A2_SRC2_CHANNEL_X_NEGATE) | \
+ gen3_get_hardware_channel_val(REG_Y(operand2), \
+ A2_SRC2_CHANNEL_Y_SHIFT, \
+ A2_SRC2_CHANNEL_Y_NEGATE) | \
+ gen3_get_hardware_channel_val(REG_Z(operand2), \
+ A2_SRC2_CHANNEL_Z_SHIFT, \
+ A2_SRC2_CHANNEL_Z_NEGATE) | \
+ gen3_get_hardware_channel_val(REG_W(operand2), \
+ A2_SRC2_CHANNEL_W_SHIFT, \
+ A2_SRC2_CHANNEL_W_NEGATE)); \
+} while (0)
+
+#define gen3_fs_mov(dest_reg, operand0) \
+ gen3_fs_arith(MOV, dest_reg, \
+ operand0, \
+ gen3_fs_operand_none(), \
+ gen3_fs_operand_none())
+
+#define gen3_fs_mov_masked(dest_reg, dest_mask, operand0) \
+ gen3_fs_arith_masked (MOV, dest_reg, dest_mask, \
+ operand0, \
+ gen3_fs_operand_none(), \
+ gen3_fs_operand_none())
+
+
+#define gen3_fs_frc(dest_reg, operand0) \
+ gen3_fs_arith (FRC, dest_reg, \
+ operand0, \
+ gen3_fs_operand_none(), \
+ gen3_fs_operand_none())
+
+/** Add operand0 and operand1 and put the result in dest_reg */
+#define gen3_fs_add(dest_reg, operand0, operand1) \
+ gen3_fs_arith (ADD, dest_reg, \
+ operand0, operand1, \
+ gen3_fs_operand_none())
+
+/** Multiply operand0 and operand1 and put the result in dest_reg */
+#define gen3_fs_mul(dest_reg, operand0, operand1) \
+ gen3_fs_arith (MUL, dest_reg, \
+ operand0, operand1, \
+ gen3_fs_operand_none())
+
+/** Computes 1/(operand0.replicate_swizzle) puts the result in dest_reg */
+#define gen3_fs_rcp(dest_reg, dest_mask, operand0) \
+ do { \
+ if (dest_mask) { \
+ gen3_fs_arith_masked (RCP, dest_reg, dest_mask, \
+ operand0, \
+ gen3_fs_operand_none (), \
+ gen3_fs_operand_none ()); \
+ } else { \
+ gen3_fs_arith (RCP, dest_reg, \
+ operand0, \
+ gen3_fs_operand_none (), \
+ gen3_fs_operand_none ()); \
+ } \
+ } while (0)
+
+/** Computes 1/sqrt(operand0.replicate_swizzle) puts the result in dest_reg */
+#define gen3_fs_rsq(dest_reg, dest_mask, operand0) \
+ do { \
+ if (dest_mask) { \
+ gen3_fs_arith_masked (RSQ, dest_reg, dest_mask, \
+ operand0, \
+ gen3_fs_operand_none (), \
+ gen3_fs_operand_none ()); \
+ } else { \
+ gen3_fs_arith (RSQ, dest_reg, \
+ operand0, \
+ gen3_fs_operand_none (), \
+ gen3_fs_operand_none ()); \
+ } \
+ } while (0)
+
+/** Puts the minimum of operand0 and operand1 in dest_reg */
+#define gen3_fs_min(dest_reg, operand0, operand1) \
+ gen3_fs_arith (MIN, dest_reg, \
+ operand0, operand1, \
+ gen3_fs_operand_none())
+
+/** Puts the maximum of operand0 and operand1 in dest_reg */
+#define gen3_fs_max(dest_reg, operand0, operand1) \
+ gen3_fs_arith (MAX, dest_reg, \
+ operand0, operand1, \
+ gen3_fs_operand_none())
+
+#define gen3_fs_cmp(dest_reg, operand0, operand1, operand2) \
+ gen3_fs_arith (CMP, dest_reg, operand0, operand1, operand2)
+
+/** Perform operand0 * operand1 + operand2 and put the result in dest_reg */
+#define gen3_fs_mad(dest_reg, dest_mask, op0, op1, op2) \
+ do { \
+ if (dest_mask) { \
+ gen3_fs_arith_masked (MAD, dest_reg, dest_mask, op0, op1, op2); \
+ } else { \
+ gen3_fs_arith (MAD, dest_reg, op0, op1, op2); \
+ } \
+ } while (0)
+
+#define gen3_fs_dp2add(dest_reg, dest_mask, op0, op1, op2) \
+ do { \
+ if (dest_mask) { \
+ gen3_fs_arith_masked (DP2ADD, dest_reg, dest_mask, op0, op1, op2); \
+ } else { \
+ gen3_fs_arith (DP2ADD, dest_reg, op0, op1, op2); \
+ } \
+ } while (0)
+
+/**
+ * Perform a 3-component dot-product of operand0 and operand1 and put the
+ * resulting scalar in the channels of dest_reg specified by the dest_mask.
+ */
+#define gen3_fs_dp3(dest_reg, dest_mask, op0, op1) \
+ do { \
+ if (dest_mask) { \
+ gen3_fs_arith_masked (DP3, dest_reg, dest_mask, \
+ op0, op1,\
+ gen3_fs_operand_none()); \
+ } else { \
+ gen3_fs_arith (DP3, dest_reg, op0, op1,\
+ gen3_fs_operand_none()); \
+ } \
+ } while (0)
+
+/**
+ * Perform a 4-component dot-product of operand0 and operand1 and put the
+ * resulting scalar in the channels of dest_reg specified by the dest_mask.
+ */
+#define gen3_fs_dp4(dest_reg, dest_mask, op0, op1) \
+ do { \
+ if (dest_mask) { \
+ gen3_fs_arith_masked (DP4, dest_reg, dest_mask, \
+ op0, op1,\
+ gen3_fs_operand_none()); \
+ } else { \
+ gen3_fs_arith (DP4, dest_reg, op0, op1,\
+ gen3_fs_operand_none()); \
+ } \
+ } while (0)
+
+#define SHADER_TRAPEZOIDS (1 << 24)
diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
new file mode 100644
index 0000000..82fef2d
--- a/dev/null
+++ b/src/sna/gen4_render.c
@@ -0,0 +1,2762 @@
+/*
+ * Copyright © 2006,2008,2011 Intel Corporation
+ * Copyright © 2007 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Wang Zhenyu <zhenyu.z.wang@sna.com>
+ * Eric Anholt <eric@anholt.net>
+ * Carl Worth <cworth@redhat.com>
+ * Keith Packard <keithp@keithp.com>
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <xf86.h>
+
+#include "sna.h"
+#include "sna_reg.h"
+#include "sna_render.h"
+#include "sna_render_inline.h"
+#include "sna_video.h"
+
+#include "gen4_render.h"
+
+#if DEBUG_RENDER
+#undef DBG
+#define DBG(x) ErrorF x
+#else
+#define NDEBUG 1
+#endif
+
+/* gen4 has a serious issue with its shaders that we need to flush
+ * after every rectangle... So until that is resolved, prefer
+ * the BLT engine.
+ */
+#define PREFER_BLT 1
+
+#define FLUSH() do { \
+ gen4_vertex_flush(sna); \
+ OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH); \
+} while (0)
+
+#define GEN4_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1)
+
+/* Set up a default static partitioning of the URB, which is supposed to
+ * allow anything we would want to do, at potentially lower performance.
+ */
+#define URB_CS_ENTRY_SIZE 1
+#define URB_CS_ENTRIES 0
+
+#define URB_VS_ENTRY_SIZE 1 // each 512-bit row
+#define URB_VS_ENTRIES 8 // we needs at least 8 entries
+
+#define URB_GS_ENTRY_SIZE 0
+#define URB_GS_ENTRIES 0
+
+#define URB_CLIP_ENTRY_SIZE 0
+#define URB_CLIP_ENTRIES 0
+
+#define URB_SF_ENTRY_SIZE 2
+#define URB_SF_ENTRIES 1
+
+/*
+ * this program computes dA/dx and dA/dy for the texture coordinates along
+ * with the base texture coordinate. It was extracted from the Mesa driver
+ */
+
+#define SF_KERNEL_NUM_GRF 16
+#define SF_MAX_THREADS 2
+
+#define PS_KERNEL_NUM_GRF 32
+#define PS_MAX_THREADS 48
+
+static const uint32_t sf_kernel[][4] = {
+#include "exa_sf.g4b"
+};
+
+static const uint32_t sf_kernel_mask[][4] = {
+#include "exa_sf_mask.g4b"
+};
+
+static const uint32_t ps_kernel_nomask_affine[][4] = {
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_affine.g4b"
+#include "exa_wm_src_sample_argb.g4b"
+#include "exa_wm_write.g4b"
+};
+
+static const uint32_t ps_kernel_nomask_projective[][4] = {
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_projective.g4b"
+#include "exa_wm_src_sample_argb.g4b"
+#include "exa_wm_write.g4b"
+};
+
+static const uint32_t ps_kernel_maskca_affine[][4] = {
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_affine.g4b"
+#include "exa_wm_src_sample_argb.g4b"
+#include "exa_wm_mask_affine.g4b"
+#include "exa_wm_mask_sample_argb.g4b"
+#include "exa_wm_ca.g4b"
+#include "exa_wm_write.g4b"
+};
+
+static const uint32_t ps_kernel_maskca_projective[][4] = {
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_projective.g4b"
+#include "exa_wm_src_sample_argb.g4b"
+#include "exa_wm_mask_projective.g4b"
+#include "exa_wm_mask_sample_argb.g4b"
+#include "exa_wm_ca.g4b"
+#include "exa_wm_write.g4b"
+};
+
+static const uint32_t ps_kernel_maskca_srcalpha_affine[][4] = {
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_affine.g4b"
+#include "exa_wm_src_sample_a.g4b"
+#include "exa_wm_mask_affine.g4b"
+#include "exa_wm_mask_sample_argb.g4b"
+#include "exa_wm_ca_srcalpha.g4b"
+#include "exa_wm_write.g4b"
+};
+
+static const uint32_t ps_kernel_maskca_srcalpha_projective[][4] = {
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_projective.g4b"
+#include "exa_wm_src_sample_a.g4b"
+#include "exa_wm_mask_projective.g4b"
+#include "exa_wm_mask_sample_argb.g4b"
+#include "exa_wm_ca_srcalpha.g4b"
+#include "exa_wm_write.g4b"
+};
+
+static const uint32_t ps_kernel_masknoca_affine[][4] = {
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_affine.g4b"
+#include "exa_wm_src_sample_argb.g4b"
+#include "exa_wm_mask_affine.g4b"
+#include "exa_wm_mask_sample_a.g4b"
+#include "exa_wm_noca.g4b"
+#include "exa_wm_write.g4b"
+};
+
+static const uint32_t ps_kernel_masknoca_projective[][4] = {
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_projective.g4b"
+#include "exa_wm_src_sample_argb.g4b"
+#include "exa_wm_mask_projective.g4b"
+#include "exa_wm_mask_sample_a.g4b"
+#include "exa_wm_noca.g4b"
+#include "exa_wm_write.g4b"
+};
+
+static const uint32_t ps_kernel_packed_static[][4] = {
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_affine.g4b"
+#include "exa_wm_src_sample_argb.g4b"
+#include "exa_wm_yuv_rgb.g4b"
+#include "exa_wm_write.g4b"
+};
+
+static const uint32_t ps_kernel_planar_static[][4] = {
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_affine.g4b"
+#include "exa_wm_src_sample_planar.g4b"
+#include "exa_wm_yuv_rgb.g4b"
+#include "exa_wm_write.g4b"
+};
+
+#define KERNEL(kernel_enum, kernel, masked) \
+ [kernel_enum] = {&kernel, sizeof(kernel), masked}
+static const struct wm_kernel_info {
+ const void *data;
+ unsigned int size;
+ Bool has_mask;
+} wm_kernels[] = {
+ KERNEL(WM_KERNEL, ps_kernel_nomask_affine, FALSE),
+ KERNEL(WM_KERNEL_PROJECTIVE, ps_kernel_nomask_projective, FALSE),
+
+ KERNEL(WM_KERNEL_MASK, ps_kernel_masknoca_affine, TRUE),
+ KERNEL(WM_KERNEL_MASK_PROJECTIVE, ps_kernel_masknoca_projective, TRUE),
+
+ KERNEL(WM_KERNEL_MASKCA, ps_kernel_maskca_affine, TRUE),
+ KERNEL(WM_KERNEL_MASKCA_PROJECTIVE, ps_kernel_maskca_projective, TRUE),
+
+ KERNEL(WM_KERNEL_MASKCA_SRCALPHA,
+ ps_kernel_maskca_srcalpha_affine, TRUE),
+ KERNEL(WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,
+ ps_kernel_maskca_srcalpha_projective, TRUE),
+
+ KERNEL(WM_KERNEL_VIDEO_PLANAR, ps_kernel_planar_static, FALSE),
+ KERNEL(WM_KERNEL_VIDEO_PACKED, ps_kernel_packed_static, FALSE),
+};
+#undef KERNEL
+
+static const struct blendinfo {
+ Bool src_alpha;
+ uint32_t src_blend;
+ uint32_t dst_blend;
+} gen4_blend_op[] = {
+ /* Clear */ {0, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_ZERO},
+ /* Src */ {0, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_ZERO},
+ /* Dst */ {0, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_ONE},
+ /* Over */ {1, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
+ /* OverReverse */ {0, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_ONE},
+ /* In */ {0, GEN4_BLENDFACTOR_DST_ALPHA, GEN4_BLENDFACTOR_ZERO},
+ /* InReverse */ {1, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_SRC_ALPHA},
+ /* Out */ {0, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_ZERO},
+ /* OutReverse */ {1, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
+ /* Atop */ {1, GEN4_BLENDFACTOR_DST_ALPHA, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
+ /* AtopReverse */ {1, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_SRC_ALPHA},
+ /* Xor */ {1, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
+ /* Add */ {0, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_ONE},
+};
+
+/**
+ * Highest-valued BLENDFACTOR used in gen4_blend_op.
+ *
+ * This leaves out GEN4_BLENDFACTOR_INV_DST_COLOR,
+ * GEN4_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
+ * GEN4_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
+ */
+#define GEN4_BLENDFACTOR_COUNT (GEN4_BLENDFACTOR_INV_DST_ALPHA + 1)
+
+static const struct formatinfo {
+ CARD32 pict_fmt;
+ uint32_t card_fmt;
+} gen4_tex_formats[] = {
+ {PICT_a8, GEN4_SURFACEFORMAT_A8_UNORM},
+ {PICT_a8r8g8b8, GEN4_SURFACEFORMAT_B8G8R8A8_UNORM},
+ {PICT_x8r8g8b8, GEN4_SURFACEFORMAT_B8G8R8X8_UNORM},
+ {PICT_a8b8g8r8, GEN4_SURFACEFORMAT_R8G8B8A8_UNORM},
+ {PICT_x8b8g8r8, GEN4_SURFACEFORMAT_R8G8B8X8_UNORM},
+ {PICT_r8g8b8, GEN4_SURFACEFORMAT_R8G8B8_UNORM},
+ {PICT_r5g6b5, GEN4_SURFACEFORMAT_B5G6R5_UNORM},
+ {PICT_a1r5g5b5, GEN4_SURFACEFORMAT_B5G5R5A1_UNORM},
+ {PICT_a2r10g10b10, GEN4_SURFACEFORMAT_B10G10R10A2_UNORM},
+ {PICT_x2r10g10b10, GEN4_SURFACEFORMAT_B10G10R10X2_UNORM},
+ {PICT_a2b10g10r10, GEN4_SURFACEFORMAT_R10G10B10A2_UNORM},
+ {PICT_x2r10g10b10, GEN4_SURFACEFORMAT_B10G10R10X2_UNORM},
+ {PICT_a4r4g4b4, GEN4_SURFACEFORMAT_B4G4R4A4_UNORM},
+};
+
+#define BLEND_OFFSET(s, d) \
+ (((s) * GEN4_BLENDFACTOR_COUNT + (d)) * 64)
+
+#define SAMPLER_OFFSET(sf, se, mf, me, k) \
+ ((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) * KERNEL_COUNT + (k)) * 64)
+
+static bool
+gen4_emit_pipelined_pointers(struct sna *sna,
+ const struct sna_composite_op *op,
+ int blend, int kernel);
+
+#define OUT_BATCH(v) batch_emit(sna, v)
+#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
+#define OUT_VERTEX_F(v) vertex_emit(sna, v)
+
+static int
+gen4_choose_composite_kernel(int op, Bool has_mask, Bool is_ca, Bool is_affine)
+{
+ int base;
+
+ if (has_mask) {
+ if (is_ca) {
+ if (gen4_blend_op[op].src_alpha)
+ base = WM_KERNEL_MASKCA_SRCALPHA;
+ else
+ base = WM_KERNEL_MASKCA;
+ } else
+ base = WM_KERNEL_MASK;
+ } else
+ base = WM_KERNEL;
+
+ return base + !is_affine;
+}
+
+static void gen4_magic_ca_pass(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ struct gen4_render_state *state = &sna->render_state.gen4;
+
+ if (!op->need_magic_ca_pass)
+ return;
+
+ DBG(("%s: CA fixup\n", __FUNCTION__));
+
+ gen4_emit_pipelined_pointers
+ (sna, op, PictOpAdd,
+ gen4_choose_composite_kernel(PictOpAdd,
+ TRUE, TRUE, op->is_affine));
+
+ OUT_BATCH(GEN4_3DPRIMITIVE |
+ GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL |
+ (_3DPRIM_RECTLIST << GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT) |
+ (0 << 9) |
+ 4);
+ OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
+ OUT_BATCH(sna->render.vertex_start);
+ OUT_BATCH(1); /* single instance */
+ OUT_BATCH(0); /* start instance location */
+ OUT_BATCH(0); /* index buffer offset, ignored */
+
+ state->last_primitive = sna->kgem.nbatch;
+}
+
+static void gen4_vertex_flush(struct sna *sna)
+{
+ if (sna->render_state.gen4.vertex_offset == 0)
+ return;
+
+ DBG(("%s[%x] = %d\n", __FUNCTION__,
+ 4*sna->render_state.gen4.vertex_offset,
+ sna->render.vertex_index - sna->render.vertex_start));
+ sna->kgem.batch[sna->render_state.gen4.vertex_offset] =
+ sna->render.vertex_index - sna->render.vertex_start;
+ sna->render_state.gen4.vertex_offset = 0;
+
+ if (sna->render.op)
+ gen4_magic_ca_pass(sna, sna->render.op);
+}
+
+static void gen4_vertex_finish(struct sna *sna, Bool last)
+{
+ struct kgem_bo *bo;
+ int i, delta;
+
+ gen4_vertex_flush(sna);
+ if (!sna->render.vertex_used)
+ return;
+
+ /* Note: we only need dword alignment (currently) */
+
+ if (last && sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) {
+ DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__,
+ sna->render.vertex_used, sna->kgem.nbatch));
+ memcpy(sna->kgem.batch + sna->kgem.nbatch,
+ sna->render.vertex_data,
+ sna->render.vertex_used * 4);
+ delta = sna->kgem.nbatch * 4;
+ bo = NULL;
+ sna->kgem.nbatch += sna->render.vertex_used;
+ } else {
+ bo = kgem_create_linear(&sna->kgem, 4*sna->render.vertex_used);
+ if (bo && !kgem_bo_write(&sna->kgem, bo,
+ sna->render.vertex_data,
+ 4*sna->render.vertex_used)) {
+ kgem_bo_destroy(&sna->kgem, bo);
+ return;
+ }
+ delta = 0;
+ DBG(("%s: new vbo: %d\n", __FUNCTION__,
+ sna->render.vertex_used));
+ }
+
+ for (i = 0; i < ARRAY_SIZE(sna->render.vertex_reloc); i++) {
+ if (sna->render.vertex_reloc[i]) {
+ DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
+ i, sna->render.vertex_reloc[i]));
+
+ sna->kgem.batch[sna->render.vertex_reloc[i]] =
+ kgem_add_reloc(&sna->kgem,
+ sna->render.vertex_reloc[i],
+ bo,
+ I915_GEM_DOMAIN_VERTEX << 16,
+ delta);
+ sna->render.vertex_reloc[i] = 0;
+ }
+ }
+
+ if (bo)
+ kgem_bo_destroy(&sna->kgem, bo);
+
+ sna->render.vertex_used = 0;
+ sna->render.vertex_index = 0;
+ sna->render_state.gen4.vb_id = 0;
+}
+
+static uint32_t gen4_get_blend(int op,
+ Bool has_component_alpha,
+ uint32_t dst_format)
+{
+ uint32_t src, dst;
+
+ src = gen4_blend_op[op].src_blend;
+ dst = gen4_blend_op[op].dst_blend;
+
+ /* If there's no dst alpha channel, adjust the blend op so that we'll treat
+ * it as always 1.
+ */
+ if (PICT_FORMAT_A(dst_format) == 0) {
+ if (src == GEN4_BLENDFACTOR_DST_ALPHA)
+ src = GEN4_BLENDFACTOR_ONE;
+ else if (src == GEN4_BLENDFACTOR_INV_DST_ALPHA)
+ src = GEN4_BLENDFACTOR_ZERO;
+ }
+
+ /* If the source alpha is being used, then we should only be in a
+ * case where the source blend factor is 0, and the source blend
+ * value is the mask channels multiplied by the source picture's alpha.
+ */
+ if (has_component_alpha && gen4_blend_op[op].src_alpha) {
+ if (dst == GEN4_BLENDFACTOR_SRC_ALPHA)
+ dst = GEN4_BLENDFACTOR_SRC_COLOR;
+ else if (dst == GEN4_BLENDFACTOR_INV_SRC_ALPHA)
+ dst = GEN4_BLENDFACTOR_INV_SRC_COLOR;
+ }
+
+ DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
+ op, dst_format, PICT_FORMAT_A(dst_format),
+ src, dst, BLEND_OFFSET(src, dst)));
+ return BLEND_OFFSET(src, dst);
+}
+
+static uint32_t gen4_get_dest_format(PictFormat format)
+{
+ switch (format) {
+ case PICT_a8r8g8b8:
+ case PICT_x8r8g8b8:
+ default:
+ return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
+ case PICT_a8b8g8r8:
+ case PICT_x8b8g8r8:
+ return GEN4_SURFACEFORMAT_R8G8B8A8_UNORM;
+ case PICT_a2r10g10b10:
+ case PICT_x2r10g10b10:
+ return GEN4_SURFACEFORMAT_B10G10R10A2_UNORM;
+ case PICT_r5g6b5:
+ return GEN4_SURFACEFORMAT_B5G6R5_UNORM;
+ case PICT_x1r5g5b5:
+ case PICT_a1r5g5b5:
+ return GEN4_SURFACEFORMAT_B5G5R5A1_UNORM;
+ case PICT_a8:
+ return GEN4_SURFACEFORMAT_A8_UNORM;
+ case PICT_a4r4g4b4:
+ case PICT_x4r4g4b4:
+ return GEN4_SURFACEFORMAT_B4G4R4A4_UNORM;
+ }
+}
+
+static Bool gen4_check_dst_format(PictFormat format)
+{
+ switch (format) {
+ case PICT_a8r8g8b8:
+ case PICT_x8r8g8b8:
+ case PICT_a8b8g8r8:
+ case PICT_x8b8g8r8:
+ case PICT_a2r10g10b10:
+ case PICT_x2r10g10b10:
+ case PICT_r5g6b5:
+ case PICT_x1r5g5b5:
+ case PICT_a1r5g5b5:
+ case PICT_a8:
+ case PICT_a4r4g4b4:
+ case PICT_x4r4g4b4:
+ return TRUE;
+ }
+ return FALSE;
+}
+
+static bool gen4_format_is_dst(uint32_t format)
+{
+ switch (format) {
+ case GEN4_SURFACEFORMAT_B8G8R8A8_UNORM:
+ case GEN4_SURFACEFORMAT_R8G8B8A8_UNORM:
+ case GEN4_SURFACEFORMAT_B10G10R10A2_UNORM:
+ case GEN4_SURFACEFORMAT_B5G6R5_UNORM:
+ case GEN4_SURFACEFORMAT_B5G5R5A1_UNORM:
+ case GEN4_SURFACEFORMAT_A8_UNORM:
+ case GEN4_SURFACEFORMAT_B4G4R4A4_UNORM:
+ return true;
+ default:
+ return false;
+ }
+}
+
+typedef struct gen4_surface_state_padded {
+ struct gen4_surface_state state;
+ char pad[32 - sizeof(struct gen4_surface_state)];
+} gen4_surface_state_padded;
+
+static void null_create(struct sna_static_stream *stream)
+{
+ /* A bunch of zeros useful for legacy border color and depth-stencil */
+ sna_static_stream_map(stream, 64, 64);
+}
+
+static void
+sampler_state_init(struct gen4_sampler_state *sampler_state,
+ sampler_filter_t filter,
+ sampler_extend_t extend)
+{
+ sampler_state->ss0.lod_preclamp = 1; /* GL mode */
+
+ /* We use the legacy mode to get the semantics specified by
+ * the Render extension. */
+ sampler_state->ss0.border_color_mode = GEN4_BORDER_COLOR_MODE_LEGACY;
+
+ switch (filter) {
+ default:
+ case SAMPLER_FILTER_NEAREST:
+ sampler_state->ss0.min_filter = GEN4_MAPFILTER_NEAREST;
+ sampler_state->ss0.mag_filter = GEN4_MAPFILTER_NEAREST;
+ break;
+ case SAMPLER_FILTER_BILINEAR:
+ sampler_state->ss0.min_filter = GEN4_MAPFILTER_LINEAR;
+ sampler_state->ss0.mag_filter = GEN4_MAPFILTER_LINEAR;
+ break;
+ }
+
+ switch (extend) {
+ default:
+ case SAMPLER_EXTEND_NONE:
+ sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
+ sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
+ sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
+ break;
+ case SAMPLER_EXTEND_REPEAT:
+ sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
+ sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
+ sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
+ break;
+ case SAMPLER_EXTEND_PAD:
+ sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
+ sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
+ sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
+ break;
+ case SAMPLER_EXTEND_REFLECT:
+ sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
+ sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
+ sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
+ break;
+ }
+}
+
+static uint32_t gen4_get_card_format(PictFormat format)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(gen4_tex_formats); i++) {
+ if (gen4_tex_formats[i].pict_fmt == format)
+ return gen4_tex_formats[i].card_fmt;
+ }
+ return -1;
+}
+
+static uint32_t gen4_filter(uint32_t filter)
+{
+ switch (filter) {
+ default:
+ assert(0);
+ case PictFilterNearest:
+ return SAMPLER_FILTER_NEAREST;
+ case PictFilterBilinear:
+ return SAMPLER_FILTER_BILINEAR;
+ }
+}
+
+static uint32_t gen4_check_filter(PicturePtr picture)
+{
+ switch (picture->filter) {
+ case PictFilterNearest:
+ case PictFilterBilinear:
+ return TRUE;
+ default:
+ return FALSE;
+ }
+}
+
+static uint32_t gen4_repeat(uint32_t repeat)
+{
+ switch (repeat) {
+ default:
+ assert(0);
+ case RepeatNone:
+ return SAMPLER_EXTEND_NONE;
+ case RepeatNormal:
+ return SAMPLER_EXTEND_REPEAT;
+ case RepeatPad:
+ return SAMPLER_EXTEND_PAD;
+ case RepeatReflect:
+ return SAMPLER_EXTEND_REFLECT;
+ }
+}
+
+static bool gen4_check_repeat(PicturePtr picture)
+{
+ if (!picture->repeat)
+ return TRUE;
+
+ switch (picture->repeatType) {
+ case RepeatNone:
+ case RepeatNormal:
+ case RepeatPad:
+ case RepeatReflect:
+ return TRUE;
+ default:
+ return FALSE;
+ }
+}
+
+/**
+ * Sets up the common fields for a surface state buffer for the given
+ * picture in the given surface state buffer.
+ */
+static int
+gen4_bind_bo(struct sna *sna,
+ struct kgem_bo *bo,
+ uint32_t width,
+ uint32_t height,
+ uint32_t format,
+ Bool is_dst)
+{
+ struct gen4_surface_state *ss;
+ uint32_t domains;
+ uint16_t offset;
+
+ /* After the first bind, we manage the cache domains within the batch */
+ if (is_dst) {
+ domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER;
+ kgem_bo_mark_dirty(bo);
+ } else {
+ domains = I915_GEM_DOMAIN_SAMPLER << 16;
+ is_dst = gen4_format_is_dst(format);
+ }
+
+ offset = sna->kgem.surface - sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
+ offset *= sizeof(uint32_t);
+
+ if (is_dst) {
+ if (bo->dst_bound)
+ return bo->dst_bound;
+
+ bo->dst_bound = offset;
+ } else {
+ if (bo->src_bound)
+ return bo->src_bound;
+
+ bo->src_bound = offset;
+ }
+
+ sna->kgem.surface -=
+ sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
+ ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss));
+
+ ss->ss0.surface_type = GEN4_SURFACE_2D;
+ ss->ss0.surface_format = format;
+
+ ss->ss0.data_return_format = GEN4_SURFACERETURNFORMAT_FLOAT32;
+ ss->ss0.color_blend = 1;
+ ss->ss1.base_addr =
+ kgem_add_reloc(&sna->kgem,
+ sna->kgem.surface + 1,
+ bo, domains, 0);
+
+ ss->ss2.height = height - 1;
+ ss->ss2.width = width - 1;
+ ss->ss3.pitch = bo->pitch - 1;
+ ss->ss3.tiled_surface = bo->tiling != I915_TILING_NONE;
+ ss->ss3.tile_walk = bo->tiling == I915_TILING_Y;
+
+ DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
+ offset, bo->handle, ss->ss1.base_addr,
+ ss->ss0.surface_format, width, height, bo->pitch, bo->tiling,
+ domains & 0xffff ? "render" : "sampler"));
+
+ return offset;
+}
+
+fastcall static void
+gen4_emit_composite_primitive_solid(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ float *v;
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ v = sna->render.vertex_data + sna->render.vertex_used;
+ sna->render.vertex_used += 9;
+
+ dst.p.x = r->dst.x + r->width;
+ dst.p.y = r->dst.y + r->height;
+ v[0] = dst.f;
+ v[1] = 1.;
+ v[2] = 1.;
+
+ dst.p.x = r->dst.x;
+ v[3] = dst.f;
+ v[4] = 0.;
+ v[5] = 1.;
+
+ dst.p.y = r->dst.y;
+ v[6] = dst.f;
+ v[7] = 0.;
+ v[8] = 0.;
+}
+
+fastcall static void
+gen4_emit_composite_primitive_identity_source(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ const float *sf = op->src.scale;
+ float sx, sy, *v;
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ v = sna->render.vertex_data + sna->render.vertex_used;
+ sna->render.vertex_used += 9;
+
+ sx = r->src.x + op->src.offset[0];
+ sy = r->src.y + op->src.offset[1];
+
+ dst.p.x = r->dst.x + r->width;
+ dst.p.y = r->dst.y + r->height;
+ v[0] = dst.f;
+ v[1] = (sx + r->width) * sf[0];
+ v[2] = (sy + r->height) * sf[1];
+
+ dst.p.x = r->dst.x;
+ v[3] = dst.f;
+ v[4] = sx * sf[0];
+ v[5] = v[2];
+
+ dst.p.y = r->dst.y;
+ v[6] = dst.f;
+ v[7] = v[4];
+ v[8] = sy * sf[1];
+}
+
+fastcall static void
+gen4_emit_composite_primitive_affine_source(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+ float *v;
+
+ v = sna->render.vertex_data + sna->render.vertex_used;
+ sna->render.vertex_used += 9;
+
+ dst.p.x = r->dst.x + r->width;
+ dst.p.y = r->dst.y + r->height;
+ v[0] = dst.f;
+ _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x + r->width,
+ op->src.offset[1] + r->src.y + r->height,
+ op->src.transform,
+ &v[1], &v[2]);
+ v[1] *= op->src.scale[0];
+ v[2] *= op->src.scale[1];
+
+ dst.p.x = r->dst.x;
+ v[3] = dst.f;
+ _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x,
+ op->src.offset[1] + r->src.y + r->height,
+ op->src.transform,
+ &v[4], &v[5]);
+ v[4] *= op->src.scale[0];
+ v[5] *= op->src.scale[1];
+
+ dst.p.y = r->dst.y;
+ v[6] = dst.f;
+ _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x,
+ op->src.offset[1] + r->src.y,
+ op->src.transform,
+ &v[7], &v[8]);
+ v[7] *= op->src.scale[0];
+ v[8] *= op->src.scale[1];
+}
+
+fastcall static void
+gen4_emit_composite_primitive_identity_source_mask(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+ float src_x, src_y;
+ float msk_x, msk_y;
+ float w, h;
+ float *v;
+
+ src_x = r->src.x + op->src.offset[0];
+ src_y = r->src.y + op->src.offset[1];
+ msk_x = r->mask.x + op->mask.offset[0];
+ msk_y = r->mask.y + op->mask.offset[1];
+ w = r->width;
+ h = r->height;
+
+ v = sna->render.vertex_data + sna->render.vertex_used;
+ sna->render.vertex_used += 15;
+
+ dst.p.x = r->dst.x + r->width;
+ dst.p.y = r->dst.y + r->height;
+ v[0] = dst.f;
+ v[1] = (src_x + w) * op->src.scale[0];
+ v[2] = (src_y + h) * op->src.scale[1];
+ v[3] = (msk_x + w) * op->mask.scale[0];
+ v[4] = (msk_y + h) * op->mask.scale[1];
+
+ dst.p.x = r->dst.x;
+ v[5] = dst.f;
+ v[6] = src_x * op->src.scale[0];
+ v[7] = v[2];
+ v[8] = msk_x * op->mask.scale[0];
+ v[9] = v[4];
+
+ dst.p.y = r->dst.y;
+ v[10] = dst.f;
+ v[11] = v[6];
+ v[12] = src_y * op->src.scale[1];
+ v[13] = v[8];
+ v[14] = msk_y * op->mask.scale[1];
+}
+
+fastcall static void
+gen4_emit_composite_primitive(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3];
+ Bool is_affine = op->is_affine;
+ const float *src_sf = op->src.scale;
+ const float *mask_sf = op->mask.scale;
+
+ if (is_affine) {
+ sna_get_transformed_coordinates(r->src.x + op->src.offset[0],
+ r->src.y + op->src.offset[1],
+ op->src.transform,
+ &src_x[0],
+ &src_y[0]);
+
+ sna_get_transformed_coordinates(r->src.x + op->src.offset[0],
+ r->src.y + op->src.offset[1] + r->height,
+ op->src.transform,
+ &src_x[1],
+ &src_y[1]);
+
+ sna_get_transformed_coordinates(r->src.x + op->src.offset[0] + r->width,
+ r->src.y + op->src.offset[1] + r->height,
+ op->src.transform,
+ &src_x[2],
+ &src_y[2]);
+ } else {
+ if (!sna_get_transformed_coordinates_3d(r->src.x + op->src.offset[0],
+ r->src.y + op->src.offset[1],
+ op->src.transform,
+ &src_x[0],
+ &src_y[0],
+ &src_w[0]))
+ return;
+
+ if (!sna_get_transformed_coordinates_3d(r->src.x + op->src.offset[0],
+ r->src.y + op->src.offset[1] + r->height,
+ op->src.transform,
+ &src_x[1],
+ &src_y[1],
+ &src_w[1]))
+ return;
+
+ if (!sna_get_transformed_coordinates_3d(r->src.x + op->src.offset[0] + r->width,
+ r->src.y + op->src.offset[1] + r->height,
+ op->src.transform,
+ &src_x[2],
+ &src_y[2],
+ &src_w[2]))
+ return;
+ }
+
+ if (op->mask.bo) {
+ if (is_affine) {
+ sna_get_transformed_coordinates(r->mask.x + op->mask.offset[0],
+ r->mask.y + op->mask.offset[1],
+ op->mask.transform,
+ &mask_x[0],
+ &mask_y[0]);
+
+ sna_get_transformed_coordinates(r->mask.x + op->mask.offset[0],
+ r->mask.y + op->mask.offset[1] + r->height,
+ op->mask.transform,
+ &mask_x[1],
+ &mask_y[1]);
+
+ sna_get_transformed_coordinates(r->mask.x + op->mask.offset[0] + r->width,
+ r->mask.y + op->mask.offset[1] + r->height,
+ op->mask.transform,
+ &mask_x[2],
+ &mask_y[2]);
+ } else {
+ if (!sna_get_transformed_coordinates_3d(r->mask.x + op->mask.offset[0],
+ r->mask.y + op->mask.offset[1],
+ op->mask.transform,
+ &mask_x[0],
+ &mask_y[0],
+ &mask_w[0]))
+ return;
+
+ if (!sna_get_transformed_coordinates_3d(r->mask.x + op->mask.offset[0],
+ r->mask.y + op->mask.offset[1] + r->height,
+ op->mask.transform,
+ &mask_x[1],
+ &mask_y[1],
+ &mask_w[1]))
+ return;
+
+ if (!sna_get_transformed_coordinates_3d(r->mask.x + op->mask.offset[0] + r->width,
+ r->mask.y + op->mask.offset[1] + r->height,
+ op->mask.transform,
+ &mask_x[2],
+ &mask_y[2],
+ &mask_w[2]))
+ return;
+ }
+ }
+
+ OUT_VERTEX(r->dst.x + r->width, r->dst.y + r->height);
+ OUT_VERTEX_F(src_x[2] * src_sf[0]);
+ OUT_VERTEX_F(src_y[2] * src_sf[1]);
+ if (!is_affine)
+ OUT_VERTEX_F(src_w[2]);
+ if (op->mask.bo) {
+ OUT_VERTEX_F(mask_x[2] * mask_sf[0]);
+ OUT_VERTEX_F(mask_y[2] * mask_sf[1]);
+ if (!is_affine)
+ OUT_VERTEX_F(mask_w[2]);
+ }
+
+ OUT_VERTEX(r->dst.x, r->dst.y + r->height);
+ OUT_VERTEX_F(src_x[1] * src_sf[0]);
+ OUT_VERTEX_F(src_y[1] * src_sf[1]);
+ if (!is_affine)
+ OUT_VERTEX_F(src_w[1]);
+ if (op->mask.bo) {
+ OUT_VERTEX_F(mask_x[1] * mask_sf[0]);
+ OUT_VERTEX_F(mask_y[1] * mask_sf[1]);
+ if (!is_affine)
+ OUT_VERTEX_F(mask_w[1]);
+ }
+
+ OUT_VERTEX(r->dst.x, r->dst.y);
+ OUT_VERTEX_F(src_x[0] * src_sf[0]);
+ OUT_VERTEX_F(src_y[0] * src_sf[1]);
+ if (!is_affine)
+ OUT_VERTEX_F(src_w[0]);
+ if (op->mask.bo) {
+ OUT_VERTEX_F(mask_x[0] * mask_sf[0]);
+ OUT_VERTEX_F(mask_y[0] * mask_sf[1]);
+ if (!is_affine)
+ OUT_VERTEX_F(mask_w[0]);
+ }
+}
+
+static void gen4_emit_vertex_buffer(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ int id = op->u.gen4.ve_id;
+
+ OUT_BATCH(GEN4_3DSTATE_VERTEX_BUFFERS | 3);
+ OUT_BATCH((id << VB0_BUFFER_INDEX_SHIFT) | VB0_VERTEXDATA |
+ (4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT));
+ sna->render.vertex_reloc[id] = sna->kgem.nbatch;
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+
+ sna->render_state.gen4.vb_id |= 1 << id;
+}
+
+static void gen4_emit_primitive(struct sna *sna)
+{
+ if (sna->kgem.nbatch == sna->render_state.gen4.last_primitive) {
+ sna->render_state.gen4.vertex_offset = sna->kgem.nbatch - 5;
+ return;
+ }
+
+ OUT_BATCH(GEN4_3DPRIMITIVE |
+ GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL |
+ (_3DPRIM_RECTLIST << GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT) |
+ (0 << 9) |
+ 4);
+ sna->render_state.gen4.vertex_offset = sna->kgem.nbatch;
+ OUT_BATCH(0); /* vertex count, to be filled in later */
+ OUT_BATCH(sna->render.vertex_index);
+ OUT_BATCH(1); /* single instance */
+ OUT_BATCH(0); /* start instance location */
+ OUT_BATCH(0); /* index buffer offset, ignored */
+ sna->render.vertex_start = sna->render.vertex_index;
+
+ sna->render_state.gen4.last_primitive = sna->kgem.nbatch;
+}
+
+static bool gen4_rectangle_begin(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ int id = op->u.gen4.ve_id;
+ int ndwords;
+
+ ndwords = 0;
+ if ((sna->render_state.gen4.vb_id & (1 << id)) == 0)
+ ndwords += 5;
+ if (sna->render_state.gen4.vertex_offset == 0)
+ ndwords += op->need_magic_ca_pass ? 20 : 6;
+ if (ndwords == 0)
+ return true;
+
+ if (!kgem_check_batch(&sna->kgem, ndwords))
+ return false;
+
+ if ((sna->render_state.gen4.vb_id & (1 << id)) == 0)
+ gen4_emit_vertex_buffer(sna, op);
+ if (sna->render_state.gen4.vertex_offset == 0)
+ gen4_emit_primitive(sna);
+
+ return true;
+}
+
+static int gen4_get_rectangles__flush(struct sna *sna)
+{
+ if (!kgem_check_batch(&sna->kgem, 25))
+ return 0;
+ if (sna->kgem.nexec > KGEM_EXEC_SIZE(&sna->kgem) - 1)
+ return 0;
+ if (sna->kgem.nreloc > KGEM_RELOC_SIZE(&sna->kgem) - 1)
+ return 0;
+
+ gen4_vertex_finish(sna, FALSE);
+ sna->render.vertex_index = 0;
+
+ return ARRAY_SIZE(sna->render.vertex_data);
+}
+
+inline static int gen4_get_rectangles(struct sna *sna,
+ const struct sna_composite_op *op,
+ int want)
+{
+ int rem = vertex_space(sna);
+
+ if (rem < 3*op->floats_per_vertex) {
+ DBG(("flushing vbo for %s: %d < %d\n",
+ __FUNCTION__, rem, 3*op->floats_per_vertex));
+ rem = gen4_get_rectangles__flush(sna);
+ if (rem == 0)
+ return 0;
+ }
+
+ if (!gen4_rectangle_begin(sna, op))
+ return 0;
+
+ if (want * op->floats_per_vertex*3 > rem)
+ want = rem / (3*op->floats_per_vertex);
+
+ sna->render.vertex_index += 3*want;
+ return want;
+}
+
+static uint32_t *gen4_composite_get_binding_table(struct sna *sna,
+ const struct sna_composite_op *op,
+ uint16_t *offset)
+{
+ uint32_t *table;
+
+ sna->kgem.surface -=
+ sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
+ /* Clear all surplus entries to zero in case of prefetch */
+ table = memset(sna->kgem.batch + sna->kgem.surface,
+ 0, sizeof(struct gen4_surface_state_padded));
+
+ *offset = sna->kgem.surface;
+
+ DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));
+
+ return table;
+}
+
+static void
+gen4_emit_sip(struct sna *sna)
+{
+ /* Set system instruction pointer */
+ OUT_BATCH(GEN4_STATE_SIP | 0);
+ OUT_BATCH(0);
+}
+
+static void
+gen4_emit_urb(struct sna *sna)
+{
+ int urb_vs_start, urb_vs_size;
+ int urb_gs_start, urb_gs_size;
+ int urb_clip_start, urb_clip_size;
+ int urb_sf_start, urb_sf_size;
+ int urb_cs_start, urb_cs_size;
+
+ urb_vs_start = 0;
+ urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
+ urb_gs_start = urb_vs_start + urb_vs_size;
+ urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
+ urb_clip_start = urb_gs_start + urb_gs_size;
+ urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
+ urb_sf_start = urb_clip_start + urb_clip_size;
+ urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
+ urb_cs_start = urb_sf_start + urb_sf_size;
+ urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;
+
+ OUT_BATCH(GEN4_URB_FENCE |
+ UF0_CS_REALLOC |
+ UF0_SF_REALLOC |
+ UF0_CLIP_REALLOC |
+ UF0_GS_REALLOC |
+ UF0_VS_REALLOC |
+ 1);
+ OUT_BATCH(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
+ ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
+ ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
+ OUT_BATCH(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
+ ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
+
+ /* Constant buffer state */
+ OUT_BATCH(GEN4_CS_URB_STATE | 0);
+ OUT_BATCH((URB_CS_ENTRY_SIZE - 1) << 4 | URB_CS_ENTRIES << 0);
+}
+
+static void
+gen4_emit_state_base_address(struct sna *sna)
+{
+ assert(sna->render_state.gen4.general_bo->proxy == NULL);
+ OUT_BATCH(GEN4_STATE_BASE_ADDRESS | 4);
+ OUT_BATCH(kgem_add_reloc(&sna->kgem, /* general */
+ sna->kgem.nbatch,
+ sna->render_state.gen4.general_bo,
+ I915_GEM_DOMAIN_INSTRUCTION << 16,
+ BASE_ADDRESS_MODIFY));
+ OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
+ sna->kgem.nbatch,
+ NULL,
+ I915_GEM_DOMAIN_INSTRUCTION << 16,
+ BASE_ADDRESS_MODIFY));
+ OUT_BATCH(0); /* media */
+
+ /* upper bounds, all disabled */
+ OUT_BATCH(BASE_ADDRESS_MODIFY);
+ OUT_BATCH(0);
+}
+
+static void
+gen4_emit_invariant(struct sna *sna)
+{
+ OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
+ if (sna->kgem.gen >= 45)
+ OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
+ else
+ OUT_BATCH(GEN4_PIPELINE_SELECT | PIPELINE_SELECT_3D);
+
+ gen4_emit_sip(sna);
+ gen4_emit_state_base_address(sna);
+
+ sna->render_state.gen4.needs_invariant = FALSE;
+}
+
+static void
+gen4_get_batch(struct sna *sna)
+{
+ kgem_set_mode(&sna->kgem, KGEM_RENDER);
+
+ if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
+ DBG(("%s: flushing batch: %d < %d+%d\n",
+ __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
+ 150, 4*8));
+ kgem_submit(&sna->kgem);
+ }
+
+ if (sna->render_state.gen4.needs_invariant)
+ gen4_emit_invariant(sna);
+}
+
+static void
+gen4_align_vertex(struct sna *sna, const struct sna_composite_op *op)
+{
+ if (op->floats_per_vertex != sna->render_state.gen4.floats_per_vertex) {
+ DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
+ sna->render_state.gen4.floats_per_vertex,
+ op->floats_per_vertex,
+ sna->render.vertex_index,
+ (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
+ sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
+ sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
+ sna->render_state.gen4.floats_per_vertex = op->floats_per_vertex;
+ }
+}
+
+static void
+gen4_emit_binding_table(struct sna *sna, uint16_t offset)
+{
+ if (sna->render_state.gen4.surface_table == offset)
+ return;
+
+ sna->render_state.gen4.surface_table = offset;
+
+ /* Binding table pointers */
+ OUT_BATCH(GEN4_3DSTATE_BINDING_TABLE_POINTERS | 4);
+ OUT_BATCH(0); /* vs */
+ OUT_BATCH(0); /* gs */
+ OUT_BATCH(0); /* clip */
+ OUT_BATCH(0); /* sf */
+ /* Only the PS uses the binding table */
+ OUT_BATCH(offset*4);
+}
+
+static bool
+gen4_emit_pipelined_pointers(struct sna *sna,
+ const struct sna_composite_op *op,
+ int blend, int kernel)
+{
+ uint16_t offset = sna->kgem.nbatch, last;
+
+ OUT_BATCH(GEN4_3DSTATE_PIPELINED_POINTERS | 5);
+ OUT_BATCH(sna->render_state.gen4.vs);
+ OUT_BATCH(GEN4_GS_DISABLE); /* passthrough */
+ OUT_BATCH(GEN4_CLIP_DISABLE); /* passthrough */
+ OUT_BATCH(sna->render_state.gen4.sf[op->mask.bo != NULL]);
+ OUT_BATCH(sna->render_state.gen4.wm +
+ SAMPLER_OFFSET(op->src.filter, op->src.repeat,
+ op->mask.filter, op->mask.repeat,
+ kernel));
+ OUT_BATCH(sna->render_state.gen4.cc +
+ gen4_get_blend(blend, op->has_component_alpha, op->dst.format));
+
+ last = sna->render_state.gen4.last_pipelined_pointers;
+ if (last &&
+ sna->kgem.batch[offset + 1] == sna->kgem.batch[last + 1] &&
+ sna->kgem.batch[offset + 3] == sna->kgem.batch[last + 3] &&
+ sna->kgem.batch[offset + 4] == sna->kgem.batch[last + 4] &&
+ sna->kgem.batch[offset + 5] == sna->kgem.batch[last + 5] &&
+ sna->kgem.batch[offset + 6] == sna->kgem.batch[last + 6]) {
+ sna->kgem.nbatch = offset;
+ return false;
+ } else {
+ sna->render_state.gen4.last_pipelined_pointers = offset;
+ return true;
+ }
+}
+
+static void
+gen4_emit_drawing_rectangle(struct sna *sna, const struct sna_composite_op *op)
+{
+ uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
+ uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;
+
+ if (sna->render_state.gen4.drawrect_limit == limit &&
+ sna->render_state.gen4.drawrect_offset == offset)
+ return;
+ sna->render_state.gen4.drawrect_offset = offset;
+ sna->render_state.gen4.drawrect_limit = limit;
+
+ OUT_BATCH(GEN4_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
+ OUT_BATCH(0x00000000);
+ OUT_BATCH(limit);
+ OUT_BATCH(offset);
+}
+
+static void
+gen4_emit_vertex_elements(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ /*
+ * vertex data in vertex buffer
+ * position: (x, y)
+ * texture coordinate 0: (u0, v0) if (is_affine is TRUE) else (u0, v0, w0)
+ * texture coordinate 1 if (has_mask is TRUE): same as above
+ */
+ struct gen4_render_state *render = &sna->render_state.gen4;
+ Bool has_mask = op->mask.bo != NULL;
+ int nelem = has_mask ? 2 : 1;
+ int selem;
+ uint32_t w_component;
+ uint32_t src_format;
+ int id = op->u.gen4.ve_id;
+
+ if (render->ve_id == id)
+ return;
+
+ render->ve_id = id;
+
+ if (op->is_affine) {
+ src_format = GEN4_SURFACEFORMAT_R32G32_FLOAT;
+ w_component = GEN4_VFCOMPONENT_STORE_1_FLT;
+ selem = 2;
+ } else {
+ src_format = GEN4_SURFACEFORMAT_R32G32B32_FLOAT;
+ w_component = GEN4_VFCOMPONENT_STORE_SRC;
+ selem = 3;
+ }
+
+ /* The VUE layout
+ * dword 0-3: position (x, y, 1.0, 1.0),
+ * dword 4-7: texture coordinate 0 (u0, v0, w0, 1.0)
+ * [optional] dword 8-11: texture coordinate 1 (u1, v1, w1, 1.0)
+ */
+ OUT_BATCH(GEN4_3DSTATE_VERTEX_ELEMENTS | (2 * (1 + nelem) - 1));
+
+ /* x,y */
+ OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
+ GEN4_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
+ 0 << VE0_OFFSET_SHIFT); /* offsets vb in bytes */
+ OUT_BATCH(GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
+ GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
+ GEN4_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT |
+ GEN4_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
+ (1*4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT); /* VUE offset in dwords */
+
+ /* u0, v0, w0 */
+ OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
+ src_format << VE0_FORMAT_SHIFT |
+ 4 << VE0_OFFSET_SHIFT); /* offset vb in bytes */
+ OUT_BATCH(GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
+ GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
+ w_component << VE1_VFCOMPONENT_2_SHIFT |
+ GEN4_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
+ (2*4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT); /* VUE offset in dwords */
+
+ /* u1, v1, w1 */
+ if (has_mask) {
+ OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
+ src_format << VE0_FORMAT_SHIFT |
+ ((1 + selem) * 4) << VE0_OFFSET_SHIFT); /* vb offset in bytes */
+ OUT_BATCH(GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
+ GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
+ w_component << VE1_VFCOMPONENT_2_SHIFT |
+ GEN4_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
+ (3*4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT); /* VUE offset in dwords */
+ }
+}
+
+static void
+gen4_emit_state(struct sna *sna,
+ const struct sna_composite_op *op,
+ uint16_t wm_binding_table)
+{
+ gen4_emit_binding_table(sna, wm_binding_table);
+ if (gen4_emit_pipelined_pointers(sna, op, op->op, op->u.gen4.wm_kernel))
+ gen4_emit_urb(sna);
+ gen4_emit_vertex_elements(sna, op);
+ gen4_emit_drawing_rectangle(sna, op);
+}
+
+static void
+gen4_bind_surfaces(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ uint32_t *binding_table;
+ uint16_t offset;
+
+ gen4_get_batch(sna);
+
+ binding_table = gen4_composite_get_binding_table(sna, op, &offset);
+
+ binding_table[0] =
+ gen4_bind_bo(sna,
+ op->dst.bo, op->dst.width, op->dst.height,
+ gen4_get_dest_format(op->dst.format),
+ TRUE);
+ binding_table[1] =
+ gen4_bind_bo(sna,
+ op->src.bo, op->src.width, op->src.height,
+ op->src.card_format,
+ FALSE);
+ if (op->mask.bo)
+ binding_table[2] =
+ gen4_bind_bo(sna,
+ op->mask.bo,
+ op->mask.width,
+ op->mask.height,
+ op->mask.card_format,
+ FALSE);
+
+ gen4_emit_state(sna, op, offset);
+}
+
+fastcall static void
+gen4_render_composite_blt(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ DBG(("%s: src=(%d, %d)+(%d, %d), mask=(%d, %d)+(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n",
+ __FUNCTION__,
+ r->src.x, r->src.y, op->src.offset[0], op->src.offset[1],
+ r->mask.x, r->mask.y, op->mask.offset[0], op->mask.offset[1],
+ r->dst.x, r->dst.y, op->dst.x, op->dst.y,
+ r->width, r->height));
+
+ if (!gen4_get_rectangles(sna, op, 1)) {
+ gen4_bind_surfaces(sna, op);
+ gen4_get_rectangles(sna, op, 1);
+ }
+
+ op->prim_emit(sna, op, r);
+
+ /* XXX are the shaders fubar? */
+ FLUSH();
+}
+
+static void
+gen4_render_composite_boxes(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+ DBG(("%s(%d) delta=(%d, %d), src=(%d, %d)/(%d, %d), mask=(%d, %d)/(%d, %d)\n",
+ __FUNCTION__, nbox, op->dst.x, op->dst.y,
+ op->src.offset[0], op->src.offset[1],
+ op->src.width, op->src.height,
+ op->mask.offset[0], op->mask.offset[1],
+ op->mask.width, op->mask.height));
+
+ do {
+ struct sna_composite_rectangles r;
+
+ r.dst.x = box->x1;
+ r.dst.y = box->y1;
+ r.width = box->x2 - box->x1;
+ r.height = box->y2 - box->y1;
+ r.mask = r.src = r.dst;
+ gen4_render_composite_blt(sna, op, &r);
+ box++;
+ } while (--nbox);
+}
+
+#ifndef MAX
+#define MAX(a,b) ((a) > (b) ? (a) : (b))
+#endif
+
+static uint32_t gen4_bind_video_source(struct sna *sna,
+ struct kgem_bo *src_bo,
+ uint32_t src_offset,
+ int src_width,
+ int src_height,
+ int src_pitch,
+ uint32_t src_surf_format)
+{
+ struct gen4_surface_state *ss;
+
+ sna->kgem.surface -= sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
+
+ ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss));
+ ss->ss0.surface_type = GEN4_SURFACE_2D;
+ ss->ss0.surface_format = src_surf_format;
+ ss->ss0.color_blend = 1;
+
+ ss->ss1.base_addr =
+ kgem_add_reloc(&sna->kgem,
+ sna->kgem.surface + 1,
+ src_bo,
+ I915_GEM_DOMAIN_SAMPLER << 16,
+ src_offset);
+
+ ss->ss2.width = src_width - 1;
+ ss->ss2.height = src_height - 1;
+ ss->ss3.pitch = src_pitch - 1;
+
+ return sna->kgem.surface * sizeof(uint32_t);
+}
+
+static void gen4_video_bind_surfaces(struct sna *sna,
+ const struct sna_composite_op *op,
+ struct sna_video_frame *frame)
+{
+ uint32_t src_surf_format;
+ uint32_t src_surf_base[6];
+ int src_width[6];
+ int src_height[6];
+ int src_pitch[6];
+ uint32_t *binding_table;
+ uint16_t offset;
+ int n_src, n;
+
+ src_surf_base[0] = frame->YBufOffset;
+ src_surf_base[1] = frame->YBufOffset;
+ src_surf_base[2] = frame->VBufOffset;
+ src_surf_base[3] = frame->VBufOffset;
+ src_surf_base[4] = frame->UBufOffset;
+ src_surf_base[5] = frame->UBufOffset;
+
+ if (is_planar_fourcc(frame->id)) {
+ src_surf_format = GEN4_SURFACEFORMAT_R8_UNORM;
+ src_width[1] = src_width[0] = frame->width;
+ src_height[1] = src_height[0] = frame->height;
+ src_pitch[1] = src_pitch[0] = frame->pitch[1];
+ src_width[4] = src_width[5] = src_width[2] = src_width[3] =
+ frame->width / 2;
+ src_height[4] = src_height[5] = src_height[2] = src_height[3] =
+ frame->height / 2;
+ src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] =
+ frame->pitch[0];
+ n_src = 6;
+ } else {
+ if (frame->id == FOURCC_UYVY)
+ src_surf_format = GEN4_SURFACEFORMAT_YCRCB_SWAPY;
+ else
+ src_surf_format = GEN4_SURFACEFORMAT_YCRCB_NORMAL;
+
+ src_width[0] = frame->width;
+ src_height[0] = frame->height;
+ src_pitch[0] = frame->pitch[0];
+ n_src = 1;
+ }
+
+ gen4_get_batch(sna);
+
+ binding_table = gen4_composite_get_binding_table(sna, op, &offset);
+
+ binding_table[0] =
+ gen4_bind_bo(sna,
+ op->dst.bo, op->dst.width, op->dst.height,
+ gen4_get_dest_format(op->dst.format),
+ TRUE);
+ for (n = 0; n < n_src; n++) {
+ binding_table[1+n] =
+ gen4_bind_video_source(sna,
+ frame->bo,
+ src_surf_base[n],
+ src_width[n],
+ src_height[n],
+ src_pitch[n],
+ src_surf_format);
+ }
+
+ gen4_emit_state(sna, op, offset);
+}
+
+static Bool
+gen4_render_video(struct sna *sna,
+ struct sna_video *video,
+ struct sna_video_frame *frame,
+ RegionPtr dstRegion,
+ short src_w, short src_h,
+ short drw_w, short drw_h,
+ PixmapPtr pixmap)
+{
+ struct sna_composite_op tmp;
+ int nbox, dxo, dyo, pix_xoff, pix_yoff;
+ float src_scale_x, src_scale_y;
+ struct sna_pixmap *priv;
+ BoxPtr box;
+
+ DBG(("%s: %dx%d -> %dx%d\n", __FUNCTION__, src_w, src_h, drw_w, drw_h));
+
+ priv = sna_pixmap_force_to_gpu(pixmap);
+ if (priv == NULL)
+ return FALSE;
+
+ memset(&tmp, 0, sizeof(tmp));
+
+ tmp.op = PictOpSrc;
+ tmp.dst.pixmap = pixmap;
+ tmp.dst.width = pixmap->drawable.width;
+ tmp.dst.height = pixmap->drawable.height;
+ tmp.dst.format = sna_format_for_depth(pixmap->drawable.depth);
+ tmp.dst.bo = priv->gpu_bo;
+
+ tmp.src.filter = SAMPLER_FILTER_BILINEAR;
+ tmp.src.repeat = SAMPLER_EXTEND_NONE;
+ tmp.u.gen4.wm_kernel =
+ is_planar_fourcc(frame->id) ? WM_KERNEL_VIDEO_PLANAR : WM_KERNEL_VIDEO_PACKED;
+ tmp.is_affine = TRUE;
+ tmp.floats_per_vertex = 3;
+ tmp.u.gen4.ve_id = 1;
+
+ if (!kgem_check_bo(&sna->kgem, tmp.dst.bo))
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo(&sna->kgem, frame->bo))
+ kgem_submit(&sna->kgem);
+
+ if (!kgem_bo_is_dirty(frame->bo))
+ kgem_emit_flush(&sna->kgem);
+
+ gen4_video_bind_surfaces(sna, &tmp, frame);
+ gen4_align_vertex(sna, &tmp);
+
+ /* Set up the offset for translating from the given region (in screen
+ * coordinates) to the backing pixmap.
+ */
+#ifdef COMPOSITE
+ pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
+ pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
+#else
+ pix_xoff = 0;
+ pix_yoff = 0;
+#endif
+
+ dxo = dstRegion->extents.x1;
+ dyo = dstRegion->extents.y1;
+
+ /* Use normalized texture coordinates */
+ src_scale_x = ((float)src_w / frame->width) / (float)drw_w;
+ src_scale_y = ((float)src_h / frame->height) / (float)drw_h;
+
+ box = REGION_RECTS(dstRegion);
+ nbox = REGION_NUM_RECTS(dstRegion);
+ while (nbox--) {
+ BoxRec r;
+
+ r.x1 = box->x1 + pix_xoff;
+ r.x2 = box->x2 + pix_xoff;
+ r.y1 = box->y1 + pix_yoff;
+ r.y2 = box->y2 + pix_yoff;
+
+ if (!gen4_get_rectangles(sna, &tmp, 1)) {
+ gen4_video_bind_surfaces(sna, &tmp, frame);
+ gen4_get_rectangles(sna, &tmp, 1);
+ }
+
+ OUT_VERTEX(r.x2, r.y2);
+ OUT_VERTEX_F((box->x2 - dxo) * src_scale_x);
+ OUT_VERTEX_F((box->y2 - dyo) * src_scale_y);
+
+ OUT_VERTEX(r.x1, r.y2);
+ OUT_VERTEX_F((box->x1 - dxo) * src_scale_x);
+ OUT_VERTEX_F((box->y2 - dyo) * src_scale_y);
+
+ OUT_VERTEX(r.x1, r.y1);
+ OUT_VERTEX_F((box->x1 - dxo) * src_scale_x);
+ OUT_VERTEX_F((box->y1 - dyo) * src_scale_y);
+
+ FLUSH();
+
+ sna_damage_add_box(&priv->gpu_damage, &r);
+ sna_damage_subtract_box(&priv->cpu_damage, &r);
+ box++;
+ }
+
+ return TRUE;
+}
+
+static Bool
+gen4_composite_solid_init(struct sna *sna,
+ struct sna_composite_channel *channel,
+ uint32_t color)
+{
+ channel->filter = PictFilterNearest;
+ channel->repeat = RepeatNormal;
+ channel->is_affine = TRUE;
+ channel->is_solid = TRUE;
+ channel->transform = NULL;
+ channel->width = 1;
+ channel->height = 1;
+ channel->card_format = GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
+
+ channel->bo = sna_render_get_solid(sna, color);
+
+ channel->scale[0] = channel->scale[1] = 1;
+ channel->offset[0] = channel->offset[1] = 0;
+ return channel->bo != NULL;
+}
+
+static int
+gen4_composite_picture(struct sna *sna,
+ PicturePtr picture,
+ struct sna_composite_channel *channel,
+ int x, int y,
+ int w, int h,
+ int dst_x, int dst_y)
+{
+ PixmapPtr pixmap;
+ uint32_t color;
+ int16_t dx, dy;
+
+ DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
+ __FUNCTION__, x, y, w, h, dst_x, dst_y));
+
+ channel->is_solid = FALSE;
+ channel->card_format = -1;
+
+ if (sna_picture_is_solid(picture, &color))
+ return gen4_composite_solid_init(sna, channel, color);
+
+ if (picture->pDrawable == NULL)
+ return sna_render_picture_fixup(sna, picture, channel,
+ x, y, w, h, dst_x, dst_y);
+
+ if (!gen4_check_repeat(picture))
+ return sna_render_picture_fixup(sna, picture, channel,
+ x, y, w, h, dst_x, dst_y);
+
+ if (!gen4_check_filter(picture))
+ return sna_render_picture_fixup(sna, picture, channel,
+ x, y, w, h, dst_x, dst_y);
+
+ channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
+ channel->filter = picture->filter;
+
+ pixmap = get_drawable_pixmap(picture->pDrawable);
+ get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);
+
+ x += dx + picture->pDrawable->x;
+ y += dy + picture->pDrawable->y;
+
+ channel->is_affine = sna_transform_is_affine(picture->transform);
+ if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
+ DBG(("%s: integer translation (%d, %d), removing\n",
+ __FUNCTION__, dx, dy));
+ x += dx;
+ y += dy;
+ channel->transform = NULL;
+ channel->filter = PictFilterNearest;
+ } else
+ channel->transform = picture->transform;
+
+ channel->card_format = gen4_get_card_format(picture->format);
+ if (channel->card_format == -1)
+ return sna_render_picture_convert(sna, picture, channel, pixmap,
+ x, y, w, h, dst_x, dst_y);
+
+ if (pixmap->drawable.width > 8192 || pixmap->drawable.height > 8192)
+ return sna_render_picture_extract(sna, picture, channel,
+ x, y, w, h, dst_x, dst_y);
+
+ return sna_render_pixmap_bo(sna, channel, pixmap,
+ x, y, w, h, dst_x, dst_y);
+}
+
+static void gen4_composite_channel_convert(struct sna_composite_channel *channel)
+{
+ channel->repeat = gen4_repeat(channel->repeat);
+ channel->filter = gen4_filter(channel->filter);
+ if (channel->card_format == -1)
+ channel->card_format = gen4_get_card_format(channel->pict_format);
+}
+
+static void
+gen4_render_composite_done(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ gen4_vertex_flush(sna);
+ _kgem_set_mode(&sna->kgem, KGEM_RENDER);
+ sna->render.op = NULL;
+
+ DBG(("%s()\n", __FUNCTION__));
+
+ sna_render_composite_redirect_done(sna, op);
+
+ if (op->src.bo)
+ kgem_bo_destroy(&sna->kgem, op->src.bo);
+ if (op->mask.bo)
+ kgem_bo_destroy(&sna->kgem, op->mask.bo);
+}
+
+static Bool
+gen4_composite_set_target(struct sna *sna,
+ PicturePtr dst,
+ struct sna_composite_op *op)
+{
+ struct sna_pixmap *priv;
+
+ if (!gen4_check_dst_format(dst->format)) {
+ DBG(("%s: incompatible render target format %08x\n",
+ __FUNCTION__, dst->format));
+ return FALSE;
+ }
+
+ op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
+ op->dst.width = op->dst.pixmap->drawable.width;
+ op->dst.height = op->dst.pixmap->drawable.height;
+ op->dst.format = dst->format;
+ priv = sna_pixmap_force_to_gpu(op->dst.pixmap);
+ if (priv == NULL)
+ return FALSE;
+
+ op->dst.bo = priv->gpu_bo;
+ if (!priv->gpu_only)
+ op->damage = &priv->gpu_damage;
+
+ get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
+ &op->dst.x, &op->dst.y);
+ return TRUE;
+}
+
+static inline Bool
+picture_is_cpu(PicturePtr picture)
+{
+ if (!picture->pDrawable)
+ return FALSE;
+
+ /* If it is a solid, try to use the render paths */
+ if (picture->pDrawable->width == 1 &&
+ picture->pDrawable->height == 1 &&
+ picture->repeat)
+ return FALSE;
+
+ return is_cpu(picture->pDrawable);
+}
+
+#if PREFER_BLT
+static inline bool prefer_blt(struct sna *sna)
+{
+ return true;
+}
+#else
+static inline bool prefer_blt(struct sna *sna)
+{
+ return sna->kgem.mode != KGEM_RENDER;
+}
+#endif
+
+static Bool
+try_blt(struct sna *sna,
+ PicturePtr dst,
+ PicturePtr source,
+ int width, int height)
+{
+ if (prefer_blt(sna)) {
+ DBG(("%s: already performing BLT\n", __FUNCTION__));
+ return TRUE;
+ }
+
+ if (width > 8192 || height > 8192) {
+ DBG(("%s: operation too large for 3D pipe (%d, %d)\n",
+ __FUNCTION__, width, height));
+ return TRUE;
+ }
+
+ /* is the source picture only in cpu memory e.g. a shm pixmap? */
+ return picture_is_cpu(source);
+}
+
+static Bool
+gen4_render_composite(struct sna *sna,
+ uint8_t op,
+ PicturePtr src,
+ PicturePtr mask,
+ PicturePtr dst,
+ int16_t src_x, int16_t src_y,
+ int16_t msk_x, int16_t msk_y,
+ int16_t dst_x, int16_t dst_y,
+ int16_t width, int16_t height,
+ struct sna_composite_op *tmp)
+{
+ DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
+ width, height, sna->kgem.mode));
+
+ if (mask == NULL &&
+ try_blt(sna, dst, src, width, height) &&
+ sna_blt_composite(sna, op,
+ src, dst,
+ src_x, src_y,
+ dst_x, dst_y,
+ width, height, tmp))
+ return TRUE;
+
+ if (op >= ARRAY_SIZE(gen4_blend_op))
+ return FALSE;
+
+ if (need_tiling(sna, width, height))
+ return sna_tiling_composite(sna,
+ op, src, mask, dst,
+ src_x, src_y,
+ msk_x, msk_y,
+ dst_x, dst_y,
+ width, height,
+ tmp);
+
+ if (!gen4_composite_set_target(sna, dst, tmp))
+ return FALSE;
+
+ if (tmp->dst.width > 8192 || tmp->dst.height > 8192) {
+ if (!sna_render_composite_redirect(sna, tmp,
+ dst_x, dst_y, width, height))
+ return FALSE;
+ }
+
+ switch (gen4_composite_picture(sna, src, &tmp->src,
+ src_x, src_y,
+ width, height,
+ dst_x, dst_y)) {
+ case -1:
+ DBG(("%s: failed to prepare source\n", __FUNCTION__));
+ goto cleanup_dst;
+ case 0:
+ gen4_composite_solid_init(sna, &tmp->src, 0);
+ case 1:
+ gen4_composite_channel_convert(&tmp->src);
+ break;
+ }
+
+ tmp->op = op;
+ tmp->is_affine = tmp->src.is_affine;
+ tmp->has_component_alpha = FALSE;
+ tmp->need_magic_ca_pass = FALSE;
+
+ tmp->prim_emit = gen4_emit_composite_primitive;
+ if (mask) {
+ if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
+ tmp->has_component_alpha = TRUE;
+
+ /* Check if it's component alpha that relies on a source alpha and on
+ * the source value. We can only get one of those into the single
+ * source value that we get to blend with.
+ */
+ if (gen4_blend_op[op].src_alpha &&
+ (gen4_blend_op[op].src_blend != GEN4_BLENDFACTOR_ZERO)) {
+ if (op != PictOpOver) {
+ DBG(("%s -- fallback: unhandled component alpha blend\n",
+ __FUNCTION__));
+
+ goto cleanup_src;
+ }
+
+ tmp->need_magic_ca_pass = TRUE;
+ tmp->op = PictOpOutReverse;
+ }
+ }
+
+ switch (gen4_composite_picture(sna, mask, &tmp->mask,
+ msk_x, msk_y,
+ width, height,
+ dst_x, dst_y)) {
+ case -1:
+ DBG(("%s: failed to prepare mask\n", __FUNCTION__));
+ goto cleanup_src;
+ case 0:
+ gen4_composite_solid_init(sna, &tmp->mask, 0);
+ case 1:
+ gen4_composite_channel_convert(&tmp->mask);
+ break;
+ }
+
+ tmp->is_affine &= tmp->mask.is_affine;
+
+ if (tmp->src.transform == NULL && tmp->mask.transform == NULL)
+ tmp->prim_emit = gen4_emit_composite_primitive_identity_source_mask;
+
+ tmp->floats_per_vertex = 5 + 2 * !tmp->is_affine;
+ } else {
+ if (tmp->src.is_solid)
+ tmp->prim_emit = gen4_emit_composite_primitive_solid;
+ else if (tmp->src.transform == NULL)
+ tmp->prim_emit = gen4_emit_composite_primitive_identity_source;
+ else if (tmp->src.is_affine)
+ tmp->prim_emit = gen4_emit_composite_primitive_affine_source;
+
+ tmp->mask.filter = SAMPLER_FILTER_NEAREST;
+ tmp->mask.repeat = SAMPLER_EXTEND_NONE;
+
+ tmp->floats_per_vertex = 3 + !tmp->is_affine;
+ }
+
+ tmp->u.gen4.wm_kernel =
+ gen4_choose_composite_kernel(tmp->op,
+ tmp->mask.bo != NULL,
+ tmp->has_component_alpha,
+ tmp->is_affine);
+ tmp->u.gen4.ve_id = (tmp->mask.bo != NULL) << 1 | tmp->is_affine;
+
+ tmp->blt = gen4_render_composite_blt;
+ tmp->boxes = gen4_render_composite_boxes;
+ tmp->done = gen4_render_composite_done;
+
+ if (!kgem_check_bo(&sna->kgem, tmp->dst.bo))
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo(&sna->kgem, tmp->src.bo))
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo(&sna->kgem, tmp->mask.bo))
+ kgem_submit(&sna->kgem);
+
+ if (kgem_bo_is_dirty(tmp->src.bo) || kgem_bo_is_dirty(tmp->mask.bo))
+ kgem_emit_flush(&sna->kgem);
+
+ gen4_bind_surfaces(sna, tmp);
+ gen4_align_vertex(sna, tmp);
+
+ sna->render.op = tmp;
+ return TRUE;
+
+cleanup_src:
+ if (tmp->src.bo)
+ kgem_bo_destroy(&sna->kgem, tmp->src.bo);
+cleanup_dst:
+ if (tmp->redirect.real_bo)
+ kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
+ return FALSE;
+}
+
+static uint32_t gen4_get_dest_format_for_depth(int depth)
+{
+ switch (depth) {
+ case 32:
+ case 24:
+ default: return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
+ case 30: return GEN4_SURFACEFORMAT_B10G10R10A2_UNORM;
+ case 16: return GEN4_SURFACEFORMAT_B5G6R5_UNORM;
+ case 8: return GEN4_SURFACEFORMAT_A8_UNORM;
+ }
+}
+
+static uint32_t gen4_get_card_format_for_depth(int depth)
+{
+ switch (depth) {
+ case 32:
+ default: return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
+ case 30: return GEN4_SURFACEFORMAT_B10G10R10A2_UNORM;
+ case 24: return GEN4_SURFACEFORMAT_B8G8R8X8_UNORM;
+ case 16: return GEN4_SURFACEFORMAT_B5G6R5_UNORM;
+ case 8: return GEN4_SURFACEFORMAT_A8_UNORM;
+ }
+}
+
+static void
+gen4_copy_bind_surfaces(struct sna *sna, const struct sna_composite_op *op)
+{
+ uint32_t *binding_table;
+ uint16_t offset;
+
+ gen4_get_batch(sna);
+
+ binding_table = gen4_composite_get_binding_table(sna, op, &offset);
+
+ binding_table[0] =
+ gen4_bind_bo(sna,
+ op->dst.bo, op->dst.width, op->dst.height,
+ gen4_get_dest_format_for_depth(op->dst.pixmap->drawable.depth),
+ TRUE);
+ binding_table[1] =
+ gen4_bind_bo(sna,
+ op->src.bo, op->src.width, op->src.height,
+ op->src.card_format,
+ FALSE);
+
+ if (sna->kgem.surface == offset &&
+ *(uint64_t *)(sna->kgem.batch + sna->render_state.gen4.surface_table) == *(uint64_t*)binding_table) {
+ sna->kgem.surface += sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
+ offset = sna->render_state.gen4.surface_table;
+ }
+
+ gen4_emit_state(sna, op, offset);
+}
+
+static void
+gen4_render_copy_one(struct sna *sna,
+ const struct sna_composite_op *op,
+ int sx, int sy,
+ int w, int h,
+ int dx, int dy)
+{
+ if (!gen4_get_rectangles(sna, op, 1)) {
+ gen4_copy_bind_surfaces(sna, op);
+ gen4_get_rectangles(sna, op, 1);
+ }
+
+ OUT_VERTEX(dx+w, dy+h);
+ OUT_VERTEX_F((sx+w)*op->src.scale[0]);
+ OUT_VERTEX_F((sy+h)*op->src.scale[1]);
+
+ OUT_VERTEX(dx, dy+h);
+ OUT_VERTEX_F(sx*op->src.scale[0]);
+ OUT_VERTEX_F((sy+h)*op->src.scale[1]);
+
+ OUT_VERTEX(dx, dy);
+ OUT_VERTEX_F(sx*op->src.scale[0]);
+ OUT_VERTEX_F(sy*op->src.scale[1]);
+
+ FLUSH();
+}
+
+static Bool
+gen4_render_copy_boxes(struct sna *sna, uint8_t alu,
+ PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
+ PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
+ const BoxRec *box, int n)
+{
+ struct sna_composite_op tmp;
+
+ if (prefer_blt(sna) &&
+ sna_blt_copy_boxes(sna, alu,
+ src_bo, src_dx, src_dy,
+ dst_bo, dst_dx, dst_dy,
+ dst->drawable.bitsPerPixel,
+ box, n))
+ return TRUE;
+
+ if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo ||
+ src->drawable.width > 8192 || src->drawable.height > 8192 ||
+ dst->drawable.width > 8192 || dst->drawable.height > 8192)
+ return sna_blt_copy_boxes(sna, alu,
+ src_bo, src_dx, src_dy,
+ dst_bo, dst_dx, dst_dy,
+ dst->drawable.bitsPerPixel,
+ box, n);
+
+ DBG(("%s (%d, %d)->(%d, %d) x %d\n",
+ __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n));
+
+ memset(&tmp, 0, sizeof(tmp));
+
+ tmp.op = alu == GXcopy ? PictOpSrc : PictOpClear;
+
+ tmp.dst.pixmap = dst;
+ tmp.dst.width = dst->drawable.width;
+ tmp.dst.height = dst->drawable.height;
+ tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
+ tmp.dst.bo = dst_bo;
+
+ tmp.src.bo = src_bo;
+ tmp.src.filter = SAMPLER_FILTER_NEAREST;
+ tmp.src.repeat = SAMPLER_EXTEND_NONE;
+ tmp.src.card_format =
+ gen4_get_card_format_for_depth(src->drawable.depth),
+ tmp.src.width = src->drawable.width;
+ tmp.src.height = src->drawable.height;
+
+ tmp.is_affine = TRUE;
+ tmp.floats_per_vertex = 3;
+ tmp.u.gen4.wm_kernel = WM_KERNEL;
+ tmp.u.gen4.ve_id = 1;
+
+ if (!kgem_check_bo(&sna->kgem, dst_bo))
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo(&sna->kgem, src_bo))
+ kgem_submit(&sna->kgem);
+
+ if (kgem_bo_is_dirty(src_bo))
+ kgem_emit_flush(&sna->kgem);
+
+ gen4_copy_bind_surfaces(sna, &tmp);
+ gen4_align_vertex(sna, &tmp);
+
+ tmp.src.scale[0] = 1. / src->drawable.width;
+ tmp.src.scale[1] = 1. / src->drawable.height;
+ do {
+ gen4_render_copy_one(sna, &tmp,
+ box->x1 + src_dx, box->y1 + src_dy,
+ box->x2 - box->x1, box->y2 - box->y1,
+ box->x1 + dst_dx, box->y1 + dst_dy);
+ box++;
+ } while (--n);
+
+ _kgem_set_mode(&sna->kgem, KGEM_RENDER);
+ return TRUE;
+}
+
+static void
+gen4_render_copy_blt(struct sna *sna,
+ const struct sna_copy_op *op,
+ int16_t sx, int16_t sy,
+ int16_t w, int16_t h,
+ int16_t dx, int16_t dy)
+{
+ gen4_render_copy_one(sna, &op->base, sx, sy, w, h, dx, dy);
+}
+
+static void
+gen4_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
+{
+ gen4_vertex_flush(sna);
+ _kgem_set_mode(&sna->kgem, KGEM_RENDER);
+}
+
+static Bool
+gen4_render_copy(struct sna *sna, uint8_t alu,
+ PixmapPtr src, struct kgem_bo *src_bo,
+ PixmapPtr dst, struct kgem_bo *dst_bo,
+ struct sna_copy_op *op)
+{
+ if (prefer_blt(sna) &&
+ sna_blt_copy(sna, alu,
+ src_bo, dst_bo,
+ dst->drawable.bitsPerPixel,
+ op))
+ return TRUE;
+
+ if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo ||
+ src->drawable.width > 8192 || src->drawable.height > 8192 ||
+ dst->drawable.width > 8192 || dst->drawable.height > 8192)
+ return sna_blt_copy(sna, alu, src_bo, dst_bo,
+ dst->drawable.bitsPerPixel,
+ op);
+
+ op->base.op = alu == GXcopy ? PictOpSrc : PictOpClear;
+
+ op->base.dst.pixmap = dst;
+ op->base.dst.width = dst->drawable.width;
+ op->base.dst.height = dst->drawable.height;
+ op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
+ op->base.dst.bo = dst_bo;
+
+ op->base.src.bo = src_bo;
+ op->base.src.card_format =
+ gen4_get_card_format_for_depth(src->drawable.depth),
+ op->base.src.width = src->drawable.width;
+ op->base.src.height = src->drawable.height;
+ op->base.src.scale[0] = 1./src->drawable.width;
+ op->base.src.scale[1] = 1./src->drawable.height;
+ op->base.src.filter = SAMPLER_FILTER_NEAREST;
+ op->base.src.repeat = SAMPLER_EXTEND_NONE;
+
+ op->base.is_affine = true;
+ op->base.floats_per_vertex = 3;
+ op->base.u.gen4.wm_kernel = WM_KERNEL;
+ op->base.u.gen4.ve_id = 1;
+
+ if (!kgem_check_bo(&sna->kgem, dst_bo))
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo(&sna->kgem, src_bo))
+ kgem_submit(&sna->kgem);
+
+ if (kgem_bo_is_dirty(src_bo))
+ kgem_emit_flush(&sna->kgem);
+
+ gen4_copy_bind_surfaces(sna, &op->base);
+ gen4_align_vertex(sna, &op->base);
+
+ op->blt = gen4_render_copy_blt;
+ op->done = gen4_render_copy_done;
+ return TRUE;
+}
+
+static void
+gen4_fill_bind_surfaces(struct sna *sna, const struct sna_composite_op *op)
+{
+ uint32_t *binding_table;
+ uint16_t offset;
+
+ gen4_get_batch(sna);
+
+ binding_table = gen4_composite_get_binding_table(sna, op, &offset);
+
+ binding_table[0] =
+ gen4_bind_bo(sna,
+ op->dst.bo, op->dst.width, op->dst.height,
+ gen4_get_dest_format_for_depth(op->dst.pixmap->drawable.depth),
+ TRUE);
+ binding_table[1] =
+ gen4_bind_bo(sna,
+ op->src.bo, 1, 1,
+ GEN4_SURFACEFORMAT_B8G8R8A8_UNORM,
+ FALSE);
+
+ if (sna->kgem.surface == offset &&
+ *(uint64_t *)(sna->kgem.batch + sna->render_state.gen4.surface_table) == *(uint64_t*)binding_table) {
+ sna->kgem.surface +=
+ sizeof(struct gen4_surface_state_padded)/sizeof(uint32_t);
+ offset = sna->render_state.gen4.surface_table;
+ }
+
+ gen4_emit_state(sna, op, offset);
+}
+
+static void
+gen4_render_fill_one(struct sna *sna,
+ const struct sna_composite_op *op,
+ int x, int y, int w, int h)
+{
+ if (!gen4_get_rectangles(sna, op, 1)) {
+ gen4_fill_bind_surfaces(sna, op);
+ gen4_get_rectangles(sna, op, 1);
+ }
+
+ OUT_VERTEX(x+w, y+h);
+ OUT_VERTEX_F(1);
+ OUT_VERTEX_F(1);
+
+ OUT_VERTEX(x, y+h);
+ OUT_VERTEX_F(0);
+ OUT_VERTEX_F(1);
+
+ OUT_VERTEX(x, y);
+ OUT_VERTEX_F(0);
+ OUT_VERTEX_F(0);
+
+ FLUSH();
+}
+
+static Bool
+gen4_render_fill_boxes(struct sna *sna,
+ CARD8 op,
+ PictFormat format,
+ const xRenderColor *color,
+ PixmapPtr dst, struct kgem_bo *dst_bo,
+ const BoxRec *box, int n)
+{
+ struct sna_composite_op tmp;
+ uint32_t pixel;
+
+ if (op >= ARRAY_SIZE(gen4_blend_op)) {
+ DBG(("%s: fallback due to unhandled blend op: %d\n",
+ __FUNCTION__, op));
+ return FALSE;
+ }
+
+ if (prefer_blt(sna) ||
+ dst->drawable.width > 8192 ||
+ dst->drawable.height > 8192 ||
+ !gen4_check_dst_format(format)) {
+ uint8_t alu = GXcopy;
+
+ if (op == PictOpClear) {
+ alu = GXclear;
+ pixel = 0;
+ op = PictOpSrc;
+ }
+
+ if (op == PictOpOver && color->alpha >= 0xff00)
+ op = PictOpSrc;
+
+ if (op == PictOpSrc &&
+ sna_get_pixel_from_rgba(&pixel,
+ color->red,
+ color->green,
+ color->blue,
+ color->alpha,
+ format) &&
+ sna_blt_fill_boxes(sna, alu,
+ dst_bo, dst->drawable.bitsPerPixel,
+ pixel, box, n))
+ return TRUE;
+
+ if (dst->drawable.width > 8192 ||
+ dst->drawable.height > 8192 ||
+ !gen4_check_dst_format(format))
+ return FALSE;
+ }
+
+ if (!sna_get_pixel_from_rgba(&pixel,
+ color->red,
+ color->green,
+ color->blue,
+ color->alpha,
+ PICT_a8r8g8b8))
+ return FALSE;
+
+ DBG(("%s(%08x x %d)\n", __FUNCTION__, pixel, n));
+
+ memset(&tmp, 0, sizeof(tmp));
+
+ tmp.op = op;
+
+ tmp.dst.pixmap = dst;
+ tmp.dst.width = dst->drawable.width;
+ tmp.dst.height = dst->drawable.height;
+ tmp.dst.format = format;
+ tmp.dst.bo = dst_bo;
+
+ tmp.src.bo = sna_render_get_solid(sna, pixel);
+ tmp.src.filter = SAMPLER_FILTER_NEAREST;
+ tmp.src.repeat = SAMPLER_EXTEND_REPEAT;
+
+ tmp.is_affine = TRUE;
+ tmp.floats_per_vertex = 3;
+ tmp.u.gen4.wm_kernel = WM_KERNEL;
+ tmp.u.gen4.ve_id = 1;
+
+ if (!kgem_check_bo(&sna->kgem, dst_bo))
+ kgem_submit(&sna->kgem);
+
+ gen4_fill_bind_surfaces(sna, &tmp);
+ gen4_align_vertex(sna, &tmp);
+
+ do {
+ gen4_render_fill_one(sna, &tmp,
+ box->x1, box->y1,
+ box->x2 - box->x1, box->y2 - box->y1);
+ box++;
+ } while (--n);
+
+ kgem_bo_destroy(&sna->kgem, tmp.src.bo);
+ _kgem_set_mode(&sna->kgem, KGEM_RENDER);
+ return TRUE;
+}
+
+static void
+gen4_render_fill_blt(struct sna *sna, const struct sna_fill_op *op,
+ int16_t x, int16_t y, int16_t w, int16_t h)
+{
+ gen4_render_fill_one(sna, &op->base, x, y, w, h);
+}
+
+static void
+gen4_render_fill_done(struct sna *sna, const struct sna_fill_op *op)
+{
+ gen4_vertex_flush(sna);
+ kgem_bo_destroy(&sna->kgem, op->base.src.bo);
+ _kgem_set_mode(&sna->kgem, KGEM_RENDER);
+}
+
+static Bool
+gen4_render_fill(struct sna *sna, uint8_t alu,
+ PixmapPtr dst, struct kgem_bo *dst_bo,
+ uint32_t color,
+ struct sna_fill_op *op)
+{
+ if (prefer_blt(sna) &&
+ sna_blt_fill(sna, alu,
+ dst_bo, dst->drawable.bitsPerPixel,
+ color,
+ op))
+ return TRUE;
+
+ if (!(alu == GXcopy || alu == GXclear) ||
+ dst->drawable.width > 8192 || dst->drawable.height > 8192)
+ return sna_blt_fill(sna, alu,
+ dst_bo, dst->drawable.bitsPerPixel,
+ color,
+ op);
+
+ if (alu == GXclear)
+ color = 0;
+
+ op->base.op = color == 0 ? PictOpClear : PictOpSrc;
+
+ op->base.dst.pixmap = dst;
+ op->base.dst.width = dst->drawable.width;
+ op->base.dst.height = dst->drawable.height;
+ op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
+ op->base.dst.bo = dst_bo;
+
+ op->base.src.bo =
+ sna_render_get_solid(sna,
+ sna_rgba_for_color(color,
+ dst->drawable.depth));
+ op->base.src.filter = SAMPLER_FILTER_NEAREST;
+ op->base.src.repeat = SAMPLER_EXTEND_REPEAT;
+
+ op->base.is_affine = TRUE;
+ op->base.floats_per_vertex = 3;
+ op->base.u.gen4.wm_kernel = WM_KERNEL;
+ op->base.u.gen4.ve_id = 1;
+
+ if (!kgem_check_bo(&sna->kgem, dst_bo))
+ kgem_submit(&sna->kgem);
+
+ gen4_fill_bind_surfaces(sna, &op->base);
+ gen4_align_vertex(sna, &op->base);
+
+ op->blt = gen4_render_fill_blt;
+ op->done = gen4_render_fill_done;
+ return TRUE;
+}
+
+static void
+gen4_render_flush(struct sna *sna)
+{
+ gen4_vertex_finish(sna, TRUE);
+}
+
+static void
+gen4_render_context_switch(struct sna *sna,
+ int new_mode)
+{
+ if (sna->kgem.mode == 0)
+ return;
+
+ if (new_mode == KGEM_BLT) {
+#if 0
+ OUT_BATCH(MI_FLUSH |
+ MI_STATE_INSTRUCTION_CACHE_FLUSH |
+ MI_INHIBIT_RENDER_CACHE_FLUSH);
+#endif
+ }
+}
+
+static void gen4_render_reset(struct sna *sna)
+{
+ sna->render_state.gen4.needs_invariant = TRUE;
+ sna->render_state.gen4.vb_id = 0;
+ sna->render_state.gen4.ve_id = -1;
+ sna->render_state.gen4.last_primitive = -1;
+ sna->render_state.gen4.last_pipelined_pointers = 0;
+
+ sna->render_state.gen4.drawrect_offset = -1;
+ sna->render_state.gen4.drawrect_limit = -1;
+ sna->render_state.gen4.surface_table = -1;
+}
+
+static void gen4_render_fini(struct sna *sna)
+{
+ kgem_bo_destroy(&sna->kgem, sna->render_state.gen4.general_bo);
+}
+
+static uint32_t gen4_create_vs_unit_state(struct sna_static_stream *stream)
+{
+ struct gen4_vs_unit_state *vs = sna_static_stream_map(stream, sizeof(*vs), 32);
+
+ /* Set up the vertex shader to be disabled (passthrough) */
+ vs->thread4.nr_urb_entries = URB_VS_ENTRIES;
+ vs->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
+ vs->vs6.vs_enable = 0;
+ vs->vs6.vert_cache_disable = 1;
+
+ return sna_static_stream_offsetof(stream, vs);
+}
+
+static uint32_t gen4_create_sf_state(struct sna_static_stream *stream,
+ uint32_t kernel)
+{
+ struct gen4_sf_unit_state *sf_state;
+
+ sf_state = sna_static_stream_map(stream, sizeof(*sf_state), 32);
+
+ sf_state->thread0.grf_reg_count = GEN4_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
+ sf_state->thread0.kernel_start_pointer = kernel >> 6;
+ sf_state->sf1.single_program_flow = 1;
+ /* scratch space is not used in our kernel */
+ sf_state->thread2.scratch_space_base_pointer = 0;
+ sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
+ sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
+ sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
+ /* don't smash vertex header, read start from dw8 */
+ sf_state->thread3.urb_entry_read_offset = 1;
+ sf_state->thread3.dispatch_grf_start_reg = 3;
+ sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
+ sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
+ sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
+ sf_state->thread4.stats_enable = 1;
+ sf_state->sf5.viewport_transform = FALSE; /* skip viewport */
+ sf_state->sf6.cull_mode = GEN4_CULLMODE_NONE;
+ sf_state->sf6.scissor = 0;
+ sf_state->sf7.trifan_pv = 2;
+ sf_state->sf6.dest_org_vbias = 0x8;
+ sf_state->sf6.dest_org_hbias = 0x8;
+
+ return sna_static_stream_offsetof(stream, sf_state);
+}
+
+static uint32_t gen4_create_sampler_state(struct sna_static_stream *stream,
+ sampler_filter_t src_filter,
+ sampler_extend_t src_extend,
+ sampler_filter_t mask_filter,
+ sampler_extend_t mask_extend)
+{
+ struct gen4_sampler_state *sampler_state;
+
+ sampler_state = sna_static_stream_map(stream,
+ sizeof(struct gen4_sampler_state) * 2,
+ 32);
+ sampler_state_init(&sampler_state[0], src_filter, src_extend);
+ sampler_state_init(&sampler_state[1], mask_filter, mask_extend);
+
+ return sna_static_stream_offsetof(stream, sampler_state);
+}
+
+static void gen4_init_wm_state(struct gen4_wm_unit_state *state,
+ Bool has_mask,
+ uint32_t kernel,
+ uint32_t sampler)
+{
+ state->thread0.grf_reg_count = GEN4_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
+ state->thread0.kernel_start_pointer = kernel >> 6;
+
+ state->thread1.single_program_flow = 0;
+
+ /* scratch space is not used in our kernel */
+ state->thread2.scratch_space_base_pointer = 0;
+ state->thread2.per_thread_scratch_space = 0;
+
+ state->thread3.const_urb_entry_read_length = 0;
+ state->thread3.const_urb_entry_read_offset = 0;
+
+ state->thread3.urb_entry_read_offset = 0;
+ /* wm kernel use urb from 3, see wm_program in compiler module */
+ state->thread3.dispatch_grf_start_reg = 3; /* must match kernel */
+
+ state->wm4.sampler_count = 1; /* 1-4 samplers */
+
+ state->wm4.sampler_state_pointer = sampler >> 5;
+ state->wm5.max_threads = PS_MAX_THREADS - 1;
+ state->wm5.transposed_urb_read = 0;
+ state->wm5.thread_dispatch_enable = 1;
+ /* just use 16-pixel dispatch (4 subspans), don't need to change kernel
+ * start point
+ */
+ state->wm5.enable_16_pix = 1;
+ state->wm5.enable_8_pix = 0;
+ state->wm5.early_depth_test = 1;
+
+ /* Each pair of attributes (src/mask coords) is two URB entries */
+ if (has_mask) {
+ state->thread1.binding_table_entry_count = 3; /* 2 tex and fb */
+ state->thread3.urb_entry_read_length = 4;
+ } else {
+ state->thread1.binding_table_entry_count = 2; /* 1 tex and fb */
+ state->thread3.urb_entry_read_length = 2;
+ }
+}
+
+static uint32_t gen4_create_cc_viewport(struct sna_static_stream *stream)
+{
+ struct gen4_cc_viewport vp;
+
+ vp.min_depth = -1.e35;
+ vp.max_depth = 1.e35;
+
+ return sna_static_stream_add(stream, &vp, sizeof(vp), 32);
+}
+
+static uint32_t gen4_create_cc_unit_state(struct sna_static_stream *stream)
+{
+ uint8_t *ptr, *base;
+ uint32_t vp;
+ int i, j;
+
+ vp = gen4_create_cc_viewport(stream);
+ base = ptr =
+ sna_static_stream_map(stream,
+ GEN4_BLENDFACTOR_COUNT*GEN4_BLENDFACTOR_COUNT*64,
+ 64);
+
+ for (i = 0; i < GEN4_BLENDFACTOR_COUNT; i++) {
+ for (j = 0; j < GEN4_BLENDFACTOR_COUNT; j++) {
+ struct gen4_cc_unit_state *state =
+ (struct gen4_cc_unit_state *)ptr;
+
+ state->cc3.blend_enable = 1; /* enable color blend */
+ state->cc4.cc_viewport_state_offset = vp >> 5;
+
+ state->cc5.logicop_func = 0xc; /* COPY */
+ state->cc5.ia_blend_function = GEN4_BLENDFUNCTION_ADD;
+
+ /* Fill in alpha blend factors same as color, for the future. */
+ state->cc5.ia_src_blend_factor = i;
+ state->cc5.ia_dest_blend_factor = j;
+
+ state->cc6.blend_function = GEN4_BLENDFUNCTION_ADD;
+ state->cc6.clamp_post_alpha_blend = 1;
+ state->cc6.clamp_pre_alpha_blend = 1;
+ state->cc6.src_blend_factor = i;
+ state->cc6.dest_blend_factor = j;
+
+ ptr += 64;
+ }
+ }
+
+ return sna_static_stream_offsetof(stream, base);
+}
+
+static Bool gen4_render_setup(struct sna *sna)
+{
+ struct gen4_render_state *state = &sna->render_state.gen4;
+ struct sna_static_stream general;
+ struct gen4_wm_unit_state_padded *wm_state;
+ uint32_t sf[2], wm[KERNEL_COUNT];
+ int i, j, k, l, m;
+
+ sna_static_stream_init(&general);
+
+ /* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
+ * dumps, you know it points to zero.
+ */
+ null_create(&general);
+
+ /* Set up the two SF states (one for blending with a mask, one without) */
+ sf[0] = sna_static_stream_add(&general,
+ sf_kernel,
+ sizeof(sf_kernel),
+ 64);
+ sf[1] = sna_static_stream_add(&general,
+ sf_kernel_mask,
+ sizeof(sf_kernel_mask),
+ 64);
+ for (m = 0; m < KERNEL_COUNT; m++) {
+ wm[m] = sna_static_stream_add(&general,
+ wm_kernels[m].data,
+ wm_kernels[m].size,
+ 64);
+ }
+
+ state->vs = gen4_create_vs_unit_state(&general);
+
+ state->sf[0] = gen4_create_sf_state(&general, sf[0]);
+ state->sf[1] = gen4_create_sf_state(&general, sf[1]);
+
+
+ /* Set up the WM states: each filter/extend type for source and mask, per
+ * kernel.
+ */
+ wm_state = sna_static_stream_map(&general,
+ sizeof(*wm_state) * KERNEL_COUNT *
+ FILTER_COUNT * EXTEND_COUNT *
+ FILTER_COUNT * EXTEND_COUNT,
+ 64);
+ state->wm = sna_static_stream_offsetof(&general, wm_state);
+ for (i = 0; i < FILTER_COUNT; i++) {
+ for (j = 0; j < EXTEND_COUNT; j++) {
+ for (k = 0; k < FILTER_COUNT; k++) {
+ for (l = 0; l < EXTEND_COUNT; l++) {
+ uint32_t sampler_state;
+
+ sampler_state =
+ gen4_create_sampler_state(&general,
+ i, j,
+ k, l);
+
+ for (m = 0; m < KERNEL_COUNT; m++) {
+ gen4_init_wm_state(&wm_state->state,
+ wm_kernels[m].has_mask,
+ wm[m],
+ sampler_state);
+ wm_state++;
+ }
+ }
+ }
+ }
+ }
+
+ state->cc = gen4_create_cc_unit_state(&general);
+
+ state->general_bo = sna_static_stream_fini(sna, &general);
+ return state->general_bo != NULL;
+}
+
+Bool gen4_render_init(struct sna *sna)
+{
+ if (!gen4_render_setup(sna))
+ return FALSE;
+
+ gen4_render_reset(sna);
+
+ sna->render.composite = gen4_render_composite;
+ sna->render.video = gen4_render_video;
+
+ sna->render.copy_boxes = gen4_render_copy_boxes;
+ sna->render.copy = gen4_render_copy;
+
+ sna->render.fill_boxes = gen4_render_fill_boxes;
+ sna->render.fill = gen4_render_fill;
+
+ sna->render.flush = gen4_render_flush;
+ sna->render.context_switch = gen4_render_context_switch;
+ sna->render.reset = gen4_render_reset;
+ sna->render.fini = gen4_render_fini;
+
+ sna->render.max_3d_size = 8192;
+ return TRUE;
+}
diff --git a/src/sna/gen4_render.h b/src/sna/gen4_render.h
new file mode 100644
index 0000000..a014e52
--- a/dev/null
+++ b/src/sna/gen4_render.h
@@ -0,0 +1,2643 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef GEN5_RENDER_H
+#define GEN5_RENDER_H
+
+#define GEN4_3D(Pipeline,Opcode,Subopcode) ((3 << 29) | \
+ ((Pipeline) << 27) | \
+ ((Opcode) << 24) | \
+ ((Subopcode) << 16))
+
+#define GEN4_URB_FENCE GEN4_3D(0, 0, 0)
+#define GEN4_CS_URB_STATE GEN4_3D(0, 0, 1)
+#define GEN4_CONSTANT_BUFFER GEN4_3D(0, 0, 2)
+#define GEN4_STATE_PREFETCH GEN4_3D(0, 0, 3)
+
+#define GEN4_STATE_BASE_ADDRESS GEN4_3D(0, 1, 1)
+#define GEN4_STATE_SIP GEN4_3D(0, 1, 2)
+#define GEN4_PIPELINE_SELECT GEN4_3D(0, 1, 4)
+
+#define NEW_PIPELINE_SELECT GEN4_3D(1, 1, 4)
+
+#define GEN4_MEDIA_STATE_POINTERS GEN4_3D(2, 0, 0)
+#define GEN4_MEDIA_OBJECT GEN4_3D(2, 1, 0)
+
+#define GEN4_3DSTATE_PIPELINED_POINTERS GEN4_3D(3, 0, 0)
+#define GEN4_3DSTATE_BINDING_TABLE_POINTERS GEN4_3D(3, 0, 1)
+
+#define GEN4_3DSTATE_VERTEX_BUFFERS GEN4_3D(3, 0, 8)
+#define GEN4_3DSTATE_VERTEX_ELEMENTS GEN4_3D(3, 0, 9)
+#define GEN4_3DSTATE_INDEX_BUFFER GEN4_3D(3, 0, 0xa)
+#define GEN4_3DSTATE_VF_STATISTICS GEN4_3D(3, 0, 0xb)
+
+#define GEN4_3DSTATE_DRAWING_RECTANGLE GEN4_3D(3, 1, 0)
+#define GEN4_3DSTATE_CONSTANT_COLOR GEN4_3D(3, 1, 1)
+#define GEN4_3DSTATE_SAMPLER_PALETTE_LOAD GEN4_3D(3, 1, 2)
+#define GEN4_3DSTATE_CHROMA_KEY GEN4_3D(3, 1, 4)
+#define GEN4_3DSTATE_DEPTH_BUFFER GEN4_3D(3, 1, 5)
+# define GEN4_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT 29
+# define GEN4_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT 18
+
+#define GEN4_3DSTATE_POLY_STIPPLE_OFFSET GEN4_3D(3, 1, 6)
+#define GEN4_3DSTATE_POLY_STIPPLE_PATTERN GEN4_3D(3, 1, 7)
+#define GEN4_3DSTATE_LINE_STIPPLE GEN4_3D(3, 1, 8)
+#define GEN4_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP GEN4_3D(3, 1, 9)
+/* These two are BLC and CTG only, not BW or CL */
+#define GEN4_3DSTATE_AA_LINE_PARAMS GEN4_3D(3, 1, 0xa)
+#define GEN4_3DSTATE_GS_SVB_INDEX GEN4_3D(3, 1, 0xb)
+
+#define GEN4_PIPE_CONTROL GEN4_3D(3, 2, 0)
+
+#define GEN4_3DPRIMITIVE GEN4_3D(3, 3, 0)
+
+#define GEN4_3DSTATE_CLEAR_PARAMS GEN4_3D(3, 1, 0x10)
+/* DW1 */
+# define GEN4_3DSTATE_DEPTH_CLEAR_VALID (1 << 15)
+
+#define PIPELINE_SELECT_3D 0
+#define PIPELINE_SELECT_MEDIA 1
+
+#define UF0_CS_REALLOC (1 << 13)
+#define UF0_VFE_REALLOC (1 << 12)
+#define UF0_SF_REALLOC (1 << 11)
+#define UF0_CLIP_REALLOC (1 << 10)
+#define UF0_GS_REALLOC (1 << 9)
+#define UF0_VS_REALLOC (1 << 8)
+#define UF1_CLIP_FENCE_SHIFT 20
+#define UF1_GS_FENCE_SHIFT 10
+#define UF1_VS_FENCE_SHIFT 0
+#define UF2_CS_FENCE_SHIFT 20
+#define UF2_VFE_FENCE_SHIFT 10
+#define UF2_SF_FENCE_SHIFT 0
+
+/* for GEN4_STATE_BASE_ADDRESS */
+#define BASE_ADDRESS_MODIFY (1 << 0)
+
+/* for GEN4_3DSTATE_PIPELINED_POINTERS */
+#define GEN4_GS_DISABLE 0
+#define GEN4_GS_ENABLE 1
+#define GEN4_CLIP_DISABLE 0
+#define GEN4_CLIP_ENABLE 1
+
+/* for GEN4_PIPE_CONTROL */
+#define GEN4_PIPE_CONTROL_NOWRITE (0 << 14)
+#define GEN4_PIPE_CONTROL_WRITE_QWORD (1 << 14)
+#define GEN4_PIPE_CONTROL_WRITE_DEPTH (2 << 14)
+#define GEN4_PIPE_CONTROL_WRITE_TIME (3 << 14)
+#define GEN4_PIPE_CONTROL_DEPTH_STALL (1 << 13)
+#define GEN4_PIPE_CONTROL_WC_FLUSH (1 << 12)
+#define GEN4_PIPE_CONTROL_IS_FLUSH (1 << 11)
+#define GEN4_PIPE_CONTROL_TC_FLUSH (1 << 10)
+#define GEN4_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8)
+#define GEN4_PIPE_CONTROL_GLOBAL_GTT (1 << 2)
+#define GEN4_PIPE_CONTROL_LOCAL_PGTT (0 << 2)
+#define GEN4_PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0)
+
+/* VERTEX_BUFFER_STATE Structure */
+#define VB0_BUFFER_INDEX_SHIFT 27
+#define VB0_VERTEXDATA (0 << 26)
+#define VB0_INSTANCEDATA (1 << 26)
+#define VB0_BUFFER_PITCH_SHIFT 0
+
+/* VERTEX_ELEMENT_STATE Structure */
+#define VE0_VERTEX_BUFFER_INDEX_SHIFT 27
+#define VE0_VALID (1 << 26)
+#define VE0_FORMAT_SHIFT 16
+#define VE0_OFFSET_SHIFT 0
+#define VE1_VFCOMPONENT_0_SHIFT 28
+#define VE1_VFCOMPONENT_1_SHIFT 24
+#define VE1_VFCOMPONENT_2_SHIFT 20
+#define VE1_VFCOMPONENT_3_SHIFT 16
+#define VE1_DESTINATION_ELEMENT_OFFSET_SHIFT 0
+
+/* 3DPRIMITIVE bits */
+#define GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL (0 << 15)
+#define GEN4_3DPRIMITIVE_VERTEX_RANDOM (1 << 15)
+/* Primitive types are in gen4_defines.h */
+#define GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT 10
+
+#define GEN4_SVG_CTL 0x7400
+
+#define GEN4_SVG_CTL_GS_BA (0 << 8)
+#define GEN4_SVG_CTL_SS_BA (1 << 8)
+#define GEN4_SVG_CTL_IO_BA (2 << 8)
+#define GEN4_SVG_CTL_GS_AUB (3 << 8)
+#define GEN4_SVG_CTL_IO_AUB (4 << 8)
+#define GEN4_SVG_CTL_SIP (5 << 8)
+
+#define GEN4_SVG_RDATA 0x7404
+#define GEN4_SVG_WORK_CTL 0x7408
+
+#define GEN4_VF_CTL 0x7500
+
+#define GEN4_VF_CTL_SNAPSHOT_COMPLETE (1 << 31)
+#define GEN4_VF_CTL_SNAPSHOT_MUX_SELECT_THREADID (0 << 8)
+#define GEN4_VF_CTL_SNAPSHOT_MUX_SELECT_VF_DEBUG (1 << 8)
+#define GEN4_VF_CTL_SNAPSHOT_TYPE_VERTEX_SEQUENCE (0 << 4)
+#define GEN4_VF_CTL_SNAPSHOT_TYPE_VERTEX_INDEX (1 << 4)
+#define GEN4_VF_CTL_SKIP_INITIAL_PRIMITIVES (1 << 3)
+#define GEN4_VF_CTL_MAX_PRIMITIVES_LIMIT_ENABLE (1 << 2)
+#define GEN4_VF_CTL_VERTEX_RANGE_LIMIT_ENABLE (1 << 1)
+#define GEN4_VF_CTL_SNAPSHOT_ENABLE (1 << 0)
+
+#define GEN4_VF_STRG_VAL 0x7504
+#define GEN4_VF_STR_VL_OVR 0x7508
+#define GEN4_VF_VC_OVR 0x750c
+#define GEN4_VF_STR_PSKIP 0x7510
+#define GEN4_VF_MAX_PRIM 0x7514
+#define GEN4_VF_RDATA 0x7518
+
+#define GEN4_VS_CTL 0x7600
+#define GEN4_VS_CTL_SNAPSHOT_COMPLETE (1 << 31)
+#define GEN4_VS_CTL_SNAPSHOT_MUX_VERTEX_0 (0 << 8)
+#define GEN4_VS_CTL_SNAPSHOT_MUX_VERTEX_1 (1 << 8)
+#define GEN4_VS_CTL_SNAPSHOT_MUX_VALID_COUNT (2 << 8)
+#define GEN4_VS_CTL_SNAPSHOT_MUX_VS_KERNEL_POINTER (3 << 8)
+#define GEN4_VS_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
+#define GEN4_VS_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
+#define GEN4_VS_CTL_SNAPSHOT_ENABLE (1 << 0)
+
+#define GEN4_VS_STRG_VAL 0x7604
+#define GEN4_VS_RDATA 0x7608
+
+#define GEN4_SF_CTL 0x7b00
+#define GEN4_SF_CTL_SNAPSHOT_COMPLETE (1 << 31)
+#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_0_FF_ID (0 << 8)
+#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_0_REL_COUNT (1 << 8)
+#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_1_FF_ID (2 << 8)
+#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_1_REL_COUNT (3 << 8)
+#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_2_FF_ID (4 << 8)
+#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_2_REL_COUNT (5 << 8)
+#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_COUNT (6 << 8)
+#define GEN4_SF_CTL_SNAPSHOT_MUX_SF_KERNEL_POINTER (7 << 8)
+#define GEN4_SF_CTL_MIN_MAX_PRIMITIVE_RANGE_ENABLE (1 << 4)
+#define GEN4_SF_CTL_DEBUG_CLIP_RECTANGLE_ENABLE (1 << 3)
+#define GEN4_SF_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
+#define GEN4_SF_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
+#define GEN4_SF_CTL_SNAPSHOT_ENABLE (1 << 0)
+
+#define GEN4_SF_STRG_VAL 0x7b04
+#define GEN4_SF_RDATA 0x7b18
+
+#define GEN4_WIZ_CTL 0x7c00
+#define GEN4_WIZ_CTL_SNAPSHOT_COMPLETE (1 << 31)
+#define GEN4_WIZ_CTL_SUBSPAN_INSTANCE_SHIFT 16
+#define GEN4_WIZ_CTL_SNAPSHOT_MUX_WIZ_KERNEL_POINTER (0 << 8)
+#define GEN4_WIZ_CTL_SNAPSHOT_MUX_SUBSPAN_INSTANCE (1 << 8)
+#define GEN4_WIZ_CTL_SNAPSHOT_MUX_PRIMITIVE_SEQUENCE (2 << 8)
+#define GEN4_WIZ_CTL_SINGLE_SUBSPAN_DISPATCH (1 << 6)
+#define GEN4_WIZ_CTL_IGNORE_COLOR_SCOREBOARD_STALLS (1 << 5)
+#define GEN4_WIZ_CTL_ENABLE_SUBSPAN_INSTANCE_COMPARE (1 << 4)
+#define GEN4_WIZ_CTL_USE_UPSTREAM_SNAPSHOT_FLAG (1 << 3)
+#define GEN4_WIZ_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
+#define GEN4_WIZ_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
+#define GEN4_WIZ_CTL_SNAPSHOT_ENABLE (1 << 0)
+
+#define GEN4_WIZ_STRG_VAL 0x7c04
+#define GEN4_WIZ_RDATA 0x7c18
+
+#define GEN4_TS_CTL 0x7e00
+#define GEN4_TS_CTL_SNAPSHOT_COMPLETE (1 << 31)
+#define GEN4_TS_CTL_SNAPSHOT_MESSAGE_ERROR (0 << 8)
+#define GEN4_TS_CTL_SNAPSHOT_INTERFACE_DESCRIPTOR (3 << 8)
+#define GEN4_TS_CTL_SNAPSHOT_ALL_CHILD_THREADS (1 << 2)
+#define GEN4_TS_CTL_SNAPSHOT_ALL_ROOT_THREADS (1 << 1)
+#define GEN4_TS_CTL_SNAPSHOT_ENABLE (1 << 0)
+
+#define GEN4_TS_STRG_VAL 0x7e04
+#define GEN4_TS_RDATA 0x7e08
+
+#define GEN4_TD_CTL 0x8000
+#define GEN4_TD_CTL_MUX_SHIFT 8
+#define GEN4_TD_CTL_EXTERNAL_HALT_R0_DEBUG_MATCH (1 << 7)
+#define GEN4_TD_CTL_FORCE_EXTERNAL_HALT (1 << 6)
+#define GEN4_TD_CTL_EXCEPTION_MASK_OVERRIDE (1 << 5)
+#define GEN4_TD_CTL_FORCE_THREAD_BREAKPOINT_ENABLE (1 << 4)
+#define GEN4_TD_CTL_BREAKPOINT_ENABLE (1 << 2)
+#define GEN4_TD_CTL2 0x8004
+#define GEN4_TD_CTL2_ILLEGAL_OPCODE_EXCEPTION_OVERRIDE (1 << 28)
+#define GEN4_TD_CTL2_MASKSTACK_EXCEPTION_OVERRIDE (1 << 26)
+#define GEN4_TD_CTL2_SOFTWARE_EXCEPTION_OVERRIDE (1 << 25)
+#define GEN4_TD_CTL2_ACTIVE_THREAD_LIMIT_SHIFT 16
+#define GEN4_TD_CTL2_ACTIVE_THREAD_LIMIT_ENABLE (1 << 8)
+#define GEN4_TD_CTL2_THREAD_SPAWNER_EXECUTION_MASK_ENABLE (1 << 7)
+#define GEN4_TD_CTL2_WIZ_EXECUTION_MASK_ENABLE (1 << 6)
+#define GEN4_TD_CTL2_SF_EXECUTION_MASK_ENABLE (1 << 5)
+#define GEN4_TD_CTL2_CLIPPER_EXECUTION_MASK_ENABLE (1 << 4)
+#define GEN4_TD_CTL2_GS_EXECUTION_MASK_ENABLE (1 << 3)
+#define GEN4_TD_CTL2_VS_EXECUTION_MASK_ENABLE (1 << 0)
+#define GEN4_TD_VF_VS_EMSK 0x8008
+#define GEN4_TD_GS_EMSK 0x800c
+#define GEN4_TD_CLIP_EMSK 0x8010
+#define GEN4_TD_SF_EMSK 0x8014
+#define GEN4_TD_WIZ_EMSK 0x8018
+#define GEN4_TD_0_6_EHTRG_VAL 0x801c
+#define GEN4_TD_0_7_EHTRG_VAL 0x8020
+#define GEN4_TD_0_6_EHTRG_MSK 0x8024
+#define GEN4_TD_0_7_EHTRG_MSK 0x8028
+#define GEN4_TD_RDATA 0x802c
+#define GEN4_TD_TS_EMSK 0x8030
+
+#define GEN4_EU_CTL 0x8800
+#define GEN4_EU_CTL_SELECT_SHIFT 16
+#define GEN4_EU_CTL_DATA_MUX_SHIFT 8
+#define GEN4_EU_ATT_0 0x8810
+#define GEN4_EU_ATT_1 0x8814
+#define GEN4_EU_ATT_DATA_0 0x8820
+#define GEN4_EU_ATT_DATA_1 0x8824
+#define GEN4_EU_ATT_CLR_0 0x8830
+#define GEN4_EU_ATT_CLR_1 0x8834
+#define GEN4_EU_RDATA 0x8840
+
+/* 3D state:
+ */
+#define _3DOP_3DSTATE_PIPELINED 0x0
+#define _3DOP_3DSTATE_NONPIPELINED 0x1
+#define _3DOP_3DCONTROL 0x2
+#define _3DOP_3DPRIMITIVE 0x3
+
+#define _3DSTATE_PIPELINED_POINTERS 0x00
+#define _3DSTATE_BINDING_TABLE_POINTERS 0x01
+#define _3DSTATE_VERTEX_BUFFERS 0x08
+#define _3DSTATE_VERTEX_ELEMENTS 0x09
+#define _3DSTATE_INDEX_BUFFER 0x0A
+#define _3DSTATE_VF_STATISTICS 0x0B
+#define _3DSTATE_DRAWING_RECTANGLE 0x00
+#define _3DSTATE_CONSTANT_COLOR 0x01
+#define _3DSTATE_SAMPLER_PALETTE_LOAD 0x02
+#define _3DSTATE_CHROMA_KEY 0x04
+#define _3DSTATE_DEPTH_BUFFER 0x05
+#define _3DSTATE_POLY_STIPPLE_OFFSET 0x06
+#define _3DSTATE_POLY_STIPPLE_PATTERN 0x07
+#define _3DSTATE_LINE_STIPPLE 0x08
+#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x09
+#define _3DCONTROL 0x00
+#define _3DPRIMITIVE 0x00
+
+#define _3DPRIM_POINTLIST 0x01
+#define _3DPRIM_LINELIST 0x02
+#define _3DPRIM_LINESTRIP 0x03
+#define _3DPRIM_TRILIST 0x04
+#define _3DPRIM_TRISTRIP 0x05
+#define _3DPRIM_TRIFAN 0x06
+#define _3DPRIM_QUADLIST 0x07
+#define _3DPRIM_QUADSTRIP 0x08
+#define _3DPRIM_LINELIST_ADJ 0x09
+#define _3DPRIM_LINESTRIP_ADJ 0x0A
+#define _3DPRIM_TRILIST_ADJ 0x0B
+#define _3DPRIM_TRISTRIP_ADJ 0x0C
+#define _3DPRIM_TRISTRIP_REVERSE 0x0D
+#define _3DPRIM_POLYGON 0x0E
+#define _3DPRIM_RECTLIST 0x0F
+#define _3DPRIM_LINELOOP 0x10
+#define _3DPRIM_POINTLIST_BF 0x11
+#define _3DPRIM_LINESTRIP_CONT 0x12
+#define _3DPRIM_LINESTRIP_BF 0x13
+#define _3DPRIM_LINESTRIP_CONT_BF 0x14
+#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15
+
+#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0
+#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM 1
+
+#define GEN4_ANISORATIO_2 0
+#define GEN4_ANISORATIO_4 1
+#define GEN4_ANISORATIO_6 2
+#define GEN4_ANISORATIO_8 3
+#define GEN4_ANISORATIO_10 4
+#define GEN4_ANISORATIO_12 5
+#define GEN4_ANISORATIO_14 6
+#define GEN4_ANISORATIO_16 7
+
+#define GEN4_BLENDFACTOR_ONE 0x1
+#define GEN4_BLENDFACTOR_SRC_COLOR 0x2
+#define GEN4_BLENDFACTOR_SRC_ALPHA 0x3
+#define GEN4_BLENDFACTOR_DST_ALPHA 0x4
+#define GEN4_BLENDFACTOR_DST_COLOR 0x5
+#define GEN4_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6
+#define GEN4_BLENDFACTOR_CONST_COLOR 0x7
+#define GEN4_BLENDFACTOR_CONST_ALPHA 0x8
+#define GEN4_BLENDFACTOR_SRC1_COLOR 0x9
+#define GEN4_BLENDFACTOR_SRC1_ALPHA 0x0A
+#define GEN4_BLENDFACTOR_ZERO 0x11
+#define GEN4_BLENDFACTOR_INV_SRC_COLOR 0x12
+#define GEN4_BLENDFACTOR_INV_SRC_ALPHA 0x13
+#define GEN4_BLENDFACTOR_INV_DST_ALPHA 0x14
+#define GEN4_BLENDFACTOR_INV_DST_COLOR 0x15
+#define GEN4_BLENDFACTOR_INV_CONST_COLOR 0x17
+#define GEN4_BLENDFACTOR_INV_CONST_ALPHA 0x18
+#define GEN4_BLENDFACTOR_INV_SRC1_COLOR 0x19
+#define GEN4_BLENDFACTOR_INV_SRC1_ALPHA 0x1A
+
+#define GEN4_BLENDFUNCTION_ADD 0
+#define GEN4_BLENDFUNCTION_SUBTRACT 1
+#define GEN4_BLENDFUNCTION_REVERSE_SUBTRACT 2
+#define GEN4_BLENDFUNCTION_MIN 3
+#define GEN4_BLENDFUNCTION_MAX 4
+
+#define GEN4_ALPHATEST_FORMAT_UNORM8 0
+#define GEN4_ALPHATEST_FORMAT_FLOAT32 1
+
+#define GEN4_CHROMAKEY_KILL_ON_ANY_MATCH 0
+#define GEN4_CHROMAKEY_REPLACE_BLACK 1
+
+#define GEN4_CLIP_API_OGL 0
+#define GEN4_CLIP_API_DX 1
+
+#define GEN4_CLIPMODE_NORMAL 0
+#define GEN4_CLIPMODE_CLIP_ALL 1
+#define GEN4_CLIPMODE_CLIP_NON_REJECTED 2
+#define GEN4_CLIPMODE_REJECT_ALL 3
+#define GEN4_CLIPMODE_ACCEPT_ALL 4
+
+#define GEN4_CLIP_NDCSPACE 0
+#define GEN4_CLIP_SCREENSPACE 1
+
+#define GEN4_COMPAREFUNCTION_ALWAYS 0
+#define GEN4_COMPAREFUNCTION_NEVER 1
+#define GEN4_COMPAREFUNCTION_LESS 2
+#define GEN4_COMPAREFUNCTION_EQUAL 3
+#define GEN4_COMPAREFUNCTION_LEQUAL 4
+#define GEN4_COMPAREFUNCTION_GREATER 5
+#define GEN4_COMPAREFUNCTION_NOTEQUAL 6
+#define GEN4_COMPAREFUNCTION_GEQUAL 7
+
+#define GEN4_COVERAGE_PIXELS_HALF 0
+#define GEN4_COVERAGE_PIXELS_1 1
+#define GEN4_COVERAGE_PIXELS_2 2
+#define GEN4_COVERAGE_PIXELS_4 3
+
+#define GEN4_CULLMODE_BOTH 0
+#define GEN4_CULLMODE_NONE 1
+#define GEN4_CULLMODE_FRONT 2
+#define GEN4_CULLMODE_BACK 3
+
+#define GEN4_DEFAULTCOLOR_R8G8B8A8_UNORM 0
+#define GEN4_DEFAULTCOLOR_R32G32B32A32_FLOAT 1
+
+#define GEN4_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0
+#define GEN4_DEPTHFORMAT_D32_FLOAT 1
+#define GEN4_DEPTHFORMAT_D24_UNORM_S8_UINT 2
+#define GEN4_DEPTHFORMAT_D16_UNORM 5
+
+#define GEN4_FLOATING_POINT_IEEE_754 0
+#define GEN4_FLOATING_POINT_NON_IEEE_754 1
+
+#define GEN4_FRONTWINDING_CW 0
+#define GEN4_FRONTWINDING_CCW 1
+
+#define GEN4_INDEX_BYTE 0
+#define GEN4_INDEX_WORD 1
+#define GEN4_INDEX_DWORD 2
+
+#define GEN4_LOGICOPFUNCTION_CLEAR 0
+#define GEN4_LOGICOPFUNCTION_NOR 1
+#define GEN4_LOGICOPFUNCTION_AND_INVERTED 2
+#define GEN4_LOGICOPFUNCTION_COPY_INVERTED 3
+#define GEN4_LOGICOPFUNCTION_AND_REVERSE 4
+#define GEN4_LOGICOPFUNCTION_INVERT 5
+#define GEN4_LOGICOPFUNCTION_XOR 6
+#define GEN4_LOGICOPFUNCTION_NAND 7
+#define GEN4_LOGICOPFUNCTION_AND 8
+#define GEN4_LOGICOPFUNCTION_EQUIV 9
+#define GEN4_LOGICOPFUNCTION_NOOP 10
+#define GEN4_LOGICOPFUNCTION_OR_INVERTED 11
+#define GEN4_LOGICOPFUNCTION_COPY 12
+#define GEN4_LOGICOPFUNCTION_OR_REVERSE 13
+#define GEN4_LOGICOPFUNCTION_OR 14
+#define GEN4_LOGICOPFUNCTION_SET 15
+
+#define GEN4_MAPFILTER_NEAREST 0x0
+#define GEN4_MAPFILTER_LINEAR 0x1
+#define GEN4_MAPFILTER_ANISOTROPIC 0x2
+
+#define GEN4_MIPFILTER_NONE 0
+#define GEN4_MIPFILTER_NEAREST 1
+#define GEN4_MIPFILTER_LINEAR 3
+
+#define GEN4_POLYGON_FRONT_FACING 0
+#define GEN4_POLYGON_BACK_FACING 1
+
+#define GEN4_PREFILTER_ALWAYS 0x0
+#define GEN4_PREFILTER_NEVER 0x1
+#define GEN4_PREFILTER_LESS 0x2
+#define GEN4_PREFILTER_EQUAL 0x3
+#define GEN4_PREFILTER_LEQUAL 0x4
+#define GEN4_PREFILTER_GREATER 0x5
+#define GEN4_PREFILTER_NOTEQUAL 0x6
+#define GEN4_PREFILTER_GEQUAL 0x7
+
+#define GEN4_PROVOKING_VERTEX_0 0
+#define GEN4_PROVOKING_VERTEX_1 1
+#define GEN4_PROVOKING_VERTEX_2 2
+
+#define GEN4_RASTRULE_UPPER_LEFT 0
+#define GEN4_RASTRULE_UPPER_RIGHT 1
+
+#define GEN4_RENDERTARGET_CLAMPRANGE_UNORM 0
+#define GEN4_RENDERTARGET_CLAMPRANGE_SNORM 1
+#define GEN4_RENDERTARGET_CLAMPRANGE_FORMAT 2
+
+#define GEN4_STENCILOP_KEEP 0
+#define GEN4_STENCILOP_ZERO 1
+#define GEN4_STENCILOP_REPLACE 2
+#define GEN4_STENCILOP_INCRSAT 3
+#define GEN4_STENCILOP_DECRSAT 4
+#define GEN4_STENCILOP_INCR 5
+#define GEN4_STENCILOP_DECR 6
+#define GEN4_STENCILOP_INVERT 7
+
+#define GEN4_SURFACE_MIPMAPLAYOUT_BELOW 0
+#define GEN4_SURFACE_MIPMAPLAYOUT_RIGHT 1
+
+#define GEN4_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000
+#define GEN4_SURFACEFORMAT_R32G32B32A32_SINT 0x001
+#define GEN4_SURFACEFORMAT_R32G32B32A32_UINT 0x002
+#define GEN4_SURFACEFORMAT_R32G32B32A32_UNORM 0x003
+#define GEN4_SURFACEFORMAT_R32G32B32A32_SNORM 0x004
+#define GEN4_SURFACEFORMAT_R64G64_FLOAT 0x005
+#define GEN4_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006
+#define GEN4_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007
+#define GEN4_SURFACEFORMAT_R32G32B32A32_USCALED 0x008
+#define GEN4_SURFACEFORMAT_R32G32B32_FLOAT 0x040
+#define GEN4_SURFACEFORMAT_R32G32B32_SINT 0x041
+#define GEN4_SURFACEFORMAT_R32G32B32_UINT 0x042
+#define GEN4_SURFACEFORMAT_R32G32B32_UNORM 0x043
+#define GEN4_SURFACEFORMAT_R32G32B32_SNORM 0x044
+#define GEN4_SURFACEFORMAT_R32G32B32_SSCALED 0x045
+#define GEN4_SURFACEFORMAT_R32G32B32_USCALED 0x046
+#define GEN4_SURFACEFORMAT_R16G16B16A16_UNORM 0x080
+#define GEN4_SURFACEFORMAT_R16G16B16A16_SNORM 0x081
+#define GEN4_SURFACEFORMAT_R16G16B16A16_SINT 0x082
+#define GEN4_SURFACEFORMAT_R16G16B16A16_UINT 0x083
+#define GEN4_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084
+#define GEN4_SURFACEFORMAT_R32G32_FLOAT 0x085
+#define GEN4_SURFACEFORMAT_R32G32_SINT 0x086
+#define GEN4_SURFACEFORMAT_R32G32_UINT 0x087
+#define GEN4_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088
+#define GEN4_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089
+#define GEN4_SURFACEFORMAT_L32A32_FLOAT 0x08A
+#define GEN4_SURFACEFORMAT_R32G32_UNORM 0x08B
+#define GEN4_SURFACEFORMAT_R32G32_SNORM 0x08C
+#define GEN4_SURFACEFORMAT_R64_FLOAT 0x08D
+#define GEN4_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E
+#define GEN4_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F
+#define GEN4_SURFACEFORMAT_A32X32_FLOAT 0x090
+#define GEN4_SURFACEFORMAT_L32X32_FLOAT 0x091
+#define GEN4_SURFACEFORMAT_I32X32_FLOAT 0x092
+#define GEN4_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093
+#define GEN4_SURFACEFORMAT_R16G16B16A16_USCALED 0x094
+#define GEN4_SURFACEFORMAT_R32G32_SSCALED 0x095
+#define GEN4_SURFACEFORMAT_R32G32_USCALED 0x096
+#define GEN4_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0
+#define GEN4_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1
+#define GEN4_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2
+#define GEN4_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3
+#define GEN4_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4
+#define GEN4_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5
+#define GEN4_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7
+#define GEN4_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8
+#define GEN4_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9
+#define GEN4_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA
+#define GEN4_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB
+#define GEN4_SURFACEFORMAT_R16G16_UNORM 0x0CC
+#define GEN4_SURFACEFORMAT_R16G16_SNORM 0x0CD
+#define GEN4_SURFACEFORMAT_R16G16_SINT 0x0CE
+#define GEN4_SURFACEFORMAT_R16G16_UINT 0x0CF
+#define GEN4_SURFACEFORMAT_R16G16_FLOAT 0x0D0
+#define GEN4_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1
+#define GEN4_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2
+#define GEN4_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3
+#define GEN4_SURFACEFORMAT_R32_SINT 0x0D6
+#define GEN4_SURFACEFORMAT_R32_UINT 0x0D7
+#define GEN4_SURFACEFORMAT_R32_FLOAT 0x0D8
+#define GEN4_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9
+#define GEN4_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA
+#define GEN4_SURFACEFORMAT_L16A16_UNORM 0x0DF
+#define GEN4_SURFACEFORMAT_I24X8_UNORM 0x0E0
+#define GEN4_SURFACEFORMAT_L24X8_UNORM 0x0E1
+#define GEN4_SURFACEFORMAT_A24X8_UNORM 0x0E2
+#define GEN4_SURFACEFORMAT_I32_FLOAT 0x0E3
+#define GEN4_SURFACEFORMAT_L32_FLOAT 0x0E4
+#define GEN4_SURFACEFORMAT_A32_FLOAT 0x0E5
+#define GEN4_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9
+#define GEN4_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA
+#define GEN4_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB
+#define GEN4_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC
+#define GEN4_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED
+#define GEN4_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE
+#define GEN4_SURFACEFORMAT_L16A16_FLOAT 0x0F0
+#define GEN4_SURFACEFORMAT_R32_UNORM 0x0F1
+#define GEN4_SURFACEFORMAT_R32_SNORM 0x0F2
+#define GEN4_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3
+#define GEN4_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4
+#define GEN4_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5
+#define GEN4_SURFACEFORMAT_R16G16_SSCALED 0x0F6
+#define GEN4_SURFACEFORMAT_R16G16_USCALED 0x0F7
+#define GEN4_SURFACEFORMAT_R32_SSCALED 0x0F8
+#define GEN4_SURFACEFORMAT_R32_USCALED 0x0F9
+#define GEN4_SURFACEFORMAT_B5G6R5_UNORM 0x100
+#define GEN4_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101
+#define GEN4_SURFACEFORMAT_B5G5R5A1_UNORM 0x102
+#define GEN4_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103
+#define GEN4_SURFACEFORMAT_B4G4R4A4_UNORM 0x104
+#define GEN4_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105
+#define GEN4_SURFACEFORMAT_R8G8_UNORM 0x106
+#define GEN4_SURFACEFORMAT_R8G8_SNORM 0x107
+#define GEN4_SURFACEFORMAT_R8G8_SINT 0x108
+#define GEN4_SURFACEFORMAT_R8G8_UINT 0x109
+#define GEN4_SURFACEFORMAT_R16_UNORM 0x10A
+#define GEN4_SURFACEFORMAT_R16_SNORM 0x10B
+#define GEN4_SURFACEFORMAT_R16_SINT 0x10C
+#define GEN4_SURFACEFORMAT_R16_UINT 0x10D
+#define GEN4_SURFACEFORMAT_R16_FLOAT 0x10E
+#define GEN4_SURFACEFORMAT_I16_UNORM 0x111
+#define GEN4_SURFACEFORMAT_L16_UNORM 0x112
+#define GEN4_SURFACEFORMAT_A16_UNORM 0x113
+#define GEN4_SURFACEFORMAT_L8A8_UNORM 0x114
+#define GEN4_SURFACEFORMAT_I16_FLOAT 0x115
+#define GEN4_SURFACEFORMAT_L16_FLOAT 0x116
+#define GEN4_SURFACEFORMAT_A16_FLOAT 0x117
+#define GEN4_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119
+#define GEN4_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A
+#define GEN4_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B
+#define GEN4_SURFACEFORMAT_R8G8_SSCALED 0x11C
+#define GEN4_SURFACEFORMAT_R8G8_USCALED 0x11D
+#define GEN4_SURFACEFORMAT_R16_SSCALED 0x11E
+#define GEN4_SURFACEFORMAT_R16_USCALED 0x11F
+#define GEN4_SURFACEFORMAT_R8_UNORM 0x140
+#define GEN4_SURFACEFORMAT_R8_SNORM 0x141
+#define GEN4_SURFACEFORMAT_R8_SINT 0x142
+#define GEN4_SURFACEFORMAT_R8_UINT 0x143
+#define GEN4_SURFACEFORMAT_A8_UNORM 0x144
+#define GEN4_SURFACEFORMAT_I8_UNORM 0x145
+#define GEN4_SURFACEFORMAT_L8_UNORM 0x146
+#define GEN4_SURFACEFORMAT_P4A4_UNORM 0x147
+#define GEN4_SURFACEFORMAT_A4P4_UNORM 0x148
+#define GEN4_SURFACEFORMAT_R8_SSCALED 0x149
+#define GEN4_SURFACEFORMAT_R8_USCALED 0x14A
+#define GEN4_SURFACEFORMAT_R1_UINT 0x181
+#define GEN4_SURFACEFORMAT_YCRCB_NORMAL 0x182
+#define GEN4_SURFACEFORMAT_YCRCB_SWAPUVY 0x183
+#define GEN4_SURFACEFORMAT_BC1_UNORM 0x186
+#define GEN4_SURFACEFORMAT_BC2_UNORM 0x187
+#define GEN4_SURFACEFORMAT_BC3_UNORM 0x188
+#define GEN4_SURFACEFORMAT_BC4_UNORM 0x189
+#define GEN4_SURFACEFORMAT_BC5_UNORM 0x18A
+#define GEN4_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B
+#define GEN4_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C
+#define GEN4_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D
+#define GEN4_SURFACEFORMAT_MONO8 0x18E
+#define GEN4_SURFACEFORMAT_YCRCB_SWAPUV 0x18F
+#define GEN4_SURFACEFORMAT_YCRCB_SWAPY 0x190
+#define GEN4_SURFACEFORMAT_DXT1_RGB 0x191
+#define GEN4_SURFACEFORMAT_FXT1 0x192
+#define GEN4_SURFACEFORMAT_R8G8B8_UNORM 0x193
+#define GEN4_SURFACEFORMAT_R8G8B8_SNORM 0x194
+#define GEN4_SURFACEFORMAT_R8G8B8_SSCALED 0x195
+#define GEN4_SURFACEFORMAT_R8G8B8_USCALED 0x196
+#define GEN4_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197
+#define GEN4_SURFACEFORMAT_R64G64B64_FLOAT 0x198
+#define GEN4_SURFACEFORMAT_BC4_SNORM 0x199
+#define GEN4_SURFACEFORMAT_BC5_SNORM 0x19A
+#define GEN4_SURFACEFORMAT_R16G16B16_UNORM 0x19C
+#define GEN4_SURFACEFORMAT_R16G16B16_SNORM 0x19D
+#define GEN4_SURFACEFORMAT_R16G16B16_SSCALED 0x19E
+#define GEN4_SURFACEFORMAT_R16G16B16_USCALED 0x19F
+
+#define GEN4_SURFACERETURNFORMAT_FLOAT32 0
+#define GEN4_SURFACERETURNFORMAT_S1 1
+
+#define GEN4_SURFACE_1D 0
+#define GEN4_SURFACE_2D 1
+#define GEN4_SURFACE_3D 2
+#define GEN4_SURFACE_CUBE 3
+#define GEN4_SURFACE_BUFFER 4
+#define GEN4_SURFACE_NULL 7
+
+#define GEN4_BORDER_COLOR_MODE_DEFAULT 0
+#define GEN4_BORDER_COLOR_MODE_LEGACY 1
+
+#define GEN4_TEXCOORDMODE_WRAP 0
+#define GEN4_TEXCOORDMODE_MIRROR 1
+#define GEN4_TEXCOORDMODE_CLAMP 2
+#define GEN4_TEXCOORDMODE_CUBE 3
+#define GEN4_TEXCOORDMODE_CLAMP_BORDER 4
+#define GEN4_TEXCOORDMODE_MIRROR_ONCE 5
+
+#define GEN4_THREAD_PRIORITY_NORMAL 0
+#define GEN4_THREAD_PRIORITY_HIGH 1
+
+#define GEN4_TILEWALK_XMAJOR 0
+#define GEN4_TILEWALK_YMAJOR 1
+
+#define GEN4_VERTEX_SUBPIXEL_PRECISION_8BITS 0
+#define GEN4_VERTEX_SUBPIXEL_PRECISION_4BITS 1
+
+#define GEN4_VERTEXBUFFER_ACCESS_VERTEXDATA 0
+#define GEN4_VERTEXBUFFER_ACCESS_INSTANCEDATA 1
+
+#define GEN4_VFCOMPONENT_NOSTORE 0
+#define GEN4_VFCOMPONENT_STORE_SRC 1
+#define GEN4_VFCOMPONENT_STORE_0 2
+#define GEN4_VFCOMPONENT_STORE_1_FLT 3
+#define GEN4_VFCOMPONENT_STORE_1_INT 4
+#define GEN4_VFCOMPONENT_STORE_VID 5
+#define GEN4_VFCOMPONENT_STORE_IID 6
+#define GEN4_VFCOMPONENT_STORE_PID 7
+
+
+
+/* Execution Unit (EU) defines
+ */
+
+#define GEN4_ALIGN_1 0
+#define GEN4_ALIGN_16 1
+
+#define GEN4_ADDRESS_DIRECT 0
+#define GEN4_ADDRESS_REGISTER_INDIRECT_REGISTER 1
+
+#define GEN4_CHANNEL_X 0
+#define GEN4_CHANNEL_Y 1
+#define GEN4_CHANNEL_Z 2
+#define GEN4_CHANNEL_W 3
+
+#define GEN4_COMPRESSION_NONE 0
+#define GEN4_COMPRESSION_2NDHALF 1
+#define GEN4_COMPRESSION_COMPRESSED 2
+
+#define GEN4_CONDITIONAL_NONE 0
+#define GEN4_CONDITIONAL_Z 1
+#define GEN4_CONDITIONAL_NZ 2
+#define GEN4_CONDITIONAL_EQ 1 /* Z */
+#define GEN4_CONDITIONAL_NEQ 2 /* NZ */
+#define GEN4_CONDITIONAL_G 3
+#define GEN4_CONDITIONAL_GE 4
+#define GEN4_CONDITIONAL_L 5
+#define GEN4_CONDITIONAL_LE 6
+#define GEN4_CONDITIONAL_C 7
+#define GEN4_CONDITIONAL_O 8
+
+#define GEN4_DEBUG_NONE 0
+#define GEN4_DEBUG_BREAKPOINT 1
+
+#define GEN4_DEPENDENCY_NORMAL 0
+#define GEN4_DEPENDENCY_NOTCLEARED 1
+#define GEN4_DEPENDENCY_NOTCHECKED 2
+#define GEN4_DEPENDENCY_DISABLE 3
+
+#define GEN4_EXECUTE_1 0
+#define GEN4_EXECUTE_2 1
+#define GEN4_EXECUTE_4 2
+#define GEN4_EXECUTE_8 3
+#define GEN4_EXECUTE_16 4
+#define GEN4_EXECUTE_32 5
+
+#define GEN4_HORIZONTAL_STRIDE_0 0
+#define GEN4_HORIZONTAL_STRIDE_1 1
+#define GEN4_HORIZONTAL_STRIDE_2 2
+#define GEN4_HORIZONTAL_STRIDE_4 3
+
+#define GEN4_INSTRUCTION_NORMAL 0
+#define GEN4_INSTRUCTION_SATURATE 1
+
+#define GEN4_MASK_ENABLE 0
+#define GEN4_MASK_DISABLE 1
+
+#define GEN4_OPCODE_MOV 1
+#define GEN4_OPCODE_SEL 2
+#define GEN4_OPCODE_NOT 4
+#define GEN4_OPCODE_AND 5
+#define GEN4_OPCODE_OR 6
+#define GEN4_OPCODE_XOR 7
+#define GEN4_OPCODE_SHR 8
+#define GEN4_OPCODE_SHL 9
+#define GEN4_OPCODE_RSR 10
+#define GEN4_OPCODE_RSL 11
+#define GEN4_OPCODE_ASR 12
+#define GEN4_OPCODE_CMP 16
+#define GEN4_OPCODE_JMPI 32
+#define GEN4_OPCODE_IF 34
+#define GEN4_OPCODE_IFF 35
+#define GEN4_OPCODE_ELSE 36
+#define GEN4_OPCODE_ENDIF 37
+#define GEN4_OPCODE_DO 38
+#define GEN4_OPCODE_WHILE 39
+#define GEN4_OPCODE_BREAK 40
+#define GEN4_OPCODE_CONTINUE 41
+#define GEN4_OPCODE_HALT 42
+#define GEN4_OPCODE_MSAVE 44
+#define GEN4_OPCODE_MRESTORE 45
+#define GEN4_OPCODE_PUSH 46
+#define GEN4_OPCODE_POP 47
+#define GEN4_OPCODE_WAIT 48
+#define GEN4_OPCODE_SEND 49
+#define GEN4_OPCODE_ADD 64
+#define GEN4_OPCODE_MUL 65
+#define GEN4_OPCODE_AVG 66
+#define GEN4_OPCODE_FRC 67
+#define GEN4_OPCODE_RNDU 68
+#define GEN4_OPCODE_RNDD 69
+#define GEN4_OPCODE_RNDE 70
+#define GEN4_OPCODE_RNDZ 71
+#define GEN4_OPCODE_MAC 72
+#define GEN4_OPCODE_MACH 73
+#define GEN4_OPCODE_LZD 74
+#define GEN4_OPCODE_SAD2 80
+#define GEN4_OPCODE_SADA2 81
+#define GEN4_OPCODE_DP4 84
+#define GEN4_OPCODE_DPH 85
+#define GEN4_OPCODE_DP3 86
+#define GEN4_OPCODE_DP2 87
+#define GEN4_OPCODE_DPA2 88
+#define GEN4_OPCODE_LINE 89
+#define GEN4_OPCODE_NOP 126
+
+#define GEN4_PREDICATE_NONE 0
+#define GEN4_PREDICATE_NORMAL 1
+#define GEN4_PREDICATE_ALIGN1_ANYV 2
+#define GEN4_PREDICATE_ALIGN1_ALLV 3
+#define GEN4_PREDICATE_ALIGN1_ANY2H 4
+#define GEN4_PREDICATE_ALIGN1_ALL2H 5
+#define GEN4_PREDICATE_ALIGN1_ANY4H 6
+#define GEN4_PREDICATE_ALIGN1_ALL4H 7
+#define GEN4_PREDICATE_ALIGN1_ANY8H 8
+#define GEN4_PREDICATE_ALIGN1_ALL8H 9
+#define GEN4_PREDICATE_ALIGN1_ANY16H 10
+#define GEN4_PREDICATE_ALIGN1_ALL16H 11
+#define GEN4_PREDICATE_ALIGN16_REPLICATE_X 2
+#define GEN4_PREDICATE_ALIGN16_REPLICATE_Y 3
+#define GEN4_PREDICATE_ALIGN16_REPLICATE_Z 4
+#define GEN4_PREDICATE_ALIGN16_REPLICATE_W 5
+#define GEN4_PREDICATE_ALIGN16_ANY4H 6
+#define GEN4_PREDICATE_ALIGN16_ALL4H 7
+
+#define GEN4_ARCHITECTURE_REGISTER_FILE 0
+#define GEN4_GENERAL_REGISTER_FILE 1
+#define GEN4_MESSAGE_REGISTER_FILE 2
+#define GEN4_IMMEDIATE_VALUE 3
+
+#define GEN4_REGISTER_TYPE_UD 0
+#define GEN4_REGISTER_TYPE_D 1
+#define GEN4_REGISTER_TYPE_UW 2
+#define GEN4_REGISTER_TYPE_W 3
+#define GEN4_REGISTER_TYPE_UB 4
+#define GEN4_REGISTER_TYPE_B 5
+#define GEN4_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? */
+#define GEN4_REGISTER_TYPE_HF 6
+#define GEN4_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */
+#define GEN4_REGISTER_TYPE_F 7
+
+#define GEN4_ARF_NULL 0x00
+#define GEN4_ARF_ADDRESS 0x10
+#define GEN4_ARF_ACCUMULATOR 0x20
+#define GEN4_ARF_FLAG 0x30
+#define GEN4_ARF_MASK 0x40
+#define GEN4_ARF_MASK_STACK 0x50
+#define GEN4_ARF_MASK_STACK_DEPTH 0x60
+#define GEN4_ARF_STATE 0x70
+#define GEN4_ARF_CONTROL 0x80
+#define GEN4_ARF_NOTIFICATION_COUNT 0x90
+#define GEN4_ARF_IP 0xA0
+
+#define GEN4_AMASK 0
+#define GEN4_IMASK 1
+#define GEN4_LMASK 2
+#define GEN4_CMASK 3
+
+
+
+#define GEN4_THREAD_NORMAL 0
+#define GEN4_THREAD_ATOMIC 1
+#define GEN4_THREAD_SWITCH 2
+
+#define GEN4_VERTICAL_STRIDE_0 0
+#define GEN4_VERTICAL_STRIDE_1 1
+#define GEN4_VERTICAL_STRIDE_2 2
+#define GEN4_VERTICAL_STRIDE_4 3
+#define GEN4_VERTICAL_STRIDE_8 4
+#define GEN4_VERTICAL_STRIDE_16 5
+#define GEN4_VERTICAL_STRIDE_32 6
+#define GEN4_VERTICAL_STRIDE_64 7
+#define GEN4_VERTICAL_STRIDE_128 8
+#define GEN4_VERTICAL_STRIDE_256 9
+#define GEN4_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF
+
+#define GEN4_WIDTH_1 0
+#define GEN4_WIDTH_2 1
+#define GEN4_WIDTH_4 2
+#define GEN4_WIDTH_8 3
+#define GEN4_WIDTH_16 4
+
+#define GEN4_STATELESS_BUFFER_BOUNDARY_1K 0
+#define GEN4_STATELESS_BUFFER_BOUNDARY_2K 1
+#define GEN4_STATELESS_BUFFER_BOUNDARY_4K 2
+#define GEN4_STATELESS_BUFFER_BOUNDARY_8K 3
+#define GEN4_STATELESS_BUFFER_BOUNDARY_16K 4
+#define GEN4_STATELESS_BUFFER_BOUNDARY_32K 5
+#define GEN4_STATELESS_BUFFER_BOUNDARY_64K 6
+#define GEN4_STATELESS_BUFFER_BOUNDARY_128K 7
+#define GEN4_STATELESS_BUFFER_BOUNDARY_256K 8
+#define GEN4_STATELESS_BUFFER_BOUNDARY_512K 9
+#define GEN4_STATELESS_BUFFER_BOUNDARY_1M 10
+#define GEN4_STATELESS_BUFFER_BOUNDARY_2M 11
+
+#define GEN4_POLYGON_FACING_FRONT 0
+#define GEN4_POLYGON_FACING_BACK 1
+
+#define GEN4_MESSAGE_TARGET_NULL 0
+#define GEN4_MESSAGE_TARGET_MATH 1
+#define GEN4_MESSAGE_TARGET_SAMPLER 2
+#define GEN4_MESSAGE_TARGET_GATEWAY 3
+#define GEN4_MESSAGE_TARGET_DATAPORT_READ 4
+#define GEN4_MESSAGE_TARGET_DATAPORT_WRITE 5
+#define GEN4_MESSAGE_TARGET_URB 6
+#define GEN4_MESSAGE_TARGET_THREAD_SPAWNER 7
+
+#define GEN4_SAMPLER_RETURN_FORMAT_FLOAT32 0
+#define GEN4_SAMPLER_RETURN_FORMAT_UINT32 2
+#define GEN4_SAMPLER_RETURN_FORMAT_SINT32 3
+
+#define GEN4_SAMPLER_MESSAGE_SIMD8_SAMPLE 0
+#define GEN4_SAMPLER_MESSAGE_SIMD16_SAMPLE 0
+#define GEN4_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0
+#define GEN4_SAMPLER_MESSAGE_SIMD8_KILLPIX 1
+#define GEN4_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1
+#define GEN4_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1
+#define GEN4_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2
+#define GEN4_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2
+#define GEN4_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0
+#define GEN4_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2
+#define GEN4_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2
+#define GEN4_SAMPLER_MESSAGE_SIMD8_RESINFO 2
+#define GEN4_SAMPLER_MESSAGE_SIMD16_RESINFO 2
+#define GEN4_SAMPLER_MESSAGE_SIMD4X2_LD 3
+#define GEN4_SAMPLER_MESSAGE_SIMD8_LD 3
+#define GEN4_SAMPLER_MESSAGE_SIMD16_LD 3
+
+#define GEN4_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0
+#define GEN4_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1
+#define GEN4_DATAPORT_OWORD_BLOCK_2_OWORDS 2
+#define GEN4_DATAPORT_OWORD_BLOCK_4_OWORDS 3
+#define GEN4_DATAPORT_OWORD_BLOCK_8_OWORDS 4
+
+#define GEN4_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0
+#define GEN4_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2
+
+#define GEN4_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2
+#define GEN4_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3
+
+#define GEN4_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0
+#define GEN4_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1
+#define GEN4_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ 2
+#define GEN4_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3
+
+#define GEN4_DATAPORT_READ_TARGET_DATA_CACHE 0
+#define GEN4_DATAPORT_READ_TARGET_RENDER_CACHE 1
+#define GEN4_DATAPORT_READ_TARGET_SAMPLER_CACHE 2
+
+#define GEN4_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0
+#define GEN4_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1
+#define GEN4_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2
+#define GEN4_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3
+#define GEN4_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4
+
+#define GEN4_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0
+#define GEN4_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1
+#define GEN4_DATAPORT_WRITE_MESSAGE_DWORD_BLOCK_WRITE 2
+#define GEN4_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3
+#define GEN4_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4
+#define GEN4_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5
+#define GEN4_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7
+
+#define GEN4_MATH_FUNCTION_INV 1
+#define GEN4_MATH_FUNCTION_LOG 2
+#define GEN4_MATH_FUNCTION_EXP 3
+#define GEN4_MATH_FUNCTION_SQRT 4
+#define GEN4_MATH_FUNCTION_RSQ 5
+#define GEN4_MATH_FUNCTION_SIN 6 /* was 7 */
+#define GEN4_MATH_FUNCTION_COS 7 /* was 8 */
+#define GEN4_MATH_FUNCTION_SINCOS 8 /* was 6 */
+#define GEN4_MATH_FUNCTION_TAN 9
+#define GEN4_MATH_FUNCTION_POW 10
+#define GEN4_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11
+#define GEN4_MATH_FUNCTION_INT_DIV_QUOTIENT 12
+#define GEN4_MATH_FUNCTION_INT_DIV_REMAINDER 13
+
+#define GEN4_MATH_INTEGER_UNSIGNED 0
+#define GEN4_MATH_INTEGER_SIGNED 1
+
+#define GEN4_MATH_PRECISION_FULL 0
+#define GEN4_MATH_PRECISION_PARTIAL 1
+
+#define GEN4_MATH_SATURATE_NONE 0
+#define GEN4_MATH_SATURATE_SATURATE 1
+
+#define GEN4_MATH_DATA_VECTOR 0
+#define GEN4_MATH_DATA_SCALAR 1
+
+#define GEN4_URB_OPCODE_WRITE 0
+
+#define GEN4_URB_SWIZZLE_NONE 0
+#define GEN4_URB_SWIZZLE_INTERLEAVE 1
+#define GEN4_URB_SWIZZLE_TRANSPOSE 2
+
+#define GEN4_SCRATCH_SPACE_SIZE_1K 0
+#define GEN4_SCRATCH_SPACE_SIZE_2K 1
+#define GEN4_SCRATCH_SPACE_SIZE_4K 2
+#define GEN4_SCRATCH_SPACE_SIZE_8K 3
+#define GEN4_SCRATCH_SPACE_SIZE_16K 4
+#define GEN4_SCRATCH_SPACE_SIZE_32K 5
+#define GEN4_SCRATCH_SPACE_SIZE_64K 6
+#define GEN4_SCRATCH_SPACE_SIZE_128K 7
+#define GEN4_SCRATCH_SPACE_SIZE_256K 8
+#define GEN4_SCRATCH_SPACE_SIZE_512K 9
+#define GEN4_SCRATCH_SPACE_SIZE_1M 10
+#define GEN4_SCRATCH_SPACE_SIZE_2M 11
+
+
+
+
+#define CMD_URB_FENCE 0x6000
+#define CMD_CONST_BUFFER_STATE 0x6001
+#define CMD_CONST_BUFFER 0x6002
+
+#define CMD_STATE_BASE_ADDRESS 0x6101
+#define CMD_STATE_INSN_POINTER 0x6102
+#define CMD_PIPELINE_SELECT 0x6104
+
+#define CMD_PIPELINED_STATE_POINTERS 0x7800
+#define CMD_BINDING_TABLE_PTRS 0x7801
+#define CMD_VERTEX_BUFFER 0x7808
+#define CMD_VERTEX_ELEMENT 0x7809
+#define CMD_INDEX_BUFFER 0x780a
+#define CMD_VF_STATISTICS 0x780b
+
+#define CMD_DRAW_RECT 0x7900
+#define CMD_BLEND_CONSTANT_COLOR 0x7901
+#define CMD_CHROMA_KEY 0x7904
+#define CMD_DEPTH_BUFFER 0x7905
+#define CMD_POLY_STIPPLE_OFFSET 0x7906
+#define CMD_POLY_STIPPLE_PATTERN 0x7907
+#define CMD_LINE_STIPPLE_PATTERN 0x7908
+#define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7908
+
+#define CMD_PIPE_CONTROL 0x7a00
+
+#define CMD_3D_PRIM 0x7b00
+
+#define CMD_MI_FLUSH 0x0200
+
+
+/* Various values from the R0 vertex header:
+ */
+#define R02_PRIM_END 0x1
+#define R02_PRIM_START 0x2
+
+/* media pipeline */
+
+#define GEN4_VFE_MODE_GENERIC 0x0
+#define GEN4_VFE_MODE_VLD_MPEG2 0x1
+#define GEN4_VFE_MODE_IS 0x2
+#define GEN4_VFE_MODE_AVC_MC 0x4
+#define GEN4_VFE_MODE_AVC_IT 0x7
+#define GEN4_VFE_MODE_VC1_IT 0xB
+
+#define GEN4_VFE_DEBUG_COUNTER_FREE 0
+#define GEN4_VFE_DEBUG_COUNTER_FROZEN 1
+#define GEN4_VFE_DEBUG_COUNTER_ONCE 2
+#define GEN4_VFE_DEBUG_COUNTER_ALWAYS 3
+
+/* VLD_STATE */
+#define GEN4_MPEG_TOP_FIELD 1
+#define GEN4_MPEG_BOTTOM_FIELD 2
+#define GEN4_MPEG_FRAME 3
+#define GEN4_MPEG_QSCALE_LINEAR 0
+#define GEN4_MPEG_QSCALE_NONLINEAR 1
+#define GEN4_MPEG_ZIGZAG_SCAN 0
+#define GEN4_MPEG_ALTER_VERTICAL_SCAN 1
+#define GEN4_MPEG_I_PICTURE 1
+#define GEN4_MPEG_P_PICTURE 2
+#define GEN4_MPEG_B_PICTURE 3
+
+/* Command packets:
+ */
+struct header
+{
+ unsigned int length:16;
+ unsigned int opcode:16;
+};
+
+
+union header_union
+{
+ struct header bits;
+ unsigned int dword;
+};
+
+struct gen4_3d_control
+{
+ struct
+ {
+ unsigned int length:8;
+ unsigned int notify_enable:1;
+ unsigned int pad:3;
+ unsigned int wc_flush_enable:1;
+ unsigned int depth_stall_enable:1;
+ unsigned int operation:2;
+ unsigned int opcode:16;
+ } header;
+
+ struct
+ {
+ unsigned int pad:2;
+ unsigned int dest_addr_type:1;
+ unsigned int dest_addr:29;
+ } dest;
+
+ unsigned int dword2;
+ unsigned int dword3;
+};
+
+
+struct gen4_3d_primitive
+{
+ struct
+ {
+ unsigned int length:8;
+ unsigned int pad:2;
+ unsigned int topology:5;
+ unsigned int indexed:1;
+ unsigned int opcode:16;
+ } header;
+
+ unsigned int verts_per_instance;
+ unsigned int start_vert_location;
+ unsigned int instance_count;
+ unsigned int start_instance_location;
+ unsigned int base_vert_location;
+};
+
+/* These seem to be passed around as function args, so it works out
+ * better to keep them as #defines:
+ */
+#define GEN4_FLUSH_READ_CACHE 0x1
+#define GEN4_FLUSH_STATE_CACHE 0x2
+#define GEN4_INHIBIT_FLUSH_RENDER_CACHE 0x4
+#define GEN4_FLUSH_SNAPSHOT_COUNTERS 0x8
+
+struct gen4_mi_flush
+{
+ unsigned int flags:4;
+ unsigned int pad:12;
+ unsigned int opcode:16;
+};
+
+struct gen4_vf_statistics
+{
+ unsigned int statistics_enable:1;
+ unsigned int pad:15;
+ unsigned int opcode:16;
+};
+
+
+
+struct gen4_binding_table_pointers
+{
+ struct header header;
+ unsigned int vs;
+ unsigned int gs;
+ unsigned int clp;
+ unsigned int sf;
+ unsigned int wm;
+};
+
+
+struct gen4_blend_constant_color
+{
+ struct header header;
+ float blend_constant_color[4];
+};
+
+
+struct gen4_depthbuffer
+{
+ union header_union header;
+
+ union {
+ struct {
+ unsigned int pitch:18;
+ unsigned int format:3;
+ unsigned int pad:4;
+ unsigned int depth_offset_disable:1;
+ unsigned int tile_walk:1;
+ unsigned int tiled_surface:1;
+ unsigned int pad2:1;
+ unsigned int surface_type:3;
+ } bits;
+ unsigned int dword;
+ } dword1;
+
+ unsigned int dword2_base_addr;
+
+ union {
+ struct {
+ unsigned int pad:1;
+ unsigned int mipmap_layout:1;
+ unsigned int lod:4;
+ unsigned int width:13;
+ unsigned int height:13;
+ } bits;
+ unsigned int dword;
+ } dword3;
+
+ union {
+ struct {
+ unsigned int pad:12;
+ unsigned int min_array_element:9;
+ unsigned int depth:11;
+ } bits;
+ unsigned int dword;
+ } dword4;
+};
+
+struct gen4_drawrect
+{
+ struct header header;
+ unsigned int xmin:16;
+ unsigned int ymin:16;
+ unsigned int xmax:16;
+ unsigned int ymax:16;
+ unsigned int xorg:16;
+ unsigned int yorg:16;
+};
+
+
+
+
+struct gen4_global_depth_offset_clamp
+{
+ struct header header;
+ float depth_offset_clamp;
+};
+
+struct gen4_indexbuffer
+{
+ union {
+ struct
+ {
+ unsigned int length:8;
+ unsigned int index_format:2;
+ unsigned int cut_index_enable:1;
+ unsigned int pad:5;
+ unsigned int opcode:16;
+ } bits;
+ unsigned int dword;
+
+ } header;
+
+ unsigned int buffer_start;
+ unsigned int buffer_end;
+};
+
+
+struct gen4_line_stipple
+{
+ struct header header;
+
+ struct
+ {
+ unsigned int pattern:16;
+ unsigned int pad:16;
+ } bits0;
+
+ struct
+ {
+ unsigned int repeat_count:9;
+ unsigned int pad:7;
+ unsigned int inverse_repeat_count:16;
+ } bits1;
+};
+
+
+struct gen4_pipelined_state_pointers
+{
+ struct header header;
+
+ struct {
+ unsigned int pad:5;
+ unsigned int offset:27;
+ } vs;
+
+ struct
+ {
+ unsigned int enable:1;
+ unsigned int pad:4;
+ unsigned int offset:27;
+ } gs;
+
+ struct
+ {
+ unsigned int enable:1;
+ unsigned int pad:4;
+ unsigned int offset:27;
+ } clp;
+
+ struct
+ {
+ unsigned int pad:5;
+ unsigned int offset:27;
+ } sf;
+
+ struct
+ {
+ unsigned int pad:5;
+ unsigned int offset:27;
+ } wm;
+
+ struct
+ {
+ unsigned int pad:5;
+ unsigned int offset:27; /* KW: check me! */
+ } cc;
+};
+
+
+struct gen4_polygon_stipple_offset
+{
+ struct header header;
+
+ struct {
+ unsigned int y_offset:5;
+ unsigned int pad:3;
+ unsigned int x_offset:5;
+ unsigned int pad0:19;
+ } bits0;
+};
+
+
+
+struct gen4_polygon_stipple
+{
+ struct header header;
+ unsigned int stipple[32];
+};
+
+
+
+struct gen4_pipeline_select
+{
+ struct
+ {
+ unsigned int pipeline_select:1;
+ unsigned int pad:15;
+ unsigned int opcode:16;
+ } header;
+};
+
+
+struct gen4_pipe_control
+{
+ struct
+ {
+ unsigned int length:8;
+ unsigned int notify_enable:1;
+ unsigned int pad:2;
+ unsigned int instruction_state_cache_flush_enable:1;
+ unsigned int write_cache_flush_enable:1;
+ unsigned int depth_stall_enable:1;
+ unsigned int post_sync_operation:2;
+
+ unsigned int opcode:16;
+ } header;
+
+ struct
+ {
+ unsigned int pad:2;
+ unsigned int dest_addr_type:1;
+ unsigned int dest_addr:29;
+ } bits1;
+
+ unsigned int data0;
+ unsigned int data1;
+};
+
+
+struct gen4_urb_fence
+{
+ struct
+ {
+ unsigned int length:8;
+ unsigned int vs_realloc:1;
+ unsigned int gs_realloc:1;
+ unsigned int clp_realloc:1;
+ unsigned int sf_realloc:1;
+ unsigned int vfe_realloc:1;
+ unsigned int cs_realloc:1;
+ unsigned int pad:2;
+ unsigned int opcode:16;
+ } header;
+
+ struct
+ {
+ unsigned int vs_fence:10;
+ unsigned int gs_fence:10;
+ unsigned int clp_fence:10;
+ unsigned int pad:2;
+ } bits0;
+
+ struct
+ {
+ unsigned int sf_fence:10;
+ unsigned int vf_fence:10;
+ unsigned int cs_fence:10;
+ unsigned int pad:2;
+ } bits1;
+};
+
+struct gen4_constant_buffer_state /* previously gen4_command_streamer */
+{
+ struct header header;
+
+ struct
+ {
+ unsigned int nr_urb_entries:3;
+ unsigned int pad:1;
+ unsigned int urb_entry_size:5;
+ unsigned int pad0:23;
+ } bits0;
+};
+
+struct gen4_constant_buffer
+{
+ struct
+ {
+ unsigned int length:8;
+ unsigned int valid:1;
+ unsigned int pad:7;
+ unsigned int opcode:16;
+ } header;
+
+ struct
+ {
+ unsigned int buffer_length:6;
+ unsigned int buffer_address:26;
+ } bits0;
+};
+
+struct gen4_state_base_address
+{
+ struct header header;
+
+ struct
+ {
+ unsigned int modify_enable:1;
+ unsigned int pad:4;
+ unsigned int general_state_address:27;
+ } bits0;
+
+ struct
+ {
+ unsigned int modify_enable:1;
+ unsigned int pad:4;
+ unsigned int surface_state_address:27;
+ } bits1;
+
+ struct
+ {
+ unsigned int modify_enable:1;
+ unsigned int pad:4;
+ unsigned int indirect_object_state_address:27;
+ } bits2;
+
+ struct
+ {
+ unsigned int modify_enable:1;
+ unsigned int pad:11;
+ unsigned int general_state_upper_bound:20;
+ } bits3;
+
+ struct
+ {
+ unsigned int modify_enable:1;
+ unsigned int pad:11;
+ unsigned int indirect_object_state_upper_bound:20;
+ } bits4;
+};
+
+struct gen4_state_prefetch
+{
+ struct header header;
+
+ struct
+ {
+ unsigned int prefetch_count:3;
+ unsigned int pad:3;
+ unsigned int prefetch_pointer:26;
+ } bits0;
+};
+
+struct gen4_system_instruction_pointer
+{
+ struct header header;
+
+ struct
+ {
+ unsigned int pad:4;
+ unsigned int system_instruction_pointer:28;
+ } bits0;
+};
+
+
+
+
+/* State structs for the various fixed function units:
+ */
+
+
+struct thread0
+{
+ unsigned int pad0:1;
+ unsigned int grf_reg_count:3;
+ unsigned int pad1:2;
+ unsigned int kernel_start_pointer:26;
+};
+
+struct thread1
+{
+ unsigned int ext_halt_exception_enable:1;
+ unsigned int sw_exception_enable:1;
+ unsigned int mask_stack_exception_enable:1;
+ unsigned int timeout_exception_enable:1;
+ unsigned int illegal_op_exception_enable:1;
+ unsigned int pad0:3;
+ unsigned int depth_coef_urb_read_offset:6; /* WM only */
+ unsigned int pad1:2;
+ unsigned int floating_point_mode:1;
+ unsigned int thread_priority:1;
+ unsigned int binding_table_entry_count:8;
+ unsigned int pad3:5;
+ unsigned int single_program_flow:1;
+};
+
+struct thread2
+{
+ unsigned int per_thread_scratch_space:4;
+ unsigned int pad0:6;
+ unsigned int scratch_space_base_pointer:22;
+};
+
+
+struct thread3
+{
+ unsigned int dispatch_grf_start_reg:4;
+ unsigned int urb_entry_read_offset:6;
+ unsigned int pad0:1;
+ unsigned int urb_entry_read_length:6;
+ unsigned int pad1:1;
+ unsigned int const_urb_entry_read_offset:6;
+ unsigned int pad2:1;
+ unsigned int const_urb_entry_read_length:6;
+ unsigned int pad3:1;
+};
+
+
+
+struct gen4_clip_unit_state
+{
+ struct thread0 thread0;
+ struct thread1 thread1;
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct
+ {
+ unsigned int pad0:9;
+ unsigned int gs_output_stats:1; /* not always */
+ unsigned int stats_enable:1;
+ unsigned int nr_urb_entries:7;
+ unsigned int pad1:1;
+ unsigned int urb_entry_allocation_size:5;
+ unsigned int pad2:1;
+ unsigned int max_threads:6; /* may be less */
+ unsigned int pad3:1;
+ } thread4;
+
+ struct
+ {
+ unsigned int pad0:13;
+ unsigned int clip_mode:3;
+ unsigned int userclip_enable_flags:8;
+ unsigned int userclip_must_clip:1;
+ unsigned int pad1:1;
+ unsigned int guard_band_enable:1;
+ unsigned int viewport_z_clip_enable:1;
+ unsigned int viewport_xy_clip_enable:1;
+ unsigned int vertex_position_space:1;
+ unsigned int api_mode:1;
+ unsigned int pad2:1;
+ } clip5;
+
+ struct
+ {
+ unsigned int pad0:5;
+ unsigned int clipper_viewport_state_ptr:27;
+ } clip6;
+
+
+ float viewport_xmin;
+ float viewport_xmax;
+ float viewport_ymin;
+ float viewport_ymax;
+};
+
+
+
+struct gen4_cc_unit_state
+{
+ struct
+ {
+ unsigned int pad0:3;
+ unsigned int bf_stencil_pass_depth_pass_op:3;
+ unsigned int bf_stencil_pass_depth_fail_op:3;
+ unsigned int bf_stencil_fail_op:3;
+ unsigned int bf_stencil_func:3;
+ unsigned int bf_stencil_enable:1;
+ unsigned int pad1:2;
+ unsigned int stencil_write_enable:1;
+ unsigned int stencil_pass_depth_pass_op:3;
+ unsigned int stencil_pass_depth_fail_op:3;
+ unsigned int stencil_fail_op:3;
+ unsigned int stencil_func:3;
+ unsigned int stencil_enable:1;
+ } cc0;
+
+
+ struct
+ {
+ unsigned int bf_stencil_ref:8;
+ unsigned int stencil_write_mask:8;
+ unsigned int stencil_test_mask:8;
+ unsigned int stencil_ref:8;
+ } cc1;
+
+
+ struct
+ {
+ unsigned int logicop_enable:1;
+ unsigned int pad0:10;
+ unsigned int depth_write_enable:1;
+ unsigned int depth_test_function:3;
+ unsigned int depth_test:1;
+ unsigned int bf_stencil_write_mask:8;
+ unsigned int bf_stencil_test_mask:8;
+ } cc2;
+
+
+ struct
+ {
+ unsigned int pad0:8;
+ unsigned int alpha_test_func:3;
+ unsigned int alpha_test:1;
+ unsigned int blend_enable:1;
+ unsigned int ia_blend_enable:1;
+ unsigned int pad1:1;
+ unsigned int alpha_test_format:1;
+ unsigned int pad2:16;
+ } cc3;
+
+ struct
+ {
+ unsigned int pad0:5;
+ unsigned int cc_viewport_state_offset:27;
+ } cc4;
+
+ struct
+ {
+ unsigned int pad0:2;
+ unsigned int ia_dest_blend_factor:5;
+ unsigned int ia_src_blend_factor:5;
+ unsigned int ia_blend_function:3;
+ unsigned int statistics_enable:1;
+ unsigned int logicop_func:4;
+ unsigned int pad1:11;
+ unsigned int dither_enable:1;
+ } cc5;
+
+ struct
+ {
+ unsigned int clamp_post_alpha_blend:1;
+ unsigned int clamp_pre_alpha_blend:1;
+ unsigned int clamp_range:2;
+ unsigned int pad0:11;
+ unsigned int y_dither_offset:2;
+ unsigned int x_dither_offset:2;
+ unsigned int dest_blend_factor:5;
+ unsigned int src_blend_factor:5;
+ unsigned int blend_function:3;
+ } cc6;
+
+ struct {
+ union {
+ float f;
+ unsigned char ub[4];
+ } alpha_ref;
+ } cc7;
+};
+
+
+
+struct gen4_sf_unit_state
+{
+ struct thread0 thread0;
+ struct {
+ unsigned int pad0:7;
+ unsigned int sw_exception_enable:1;
+ unsigned int pad1:3;
+ unsigned int mask_stack_exception_enable:1;
+ unsigned int pad2:1;
+ unsigned int illegal_op_exception_enable:1;
+ unsigned int pad3:2;
+ unsigned int floating_point_mode:1;
+ unsigned int thread_priority:1;
+ unsigned int binding_table_entry_count:8;
+ unsigned int pad4:5;
+ unsigned int single_program_flow:1;
+ } sf1;
+
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct
+ {
+ unsigned int pad0:10;
+ unsigned int stats_enable:1;
+ unsigned int nr_urb_entries:7;
+ unsigned int pad1:1;
+ unsigned int urb_entry_allocation_size:5;
+ unsigned int pad2:1;
+ unsigned int max_threads:6;
+ unsigned int pad3:1;
+ } thread4;
+
+ struct
+ {
+ unsigned int front_winding:1;
+ unsigned int viewport_transform:1;
+ unsigned int pad0:3;
+ unsigned int sf_viewport_state_offset:27;
+ } sf5;
+
+ struct
+ {
+ unsigned int pad0:9;
+ unsigned int dest_org_vbias:4;
+ unsigned int dest_org_hbias:4;
+ unsigned int scissor:1;
+ unsigned int disable_2x2_trifilter:1;
+ unsigned int disable_zero_pix_trifilter:1;
+ unsigned int point_rast_rule:2;
+ unsigned int line_endcap_aa_region_width:2;
+ unsigned int line_width:4;
+ unsigned int fast_scissor_disable:1;
+ unsigned int cull_mode:2;
+ unsigned int aa_enable:1;
+ } sf6;
+
+ struct
+ {
+ unsigned int point_size:11;
+ unsigned int use_point_size_state:1;
+ unsigned int subpixel_precision:1;
+ unsigned int sprite_point:1;
+ unsigned int pad0:11;
+ unsigned int trifan_pv:2;
+ unsigned int linestrip_pv:2;
+ unsigned int tristrip_pv:2;
+ unsigned int line_last_pixel_enable:1;
+ } sf7;
+
+};
+
+
+struct gen4_gs_unit_state
+{
+ struct thread0 thread0;
+ struct thread1 thread1;
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct
+ {
+ unsigned int pad0:10;
+ unsigned int stats_enable:1;
+ unsigned int nr_urb_entries:7;
+ unsigned int pad1:1;
+ unsigned int urb_entry_allocation_size:5;
+ unsigned int pad2:1;
+ unsigned int max_threads:1;
+ unsigned int pad3:6;
+ } thread4;
+
+ struct
+ {
+ unsigned int sampler_count:3;
+ unsigned int pad0:2;
+ unsigned int sampler_state_pointer:27;
+ } gs5;
+
+
+ struct
+ {
+ unsigned int max_vp_index:4;
+ unsigned int pad0:26;
+ unsigned int reorder_enable:1;
+ unsigned int pad1:1;
+ } gs6;
+};
+
+
+struct gen4_vs_unit_state
+{
+ struct thread0 thread0;
+ struct thread1 thread1;
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct
+ {
+ unsigned int pad0:10;
+ unsigned int stats_enable:1;
+ unsigned int nr_urb_entries:7;
+ unsigned int pad1:1;
+ unsigned int urb_entry_allocation_size:5;
+ unsigned int pad2:1;
+ unsigned int max_threads:4;
+ unsigned int pad3:3;
+ } thread4;
+
+ struct
+ {
+ unsigned int sampler_count:3;
+ unsigned int pad0:2;
+ unsigned int sampler_state_pointer:27;
+ } vs5;
+
+ struct
+ {
+ unsigned int vs_enable:1;
+ unsigned int vert_cache_disable:1;
+ unsigned int pad0:30;
+ } vs6;
+};
+
+
+struct gen4_wm_unit_state
+{
+ struct thread0 thread0;
+ struct thread1 thread1;
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct {
+ unsigned int stats_enable:1;
+ unsigned int pad0:1;
+ unsigned int sampler_count:3;
+ unsigned int sampler_state_pointer:27;
+ } wm4;
+
+ struct
+ {
+ unsigned int enable_8_pix:1;
+ unsigned int enable_16_pix:1;
+ unsigned int enable_32_pix:1;
+ unsigned int pad0:7;
+ unsigned int legacy_global_depth_bias:1;
+ unsigned int line_stipple:1;
+ unsigned int depth_offset:1;
+ unsigned int polygon_stipple:1;
+ unsigned int line_aa_region_width:2;
+ unsigned int line_endcap_aa_region_width:2;
+ unsigned int early_depth_test:1;
+ unsigned int thread_dispatch_enable:1;
+ unsigned int program_uses_depth:1;
+ unsigned int program_computes_depth:1;
+ unsigned int program_uses_killpixel:1;
+ unsigned int legacy_line_rast: 1;
+ unsigned int transposed_urb_read:1;
+ unsigned int max_threads:7;
+ } wm5;
+
+ float global_depth_offset_constant;
+ float global_depth_offset_scale;
+
+ struct {
+ unsigned int pad0:1;
+ unsigned int grf_reg_count_1:3;
+ unsigned int pad1:2;
+ unsigned int kernel_start_pointer_1:26;
+ } wm8;
+
+ struct {
+ unsigned int pad0:1;
+ unsigned int grf_reg_count_2:3;
+ unsigned int pad1:2;
+ unsigned int kernel_start_pointer_2:26;
+ } wm9;
+
+ struct {
+ unsigned int pad0:1;
+ unsigned int grf_reg_count_3:3;
+ unsigned int pad1:2;
+ unsigned int kernel_start_pointer_3:26;
+ } wm10;
+};
+
+struct gen4_wm_unit_state_padded {
+ struct gen4_wm_unit_state state;
+ char pad[64 - sizeof(struct gen4_wm_unit_state)];
+};
+
+/* The hardware supports two different modes for border color. The
+ * default (OpenGL) mode uses floating-point color channels, while the
+ * legacy mode uses 4 bytes.
+ *
+ * More significantly, the legacy mode respects the components of the
+ * border color for channels not present in the source, (whereas the
+ * default mode will ignore the border color's alpha channel and use
+ * alpha==1 for an RGB source, for example).
+ *
+ * The legacy mode matches the semantics specified by the Render
+ * extension.
+ */
+struct gen4_sampler_default_border_color {
+ float color[4];
+};
+
+struct gen4_sampler_legacy_border_color {
+ uint8_t color[4];
+};
+
+struct gen4_sampler_state
+{
+
+ struct
+ {
+ unsigned int shadow_function:3;
+ unsigned int lod_bias:11;
+ unsigned int min_filter:3;
+ unsigned int mag_filter:3;
+ unsigned int mip_filter:2;
+ unsigned int base_level:5;
+ unsigned int pad:1;
+ unsigned int lod_preclamp:1;
+ unsigned int border_color_mode:1;
+ unsigned int pad0:1;
+ unsigned int disable:1;
+ } ss0;
+
+ struct
+ {
+ unsigned int r_wrap_mode:3;
+ unsigned int t_wrap_mode:3;
+ unsigned int s_wrap_mode:3;
+ unsigned int pad:3;
+ unsigned int max_lod:10;
+ unsigned int min_lod:10;
+ } ss1;
+
+
+ struct
+ {
+ unsigned int pad:5;
+ unsigned int border_color_pointer:27;
+ } ss2;
+
+ struct
+ {
+ unsigned int pad:19;
+ unsigned int max_aniso:3;
+ unsigned int chroma_key_mode:1;
+ unsigned int chroma_key_index:2;
+ unsigned int chroma_key_enable:1;
+ unsigned int monochrome_filter_width:3;
+ unsigned int monochrome_filter_height:3;
+ } ss3;
+};
+
+
+struct gen4_clipper_viewport
+{
+ float xmin;
+ float xmax;
+ float ymin;
+ float ymax;
+};
+
+struct gen4_cc_viewport
+{
+ float min_depth;
+ float max_depth;
+};
+
+struct gen4_sf_viewport
+{
+ struct {
+ float m00;
+ float m11;
+ float m22;
+ float m30;
+ float m31;
+ float m32;
+ } viewport;
+
+ struct {
+ short xmin;
+ short ymin;
+ short xmax;
+ short ymax;
+ } scissor;
+};
+
+/* Documented in the subsystem/shared-functions/sampler chapter...
+ */
+struct gen4_surface_state
+{
+ struct {
+ unsigned int cube_pos_z:1;
+ unsigned int cube_neg_z:1;
+ unsigned int cube_pos_y:1;
+ unsigned int cube_neg_y:1;
+ unsigned int cube_pos_x:1;
+ unsigned int cube_neg_x:1;
+ unsigned int pad:3;
+ unsigned int render_cache_read_mode:1;
+ unsigned int mipmap_layout_mode:1;
+ unsigned int vert_line_stride_ofs:1;
+ unsigned int vert_line_stride:1;
+ unsigned int color_blend:1;
+ unsigned int writedisable_blue:1;
+ unsigned int writedisable_green:1;
+ unsigned int writedisable_red:1;
+ unsigned int writedisable_alpha:1;
+ unsigned int surface_format:9;
+ unsigned int data_return_format:1;
+ unsigned int pad0:1;
+ unsigned int surface_type:3;
+ } ss0;
+
+ struct {
+ unsigned int base_addr;
+ } ss1;
+
+ struct {
+ unsigned int render_target_rotation:2;
+ unsigned int mip_count:4;
+ unsigned int width:13;
+ unsigned int height:13;
+ } ss2;
+
+ struct {
+ unsigned int tile_walk:1;
+ unsigned int tiled_surface:1;
+ unsigned int pad:1;
+ unsigned int pitch:18;
+ unsigned int depth:11;
+ } ss3;
+
+ struct {
+ unsigned int pad:19;
+ unsigned int min_array_elt:9;
+ unsigned int min_lod:4;
+ } ss4;
+
+ struct {
+ unsigned int pad:20;
+ unsigned int y_offset:4;
+ unsigned int pad2:1;
+ unsigned int x_offset:7;
+ } ss5;
+};
+
+
+
+struct gen4_vertex_buffer_state
+{
+ struct {
+ unsigned int pitch:11;
+ unsigned int pad:15;
+ unsigned int access_type:1;
+ unsigned int vb_index:5;
+ } vb0;
+
+ unsigned int start_addr;
+ unsigned int max_index;
+#if 1
+ unsigned int instance_data_step_rate; /* not included for sequential/random vertices? */
+#endif
+};
+
+#define GEN4_VBP_MAX 17
+
+struct gen4_vb_array_state {
+ struct header header;
+ struct gen4_vertex_buffer_state vb[GEN4_VBP_MAX];
+};
+
+
+struct gen4_vertex_element_state
+{
+ struct
+ {
+ unsigned int src_offset:11;
+ unsigned int pad:5;
+ unsigned int src_format:9;
+ unsigned int pad0:1;
+ unsigned int valid:1;
+ unsigned int vertex_buffer_index:5;
+ } ve0;
+
+ struct
+ {
+ unsigned int dst_offset:8;
+ unsigned int pad:8;
+ unsigned int vfcomponent3:4;
+ unsigned int vfcomponent2:4;
+ unsigned int vfcomponent1:4;
+ unsigned int vfcomponent0:4;
+ } ve1;
+};
+
+#define GEN4_VEP_MAX 18
+
+struct gen4_vertex_element_packet {
+ struct header header;
+ struct gen4_vertex_element_state ve[GEN4_VEP_MAX]; /* note: less than _TNL_ATTRIB_MAX */
+};
+
+
+struct gen4_urb_immediate {
+ unsigned int opcode:4;
+ unsigned int offset:6;
+ unsigned int swizzle_control:2;
+ unsigned int pad:1;
+ unsigned int allocate:1;
+ unsigned int used:1;
+ unsigned int complete:1;
+ unsigned int response_length:4;
+ unsigned int msg_length:4;
+ unsigned int msg_target:4;
+ unsigned int pad1:3;
+ unsigned int end_of_thread:1;
+};
+
+/* Instruction format for the execution units:
+ */
+
+struct gen4_instruction
+{
+ struct
+ {
+ unsigned int opcode:7;
+ unsigned int pad:1;
+ unsigned int access_mode:1;
+ unsigned int mask_control:1;
+ unsigned int dependency_control:2;
+ unsigned int compression_control:2;
+ unsigned int thread_control:2;
+ unsigned int predicate_control:4;
+ unsigned int predicate_inverse:1;
+ unsigned int execution_size:3;
+ unsigned int destreg__conditonalmod:4; /* destreg - send, conditionalmod - others */
+ unsigned int pad0:2;
+ unsigned int debug_control:1;
+ unsigned int saturate:1;
+ } header;
+
+ union {
+ struct
+ {
+ unsigned int dest_reg_file:2;
+ unsigned int dest_reg_type:3;
+ unsigned int src0_reg_file:2;
+ unsigned int src0_reg_type:3;
+ unsigned int src1_reg_file:2;
+ unsigned int src1_reg_type:3;
+ unsigned int pad:1;
+ unsigned int dest_subreg_nr:5;
+ unsigned int dest_reg_nr:8;
+ unsigned int dest_horiz_stride:2;
+ unsigned int dest_address_mode:1;
+ } da1;
+
+ struct
+ {
+ unsigned int dest_reg_file:2;
+ unsigned int dest_reg_type:3;
+ unsigned int src0_reg_file:2;
+ unsigned int src0_reg_type:3;
+ unsigned int pad:6;
+ int dest_indirect_offset:10; /* offset against the deref'd address reg */
+ unsigned int dest_subreg_nr:3; /* subnr for the address reg a0.x */
+ unsigned int dest_horiz_stride:2;
+ unsigned int dest_address_mode:1;
+ } ia1;
+
+ struct
+ {
+ unsigned int dest_reg_file:2;
+ unsigned int dest_reg_type:3;
+ unsigned int src0_reg_file:2;
+ unsigned int src0_reg_type:3;
+ unsigned int src1_reg_file:2;
+ unsigned int src1_reg_type:3;
+ unsigned int pad0:1;
+ unsigned int dest_writemask:4;
+ unsigned int dest_subreg_nr:1;
+ unsigned int dest_reg_nr:8;
+ unsigned int pad1:2;
+ unsigned int dest_address_mode:1;
+ } da16;
+
+ struct
+ {
+ unsigned int dest_reg_file:2;
+ unsigned int dest_reg_type:3;
+ unsigned int src0_reg_file:2;
+ unsigned int src0_reg_type:3;
+ unsigned int pad0:6;
+ unsigned int dest_writemask:4;
+ int dest_indirect_offset:6;
+ unsigned int dest_subreg_nr:3;
+ unsigned int pad1:2;
+ unsigned int dest_address_mode:1;
+ } ia16;
+ } bits1;
+
+
+ union {
+ struct
+ {
+ unsigned int src0_subreg_nr:5;
+ unsigned int src0_reg_nr:8;
+ unsigned int src0_abs:1;
+ unsigned int src0_negate:1;
+ unsigned int src0_address_mode:1;
+ unsigned int src0_horiz_stride:2;
+ unsigned int src0_width:3;
+ unsigned int src0_vert_stride:4;
+ unsigned int flag_reg_nr:1;
+ unsigned int pad:6;
+ } da1;
+
+ struct
+ {
+ int src0_indirect_offset:10;
+ unsigned int src0_subreg_nr:3;
+ unsigned int src0_abs:1;
+ unsigned int src0_negate:1;
+ unsigned int src0_address_mode:1;
+ unsigned int src0_horiz_stride:2;
+ unsigned int src0_width:3;
+ unsigned int src0_vert_stride:4;
+ unsigned int flag_reg_nr:1;
+ unsigned int pad:6;
+ } ia1;
+
+ struct
+ {
+ unsigned int src0_swz_x:2;
+ unsigned int src0_swz_y:2;
+ unsigned int src0_subreg_nr:1;
+ unsigned int src0_reg_nr:8;
+ unsigned int src0_abs:1;
+ unsigned int src0_negate:1;
+ unsigned int src0_address_mode:1;
+ unsigned int src0_swz_z:2;
+ unsigned int src0_swz_w:2;
+ unsigned int pad0:1;
+ unsigned int src0_vert_stride:4;
+ unsigned int flag_reg_nr:1;
+ unsigned int pad1:6;
+ } da16;
+
+ struct
+ {
+ unsigned int src0_swz_x:2;
+ unsigned int src0_swz_y:2;
+ int src0_indirect_offset:6;
+ unsigned int src0_subreg_nr:3;
+ unsigned int src0_abs:1;
+ unsigned int src0_negate:1;
+ unsigned int src0_address_mode:1;
+ unsigned int src0_swz_z:2;
+ unsigned int src0_swz_w:2;
+ unsigned int pad0:1;
+ unsigned int src0_vert_stride:4;
+ unsigned int flag_reg_nr:1;
+ unsigned int pad1:6;
+ } ia16;
+
+ } bits2;
+
+ union
+ {
+ struct
+ {
+ unsigned int src1_subreg_nr:5;
+ unsigned int src1_reg_nr:8;
+ unsigned int src1_abs:1;
+ unsigned int src1_negate:1;
+ unsigned int pad:1;
+ unsigned int src1_horiz_stride:2;
+ unsigned int src1_width:3;
+ unsigned int src1_vert_stride:4;
+ unsigned int pad0:7;
+ } da1;
+
+ struct
+ {
+ unsigned int src1_swz_x:2;
+ unsigned int src1_swz_y:2;
+ unsigned int src1_subreg_nr:1;
+ unsigned int src1_reg_nr:8;
+ unsigned int src1_abs:1;
+ unsigned int src1_negate:1;
+ unsigned int pad0:1;
+ unsigned int src1_swz_z:2;
+ unsigned int src1_swz_w:2;
+ unsigned int pad1:1;
+ unsigned int src1_vert_stride:4;
+ unsigned int pad2:7;
+ } da16;
+
+ struct
+ {
+ int src1_indirect_offset:10;
+ unsigned int src1_subreg_nr:3;
+ unsigned int src1_abs:1;
+ unsigned int src1_negate:1;
+ unsigned int pad0:1;
+ unsigned int src1_horiz_stride:2;
+ unsigned int src1_width:3;
+ unsigned int src1_vert_stride:4;
+ unsigned int flag_reg_nr:1;
+ unsigned int pad1:6;
+ } ia1;
+
+ struct
+ {
+ unsigned int src1_swz_x:2;
+ unsigned int src1_swz_y:2;
+ int src1_indirect_offset:6;
+ unsigned int src1_subreg_nr:3;
+ unsigned int src1_abs:1;
+ unsigned int src1_negate:1;
+ unsigned int pad0:1;
+ unsigned int src1_swz_z:2;
+ unsigned int src1_swz_w:2;
+ unsigned int pad1:1;
+ unsigned int src1_vert_stride:4;
+ unsigned int flag_reg_nr:1;
+ unsigned int pad2:6;
+ } ia16;
+
+
+ struct
+ {
+ int jump_count:16; /* note: signed */
+ unsigned int pop_count:4;
+ unsigned int pad0:12;
+ } if_else;
+
+ struct {
+ unsigned int function:4;
+ unsigned int int_type:1;
+ unsigned int precision:1;
+ unsigned int saturate:1;
+ unsigned int data_type:1;
+ unsigned int pad0:8;
+ unsigned int response_length:4;
+ unsigned int msg_length:4;
+ unsigned int msg_target:4;
+ unsigned int pad1:3;
+ unsigned int end_of_thread:1;
+ } math;
+
+ struct {
+ unsigned int binding_table_index:8;
+ unsigned int sampler:4;
+ unsigned int return_format:2;
+ unsigned int msg_type:2;
+ unsigned int response_length:4;
+ unsigned int msg_length:4;
+ unsigned int msg_target:4;
+ unsigned int pad1:3;
+ unsigned int end_of_thread:1;
+ } sampler;
+
+ struct gen4_urb_immediate urb;
+
+ struct {
+ unsigned int binding_table_index:8;
+ unsigned int msg_control:4;
+ unsigned int msg_type:2;
+ unsigned int target_cache:2;
+ unsigned int response_length:4;
+ unsigned int msg_length:4;
+ unsigned int msg_target:4;
+ unsigned int pad1:3;
+ unsigned int end_of_thread:1;
+ } dp_read;
+
+ struct {
+ unsigned int binding_table_index:8;
+ unsigned int msg_control:3;
+ unsigned int pixel_scoreboard_clear:1;
+ unsigned int msg_type:3;
+ unsigned int send_commit_msg:1;
+ unsigned int response_length:4;
+ unsigned int msg_length:4;
+ unsigned int msg_target:4;
+ unsigned int pad1:3;
+ unsigned int end_of_thread:1;
+ } dp_write;
+
+ struct {
+ unsigned int pad:16;
+ unsigned int response_length:4;
+ unsigned int msg_length:4;
+ unsigned int msg_target:4;
+ unsigned int pad1:3;
+ unsigned int end_of_thread:1;
+ } generic;
+
+ unsigned int ud;
+ } bits3;
+};
+
+/* media pipeline */
+
+struct gen4_vfe_state {
+ struct {
+ unsigned int per_thread_scratch_space:4;
+ unsigned int pad3:3;
+ unsigned int extend_vfe_state_present:1;
+ unsigned int pad2:2;
+ unsigned int scratch_base:22;
+ } vfe0;
+
+ struct {
+ unsigned int debug_counter_control:2;
+ unsigned int children_present:1;
+ unsigned int vfe_mode:4;
+ unsigned int pad2:2;
+ unsigned int num_urb_entries:7;
+ unsigned int urb_entry_alloc_size:9;
+ unsigned int max_threads:7;
+ } vfe1;
+
+ struct {
+ unsigned int pad4:4;
+ unsigned int interface_descriptor_base:28;
+ } vfe2;
+};
+
+struct gen4_vld_state {
+ struct {
+ unsigned int pad6:6;
+ unsigned int scan_order:1;
+ unsigned int intra_vlc_format:1;
+ unsigned int quantizer_scale_type:1;
+ unsigned int concealment_motion_vector:1;
+ unsigned int frame_predict_frame_dct:1;
+ unsigned int top_field_first:1;
+ unsigned int picture_structure:2;
+ unsigned int intra_dc_precision:2;
+ unsigned int f_code_0_0:4;
+ unsigned int f_code_0_1:4;
+ unsigned int f_code_1_0:4;
+ unsigned int f_code_1_1:4;
+ } vld0;
+
+ struct {
+ unsigned int pad2:9;
+ unsigned int picture_coding_type:2;
+ unsigned int pad:21;
+ } vld1;
+
+ struct {
+ unsigned int index_0:4;
+ unsigned int index_1:4;
+ unsigned int index_2:4;
+ unsigned int index_3:4;
+ unsigned int index_4:4;
+ unsigned int index_5:4;
+ unsigned int index_6:4;
+ unsigned int index_7:4;
+ } desc_remap_table0;
+
+ struct {
+ unsigned int index_8:4;
+ unsigned int index_9:4;
+ unsigned int index_10:4;
+ unsigned int index_11:4;
+ unsigned int index_12:4;
+ unsigned int index_13:4;
+ unsigned int index_14:4;
+ unsigned int index_15:4;
+ } desc_remap_table1;
+};
+
+struct gen4_interface_descriptor {
+ struct {
+ unsigned int grf_reg_blocks:4;
+ unsigned int pad:2;
+ unsigned int kernel_start_pointer:26;
+ } desc0;
+
+ struct {
+ unsigned int pad:7;
+ unsigned int software_exception:1;
+ unsigned int pad2:3;
+ unsigned int maskstack_exception:1;
+ unsigned int pad3:1;
+ unsigned int illegal_opcode_exception:1;
+ unsigned int pad4:2;
+ unsigned int floating_point_mode:1;
+ unsigned int thread_priority:1;
+ unsigned int single_program_flow:1;
+ unsigned int pad5:1;
+ unsigned int const_urb_entry_read_offset:6;
+ unsigned int const_urb_entry_read_len:6;
+ } desc1;
+
+ struct {
+ unsigned int pad:2;
+ unsigned int sampler_count:3;
+ unsigned int sampler_state_pointer:27;
+ } desc2;
+
+ struct {
+ unsigned int binding_table_entry_count:5;
+ unsigned int binding_table_pointer:27;
+ } desc3;
+};
+
+struct gen6_blend_state
+{
+ struct {
+ unsigned int dest_blend_factor:5;
+ unsigned int source_blend_factor:5;
+ unsigned int pad3:1;
+ unsigned int blend_func:3;
+ unsigned int pad2:1;
+ unsigned int ia_dest_blend_factor:5;
+ unsigned int ia_source_blend_factor:5;
+ unsigned int pad1:1;
+ unsigned int ia_blend_func:3;
+ unsigned int pad0:1;
+ unsigned int ia_blend_enable:1;
+ unsigned int blend_enable:1;
+ } blend0;
+
+ struct {
+ unsigned int post_blend_clamp_enable:1;
+ unsigned int pre_blend_clamp_enable:1;
+ unsigned int clamp_range:2;
+ unsigned int pad0:4;
+ unsigned int x_dither_offset:2;
+ unsigned int y_dither_offset:2;
+ unsigned int dither_enable:1;
+ unsigned int alpha_test_func:3;
+ unsigned int alpha_test_enable:1;
+ unsigned int pad1:1;
+ unsigned int logic_op_func:4;
+ unsigned int logic_op_enable:1;
+ unsigned int pad2:1;
+ unsigned int write_disable_b:1;
+ unsigned int write_disable_g:1;
+ unsigned int write_disable_r:1;
+ unsigned int write_disable_a:1;
+ unsigned int pad3:1;
+ unsigned int alpha_to_coverage_dither:1;
+ unsigned int alpha_to_one:1;
+ unsigned int alpha_to_coverage:1;
+ } blend1;
+};
+
+struct gen6_color_calc_state
+{
+ struct {
+ unsigned int alpha_test_format:1;
+ unsigned int pad0:14;
+ unsigned int round_disable:1;
+ unsigned int bf_stencil_ref:8;
+ unsigned int stencil_ref:8;
+ } cc0;
+
+ union {
+ float alpha_ref_f;
+ struct {
+ unsigned int ui:8;
+ unsigned int pad0:24;
+ } alpha_ref_fi;
+ } cc1;
+
+ float constant_r;
+ float constant_g;
+ float constant_b;
+ float constant_a;
+};
+
+struct gen6_depth_stencil_state
+{
+ struct {
+ unsigned int pad0:3;
+ unsigned int bf_stencil_pass_depth_pass_op:3;
+ unsigned int bf_stencil_pass_depth_fail_op:3;
+ unsigned int bf_stencil_fail_op:3;
+ unsigned int bf_stencil_func:3;
+ unsigned int bf_stencil_enable:1;
+ unsigned int pad1:2;
+ unsigned int stencil_write_enable:1;
+ unsigned int stencil_pass_depth_pass_op:3;
+ unsigned int stencil_pass_depth_fail_op:3;
+ unsigned int stencil_fail_op:3;
+ unsigned int stencil_func:3;
+ unsigned int stencil_enable:1;
+ } ds0;
+
+ struct {
+ unsigned int bf_stencil_write_mask:8;
+ unsigned int bf_stencil_test_mask:8;
+ unsigned int stencil_write_mask:8;
+ unsigned int stencil_test_mask:8;
+ } ds1;
+
+ struct {
+ unsigned int pad0:26;
+ unsigned int depth_write_enable:1;
+ unsigned int depth_test_func:3;
+ unsigned int pad1:1;
+ unsigned int depth_test_enable:1;
+ } ds2;
+};
+
+typedef enum {
+ SAMPLER_FILTER_NEAREST = 0,
+ SAMPLER_FILTER_BILINEAR,
+ FILTER_COUNT
+} sampler_filter_t;
+
+typedef enum {
+ SAMPLER_EXTEND_NONE = 0,
+ SAMPLER_EXTEND_REPEAT,
+ SAMPLER_EXTEND_PAD,
+ SAMPLER_EXTEND_REFLECT,
+ EXTEND_COUNT
+} sampler_extend_t;
+
+typedef enum {
+ WM_KERNEL = 0,
+ WM_KERNEL_PROJECTIVE,
+
+ WM_KERNEL_MASK,
+ WM_KERNEL_MASK_PROJECTIVE,
+
+ WM_KERNEL_MASKCA,
+ WM_KERNEL_MASKCA_PROJECTIVE,
+
+ WM_KERNEL_MASKCA_SRCALPHA,
+ WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,
+
+ WM_KERNEL_VIDEO_PLANAR,
+ WM_KERNEL_VIDEO_PACKED,
+ KERNEL_COUNT
+} wm_kernel_t;
+
+#endif
diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c
new file mode 100644
index 0000000..72afe98
--- a/dev/null
+++ b/src/sna/gen5_render.c
@@ -0,0 +1,2841 @@
+/*
+ * Copyright © 2006,2008,2011 Intel Corporation
+ * Copyright © 2007 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Wang Zhenyu <zhenyu.z.wang@sna.com>
+ * Eric Anholt <eric@anholt.net>
+ * Carl Worth <cworth@redhat.com>
+ * Keith Packard <keithp@keithp.com>
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <xf86.h>
+
+#include "sna.h"
+#include "sna_reg.h"
+#include "sna_render.h"
+#include "sna_render_inline.h"
+#include "sna_video.h"
+
+#include "gen5_render.h"
+
+#if DEBUG_RENDER
+#undef DBG
+#define DBG(x) ErrorF x
+#else
+#define NDEBUG 1
+#endif
+
+#define GEN5_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1)
+
+/* Set up a default static partitioning of the URB, which is supposed to
+ * allow anything we would want to do, at potentially lower performance.
+ */
+#define URB_CS_ENTRY_SIZE 1
+#define URB_CS_ENTRIES 0
+
+#define URB_VS_ENTRY_SIZE 1 // each 512-bit row
+#define URB_VS_ENTRIES 8 // we needs at least 8 entries
+
+#define URB_GS_ENTRY_SIZE 0
+#define URB_GS_ENTRIES 0
+
+#define URB_CLIP_ENTRY_SIZE 0
+#define URB_CLIP_ENTRIES 0
+
+#define URB_SF_ENTRY_SIZE 2
+#define URB_SF_ENTRIES 1
+
+/*
+ * this program computes dA/dx and dA/dy for the texture coordinates along
+ * with the base texture coordinate. It was extracted from the Mesa driver
+ */
+
+#define SF_KERNEL_NUM_GRF 16
+#define SF_MAX_THREADS 2
+
+#define PS_KERNEL_NUM_GRF 32
+#define PS_MAX_THREADS 48
+
+static const uint32_t sf_kernel[][4] = {
+#include "exa_sf.g4b.gen5"
+};
+
+static const uint32_t sf_kernel_mask[][4] = {
+#include "exa_sf_mask.g4b.gen5"
+};
+
+static const uint32_t ps_kernel_nomask_affine[][4] = {
+#include "exa_wm_xy.g4b.gen5"
+#include "exa_wm_src_affine.g4b.gen5"
+#include "exa_wm_src_sample_argb.g4b.gen5"
+#include "exa_wm_write.g4b.gen5"
+};
+
+static const uint32_t ps_kernel_nomask_projective[][4] = {
+#include "exa_wm_xy.g4b.gen5"
+#include "exa_wm_src_projective.g4b.gen5"
+#include "exa_wm_src_sample_argb.g4b.gen5"
+#include "exa_wm_write.g4b.gen5"
+};
+
+static const uint32_t ps_kernel_maskca_affine[][4] = {
+#include "exa_wm_xy.g4b.gen5"
+#include "exa_wm_src_affine.g4b.gen5"
+#include "exa_wm_src_sample_argb.g4b.gen5"
+#include "exa_wm_mask_affine.g4b.gen5"
+#include "exa_wm_mask_sample_argb.g4b.gen5"
+#include "exa_wm_ca.g4b.gen5"
+#include "exa_wm_write.g4b.gen5"
+};
+
+static const uint32_t ps_kernel_maskca_projective[][4] = {
+#include "exa_wm_xy.g4b.gen5"
+#include "exa_wm_src_projective.g4b.gen5"
+#include "exa_wm_src_sample_argb.g4b.gen5"
+#include "exa_wm_mask_projective.g4b.gen5"
+#include "exa_wm_mask_sample_argb.g4b.gen5"
+#include "exa_wm_ca.g4b.gen5"
+#include "exa_wm_write.g4b.gen5"
+};
+
+static const uint32_t ps_kernel_maskca_srcalpha_affine[][4] = {
+#include "exa_wm_xy.g4b.gen5"
+#include "exa_wm_src_affine.g4b.gen5"
+#include "exa_wm_src_sample_a.g4b.gen5"
+#include "exa_wm_mask_affine.g4b.gen5"
+#include "exa_wm_mask_sample_argb.g4b.gen5"
+#include "exa_wm_ca_srcalpha.g4b.gen5"
+#include "exa_wm_write.g4b.gen5"
+};
+
+static const uint32_t ps_kernel_maskca_srcalpha_projective[][4] = {
+#include "exa_wm_xy.g4b.gen5"
+#include "exa_wm_src_projective.g4b.gen5"
+#include "exa_wm_src_sample_a.g4b.gen5"
+#include "exa_wm_mask_projective.g4b.gen5"
+#include "exa_wm_mask_sample_argb.g4b.gen5"
+#include "exa_wm_ca_srcalpha.g4b.gen5"
+#include "exa_wm_write.g4b.gen5"
+};
+
+static const uint32_t ps_kernel_masknoca_affine[][4] = {
+#include "exa_wm_xy.g4b.gen5"
+#include "exa_wm_src_affine.g4b.gen5"
+#include "exa_wm_src_sample_argb.g4b.gen5"
+#include "exa_wm_mask_affine.g4b.gen5"
+#include "exa_wm_mask_sample_a.g4b.gen5"
+#include "exa_wm_noca.g4b.gen5"
+#include "exa_wm_write.g4b.gen5"
+};
+
+static const uint32_t ps_kernel_masknoca_projective[][4] = {
+#include "exa_wm_xy.g4b.gen5"
+#include "exa_wm_src_projective.g4b.gen5"
+#include "exa_wm_src_sample_argb.g4b.gen5"
+#include "exa_wm_mask_projective.g4b.gen5"
+#include "exa_wm_mask_sample_a.g4b.gen5"
+#include "exa_wm_noca.g4b.gen5"
+#include "exa_wm_write.g4b.gen5"
+};
+
+static const uint32_t ps_kernel_packed_static[][4] = {
+#include "exa_wm_xy.g4b.gen5"
+#include "exa_wm_src_affine.g4b.gen5"
+#include "exa_wm_src_sample_argb.g4b.gen5"
+#include "exa_wm_yuv_rgb.g4b.gen5"
+#include "exa_wm_write.g4b.gen5"
+};
+
+static const uint32_t ps_kernel_planar_static[][4] = {
+#include "exa_wm_xy.g4b.gen5"
+#include "exa_wm_src_affine.g4b.gen5"
+#include "exa_wm_src_sample_planar.g4b.gen5"
+#include "exa_wm_yuv_rgb.g4b.gen5"
+#include "exa_wm_write.g4b.gen5"
+};
+
+#define KERNEL(kernel_enum, kernel, masked) \
+ [kernel_enum] = {&kernel, sizeof(kernel), masked}
+static const struct wm_kernel_info {
+ const void *data;
+ unsigned int size;
+ Bool has_mask;
+} wm_kernels[] = {
+ KERNEL(WM_KERNEL, ps_kernel_nomask_affine, FALSE),
+ KERNEL(WM_KERNEL_PROJECTIVE, ps_kernel_nomask_projective, FALSE),
+
+ KERNEL(WM_KERNEL_MASK, ps_kernel_masknoca_affine, TRUE),
+ KERNEL(WM_KERNEL_MASK_PROJECTIVE, ps_kernel_masknoca_projective, TRUE),
+
+ KERNEL(WM_KERNEL_MASKCA, ps_kernel_maskca_affine, TRUE),
+ KERNEL(WM_KERNEL_MASKCA_PROJECTIVE, ps_kernel_maskca_projective, TRUE),
+
+ KERNEL(WM_KERNEL_MASKCA_SRCALPHA,
+ ps_kernel_maskca_srcalpha_affine, TRUE),
+ KERNEL(WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,
+ ps_kernel_maskca_srcalpha_projective, TRUE),
+
+ KERNEL(WM_KERNEL_VIDEO_PLANAR, ps_kernel_planar_static, FALSE),
+ KERNEL(WM_KERNEL_VIDEO_PACKED, ps_kernel_packed_static, FALSE),
+};
+#undef KERNEL
+
+static const struct blendinfo {
+ Bool src_alpha;
+ uint32_t src_blend;
+ uint32_t dst_blend;
+} gen5_blend_op[] = {
+ /* Clear */ {0, GEN5_BLENDFACTOR_ZERO, GEN5_BLENDFACTOR_ZERO},
+ /* Src */ {0, GEN5_BLENDFACTOR_ONE, GEN5_BLENDFACTOR_ZERO},
+ /* Dst */ {0, GEN5_BLENDFACTOR_ZERO, GEN5_BLENDFACTOR_ONE},
+ /* Over */ {1, GEN5_BLENDFACTOR_ONE, GEN5_BLENDFACTOR_INV_SRC_ALPHA},
+ /* OverReverse */ {0, GEN5_BLENDFACTOR_INV_DST_ALPHA, GEN5_BLENDFACTOR_ONE},
+ /* In */ {0, GEN5_BLENDFACTOR_DST_ALPHA, GEN5_BLENDFACTOR_ZERO},
+ /* InReverse */ {1, GEN5_BLENDFACTOR_ZERO, GEN5_BLENDFACTOR_SRC_ALPHA},
+ /* Out */ {0, GEN5_BLENDFACTOR_INV_DST_ALPHA, GEN5_BLENDFACTOR_ZERO},
+ /* OutReverse */ {1, GEN5_BLENDFACTOR_ZERO, GEN5_BLENDFACTOR_INV_SRC_ALPHA},
+ /* Atop */ {1, GEN5_BLENDFACTOR_DST_ALPHA, GEN5_BLENDFACTOR_INV_SRC_ALPHA},
+ /* AtopReverse */ {1, GEN5_BLENDFACTOR_INV_DST_ALPHA, GEN5_BLENDFACTOR_SRC_ALPHA},
+ /* Xor */ {1, GEN5_BLENDFACTOR_INV_DST_ALPHA, GEN5_BLENDFACTOR_INV_SRC_ALPHA},
+ /* Add */ {0, GEN5_BLENDFACTOR_ONE, GEN5_BLENDFACTOR_ONE},
+};
+
+/**
+ * Highest-valued BLENDFACTOR used in gen5_blend_op.
+ *
+ * This leaves out GEN5_BLENDFACTOR_INV_DST_COLOR,
+ * GEN5_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
+ * GEN5_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
+ */
+#define GEN5_BLENDFACTOR_COUNT (GEN5_BLENDFACTOR_INV_DST_ALPHA + 1)
+
+/* FIXME: surface format defined in gen5_defines.h, shared Sampling engine
+ * 1.7.2
+ */
+static const struct formatinfo {
+ CARD32 pict_fmt;
+ uint32_t card_fmt;
+} gen5_tex_formats[] = {
+ {PICT_a8, GEN5_SURFACEFORMAT_A8_UNORM},
+ {PICT_a8r8g8b8, GEN5_SURFACEFORMAT_B8G8R8A8_UNORM},
+ {PICT_x8r8g8b8, GEN5_SURFACEFORMAT_B8G8R8X8_UNORM},
+ {PICT_a8b8g8r8, GEN5_SURFACEFORMAT_R8G8B8A8_UNORM},
+ {PICT_x8b8g8r8, GEN5_SURFACEFORMAT_R8G8B8X8_UNORM},
+ {PICT_r8g8b8, GEN5_SURFACEFORMAT_R8G8B8_UNORM},
+ {PICT_r5g6b5, GEN5_SURFACEFORMAT_B5G6R5_UNORM},
+ {PICT_a1r5g5b5, GEN5_SURFACEFORMAT_B5G5R5A1_UNORM},
+ {PICT_a2r10g10b10, GEN5_SURFACEFORMAT_B10G10R10A2_UNORM},
+ {PICT_x2r10g10b10, GEN5_SURFACEFORMAT_B10G10R10X2_UNORM},
+ {PICT_a2b10g10r10, GEN5_SURFACEFORMAT_R10G10B10A2_UNORM},
+ {PICT_x2r10g10b10, GEN5_SURFACEFORMAT_B10G10R10X2_UNORM},
+ {PICT_a4r4g4b4, GEN5_SURFACEFORMAT_B4G4R4A4_UNORM},
+};
+
+#define BLEND_OFFSET(s, d) \
+ (((s) * GEN5_BLENDFACTOR_COUNT + (d)) * 64)
+
+#define SAMPLER_OFFSET(sf, se, mf, me, k) \
+ ((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) * KERNEL_COUNT + (k)) * 64)
+
+static bool
+gen5_emit_pipelined_pointers(struct sna *sna,
+ const struct sna_composite_op *op,
+ int blend, int kernel);
+
+#define OUT_BATCH(v) batch_emit(sna, v)
+#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
+#define OUT_VERTEX_F(v) vertex_emit(sna, v)
+
+static int
+gen5_choose_composite_kernel(int op, Bool has_mask, Bool is_ca, Bool is_affine)
+{
+ int base;
+
+ if (has_mask) {
+ if (is_ca) {
+ if (gen5_blend_op[op].src_alpha)
+ base = WM_KERNEL_MASKCA_SRCALPHA;
+ else
+ base = WM_KERNEL_MASKCA;
+ } else
+ base = WM_KERNEL_MASK;
+ } else
+ base = WM_KERNEL;
+
+ return base + !is_affine;
+}
+
+static void gen5_magic_ca_pass(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ struct gen5_render_state *state = &sna->render_state.gen5;
+
+ if (!op->need_magic_ca_pass)
+ return;
+
+ DBG(("%s: CA fixup\n", __FUNCTION__));
+
+ gen5_emit_pipelined_pointers
+ (sna, op, PictOpAdd,
+ gen5_choose_composite_kernel(PictOpAdd,
+ TRUE, TRUE, op->is_affine));
+
+ OUT_BATCH(GEN5_3DPRIMITIVE |
+ GEN5_3DPRIMITIVE_VERTEX_SEQUENTIAL |
+ (_3DPRIM_RECTLIST << GEN5_3DPRIMITIVE_TOPOLOGY_SHIFT) |
+ (0 << 9) |
+ 4);
+ OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
+ OUT_BATCH(sna->render.vertex_start);
+ OUT_BATCH(1); /* single instance */
+ OUT_BATCH(0); /* start instance location */
+ OUT_BATCH(0); /* index buffer offset, ignored */
+
+ state->last_primitive = sna->kgem.nbatch;
+}
+
+static void gen5_vertex_flush(struct sna *sna)
+{
+ if (sna->render_state.gen5.vertex_offset == 0)
+ return;
+
+ DBG(("%s[%x] = %d\n", __FUNCTION__,
+ 4*sna->render_state.gen5.vertex_offset,
+ sna->render.vertex_index - sna->render.vertex_start));
+ sna->kgem.batch[sna->render_state.gen5.vertex_offset] =
+ sna->render.vertex_index - sna->render.vertex_start;
+ sna->render_state.gen5.vertex_offset = 0;
+
+ if (sna->render.op)
+ gen5_magic_ca_pass(sna, sna->render.op);
+}
+
+static void gen5_vertex_finish(struct sna *sna, Bool last)
+{
+ struct kgem_bo *bo;
+ int i, delta;
+
+ gen5_vertex_flush(sna);
+ if (!sna->render.vertex_used)
+ return;
+
+ /* Note: we only need dword alignment (currently) */
+
+ if (last && sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) {
+ DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__,
+ sna->render.vertex_used, sna->kgem.nbatch));
+ memcpy(sna->kgem.batch + sna->kgem.nbatch,
+ sna->render.vertex_data,
+ sna->render.vertex_used * 4);
+ delta = sna->kgem.nbatch * 4;
+ bo = NULL;
+ sna->kgem.nbatch += sna->render.vertex_used;
+ } else {
+ bo = kgem_create_linear(&sna->kgem, 4*sna->render.vertex_used);
+ if (bo && !kgem_bo_write(&sna->kgem, bo,
+ sna->render.vertex_data,
+ 4*sna->render.vertex_used)) {
+ kgem_bo_destroy(&sna->kgem, bo);
+ return;
+ }
+ delta = 0;
+ DBG(("%s: new vbo: %d\n", __FUNCTION__,
+ sna->render.vertex_used));
+ }
+
+ for (i = 0; i < ARRAY_SIZE(sna->render.vertex_reloc); i++) {
+ if (sna->render.vertex_reloc[i]) {
+ DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
+ i, sna->render.vertex_reloc[i]));
+
+ sna->kgem.batch[sna->render.vertex_reloc[i]] =
+ kgem_add_reloc(&sna->kgem,
+ sna->render.vertex_reloc[i],
+ bo,
+ I915_GEM_DOMAIN_VERTEX << 16,
+ delta);
+ sna->kgem.batch[sna->render.vertex_reloc[i]+1] =
+ kgem_add_reloc(&sna->kgem,
+ sna->render.vertex_reloc[i]+1,
+ bo,
+ I915_GEM_DOMAIN_VERTEX << 16,
+ delta + sna->render.vertex_used * 4 - 1);
+ sna->render.vertex_reloc[i] = 0;
+ }
+ }
+
+ if (bo)
+ kgem_bo_destroy(&sna->kgem, bo);
+
+ sna->render.vertex_used = 0;
+ sna->render.vertex_index = 0;
+ sna->render_state.gen5.vb_id = 0;
+}
+
+static uint32_t gen5_get_blend(int op,
+ Bool has_component_alpha,
+ uint32_t dst_format)
+{
+ uint32_t src, dst;
+
+ src = gen5_blend_op[op].src_blend;
+ dst = gen5_blend_op[op].dst_blend;
+
+ /* If there's no dst alpha channel, adjust the blend op so that we'll treat
+ * it as always 1.
+ */
+ if (PICT_FORMAT_A(dst_format) == 0) {
+ if (src == GEN5_BLENDFACTOR_DST_ALPHA)
+ src = GEN5_BLENDFACTOR_ONE;
+ else if (src == GEN5_BLENDFACTOR_INV_DST_ALPHA)
+ src = GEN5_BLENDFACTOR_ZERO;
+ }
+
+ /* If the source alpha is being used, then we should only be in a
+ * case where the source blend factor is 0, and the source blend
+ * value is the mask channels multiplied by the source picture's alpha.
+ */
+ if (has_component_alpha && gen5_blend_op[op].src_alpha) {
+ if (dst == GEN5_BLENDFACTOR_SRC_ALPHA)
+ dst = GEN5_BLENDFACTOR_SRC_COLOR;
+ else if (dst == GEN5_BLENDFACTOR_INV_SRC_ALPHA)
+ dst = GEN5_BLENDFACTOR_INV_SRC_COLOR;
+ }
+
+ DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
+ op, dst_format, PICT_FORMAT_A(dst_format),
+ src, dst, BLEND_OFFSET(src, dst)));
+ return BLEND_OFFSET(src, dst);
+}
+
+static uint32_t gen5_get_dest_format(PictFormat format)
+{
+ switch (format) {
+ case PICT_a8r8g8b8:
+ case PICT_x8r8g8b8:
+ default:
+ return GEN5_SURFACEFORMAT_B8G8R8A8_UNORM;
+ case PICT_a8b8g8r8:
+ case PICT_x8b8g8r8:
+ return GEN5_SURFACEFORMAT_R8G8B8A8_UNORM;
+ case PICT_a2r10g10b10:
+ case PICT_x2r10g10b10:
+ return GEN5_SURFACEFORMAT_B10G10R10A2_UNORM;
+ case PICT_r5g6b5:
+ return GEN5_SURFACEFORMAT_B5G6R5_UNORM;
+ case PICT_x1r5g5b5:
+ case PICT_a1r5g5b5:
+ return GEN5_SURFACEFORMAT_B5G5R5A1_UNORM;
+ case PICT_a8:
+ return GEN5_SURFACEFORMAT_A8_UNORM;
+ case PICT_a4r4g4b4:
+ case PICT_x4r4g4b4:
+ return GEN5_SURFACEFORMAT_B4G4R4A4_UNORM;
+ }
+}
+
+static Bool gen5_check_dst_format(PictFormat format)
+{
+ switch (format) {
+ case PICT_a8r8g8b8:
+ case PICT_x8r8g8b8:
+ case PICT_a8b8g8r8:
+ case PICT_x8b8g8r8:
+ case PICT_a2r10g10b10:
+ case PICT_x2r10g10b10:
+ case PICT_r5g6b5:
+ case PICT_x1r5g5b5:
+ case PICT_a1r5g5b5:
+ case PICT_a8:
+ case PICT_a4r4g4b4:
+ case PICT_x4r4g4b4:
+ return TRUE;
+ }
+ return FALSE;
+}
+
+static uint32_t gen5_get_dest_format_for_depth(int depth)
+{
+ switch (depth) {
+ case 32:
+ case 24:
+ default: return GEN5_SURFACEFORMAT_B8G8R8A8_UNORM;
+ case 30: return GEN5_SURFACEFORMAT_B10G10R10A2_UNORM;
+ case 16: return GEN5_SURFACEFORMAT_B5G6R5_UNORM;
+ case 8: return GEN5_SURFACEFORMAT_A8_UNORM;
+ }
+}
+
+static uint32_t gen5_get_card_format_for_depth(int depth)
+{
+ switch (depth) {
+ case 32:
+ default: return GEN5_SURFACEFORMAT_B8G8R8A8_UNORM;
+ case 30: return GEN5_SURFACEFORMAT_B10G10R10A2_UNORM;
+ case 24: return GEN5_SURFACEFORMAT_B8G8R8X8_UNORM;
+ case 16: return GEN5_SURFACEFORMAT_B5G6R5_UNORM;
+ case 8: return GEN5_SURFACEFORMAT_A8_UNORM;
+ }
+}
+
+static bool gen5_format_is_dst(uint32_t format)
+{
+ switch (format) {
+ case GEN5_SURFACEFORMAT_B8G8R8A8_UNORM:
+ case GEN5_SURFACEFORMAT_R8G8B8A8_UNORM:
+ case GEN5_SURFACEFORMAT_B10G10R10A2_UNORM:
+ case GEN5_SURFACEFORMAT_B5G6R5_UNORM:
+ case GEN5_SURFACEFORMAT_B5G5R5A1_UNORM:
+ case GEN5_SURFACEFORMAT_A8_UNORM:
+ case GEN5_SURFACEFORMAT_B4G4R4A4_UNORM:
+ return true;
+ default:
+ return false;
+ }
+}
+
+typedef struct gen5_surface_state_padded {
+ struct gen5_surface_state state;
+ char pad[32 - sizeof(struct gen5_surface_state)];
+} gen5_surface_state_padded;
+
+static void null_create(struct sna_static_stream *stream)
+{
+ /* A bunch of zeros useful for legacy border color and depth-stencil */
+ sna_static_stream_map(stream, 64, 64);
+}
+
+static void
+sampler_state_init(struct gen5_sampler_state *sampler_state,
+ sampler_filter_t filter,
+ sampler_extend_t extend)
+{
+ sampler_state->ss0.lod_preclamp = 1; /* GL mode */
+
+ /* We use the legacy mode to get the semantics specified by
+ * the Render extension. */
+ sampler_state->ss0.border_color_mode = GEN5_BORDER_COLOR_MODE_LEGACY;
+
+ switch (filter) {
+ default:
+ case SAMPLER_FILTER_NEAREST:
+ sampler_state->ss0.min_filter = GEN5_MAPFILTER_NEAREST;
+ sampler_state->ss0.mag_filter = GEN5_MAPFILTER_NEAREST;
+ break;
+ case SAMPLER_FILTER_BILINEAR:
+ sampler_state->ss0.min_filter = GEN5_MAPFILTER_LINEAR;
+ sampler_state->ss0.mag_filter = GEN5_MAPFILTER_LINEAR;
+ break;
+ }
+
+ switch (extend) {
+ default:
+ case SAMPLER_EXTEND_NONE:
+ sampler_state->ss1.r_wrap_mode = GEN5_TEXCOORDMODE_CLAMP_BORDER;
+ sampler_state->ss1.s_wrap_mode = GEN5_TEXCOORDMODE_CLAMP_BORDER;
+ sampler_state->ss1.t_wrap_mode = GEN5_TEXCOORDMODE_CLAMP_BORDER;
+ break;
+ case SAMPLER_EXTEND_REPEAT:
+ sampler_state->ss1.r_wrap_mode = GEN5_TEXCOORDMODE_WRAP;
+ sampler_state->ss1.s_wrap_mode = GEN5_TEXCOORDMODE_WRAP;
+ sampler_state->ss1.t_wrap_mode = GEN5_TEXCOORDMODE_WRAP;
+ break;
+ case SAMPLER_EXTEND_PAD:
+ sampler_state->ss1.r_wrap_mode = GEN5_TEXCOORDMODE_CLAMP;
+ sampler_state->ss1.s_wrap_mode = GEN5_TEXCOORDMODE_CLAMP;
+ sampler_state->ss1.t_wrap_mode = GEN5_TEXCOORDMODE_CLAMP;
+ break;
+ case SAMPLER_EXTEND_REFLECT:
+ sampler_state->ss1.r_wrap_mode = GEN5_TEXCOORDMODE_MIRROR;
+ sampler_state->ss1.s_wrap_mode = GEN5_TEXCOORDMODE_MIRROR;
+ sampler_state->ss1.t_wrap_mode = GEN5_TEXCOORDMODE_MIRROR;
+ break;
+ }
+}
+
+static uint32_t gen5_get_card_format(PictFormat format)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(gen5_tex_formats); i++) {
+ if (gen5_tex_formats[i].pict_fmt == format)
+ return gen5_tex_formats[i].card_fmt;
+ }
+ return -1;
+}
+
+static uint32_t gen5_filter(uint32_t filter)
+{
+ switch (filter) {
+ default:
+ assert(0);
+ case PictFilterNearest:
+ return SAMPLER_FILTER_NEAREST;
+ case PictFilterBilinear:
+ return SAMPLER_FILTER_BILINEAR;
+ }
+}
+
+static uint32_t gen5_check_filter(PicturePtr picture)
+{
+ switch (picture->filter) {
+ case PictFilterNearest:
+ case PictFilterBilinear:
+ return TRUE;
+ default:
+ return FALSE;
+ }
+}
+
+static uint32_t gen5_repeat(uint32_t repeat)
+{
+ switch (repeat) {
+ default:
+ assert(0);
+ case RepeatNone:
+ return SAMPLER_EXTEND_NONE;
+ case RepeatNormal:
+ return SAMPLER_EXTEND_REPEAT;
+ case RepeatPad:
+ return SAMPLER_EXTEND_PAD;
+ case RepeatReflect:
+ return SAMPLER_EXTEND_REFLECT;
+ }
+}
+
+static bool gen5_check_repeat(PicturePtr picture)
+{
+ if (!picture->repeat)
+ return TRUE;
+
+ switch (picture->repeatType) {
+ case RepeatNone:
+ case RepeatNormal:
+ case RepeatPad:
+ case RepeatReflect:
+ return TRUE;
+ default:
+ return FALSE;
+ }
+}
+
+/**
+ * Sets up the common fields for a surface state buffer for the given
+ * picture in the given surface state buffer.
+ */
+static int
+gen5_bind_bo(struct sna *sna,
+ struct kgem_bo *bo,
+ uint32_t width,
+ uint32_t height,
+ uint32_t format,
+ Bool is_dst)
+{
+ struct gen5_surface_state *ss;
+ uint32_t domains;
+ uint16_t offset;
+
+ /* After the first bind, we manage the cache domains within the batch */
+ if (is_dst) {
+ domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER;
+ kgem_bo_mark_dirty(bo);
+ } else {
+ domains = I915_GEM_DOMAIN_SAMPLER << 16;
+ is_dst = gen5_format_is_dst(format);
+ }
+
+ offset = sna->kgem.surface - sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);
+ offset *= sizeof(uint32_t);
+
+ if (is_dst) {
+ if (bo->dst_bound)
+ return bo->dst_bound;
+
+ bo->dst_bound = offset;
+ } else {
+ if (bo->src_bound)
+ return bo->src_bound;
+
+ bo->src_bound = offset;
+ }
+
+ sna->kgem.surface -=
+ sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);
+ ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss));
+
+ ss->ss0.surface_type = GEN5_SURFACE_2D;
+ ss->ss0.surface_format = format;
+
+ ss->ss0.data_return_format = GEN5_SURFACERETURNFORMAT_FLOAT32;
+ ss->ss0.color_blend = 1;
+ ss->ss1.base_addr =
+ kgem_add_reloc(&sna->kgem,
+ sna->kgem.surface + 1,
+ bo, domains, 0);
+
+ ss->ss2.height = height - 1;
+ ss->ss2.width = width - 1;
+ ss->ss3.pitch = bo->pitch - 1;
+ ss->ss3.tile_walk = bo->tiling == I915_TILING_Y;
+ ss->ss3.tiled_surface = bo->tiling != I915_TILING_NONE;
+
+ DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
+ offset, bo->handle, ss->ss1.base_addr,
+ ss->ss0.surface_format, width, height, bo->pitch, bo->tiling,
+ domains & 0xffff ? "render" : "sampler"));
+
+ return offset;
+}
+
+fastcall static void
+gen5_emit_composite_primitive_solid(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ float *v;
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ v = sna->render.vertex_data + sna->render.vertex_used;
+ sna->render.vertex_used += 9;
+
+ dst.p.x = r->dst.x + r->width;
+ dst.p.y = r->dst.y + r->height;
+ v[0] = dst.f;
+ v[1] = 1.;
+ v[2] = 1.;
+
+ dst.p.x = r->dst.x;
+ v[3] = dst.f;
+ v[4] = 0.;
+ v[5] = 1.;
+
+ dst.p.y = r->dst.y;
+ v[6] = dst.f;
+ v[7] = 0.;
+ v[8] = 0.;
+}
+
+fastcall static void
+gen5_emit_composite_primitive_identity_source(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ const float *sf = op->src.scale;
+ float sx, sy, *v;
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ v = sna->render.vertex_data + sna->render.vertex_used;
+ sna->render.vertex_used += 9;
+
+ sx = r->src.x + op->src.offset[0];
+ sy = r->src.y + op->src.offset[1];
+
+ dst.p.x = r->dst.x + r->width;
+ dst.p.y = r->dst.y + r->height;
+ v[0] = dst.f;
+ v[1] = (sx + r->width) * sf[0];
+ v[2] = (sy + r->height) * sf[1];
+
+ dst.p.x = r->dst.x;
+ v[3] = dst.f;
+ v[4] = sx * sf[0];
+ v[5] = v[2];
+
+ dst.p.y = r->dst.y;
+ v[6] = dst.f;
+ v[7] = v[4];
+ v[8] = sy * sf[1];
+}
+
+fastcall static void
+gen5_emit_composite_primitive_affine_source(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+ float *v;
+
+ v = sna->render.vertex_data + sna->render.vertex_used;
+ sna->render.vertex_used += 9;
+
+ dst.p.x = r->dst.x + r->width;
+ dst.p.y = r->dst.y + r->height;
+ v[0] = dst.f;
+ _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x + r->width,
+ op->src.offset[1] + r->src.y + r->height,
+ op->src.transform,
+ &v[1], &v[2]);
+ v[1] *= op->src.scale[0];
+ v[2] *= op->src.scale[1];
+
+ dst.p.x = r->dst.x;
+ v[3] = dst.f;
+ _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x,
+ op->src.offset[1] + r->src.y + r->height,
+ op->src.transform,
+ &v[4], &v[5]);
+ v[4] *= op->src.scale[0];
+ v[5] *= op->src.scale[1];
+
+ dst.p.y = r->dst.y;
+ v[6] = dst.f;
+ _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x,
+ op->src.offset[1] + r->src.y,
+ op->src.transform,
+ &v[7], &v[8]);
+ v[7] *= op->src.scale[0];
+ v[8] *= op->src.scale[1];
+}
+
+fastcall static void
+gen5_emit_composite_primitive_identity_source_mask(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+ float src_x, src_y;
+ float msk_x, msk_y;
+ float w, h;
+ float *v;
+
+ src_x = r->src.x + op->src.offset[0];
+ src_y = r->src.y + op->src.offset[1];
+ msk_x = r->mask.x + op->mask.offset[0];
+ msk_y = r->mask.y + op->mask.offset[1];
+ w = r->width;
+ h = r->height;
+
+ v = sna->render.vertex_data + sna->render.vertex_used;
+ sna->render.vertex_used += 15;
+
+ dst.p.x = r->dst.x + r->width;
+ dst.p.y = r->dst.y + r->height;
+ v[0] = dst.f;
+ v[1] = (src_x + w) * op->src.scale[0];
+ v[2] = (src_y + h) * op->src.scale[1];
+ v[3] = (msk_x + w) * op->mask.scale[0];
+ v[4] = (msk_y + h) * op->mask.scale[1];
+
+ dst.p.x = r->dst.x;
+ v[5] = dst.f;
+ v[6] = src_x * op->src.scale[0];
+ v[7] = v[2];
+ v[8] = msk_x * op->mask.scale[0];
+ v[9] = v[4];
+
+ dst.p.y = r->dst.y;
+ v[10] = dst.f;
+ v[11] = v[6];
+ v[12] = src_y * op->src.scale[1];
+ v[13] = v[8];
+ v[14] = msk_y * op->mask.scale[1];
+}
+
+fastcall static void
+gen5_emit_composite_primitive(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3];
+ Bool is_affine = op->is_affine;
+ const float *src_sf = op->src.scale;
+ const float *mask_sf = op->mask.scale;
+
+ if (is_affine) {
+ sna_get_transformed_coordinates(r->src.x + op->src.offset[0],
+ r->src.y + op->src.offset[1],
+ op->src.transform,
+ &src_x[0],
+ &src_y[0]);
+
+ sna_get_transformed_coordinates(r->src.x + op->src.offset[0],
+ r->src.y + op->src.offset[1] + r->height,
+ op->src.transform,
+ &src_x[1],
+ &src_y[1]);
+
+ sna_get_transformed_coordinates(r->src.x + op->src.offset[0] + r->width,
+ r->src.y + op->src.offset[1] + r->height,
+ op->src.transform,
+ &src_x[2],
+ &src_y[2]);
+ } else {
+ if (!sna_get_transformed_coordinates_3d(r->src.x + op->src.offset[0],
+ r->src.y + op->src.offset[1],
+ op->src.transform,
+ &src_x[0],
+ &src_y[0],
+ &src_w[0]))
+ return;
+
+ if (!sna_get_transformed_coordinates_3d(r->src.x + op->src.offset[0],
+ r->src.y + op->src.offset[1] + r->height,
+ op->src.transform,
+ &src_x[1],
+ &src_y[1],
+ &src_w[1]))
+ return;
+
+ if (!sna_get_transformed_coordinates_3d(r->src.x + op->src.offset[0] + r->width,
+ r->src.y + op->src.offset[1] + r->height,
+ op->src.transform,
+ &src_x[2],
+ &src_y[2],
+ &src_w[2]))
+ return;
+ }
+
+ if (op->mask.bo) {
+ if (is_affine) {
+ sna_get_transformed_coordinates(r->mask.x + op->mask.offset[0],
+ r->mask.y + op->mask.offset[1],
+ op->mask.transform,
+ &mask_x[0],
+ &mask_y[0]);
+
+ sna_get_transformed_coordinates(r->mask.x + op->mask.offset[0],
+ r->mask.y + op->mask.offset[1] + r->height,
+ op->mask.transform,
+ &mask_x[1],
+ &mask_y[1]);
+
+ sna_get_transformed_coordinates(r->mask.x + op->mask.offset[0] + r->width,
+ r->mask.y + op->mask.offset[1] + r->height,
+ op->mask.transform,
+ &mask_x[2],
+ &mask_y[2]);
+ } else {
+ if (!sna_get_transformed_coordinates_3d(r->mask.x + op->mask.offset[0],
+ r->mask.y + op->mask.offset[1],
+ op->mask.transform,
+ &mask_x[0],
+ &mask_y[0],
+ &mask_w[0]))
+ return;
+
+ if (!sna_get_transformed_coordinates_3d(r->mask.x + op->mask.offset[0],
+ r->mask.y + op->mask.offset[1] + r->height,
+ op->mask.transform,
+ &mask_x[1],
+ &mask_y[1],
+ &mask_w[1]))
+ return;
+
+ if (!sna_get_transformed_coordinates_3d(r->mask.x + op->mask.offset[0] + r->width,
+ r->mask.y + op->mask.offset[1] + r->height,
+ op->mask.transform,
+ &mask_x[2],
+ &mask_y[2],
+ &mask_w[2]))
+ return;
+ }
+ }
+
+ OUT_VERTEX(r->dst.x + r->width, r->dst.y + r->height);
+ OUT_VERTEX_F(src_x[2] * src_sf[0]);
+ OUT_VERTEX_F(src_y[2] * src_sf[1]);
+ if (!is_affine)
+ OUT_VERTEX_F(src_w[2]);
+ if (op->mask.bo) {
+ OUT_VERTEX_F(mask_x[2] * mask_sf[0]);
+ OUT_VERTEX_F(mask_y[2] * mask_sf[1]);
+ if (!is_affine)
+ OUT_VERTEX_F(mask_w[2]);
+ }
+
+ OUT_VERTEX(r->dst.x, r->dst.y + r->height);
+ OUT_VERTEX_F(src_x[1] * src_sf[0]);
+ OUT_VERTEX_F(src_y[1] * src_sf[1]);
+ if (!is_affine)
+ OUT_VERTEX_F(src_w[1]);
+ if (op->mask.bo) {
+ OUT_VERTEX_F(mask_x[1] * mask_sf[0]);
+ OUT_VERTEX_F(mask_y[1] * mask_sf[1]);
+ if (!is_affine)
+ OUT_VERTEX_F(mask_w[1]);
+ }
+
+ OUT_VERTEX(r->dst.x, r->dst.y);
+ OUT_VERTEX_F(src_x[0] * src_sf[0]);
+ OUT_VERTEX_F(src_y[0] * src_sf[1]);
+ if (!is_affine)
+ OUT_VERTEX_F(src_w[0]);
+ if (op->mask.bo) {
+ OUT_VERTEX_F(mask_x[0] * mask_sf[0]);
+ OUT_VERTEX_F(mask_y[0] * mask_sf[1]);
+ if (!is_affine)
+ OUT_VERTEX_F(mask_w[0]);
+ }
+}
+
+static void gen5_emit_vertex_buffer(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ int id = op->u.gen5.ve_id;
+
+ OUT_BATCH(GEN5_3DSTATE_VERTEX_BUFFERS | 3);
+ OUT_BATCH((id << VB0_BUFFER_INDEX_SHIFT) | VB0_VERTEXDATA |
+ (4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT));
+ sna->render.vertex_reloc[id] = sna->kgem.nbatch;
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+
+ sna->render_state.gen5.vb_id |= 1 << id;
+}
+
+static void gen5_emit_primitive(struct sna *sna)
+{
+ if (sna->kgem.nbatch == sna->render_state.gen5.last_primitive) {
+ sna->render_state.gen5.vertex_offset = sna->kgem.nbatch - 5;
+ return;
+ }
+
+ OUT_BATCH(GEN5_3DPRIMITIVE |
+ GEN5_3DPRIMITIVE_VERTEX_SEQUENTIAL |
+ (_3DPRIM_RECTLIST << GEN5_3DPRIMITIVE_TOPOLOGY_SHIFT) |
+ (0 << 9) |
+ 4);
+ sna->render_state.gen5.vertex_offset = sna->kgem.nbatch;
+ OUT_BATCH(0); /* vertex count, to be filled in later */
+ OUT_BATCH(sna->render.vertex_index);
+ OUT_BATCH(1); /* single instance */
+ OUT_BATCH(0); /* start instance location */
+ OUT_BATCH(0); /* index buffer offset, ignored */
+ sna->render.vertex_start = sna->render.vertex_index;
+
+ sna->render_state.gen5.last_primitive = sna->kgem.nbatch;
+}
+
+static bool gen5_rectangle_begin(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ int id = op->u.gen5.ve_id;
+ int ndwords;
+
+ ndwords = 0;
+ if ((sna->render_state.gen5.vb_id & (1 << id)) == 0)
+ ndwords += 5;
+ if (sna->render_state.gen5.vertex_offset == 0)
+ ndwords += op->need_magic_ca_pass ? 20 : 6;
+ if (ndwords == 0)
+ return true;
+
+ if (!kgem_check_batch(&sna->kgem, ndwords))
+ return false;
+
+ if ((sna->render_state.gen5.vb_id & (1 << id)) == 0)
+ gen5_emit_vertex_buffer(sna, op);
+ if (sna->render_state.gen5.vertex_offset == 0)
+ gen5_emit_primitive(sna);
+
+ return true;
+}
+
+static int gen5_get_rectangles__flush(struct sna *sna)
+{
+ if (!kgem_check_batch(&sna->kgem, 25))
+ return 0;
+ if (sna->kgem.nexec > KGEM_EXEC_SIZE(&sna->kgem) - 1)
+ return 0;
+ if (sna->kgem.nreloc > KGEM_RELOC_SIZE(&sna->kgem) - 1)
+ return 0;
+
+ gen5_vertex_finish(sna, FALSE);
+ sna->render.vertex_index = 0;
+
+ return ARRAY_SIZE(sna->render.vertex_data);
+}
+
+inline static int gen5_get_rectangles(struct sna *sna,
+ const struct sna_composite_op *op,
+ int want)
+{
+ int rem = vertex_space(sna);
+
+ if (rem < 3*op->floats_per_vertex) {
+ DBG(("flushing vbo for %s: %d < %d\n",
+ __FUNCTION__, rem, 3*op->floats_per_vertex));
+ rem = gen5_get_rectangles__flush(sna);
+ if (rem == 0)
+ return 0;
+ }
+
+ if (!gen5_rectangle_begin(sna, op))
+ return 0;
+
+ if (want * op->floats_per_vertex*3 > rem)
+ want = rem / (3*op->floats_per_vertex);
+
+ sna->render.vertex_index += 3*want;
+ return want;
+}
+
+static uint32_t *
+gen5_composite_get_binding_table(struct sna *sna,
+ const struct sna_composite_op *op,
+ uint16_t *offset)
+{
+ uint32_t *table;
+
+ sna->kgem.surface -=
+ sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);
+ /* Clear all surplus entries to zero in case of prefetch */
+ table = memset(sna->kgem.batch + sna->kgem.surface,
+ 0, sizeof(struct gen5_surface_state_padded));
+ *offset = sna->kgem.surface;
+
+ DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));
+
+ return table;
+}
+
+static void
+gen5_emit_sip(struct sna *sna)
+{
+ /* Set system instruction pointer */
+ OUT_BATCH(GEN5_STATE_SIP | 0);
+ OUT_BATCH(0);
+}
+
+static void
+gen5_emit_urb(struct sna *sna)
+{
+ int urb_vs_start, urb_vs_size;
+ int urb_gs_start, urb_gs_size;
+ int urb_clip_start, urb_clip_size;
+ int urb_sf_start, urb_sf_size;
+ int urb_cs_start, urb_cs_size;
+
+ urb_vs_start = 0;
+ urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
+ urb_gs_start = urb_vs_start + urb_vs_size;
+ urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
+ urb_clip_start = urb_gs_start + urb_gs_size;
+ urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
+ urb_sf_start = urb_clip_start + urb_clip_size;
+ urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
+ urb_cs_start = urb_sf_start + urb_sf_size;
+ urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;
+
+ OUT_BATCH(GEN5_URB_FENCE |
+ UF0_CS_REALLOC |
+ UF0_SF_REALLOC |
+ UF0_CLIP_REALLOC |
+ UF0_GS_REALLOC |
+ UF0_VS_REALLOC |
+ 1);
+ OUT_BATCH(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
+ ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
+ ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
+ OUT_BATCH(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
+ ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
+
+ /* Constant buffer state */
+ OUT_BATCH(GEN5_CS_URB_STATE | 0);
+ OUT_BATCH((URB_CS_ENTRY_SIZE - 1) << 4 | URB_CS_ENTRIES << 0);
+}
+
+static void
+gen5_emit_state_base_address(struct sna *sna)
+{
+ assert(sna->render_state.gen5.general_bo->proxy == NULL);
+ OUT_BATCH(GEN5_STATE_BASE_ADDRESS | 6);
+ OUT_BATCH(kgem_add_reloc(&sna->kgem, /* general */
+ sna->kgem.nbatch,
+ sna->render_state.gen5.general_bo,
+ I915_GEM_DOMAIN_INSTRUCTION << 16,
+ BASE_ADDRESS_MODIFY));
+ OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
+ sna->kgem.nbatch,
+ NULL,
+ I915_GEM_DOMAIN_INSTRUCTION << 16,
+ BASE_ADDRESS_MODIFY));
+ OUT_BATCH(0); /* media */
+ OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */
+ sna->kgem.nbatch,
+ sna->render_state.gen5.general_bo,
+ I915_GEM_DOMAIN_INSTRUCTION << 16,
+ BASE_ADDRESS_MODIFY));
+
+ /* upper bounds, all disabled */
+ OUT_BATCH(BASE_ADDRESS_MODIFY);
+ OUT_BATCH(0);
+ OUT_BATCH(BASE_ADDRESS_MODIFY);
+}
+
+static void
+gen5_emit_invariant(struct sna *sna)
+{
+ /* Ironlake errata workaround: Before disabling the clipper,
+ * you have to MI_FLUSH to get the pipeline idle.
+ */
+ OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
+ OUT_BATCH(GEN5_PIPELINE_SELECT | PIPELINE_SELECT_3D);
+
+ gen5_emit_sip(sna);
+ gen5_emit_state_base_address(sna);
+
+ sna->render_state.gen5.needs_invariant = FALSE;
+}
+
+static void
+gen5_get_batch(struct sna *sna)
+{
+ kgem_set_mode(&sna->kgem, KGEM_RENDER);
+
+ if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
+ DBG(("%s: flushing batch: %d < %d+%d\n",
+ __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
+ 150, 4*8));
+ kgem_submit(&sna->kgem);
+ }
+
+ if (sna->render_state.gen5.needs_invariant)
+ gen5_emit_invariant(sna);
+}
+
+static void
+gen5_align_vertex(struct sna *sna, const struct sna_composite_op *op)
+{
+ if (op->floats_per_vertex != sna->render_state.gen5.floats_per_vertex) {
+ DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
+ sna->render_state.gen5.floats_per_vertex,
+ op->floats_per_vertex,
+ sna->render.vertex_index,
+ (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
+ sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
+ sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
+ sna->render_state.gen5.floats_per_vertex = op->floats_per_vertex;
+ }
+}
+
+static void
+gen5_emit_binding_table(struct sna *sna, uint16_t offset)
+{
+ if (sna->render_state.gen5.surface_table == offset)
+ return;
+
+ sna->render_state.gen5.surface_table = offset;
+
+ /* Binding table pointers */
+ OUT_BATCH(GEN5_3DSTATE_BINDING_TABLE_POINTERS | 4);
+ OUT_BATCH(0); /* vs */
+ OUT_BATCH(0); /* gs */
+ OUT_BATCH(0); /* clip */
+ OUT_BATCH(0); /* sf */
+ /* Only the PS uses the binding table */
+ OUT_BATCH(offset*4);
+}
+
+static bool
+gen5_emit_pipelined_pointers(struct sna *sna,
+ const struct sna_composite_op *op,
+ int blend, int kernel)
+{
+ uint16_t offset = sna->kgem.nbatch, last;
+
+ OUT_BATCH(GEN5_3DSTATE_PIPELINED_POINTERS | 5);
+ OUT_BATCH(sna->render_state.gen5.vs);
+ OUT_BATCH(GEN5_GS_DISABLE); /* passthrough */
+ OUT_BATCH(GEN5_CLIP_DISABLE); /* passthrough */
+ OUT_BATCH(sna->render_state.gen5.sf[op->mask.bo != NULL]);
+ OUT_BATCH(sna->render_state.gen5.wm +
+ SAMPLER_OFFSET(op->src.filter, op->src.repeat,
+ op->mask.filter, op->mask.repeat,
+ kernel));
+ OUT_BATCH(sna->render_state.gen5.cc +
+ gen5_get_blend(blend, op->has_component_alpha, op->dst.format));
+
+ last = sna->render_state.gen5.last_pipelined_pointers;
+ if (last &&
+ sna->kgem.batch[offset + 1] == sna->kgem.batch[last + 1] &&
+ sna->kgem.batch[offset + 3] == sna->kgem.batch[last + 3] &&
+ sna->kgem.batch[offset + 4] == sna->kgem.batch[last + 4] &&
+ sna->kgem.batch[offset + 5] == sna->kgem.batch[last + 5] &&
+ sna->kgem.batch[offset + 6] == sna->kgem.batch[last + 6]) {
+ sna->kgem.nbatch = offset;
+ return false;
+ } else {
+ sna->render_state.gen5.last_pipelined_pointers = offset;
+ return true;
+ }
+}
+
+static void
+gen5_emit_drawing_rectangle(struct sna *sna, const struct sna_composite_op *op)
+{
+ uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
+ uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;
+
+ if (sna->render_state.gen5.drawrect_limit == limit &&
+ sna->render_state.gen5.drawrect_offset == offset)
+ return;
+ sna->render_state.gen5.drawrect_offset = offset;
+ sna->render_state.gen5.drawrect_limit = limit;
+
+ OUT_BATCH(GEN5_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
+ OUT_BATCH(0x00000000);
+ OUT_BATCH(limit);
+ OUT_BATCH(offset);
+}
+
+static void
+gen5_emit_vertex_elements(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ /*
+ * vertex data in vertex buffer
+ * position: (x, y)
+ * texture coordinate 0: (u0, v0) if (is_affine is TRUE) else (u0, v0, w0)
+ * texture coordinate 1 if (has_mask is TRUE): same as above
+ */
+ struct gen5_render_state *render = &sna->render_state.gen5;
+ Bool has_mask = op->mask.bo != NULL;
+ Bool is_affine = op->is_affine;
+ int nelem = has_mask ? 2 : 1;
+ int selem = is_affine ? 2 : 3;
+ uint32_t w_component;
+ uint32_t src_format;
+ int id = op->u.gen5.ve_id;;
+
+ if (render->ve_id == id)
+ return;
+
+ render->ve_id = id;
+
+ if (is_affine) {
+ src_format = GEN5_SURFACEFORMAT_R32G32_FLOAT;
+ w_component = GEN5_VFCOMPONENT_STORE_1_FLT;
+ } else {
+ src_format = GEN5_SURFACEFORMAT_R32G32B32_FLOAT;
+ w_component = GEN5_VFCOMPONENT_STORE_SRC;
+ }
+
+ /* The VUE layout
+ * dword 0-3: pad (0.0, 0.0, 0.0. 0.0)
+ * dword 4-7: position (x, y, 1.0, 1.0),
+ * dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0)
+ * dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0)
+ *
+ * dword 4-15 are fetched from vertex buffer
+ */
+ OUT_BATCH(GEN5_3DSTATE_VERTEX_ELEMENTS |
+ ((2 * (2 + nelem)) + 1 - 2));
+
+ OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
+ (GEN5_SURFACEFORMAT_R32G32B32A32_FLOAT << VE0_FORMAT_SHIFT) |
+ (0 << VE0_OFFSET_SHIFT));
+ OUT_BATCH((GEN5_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) |
+ (GEN5_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) |
+ (GEN5_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
+ (GEN5_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT));
+
+ /* x,y */
+ OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
+ (GEN5_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT) |
+ (0 << VE0_OFFSET_SHIFT)); /* offsets vb in bytes */
+ OUT_BATCH((GEN5_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+ (GEN5_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+ (GEN5_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
+ (GEN5_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
+
+ /* u0, v0, w0 */
+ OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
+ (src_format << VE0_FORMAT_SHIFT) |
+ (4 << VE0_OFFSET_SHIFT)); /* offset vb in bytes */
+ OUT_BATCH((GEN5_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+ (GEN5_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+ (w_component << VE1_VFCOMPONENT_2_SHIFT) |
+ (GEN5_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
+
+ /* u1, v1, w1 */
+ if (has_mask) {
+ OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
+ (src_format << VE0_FORMAT_SHIFT) |
+ (((1 + selem) * 4) << VE0_OFFSET_SHIFT)); /* vb offset in bytes */
+ OUT_BATCH((GEN5_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+ (GEN5_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+ (w_component << VE1_VFCOMPONENT_2_SHIFT) |
+ (GEN5_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
+ }
+}
+
+static void
+gen5_emit_state(struct sna *sna,
+ const struct sna_composite_op *op,
+ uint16_t offset)
+{
+ gen5_emit_binding_table(sna, offset);
+ if (gen5_emit_pipelined_pointers(sna, op, op->op, op->u.gen5.wm_kernel))
+ gen5_emit_urb(sna);
+ gen5_emit_vertex_elements(sna, op);
+ gen5_emit_drawing_rectangle(sna, op);
+}
+
+static void gen5_bind_surfaces(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ uint32_t *binding_table;
+ uint16_t offset;
+
+ gen5_get_batch(sna);
+
+ binding_table = gen5_composite_get_binding_table(sna, op, &offset);
+
+ binding_table[0] =
+ gen5_bind_bo(sna,
+ op->dst.bo, op->dst.width, op->dst.height,
+ gen5_get_dest_format(op->dst.format),
+ TRUE);
+ binding_table[1] =
+ gen5_bind_bo(sna,
+ op->src.bo, op->src.width, op->src.height,
+ op->src.card_format,
+ FALSE);
+ if (op->mask.bo)
+ binding_table[2] =
+ gen5_bind_bo(sna,
+ op->mask.bo,
+ op->mask.width,
+ op->mask.height,
+ op->mask.card_format,
+ FALSE);
+
+ if (sna->kgem.surface == offset &&
+ *(uint64_t *)(sna->kgem.batch + sna->render_state.gen5.surface_table) == *(uint64_t*)binding_table &&
+ (op->mask.bo == NULL ||
+ sna->kgem.batch[sna->render_state.gen5.surface_table+2] == binding_table[2])) {
+ sna->kgem.surface += sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);
+ offset = sna->render_state.gen5.surface_table;
+ }
+
+ gen5_emit_state(sna, op, offset);
+}
+
+
+fastcall static void
+gen5_render_composite_blt(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ DBG(("%s: src=(%d, %d)+(%d, %d), mask=(%d, %d)+(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n",
+ __FUNCTION__,
+ r->src.x, r->src.y, op->src.offset[0], op->src.offset[1],
+ r->mask.x, r->mask.y, op->mask.offset[0], op->mask.offset[1],
+ r->dst.x, r->dst.y, op->dst.x, op->dst.y,
+ r->width, r->height));
+
+ if (!gen5_get_rectangles(sna, op, 1)) {
+ gen5_bind_surfaces(sna, op);
+ gen5_get_rectangles(sna, op, 1);
+ }
+
+ op->prim_emit(sna, op, r);
+}
+
+static void
+gen5_render_composite_boxes(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+ DBG(("%s(%d) delta=(%d, %d), src=(%d, %d)/(%d, %d), mask=(%d, %d)/(%d, %d)\n",
+ __FUNCTION__, nbox, op->dst.x, op->dst.y,
+ op->src.offset[0], op->src.offset[1],
+ op->src.width, op->src.height,
+ op->mask.offset[0], op->mask.offset[1],
+ op->mask.width, op->mask.height));
+
+ do {
+ int nbox_this_time = gen5_get_rectangles(sna, op, nbox);
+ if (nbox_this_time == 0) {
+ gen5_bind_surfaces(sna, op);
+ nbox_this_time = gen5_get_rectangles(sna, op, nbox);
+ }
+ nbox -= nbox_this_time;
+
+ do {
+ struct sna_composite_rectangles r;
+
+ DBG((" %s: (%d, %d), (%d, %d)\n",
+ __FUNCTION__,
+ box->x1, box->y1, box->x2, box->y2));
+
+ r.dst.x = box->x1;
+ r.dst.y = box->y1;
+ r.width = box->x2 - box->x1;
+ r.height = box->y2 - box->y1;
+ r.mask = r.src = r.dst;
+ op->prim_emit(sna, op, &r);
+ box++;
+ } while (--nbox_this_time);
+ } while (nbox);
+}
+
+#ifndef MAX
+#define MAX(a,b) ((a) > (b) ? (a) : (b))
+#endif
+
+static uint32_t gen5_bind_video_source(struct sna *sna,
+ struct kgem_bo *src_bo,
+ uint32_t src_offset,
+ int src_width,
+ int src_height,
+ int src_pitch,
+ uint32_t src_surf_format)
+{
+ struct gen5_surface_state *ss;
+
+ sna->kgem.surface -= sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);
+
+ ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss));
+ ss->ss0.surface_type = GEN5_SURFACE_2D;
+ ss->ss0.surface_format = src_surf_format;
+ ss->ss0.color_blend = 1;
+
+ ss->ss1.base_addr =
+ kgem_add_reloc(&sna->kgem,
+ sna->kgem.surface + 1,
+ src_bo,
+ I915_GEM_DOMAIN_SAMPLER << 16,
+ src_offset);
+
+ ss->ss2.width = src_width - 1;
+ ss->ss2.height = src_height - 1;
+ ss->ss3.pitch = src_pitch - 1;
+
+ return sna->kgem.surface * sizeof(uint32_t);
+}
+
+static void gen5_video_bind_surfaces(struct sna *sna,
+ struct sna_composite_op *op,
+ struct sna_video_frame *frame)
+{
+ uint32_t src_surf_format;
+ uint32_t src_surf_base[6];
+ int src_width[6];
+ int src_height[6];
+ int src_pitch[6];
+ uint32_t *binding_table;
+ int n_src, n;
+ uint16_t offset;
+
+
+ src_surf_base[0] = frame->YBufOffset;
+ src_surf_base[1] = frame->YBufOffset;
+ src_surf_base[2] = frame->VBufOffset;
+ src_surf_base[3] = frame->VBufOffset;
+ src_surf_base[4] = frame->UBufOffset;
+ src_surf_base[5] = frame->UBufOffset;
+
+ if (is_planar_fourcc(frame->id)) {
+ src_surf_format = GEN5_SURFACEFORMAT_R8_UNORM;
+ src_width[1] = src_width[0] = frame->width;
+ src_height[1] = src_height[0] = frame->height;
+ src_pitch[1] = src_pitch[0] = frame->pitch[1];
+ src_width[4] = src_width[5] = src_width[2] = src_width[3] =
+ frame->width / 2;
+ src_height[4] = src_height[5] = src_height[2] = src_height[3] =
+ frame->height / 2;
+ src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] =
+ frame->pitch[0];
+ n_src = 6;
+ } else {
+ if (frame->id == FOURCC_UYVY)
+ src_surf_format = GEN5_SURFACEFORMAT_YCRCB_SWAPY;
+ else
+ src_surf_format = GEN5_SURFACEFORMAT_YCRCB_NORMAL;
+
+ src_width[0] = frame->width;
+ src_height[0] = frame->height;
+ src_pitch[0] = frame->pitch[0];
+ n_src = 1;
+ }
+
+ gen5_get_batch(sna);
+ binding_table = gen5_composite_get_binding_table(sna, op, &offset);
+
+ binding_table[0] =
+ gen5_bind_bo(sna,
+ op->dst.bo, op->dst.width, op->dst.height,
+ gen5_get_dest_format_for_depth(op->dst.format),
+ TRUE);
+ for (n = 0; n < n_src; n++) {
+ binding_table[1+n] =
+ gen5_bind_video_source(sna,
+ frame->bo,
+ src_surf_base[n],
+ src_width[n],
+ src_height[n],
+ src_pitch[n],
+ src_surf_format);
+ }
+
+ gen5_emit_state(sna, op, offset);
+}
+
+static Bool
+gen5_render_video(struct sna *sna,
+ struct sna_video *video,
+ struct sna_video_frame *frame,
+ RegionPtr dstRegion,
+ short src_w, short src_h,
+ short drw_w, short drw_h,
+ PixmapPtr pixmap)
+{
+ struct sna_composite_op tmp;
+ int nbox, dxo, dyo, pix_xoff, pix_yoff;
+ float src_scale_x, src_scale_y;
+ struct sna_pixmap *priv;
+ BoxPtr box;
+
+ DBG(("%s: %dx%d -> %dx%d\n", __FUNCTION__, src_w, src_h, drw_w, drw_h));
+
+ priv = sna_pixmap_force_to_gpu(pixmap);
+ if (priv == NULL)
+ return FALSE;
+
+ memset(&tmp, 0, sizeof(tmp));
+
+ tmp.op = PictOpSrc;
+ tmp.dst.pixmap = pixmap;
+ tmp.dst.width = pixmap->drawable.width;
+ tmp.dst.height = pixmap->drawable.height;
+ tmp.dst.format = sna_format_for_depth(pixmap->drawable.depth);
+ tmp.dst.bo = priv->gpu_bo;
+
+ tmp.src.filter = SAMPLER_FILTER_BILINEAR;
+ tmp.src.repeat = SAMPLER_EXTEND_NONE;
+ tmp.u.gen5.wm_kernel =
+ is_planar_fourcc(frame->id) ? WM_KERNEL_VIDEO_PLANAR : WM_KERNEL_VIDEO_PACKED;
+ tmp.u.gen5.ve_id = 1;
+ tmp.is_affine = TRUE;
+ tmp.floats_per_vertex = 3;
+
+ if (!kgem_check_bo(&sna->kgem, tmp.dst.bo))
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo(&sna->kgem, frame->bo))
+ kgem_submit(&sna->kgem);
+
+ if (!kgem_bo_is_dirty(frame->bo))
+ kgem_emit_flush(&sna->kgem);
+
+ gen5_video_bind_surfaces(sna, &tmp, frame);
+ gen5_align_vertex(sna, &tmp);
+
+ /* Set up the offset for translating from the given region (in screen
+ * coordinates) to the backing pixmap.
+ */
+#ifdef COMPOSITE
+ pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
+ pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
+#else
+ pix_xoff = 0;
+ pix_yoff = 0;
+#endif
+
+ dxo = dstRegion->extents.x1;
+ dyo = dstRegion->extents.y1;
+
+ /* Use normalized texture coordinates */
+ src_scale_x = ((float)src_w / frame->width) / (float)drw_w;
+ src_scale_y = ((float)src_h / frame->height) / (float)drw_h;
+
+ box = REGION_RECTS(dstRegion);
+ nbox = REGION_NUM_RECTS(dstRegion);
+ while (nbox--) {
+ BoxRec r;
+
+ r.x1 = box->x1 + pix_xoff;
+ r.x2 = box->x2 + pix_xoff;
+ r.y1 = box->y1 + pix_yoff;
+ r.y2 = box->y2 + pix_yoff;
+
+ if (!gen5_get_rectangles(sna, &tmp, 1)) {
+ gen5_video_bind_surfaces(sna, &tmp, frame);
+ gen5_get_rectangles(sna, &tmp, 1);
+ }
+
+ OUT_VERTEX(r.x2, r.y2);
+ OUT_VERTEX_F((box->x2 - dxo) * src_scale_x);
+ OUT_VERTEX_F((box->y2 - dyo) * src_scale_y);
+
+ OUT_VERTEX(r.x1, r.y2);
+ OUT_VERTEX_F((box->x1 - dxo) * src_scale_x);
+ OUT_VERTEX_F((box->y2 - dyo) * src_scale_y);
+
+ OUT_VERTEX(r.x1, r.y1);
+ OUT_VERTEX_F((box->x1 - dxo) * src_scale_x);
+ OUT_VERTEX_F((box->y1 - dyo) * src_scale_y);
+
+ sna_damage_add_box(&priv->gpu_damage, &r);
+ sna_damage_subtract_box(&priv->cpu_damage, &r);
+ box++;
+ }
+
+ return TRUE;
+}
+
+static int
+gen5_composite_solid_init(struct sna *sna,
+ struct sna_composite_channel *channel,
+ uint32_t color)
+{
+ channel->filter = PictFilterNearest;
+ channel->repeat = RepeatNormal;
+ channel->is_affine = TRUE;
+ channel->is_solid = TRUE;
+ channel->transform = NULL;
+ channel->width = 1;
+ channel->height = 1;
+ channel->card_format = GEN5_SURFACEFORMAT_B8G8R8A8_UNORM;
+
+ channel->bo = sna_render_get_solid(sna, color);
+
+ channel->scale[0] = channel->scale[1] = 1;
+ channel->offset[0] = channel->offset[1] = 0;
+ return channel->bo != NULL;
+}
+
+static int
+gen5_composite_picture(struct sna *sna,
+ PicturePtr picture,
+ struct sna_composite_channel *channel,
+ int x, int y,
+ int w, int h,
+ int dst_x, int dst_y)
+{
+ PixmapPtr pixmap;
+ uint32_t color;
+ int16_t dx, dy;
+
+ DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
+ __FUNCTION__, x, y, w, h, dst_x, dst_y));
+
+ channel->is_solid = FALSE;
+ channel->card_format = -1;
+
+ if (sna_picture_is_solid(picture, &color))
+ return gen5_composite_solid_init(sna, channel, color);
+
+ if (picture->pDrawable == NULL)
+ return sna_render_picture_fixup(sna, picture, channel,
+ x, y, w, h, dst_x, dst_y);
+
+ if (!gen5_check_repeat(picture))
+ return sna_render_picture_fixup(sna, picture, channel,
+ x, y, w, h, dst_x, dst_y);
+
+ if (!gen5_check_filter(picture))
+ return sna_render_picture_fixup(sna, picture, channel,
+ x, y, w, h, dst_x, dst_y);
+
+ channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
+ channel->filter = picture->filter;
+
+ pixmap = get_drawable_pixmap(picture->pDrawable);
+ get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);
+
+ x += dx + picture->pDrawable->x;
+ y += dy + picture->pDrawable->y;
+
+ channel->is_affine = sna_transform_is_affine(picture->transform);
+ if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
+ DBG(("%s: integer translation (%d, %d), removing\n",
+ __FUNCTION__, dx, dy));
+ x += dx;
+ y += dy;
+ channel->transform = NULL;
+ channel->filter = PictFilterNearest;
+ } else
+ channel->transform = picture->transform;
+
+ channel->card_format = gen5_get_card_format(picture->format);
+ if (channel->card_format == -1)
+ return sna_render_picture_convert(sna, picture, channel, pixmap,
+ x, y, w, h, dst_x, dst_y);
+
+ if (pixmap->drawable.width > 8192 || pixmap->drawable.height > 8192)
+ return sna_render_picture_extract(sna, picture, channel,
+ x, y, w, h, dst_x, dst_y);
+
+ return sna_render_pixmap_bo(sna, channel, pixmap,
+ x, y, w, h, dst_x, dst_y);
+}
+
+static void gen5_composite_channel_convert(struct sna_composite_channel *channel)
+{
+ channel->repeat = gen5_repeat(channel->repeat);
+ channel->filter = gen5_filter(channel->filter);
+ if (channel->card_format == -1)
+ channel->card_format = gen5_get_card_format(channel->pict_format);
+}
+
+static void
+gen5_render_composite_done(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ gen5_vertex_flush(sna);
+ _kgem_set_mode(&sna->kgem, KGEM_RENDER);
+ sna->render.op = NULL;
+
+ DBG(("%s()\n", __FUNCTION__));
+
+ sna_render_composite_redirect_done(sna, op);
+
+ if (op->src.bo)
+ kgem_bo_destroy(&sna->kgem, op->src.bo);
+ if (op->mask.bo)
+ kgem_bo_destroy(&sna->kgem, op->mask.bo);
+}
+
+static Bool
+gen5_composite_set_target(struct sna *sna,
+ PicturePtr dst,
+ struct sna_composite_op *op)
+{
+ struct sna_pixmap *priv;
+
+ DBG(("%s: dst=%p\n", __FUNCTION__, dst));
+
+ if (!gen5_check_dst_format(dst->format)) {
+ DBG(("%s: incompatible dst format %08x\n",
+ __FUNCTION__, dst->format));
+ return FALSE;
+ }
+
+ op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
+ op->dst.width = op->dst.pixmap->drawable.width;
+ op->dst.height = op->dst.pixmap->drawable.height;
+ op->dst.format = dst->format;
+ priv = sna_pixmap_force_to_gpu(op->dst.pixmap);
+ if (priv == NULL)
+ return FALSE;
+
+ DBG(("%s: pixmap=%p, format=%08x\n", __FUNCTION__,
+ op->dst.pixmap, (unsigned int)op->dst.format));
+
+
+ op->dst.bo = priv->gpu_bo;
+ if (!priv->gpu_only)
+ op->damage = &priv->gpu_damage;
+
+ DBG(("%s: bo=%p, damage=%p\n", __FUNCTION__, op->dst.bo, op->damage));
+
+ get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
+ &op->dst.x, &op->dst.y);
+ return TRUE;
+}
+
+static inline Bool
+picture_is_cpu(PicturePtr picture)
+{
+ if (!picture->pDrawable)
+ return FALSE;
+
+ /* If it is a solid, try to use the render paths */
+ if (picture->pDrawable->width == 1 &&
+ picture->pDrawable->height == 1 &&
+ picture->repeat)
+ return FALSE;
+
+ return is_cpu(picture->pDrawable);
+}
+
+static Bool
+try_blt(struct sna *sna,
+ PicturePtr dst,
+ PicturePtr source,
+ int width, int height)
+{
+ if (sna->kgem.mode == KGEM_BLT) {
+ DBG(("%s: already performing BLT\n", __FUNCTION__));
+ return TRUE;
+ }
+
+ if (width > 8192 || height > 8192) {
+ DBG(("%s: operation too large for 3D pipe (%d, %d)\n",
+ __FUNCTION__, width, height));
+ return TRUE;
+ }
+
+ /* is the source picture only in cpu memory e.g. a shm pixmap? */
+ return picture_is_cpu(source);
+}
+
+static Bool
+gen5_render_composite(struct sna *sna,
+ uint8_t op,
+ PicturePtr src,
+ PicturePtr mask,
+ PicturePtr dst,
+ int16_t src_x, int16_t src_y,
+ int16_t msk_x, int16_t msk_y,
+ int16_t dst_x, int16_t dst_y,
+ int16_t width, int16_t height,
+ struct sna_composite_op *tmp)
+{
+ DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
+ width, height, sna->kgem.mode));
+
+ if (mask == NULL &&
+ try_blt(sna, dst, src, width, height) &&
+ sna_blt_composite(sna, op,
+ src, dst,
+ src_x, src_y,
+ dst_x, dst_y,
+ width, height, tmp))
+ return TRUE;
+
+ if (op >= ARRAY_SIZE(gen5_blend_op)) {
+ DBG(("%s: unhandled blend op %d\n", __FUNCTION__, op));
+ return FALSE;
+ }
+
+ if (need_tiling(sna, width, height))
+ return sna_tiling_composite(sna,
+ op, src, mask, dst,
+ src_x, src_y,
+ msk_x, msk_y,
+ dst_x, dst_y,
+ width, height,
+ tmp);
+
+ if (!gen5_composite_set_target(sna, dst, tmp)) {
+ DBG(("%s: failed to set composite target\n", __FUNCTION__));
+ return FALSE;
+ }
+
+ if (tmp->dst.width > 8192 || tmp->dst.height > 8192) {
+ if (!sna_render_composite_redirect(sna, tmp,
+ dst_x, dst_y, width, height))
+ return FALSE;
+ }
+
+ DBG(("%s: preparing source\n", __FUNCTION__));
+ switch (gen5_composite_picture(sna, src, &tmp->src,
+ src_x, src_y,
+ width, height,
+ dst_x, dst_y)) {
+ case -1:
+ DBG(("%s: failed to prepare source picture\n", __FUNCTION__));
+ goto cleanup_dst;
+ case 0:
+ gen5_composite_solid_init(sna, &tmp->src, 0);
+ case 1:
+ gen5_composite_channel_convert(&tmp->src);
+ break;
+ }
+
+ tmp->op = op;
+ tmp->is_affine = tmp->src.is_affine;
+ tmp->has_component_alpha = FALSE;
+ tmp->need_magic_ca_pass = FALSE;
+
+ tmp->prim_emit = gen5_emit_composite_primitive;
+ if (mask) {
+ if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
+ tmp->has_component_alpha = TRUE;
+
+ /* Check if it's component alpha that relies on a source alpha and on
+ * the source value. We can only get one of those into the single
+ * source value that we get to blend with.
+ */
+ if (gen5_blend_op[op].src_alpha &&
+ (gen5_blend_op[op].src_blend != GEN5_BLENDFACTOR_ZERO)) {
+ if (op != PictOpOver) {
+ DBG(("%s: unhandled CA blend op %d\n", __FUNCTION__, op));
+ goto cleanup_src;
+ }
+
+ tmp->need_magic_ca_pass = TRUE;
+ tmp->op = PictOpOutReverse;
+ }
+ }
+
+ DBG(("%s: preparing mask\n", __FUNCTION__));
+ switch (gen5_composite_picture(sna, mask, &tmp->mask,
+ msk_x, msk_y,
+ width, height,
+ dst_x, dst_y)) {
+ case -1:
+ DBG(("%s: failed to prepare mask picture\n", __FUNCTION__));
+ goto cleanup_src;
+ case 0:
+ gen5_composite_solid_init(sna, &tmp->mask, 0);
+ case 1:
+ gen5_composite_channel_convert(&tmp->mask);
+ break;
+ }
+
+ tmp->is_affine &= tmp->mask.is_affine;
+
+ if (tmp->src.transform == NULL && tmp->mask.transform == NULL)
+ tmp->prim_emit = gen5_emit_composite_primitive_identity_source_mask;
+
+ tmp->floats_per_vertex = 5 + 2 * !tmp->is_affine;
+ } else {
+ if (tmp->src.is_solid)
+ tmp->prim_emit = gen5_emit_composite_primitive_solid;
+ else if (tmp->src.transform == NULL)
+ tmp->prim_emit = gen5_emit_composite_primitive_identity_source;
+ else if (tmp->src.is_affine)
+ tmp->prim_emit = gen5_emit_composite_primitive_affine_source;
+
+ tmp->floats_per_vertex = 3 + !tmp->is_affine;
+ }
+
+ tmp->u.gen5.wm_kernel =
+ gen5_choose_composite_kernel(tmp->op,
+ tmp->mask.bo != NULL,
+ tmp->has_component_alpha,
+ tmp->is_affine);
+ tmp->u.gen5.ve_id = (tmp->mask.bo != NULL) << 1 | tmp->is_affine;
+
+ tmp->blt = gen5_render_composite_blt;
+ tmp->boxes = gen5_render_composite_boxes;
+ tmp->done = gen5_render_composite_done;
+
+ if (!kgem_check_bo(&sna->kgem, tmp->dst.bo))
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo(&sna->kgem, tmp->src.bo))
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo(&sna->kgem, tmp->mask.bo))
+ kgem_submit(&sna->kgem);
+
+ if (kgem_bo_is_dirty(tmp->src.bo) || kgem_bo_is_dirty(tmp->mask.bo))
+ kgem_emit_flush(&sna->kgem);
+
+ gen5_bind_surfaces(sna, tmp);
+ gen5_align_vertex(sna, tmp);
+
+ sna->render.op = tmp;
+ return TRUE;
+
+cleanup_src:
+ if (tmp->src.bo)
+ kgem_bo_destroy(&sna->kgem, tmp->src.bo);
+cleanup_dst:
+ if (tmp->redirect.real_bo)
+ kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
+ return FALSE;
+}
+
+static void
+gen5_copy_bind_surfaces(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ uint32_t *binding_table;
+ uint16_t offset;
+
+ gen5_get_batch(sna);
+
+ binding_table = gen5_composite_get_binding_table(sna, op, &offset);
+
+ binding_table[0] =
+ gen5_bind_bo(sna,
+ op->dst.bo, op->dst.width, op->dst.height,
+ gen5_get_dest_format_for_depth(op->dst.pixmap->drawable.depth),
+ TRUE);
+ binding_table[1] =
+ gen5_bind_bo(sna,
+ op->src.bo, op->src.width, op->src.height,
+ op->src.card_format,
+ FALSE);
+
+ if (sna->kgem.surface == offset &&
+ *(uint64_t *)(sna->kgem.batch + sna->render_state.gen5.surface_table) == *(uint64_t*)binding_table) {
+ sna->kgem.surface += sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);
+ offset = sna->render_state.gen5.surface_table;
+ }
+
+ gen5_emit_state(sna, op,offset);
+}
+
+static Bool
+gen5_render_copy_boxes(struct sna *sna, uint8_t alu,
+ PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
+ PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
+ const BoxRec *box, int n)
+{
+ struct sna_composite_op tmp;
+
+ if (sna->kgem.mode == KGEM_BLT &&
+ sna_blt_copy_boxes(sna, alu,
+ src_bo, src_dx, src_dy,
+ dst_bo, dst_dx, dst_dy,
+ dst->drawable.bitsPerPixel,
+ box, n))
+ return TRUE;
+
+ if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo ||
+ src->drawable.width > 8192 || src->drawable.height > 8192 ||
+ dst->drawable.width > 8192 || dst->drawable.height > 8192)
+ return sna_blt_copy_boxes(sna, alu,
+ src_bo, src_dx, src_dy,
+ dst_bo, dst_dx, dst_dy,
+ dst->drawable.bitsPerPixel,
+ box, n);
+
+ DBG(("%s (%d, %d)->(%d, %d) x %d\n",
+ __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n));
+
+ memset(&tmp, 0, sizeof(tmp));
+
+ tmp.op = alu == GXcopy ? PictOpSrc : PictOpClear;
+
+ tmp.dst.pixmap = dst;
+ tmp.dst.width = dst->drawable.width;
+ tmp.dst.height = dst->drawable.height;
+ tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
+ tmp.dst.bo = dst_bo;
+
+ tmp.src.bo = src_bo;
+ tmp.src.filter = SAMPLER_FILTER_NEAREST;
+ tmp.src.repeat = SAMPLER_EXTEND_NONE;
+ tmp.src.card_format =
+ gen5_get_card_format_for_depth(src->drawable.depth),
+ tmp.src.width = src->drawable.width;
+ tmp.src.height = src->drawable.height;
+
+ tmp.is_affine = TRUE;
+ tmp.floats_per_vertex = 3;
+ tmp.u.gen5.wm_kernel = WM_KERNEL;
+ tmp.u.gen5.ve_id = 1;
+
+ if (!kgem_check_bo(&sna->kgem, dst_bo))
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo(&sna->kgem, src_bo))
+ kgem_submit(&sna->kgem);
+
+ if (kgem_bo_is_dirty(src_bo))
+ kgem_emit_flush(&sna->kgem);
+
+ gen5_get_batch(sna);
+ gen5_copy_bind_surfaces(sna, &tmp);
+ gen5_align_vertex(sna, &tmp);
+
+ tmp.src.scale[0] = 1. / src->drawable.width;
+ tmp.src.scale[1] = 1. / src->drawable.height;
+ do {
+ int n_this_time = gen5_get_rectangles(sna, &tmp, n);
+ if (n_this_time == 0) {
+ gen5_copy_bind_surfaces(sna, &tmp);
+ n_this_time = gen5_get_rectangles(sna, &tmp, n);
+ }
+ n -= n_this_time;
+
+ do {
+ DBG((" (%d, %d) -> (%d, %d) + (%d, %d)\n",
+ box->x1 + src_dx, box->y1 + src_dy,
+ box->x1 + dst_dx, box->y1 + dst_dy,
+ box->x2 - box->x1, box->y2 - box->y1));
+ OUT_VERTEX(box->x2 + dst_dx, box->y2 + dst_dy);
+ OUT_VERTEX_F((box->x2 + src_dx) * tmp.src.scale[0]);
+ OUT_VERTEX_F((box->y2 + src_dy) * tmp.src.scale[1]);
+
+ OUT_VERTEX(box->x1 + dst_dx, box->y2 + dst_dy);
+ OUT_VERTEX_F((box->x1 + src_dx) * tmp.src.scale[0]);
+ OUT_VERTEX_F((box->y2 + src_dy) * tmp.src.scale[1]);
+
+ OUT_VERTEX(box->x1 + dst_dx, box->y1 + dst_dy);
+ OUT_VERTEX_F((box->x1 + src_dx) * tmp.src.scale[0]);
+ OUT_VERTEX_F((box->y1 + src_dy) * tmp.src.scale[1]);
+
+ box++;
+ } while (--n_this_time);
+ } while (n);
+
+ gen5_vertex_flush(sna);
+ _kgem_set_mode(&sna->kgem, KGEM_RENDER);
+ return TRUE;
+}
+
+static void
+gen5_render_copy_blt(struct sna *sna,
+ const struct sna_copy_op *op,
+ int16_t sx, int16_t sy,
+ int16_t w, int16_t h,
+ int16_t dx, int16_t dy)
+{
+ DBG(("%s: src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n", __FUNCTION__,
+ sx, sy, dx, dy, w, h));
+
+ if (!gen5_get_rectangles(sna, &op->base, 1)) {
+ gen5_copy_bind_surfaces(sna, &op->base);
+ gen5_get_rectangles(sna, &op->base, 1);
+ }
+
+ OUT_VERTEX(dx+w, dy+h);
+ OUT_VERTEX_F((sx+w)*op->base.src.scale[0]);
+ OUT_VERTEX_F((sy+h)*op->base.src.scale[1]);
+
+ OUT_VERTEX(dx, dy+h);
+ OUT_VERTEX_F(sx*op->base.src.scale[0]);
+ OUT_VERTEX_F((sy+h)*op->base.src.scale[1]);
+
+ OUT_VERTEX(dx, dy);
+ OUT_VERTEX_F(sx*op->base.src.scale[0]);
+ OUT_VERTEX_F(sy*op->base.src.scale[1]);
+}
+
+static void
+gen5_render_copy_done(struct sna *sna,
+ const struct sna_copy_op *op)
+{
+ gen5_vertex_flush(sna);
+ _kgem_set_mode(&sna->kgem, KGEM_RENDER);
+
+ DBG(("%s()\n", __FUNCTION__));
+}
+
+static Bool
+gen5_render_copy(struct sna *sna, uint8_t alu,
+ PixmapPtr src, struct kgem_bo *src_bo,
+ PixmapPtr dst, struct kgem_bo *dst_bo,
+ struct sna_copy_op *op)
+{
+ DBG(("%s (alu=%d)\n", __FUNCTION__, alu));
+
+ if (sna->kgem.mode == KGEM_BLT &&
+ sna_blt_copy(sna, alu,
+ src_bo, dst_bo,
+ dst->drawable.bitsPerPixel,
+ op))
+ return TRUE;
+
+ if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo ||
+ src->drawable.width > 8192 || src->drawable.height > 8192 ||
+ dst->drawable.width > 8192 || dst->drawable.height > 8192)
+ return sna_blt_copy(sna, alu, src_bo, dst_bo,
+ dst->drawable.bitsPerPixel,
+ op);
+
+ op->base.op = alu == GXcopy ? PictOpSrc : PictOpClear;
+
+ op->base.dst.pixmap = dst;
+ op->base.dst.width = dst->drawable.width;
+ op->base.dst.height = dst->drawable.height;
+ op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
+ op->base.dst.bo = dst_bo;
+
+ op->base.src.bo = src_bo;
+ op->base.src.card_format =
+ gen5_get_card_format_for_depth(src->drawable.depth),
+ op->base.src.width = src->drawable.width;
+ op->base.src.height = src->drawable.height;
+ op->base.src.scale[0] = 1./src->drawable.width;
+ op->base.src.scale[1] = 1./src->drawable.height;
+ op->base.src.filter = SAMPLER_FILTER_NEAREST;
+ op->base.src.repeat = SAMPLER_EXTEND_NONE;
+
+ op->base.is_affine = true;
+ op->base.floats_per_vertex = 3;
+ op->base.u.gen5.wm_kernel = WM_KERNEL;
+ op->base.u.gen5.ve_id = 1;
+
+ if (!kgem_check_bo(&sna->kgem, dst_bo))
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo(&sna->kgem, src_bo))
+ kgem_submit(&sna->kgem);
+
+ if (kgem_bo_is_dirty(src_bo))
+ kgem_emit_flush(&sna->kgem);
+
+ gen5_copy_bind_surfaces(sna, &op->base);
+ gen5_align_vertex(sna, &op->base);
+
+ op->blt = gen5_render_copy_blt;
+ op->done = gen5_render_copy_done;
+ return TRUE;
+}
+
+static void
+gen5_fill_bind_surfaces(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ uint32_t *binding_table;
+ uint16_t offset;
+
+ gen5_get_batch(sna);
+
+ binding_table = gen5_composite_get_binding_table(sna, op, &offset);
+
+ binding_table[0] =
+ gen5_bind_bo(sna,
+ op->dst.bo, op->dst.width, op->dst.height,
+ gen5_get_dest_format(op->dst.format),
+ TRUE);
+ binding_table[1] =
+ gen5_bind_bo(sna,
+ op->src.bo, 1, 1,
+ GEN5_SURFACEFORMAT_B8G8R8A8_UNORM,
+ FALSE);
+
+ if (sna->kgem.surface == offset &&
+ *(uint64_t *)(sna->kgem.batch + sna->render_state.gen5.surface_table) == *(uint64_t*)binding_table) {
+ sna->kgem.surface +=
+ sizeof(struct gen5_surface_state_padded)/sizeof(uint32_t);
+ offset = sna->render_state.gen5.surface_table;
+ }
+
+ gen5_emit_state(sna, op, offset);
+}
+
+static Bool
+gen5_render_fill_boxes(struct sna *sna,
+ CARD8 op,
+ PictFormat format,
+ const xRenderColor *color,
+ PixmapPtr dst, struct kgem_bo *dst_bo,
+ const BoxRec *box, int n)
+{
+ struct sna_composite_op tmp;
+ uint32_t pixel;
+
+ DBG(("%s op=%x, color=%08x, boxes=%d x [((%d, %d), (%d, %d))...]\n",
+ __FUNCTION__, op, pixel, n, box->x1, box->y1, box->x2, box->y2));
+
+ if (op >= ARRAY_SIZE(gen5_blend_op)) {
+ DBG(("%s: fallback due to unhandled blend op: %d\n",
+ __FUNCTION__, op));
+ return FALSE;
+ }
+
+ if (sna->kgem.mode == KGEM_BLT ||
+ dst->drawable.width > 8192 ||
+ dst->drawable.height > 8192 ||
+ !gen5_check_dst_format(format)) {
+ uint8_t alu = GXcopy;
+
+ if (op == PictOpClear) {
+ alu = GXclear;
+ pixel = 0;
+ op = PictOpSrc;
+ }
+
+ if (op == PictOpOver && color->alpha >= 0xff00)
+ op = PictOpSrc;
+
+ if (op == PictOpSrc &&
+ sna_get_pixel_from_rgba(&pixel,
+ color->red,
+ color->green,
+ color->blue,
+ color->alpha,
+ format) &&
+ sna_blt_fill_boxes(sna, alu,
+ dst_bo, dst->drawable.bitsPerPixel,
+ pixel, box, n))
+ return TRUE;
+
+ if (dst->drawable.width > 8192 ||
+ dst->drawable.height > 8192 ||
+ !gen5_check_dst_format(format))
+ return FALSE;
+ }
+
+ if (!sna_get_pixel_from_rgba(&pixel,
+ color->red,
+ color->green,
+ color->blue,
+ color->alpha,
+ PICT_a8r8g8b8))
+ return FALSE;
+
+ memset(&tmp, 0, sizeof(tmp));
+
+ tmp.op = op;
+
+ tmp.dst.pixmap = dst;
+ tmp.dst.width = dst->drawable.width;
+ tmp.dst.height = dst->drawable.height;
+ tmp.dst.format = format;
+ tmp.dst.bo = dst_bo;
+
+ tmp.src.bo = sna_render_get_solid(sna, pixel);
+ tmp.src.filter = SAMPLER_FILTER_NEAREST;
+ tmp.src.repeat = SAMPLER_EXTEND_REPEAT;
+
+ tmp.is_affine = TRUE;
+ tmp.floats_per_vertex = 3;
+ tmp.u.gen5.wm_kernel = WM_KERNEL;
+ tmp.u.gen5.ve_id = 1;
+
+ if (!kgem_check_bo(&sna->kgem, dst_bo))
+ kgem_submit(&sna->kgem);
+
+ gen5_fill_bind_surfaces(sna, &tmp);
+ gen5_align_vertex(sna, &tmp);
+
+ do {
+ int n_this_time = gen5_get_rectangles(sna, &tmp, n);
+ if (n_this_time == 0) {
+ gen5_fill_bind_surfaces(sna, &tmp);
+ n_this_time = gen5_get_rectangles(sna, &tmp, n);
+ }
+ n -= n_this_time;
+ do {
+ DBG((" (%d, %d), (%d, %d)\n",
+ box->x1, box->y1, box->x2, box->y2));
+ OUT_VERTEX(box->x2, box->y2);
+ OUT_VERTEX_F(1);
+ OUT_VERTEX_F(1);
+
+ OUT_VERTEX(box->x1, box->y2);
+ OUT_VERTEX_F(0);
+ OUT_VERTEX_F(1);
+
+ OUT_VERTEX(box->x1, box->y1);
+ OUT_VERTEX_F(0);
+ OUT_VERTEX_F(0);
+
+ box++;
+ } while (--n_this_time);
+ } while (n);
+
+ gen5_vertex_flush(sna);
+ kgem_bo_destroy(&sna->kgem, tmp.src.bo);
+ _kgem_set_mode(&sna->kgem, KGEM_RENDER);
+ return TRUE;
+}
+
+static void
+gen5_render_fill_blt(struct sna *sna,
+ const struct sna_fill_op *op,
+ int16_t x, int16_t y, int16_t w, int16_t h)
+{
+ DBG(("%s (%d, %d)x(%d, %d)\n", __FUNCTION__, x,y,w,h));
+
+ if (!gen5_get_rectangles(sna, &op->base, 1)) {
+ gen5_fill_bind_surfaces(sna, &op->base);
+ gen5_get_rectangles(sna, &op->base, 1);
+ }
+
+ OUT_VERTEX(x+w, y+h);
+ OUT_VERTEX_F(1);
+ OUT_VERTEX_F(1);
+
+ OUT_VERTEX(x, y+h);
+ OUT_VERTEX_F(0);
+ OUT_VERTEX_F(1);
+
+ OUT_VERTEX(x, y);
+ OUT_VERTEX_F(0);
+ OUT_VERTEX_F(0);
+}
+
+static void
+gen5_render_fill_done(struct sna *sna,
+ const struct sna_fill_op *op)
+{
+ gen5_vertex_flush(sna);
+ kgem_bo_destroy(&sna->kgem, op->base.src.bo);
+ _kgem_set_mode(&sna->kgem, KGEM_RENDER);
+
+ DBG(("%s()\n", __FUNCTION__));
+}
+
+static Bool
+gen5_render_fill(struct sna *sna, uint8_t alu,
+ PixmapPtr dst, struct kgem_bo *dst_bo,
+ uint32_t color,
+ struct sna_fill_op *op)
+{
+ DBG(("%s(alu=%d, color=%08x)\n", __FUNCTION__, alu, color));
+
+ if (sna->kgem.mode == KGEM_BLT &&
+ sna_blt_fill(sna, alu,
+ dst_bo, dst->drawable.bitsPerPixel,
+ color,
+ op))
+ return TRUE;
+
+ if (!(alu == GXcopy || alu == GXclear) ||
+ dst->drawable.width > 8192 || dst->drawable.height > 8192)
+ return sna_blt_fill(sna, alu,
+ dst_bo, dst->drawable.bitsPerPixel,
+ color,
+ op);
+
+ if (alu == GXclear)
+ color = 0;
+
+ op->base.op = color == 0 ? PictOpClear : PictOpSrc;
+
+ op->base.dst.pixmap = dst;
+ op->base.dst.width = dst->drawable.width;
+ op->base.dst.height = dst->drawable.height;
+ op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
+ op->base.dst.bo = dst_bo;
+
+ op->base.src.bo =
+ sna_render_get_solid(sna,
+ sna_rgba_for_color(color,
+ dst->drawable.depth));
+ op->base.src.filter = SAMPLER_FILTER_NEAREST;
+ op->base.src.repeat = SAMPLER_EXTEND_REPEAT;
+
+ op->base.is_affine = TRUE;
+ op->base.floats_per_vertex = 3;
+ op->base.u.gen5.wm_kernel = WM_KERNEL;
+ op->base.u.gen5.ve_id = 1;
+
+ if (!kgem_check_bo(&sna->kgem, dst_bo))
+ kgem_submit(&sna->kgem);
+
+ gen5_fill_bind_surfaces(sna, &op->base);
+ gen5_align_vertex(sna, &op->base);
+
+ op->blt = gen5_render_fill_blt;
+ op->done = gen5_render_fill_done;
+ return TRUE;
+}
+
+static void
+gen5_render_flush(struct sna *sna)
+{
+ gen5_vertex_finish(sna, TRUE);
+}
+
+static void
+gen5_render_context_switch(struct sna *sna,
+ int new_mode)
+{
+ if (sna->kgem.mode == 0)
+ return;
+
+ /* Ironlake has a limitation that a 3D or Media command can't
+ * be the first command after a BLT, unless it's
+ * non-pipelined. Instead of trying to track it and emit a
+ * command at the right time, we just emit a dummy
+ * non-pipelined 3D instruction after each blit.
+ */
+ if (new_mode == KGEM_BLT) {
+#if 0
+ OUT_BATCH(MI_FLUSH |
+ MI_STATE_INSTRUCTION_CACHE_FLUSH |
+ MI_INHIBIT_RENDER_CACHE_FLUSH);
+#endif
+ } else {
+ OUT_BATCH(CMD_POLY_STIPPLE_OFFSET << 16);
+ OUT_BATCH(0);
+ }
+}
+
+static void gen5_render_reset(struct sna *sna)
+{
+ sna->render_state.gen5.needs_invariant = TRUE;
+ sna->render_state.gen5.vb_id = 0;
+ sna->render_state.gen5.ve_id = -1;
+ sna->render_state.gen5.last_primitive = -1;
+ sna->render_state.gen5.last_pipelined_pointers = 0;
+
+ sna->render_state.gen5.drawrect_offset = -1;
+ sna->render_state.gen5.drawrect_limit = -1;
+ sna->render_state.gen5.surface_table = -1;
+}
+
+static void gen5_render_fini(struct sna *sna)
+{
+ kgem_bo_destroy(&sna->kgem, sna->render_state.gen5.general_bo);
+}
+
+static uint32_t gen5_create_vs_unit_state(struct sna_static_stream *stream)
+{
+ struct gen5_vs_unit_state *vs = sna_static_stream_map(stream, sizeof(*vs), 32);
+
+ /* Set up the vertex shader to be disabled (passthrough) */
+ vs->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;
+ vs->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
+ vs->vs6.vs_enable = 0;
+ vs->vs6.vert_cache_disable = 1;
+
+ return sna_static_stream_offsetof(stream, vs);
+}
+
+static uint32_t gen5_create_sf_state(struct sna_static_stream *stream,
+ uint32_t kernel)
+{
+ struct gen5_sf_unit_state *sf_state;
+
+ sf_state = sna_static_stream_map(stream, sizeof(*sf_state), 32);
+
+ sf_state->thread0.grf_reg_count = GEN5_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
+ sf_state->thread0.kernel_start_pointer = kernel >> 6;
+ sf_state->sf1.single_program_flow = 1;
+ /* scratch space is not used in our kernel */
+ sf_state->thread2.scratch_space_base_pointer = 0;
+ sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
+ sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
+ sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
+ /* don't smash vertex header, read start from dw8 */
+ sf_state->thread3.urb_entry_read_offset = 1;
+ sf_state->thread3.dispatch_grf_start_reg = 3;
+ sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
+ sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
+ sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
+ sf_state->thread4.stats_enable = 1;
+ sf_state->sf5.viewport_transform = FALSE; /* skip viewport */
+ sf_state->sf6.cull_mode = GEN5_CULLMODE_NONE;
+ sf_state->sf6.scissor = 0;
+ sf_state->sf7.trifan_pv = 2;
+ sf_state->sf6.dest_org_vbias = 0x8;
+ sf_state->sf6.dest_org_hbias = 0x8;
+
+ return sna_static_stream_offsetof(stream, sf_state);
+}
+
+static uint32_t gen5_create_sampler_state(struct sna_static_stream *stream,
+ sampler_filter_t src_filter,
+ sampler_extend_t src_extend,
+ sampler_filter_t mask_filter,
+ sampler_extend_t mask_extend)
+{
+ struct gen5_sampler_state *sampler_state;
+
+ sampler_state = sna_static_stream_map(stream,
+ sizeof(struct gen5_sampler_state) * 2,
+ 32);
+ sampler_state_init(&sampler_state[0], src_filter, src_extend);
+ sampler_state_init(&sampler_state[1], mask_filter, mask_extend);
+
+ return sna_static_stream_offsetof(stream, sampler_state);
+}
+
+static void gen5_init_wm_state(struct gen5_wm_unit_state *state,
+ Bool has_mask,
+ uint32_t kernel,
+ uint32_t sampler)
+{
+ state->thread0.grf_reg_count = GEN5_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
+ state->thread0.kernel_start_pointer = kernel >> 6;
+
+ state->thread1.single_program_flow = 0;
+
+ /* scratch space is not used in our kernel */
+ state->thread2.scratch_space_base_pointer = 0;
+ state->thread2.per_thread_scratch_space = 0;
+
+ state->thread3.const_urb_entry_read_length = 0;
+ state->thread3.const_urb_entry_read_offset = 0;
+
+ state->thread3.urb_entry_read_offset = 0;
+ /* wm kernel use urb from 3, see wm_program in compiler module */
+ state->thread3.dispatch_grf_start_reg = 3; /* must match kernel */
+
+ state->wm4.sampler_count = 0; /* hardware requirement */
+
+ state->wm4.sampler_state_pointer = sampler >> 5;
+ state->wm5.max_threads = PS_MAX_THREADS - 1;
+ state->wm5.transposed_urb_read = 0;
+ state->wm5.thread_dispatch_enable = 1;
+ /* just use 16-pixel dispatch (4 subspans), don't need to change kernel
+ * start point
+ */
+ state->wm5.enable_16_pix = 1;
+ state->wm5.enable_8_pix = 0;
+ state->wm5.early_depth_test = 1;
+
+ /* Each pair of attributes (src/mask coords) is two URB entries */
+ if (has_mask) {
+ state->thread1.binding_table_entry_count = 3; /* 2 tex and fb */
+ state->thread3.urb_entry_read_length = 4;
+ } else {
+ state->thread1.binding_table_entry_count = 2; /* 1 tex and fb */
+ state->thread3.urb_entry_read_length = 2;
+ }
+
+ /* binding table entry count is only used for prefetching,
+ * and it has to be set 0 for Ironlake
+ */
+ state->thread1.binding_table_entry_count = 0;
+}
+
+static uint32_t gen5_create_cc_viewport(struct sna_static_stream *stream)
+{
+ struct gen5_cc_viewport vp;
+
+ vp.min_depth = -1.e35;
+ vp.max_depth = 1.e35;
+
+ return sna_static_stream_add(stream, &vp, sizeof(vp), 32);
+}
+
+static uint32_t gen5_create_cc_unit_state(struct sna_static_stream *stream)
+{
+ uint8_t *ptr, *base;
+ uint32_t vp;
+ int i, j;
+
+ vp = gen5_create_cc_viewport(stream);
+ base = ptr =
+ sna_static_stream_map(stream,
+ GEN5_BLENDFACTOR_COUNT*GEN5_BLENDFACTOR_COUNT*64,
+ 64);
+
+ for (i = 0; i < GEN5_BLENDFACTOR_COUNT; i++) {
+ for (j = 0; j < GEN5_BLENDFACTOR_COUNT; j++) {
+ struct gen5_cc_unit_state *state =
+ (struct gen5_cc_unit_state *)ptr;
+
+ state->cc3.blend_enable = 1; /* enable color blend */
+ state->cc4.cc_viewport_state_offset = vp >> 5;
+
+ state->cc5.logicop_func = 0xc; /* COPY */
+ state->cc5.ia_blend_function = GEN5_BLENDFUNCTION_ADD;
+
+ /* Fill in alpha blend factors same as color, for the future. */
+ state->cc5.ia_src_blend_factor = i;
+ state->cc5.ia_dest_blend_factor = j;
+
+ state->cc6.blend_function = GEN5_BLENDFUNCTION_ADD;
+ state->cc6.clamp_post_alpha_blend = 1;
+ state->cc6.clamp_pre_alpha_blend = 1;
+ state->cc6.src_blend_factor = i;
+ state->cc6.dest_blend_factor = j;
+
+ ptr += 64;
+ }
+ }
+
+ return sna_static_stream_offsetof(stream, base);
+}
+
+static Bool gen5_render_setup(struct sna *sna)
+{
+ struct gen5_render_state *state = &sna->render_state.gen5;
+ struct sna_static_stream general;
+ struct gen5_wm_unit_state_padded *wm_state;
+ uint32_t sf[2], wm[KERNEL_COUNT];
+ int i, j, k, l, m;
+
+ sna_static_stream_init(&general);
+
+ /* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
+ * dumps, you know it points to zero.
+ */
+ null_create(&general);
+
+ /* Set up the two SF states (one for blending with a mask, one without) */
+ sf[0] = sna_static_stream_add(&general,
+ sf_kernel,
+ sizeof(sf_kernel),
+ 64);
+ sf[1] = sna_static_stream_add(&general,
+ sf_kernel_mask,
+ sizeof(sf_kernel_mask),
+ 64);
+ for (m = 0; m < KERNEL_COUNT; m++) {
+ wm[m] = sna_static_stream_add(&general,
+ wm_kernels[m].data,
+ wm_kernels[m].size,
+ 64);
+ }
+
+ state->vs = gen5_create_vs_unit_state(&general);
+
+ state->sf[0] = gen5_create_sf_state(&general, sf[0]);
+ state->sf[1] = gen5_create_sf_state(&general, sf[1]);
+
+
+ /* Set up the WM states: each filter/extend type for source and mask, per
+ * kernel.
+ */
+ wm_state = sna_static_stream_map(&general,
+ sizeof(*wm_state) * KERNEL_COUNT *
+ FILTER_COUNT * EXTEND_COUNT *
+ FILTER_COUNT * EXTEND_COUNT,
+ 64);
+ state->wm = sna_static_stream_offsetof(&general, wm_state);
+ for (i = 0; i < FILTER_COUNT; i++) {
+ for (j = 0; j < EXTEND_COUNT; j++) {
+ for (k = 0; k < FILTER_COUNT; k++) {
+ for (l = 0; l < EXTEND_COUNT; l++) {
+ uint32_t sampler_state;
+
+ sampler_state =
+ gen5_create_sampler_state(&general,
+ i, j,
+ k, l);
+
+ for (m = 0; m < KERNEL_COUNT; m++) {
+ gen5_init_wm_state(&wm_state->state,
+ wm_kernels[m].has_mask,
+ wm[m],
+ sampler_state);
+ wm_state++;
+ }
+ }
+ }
+ }
+ }
+
+ state->cc = gen5_create_cc_unit_state(&general);
+
+ state->general_bo = sna_static_stream_fini(sna, &general);
+ return state->general_bo != NULL;
+}
+
+Bool gen5_render_init(struct sna *sna)
+{
+ if (!gen5_render_setup(sna))
+ return FALSE;
+
+ gen5_render_reset(sna);
+
+ sna->render.composite = gen5_render_composite;
+ sna->render.video = gen5_render_video;
+
+ sna->render.copy_boxes = gen5_render_copy_boxes;
+ sna->render.copy = gen5_render_copy;
+
+ sna->render.fill_boxes = gen5_render_fill_boxes;
+ sna->render.fill = gen5_render_fill;
+
+ sna->render.flush = gen5_render_flush;
+ sna->render.context_switch = gen5_render_context_switch;
+ sna->render.reset = gen5_render_reset;
+ sna->render.fini = gen5_render_fini;
+
+ sna->render.max_3d_size = 8192;
+ return TRUE;
+}
diff --git a/src/sna/gen5_render.h b/src/sna/gen5_render.h
new file mode 100644
index 0000000..190580e
--- a/dev/null
+++ b/src/sna/gen5_render.h
@@ -0,0 +1,2730 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef GEN5_RENDER_H
+#define GEN5_RENDER_H
+
+#define GEN5_3D(Pipeline,Opcode,Subopcode) ((3 << 29) | \
+ ((Pipeline) << 27) | \
+ ((Opcode) << 24) | \
+ ((Subopcode) << 16))
+
+#define GEN5_URB_FENCE GEN5_3D(0, 0, 0)
+#define GEN5_CS_URB_STATE GEN5_3D(0, 0, 1)
+#define GEN5_CONSTANT_BUFFER GEN5_3D(0, 0, 2)
+#define GEN5_STATE_PREFETCH GEN5_3D(0, 0, 3)
+
+#define GEN5_STATE_BASE_ADDRESS GEN5_3D(0, 1, 1)
+#define GEN5_STATE_SIP GEN5_3D(0, 1, 2)
+
+#define GEN5_PIPELINE_SELECT GEN5_3D(1, 1, 4)
+
+#define GEN5_MEDIA_STATE_POINTERS GEN5_3D(2, 0, 0)
+#define GEN5_MEDIA_OBJECT GEN5_3D(2, 1, 0)
+
+#define GEN5_3DSTATE_PIPELINED_POINTERS GEN5_3D(3, 0, 0)
+#define GEN5_3DSTATE_BINDING_TABLE_POINTERS GEN5_3D(3, 0, 1)
+# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS (1 << 12)/* for GEN6 */
+# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_GS (1 << 9) /* for GEN6 */
+# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_VS (1 << 8) /* for GEN6 */
+
+#define GEN5_3DSTATE_VERTEX_BUFFERS GEN5_3D(3, 0, 8)
+#define GEN5_3DSTATE_VERTEX_ELEMENTS GEN5_3D(3, 0, 9)
+#define GEN5_3DSTATE_INDEX_BUFFER GEN5_3D(3, 0, 0xa)
+#define GEN5_3DSTATE_VF_STATISTICS GEN5_3D(3, 0, 0xb)
+
+#define GEN5_3DSTATE_DRAWING_RECTANGLE GEN5_3D(3, 1, 0)
+#define GEN5_3DSTATE_CONSTANT_COLOR GEN5_3D(3, 1, 1)
+#define GEN5_3DSTATE_SAMPLER_PALETTE_LOAD GEN5_3D(3, 1, 2)
+#define GEN5_3DSTATE_CHROMA_KEY GEN5_3D(3, 1, 4)
+#define GEN5_3DSTATE_DEPTH_BUFFER GEN5_3D(3, 1, 5)
+# define GEN5_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT 29
+# define GEN5_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT 18
+
+#define GEN5_3DSTATE_POLY_STIPPLE_OFFSET GEN5_3D(3, 1, 6)
+#define GEN5_3DSTATE_POLY_STIPPLE_PATTERN GEN5_3D(3, 1, 7)
+#define GEN5_3DSTATE_LINE_STIPPLE GEN5_3D(3, 1, 8)
+#define GEN5_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP GEN5_3D(3, 1, 9)
+/* These two are BLC and CTG only, not BW or CL */
+#define GEN5_3DSTATE_AA_LINE_PARAMS GEN5_3D(3, 1, 0xa)
+#define GEN5_3DSTATE_GS_SVB_INDEX GEN5_3D(3, 1, 0xb)
+
+#define GEN5_PIPE_CONTROL GEN5_3D(3, 2, 0)
+
+#define GEN5_3DPRIMITIVE GEN5_3D(3, 3, 0)
+
+#define GEN5_3DSTATE_CLEAR_PARAMS GEN5_3D(3, 1, 0x10)
+/* DW1 */
+# define GEN5_3DSTATE_DEPTH_CLEAR_VALID (1 << 15)
+
+/* for GEN6+ */
+#define GEN6_3DSTATE_SAMPLER_STATE_POINTERS GEN5_3D(3, 0, 0x02)
+# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS (1 << 12)
+# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_GS (1 << 9)
+# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_VS (1 << 8)
+
+#define GEN6_3DSTATE_URB GEN5_3D(3, 0, 0x05)
+/* DW1 */
+# define GEN6_3DSTATE_URB_VS_SIZE_SHIFT 16
+# define GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT 0
+/* DW2 */
+# define GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT 8
+# define GEN6_3DSTATE_URB_GS_SIZE_SHIFT 0
+
+#define GEN6_3DSTATE_VIEWPORT_STATE_POINTERS GEN5_3D(3, 0, 0x0d)
+# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC (1 << 12)
+# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_SF (1 << 11)
+# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CLIP (1 << 10)
+
+#define GEN6_3DSTATE_CC_STATE_POINTERS GEN5_3D(3, 0, 0x0e)
+
+#define GEN6_3DSTATE_VS GEN5_3D(3, 0, 0x10)
+
+#define GEN6_3DSTATE_GS GEN5_3D(3, 0, 0x11)
+/* DW4 */
+# define GEN6_3DSTATE_GS_DISPATCH_START_GRF_SHIFT 0
+
+#define GEN6_3DSTATE_CLIP GEN5_3D(3, 0, 0x12)
+
+#define GEN6_3DSTATE_SF GEN5_3D(3, 0, 0x13)
+/* DW1 */
+# define GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT 22
+# define GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT 11
+# define GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT 4
+/* DW2 */
+/* DW3 */
+# define GEN6_3DSTATE_SF_CULL_BOTH (0 << 29)
+# define GEN6_3DSTATE_SF_CULL_NONE (1 << 29)
+# define GEN6_3DSTATE_SF_CULL_FRONT (2 << 29)
+# define GEN6_3DSTATE_SF_CULL_BACK (3 << 29)
+/* DW4 */
+# define GEN6_3DSTATE_SF_TRI_PROVOKE_SHIFT 29
+# define GEN6_3DSTATE_SF_LINE_PROVOKE_SHIFT 27
+# define GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT 25
+
+
+#define GEN6_3DSTATE_WM GEN5_3D(3, 0, 0x14)
+/* DW2 */
+# define GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF 27
+# define GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
+/* DW4 */
+# define GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT 16
+/* DW5 */
+# define GEN6_3DSTATE_WM_MAX_THREADS_SHIFT 25
+# define GEN6_3DSTATE_WM_DISPATCH_ENABLE (1 << 19)
+# define GEN6_3DSTATE_WM_16_DISPATCH_ENABLE (1 << 1)
+# define GEN6_3DSTATE_WM_8_DISPATCH_ENABLE (1 << 0)
+/* DW6 */
+# define GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT 20
+# define GEN6_3DSTATE_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 15)
+# define GEN6_3DSTATE_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 14)
+# define GEN6_3DSTATE_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 13)
+# define GEN6_3DSTATE_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 12)
+# define GEN6_3DSTATE_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11)
+# define GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10)
+
+
+#define GEN6_3DSTATE_CONSTANT_VS GEN5_3D(3, 0, 0x15)
+#define GEN6_3DSTATE_CONSTANT_GS GEN5_3D(3, 0, 0x16)
+#define GEN6_3DSTATE_CONSTANT_PS GEN5_3D(3, 0, 0x17)
+
+#define GEN6_3DSTATE_SAMPLE_MASK GEN5_3D(3, 0, 0x18)
+
+#define GEN6_3DSTATE_MULTISAMPLE GEN5_3D(3, 1, 0x0d)
+/* DW1 */
+# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER (0 << 4)
+# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_UPPER_LEFT (1 << 4)
+# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1 (0 << 1)
+# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_4 (2 << 1)
+# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_8 (3 << 1)
+
+#define PIPELINE_SELECT_3D 0
+#define PIPELINE_SELECT_MEDIA 1
+
+#define UF0_CS_REALLOC (1 << 13)
+#define UF0_VFE_REALLOC (1 << 12)
+#define UF0_SF_REALLOC (1 << 11)
+#define UF0_CLIP_REALLOC (1 << 10)
+#define UF0_GS_REALLOC (1 << 9)
+#define UF0_VS_REALLOC (1 << 8)
+#define UF1_CLIP_FENCE_SHIFT 20
+#define UF1_GS_FENCE_SHIFT 10
+#define UF1_VS_FENCE_SHIFT 0
+#define UF2_CS_FENCE_SHIFT 20
+#define UF2_VFE_FENCE_SHIFT 10
+#define UF2_SF_FENCE_SHIFT 0
+
+/* for GEN5_STATE_BASE_ADDRESS */
+#define BASE_ADDRESS_MODIFY (1 << 0)
+
+/* for GEN5_3DSTATE_PIPELINED_POINTERS */
+#define GEN5_GS_DISABLE 0
+#define GEN5_GS_ENABLE 1
+#define GEN5_CLIP_DISABLE 0
+#define GEN5_CLIP_ENABLE 1
+
+/* for GEN5_PIPE_CONTROL */
+#define GEN5_PIPE_CONTROL_NOWRITE (0 << 14)
+#define GEN5_PIPE_CONTROL_WRITE_QWORD (1 << 14)
+#define GEN5_PIPE_CONTROL_WRITE_DEPTH (2 << 14)
+#define GEN5_PIPE_CONTROL_WRITE_TIME (3 << 14)
+#define GEN5_PIPE_CONTROL_DEPTH_STALL (1 << 13)
+#define GEN5_PIPE_CONTROL_WC_FLUSH (1 << 12)
+#define GEN5_PIPE_CONTROL_IS_FLUSH (1 << 11)
+#define GEN5_PIPE_CONTROL_TC_FLUSH (1 << 10)
+#define GEN5_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8)
+#define GEN5_PIPE_CONTROL_GLOBAL_GTT (1 << 2)
+#define GEN5_PIPE_CONTROL_LOCAL_PGTT (0 << 2)
+#define GEN5_PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0)
+
+/* VERTEX_BUFFER_STATE Structure */
+#define VB0_BUFFER_INDEX_SHIFT 27
+#define GEN6_VB0_BUFFER_INDEX_SHIFT 26
+#define VB0_VERTEXDATA (0 << 26)
+#define VB0_INSTANCEDATA (1 << 26)
+#define GEN6_VB0_VERTEXDATA (0 << 20)
+#define GEN6_VB0_INSTANCEDATA (1 << 20)
+#define VB0_BUFFER_PITCH_SHIFT 0
+
+/* VERTEX_ELEMENT_STATE Structure */
+#define VE0_VERTEX_BUFFER_INDEX_SHIFT 27
+#define GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT 26 /* for GEN6 */
+#define VE0_VALID (1 << 26)
+#define GEN6_VE0_VALID (1 << 25) /* for GEN6 */
+#define VE0_FORMAT_SHIFT 16
+#define VE0_OFFSET_SHIFT 0
+#define VE1_VFCOMPONENT_0_SHIFT 28
+#define VE1_VFCOMPONENT_1_SHIFT 24
+#define VE1_VFCOMPONENT_2_SHIFT 20
+#define VE1_VFCOMPONENT_3_SHIFT 16
+#define VE1_DESTINATION_ELEMENT_OFFSET_SHIFT 0
+
+/* 3DPRIMITIVE bits */
+#define GEN5_3DPRIMITIVE_VERTEX_SEQUENTIAL (0 << 15)
+#define GEN5_3DPRIMITIVE_VERTEX_RANDOM (1 << 15)
+/* Primitive types are in gen5_defines.h */
+#define GEN5_3DPRIMITIVE_TOPOLOGY_SHIFT 10
+
+#define GEN5_SVG_CTL 0x7400
+
+#define GEN5_SVG_CTL_GS_BA (0 << 8)
+#define GEN5_SVG_CTL_SS_BA (1 << 8)
+#define GEN5_SVG_CTL_IO_BA (2 << 8)
+#define GEN5_SVG_CTL_GS_AUB (3 << 8)
+#define GEN5_SVG_CTL_IO_AUB (4 << 8)
+#define GEN5_SVG_CTL_SIP (5 << 8)
+
+#define GEN5_SVG_RDATA 0x7404
+#define GEN5_SVG_WORK_CTL 0x7408
+
+#define GEN5_VF_CTL 0x7500
+
+#define GEN5_VF_CTL_SNAPSHOT_COMPLETE (1 << 31)
+#define GEN5_VF_CTL_SNAPSHOT_MUX_SELECT_THREADID (0 << 8)
+#define GEN5_VF_CTL_SNAPSHOT_MUX_SELECT_VF_DEBUG (1 << 8)
+#define GEN5_VF_CTL_SNAPSHOT_TYPE_VERTEX_SEQUENCE (0 << 4)
+#define GEN5_VF_CTL_SNAPSHOT_TYPE_VERTEX_INDEX (1 << 4)
+#define GEN5_VF_CTL_SKIP_INITIAL_PRIMITIVES (1 << 3)
+#define GEN5_VF_CTL_MAX_PRIMITIVES_LIMIT_ENABLE (1 << 2)
+#define GEN5_VF_CTL_VERTEX_RANGE_LIMIT_ENABLE (1 << 1)
+#define GEN5_VF_CTL_SNAPSHOT_ENABLE (1 << 0)
+
+#define GEN5_VF_STRG_VAL 0x7504
+#define GEN5_VF_STR_VL_OVR 0x7508
+#define GEN5_VF_VC_OVR 0x750c
+#define GEN5_VF_STR_PSKIP 0x7510
+#define GEN5_VF_MAX_PRIM 0x7514
+#define GEN5_VF_RDATA 0x7518
+
+#define GEN5_VS_CTL 0x7600
+#define GEN5_VS_CTL_SNAPSHOT_COMPLETE (1 << 31)
+#define GEN5_VS_CTL_SNAPSHOT_MUX_VERTEX_0 (0 << 8)
+#define GEN5_VS_CTL_SNAPSHOT_MUX_VERTEX_1 (1 << 8)
+#define GEN5_VS_CTL_SNAPSHOT_MUX_VALID_COUNT (2 << 8)
+#define GEN5_VS_CTL_SNAPSHOT_MUX_VS_KERNEL_POINTER (3 << 8)
+#define GEN5_VS_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
+#define GEN5_VS_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
+#define GEN5_VS_CTL_SNAPSHOT_ENABLE (1 << 0)
+
+#define GEN5_VS_STRG_VAL 0x7604
+#define GEN5_VS_RDATA 0x7608
+
+#define GEN5_SF_CTL 0x7b00
+#define GEN5_SF_CTL_SNAPSHOT_COMPLETE (1 << 31)
+#define GEN5_SF_CTL_SNAPSHOT_MUX_VERTEX_0_FF_ID (0 << 8)
+#define GEN5_SF_CTL_SNAPSHOT_MUX_VERTEX_0_REL_COUNT (1 << 8)
+#define GEN5_SF_CTL_SNAPSHOT_MUX_VERTEX_1_FF_ID (2 << 8)
+#define GEN5_SF_CTL_SNAPSHOT_MUX_VERTEX_1_REL_COUNT (3 << 8)
+#define GEN5_SF_CTL_SNAPSHOT_MUX_VERTEX_2_FF_ID (4 << 8)
+#define GEN5_SF_CTL_SNAPSHOT_MUX_VERTEX_2_REL_COUNT (5 << 8)
+#define GEN5_SF_CTL_SNAPSHOT_MUX_VERTEX_COUNT (6 << 8)
+#define GEN5_SF_CTL_SNAPSHOT_MUX_SF_KERNEL_POINTER (7 << 8)
+#define GEN5_SF_CTL_MIN_MAX_PRIMITIVE_RANGE_ENABLE (1 << 4)
+#define GEN5_SF_CTL_DEBUG_CLIP_RECTANGLE_ENABLE (1 << 3)
+#define GEN5_SF_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
+#define GEN5_SF_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
+#define GEN5_SF_CTL_SNAPSHOT_ENABLE (1 << 0)
+
+#define GEN5_SF_STRG_VAL 0x7b04
+#define GEN5_SF_RDATA 0x7b18
+
+#define GEN5_WIZ_CTL 0x7c00
+#define GEN5_WIZ_CTL_SNAPSHOT_COMPLETE (1 << 31)
+#define GEN5_WIZ_CTL_SUBSPAN_INSTANCE_SHIFT 16
+#define GEN5_WIZ_CTL_SNAPSHOT_MUX_WIZ_KERNEL_POINTER (0 << 8)
+#define GEN5_WIZ_CTL_SNAPSHOT_MUX_SUBSPAN_INSTANCE (1 << 8)
+#define GEN5_WIZ_CTL_SNAPSHOT_MUX_PRIMITIVE_SEQUENCE (2 << 8)
+#define GEN5_WIZ_CTL_SINGLE_SUBSPAN_DISPATCH (1 << 6)
+#define GEN5_WIZ_CTL_IGNORE_COLOR_SCOREBOARD_STALLS (1 << 5)
+#define GEN5_WIZ_CTL_ENABLE_SUBSPAN_INSTANCE_COMPARE (1 << 4)
+#define GEN5_WIZ_CTL_USE_UPSTREAM_SNAPSHOT_FLAG (1 << 3)
+#define GEN5_WIZ_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
+#define GEN5_WIZ_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
+#define GEN5_WIZ_CTL_SNAPSHOT_ENABLE (1 << 0)
+
+#define GEN5_WIZ_STRG_VAL 0x7c04
+#define GEN5_WIZ_RDATA 0x7c18
+
+#define GEN5_TS_CTL 0x7e00
+#define GEN5_TS_CTL_SNAPSHOT_COMPLETE (1 << 31)
+#define GEN5_TS_CTL_SNAPSHOT_MESSAGE_ERROR (0 << 8)
+#define GEN5_TS_CTL_SNAPSHOT_INTERFACE_DESCRIPTOR (3 << 8)
+#define GEN5_TS_CTL_SNAPSHOT_ALL_CHILD_THREADS (1 << 2)
+#define GEN5_TS_CTL_SNAPSHOT_ALL_ROOT_THREADS (1 << 1)
+#define GEN5_TS_CTL_SNAPSHOT_ENABLE (1 << 0)
+
+#define GEN5_TS_STRG_VAL 0x7e04
+#define GEN5_TS_RDATA 0x7e08
+
+#define GEN5_TD_CTL 0x8000
+#define GEN5_TD_CTL_MUX_SHIFT 8
+#define GEN5_TD_CTL_EXTERNAL_HALT_R0_DEBUG_MATCH (1 << 7)
+#define GEN5_TD_CTL_FORCE_EXTERNAL_HALT (1 << 6)
+#define GEN5_TD_CTL_EXCEPTION_MASK_OVERRIDE (1 << 5)
+#define GEN5_TD_CTL_FORCE_THREAD_BREAKPOINT_ENABLE (1 << 4)
+#define GEN5_TD_CTL_BREAKPOINT_ENABLE (1 << 2)
+#define GEN5_TD_CTL2 0x8004
+#define GEN5_TD_CTL2_ILLEGAL_OPCODE_EXCEPTION_OVERRIDE (1 << 28)
+#define GEN5_TD_CTL2_MASKSTACK_EXCEPTION_OVERRIDE (1 << 26)
+#define GEN5_TD_CTL2_SOFTWARE_EXCEPTION_OVERRIDE (1 << 25)
+#define GEN5_TD_CTL2_ACTIVE_THREAD_LIMIT_SHIFT 16
+#define GEN5_TD_CTL2_ACTIVE_THREAD_LIMIT_ENABLE (1 << 8)
+#define GEN5_TD_CTL2_THREAD_SPAWNER_EXECUTION_MASK_ENABLE (1 << 7)
+#define GEN5_TD_CTL2_WIZ_EXECUTION_MASK_ENABLE (1 << 6)
+#define GEN5_TD_CTL2_SF_EXECUTION_MASK_ENABLE (1 << 5)
+#define GEN5_TD_CTL2_CLIPPER_EXECUTION_MASK_ENABLE (1 << 4)
+#define GEN5_TD_CTL2_GS_EXECUTION_MASK_ENABLE (1 << 3)
+#define GEN5_TD_CTL2_VS_EXECUTION_MASK_ENABLE (1 << 0)
+#define GEN5_TD_VF_VS_EMSK 0x8008
+#define GEN5_TD_GS_EMSK 0x800c
+#define GEN5_TD_CLIP_EMSK 0x8010
+#define GEN5_TD_SF_EMSK 0x8014
+#define GEN5_TD_WIZ_EMSK 0x8018
+#define GEN5_TD_0_6_EHTRG_VAL 0x801c
+#define GEN5_TD_0_7_EHTRG_VAL 0x8020
+#define GEN5_TD_0_6_EHTRG_MSK 0x8024
+#define GEN5_TD_0_7_EHTRG_MSK 0x8028
+#define GEN5_TD_RDATA 0x802c
+#define GEN5_TD_TS_EMSK 0x8030
+
+#define GEN5_EU_CTL 0x8800
+#define GEN5_EU_CTL_SELECT_SHIFT 16
+#define GEN5_EU_CTL_DATA_MUX_SHIFT 8
+#define GEN5_EU_ATT_0 0x8810
+#define GEN5_EU_ATT_1 0x8814
+#define GEN5_EU_ATT_DATA_0 0x8820
+#define GEN5_EU_ATT_DATA_1 0x8824
+#define GEN5_EU_ATT_CLR_0 0x8830
+#define GEN5_EU_ATT_CLR_1 0x8834
+#define GEN5_EU_RDATA 0x8840
+
+/* 3D state:
+ */
+#define _3DOP_3DSTATE_PIPELINED 0x0
+#define _3DOP_3DSTATE_NONPIPELINED 0x1
+#define _3DOP_3DCONTROL 0x2
+#define _3DOP_3DPRIMITIVE 0x3
+
+#define _3DSTATE_PIPELINED_POINTERS 0x00
+#define _3DSTATE_BINDING_TABLE_POINTERS 0x01
+#define _3DSTATE_VERTEX_BUFFERS 0x08
+#define _3DSTATE_VERTEX_ELEMENTS 0x09
+#define _3DSTATE_INDEX_BUFFER 0x0A
+#define _3DSTATE_VF_STATISTICS 0x0B
+#define _3DSTATE_DRAWING_RECTANGLE 0x00
+#define _3DSTATE_CONSTANT_COLOR 0x01
+#define _3DSTATE_SAMPLER_PALETTE_LOAD 0x02
+#define _3DSTATE_CHROMA_KEY 0x04
+#define _3DSTATE_DEPTH_BUFFER 0x05
+#define _3DSTATE_POLY_STIPPLE_OFFSET 0x06
+#define _3DSTATE_POLY_STIPPLE_PATTERN 0x07
+#define _3DSTATE_LINE_STIPPLE 0x08
+#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x09
+#define _3DCONTROL 0x00
+#define _3DPRIMITIVE 0x00
+
+#define _3DPRIM_POINTLIST 0x01
+#define _3DPRIM_LINELIST 0x02
+#define _3DPRIM_LINESTRIP 0x03
+#define _3DPRIM_TRILIST 0x04
+#define _3DPRIM_TRISTRIP 0x05
+#define _3DPRIM_TRIFAN 0x06
+#define _3DPRIM_QUADLIST 0x07
+#define _3DPRIM_QUADSTRIP 0x08
+#define _3DPRIM_LINELIST_ADJ 0x09
+#define _3DPRIM_LINESTRIP_ADJ 0x0A
+#define _3DPRIM_TRILIST_ADJ 0x0B
+#define _3DPRIM_TRISTRIP_ADJ 0x0C
+#define _3DPRIM_TRISTRIP_REVERSE 0x0D
+#define _3DPRIM_POLYGON 0x0E
+#define _3DPRIM_RECTLIST 0x0F
+#define _3DPRIM_LINELOOP 0x10
+#define _3DPRIM_POINTLIST_BF 0x11
+#define _3DPRIM_LINESTRIP_CONT 0x12
+#define _3DPRIM_LINESTRIP_BF 0x13
+#define _3DPRIM_LINESTRIP_CONT_BF 0x14
+#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15
+
+#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0
+#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM 1
+
+#define GEN5_ANISORATIO_2 0
+#define GEN5_ANISORATIO_4 1
+#define GEN5_ANISORATIO_6 2
+#define GEN5_ANISORATIO_8 3
+#define GEN5_ANISORATIO_10 4
+#define GEN5_ANISORATIO_12 5
+#define GEN5_ANISORATIO_14 6
+#define GEN5_ANISORATIO_16 7
+
+#define GEN5_BLENDFACTOR_ONE 0x1
+#define GEN5_BLENDFACTOR_SRC_COLOR 0x2
+#define GEN5_BLENDFACTOR_SRC_ALPHA 0x3
+#define GEN5_BLENDFACTOR_DST_ALPHA 0x4
+#define GEN5_BLENDFACTOR_DST_COLOR 0x5
+#define GEN5_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6
+#define GEN5_BLENDFACTOR_CONST_COLOR 0x7
+#define GEN5_BLENDFACTOR_CONST_ALPHA 0x8
+#define GEN5_BLENDFACTOR_SRC1_COLOR 0x9
+#define GEN5_BLENDFACTOR_SRC1_ALPHA 0x0A
+#define GEN5_BLENDFACTOR_ZERO 0x11
+#define GEN5_BLENDFACTOR_INV_SRC_COLOR 0x12
+#define GEN5_BLENDFACTOR_INV_SRC_ALPHA 0x13
+#define GEN5_BLENDFACTOR_INV_DST_ALPHA 0x14
+#define GEN5_BLENDFACTOR_INV_DST_COLOR 0x15
+#define GEN5_BLENDFACTOR_INV_CONST_COLOR 0x17
+#define GEN5_BLENDFACTOR_INV_CONST_ALPHA 0x18
+#define GEN5_BLENDFACTOR_INV_SRC1_COLOR 0x19
+#define GEN5_BLENDFACTOR_INV_SRC1_ALPHA 0x1A
+
+#define GEN5_BLENDFUNCTION_ADD 0
+#define GEN5_BLENDFUNCTION_SUBTRACT 1
+#define GEN5_BLENDFUNCTION_REVERSE_SUBTRACT 2
+#define GEN5_BLENDFUNCTION_MIN 3
+#define GEN5_BLENDFUNCTION_MAX 4
+
+#define GEN5_ALPHATEST_FORMAT_UNORM8 0
+#define GEN5_ALPHATEST_FORMAT_FLOAT32 1
+
+#define GEN5_CHROMAKEY_KILL_ON_ANY_MATCH 0
+#define GEN5_CHROMAKEY_REPLACE_BLACK 1
+
+#define GEN5_CLIP_API_OGL 0
+#define GEN5_CLIP_API_DX 1
+
+#define GEN5_CLIPMODE_NORMAL 0
+#define GEN5_CLIPMODE_CLIP_ALL 1
+#define GEN5_CLIPMODE_CLIP_NON_REJECTED 2
+#define GEN5_CLIPMODE_REJECT_ALL 3
+#define GEN5_CLIPMODE_ACCEPT_ALL 4
+
+#define GEN5_CLIP_NDCSPACE 0
+#define GEN5_CLIP_SCREENSPACE 1
+
+#define GEN5_COMPAREFUNCTION_ALWAYS 0
+#define GEN5_COMPAREFUNCTION_NEVER 1
+#define GEN5_COMPAREFUNCTION_LESS 2
+#define GEN5_COMPAREFUNCTION_EQUAL 3
+#define GEN5_COMPAREFUNCTION_LEQUAL 4
+#define GEN5_COMPAREFUNCTION_GREATER 5
+#define GEN5_COMPAREFUNCTION_NOTEQUAL 6
+#define GEN5_COMPAREFUNCTION_GEQUAL 7
+
+#define GEN5_COVERAGE_PIXELS_HALF 0
+#define GEN5_COVERAGE_PIXELS_1 1
+#define GEN5_COVERAGE_PIXELS_2 2
+#define GEN5_COVERAGE_PIXELS_4 3
+
+#define GEN5_CULLMODE_BOTH 0
+#define GEN5_CULLMODE_NONE 1
+#define GEN5_CULLMODE_FRONT 2
+#define GEN5_CULLMODE_BACK 3
+
+#define GEN5_DEFAULTCOLOR_R8G8B8A8_UNORM 0
+#define GEN5_DEFAULTCOLOR_R32G32B32A32_FLOAT 1
+
+#define GEN5_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0
+#define GEN5_DEPTHFORMAT_D32_FLOAT 1
+#define GEN5_DEPTHFORMAT_D24_UNORM_S8_UINT 2
+#define GEN5_DEPTHFORMAT_D16_UNORM 5
+
+#define GEN5_FLOATING_POINT_IEEE_754 0
+#define GEN5_FLOATING_POINT_NON_IEEE_754 1
+
+#define GEN5_FRONTWINDING_CW 0
+#define GEN5_FRONTWINDING_CCW 1
+
+#define GEN5_INDEX_BYTE 0
+#define GEN5_INDEX_WORD 1
+#define GEN5_INDEX_DWORD 2
+
+#define GEN5_LOGICOPFUNCTION_CLEAR 0
+#define GEN5_LOGICOPFUNCTION_NOR 1
+#define GEN5_LOGICOPFUNCTION_AND_INVERTED 2
+#define GEN5_LOGICOPFUNCTION_COPY_INVERTED 3
+#define GEN5_LOGICOPFUNCTION_AND_REVERSE 4
+#define GEN5_LOGICOPFUNCTION_INVERT 5
+#define GEN5_LOGICOPFUNCTION_XOR 6
+#define GEN5_LOGICOPFUNCTION_NAND 7
+#define GEN5_LOGICOPFUNCTION_AND 8
+#define GEN5_LOGICOPFUNCTION_EQUIV 9
+#define GEN5_LOGICOPFUNCTION_NOOP 10
+#define GEN5_LOGICOPFUNCTION_OR_INVERTED 11
+#define GEN5_LOGICOPFUNCTION_COPY 12
+#define GEN5_LOGICOPFUNCTION_OR_REVERSE 13
+#define GEN5_LOGICOPFUNCTION_OR 14
+#define GEN5_LOGICOPFUNCTION_SET 15
+
+#define GEN5_MAPFILTER_NEAREST 0x0
+#define GEN5_MAPFILTER_LINEAR 0x1
+#define GEN5_MAPFILTER_ANISOTROPIC 0x2
+
+#define GEN5_MIPFILTER_NONE 0
+#define GEN5_MIPFILTER_NEAREST 1
+#define GEN5_MIPFILTER_LINEAR 3
+
+#define GEN5_POLYGON_FRONT_FACING 0
+#define GEN5_POLYGON_BACK_FACING 1
+
+#define GEN5_PREFILTER_ALWAYS 0x0
+#define GEN5_PREFILTER_NEVER 0x1
+#define GEN5_PREFILTER_LESS 0x2
+#define GEN5_PREFILTER_EQUAL 0x3
+#define GEN5_PREFILTER_LEQUAL 0x4
+#define GEN5_PREFILTER_GREATER 0x5
+#define GEN5_PREFILTER_NOTEQUAL 0x6
+#define GEN5_PREFILTER_GEQUAL 0x7
+
+#define GEN5_PROVOKING_VERTEX_0 0
+#define GEN5_PROVOKING_VERTEX_1 1
+#define GEN5_PROVOKING_VERTEX_2 2
+
+#define GEN5_RASTRULE_UPPER_LEFT 0
+#define GEN5_RASTRULE_UPPER_RIGHT 1
+
+#define GEN5_RENDERTARGET_CLAMPRANGE_UNORM 0
+#define GEN5_RENDERTARGET_CLAMPRANGE_SNORM 1
+#define GEN5_RENDERTARGET_CLAMPRANGE_FORMAT 2
+
+#define GEN5_STENCILOP_KEEP 0
+#define GEN5_STENCILOP_ZERO 1
+#define GEN5_STENCILOP_REPLACE 2
+#define GEN5_STENCILOP_INCRSAT 3
+#define GEN5_STENCILOP_DECRSAT 4
+#define GEN5_STENCILOP_INCR 5
+#define GEN5_STENCILOP_DECR 6
+#define GEN5_STENCILOP_INVERT 7
+
+#define GEN5_SURFACE_MIPMAPLAYOUT_BELOW 0
+#define GEN5_SURFACE_MIPMAPLAYOUT_RIGHT 1
+
+#define GEN5_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000
+#define GEN5_SURFACEFORMAT_R32G32B32A32_SINT 0x001
+#define GEN5_SURFACEFORMAT_R32G32B32A32_UINT 0x002
+#define GEN5_SURFACEFORMAT_R32G32B32A32_UNORM 0x003
+#define GEN5_SURFACEFORMAT_R32G32B32A32_SNORM 0x004
+#define GEN5_SURFACEFORMAT_R64G64_FLOAT 0x005
+#define GEN5_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006
+#define GEN5_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007
+#define GEN5_SURFACEFORMAT_R32G32B32A32_USCALED 0x008
+#define GEN5_SURFACEFORMAT_R32G32B32_FLOAT 0x040
+#define GEN5_SURFACEFORMAT_R32G32B32_SINT 0x041
+#define GEN5_SURFACEFORMAT_R32G32B32_UINT 0x042
+#define GEN5_SURFACEFORMAT_R32G32B32_UNORM 0x043
+#define GEN5_SURFACEFORMAT_R32G32B32_SNORM 0x044
+#define GEN5_SURFACEFORMAT_R32G32B32_SSCALED 0x045
+#define GEN5_SURFACEFORMAT_R32G32B32_USCALED 0x046
+#define GEN5_SURFACEFORMAT_R16G16B16A16_UNORM 0x080
+#define GEN5_SURFACEFORMAT_R16G16B16A16_SNORM 0x081
+#define GEN5_SURFACEFORMAT_R16G16B16A16_SINT 0x082
+#define GEN5_SURFACEFORMAT_R16G16B16A16_UINT 0x083
+#define GEN5_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084
+#define GEN5_SURFACEFORMAT_R32G32_FLOAT 0x085
+#define GEN5_SURFACEFORMAT_R32G32_SINT 0x086
+#define GEN5_SURFACEFORMAT_R32G32_UINT 0x087
+#define GEN5_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088
+#define GEN5_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089
+#define GEN5_SURFACEFORMAT_L32A32_FLOAT 0x08A
+#define GEN5_SURFACEFORMAT_R32G32_UNORM 0x08B
+#define GEN5_SURFACEFORMAT_R32G32_SNORM 0x08C
+#define GEN5_SURFACEFORMAT_R64_FLOAT 0x08D
+#define GEN5_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E
+#define GEN5_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F
+#define GEN5_SURFACEFORMAT_A32X32_FLOAT 0x090
+#define GEN5_SURFACEFORMAT_L32X32_FLOAT 0x091
+#define GEN5_SURFACEFORMAT_I32X32_FLOAT 0x092
+#define GEN5_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093
+#define GEN5_SURFACEFORMAT_R16G16B16A16_USCALED 0x094
+#define GEN5_SURFACEFORMAT_R32G32_SSCALED 0x095
+#define GEN5_SURFACEFORMAT_R32G32_USCALED 0x096
+#define GEN5_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0
+#define GEN5_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1
+#define GEN5_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2
+#define GEN5_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3
+#define GEN5_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4
+#define GEN5_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5
+#define GEN5_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7
+#define GEN5_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8
+#define GEN5_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9
+#define GEN5_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA
+#define GEN5_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB
+#define GEN5_SURFACEFORMAT_R16G16_UNORM 0x0CC
+#define GEN5_SURFACEFORMAT_R16G16_SNORM 0x0CD
+#define GEN5_SURFACEFORMAT_R16G16_SINT 0x0CE
+#define GEN5_SURFACEFORMAT_R16G16_UINT 0x0CF
+#define GEN5_SURFACEFORMAT_R16G16_FLOAT 0x0D0
+#define GEN5_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1
+#define GEN5_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2
+#define GEN5_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3
+#define GEN5_SURFACEFORMAT_R32_SINT 0x0D6
+#define GEN5_SURFACEFORMAT_R32_UINT 0x0D7
+#define GEN5_SURFACEFORMAT_R32_FLOAT 0x0D8
+#define GEN5_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9
+#define GEN5_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA
+#define GEN5_SURFACEFORMAT_L16A16_UNORM 0x0DF
+#define GEN5_SURFACEFORMAT_I24X8_UNORM 0x0E0
+#define GEN5_SURFACEFORMAT_L24X8_UNORM 0x0E1
+#define GEN5_SURFACEFORMAT_A24X8_UNORM 0x0E2
+#define GEN5_SURFACEFORMAT_I32_FLOAT 0x0E3
+#define GEN5_SURFACEFORMAT_L32_FLOAT 0x0E4
+#define GEN5_SURFACEFORMAT_A32_FLOAT 0x0E5
+#define GEN5_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9
+#define GEN5_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA
+#define GEN5_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB
+#define GEN5_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC
+#define GEN5_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED
+#define GEN5_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE
+#define GEN5_SURFACEFORMAT_L16A16_FLOAT 0x0F0
+#define GEN5_SURFACEFORMAT_R32_UNORM 0x0F1
+#define GEN5_SURFACEFORMAT_R32_SNORM 0x0F2
+#define GEN5_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3
+#define GEN5_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4
+#define GEN5_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5
+#define GEN5_SURFACEFORMAT_R16G16_SSCALED 0x0F6
+#define GEN5_SURFACEFORMAT_R16G16_USCALED 0x0F7
+#define GEN5_SURFACEFORMAT_R32_SSCALED 0x0F8
+#define GEN5_SURFACEFORMAT_R32_USCALED 0x0F9
+#define GEN5_SURFACEFORMAT_B5G6R5_UNORM 0x100
+#define GEN5_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101
+#define GEN5_SURFACEFORMAT_B5G5R5A1_UNORM 0x102
+#define GEN5_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103
+#define GEN5_SURFACEFORMAT_B4G4R4A4_UNORM 0x104
+#define GEN5_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105
+#define GEN5_SURFACEFORMAT_R8G8_UNORM 0x106
+#define GEN5_SURFACEFORMAT_R8G8_SNORM 0x107
+#define GEN5_SURFACEFORMAT_R8G8_SINT 0x108
+#define GEN5_SURFACEFORMAT_R8G8_UINT 0x109
+#define GEN5_SURFACEFORMAT_R16_UNORM 0x10A
+#define GEN5_SURFACEFORMAT_R16_SNORM 0x10B
+#define GEN5_SURFACEFORMAT_R16_SINT 0x10C
+#define GEN5_SURFACEFORMAT_R16_UINT 0x10D
+#define GEN5_SURFACEFORMAT_R16_FLOAT 0x10E
+#define GEN5_SURFACEFORMAT_I16_UNORM 0x111
+#define GEN5_SURFACEFORMAT_L16_UNORM 0x112
+#define GEN5_SURFACEFORMAT_A16_UNORM 0x113
+#define GEN5_SURFACEFORMAT_L8A8_UNORM 0x114
+#define GEN5_SURFACEFORMAT_I16_FLOAT 0x115
+#define GEN5_SURFACEFORMAT_L16_FLOAT 0x116
+#define GEN5_SURFACEFORMAT_A16_FLOAT 0x117
+#define GEN5_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119
+#define GEN5_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A
+#define GEN5_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B
+#define GEN5_SURFACEFORMAT_R8G8_SSCALED 0x11C
+#define GEN5_SURFACEFORMAT_R8G8_USCALED 0x11D
+#define GEN5_SURFACEFORMAT_R16_SSCALED 0x11E
+#define GEN5_SURFACEFORMAT_R16_USCALED 0x11F
+#define GEN5_SURFACEFORMAT_R8_UNORM 0x140
+#define GEN5_SURFACEFORMAT_R8_SNORM 0x141
+#define GEN5_SURFACEFORMAT_R8_SINT 0x142
+#define GEN5_SURFACEFORMAT_R8_UINT 0x143
+#define GEN5_SURFACEFORMAT_A8_UNORM 0x144
+#define GEN5_SURFACEFORMAT_I8_UNORM 0x145
+#define GEN5_SURFACEFORMAT_L8_UNORM 0x146
+#define GEN5_SURFACEFORMAT_P4A4_UNORM 0x147
+#define GEN5_SURFACEFORMAT_A4P4_UNORM 0x148
+#define GEN5_SURFACEFORMAT_R8_SSCALED 0x149
+#define GEN5_SURFACEFORMAT_R8_USCALED 0x14A
+#define GEN5_SURFACEFORMAT_R1_UINT 0x181
+#define GEN5_SURFACEFORMAT_YCRCB_NORMAL 0x182
+#define GEN5_SURFACEFORMAT_YCRCB_SWAPUVY 0x183
+#define GEN5_SURFACEFORMAT_BC1_UNORM 0x186
+#define GEN5_SURFACEFORMAT_BC2_UNORM 0x187
+#define GEN5_SURFACEFORMAT_BC3_UNORM 0x188
+#define GEN5_SURFACEFORMAT_BC4_UNORM 0x189
+#define GEN5_SURFACEFORMAT_BC5_UNORM 0x18A
+#define GEN5_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B
+#define GEN5_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C
+#define GEN5_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D
+#define GEN5_SURFACEFORMAT_MONO8 0x18E
+#define GEN5_SURFACEFORMAT_YCRCB_SWAPUV 0x18F
+#define GEN5_SURFACEFORMAT_YCRCB_SWAPY 0x190
+#define GEN5_SURFACEFORMAT_DXT1_RGB 0x191
+#define GEN5_SURFACEFORMAT_FXT1 0x192
+#define GEN5_SURFACEFORMAT_R8G8B8_UNORM 0x193
+#define GEN5_SURFACEFORMAT_R8G8B8_SNORM 0x194
+#define GEN5_SURFACEFORMAT_R8G8B8_SSCALED 0x195
+#define GEN5_SURFACEFORMAT_R8G8B8_USCALED 0x196
+#define GEN5_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197
+#define GEN5_SURFACEFORMAT_R64G64B64_FLOAT 0x198
+#define GEN5_SURFACEFORMAT_BC4_SNORM 0x199
+#define GEN5_SURFACEFORMAT_BC5_SNORM 0x19A
+#define GEN5_SURFACEFORMAT_R16G16B16_UNORM 0x19C
+#define GEN5_SURFACEFORMAT_R16G16B16_SNORM 0x19D
+#define GEN5_SURFACEFORMAT_R16G16B16_SSCALED 0x19E
+#define GEN5_SURFACEFORMAT_R16G16B16_USCALED 0x19F
+
+#define GEN5_SURFACERETURNFORMAT_FLOAT32 0
+#define GEN5_SURFACERETURNFORMAT_S1 1
+
+#define GEN5_SURFACE_1D 0
+#define GEN5_SURFACE_2D 1
+#define GEN5_SURFACE_3D 2
+#define GEN5_SURFACE_CUBE 3
+#define GEN5_SURFACE_BUFFER 4
+#define GEN5_SURFACE_NULL 7
+
+#define GEN5_BORDER_COLOR_MODE_DEFAULT 0
+#define GEN5_BORDER_COLOR_MODE_LEGACY 1
+
+#define GEN5_TEXCOORDMODE_WRAP 0
+#define GEN5_TEXCOORDMODE_MIRROR 1
+#define GEN5_TEXCOORDMODE_CLAMP 2
+#define GEN5_TEXCOORDMODE_CUBE 3
+#define GEN5_TEXCOORDMODE_CLAMP_BORDER 4
+#define GEN5_TEXCOORDMODE_MIRROR_ONCE 5
+
+#define GEN5_THREAD_PRIORITY_NORMAL 0
+#define GEN5_THREAD_PRIORITY_HIGH 1
+
+#define GEN5_TILEWALK_XMAJOR 0
+#define GEN5_TILEWALK_YMAJOR 1
+
+#define GEN5_VERTEX_SUBPIXEL_PRECISION_8BITS 0
+#define GEN5_VERTEX_SUBPIXEL_PRECISION_4BITS 1
+
+#define GEN5_VERTEXBUFFER_ACCESS_VERTEXDATA 0
+#define GEN5_VERTEXBUFFER_ACCESS_INSTANCEDATA 1
+
+#define GEN5_VFCOMPONENT_NOSTORE 0
+#define GEN5_VFCOMPONENT_STORE_SRC 1
+#define GEN5_VFCOMPONENT_STORE_0 2
+#define GEN5_VFCOMPONENT_STORE_1_FLT 3
+#define GEN5_VFCOMPONENT_STORE_1_INT 4
+#define GEN5_VFCOMPONENT_STORE_VID 5
+#define GEN5_VFCOMPONENT_STORE_IID 6
+#define GEN5_VFCOMPONENT_STORE_PID 7
+
+
+
+/* Execution Unit (EU) defines
+ */
+
+#define GEN5_ALIGN_1 0
+#define GEN5_ALIGN_16 1
+
+#define GEN5_ADDRESS_DIRECT 0
+#define GEN5_ADDRESS_REGISTER_INDIRECT_REGISTER 1
+
+#define GEN5_CHANNEL_X 0
+#define GEN5_CHANNEL_Y 1
+#define GEN5_CHANNEL_Z 2
+#define GEN5_CHANNEL_W 3
+
+#define GEN5_COMPRESSION_NONE 0
+#define GEN5_COMPRESSION_2NDHALF 1
+#define GEN5_COMPRESSION_COMPRESSED 2
+
+#define GEN5_CONDITIONAL_NONE 0
+#define GEN5_CONDITIONAL_Z 1
+#define GEN5_CONDITIONAL_NZ 2
+#define GEN5_CONDITIONAL_EQ 1 /* Z */
+#define GEN5_CONDITIONAL_NEQ 2 /* NZ */
+#define GEN5_CONDITIONAL_G 3
+#define GEN5_CONDITIONAL_GE 4
+#define GEN5_CONDITIONAL_L 5
+#define GEN5_CONDITIONAL_LE 6
+#define GEN5_CONDITIONAL_C 7
+#define GEN5_CONDITIONAL_O 8
+
+#define GEN5_DEBUG_NONE 0
+#define GEN5_DEBUG_BREAKPOINT 1
+
+#define GEN5_DEPENDENCY_NORMAL 0
+#define GEN5_DEPENDENCY_NOTCLEARED 1
+#define GEN5_DEPENDENCY_NOTCHECKED 2
+#define GEN5_DEPENDENCY_DISABLE 3
+
+#define GEN5_EXECUTE_1 0
+#define GEN5_EXECUTE_2 1
+#define GEN5_EXECUTE_4 2
+#define GEN5_EXECUTE_8 3
+#define GEN5_EXECUTE_16 4
+#define GEN5_EXECUTE_32 5
+
+#define GEN5_HORIZONTAL_STRIDE_0 0
+#define GEN5_HORIZONTAL_STRIDE_1 1
+#define GEN5_HORIZONTAL_STRIDE_2 2
+#define GEN5_HORIZONTAL_STRIDE_4 3
+
+#define GEN5_INSTRUCTION_NORMAL 0
+#define GEN5_INSTRUCTION_SATURATE 1
+
+#define GEN5_MASK_ENABLE 0
+#define GEN5_MASK_DISABLE 1
+
+#define GEN5_OPCODE_MOV 1
+#define GEN5_OPCODE_SEL 2
+#define GEN5_OPCODE_NOT 4
+#define GEN5_OPCODE_AND 5
+#define GEN5_OPCODE_OR 6
+#define GEN5_OPCODE_XOR 7
+#define GEN5_OPCODE_SHR 8
+#define GEN5_OPCODE_SHL 9
+#define GEN5_OPCODE_RSR 10
+#define GEN5_OPCODE_RSL 11
+#define GEN5_OPCODE_ASR 12
+#define GEN5_OPCODE_CMP 16
+#define GEN5_OPCODE_JMPI 32
+#define GEN5_OPCODE_IF 34
+#define GEN5_OPCODE_IFF 35
+#define GEN5_OPCODE_ELSE 36
+#define GEN5_OPCODE_ENDIF 37
+#define GEN5_OPCODE_DO 38
+#define GEN5_OPCODE_WHILE 39
+#define GEN5_OPCODE_BREAK 40
+#define GEN5_OPCODE_CONTINUE 41
+#define GEN5_OPCODE_HALT 42
+#define GEN5_OPCODE_MSAVE 44
+#define GEN5_OPCODE_MRESTORE 45
+#define GEN5_OPCODE_PUSH 46
+#define GEN5_OPCODE_POP 47
+#define GEN5_OPCODE_WAIT 48
+#define GEN5_OPCODE_SEND 49
+#define GEN5_OPCODE_ADD 64
+#define GEN5_OPCODE_MUL 65
+#define GEN5_OPCODE_AVG 66
+#define GEN5_OPCODE_FRC 67
+#define GEN5_OPCODE_RNDU 68
+#define GEN5_OPCODE_RNDD 69
+#define GEN5_OPCODE_RNDE 70
+#define GEN5_OPCODE_RNDZ 71
+#define GEN5_OPCODE_MAC 72
+#define GEN5_OPCODE_MACH 73
+#define GEN5_OPCODE_LZD 74
+#define GEN5_OPCODE_SAD2 80
+#define GEN5_OPCODE_SADA2 81
+#define GEN5_OPCODE_DP4 84
+#define GEN5_OPCODE_DPH 85
+#define GEN5_OPCODE_DP3 86
+#define GEN5_OPCODE_DP2 87
+#define GEN5_OPCODE_DPA2 88
+#define GEN5_OPCODE_LINE 89
+#define GEN5_OPCODE_NOP 126
+
+#define GEN5_PREDICATE_NONE 0
+#define GEN5_PREDICATE_NORMAL 1
+#define GEN5_PREDICATE_ALIGN1_ANYV 2
+#define GEN5_PREDICATE_ALIGN1_ALLV 3
+#define GEN5_PREDICATE_ALIGN1_ANY2H 4
+#define GEN5_PREDICATE_ALIGN1_ALL2H 5
+#define GEN5_PREDICATE_ALIGN1_ANY4H 6
+#define GEN5_PREDICATE_ALIGN1_ALL4H 7
+#define GEN5_PREDICATE_ALIGN1_ANY8H 8
+#define GEN5_PREDICATE_ALIGN1_ALL8H 9
+#define GEN5_PREDICATE_ALIGN1_ANY16H 10
+#define GEN5_PREDICATE_ALIGN1_ALL16H 11
+#define GEN5_PREDICATE_ALIGN16_REPLICATE_X 2
+#define GEN5_PREDICATE_ALIGN16_REPLICATE_Y 3
+#define GEN5_PREDICATE_ALIGN16_REPLICATE_Z 4
+#define GEN5_PREDICATE_ALIGN16_REPLICATE_W 5
+#define GEN5_PREDICATE_ALIGN16_ANY4H 6
+#define GEN5_PREDICATE_ALIGN16_ALL4H 7
+
+#define GEN5_ARCHITECTURE_REGISTER_FILE 0
+#define GEN5_GENERAL_REGISTER_FILE 1
+#define GEN5_MESSAGE_REGISTER_FILE 2
+#define GEN5_IMMEDIATE_VALUE 3
+
+#define GEN5_REGISTER_TYPE_UD 0
+#define GEN5_REGISTER_TYPE_D 1
+#define GEN5_REGISTER_TYPE_UW 2
+#define GEN5_REGISTER_TYPE_W 3
+#define GEN5_REGISTER_TYPE_UB 4
+#define GEN5_REGISTER_TYPE_B 5
+#define GEN5_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? */
+#define GEN5_REGISTER_TYPE_HF 6
+#define GEN5_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */
+#define GEN5_REGISTER_TYPE_F 7
+
+#define GEN5_ARF_NULL 0x00
+#define GEN5_ARF_ADDRESS 0x10
+#define GEN5_ARF_ACCUMULATOR 0x20
+#define GEN5_ARF_FLAG 0x30
+#define GEN5_ARF_MASK 0x40
+#define GEN5_ARF_MASK_STACK 0x50
+#define GEN5_ARF_MASK_STACK_DEPTH 0x60
+#define GEN5_ARF_STATE 0x70
+#define GEN5_ARF_CONTROL 0x80
+#define GEN5_ARF_NOTIFICATION_COUNT 0x90
+#define GEN5_ARF_IP 0xA0
+
+#define GEN5_AMASK 0
+#define GEN5_IMASK 1
+#define GEN5_LMASK 2
+#define GEN5_CMASK 3
+
+
+
+#define GEN5_THREAD_NORMAL 0
+#define GEN5_THREAD_ATOMIC 1
+#define GEN5_THREAD_SWITCH 2
+
+#define GEN5_VERTICAL_STRIDE_0 0
+#define GEN5_VERTICAL_STRIDE_1 1
+#define GEN5_VERTICAL_STRIDE_2 2
+#define GEN5_VERTICAL_STRIDE_4 3
+#define GEN5_VERTICAL_STRIDE_8 4
+#define GEN5_VERTICAL_STRIDE_16 5
+#define GEN5_VERTICAL_STRIDE_32 6
+#define GEN5_VERTICAL_STRIDE_64 7
+#define GEN5_VERTICAL_STRIDE_128 8
+#define GEN5_VERTICAL_STRIDE_256 9
+#define GEN5_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF
+
+#define GEN5_WIDTH_1 0
+#define GEN5_WIDTH_2 1
+#define GEN5_WIDTH_4 2
+#define GEN5_WIDTH_8 3
+#define GEN5_WIDTH_16 4
+
+#define GEN5_STATELESS_BUFFER_BOUNDARY_1K 0
+#define GEN5_STATELESS_BUFFER_BOUNDARY_2K 1
+#define GEN5_STATELESS_BUFFER_BOUNDARY_4K 2
+#define GEN5_STATELESS_BUFFER_BOUNDARY_8K 3
+#define GEN5_STATELESS_BUFFER_BOUNDARY_16K 4
+#define GEN5_STATELESS_BUFFER_BOUNDARY_32K 5
+#define GEN5_STATELESS_BUFFER_BOUNDARY_64K 6
+#define GEN5_STATELESS_BUFFER_BOUNDARY_128K 7
+#define GEN5_STATELESS_BUFFER_BOUNDARY_256K 8
+#define GEN5_STATELESS_BUFFER_BOUNDARY_512K 9
+#define GEN5_STATELESS_BUFFER_BOUNDARY_1M 10
+#define GEN5_STATELESS_BUFFER_BOUNDARY_2M 11
+
+#define GEN5_POLYGON_FACING_FRONT 0
+#define GEN5_POLYGON_FACING_BACK 1
+
+#define GEN5_MESSAGE_TARGET_NULL 0
+#define GEN5_MESSAGE_TARGET_MATH 1
+#define GEN5_MESSAGE_TARGET_SAMPLER 2
+#define GEN5_MESSAGE_TARGET_GATEWAY 3
+#define GEN5_MESSAGE_TARGET_DATAPORT_READ 4
+#define GEN5_MESSAGE_TARGET_DATAPORT_WRITE 5
+#define GEN5_MESSAGE_TARGET_URB 6
+#define GEN5_MESSAGE_TARGET_THREAD_SPAWNER 7
+
+#define GEN5_SAMPLER_RETURN_FORMAT_FLOAT32 0
+#define GEN5_SAMPLER_RETURN_FORMAT_UINT32 2
+#define GEN5_SAMPLER_RETURN_FORMAT_SINT32 3
+
+#define GEN5_SAMPLER_MESSAGE_SIMD8_SAMPLE 0
+#define GEN5_SAMPLER_MESSAGE_SIMD16_SAMPLE 0
+#define GEN5_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0
+#define GEN5_SAMPLER_MESSAGE_SIMD8_KILLPIX 1
+#define GEN5_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1
+#define GEN5_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1
+#define GEN5_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2
+#define GEN5_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2
+#define GEN5_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0
+#define GEN5_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2
+#define GEN5_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2
+#define GEN5_SAMPLER_MESSAGE_SIMD8_RESINFO 2
+#define GEN5_SAMPLER_MESSAGE_SIMD16_RESINFO 2
+#define GEN5_SAMPLER_MESSAGE_SIMD4X2_LD 3
+#define GEN5_SAMPLER_MESSAGE_SIMD8_LD 3
+#define GEN5_SAMPLER_MESSAGE_SIMD16_LD 3
+
+#define GEN5_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0
+#define GEN5_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1
+#define GEN5_DATAPORT_OWORD_BLOCK_2_OWORDS 2
+#define GEN5_DATAPORT_OWORD_BLOCK_4_OWORDS 3
+#define GEN5_DATAPORT_OWORD_BLOCK_8_OWORDS 4
+
+#define GEN5_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0
+#define GEN5_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2
+
+#define GEN5_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2
+#define GEN5_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3
+
+#define GEN5_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0
+#define GEN5_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1
+#define GEN5_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ 2
+#define GEN5_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3
+
+#define GEN5_DATAPORT_READ_TARGET_DATA_CACHE 0
+#define GEN5_DATAPORT_READ_TARGET_RENDER_CACHE 1
+#define GEN5_DATAPORT_READ_TARGET_SAMPLER_CACHE 2
+
+#define GEN5_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0
+#define GEN5_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1
+#define GEN5_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2
+#define GEN5_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3
+#define GEN5_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4
+
+#define GEN5_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0
+#define GEN5_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1
+#define GEN5_DATAPORT_WRITE_MESSAGE_DWORD_BLOCK_WRITE 2
+#define GEN5_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3
+#define GEN5_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4
+#define GEN5_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5
+#define GEN5_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7
+
+#define GEN5_MATH_FUNCTION_INV 1
+#define GEN5_MATH_FUNCTION_LOG 2
+#define GEN5_MATH_FUNCTION_EXP 3
+#define GEN5_MATH_FUNCTION_SQRT 4
+#define GEN5_MATH_FUNCTION_RSQ 5
+#define GEN5_MATH_FUNCTION_SIN 6 /* was 7 */
+#define GEN5_MATH_FUNCTION_COS 7 /* was 8 */
+#define GEN5_MATH_FUNCTION_SINCOS 8 /* was 6 */
+#define GEN5_MATH_FUNCTION_TAN 9
+#define GEN5_MATH_FUNCTION_POW 10
+#define GEN5_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11
+#define GEN5_MATH_FUNCTION_INT_DIV_QUOTIENT 12
+#define GEN5_MATH_FUNCTION_INT_DIV_REMAINDER 13
+
+#define GEN5_MATH_INTEGER_UNSIGNED 0
+#define GEN5_MATH_INTEGER_SIGNED 1
+
+#define GEN5_MATH_PRECISION_FULL 0
+#define GEN5_MATH_PRECISION_PARTIAL 1
+
+#define GEN5_MATH_SATURATE_NONE 0
+#define GEN5_MATH_SATURATE_SATURATE 1
+
+#define GEN5_MATH_DATA_VECTOR 0
+#define GEN5_MATH_DATA_SCALAR 1
+
+#define GEN5_URB_OPCODE_WRITE 0
+
+#define GEN5_URB_SWIZZLE_NONE 0
+#define GEN5_URB_SWIZZLE_INTERLEAVE 1
+#define GEN5_URB_SWIZZLE_TRANSPOSE 2
+
+#define GEN5_SCRATCH_SPACE_SIZE_1K 0
+#define GEN5_SCRATCH_SPACE_SIZE_2K 1
+#define GEN5_SCRATCH_SPACE_SIZE_4K 2
+#define GEN5_SCRATCH_SPACE_SIZE_8K 3
+#define GEN5_SCRATCH_SPACE_SIZE_16K 4
+#define GEN5_SCRATCH_SPACE_SIZE_32K 5
+#define GEN5_SCRATCH_SPACE_SIZE_64K 6
+#define GEN5_SCRATCH_SPACE_SIZE_128K 7
+#define GEN5_SCRATCH_SPACE_SIZE_256K 8
+#define GEN5_SCRATCH_SPACE_SIZE_512K 9
+#define GEN5_SCRATCH_SPACE_SIZE_1M 10
+#define GEN5_SCRATCH_SPACE_SIZE_2M 11
+
+
+
+
+#define CMD_URB_FENCE 0x6000
+#define CMD_CONST_BUFFER_STATE 0x6001
+#define CMD_CONST_BUFFER 0x6002
+
+#define CMD_STATE_BASE_ADDRESS 0x6101
+#define CMD_STATE_INSN_POINTER 0x6102
+#define CMD_PIPELINE_SELECT 0x6104
+
+#define CMD_PIPELINED_STATE_POINTERS 0x7800
+#define CMD_BINDING_TABLE_PTRS 0x7801
+#define CMD_VERTEX_BUFFER 0x7808
+#define CMD_VERTEX_ELEMENT 0x7809
+#define CMD_INDEX_BUFFER 0x780a
+#define CMD_VF_STATISTICS 0x780b
+
+#define CMD_DRAW_RECT 0x7900
+#define CMD_BLEND_CONSTANT_COLOR 0x7901
+#define CMD_CHROMA_KEY 0x7904
+#define CMD_DEPTH_BUFFER 0x7905
+#define CMD_POLY_STIPPLE_OFFSET 0x7906
+#define CMD_POLY_STIPPLE_PATTERN 0x7907
+#define CMD_LINE_STIPPLE_PATTERN 0x7908
+#define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7908
+
+#define CMD_PIPE_CONTROL 0x7a00
+
+#define CMD_3D_PRIM 0x7b00
+
+#define CMD_MI_FLUSH 0x0200
+
+
+/* Various values from the R0 vertex header:
+ */
+#define R02_PRIM_END 0x1
+#define R02_PRIM_START 0x2
+
+/* media pipeline */
+
+#define GEN5_VFE_MODE_GENERIC 0x0
+#define GEN5_VFE_MODE_VLD_MPEG2 0x1
+#define GEN5_VFE_MODE_IS 0x2
+#define GEN5_VFE_MODE_AVC_MC 0x4
+#define GEN5_VFE_MODE_AVC_IT 0x7
+#define GEN5_VFE_MODE_VC1_IT 0xB
+
+#define GEN5_VFE_DEBUG_COUNTER_FREE 0
+#define GEN5_VFE_DEBUG_COUNTER_FROZEN 1
+#define GEN5_VFE_DEBUG_COUNTER_ONCE 2
+#define GEN5_VFE_DEBUG_COUNTER_ALWAYS 3
+
+/* VLD_STATE */
+#define GEN5_MPEG_TOP_FIELD 1
+#define GEN5_MPEG_BOTTOM_FIELD 2
+#define GEN5_MPEG_FRAME 3
+#define GEN5_MPEG_QSCALE_LINEAR 0
+#define GEN5_MPEG_QSCALE_NONLINEAR 1
+#define GEN5_MPEG_ZIGZAG_SCAN 0
+#define GEN5_MPEG_ALTER_VERTICAL_SCAN 1
+#define GEN5_MPEG_I_PICTURE 1
+#define GEN5_MPEG_P_PICTURE 2
+#define GEN5_MPEG_B_PICTURE 3
+
+/* Command packets:
+ */
+struct header
+{
+ unsigned int length:16;
+ unsigned int opcode:16;
+};
+
+
+union header_union
+{
+ struct header bits;
+ unsigned int dword;
+};
+
+struct gen5_3d_control
+{
+ struct
+ {
+ unsigned int length:8;
+ unsigned int notify_enable:1;
+ unsigned int pad:3;
+ unsigned int wc_flush_enable:1;
+ unsigned int depth_stall_enable:1;
+ unsigned int operation:2;
+ unsigned int opcode:16;
+ } header;
+
+ struct
+ {
+ unsigned int pad:2;
+ unsigned int dest_addr_type:1;
+ unsigned int dest_addr:29;
+ } dest;
+
+ unsigned int dword2;
+ unsigned int dword3;
+};
+
+
+struct gen5_3d_primitive
+{
+ struct
+ {
+ unsigned int length:8;
+ unsigned int pad:2;
+ unsigned int topology:5;
+ unsigned int indexed:1;
+ unsigned int opcode:16;
+ } header;
+
+ unsigned int verts_per_instance;
+ unsigned int start_vert_location;
+ unsigned int instance_count;
+ unsigned int start_instance_location;
+ unsigned int base_vert_location;
+};
+
+/* These seem to be passed around as function args, so it works out
+ * better to keep them as #defines:
+ */
+#define GEN5_FLUSH_READ_CACHE 0x1
+#define GEN5_FLUSH_STATE_CACHE 0x2
+#define GEN5_INHIBIT_FLUSH_RENDER_CACHE 0x4
+#define GEN5_FLUSH_SNAPSHOT_COUNTERS 0x8
+
+struct gen5_mi_flush
+{
+ unsigned int flags:4;
+ unsigned int pad:12;
+ unsigned int opcode:16;
+};
+
+struct gen5_vf_statistics
+{
+ unsigned int statistics_enable:1;
+ unsigned int pad:15;
+ unsigned int opcode:16;
+};
+
+
+
+struct gen5_binding_table_pointers
+{
+ struct header header;
+ unsigned int vs;
+ unsigned int gs;
+ unsigned int clp;
+ unsigned int sf;
+ unsigned int wm;
+};
+
+
+struct gen5_blend_constant_color
+{
+ struct header header;
+ float blend_constant_color[4];
+};
+
+
+struct gen5_depthbuffer
+{
+ union header_union header;
+
+ union {
+ struct {
+ unsigned int pitch:18;
+ unsigned int format:3;
+ unsigned int pad:4;
+ unsigned int depth_offset_disable:1;
+ unsigned int tile_walk:1;
+ unsigned int tiled_surface:1;
+ unsigned int pad2:1;
+ unsigned int surface_type:3;
+ } bits;
+ unsigned int dword;
+ } dword1;
+
+ unsigned int dword2_base_addr;
+
+ union {
+ struct {
+ unsigned int pad:1;
+ unsigned int mipmap_layout:1;
+ unsigned int lod:4;
+ unsigned int width:13;
+ unsigned int height:13;
+ } bits;
+ unsigned int dword;
+ } dword3;
+
+ union {
+ struct {
+ unsigned int pad:12;
+ unsigned int min_array_element:9;
+ unsigned int depth:11;
+ } bits;
+ unsigned int dword;
+ } dword4;
+};
+
+struct gen5_drawrect
+{
+ struct header header;
+ unsigned int xmin:16;
+ unsigned int ymin:16;
+ unsigned int xmax:16;
+ unsigned int ymax:16;
+ unsigned int xorg:16;
+ unsigned int yorg:16;
+};
+
+
+
+
+struct gen5_global_depth_offset_clamp
+{
+ struct header header;
+ float depth_offset_clamp;
+};
+
+struct gen5_indexbuffer
+{
+ union {
+ struct
+ {
+ unsigned int length:8;
+ unsigned int index_format:2;
+ unsigned int cut_index_enable:1;
+ unsigned int pad:5;
+ unsigned int opcode:16;
+ } bits;
+ unsigned int dword;
+
+ } header;
+
+ unsigned int buffer_start;
+ unsigned int buffer_end;
+};
+
+
+struct gen5_line_stipple
+{
+ struct header header;
+
+ struct
+ {
+ unsigned int pattern:16;
+ unsigned int pad:16;
+ } bits0;
+
+ struct
+ {
+ unsigned int repeat_count:9;
+ unsigned int pad:7;
+ unsigned int inverse_repeat_count:16;
+ } bits1;
+};
+
+
+struct gen5_pipelined_state_pointers
+{
+ struct header header;
+
+ struct {
+ unsigned int pad:5;
+ unsigned int offset:27;
+ } vs;
+
+ struct
+ {
+ unsigned int enable:1;
+ unsigned int pad:4;
+ unsigned int offset:27;
+ } gs;
+
+ struct
+ {
+ unsigned int enable:1;
+ unsigned int pad:4;
+ unsigned int offset:27;
+ } clp;
+
+ struct
+ {
+ unsigned int pad:5;
+ unsigned int offset:27;
+ } sf;
+
+ struct
+ {
+ unsigned int pad:5;
+ unsigned int offset:27;
+ } wm;
+
+ struct
+ {
+ unsigned int pad:5;
+ unsigned int offset:27; /* KW: check me! */
+ } cc;
+};
+
+
+struct gen5_polygon_stipple_offset
+{
+ struct header header;
+
+ struct {
+ unsigned int y_offset:5;
+ unsigned int pad:3;
+ unsigned int x_offset:5;
+ unsigned int pad0:19;
+ } bits0;
+};
+
+
+
+struct gen5_polygon_stipple
+{
+ struct header header;
+ unsigned int stipple[32];
+};
+
+
+
+struct gen5_pipeline_select
+{
+ struct
+ {
+ unsigned int pipeline_select:1;
+ unsigned int pad:15;
+ unsigned int opcode:16;
+ } header;
+};
+
+
+struct gen5_pipe_control
+{
+ struct
+ {
+ unsigned int length:8;
+ unsigned int notify_enable:1;
+ unsigned int pad:2;
+ unsigned int instruction_state_cache_flush_enable:1;
+ unsigned int write_cache_flush_enable:1;
+ unsigned int depth_stall_enable:1;
+ unsigned int post_sync_operation:2;
+
+ unsigned int opcode:16;
+ } header;
+
+ struct
+ {
+ unsigned int pad:2;
+ unsigned int dest_addr_type:1;
+ unsigned int dest_addr:29;
+ } bits1;
+
+ unsigned int data0;
+ unsigned int data1;
+};
+
+
+struct gen5_urb_fence
+{
+ struct
+ {
+ unsigned int length:8;
+ unsigned int vs_realloc:1;
+ unsigned int gs_realloc:1;
+ unsigned int clp_realloc:1;
+ unsigned int sf_realloc:1;
+ unsigned int vfe_realloc:1;
+ unsigned int cs_realloc:1;
+ unsigned int pad:2;
+ unsigned int opcode:16;
+ } header;
+
+ struct
+ {
+ unsigned int vs_fence:10;
+ unsigned int gs_fence:10;
+ unsigned int clp_fence:10;
+ unsigned int pad:2;
+ } bits0;
+
+ struct
+ {
+ unsigned int sf_fence:10;
+ unsigned int vf_fence:10;
+ unsigned int cs_fence:10;
+ unsigned int pad:2;
+ } bits1;
+};
+
+struct gen5_constant_buffer_state /* previously gen5_command_streamer */
+{
+ struct header header;
+
+ struct
+ {
+ unsigned int nr_urb_entries:3;
+ unsigned int pad:1;
+ unsigned int urb_entry_size:5;
+ unsigned int pad0:23;
+ } bits0;
+};
+
+struct gen5_constant_buffer
+{
+ struct
+ {
+ unsigned int length:8;
+ unsigned int valid:1;
+ unsigned int pad:7;
+ unsigned int opcode:16;
+ } header;
+
+ struct
+ {
+ unsigned int buffer_length:6;
+ unsigned int buffer_address:26;
+ } bits0;
+};
+
+struct gen5_state_base_address
+{
+ struct header header;
+
+ struct
+ {
+ unsigned int modify_enable:1;
+ unsigned int pad:4;
+ unsigned int general_state_address:27;
+ } bits0;
+
+ struct
+ {
+ unsigned int modify_enable:1;
+ unsigned int pad:4;
+ unsigned int surface_state_address:27;
+ } bits1;
+
+ struct
+ {
+ unsigned int modify_enable:1;
+ unsigned int pad:4;
+ unsigned int indirect_object_state_address:27;
+ } bits2;
+
+ struct
+ {
+ unsigned int modify_enable:1;
+ unsigned int pad:11;
+ unsigned int general_state_upper_bound:20;
+ } bits3;
+
+ struct
+ {
+ unsigned int modify_enable:1;
+ unsigned int pad:11;
+ unsigned int indirect_object_state_upper_bound:20;
+ } bits4;
+};
+
+struct gen5_state_prefetch
+{
+ struct header header;
+
+ struct
+ {
+ unsigned int prefetch_count:3;
+ unsigned int pad:3;
+ unsigned int prefetch_pointer:26;
+ } bits0;
+};
+
+struct gen5_system_instruction_pointer
+{
+ struct header header;
+
+ struct
+ {
+ unsigned int pad:4;
+ unsigned int system_instruction_pointer:28;
+ } bits0;
+};
+
+
+
+
+/* State structs for the various fixed function units:
+ */
+
+
+struct thread0
+{
+ unsigned int pad0:1;
+ unsigned int grf_reg_count:3;
+ unsigned int pad1:2;
+ unsigned int kernel_start_pointer:26;
+};
+
+struct thread1
+{
+ unsigned int ext_halt_exception_enable:1;
+ unsigned int sw_exception_enable:1;
+ unsigned int mask_stack_exception_enable:1;
+ unsigned int timeout_exception_enable:1;
+ unsigned int illegal_op_exception_enable:1;
+ unsigned int pad0:3;
+ unsigned int depth_coef_urb_read_offset:6; /* WM only */
+ unsigned int pad1:2;
+ unsigned int floating_point_mode:1;
+ unsigned int thread_priority:1;
+ unsigned int binding_table_entry_count:8;
+ unsigned int pad3:5;
+ unsigned int single_program_flow:1;
+};
+
+struct thread2
+{
+ unsigned int per_thread_scratch_space:4;
+ unsigned int pad0:6;
+ unsigned int scratch_space_base_pointer:22;
+};
+
+
+struct thread3
+{
+ unsigned int dispatch_grf_start_reg:4;
+ unsigned int urb_entry_read_offset:6;
+ unsigned int pad0:1;
+ unsigned int urb_entry_read_length:6;
+ unsigned int pad1:1;
+ unsigned int const_urb_entry_read_offset:6;
+ unsigned int pad2:1;
+ unsigned int const_urb_entry_read_length:6;
+ unsigned int pad3:1;
+};
+
+
+
+struct gen5_clip_unit_state
+{
+ struct thread0 thread0;
+ struct thread1 thread1;
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct
+ {
+ unsigned int pad0:9;
+ unsigned int gs_output_stats:1; /* not always */
+ unsigned int stats_enable:1;
+ unsigned int nr_urb_entries:7;
+ unsigned int pad1:1;
+ unsigned int urb_entry_allocation_size:5;
+ unsigned int pad2:1;
+ unsigned int max_threads:6; /* may be less */
+ unsigned int pad3:1;
+ } thread4;
+
+ struct
+ {
+ unsigned int pad0:13;
+ unsigned int clip_mode:3;
+ unsigned int userclip_enable_flags:8;
+ unsigned int userclip_must_clip:1;
+ unsigned int pad1:1;
+ unsigned int guard_band_enable:1;
+ unsigned int viewport_z_clip_enable:1;
+ unsigned int viewport_xy_clip_enable:1;
+ unsigned int vertex_position_space:1;
+ unsigned int api_mode:1;
+ unsigned int pad2:1;
+ } clip5;
+
+ struct
+ {
+ unsigned int pad0:5;
+ unsigned int clipper_viewport_state_ptr:27;
+ } clip6;
+
+
+ float viewport_xmin;
+ float viewport_xmax;
+ float viewport_ymin;
+ float viewport_ymax;
+};
+
+
+
+struct gen5_cc_unit_state
+{
+ struct
+ {
+ unsigned int pad0:3;
+ unsigned int bf_stencil_pass_depth_pass_op:3;
+ unsigned int bf_stencil_pass_depth_fail_op:3;
+ unsigned int bf_stencil_fail_op:3;
+ unsigned int bf_stencil_func:3;
+ unsigned int bf_stencil_enable:1;
+ unsigned int pad1:2;
+ unsigned int stencil_write_enable:1;
+ unsigned int stencil_pass_depth_pass_op:3;
+ unsigned int stencil_pass_depth_fail_op:3;
+ unsigned int stencil_fail_op:3;
+ unsigned int stencil_func:3;
+ unsigned int stencil_enable:1;
+ } cc0;
+
+
+ struct
+ {
+ unsigned int bf_stencil_ref:8;
+ unsigned int stencil_write_mask:8;
+ unsigned int stencil_test_mask:8;
+ unsigned int stencil_ref:8;
+ } cc1;
+
+
+ struct
+ {
+ unsigned int logicop_enable:1;
+ unsigned int pad0:10;
+ unsigned int depth_write_enable:1;
+ unsigned int depth_test_function:3;
+ unsigned int depth_test:1;
+ unsigned int bf_stencil_write_mask:8;
+ unsigned int bf_stencil_test_mask:8;
+ } cc2;
+
+
+ struct
+ {
+ unsigned int pad0:8;
+ unsigned int alpha_test_func:3;
+ unsigned int alpha_test:1;
+ unsigned int blend_enable:1;
+ unsigned int ia_blend_enable:1;
+ unsigned int pad1:1;
+ unsigned int alpha_test_format:1;
+ unsigned int pad2:16;
+ } cc3;
+
+ struct
+ {
+ unsigned int pad0:5;
+ unsigned int cc_viewport_state_offset:27;
+ } cc4;
+
+ struct
+ {
+ unsigned int pad0:2;
+ unsigned int ia_dest_blend_factor:5;
+ unsigned int ia_src_blend_factor:5;
+ unsigned int ia_blend_function:3;
+ unsigned int statistics_enable:1;
+ unsigned int logicop_func:4;
+ unsigned int pad1:11;
+ unsigned int dither_enable:1;
+ } cc5;
+
+ struct
+ {
+ unsigned int clamp_post_alpha_blend:1;
+ unsigned int clamp_pre_alpha_blend:1;
+ unsigned int clamp_range:2;
+ unsigned int pad0:11;
+ unsigned int y_dither_offset:2;
+ unsigned int x_dither_offset:2;
+ unsigned int dest_blend_factor:5;
+ unsigned int src_blend_factor:5;
+ unsigned int blend_function:3;
+ } cc6;
+
+ struct {
+ union {
+ float f;
+ unsigned char ub[4];
+ } alpha_ref;
+ } cc7;
+};
+
+
+
+struct gen5_sf_unit_state
+{
+ struct thread0 thread0;
+ struct {
+ unsigned int pad0:7;
+ unsigned int sw_exception_enable:1;
+ unsigned int pad1:3;
+ unsigned int mask_stack_exception_enable:1;
+ unsigned int pad2:1;
+ unsigned int illegal_op_exception_enable:1;
+ unsigned int pad3:2;
+ unsigned int floating_point_mode:1;
+ unsigned int thread_priority:1;
+ unsigned int binding_table_entry_count:8;
+ unsigned int pad4:5;
+ unsigned int single_program_flow:1;
+ } sf1;
+
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct
+ {
+ unsigned int pad0:10;
+ unsigned int stats_enable:1;
+ unsigned int nr_urb_entries:7;
+ unsigned int pad1:1;
+ unsigned int urb_entry_allocation_size:5;
+ unsigned int pad2:1;
+ unsigned int max_threads:6;
+ unsigned int pad3:1;
+ } thread4;
+
+ struct
+ {
+ unsigned int front_winding:1;
+ unsigned int viewport_transform:1;
+ unsigned int pad0:3;
+ unsigned int sf_viewport_state_offset:27;
+ } sf5;
+
+ struct
+ {
+ unsigned int pad0:9;
+ unsigned int dest_org_vbias:4;
+ unsigned int dest_org_hbias:4;
+ unsigned int scissor:1;
+ unsigned int disable_2x2_trifilter:1;
+ unsigned int disable_zero_pix_trifilter:1;
+ unsigned int point_rast_rule:2;
+ unsigned int line_endcap_aa_region_width:2;
+ unsigned int line_width:4;
+ unsigned int fast_scissor_disable:1;
+ unsigned int cull_mode:2;
+ unsigned int aa_enable:1;
+ } sf6;
+
+ struct
+ {
+ unsigned int point_size:11;
+ unsigned int use_point_size_state:1;
+ unsigned int subpixel_precision:1;
+ unsigned int sprite_point:1;
+ unsigned int pad0:11;
+ unsigned int trifan_pv:2;
+ unsigned int linestrip_pv:2;
+ unsigned int tristrip_pv:2;
+ unsigned int line_last_pixel_enable:1;
+ } sf7;
+
+};
+
+
+struct gen5_gs_unit_state
+{
+ struct thread0 thread0;
+ struct thread1 thread1;
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct
+ {
+ unsigned int pad0:10;
+ unsigned int stats_enable:1;
+ unsigned int nr_urb_entries:7;
+ unsigned int pad1:1;
+ unsigned int urb_entry_allocation_size:5;
+ unsigned int pad2:1;
+ unsigned int max_threads:1;
+ unsigned int pad3:6;
+ } thread4;
+
+ struct
+ {
+ unsigned int sampler_count:3;
+ unsigned int pad0:2;
+ unsigned int sampler_state_pointer:27;
+ } gs5;
+
+
+ struct
+ {
+ unsigned int max_vp_index:4;
+ unsigned int pad0:26;
+ unsigned int reorder_enable:1;
+ unsigned int pad1:1;
+ } gs6;
+};
+
+
+struct gen5_vs_unit_state
+{
+ struct thread0 thread0;
+ struct thread1 thread1;
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct
+ {
+ unsigned int pad0:10;
+ unsigned int stats_enable:1;
+ unsigned int nr_urb_entries:7;
+ unsigned int pad1:1;
+ unsigned int urb_entry_allocation_size:5;
+ unsigned int pad2:1;
+ unsigned int max_threads:4;
+ unsigned int pad3:3;
+ } thread4;
+
+ struct
+ {
+ unsigned int sampler_count:3;
+ unsigned int pad0:2;
+ unsigned int sampler_state_pointer:27;
+ } vs5;
+
+ struct
+ {
+ unsigned int vs_enable:1;
+ unsigned int vert_cache_disable:1;
+ unsigned int pad0:30;
+ } vs6;
+};
+
+
+struct gen5_wm_unit_state
+{
+ struct thread0 thread0;
+ struct thread1 thread1;
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct {
+ unsigned int stats_enable:1;
+ unsigned int pad0:1;
+ unsigned int sampler_count:3;
+ unsigned int sampler_state_pointer:27;
+ } wm4;
+
+ struct
+ {
+ unsigned int enable_8_pix:1;
+ unsigned int enable_16_pix:1;
+ unsigned int enable_32_pix:1;
+ unsigned int pad0:7;
+ unsigned int legacy_global_depth_bias:1;
+ unsigned int line_stipple:1;
+ unsigned int depth_offset:1;
+ unsigned int polygon_stipple:1;
+ unsigned int line_aa_region_width:2;
+ unsigned int line_endcap_aa_region_width:2;
+ unsigned int early_depth_test:1;
+ unsigned int thread_dispatch_enable:1;
+ unsigned int program_uses_depth:1;
+ unsigned int program_computes_depth:1;
+ unsigned int program_uses_killpixel:1;
+ unsigned int legacy_line_rast: 1;
+ unsigned int transposed_urb_read:1;
+ unsigned int max_threads:7;
+ } wm5;
+
+ float global_depth_offset_constant;
+ float global_depth_offset_scale;
+
+ struct {
+ unsigned int pad0:1;
+ unsigned int grf_reg_count_1:3;
+ unsigned int pad1:2;
+ unsigned int kernel_start_pointer_1:26;
+ } wm8;
+
+ struct {
+ unsigned int pad0:1;
+ unsigned int grf_reg_count_2:3;
+ unsigned int pad1:2;
+ unsigned int kernel_start_pointer_2:26;
+ } wm9;
+
+ struct {
+ unsigned int pad0:1;
+ unsigned int grf_reg_count_3:3;
+ unsigned int pad1:2;
+ unsigned int kernel_start_pointer_3:26;
+ } wm10;
+};
+
+struct gen5_wm_unit_state_padded {
+ struct gen5_wm_unit_state state;
+ char pad[64 - sizeof(struct gen5_wm_unit_state)];
+};
+
+/* The hardware supports two different modes for border color. The
+ * default (OpenGL) mode uses floating-point color channels, while the
+ * legacy mode uses 4 bytes.
+ *
+ * More significantly, the legacy mode respects the components of the
+ * border color for channels not present in the source, (whereas the
+ * default mode will ignore the border color's alpha channel and use
+ * alpha==1 for an RGB source, for example).
+ *
+ * The legacy mode matches the semantics specified by the Render
+ * extension.
+ */
+struct gen5_sampler_default_border_color {
+ float color[4];
+};
+
+struct gen5_sampler_legacy_border_color {
+ uint8_t color[4];
+};
+
+struct gen5_sampler_state
+{
+
+ struct
+ {
+ unsigned int shadow_function:3;
+ unsigned int lod_bias:11;
+ unsigned int min_filter:3;
+ unsigned int mag_filter:3;
+ unsigned int mip_filter:2;
+ unsigned int base_level:5;
+ unsigned int pad:1;
+ unsigned int lod_preclamp:1;
+ unsigned int border_color_mode:1;
+ unsigned int pad0:1;
+ unsigned int disable:1;
+ } ss0;
+
+ struct
+ {
+ unsigned int r_wrap_mode:3;
+ unsigned int t_wrap_mode:3;
+ unsigned int s_wrap_mode:3;
+ unsigned int pad:3;
+ unsigned int max_lod:10;
+ unsigned int min_lod:10;
+ } ss1;
+
+
+ struct
+ {
+ unsigned int pad:5;
+ unsigned int border_color_pointer:27;
+ } ss2;
+
+ struct
+ {
+ unsigned int pad:19;
+ unsigned int max_aniso:3;
+ unsigned int chroma_key_mode:1;
+ unsigned int chroma_key_index:2;
+ unsigned int chroma_key_enable:1;
+ unsigned int monochrome_filter_width:3;
+ unsigned int monochrome_filter_height:3;
+ } ss3;
+};
+
+
+struct gen5_clipper_viewport
+{
+ float xmin;
+ float xmax;
+ float ymin;
+ float ymax;
+};
+
+struct gen5_cc_viewport
+{
+ float min_depth;
+ float max_depth;
+};
+
+struct gen5_sf_viewport
+{
+ struct {
+ float m00;
+ float m11;
+ float m22;
+ float m30;
+ float m31;
+ float m32;
+ } viewport;
+
+ struct {
+ short xmin;
+ short ymin;
+ short xmax;
+ short ymax;
+ } scissor;
+};
+
+/* Documented in the subsystem/shared-functions/sampler chapter...
+ */
+struct gen5_surface_state
+{
+ struct {
+ unsigned int cube_pos_z:1;
+ unsigned int cube_neg_z:1;
+ unsigned int cube_pos_y:1;
+ unsigned int cube_neg_y:1;
+ unsigned int cube_pos_x:1;
+ unsigned int cube_neg_x:1;
+ unsigned int pad:3;
+ unsigned int render_cache_read_mode:1;
+ unsigned int mipmap_layout_mode:1;
+ unsigned int vert_line_stride_ofs:1;
+ unsigned int vert_line_stride:1;
+ unsigned int color_blend:1;
+ unsigned int writedisable_blue:1;
+ unsigned int writedisable_green:1;
+ unsigned int writedisable_red:1;
+ unsigned int writedisable_alpha:1;
+ unsigned int surface_format:9;
+ unsigned int data_return_format:1;
+ unsigned int pad0:1;
+ unsigned int surface_type:3;
+ } ss0;
+
+ struct {
+ unsigned int base_addr;
+ } ss1;
+
+ struct {
+ unsigned int render_target_rotation:2;
+ unsigned int mip_count:4;
+ unsigned int width:13;
+ unsigned int height:13;
+ } ss2;
+
+ struct {
+ unsigned int tile_walk:1;
+ unsigned int tiled_surface:1;
+ unsigned int pad:1;
+ unsigned int pitch:18;
+ unsigned int depth:11;
+ } ss3;
+
+ struct {
+ unsigned int pad:19;
+ unsigned int min_array_elt:9;
+ unsigned int min_lod:4;
+ } ss4;
+
+ struct {
+ unsigned int pad:20;
+ unsigned int y_offset:4;
+ unsigned int pad2:1;
+ unsigned int x_offset:7;
+ } ss5;
+};
+
+
+
+struct gen5_vertex_buffer_state
+{
+ struct {
+ unsigned int pitch:11;
+ unsigned int pad:15;
+ unsigned int access_type:1;
+ unsigned int vb_index:5;
+ } vb0;
+
+ unsigned int start_addr;
+ unsigned int max_index;
+#if 1
+ unsigned int instance_data_step_rate; /* not included for sequential/random vertices? */
+#endif
+};
+
+#define GEN5_VBP_MAX 17
+
+struct gen5_vb_array_state {
+ struct header header;
+ struct gen5_vertex_buffer_state vb[GEN5_VBP_MAX];
+};
+
+
+struct gen5_vertex_element_state
+{
+ struct
+ {
+ unsigned int src_offset:11;
+ unsigned int pad:5;
+ unsigned int src_format:9;
+ unsigned int pad0:1;
+ unsigned int valid:1;
+ unsigned int vertex_buffer_index:5;
+ } ve0;
+
+ struct
+ {
+ unsigned int dst_offset:8;
+ unsigned int pad:8;
+ unsigned int vfcomponent3:4;
+ unsigned int vfcomponent2:4;
+ unsigned int vfcomponent1:4;
+ unsigned int vfcomponent0:4;
+ } ve1;
+};
+
+#define GEN5_VEP_MAX 18
+
+struct gen5_vertex_element_packet {
+ struct header header;
+ struct gen5_vertex_element_state ve[GEN5_VEP_MAX]; /* note: less than _TNL_ATTRIB_MAX */
+};
+
+
+struct gen5_urb_immediate {
+ unsigned int opcode:4;
+ unsigned int offset:6;
+ unsigned int swizzle_control:2;
+ unsigned int pad:1;
+ unsigned int allocate:1;
+ unsigned int used:1;
+ unsigned int complete:1;
+ unsigned int response_length:4;
+ unsigned int msg_length:4;
+ unsigned int msg_target:4;
+ unsigned int pad1:3;
+ unsigned int end_of_thread:1;
+};
+
+/* Instruction format for the execution units:
+ */
+
+struct gen5_instruction
+{
+ struct
+ {
+ unsigned int opcode:7;
+ unsigned int pad:1;
+ unsigned int access_mode:1;
+ unsigned int mask_control:1;
+ unsigned int dependency_control:2;
+ unsigned int compression_control:2;
+ unsigned int thread_control:2;
+ unsigned int predicate_control:4;
+ unsigned int predicate_inverse:1;
+ unsigned int execution_size:3;
+ unsigned int destreg__conditonalmod:4; /* destreg - send, conditionalmod - others */
+ unsigned int pad0:2;
+ unsigned int debug_control:1;
+ unsigned int saturate:1;
+ } header;
+
+ union {
+ struct
+ {
+ unsigned int dest_reg_file:2;
+ unsigned int dest_reg_type:3;
+ unsigned int src0_reg_file:2;
+ unsigned int src0_reg_type:3;
+ unsigned int src1_reg_file:2;
+ unsigned int src1_reg_type:3;
+ unsigned int pad:1;
+ unsigned int dest_subreg_nr:5;
+ unsigned int dest_reg_nr:8;
+ unsigned int dest_horiz_stride:2;
+ unsigned int dest_address_mode:1;
+ } da1;
+
+ struct
+ {
+ unsigned int dest_reg_file:2;
+ unsigned int dest_reg_type:3;
+ unsigned int src0_reg_file:2;
+ unsigned int src0_reg_type:3;
+ unsigned int pad:6;
+ int dest_indirect_offset:10; /* offset against the deref'd address reg */
+ unsigned int dest_subreg_nr:3; /* subnr for the address reg a0.x */
+ unsigned int dest_horiz_stride:2;
+ unsigned int dest_address_mode:1;
+ } ia1;
+
+ struct
+ {
+ unsigned int dest_reg_file:2;
+ unsigned int dest_reg_type:3;
+ unsigned int src0_reg_file:2;
+ unsigned int src0_reg_type:3;
+ unsigned int src1_reg_file:2;
+ unsigned int src1_reg_type:3;
+ unsigned int pad0:1;
+ unsigned int dest_writemask:4;
+ unsigned int dest_subreg_nr:1;
+ unsigned int dest_reg_nr:8;
+ unsigned int pad1:2;
+ unsigned int dest_address_mode:1;
+ } da16;
+
+ struct
+ {
+ unsigned int dest_reg_file:2;
+ unsigned int dest_reg_type:3;
+ unsigned int src0_reg_file:2;
+ unsigned int src0_reg_type:3;
+ unsigned int pad0:6;
+ unsigned int dest_writemask:4;
+ int dest_indirect_offset:6;
+ unsigned int dest_subreg_nr:3;
+ unsigned int pad1:2;
+ unsigned int dest_address_mode:1;
+ } ia16;
+ } bits1;
+
+
+ union {
+ struct
+ {
+ unsigned int src0_subreg_nr:5;
+ unsigned int src0_reg_nr:8;
+ unsigned int src0_abs:1;
+ unsigned int src0_negate:1;
+ unsigned int src0_address_mode:1;
+ unsigned int src0_horiz_stride:2;
+ unsigned int src0_width:3;
+ unsigned int src0_vert_stride:4;
+ unsigned int flag_reg_nr:1;
+ unsigned int pad:6;
+ } da1;
+
+ struct
+ {
+ int src0_indirect_offset:10;
+ unsigned int src0_subreg_nr:3;
+ unsigned int src0_abs:1;
+ unsigned int src0_negate:1;
+ unsigned int src0_address_mode:1;
+ unsigned int src0_horiz_stride:2;
+ unsigned int src0_width:3;
+ unsigned int src0_vert_stride:4;
+ unsigned int flag_reg_nr:1;
+ unsigned int pad:6;
+ } ia1;
+
+ struct
+ {
+ unsigned int src0_swz_x:2;
+ unsigned int src0_swz_y:2;
+ unsigned int src0_subreg_nr:1;
+ unsigned int src0_reg_nr:8;
+ unsigned int src0_abs:1;
+ unsigned int src0_negate:1;
+ unsigned int src0_address_mode:1;
+ unsigned int src0_swz_z:2;
+ unsigned int src0_swz_w:2;
+ unsigned int pad0:1;
+ unsigned int src0_vert_stride:4;
+ unsigned int flag_reg_nr:1;
+ unsigned int pad1:6;
+ } da16;
+
+ struct
+ {
+ unsigned int src0_swz_x:2;
+ unsigned int src0_swz_y:2;
+ int src0_indirect_offset:6;
+ unsigned int src0_subreg_nr:3;
+ unsigned int src0_abs:1;
+ unsigned int src0_negate:1;
+ unsigned int src0_address_mode:1;
+ unsigned int src0_swz_z:2;
+ unsigned int src0_swz_w:2;
+ unsigned int pad0:1;
+ unsigned int src0_vert_stride:4;
+ unsigned int flag_reg_nr:1;
+ unsigned int pad1:6;
+ } ia16;
+
+ } bits2;
+
+ union
+ {
+ struct
+ {
+ unsigned int src1_subreg_nr:5;
+ unsigned int src1_reg_nr:8;
+ unsigned int src1_abs:1;
+ unsigned int src1_negate:1;
+ unsigned int pad:1;
+ unsigned int src1_horiz_stride:2;
+ unsigned int src1_width:3;
+ unsigned int src1_vert_stride:4;
+ unsigned int pad0:7;
+ } da1;
+
+ struct
+ {
+ unsigned int src1_swz_x:2;
+ unsigned int src1_swz_y:2;
+ unsigned int src1_subreg_nr:1;
+ unsigned int src1_reg_nr:8;
+ unsigned int src1_abs:1;
+ unsigned int src1_negate:1;
+ unsigned int pad0:1;
+ unsigned int src1_swz_z:2;
+ unsigned int src1_swz_w:2;
+ unsigned int pad1:1;
+ unsigned int src1_vert_stride:4;
+ unsigned int pad2:7;
+ } da16;
+
+ struct
+ {
+ int src1_indirect_offset:10;
+ unsigned int src1_subreg_nr:3;
+ unsigned int src1_abs:1;
+ unsigned int src1_negate:1;
+ unsigned int pad0:1;
+ unsigned int src1_horiz_stride:2;
+ unsigned int src1_width:3;
+ unsigned int src1_vert_stride:4;
+ unsigned int flag_reg_nr:1;
+ unsigned int pad1:6;
+ } ia1;
+
+ struct
+ {
+ unsigned int src1_swz_x:2;
+ unsigned int src1_swz_y:2;
+ int src1_indirect_offset:6;
+ unsigned int src1_subreg_nr:3;
+ unsigned int src1_abs:1;
+ unsigned int src1_negate:1;
+ unsigned int pad0:1;
+ unsigned int src1_swz_z:2;
+ unsigned int src1_swz_w:2;
+ unsigned int pad1:1;
+ unsigned int src1_vert_stride:4;
+ unsigned int flag_reg_nr:1;
+ unsigned int pad2:6;
+ } ia16;
+
+
+ struct
+ {
+ int jump_count:16; /* note: signed */
+ unsigned int pop_count:4;
+ unsigned int pad0:12;
+ } if_else;
+
+ struct {
+ unsigned int function:4;
+ unsigned int int_type:1;
+ unsigned int precision:1;
+ unsigned int saturate:1;
+ unsigned int data_type:1;
+ unsigned int pad0:8;
+ unsigned int response_length:4;
+ unsigned int msg_length:4;
+ unsigned int msg_target:4;
+ unsigned int pad1:3;
+ unsigned int end_of_thread:1;
+ } math;
+
+ struct {
+ unsigned int binding_table_index:8;
+ unsigned int sampler:4;
+ unsigned int return_format:2;
+ unsigned int msg_type:2;
+ unsigned int response_length:4;
+ unsigned int msg_length:4;
+ unsigned int msg_target:4;
+ unsigned int pad1:3;
+ unsigned int end_of_thread:1;
+ } sampler;
+
+ struct gen5_urb_immediate urb;
+
+ struct {
+ unsigned int binding_table_index:8;
+ unsigned int msg_control:4;
+ unsigned int msg_type:2;
+ unsigned int target_cache:2;
+ unsigned int response_length:4;
+ unsigned int msg_length:4;
+ unsigned int msg_target:4;
+ unsigned int pad1:3;
+ unsigned int end_of_thread:1;
+ } dp_read;
+
+ struct {
+ unsigned int binding_table_index:8;
+ unsigned int msg_control:3;
+ unsigned int pixel_scoreboard_clear:1;
+ unsigned int msg_type:3;
+ unsigned int send_commit_msg:1;
+ unsigned int response_length:4;
+ unsigned int msg_length:4;
+ unsigned int msg_target:4;
+ unsigned int pad1:3;
+ unsigned int end_of_thread:1;
+ } dp_write;
+
+ struct {
+ unsigned int pad:16;
+ unsigned int response_length:4;
+ unsigned int msg_length:4;
+ unsigned int msg_target:4;
+ unsigned int pad1:3;
+ unsigned int end_of_thread:1;
+ } generic;
+
+ unsigned int ud;
+ } bits3;
+};
+
+/* media pipeline */
+
+struct gen5_vfe_state {
+ struct {
+ unsigned int per_thread_scratch_space:4;
+ unsigned int pad3:3;
+ unsigned int extend_vfe_state_present:1;
+ unsigned int pad2:2;
+ unsigned int scratch_base:22;
+ } vfe0;
+
+ struct {
+ unsigned int debug_counter_control:2;
+ unsigned int children_present:1;
+ unsigned int vfe_mode:4;
+ unsigned int pad2:2;
+ unsigned int num_urb_entries:7;
+ unsigned int urb_entry_alloc_size:9;
+ unsigned int max_threads:7;
+ } vfe1;
+
+ struct {
+ unsigned int pad4:4;
+ unsigned int interface_descriptor_base:28;
+ } vfe2;
+};
+
+struct gen5_vld_state {
+ struct {
+ unsigned int pad6:6;
+ unsigned int scan_order:1;
+ unsigned int intra_vlc_format:1;
+ unsigned int quantizer_scale_type:1;
+ unsigned int concealment_motion_vector:1;
+ unsigned int frame_predict_frame_dct:1;
+ unsigned int top_field_first:1;
+ unsigned int picture_structure:2;
+ unsigned int intra_dc_precision:2;
+ unsigned int f_code_0_0:4;
+ unsigned int f_code_0_1:4;
+ unsigned int f_code_1_0:4;
+ unsigned int f_code_1_1:4;
+ } vld0;
+
+ struct {
+ unsigned int pad2:9;
+ unsigned int picture_coding_type:2;
+ unsigned int pad:21;
+ } vld1;
+
+ struct {
+ unsigned int index_0:4;
+ unsigned int index_1:4;
+ unsigned int index_2:4;
+ unsigned int index_3:4;
+ unsigned int index_4:4;
+ unsigned int index_5:4;
+ unsigned int index_6:4;
+ unsigned int index_7:4;
+ } desc_remap_table0;
+
+ struct {
+ unsigned int index_8:4;
+ unsigned int index_9:4;
+ unsigned int index_10:4;
+ unsigned int index_11:4;
+ unsigned int index_12:4;
+ unsigned int index_13:4;
+ unsigned int index_14:4;
+ unsigned int index_15:4;
+ } desc_remap_table1;
+};
+
+struct gen5_interface_descriptor {
+ struct {
+ unsigned int grf_reg_blocks:4;
+ unsigned int pad:2;
+ unsigned int kernel_start_pointer:26;
+ } desc0;
+
+ struct {
+ unsigned int pad:7;
+ unsigned int software_exception:1;
+ unsigned int pad2:3;
+ unsigned int maskstack_exception:1;
+ unsigned int pad3:1;
+ unsigned int illegal_opcode_exception:1;
+ unsigned int pad4:2;
+ unsigned int floating_point_mode:1;
+ unsigned int thread_priority:1;
+ unsigned int single_program_flow:1;
+ unsigned int pad5:1;
+ unsigned int const_urb_entry_read_offset:6;
+ unsigned int const_urb_entry_read_len:6;
+ } desc1;
+
+ struct {
+ unsigned int pad:2;
+ unsigned int sampler_count:3;
+ unsigned int sampler_state_pointer:27;
+ } desc2;
+
+ struct {
+ unsigned int binding_table_entry_count:5;
+ unsigned int binding_table_pointer:27;
+ } desc3;
+};
+
+struct gen6_blend_state
+{
+ struct {
+ unsigned int dest_blend_factor:5;
+ unsigned int source_blend_factor:5;
+ unsigned int pad3:1;
+ unsigned int blend_func:3;
+ unsigned int pad2:1;
+ unsigned int ia_dest_blend_factor:5;
+ unsigned int ia_source_blend_factor:5;
+ unsigned int pad1:1;
+ unsigned int ia_blend_func:3;
+ unsigned int pad0:1;
+ unsigned int ia_blend_enable:1;
+ unsigned int blend_enable:1;
+ } blend0;
+
+ struct {
+ unsigned int post_blend_clamp_enable:1;
+ unsigned int pre_blend_clamp_enable:1;
+ unsigned int clamp_range:2;
+ unsigned int pad0:4;
+ unsigned int x_dither_offset:2;
+ unsigned int y_dither_offset:2;
+ unsigned int dither_enable:1;
+ unsigned int alpha_test_func:3;
+ unsigned int alpha_test_enable:1;
+ unsigned int pad1:1;
+ unsigned int logic_op_func:4;
+ unsigned int logic_op_enable:1;
+ unsigned int pad2:1;
+ unsigned int write_disable_b:1;
+ unsigned int write_disable_g:1;
+ unsigned int write_disable_r:1;
+ unsigned int write_disable_a:1;
+ unsigned int pad3:1;
+ unsigned int alpha_to_coverage_dither:1;
+ unsigned int alpha_to_one:1;
+ unsigned int alpha_to_coverage:1;
+ } blend1;
+};
+
+struct gen6_color_calc_state
+{
+ struct {
+ unsigned int alpha_test_format:1;
+ unsigned int pad0:14;
+ unsigned int round_disable:1;
+ unsigned int bf_stencil_ref:8;
+ unsigned int stencil_ref:8;
+ } cc0;
+
+ union {
+ float alpha_ref_f;
+ struct {
+ unsigned int ui:8;
+ unsigned int pad0:24;
+ } alpha_ref_fi;
+ } cc1;
+
+ float constant_r;
+ float constant_g;
+ float constant_b;
+ float constant_a;
+};
+
+struct gen6_depth_stencil_state
+{
+ struct {
+ unsigned int pad0:3;
+ unsigned int bf_stencil_pass_depth_pass_op:3;