summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Keith Packard <keithp@keithp.com> 2008-05-05 12:05:00 -0700
committer Keith Packard <keithp@keithp.com> 2008-05-05 12:05:00 -0700
commit 65877d32bb2cd71ccc8d6a66244f029fc3577f69 (patch)
tree 158de49aae01ee22c5efcc0cc73eafa55317e6fb
parent b9d12da91eb1afb05bd78a40e15f4333e697093d (diff)
parent fff17b9d1b58cb53032d153094826dd306836d59 (diff)
Merge branch 'master' into drm-gem
-rw-r--r-- acinclude.m4 | 14
-rw-r--r-- configure.ac | 15
-rw-r--r-- src/Makefile.am | 2
-rw-r--r-- src/exa_wm.g4i | 16
-rw-r--r-- src/exa_wm_projective.g4i | 2
-rw-r--r-- src/exa_wm_src_sample_planar.g4a | 11
-rw-r--r-- src/exa_wm_src_sample_planar.g4b | 4
-rw-r--r-- src/exa_wm_write.g4a | 10
-rw-r--r-- src/exa_wm_write.g4b | 10
-rw-r--r-- src/exa_wm_yuv_rgb.g4a | 44
-rw-r--r-- src/exa_wm_yuv_rgb.g4b | 24
-rw-r--r-- src/i830.h | 1
-rw-r--r-- src/i830_accel.c | 3
-rw-r--r-- src/i830_debug.c | 557
-rw-r--r-- src/i830_display.c | 6
-rw-r--r-- src/i830_driver.c | 18
-rw-r--r-- src/i830_lvds.c | 5
-rw-r--r-- src/i830_video.c | 24
-rw-r--r-- src/i965_render.c | 1005
-rw-r--r-- src/i965_video.c | 35
20 files changed, 1208 insertions, 598 deletions
diff --git a/acinclude.m4 b/acinclude.m4
index 17b36d8a..254b3529 100644
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -8,8 +8,7 @@ dnl To use dolt, invoke the DOLT macro immediately after the libtool macros.
dnl Optionally, copy this file into acinclude.m4, to avoid the need to have it
dnl installed when running autoconf on your project.
dnl
-dnl git snapshot: 198a3026b347b9220a2f2e2ae23a3049c35af262
-
+dnl git snapshot: d91f2b4e9041538400e2703a2a6fbeecdb8ee27d
AC_DEFUN([DOLT], [
AC_REQUIRE([AC_CANONICAL_HOST])
# dolt, a replacement for libtool
@@ -27,11 +26,13 @@ if test x$GCC != xyes; then
fi
case $host in
i?86-*-linux*|x86_64-*-linux*|powerpc-*-linux*) ;;
-amd64-*-freebsd*|i386-*-freebsd*|ia64-*-freebsd*) ;;
+amd64-*-freebsd*|i?86-*-freebsd*|ia64-*-freebsd*) ;;
*) dolt_supported=no ;;
esac
if test x$dolt_supported = xno ; then
AC_MSG_RESULT([no, falling back to libtool])
+ LTCOMPILE='$(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(COMPILE)'
+ LTCXXCOMPILE='$(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXXCOMPILE)'
else
AC_MSG_RESULT([yes, replacing libtool])
@@ -65,9 +66,10 @@ dnl Write out shared compilation code.
cat <<'__DOLTCOMPILE__EOF__' >>doltcompile
libobjdir="${obj%$objbase}.libs"
if test ! -d "$libobjdir" ; then
- mkdir -p "$libobjdir"
+ mkdir_out="$(mkdir "$libobjdir" 2>&1)"
mkdir_ret=$?
if test "$mkdir_ret" -ne 0 && test ! -d "$libobjdir" ; then
+ echo "$mkdir_out" 1>&2
exit $mkdir_ret
fi
fi
@@ -130,9 +132,9 @@ __DOLTCOMPILE__EOF__
dnl Done writing out doltcompile; substitute it for libtool compilation.
chmod +x doltcompile
LTCOMPILE='$(top_builddir)/doltcompile $(COMPILE)'
- AC_SUBST(LTCOMPILE)
LTCXXCOMPILE='$(top_builddir)/doltcompile $(CXXCOMPILE)'
- AC_SUBST(LTCXXCOMPILE)
fi
+AC_SUBST(LTCOMPILE)
+AC_SUBST(LTCXXCOMPILE)
# end dolt
])
diff --git a/configure.ac b/configure.ac
index f203d658..00f075e8 100644
--- a/configure.ac
+++ b/configure.ac
@@ -44,6 +44,9 @@ AM_PROG_CC_C_O
AC_CHECK_PROG(gen4asm, [intel-gen4asm], yes, no)
AM_CONDITIONAL(HAVE_GEN4ASM, test x$gen4asm = xyes)
+AC_CHECK_HEADERS(sys/mman.h)
+AC_CHECK_FUNCS(mprotect)
+
AH_TOP([#include "xorg-server.h"])
AC_ARG_WITH(xorg-module-dir,
@@ -171,6 +174,18 @@ fi
AC_SUBST([XMODES_CFLAGS])
+SAVE_CPPFLAGS="$CPPFLAGS"
+CPPFLAGS="$CPPFLAGS $XORG_CFLAGS"
+
+AC_CHECK_DECL(xf86RotateFreeShadow,
+ [AC_DEFINE(HAVE_FREE_SHADOW, 1, [have new FreeShadow API])],
+ [],
+ [#include <xorg-server.h>
+ #include <windowstr.h>
+ #include <xf86Crtc.h>])
+
+CPPFLAGS="$SAVE_CPPFLAGS"
+
dnl Use lots of warning flags with GCC
WARN_CFLAGS=""
diff --git a/src/Makefile.am b/src/Makefile.am
index 91f59954..0784c064 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -71,8 +71,6 @@ INTEL_XVMC_SRCS = \
intel_drv_la_SOURCES = \
brw_defines.h \
brw_structs.h \
- sf_prog.h \
- wm_prog.h \
common.h \
i2c_vid.h \
i810_accel.c \
diff --git a/src/exa_wm.g4i b/src/exa_wm.g4i
index ee8e3ad0..5d3d45b1 100644
--- a/src/exa_wm.g4i
+++ b/src/exa_wm.g4i
@@ -103,23 +103,39 @@ define(`mask_w_1', `src_w_1')
/* sample src to these registers */
define(`src_sample_base', `g14')
+
+define(`src_sample_r', `g14')
define(`src_sample_r_01', `g14')
define(`src_sample_r_23', `g15')
+
+define(`src_sample_g', `g16')
define(`src_sample_g_01', `g16')
define(`src_sample_g_23', `g17')
+
+define(`src_sample_b', `g18')
define(`src_sample_b_01', `g18')
define(`src_sample_b_23', `g19')
+
+define(`src_sample_a', `g20')
define(`src_sample_a_01', `g20')
define(`src_sample_a_23', `g21')
/* sample mask to these registers */
define(`mask_sample_base', `g22')
+
+define(`mask_sample_r', `g22')
define(`mask_sample_r_01', `g22')
define(`mask_sample_r_23', `g23')
+
+define(`mask_sample_g', `g24')
define(`mask_sample_g_01', `g24')
define(`mask_sample_g_23', `g25')
+
+define(`mask_sample_b', `g26')
define(`mask_sample_b_01', `g26')
define(`mask_sample_b_23', `g27')
+
+define(`mask_sample_a', `g28')
define(`mask_sample_a_01', `g28')
define(`mask_sample_a_23', `g29')
diff --git a/src/exa_wm_projective.g4i b/src/exa_wm_projective.g4i
index 3c3bbf0c..7e2e0a82 100644
--- a/src/exa_wm_projective.g4i
+++ b/src/exa_wm_projective.g4i
@@ -32,7 +32,7 @@ mul (16) temp_y<1>F dst_y<8,8,1>F dw_dy { compr align1 };
add (16) temp_x<1>F temp_x<8,8,1>F temp_y<8,8,1>F { compr align1 };
add (16) temp_x<1>F temp_x<8,8,1>F wo { compr align1 };
send (8) 0 w_0<1>F temp_x_0<8,8,1>F math inv mlen 1 rlen 1 { align1 };
-send (8) 0 w_1<1>F temp_x_1<8,8,1>F math inv mlen 1 rlen 1 { align1 };
+send (8) 0 w_1<1>F temp_x_1<8,8,1>F math inv mlen 1 rlen 1 { sechalf align1 };
/********** Compute u *************/
diff --git a/src/exa_wm_src_sample_planar.g4a b/src/exa_wm_src_sample_planar.g4a
index 10b15ebc..ca77b484 100644
--- a/src/exa_wm_src_sample_planar.g4a
+++ b/src/exa_wm_src_sample_planar.g4a
@@ -42,7 +42,7 @@ mov (1) g0.8<1>UD 0x0000e000UD { align1 mask_disable };
/* sample Y */
send (16) src_msg_ind /* msg reg index */
- src_sample_g_01<1>UW /* readback */
+ src_sample_g<1>UW /* readback */
g0<8,8,1>UW /* copy to msg start reg*/
sampler (1,0,F) /* sampler message description, (binding_table,sampler_index,datatype)
/* here(src->dst) we should use src_sampler and src_surface */
@@ -50,17 +50,16 @@ send (16) src_msg_ind /* msg reg index */
/* sample U (Cr) */
send (16) src_msg_ind /* msg reg index */
- src_sample_r_01<1>UW /* readback */
+ src_sample_r<1>UW /* readback */
g0<8,8,1>UW /* copy to msg start reg*/
- sampler (2,1,F) /* sampler message description, (binding_table,sampler_index,datatype)
+ sampler (3,2,F) /* sampler message description, (binding_table,sampler_index,datatype)
/* here(src->dst) we should use src_sampler and src_surface */
mlen 5 rlen 2 { align1 }; /* required message len 5, readback len 8 */
/* sample V (Cb) */
send (16) src_msg_ind /* msg reg index */
- src_sample_b_01<1>UW /* readback */
+ src_sample_b<1>UW /* readback */
g0<8,8,1>UW /* copy to msg start reg*/
- sampler (3,2,F) /* sampler message description, (binding_table,sampler_index,datatype)
+ sampler (5,4,F) /* sampler message description, (binding_table,sampler_index,datatype)
/* here(src->dst) we should use src_sampler and src_surface */
mlen 5 rlen 2 { align1 }; /* required message len 5, readback len 8 */
-
diff --git a/src/exa_wm_src_sample_planar.g4b b/src/exa_wm_src_sample_planar.g4b
index d2b9cfe5..77a5c234 100644
--- a/src/exa_wm_src_sample_planar.g4b
+++ b/src/exa_wm_src_sample_planar.g4b
@@ -1,4 +1,4 @@
{ 0x00000201, 0x20080061, 0x00000000, 0x0000e000 },
{ 0x01800031, 0x22001d29, 0x008d0000, 0x02520001 },
- { 0x01800031, 0x21c01d29, 0x008d0000, 0x02520102 },
- { 0x01800031, 0x22401d29, 0x008d0000, 0x02520203 },
+ { 0x01800031, 0x21c01d29, 0x008d0000, 0x02520203 },
+ { 0x01800031, 0x22401d29, 0x008d0000, 0x02520405 },
diff --git a/src/exa_wm_write.g4a b/src/exa_wm_write.g4a
index b16e6497..faee80b3 100644
--- a/src/exa_wm_write.g4a
+++ b/src/exa_wm_write.g4a
@@ -40,13 +40,13 @@ mov (8) data_port_g_01<1>F src_sample_g_01<8,8,1>F { align1 };
mov (8) data_port_b_01<1>F src_sample_b_01<8,8,1>F { align1 };
mov (8) data_port_a_01<1>F src_sample_a_01<8,8,1>F { align1 };
-mov (8) data_port_r_23<1>F src_sample_r_23<8,8,1>F { align1 };
-mov (8) data_port_g_23<1>F src_sample_g_23<8,8,1>F { align1 };
-mov (8) data_port_b_23<1>F src_sample_b_23<8,8,1>F { align1 };
-mov (8) data_port_a_23<1>F src_sample_a_23<8,8,1>F { align1 };
+mov (8) data_port_r_23<1>F src_sample_r_23<8,8,1>F { sechalf align1 };
+mov (8) data_port_g_23<1>F src_sample_g_23<8,8,1>F { sechalf align1 };
+mov (8) data_port_b_23<1>F src_sample_b_23<8,8,1>F { sechalf align1 };
+mov (8) data_port_a_23<1>F src_sample_a_23<8,8,1>F { sechalf align1 };
/* m0, m1 are all direct passed by PS thread payload */
-mov (8) data_port_msg_1<1>UD g1<8,8,1>UD { align1 };
+mov (8) data_port_msg_1<1>UD g1<8,8,1>UD { mask_disable align1 };
/* write */
send (16)
diff --git a/src/exa_wm_write.g4b b/src/exa_wm_write.g4b
index 785fe321..92e7b248 100644
--- a/src/exa_wm_write.g4b
+++ b/src/exa_wm_write.g4b
@@ -2,11 +2,11 @@
{ 0x00600001, 0x206003be, 0x008d0200, 0x00000000 },
{ 0x00600001, 0x208003be, 0x008d0240, 0x00000000 },
{ 0x00600001, 0x20a003be, 0x008d0280, 0x00000000 },
- { 0x00600001, 0x20c003be, 0x008d01e0, 0x00000000 },
- { 0x00600001, 0x20e003be, 0x008d0220, 0x00000000 },
- { 0x00600001, 0x210003be, 0x008d0260, 0x00000000 },
- { 0x00600001, 0x212003be, 0x008d02a0, 0x00000000 },
- { 0x00600001, 0x20200022, 0x008d0020, 0x00000000 },
+ { 0x00601001, 0x20c003be, 0x008d01e0, 0x00000000 },
+ { 0x00601001, 0x20e003be, 0x008d0220, 0x00000000 },
+ { 0x00601001, 0x210003be, 0x008d0260, 0x00000000 },
+ { 0x00601001, 0x212003be, 0x008d02a0, 0x00000000 },
+ { 0x00600201, 0x20200022, 0x008d0020, 0x00000000 },
{ 0x00800031, 0x24001d28, 0x008d0000, 0x85a04800 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/src/exa_wm_yuv_rgb.g4a b/src/exa_wm_yuv_rgb.g4a
index 327a8083..4fb2576a 100644
--- a/src/exa_wm_yuv_rgb.g4a
+++ b/src/exa_wm_yuv_rgb.g4a
@@ -30,27 +30,27 @@ include(`exa_wm.g4i')
define(`YCbCr_base', `src_sample_base')
-define(`Cr', `src_sample_r_01')
+define(`Cr', `src_sample_r')
define(`Cr_01', `src_sample_r_01')
define(`Cr_23', `src_sample_r_23')
-define(`Y', `src_sample_g_01')
+define(`Y', `src_sample_g')
define(`Y_01', `src_sample_g_01')
define(`Y_23', `src_sample_g_23')
-define(`Cb', `src_sample_b_01')
+define(`Cb', `src_sample_b')
define(`Cb_01', `src_sample_b_01')
define(`Cb_23', `src_sample_b_23')
-define(`Crn', `mask_sample_r_01')
+define(`Crn', `mask_sample_r')
define(`Crn_01', `mask_sample_r_01')
define(`Crn_23', `mask_sample_r_23')
-define(`Yn', `mask_sample_g_01')
+define(`Yn', `mask_sample_g')
define(`Yn_01', `mask_sample_g_01')
define(`Yn_23', `mask_sample_g_23')
-define(`Cbn', `mask_sample_b_01')
+define(`Cbn', `mask_sample_b')
define(`Cbn_01', `mask_sample_b_01')
define(`Cbn_23', `mask_sample_b_23')
@@ -58,14 +58,6 @@ define(`Cbn_23', `mask_sample_b_23')
* R = Clamp ( 1.164(Y-16/255) + 1.596(Cr-128/255), 0, 1)
* G = Clamp ( 1.164(Y-16/255) - 0.813(Cr-128/255) - 0.392(Cb-128/255), 0, 1)
* B = Clamp ( 1.164(Y-16/255) + 2.017(Cb-128/255), 0, 1)
- *
- * Y is g14, g15.
- * Cr is g12, g13.
- * Cb is g16, g17.
- *
- * R is g2, g6.
- * G is g3, g7.
- * B is g4, g8.
*/
/* Normalize Y, Cb and Cr:
@@ -84,31 +76,23 @@ add (16) Cbn<1>F Cb<8,8,1>F -0.501961F { compr align1 };
/*
* R = Y + Cr * 1.596
*/
-mul (8) null Crn_01<8,8,1>F 1.596F { align1 };
-mac.sat (8) src_sample_r_01<1>F Yn_01<8,8,1>F 1F { align1 };
-mul (8) null Crn_23<8,8,1>F 1.596F { align1 };
-mac.sat (8) src_sample_r_23<1>F Yn_23<8,8,1>F 1F { align1 };
+mov (16) acc0<1>F Yn<8,8,1>F { compr align1 };
+mac.sat(16) src_sample_r<1>F Crn<8,8,1>F 1.596F { compr align1 };
/*
* G = Crn * -0.813 + Cbn * -0.392 + Y
*/
-mul (8) null Crn_01<8,8,1>F -0.813F { align1 };
-mac (8) null Cbn_01<8,8,1>F -0.392F { align1 };
-mac.sat (8) src_sample_g_01<1>F Yn_01<8,8,1>F 1F { align1 };
-mul (8) null Crn_23<8,8,1>F -0.813F { align1 };
-mac (8) null Cbn_23<8,8,1>F -0.392F { align1 };
-mac.sat (8) src_sample_g_23<1>F Yn_23<8,8,1>F 1F { align1 };
+mov (16) acc0<1>F Yn<8,8,1>F { compr align1 };
+mac (16) acc0<1>F Crn<8,8,1>F -0.813F { compr align1 };
+mac.sat(16) src_sample_g<1>F Cbn<8,8,1>F -0.392F { compr align1 };
/*
* B = Cbn * 2.017 + Y
*/
-mul (8) null Cbn_01<8,8,1>F 2.017F { align1 };
-mac.sat (8) src_sample_b_01<1>F Yn_01<8,8,1>F 1F { align1 };
-mul (8) null Cbn_23<8,8,1>F 2.017F { align1 };
-mac.sat (8) src_sample_b_23<1>F Yn_23<8,8,1>F 1F { align1 };
+mov (16) acc0<1>F Yn<8,8,1>F { compr align1 };
+mac.sat(16) src_sample_b<1>F Cbn<8,8,1>F 2.017F { compr align1 };
/*
* A = 1.0
*/
-mov (8) src_sample_a_01<1>F 1.0F { align1 };
-mov (8) src_sample_a_23<1>F 1.0F { align1 };
+mov (16) src_sample_a<1>F 1.0F { compr align1 };
diff --git a/src/exa_wm_yuv_rgb.g4b b/src/exa_wm_yuv_rgb.g4b
index be72e549..01f6e2b2 100644
--- a/src/exa_wm_yuv_rgb.g4b
+++ b/src/exa_wm_yuv_rgb.g4b
@@ -2,19 +2,11 @@
{ 0x00802041, 0x23007fbd, 0x008d0300, 0x3f94fdf4 },
{ 0x00802040, 0x22c07fbd, 0x008d01c0, 0xbf008084 },
{ 0x00802040, 0x23407fbd, 0x008d0240, 0xbf008084 },
- { 0x00600041, 0x20007fbc, 0x008d02c0, 0x3fcc49ba },
- { 0x80600048, 0x21c07fbd, 0x008d0300, 0x3f800000 },
- { 0x00600041, 0x20007fbc, 0x008d02e0, 0x3fcc49ba },
- { 0x80600048, 0x21e07fbd, 0x008d0320, 0x3f800000 },
- { 0x00600041, 0x20007fbc, 0x008d02c0, 0xbf5020c5 },
- { 0x00600048, 0x20007fbc, 0x008d0340, 0xbec8b439 },
- { 0x80600048, 0x22007fbd, 0x008d0300, 0x3f800000 },
- { 0x00600041, 0x20007fbc, 0x008d02e0, 0xbf5020c5 },
- { 0x00600048, 0x20007fbc, 0x008d0360, 0xbec8b439 },
- { 0x80600048, 0x22207fbd, 0x008d0320, 0x3f800000 },
- { 0x00600041, 0x20007fbc, 0x008d0340, 0x40011687 },
- { 0x80600048, 0x22407fbd, 0x008d0300, 0x3f800000 },
- { 0x00600041, 0x20007fbc, 0x008d0360, 0x40011687 },
- { 0x80600048, 0x22607fbd, 0x008d0320, 0x3f800000 },
- { 0x00600001, 0x228003fd, 0x00000000, 0x3f800000 },
- { 0x00600001, 0x22a003fd, 0x00000000, 0x3f800000 },
+ { 0x00802001, 0x240003bc, 0x008d0300, 0x00000000 },
+ { 0x80802048, 0x21c07fbd, 0x008d02c0, 0x3fcc49ba },
+ { 0x00802001, 0x240003bc, 0x008d0300, 0x00000000 },
+ { 0x00802048, 0x24007fbc, 0x008d02c0, 0xbf5020c5 },
+ { 0x80802048, 0x22007fbd, 0x008d0340, 0xbec8b439 },
+ { 0x00802001, 0x240003bc, 0x008d0300, 0x00000000 },
+ { 0x80802048, 0x22407fbd, 0x008d0340, 0x40011687 },
+ { 0x00802001, 0x228003fd, 0x00000000, 0x3f800000 },
diff --git a/src/i830.h b/src/i830.h
index 8bc0af4f..6cce0438 100644
--- a/src/i830.h
+++ b/src/i830.h
@@ -831,7 +831,6 @@ Bool i915_prepare_composite(int op, PicturePtr pSrc, PicturePtr pMask,
unsigned int gen4_render_state_size(ScrnInfoPtr pScrn);
void gen4_render_state_init(ScrnInfoPtr pScrn);
void gen4_render_state_cleanup(ScrnInfoPtr pScrn);
-void gen4_render_state_reset(ScrnInfoPtr pScrn);
Bool i965_check_composite(int op, PicturePtr pSrc, PicturePtr pMask,
PicturePtr pDst);
Bool i965_prepare_composite(int op, PicturePtr pSrc, PicturePtr pMask,
diff --git a/src/i830_accel.c b/src/i830_accel.c
index 0194f00d..953a73bc 100644
--- a/src/i830_accel.c
+++ b/src/i830_accel.c
@@ -205,9 +205,6 @@ I830Sync(ScrnInfoPtr pScrn)
pI830->LpRing->space = pI830->LpRing->mem->size - 8;
pI830->nextColorExpandBuf = 0;
-
- if (IS_I965G(pI830))
- gen4_render_state_reset(pScrn);
}
void
diff --git a/src/i830_debug.c b/src/i830_debug.c
index 15b02ceb..074e8b9c 100644
--- a/src/i830_debug.c
+++ b/src/i830_debug.c
@@ -927,13 +927,544 @@ void i830DumpRegs (ScrnInfoPtr pScrn)
#ifndef REG_DUMPER
-#define NUM_RING_DUMP 64
+/*
+ * Names of the Memory Interface commands, indexed by the 6-bit opcode that
+ * the decoders in this file extract with (cmd >> 23) & 0x3f.  Slots that are
+ * not listed are NULL, which the decoders treat as an unknown opcode.
+ */
+static const char *const mi_cmds[0x40] = {
+    [0x00] = "MI_NOOP",
+    [0x01] = "Reserved 01",
+    [0x02] = "MI_USER_INTERRUPT",
+    [0x03] = "MI_WAIT_FOR_EVENT",
+    [0x04] = "MI_FLUSH",
+    [0x05] = "MI_ARB_CHECK",
+    [0x07] = "MI_REPORT_HEAD",
+    [0x0a] = "MI_BATCH_BUFFER_END",
+    [0x11] = "MI_OVERLAY_FLIP",
+    [0x12] = "MI_LOAD_SCAN_LINES_INCL",
+    [0x13] = "MI_LOAD_SCAN_LINES_EXCL",
+    [0x14] = "MI_DISPLAY_BUFFER_INFO",
+    [0x18] = "MI_SET_CONTEXT",
+    [0x20] = "MI_STORE_DATA_IMM",
+    [0x21] = "MI_STORE_DATA_INDEX",
+    [0x22] = "MI_LOAD_REGISTER_IMM",
+    [0x24] = "MI_STORE_REGISTER_MEM",
+    [0x31] = "MI_BATCH_BUFFER_START",
+};
+
+/*
+ * Names of the 2D (blitter) commands, indexed by the 7-bit opcode that the
+ * decoders in this file extract with (cmd >> 22) & 0x7f.  Slots that are not
+ * listed are NULL, which the decoders treat as an unknown opcode.
+ */
+static const char *const _2d_cmds[0x80] = {
+    [0x01] = "XY_SETUP_BLT",
+    [0x03] = "XY_SETUP_CLIP_BLT",
+    [0x11] = "XY_SETUP_MONO_PATTERN_SL_BLT",
+    [0x24] = "XY_PIXEL_BLT",
+    [0x25] = "XY_SCANLINE_BLT",
+    [0x26] = "XY_TEXT_BLT",
+    [0x31] = "XY_TEXT_IMMEDIATE_BLT",
+    [0x40] = "COLOR_BLT",
+    [0x43] = "SRC_COPY_BLT",
+    [0x50] = "XY_COLOR_BLT",
+    [0x51] = "XY_PAT_BLT",
+    [0x52] = "XY_MONO_PAT_BLT",
+    [0x53] = "XY_SRC_COPY_BLT",
+    [0x54] = "XY_MONO_SRC_COPY_BLT",
+    [0x55] = "XY_FULL_BLT",
+    [0x56] = "XY_FULL_MONO_SRC_BLT",
+    [0x57] = "XY_FULL_MONO_PATTERN_BLT",
+    [0x58] = "XY_FULL_MONO_PATTERN_MONO_SRC_BLT",
+    [0x59] = "XY_MONO_PAT_FIXED_BLT",
+    [0x71] = "XY_MONO_SRC_COPY_IMMEDIATE_BLT",
+    [0x72] = "XY_PAT_BLT_IMMEDIATE",
+    [0x73] = "XY_SRC_COPY_CHROMA_BLT",
+    [0x74] = "XY_FULL_IMMEDIATE_PATTERN_BLT",
+    [0x75] = "XY_FULL_MONO_SRC_IMMEDIATE_PATTERN_BLT",
+    [0x76] = "XY_PAT_CHROMA_BLT",
+    [0x77] = "XY_PAT_CHROMA_BLT_IMMEDIATE",
+};
+
+/* Flag: the command is a single dword and carries no length field. */
+#define _3D_ONE_WORD 1
+
+/*
+ * Names and flags of the 3D/media commands, indexed as
+ * [pipeline type][opcode][subopcode], i.e. by the fields the decoders in
+ * this file extract with (cmd >> 27) & 0x3, (cmd >> 24) & 0x7 and
+ * (cmd >> 16) & 0xff.  Slots that are not listed have a NULL name and
+ * zero flags.
+ */
+static const struct {
+    const char *name;
+    int flags;
+} _3d_cmds[0x4][0x8][0x100] = {
+    [0x0] = { /* Pipeline Type 00 (Common) */
+        [0x0] = {
+            [0x00] = { "URB_FENCE", 0 },
+            [0x01] = { "CS_URB_STATE", 0 },
+            [0x02] = { "CONSTANT_BUFFER", 0 },
+            [0x03] = { "STATE_PREFETCH", 0 },
+        },
+        [0x1] = {
+            [0x01] = { "STATE_BASE_ADDRESS", 0 },
+            [0x02] = { "STATE_SIP", 0 },
+            [0x04] = { "PIPELINE_SELECT", _3D_ONE_WORD },
+        },
+    },
+    [0x1] = { /* Pipeline Type 01 (Single DW) */
+        [0x1] = {
+            /*
+             * Marked one-dword like the type 00 entry of the same name:
+             * without the flag the decoder would take the low byte of a
+             * single-dword command as a length.
+             */
+            [0x04] = { "PIPELINE_SELECT", _3D_ONE_WORD },
+        },
+    },
+    [0x2] = { /* Pipeline Type 02 (Media) */
+        [0x0] = {
+            [0x00] = { "MEDIA_STATE_POINTERS", 0 },
+        },
+        [0x1] = {
+            [0x00] = { "MEDIA_OBJECT", 0 },
+            [0x01] = { "MEDIA_OBJECT_EX", 0 },
+            [0x02] = { "MEDIA_OBJECT_PTR", 0 },
+        },
+    },
+    [0x3] = { /* Pipeline Type 03 (3D) */
+        [0x0] = {
+            [0x00] = { "3DSTATE_PIPELINED_POINTERS", 0 },
+            [0x01] = { "3DSTATE_BINDING_TABLE_POINTERS", 0 },
+            [0x05] = { "3DSTATE_URB", 0 },
+            [0x08] = { "3DSTATE_VERTEX_BUFFERS", 0 },
+            [0x09] = { "3DSTATE_VERTEX_ELEMENTS", 0 },
+            [0x0a] = { "3DSTATE_INDEX_BUFFER", 0 },
+            [0x0b] = { "3DSTATE_VF_STATISTICS", _3D_ONE_WORD },
+            [0x0d] = { "3DSTATE_VIEWPORT_STATE_POINTERS", 0 },
+        },
+        [0x1] = {
+            [0x00] = { "3DSTATE_DRAWING_RECTANGLE", 0 },
+            [0x01] = { "3DSTATE_CONSTANT_COLOR", 0 },
+            [0x02] = { "3DSTATE_SAMPLER_PALETTE_LOAD0", 0 },
+            [0x04] = { "3DSTATE_CHROMA_KEY", 0 },
+            [0x05] = { "3DSTATE_DEPTH_BUFFER", 0 },
+            [0x06] = { "3DSTATE_POLY_STIPPLE_OFFSET", 0 },
+            [0x07] = { "3DSTATE_POLY_STIPPLE_PATTERN", 0 },
+            [0x08] = { "3DSTATE_LINE_STIPPLE", 0 },
+            [0x09] = { "3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP", 0 },
+        },
+        [0x2] = {
+            [0x00] = { "PIPE_CONTROL", 0 },
+        },
+        [0x3] = {
+            [0x00] = { "3DPRIMITIVE", 0 },
+        },
+    },
+};
+
+/*
+ * Classify one command dword.
+ *
+ * Returns the length of the command in dwords (header included) when `cmd`
+ * is accepted as the start of a command, or -1 when it is not.  The result
+ * is never 0.
+ */
+static int
+i830_valid_command (uint32_t cmd)
+{
+    uint32_t type = (cmd >> 29) & 0x7;
+    uint32_t pipeline_type;
+    uint32_t opcode;
+    uint32_t subopcode;
+    uint32_t count;
+
+    switch (type) {
+    case 0: /* Memory Interface */
+        opcode = (cmd >> 23) & 0x3f;
+        if (opcode < 0x10)
+            count = 1;
+        else
+            count = (cmd & 0x3f) + 2;
+        /* opcode 0 is only accepted as the all-zero dword */
+        if (opcode == 0x00 && cmd != 0x00000000)
+            return -1;
+        if (!mi_cmds[opcode])
+            return -1;
+        break;
+    case 1:
+        /*
+         * Nothing decodes this type.  Report it as invalid instead of
+         * falling out of the switch with `count` never assigned.
+         */
+        return -1;
+    case 2: /* 2D */
+        count = (cmd & 0x1f) + 2;
+        opcode = (cmd >> 22) & 0x7f;
+        if (!_2d_cmds[opcode])
+            return -1;
+        break;
+    case 3: /* 3D */
+        pipeline_type = (cmd >> 27) & 0x3;
+        opcode = (cmd >> 24) & 0x7;
+        subopcode = (cmd >> 16) & 0xff;
+        if (_3d_cmds[pipeline_type][opcode][subopcode].flags & _3D_ONE_WORD)
+            count = 1;
+        else
+            count = (cmd & 0xff) + 2;
+        /*
+         * NOTE(review): pipeline_type was masked with 0x3 above, so this
+         * test is always true and the name lookup below is never reached;
+         * every type 3 dword is therefore accepted.  Confirm whether that
+         * is intended before removing either statement.
+         */
+        if (pipeline_type <= 3)
+            return count;
+        if (!_3d_cmds[pipeline_type][opcode][subopcode].name)
+            return -1;
+        break;
+    default:
+        return -1;
+    }
+    return count;
+}
+}
+
+/*
+ * Print a tab and the decoded name of command dword `cmd` through ErrorF;
+ * for named commands `count` is printed after the name.  Every path ends
+ * the current log line.  Always returns 1.
+ */
+static int
+i830_dump_cmd (uint32_t cmd, int count)
+{
+    uint32_t type = (cmd >> 29) & 0x7;
+    uint32_t pipeline_type;
+    uint32_t opcode;
+    uint32_t subopcode;
+
+    ErrorF ("\t");
+    switch (type) {
+    case 0: /* Memory Interface */
+        opcode = (cmd >> 23) & 0x3f;
+        if (mi_cmds[opcode])
+            ErrorF ("%-40.40s %d\n", mi_cmds[opcode], count);
+        else
+            ErrorF ("Memory Interface Reserved\n");
+        break;
+    case 2: /* 2D */
+        opcode = (cmd >> 22) & 0x7f;
+        if (_2d_cmds[opcode])
+            ErrorF ("%-40.40s %d\n", _2d_cmds[opcode], count);
+        else
+            ErrorF ("2D Reserved\n");
+        break;
+    case 3: /* 3D */
+        pipeline_type = (cmd >> 27) & 0x3;
+        opcode = (cmd >> 24) & 0x7;
+        subopcode = (cmd >> 16) & 0xff;
+        if (_3d_cmds[pipeline_type][opcode][subopcode].name) {
+            ErrorF ("%-40.40s %d\n",
+                    _3d_cmds[pipeline_type][opcode][subopcode].name,
+                    count);
+        } else {
+            ErrorF ("3D/Media Reserved (pipe %d op %d sub %d)\n", pipeline_type, opcode, subopcode);
+        }
+        break;
+    case 1:
+        /*
+         * Type 1 has no decoder; share the generic message so the line
+         * started by the tab above is terminated.
+         */
+    default:
+        ErrorF ("Reserved\n");
+        break;
+    }
+    return 1;
+}
+
+/*
+ * Check whether decoding commands from ring offset `ring` ends exactly at
+ * ring offset `end`.
+ *
+ * Returns 1 when every command header met on the way is accepted by
+ * i830_valid_command and the last command finishes precisely at `end`.
+ * Returns 0 when a header is rejected or when `end` is reached in the
+ * middle of a command.
+ *
+ * NOTE(review): the ring summary line is printed on every call, and the
+ * caller invokes this once per candidate start offset.
+ */
+static int
+i830_valid_chain (ScrnInfoPtr pScrn, unsigned int ring, unsigned int end)
+{
+    I830Ptr pI830 = I830PTR(pScrn);
+    unsigned int head, tail, mask;
+    volatile unsigned char *virt;
+    uint32_t data;
+    int count;
+    volatile uint32_t *ptr;
+
+    head = (INREG (LP_RING + RING_HEAD)) & I830_HEAD_MASK;
+    tail = INREG (LP_RING + RING_TAIL) & I830_TAIL_MASK;
+    mask = pI830->LpRing->tail_mask;
+
+    virt = pI830->LpRing->virtual_start;
+    ErrorF ("Ring at virtual %p head 0x%x tail 0x%x count %d\n",
+            virt, head, tail, (((tail + mask + 1) - head) & mask) >> 2);
+
+    for (;;)
+    {
+        ptr = (volatile uint32_t *) (virt + ring);
+        data = *ptr;
+        count = i830_valid_command (data);
+        /*
+         * A zero length would leave `ring` unchanged and re-read the same
+         * dword forever, so reject it together with the -1 "invalid" result.
+         */
+        if (count <= 0)
+            return 0;
+        /* step over the command one dword at a time, stopping at `end` */
+        while (count > 0 && ring != end)
+        {
+            ring = (ring + 4) & mask;
+            count--;
+        }
+        if (ring == end)
+            return count == 0;
+    }
+}
static void
-i830_dump_ring(ScrnInfoPtr pScrn)
+i830_dump_cmds (ScrnInfoPtr pScrn,
+                volatile unsigned char *virt,
+                uint32_t start,
+                uint32_t stop,
+                uint32_t mask,
+                uint32_t acthd)
+{
+    /*
+     * Dump the dwords of `virt` from offset `start` up to (not including)
+     * offset `stop`, wrapping offsets with `mask`.  Dwords at a command
+     * boundary are decoded with i830_dump_cmd; the dword at offset `acthd`
+     * is prefixed with "****".  MI_BATCH_BUFFER_END ends the dump after
+     * that dword, and MI_BATCH_BUFFER_START recurses into the referenced
+     * batch inside the pI830->FbBase mapping.
+     *
+     * NOTE(review): when i830_valid_command returns -1 the command cursor
+     * moves back one dword, so nothing after that point is decoded; confirm
+     * this is the intended way to stop decoding.
+     */
+    I830Ptr pI830 = I830PTR(pScrn);
+    uint32_t ring = start;
+    uint32_t cmd = start;
+    uint32_t data;
+    uint32_t batch_start_mask = ((0x7 << 29) |
+                                 (0x3f << 23) |
+                                 (0x7ff << 12) |
+                                 (1 << 11) |
+                                 (1 << 7) |
+                                 (1 << 6) |
+                                 (0x3f << 0));
+    uint32_t batch_start_cmd = ((0x0 << 29) |
+                                (0x31 << 23) |
+                                (0x00 << 12) |
+                                (0 << 11) |
+                                (1 << 7) |
+                                (0 << 6) |
+                                (0 << 0));
+    int count;
+    volatile uint32_t *ptr;
+
+    while (ring != stop)
+    {
+        if (ring == acthd)
+            ErrorF ("****");
+        ErrorF ("\t%08x: %08x", ring, *(volatile unsigned int *) (virt + ring));
+        if (ring == cmd)
+        {
+            ptr = (volatile uint32_t *) (virt + ring);
+            data = *ptr;
+            count = i830_valid_command (data);
+            i830_dump_cmd (data, count);
+
+            /* check for MI_BATCH_BUFFER_END */
+            if (data == (0x0a << 23))
+                stop = (ring + 4) & mask;
+            /* check for MI_BATCH_BUFFER_START */
+            if ((data & batch_start_mask) == batch_start_cmd)
+            {
+                /*
+                 * Fetch the address dword at the wrapped offset that is
+                 * printed below, not at ptr[1], which runs off the end of
+                 * the buffer when the header is its last dword.
+                 */
+                uint32_t batch = *(volatile uint32_t *) (virt + ((ring + 4) & mask));
+                /*
+                 * The nested walk advances in dword steps and ends on
+                 * equality with `stop`, so it must start dword-aligned.
+                 */
+                uint32_t batch_offset = batch & ~(uint32_t) 0x3;
+
+                if (batch_offset < pI830->FbMapSize) {
+                    ErrorF ("\t%08x: %08x\n", (ring + 4) & mask, batch);
+                    ErrorF ("Batch buffer at 0x%08x {\n", batch);
+                    /*
+                     * `stop` is an offset compared against `ring`, so pass
+                     * the end of the mapping, not the number of bytes left
+                     * after the batch start.
+                     */
+                    i830_dump_cmds (pScrn, pI830->FbBase, batch_offset,
+                                    pI830->FbMapSize,
+                                    0xffffffff, acthd);
+                    ErrorF ("}\n");
+                    /* skip the address dword that was already printed */
+                    ring = (ring + (count - 1) * 4) & mask;
+                }
+            }
+            cmd = (cmd + count * 4) & mask;
+        } else
+            ErrorF ("\n");
+        ring = (ring + 4) & mask;
+    }
+}
+
+static void
+i830_dump_ring(ScrnInfoPtr pScrn, uint32_t acthd)
{
I830Ptr pI830 = I830PTR(pScrn);
- unsigned int head, tail, ring, mask;
+ unsigned int head, tail, mask, cmd;
volatile unsigned char *virt;
head = (INREG (LP_RING + RING_HEAD)) & I830_HEAD_MASK;
@@ -943,11 +1474,18 @@ i830_dump_ring(ScrnInfoPtr pScrn)
virt = pI830->LpRing->virtual_start;
ErrorF ("Ring at virtual %p head 0x%x tail 0x%x count %d\n",
virt, head, tail, (((tail + mask + 1) - head) & mask) >> 2);
- for (ring = (head - 128) & mask; ring != ((head + 4) & mask);
- ring = (ring + 4) & mask)
+
+ /* walk back by instructions */
+ for (cmd = (head - 256) & mask;
+ cmd != (head & mask);
+ cmd = (cmd + 4) & mask)
{
- ErrorF ("\t%08x: %08x\n", ring, *(volatile unsigned int *) (virt + ring));
+ if (i830_valid_chain (pScrn, cmd, (head & mask)))
+ break;
}
+
+ i830_dump_cmds (pScrn, virt, cmd, head, mask, acthd);
+
ErrorF ("Ring end\n");
}
@@ -980,13 +1518,14 @@ i830_dump_error_state(ScrnInfoPtr pScrn)
ErrorF("hwstam: 0x%04x ier: 0x%04x imr: 0x%04x iir: 0x%04x\n",
INREG16(HWSTAM), INREG16(IER), INREG16(IMR), INREG16(IIR));
- i830_dump_ring (pScrn);
+ i830_dump_ring (pScrn, 0);
}
void
i965_dump_error_state(ScrnInfoPtr pScrn)
{
I830Ptr pI830 = I830PTR(pScrn);
+ uint32_t acthd;
ErrorF("pgetbl_ctl: 0x%08x pgetbl_err: 0x%08x\n",
INREG(PGETBL_CTL), INREG(PGE_ERR));
@@ -1016,8 +1555,9 @@ i965_dump_error_state(ScrnInfoPtr pScrn)
"imr: 0x%08x iir: 0x%08x\n",
INREG(HWSTAM), INREG(IER), INREG(IMR), INREG(IIR));
+ acthd = INREG(ACTHD);
ErrorF("acthd: 0x%08x dma_fadd_p: 0x%08x\n",
- INREG(ACTHD), INREG(DMA_FADD_P));
+ acthd, INREG(DMA_FADD_P));
ErrorF("ecoskpd: 0x%08x excc: 0x%08x\n",
INREG(ECOSKPD), INREG(EXCC));
@@ -1062,6 +1602,7 @@ i965_dump_error_state(ScrnInfoPtr pScrn)
INREG(TS_DEBUG_DATA));
ErrorF("TD_CTL 0x%08x / 0x%08x\n",
INREG(TD_CTL), INREG(TD_CTL2));
+ i830_dump_ring (pScrn, acthd);
}
/**
diff --git a/src/i830_display.c b/src/i830_display.c
index 6ac9f9e7..1122721a 100644
--- a/src/i830_display.c
+++ b/src/i830_display.c
@@ -1718,8 +1718,10 @@ i830_crtc_clock_get(ScrnInfoPtr pScrn, xf86CrtcPtr crtc)
return 0;
}
- /* XXX: Handle the 100Mhz refclk */
- i9xx_clock(96000, &clock);
+ if ((dpll & PLL_REF_INPUT_MASK) == PLLB_REF_INPUT_SPREADSPECTRUMIN)
+ i9xx_clock(100000, &clock);
+ else
+ i9xx_clock(96000, &clock);
} else {
Bool is_lvds = (pipe == 1) && (INREG(LVDS) & LVDS_PORT_EN);
diff --git a/src/i830_driver.c b/src/i830_driver.c
index 63fc8cb9..aca01dbb 100644
--- a/src/i830_driver.c
+++ b/src/i830_driver.c
@@ -197,6 +197,9 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "i830_debug.h"
#include "i830_bios.h"
#include "i830_video.h"
+#if HAVE_SYS_MMAN_H && HAVE_MPROTECT
+#include <sys/mman.h>
+#endif
#ifdef INTEL_XVMC
#define _INTEL_XVMC_SERVER_
@@ -685,6 +688,13 @@ I830MapMem(ScrnInfoPtr pScrn)
err = pci_device_map_range (device, pI830->LinearAddr, pI830->FbMapSize,
PCI_DEV_MAP_FLAG_WRITABLE | PCI_DEV_MAP_FLAG_WRITE_COMBINE,
(void **) &pI830->FbBase);
+ if (err)
+ return FALSE;
+ /* KLUDGE ALERT -- rewrite the PTEs to turn off the CD and WT bits */
+#if HAVE_MPROTECT
+ mprotect (pI830->FbBase, pI830->FbMapSize, PROT_NONE);
+ mprotect (pI830->FbBase, pI830->FbMapSize, PROT_READ|PROT_WRITE);
+#endif
#else
pI830->FbBase = xf86MapPciMem(pScrn->scrnIndex, VIDMEM_FRAMEBUFFER,
pI830->PciTag,
@@ -3112,7 +3122,7 @@ I830FreeScreen(int scrnIndex, int flags)
#ifdef INTEL_XVMC
ScrnInfoPtr pScrn = xf86Screens[scrnIndex];
I830Ptr pI830 = I830PTR(pScrn);
- if (pI830->XvMCEnabled)
+ if (pI830 && pI830->XvMCEnabled)
intel_xvmc_finish(xf86Screens[scrnIndex]);
#endif
I830FreeRec(xf86Screens[scrnIndex]);
@@ -3126,7 +3136,9 @@ I830LeaveVT(int scrnIndex, int flags)
ScrnInfoPtr pScrn = xf86Screens[scrnIndex];
I830Ptr pI830 = I830PTR(pScrn);
xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(pScrn);
+#ifndef HAVE_FREE_SHADOW
int o;
+#endif
DPRINTF(PFX, "Leave VT\n");
@@ -3154,6 +3166,7 @@ I830LeaveVT(int scrnIndex, int flags)
}
#endif
+#ifndef HAVE_FREE_SHADOW
for (o = 0; o < config->num_crtc; o++) {
xf86CrtcPtr crtc = config->crtc[o];
@@ -3164,6 +3177,9 @@ I830LeaveVT(int scrnIndex, int flags)
crtc->rotatedData = NULL;
}
}
+#else
+ xf86RotateFreeShadow(pScrn);
+#endif
xf86_hide_cursors (pScrn);
diff --git a/src/i830_lvds.c b/src/i830_lvds.c
index a5004b4b..1562c217 100644
--- a/src/i830_lvds.c
+++ b/src/i830_lvds.c
@@ -1366,9 +1366,10 @@ i830_lvds_init(ScrnInfoPtr pScrn)
/*
* Default to filling the whole screen if the mode is less than the
- * native size, without breaking aspect ratio.
+ * native size. (Change default to origin FULL mode, i8xx can only work
+ * in that mode for now.)
*/
- dev_priv->fitting_mode = FULL_ASPECT;
+ dev_priv->fitting_mode = FULL;
return;
diff --git a/src/i830_video.c b/src/i830_video.c
index 24372886..e1095781 100644
--- a/src/i830_video.c
+++ b/src/i830_video.c
@@ -2060,6 +2060,9 @@ i830_display_video(ScrnInfoPtr pScrn, xf86CrtcPtr crtc,
switch (id) {
case FOURCC_YV12:
case FOURCC_I420:
+#ifdef INTEL_XVMC
+ case FOURCC_XVMC:
+#endif
OVERLAY_DEBUG("YUV420\n");
#if 0
/* set UV vertical phase to -0.25 */
@@ -2074,7 +2077,6 @@ i830_display_video(ScrnInfoPtr pScrn, xf86CrtcPtr crtc,
break;
case FOURCC_UYVY:
case FOURCC_YUY2:
- default:
OVERLAY_DEBUG("YUV422\n");
overlay->OSTRIDE = dstPitch;
OCMD &= ~SOURCE_FORMAT;
@@ -2315,6 +2317,9 @@ I830PutImage(ScrnInfoPtr pScrn,
switch (destId) {
case FOURCC_YV12:
case FOURCC_I420:
+#ifdef INTEL_XVMC
+ case FOURCC_XVMC:
+#endif
if (pPriv->rotation & (RR_Rotate_90 | RR_Rotate_270)) {
dstPitch = ((height / 2) + pitchAlignMask) & ~pitchAlignMask;
size = dstPitch * width * 3;
@@ -2325,7 +2330,7 @@ I830PutImage(ScrnInfoPtr pScrn,
break;
case FOURCC_UYVY:
case FOURCC_YUY2:
- default:
+
if (pPriv->rotation & (RR_Rotate_90 | RR_Rotate_270)) {
dstPitch = ((height << 1) + pitchAlignMask) & ~pitchAlignMask;
size = dstPitch * width;
@@ -2334,6 +2339,10 @@ I830PutImage(ScrnInfoPtr pScrn,
size = dstPitch * height;
}
break;
+ default:
+ dstPitch = 0;
+ size = 0;
+ break;
}
#if 0
ErrorF("srcPitch: %d, dstPitch: %d, size: %d\n", srcPitch, dstPitch, size);
@@ -2413,11 +2422,16 @@ I830PutImage(ScrnInfoPtr pScrn,
break;
case FOURCC_UYVY:
case FOURCC_YUY2:
- default:
nlines = ((y2 + 0xffff) >> 16) - top;
I830CopyPackedData(pScrn, pPriv, buf, srcPitch, dstPitch, top, left,
nlines, npixels);
break;
+#ifdef INTEL_XVMC
+ case FOURCC_XVMC:
+ break;
+#endif
+ default:
+ break;
}
if (pDraw->type == DRAWABLE_WINDOW) {
@@ -2427,13 +2441,13 @@ I830PutImage(ScrnInfoPtr pScrn,
}
#ifdef I830_USE_EXA
- if (pI830->useEXA) {
+ if (pPriv->textured && pI830->useEXA) {
/* Force the pixmap into framebuffer so we can draw to it. */
exaMoveInPixmap(pPixmap);
}
#endif
- if (!pI830->useEXA &&
+ if (pPriv->textured && !pI830->useEXA &&
(((char *)pPixmap->devPrivate.ptr < (char *)pI830->FbBase) ||
((char *)pPixmap->devPrivate.ptr >= (char *)pI830->FbBase +
pI830->FbMapSize))) {
diff --git a/src/i965_render.c b/src/i965_render.c
index 1a3237bb..3c553de1 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -124,12 +124,6 @@ static struct formatinfo i965_tex_formats[] = {
{PICT_a8, BRW_SURFACEFORMAT_A8_UNORM },
};
-/** Private data for gen4 render accel implementation. */
-struct gen4_render_state {
- unsigned char *state_addr;
- unsigned int state_offset;
-};
-
static void i965_get_blend_cntl(int op, PicturePtr pMask, uint32_t dst_format,
uint32_t *sblend, uint32_t *dblend)
{
@@ -265,37 +259,23 @@ i965_check_composite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define BRW_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1)
-static int urb_vs_start, urb_vs_size;
-static int urb_gs_start, urb_gs_size;
-static int urb_clip_start, urb_clip_size;
-static int urb_sf_start, urb_sf_size;
-static int urb_cs_start, urb_cs_size;
-
-static struct brw_surface_state *dest_surf_state, dest_surf_state_local;
-static struct brw_surface_state *src_surf_state, src_surf_state_local;
-static struct brw_surface_state *mask_surf_state, mask_surf_state_local;
-static struct brw_sampler_state *src_sampler_state, src_sampler_state_local;
-static struct brw_sampler_state *mask_sampler_state, mask_sampler_state_local;
-
-static struct brw_vs_unit_state *vs_state, vs_state_local;
-static struct brw_sf_unit_state *sf_state, sf_state_local;
-static struct brw_wm_unit_state *wm_state, wm_state_local;
-
-static uint32_t *binding_table;
-static int binding_table_entries;
-
-static int dest_surf_offset, src_surf_offset, mask_surf_offset;
-static int src_sampler_offset, mask_sampler_offset,vs_offset;
-static int sf_offset, wm_offset, cc_offset, vb_offset;
-static int wm_scratch_offset;
-static int binding_table_offset;
-static int next_offset, total_state_size;
-static char *state_base;
-static int state_base_offset;
-static float *vb;
-static int vb_size = (2 + 3 + 3) * 3 * 4; /* (dst, src, mask) 3 vertices, 4 bytes */
-
-static uint32_t src_blend, dst_blend;
+/* Set up a default static partitioning of the URB, which is supposed to
+ * allow anything we would want to do, at potentially lower performance.
+ *
+ * Each unit (CS, VS, GS, CLIP, SF) is described by an entry count and a
+ * per-entry size; i965_prepare_composite() multiplies the two to get the
+ * size of each unit's region and lays the regions out back to back. A unit
+ * whose values are both 0 therefore gets an empty region.
+ */
+#define URB_CS_ENTRY_SIZE 0
+#define URB_CS_ENTRIES 0
+
+#define URB_VS_ENTRY_SIZE 1 // each 512-bit row
+#define URB_VS_ENTRIES 8 // we need at least 8 entries
+
+#define URB_GS_ENTRY_SIZE 0
+#define URB_GS_ENTRIES 0
+
+#define URB_CLIP_ENTRY_SIZE 0
+#define URB_CLIP_ENTRIES 0
+
+#define URB_SF_ENTRY_SIZE 2
+#define URB_SF_ENTRIES 1
static const uint32_t sip_kernel_static[][4] = {
/* wait (1) a0<1>UW a145<0,1,0>UW { align1 + } */
@@ -422,6 +402,12 @@ static const uint32_t ps_kernel_masknoca_projective_static [][4] = {
#define KERNEL_DECL(template) \
uint32_t template [((sizeof (template ## _static) + 63) & ~63) / 16][4];
+#define WM_STATE_DECL(kernel) /* declares wm_state_<kernel>, one WM unit state per [filter][extend][filter][extend] combination */ \
+ struct brw_wm_unit_state wm_state_ ## kernel[SAMPLER_STATE_FILTER_COUNT] \
+ [SAMPLER_STATE_EXTEND_COUNT] \
+ [SAMPLER_STATE_FILTER_COUNT] \
+ [SAMPLER_STATE_EXTEND_COUNT]
+
/* Many of the fields in the state structure must be aligned to a
* 64-byte boundary, (or a 32-byte boundary, but 64 is good enough for
* those too).
@@ -429,11 +415,28 @@ static const uint32_t ps_kernel_masknoca_projective_static [][4] = {
#define PAD64_MULTI(previous, idx, factor) char previous ## _pad ## idx [(64 - (sizeof(struct previous) * (factor)) % 64) % 64]
#define PAD64(previous, idx) PAD64_MULTI(previous, idx, 1)
+typedef enum { /* Filter choices used to index the precomputed sampler/WM state arrays. */
+ SAMPLER_STATE_FILTER_NEAREST, /* sampler_state_init() programs BRW_MAPFILTER_NEAREST */
+ SAMPLER_STATE_FILTER_BILINEAR, /* sampler_state_init() programs BRW_MAPFILTER_LINEAR */
+ SAMPLER_STATE_FILTER_COUNT /* number of filters; used as an array dimension, not a filter */
+} sampler_state_filter_t;
+
+typedef enum { /* Extend (wrap) choices used to index the precomputed sampler/WM state arrays. */
+ SAMPLER_STATE_EXTEND_NONE, /* sampler_state_init() programs BRW_TEXCOORDMODE_CLAMP_BORDER */
+ SAMPLER_STATE_EXTEND_REPEAT, /* sampler_state_init() programs BRW_TEXCOORDMODE_WRAP */
+ SAMPLER_STATE_EXTEND_COUNT /* number of extend modes; used as an array dimension, not a mode */
+} sampler_state_extend_t;
+
typedef struct _brw_cc_unit_state_padded {
struct brw_cc_unit_state state;
char pad[64 - sizeof (struct brw_cc_unit_state)];
} brw_cc_unit_state_padded;
+typedef struct brw_surface_state_padded { /* A surface state rounded up to 32 bytes, so array elements keep 32-byte spacing. */
+ struct brw_surface_state state;
+ char pad[32 - sizeof (struct brw_surface_state)]; /* NOTE(review): assumes sizeof(struct brw_surface_state) < 32 -- confirm */
+} brw_surface_state_padded;
+
/**
* Gen4 rendering state buffer structure.
*
@@ -445,6 +448,8 @@ typedef struct _brw_cc_unit_state_padded {
* the rest.
*/
typedef struct _gen4_state {
+ uint8_t wm_scratch[128 * PS_MAX_THREADS];
+
KERNEL_DECL (sip_kernel);
KERNEL_DECL (sf_kernel);
KERNEL_DECL (sf_kernel_mask);
@@ -457,6 +462,35 @@ typedef struct _gen4_state {
KERNEL_DECL (ps_kernel_masknoca_affine);
KERNEL_DECL (ps_kernel_masknoca_projective);
+ struct brw_vs_unit_state vs_state;
+ PAD64 (brw_vs_unit_state, 0);
+
+ struct brw_sf_unit_state sf_state;
+ PAD64 (brw_sf_unit_state, 0);
+ struct brw_sf_unit_state sf_state_mask;
+ PAD64 (brw_sf_unit_state, 1);
+
+ WM_STATE_DECL (nomask_affine);
+ WM_STATE_DECL (nomask_projective);
+ WM_STATE_DECL (maskca_affine);
+ WM_STATE_DECL (maskca_projective);
+ WM_STATE_DECL (maskca_srcalpha_affine);
+ WM_STATE_DECL (maskca_srcalpha_projective);
+ WM_STATE_DECL (masknoca_affine);
+ WM_STATE_DECL (masknoca_projective);
+
+ uint32_t binding_table[128];
+
+ struct brw_surface_state_padded surface_state[32];
+
+ /* Index by [src_filter][src_extend][mask_filter][mask_extend]. Two of
+ * the structs happen to add to 32 bytes.
+ */
+ struct brw_sampler_state sampler_state[SAMPLER_STATE_FILTER_COUNT]
+ [SAMPLER_STATE_EXTEND_COUNT]
+ [SAMPLER_STATE_FILTER_COUNT]
+ [SAMPLER_STATE_EXTEND_COUNT][2];
+
struct brw_sampler_default_color sampler_default_color;
PAD64 (brw_sampler_default_color, 0);
@@ -466,9 +500,105 @@ typedef struct _gen4_state {
struct brw_cc_viewport cc_viewport;
PAD64 (brw_cc_viewport, 0);
- uint8_t other_state[65536];
+ float vb[(2 + 3 + 3) * 3]; /* (dst, src, mask) 3 vertices, 4 bytes */
} gen4_state_t;
+/**
+ * Private data for gen4 render accel implementation.
+ *
+ * Tracks where the gen4_state_t state buffer lives and how much of its
+ * binding table and surface state arrays the composite setup has handed out.
+ */
+struct gen4_render_state {
+ gen4_state_t *card_state; /* the state buffer that gen4_state_init() fills in */
+ uint32_t card_state_offset; /* base offset added to offsetof(gen4_state_t, ...) when building state pointers */
+
+ int binding_table_index; /* next unused entry in card_state->binding_table; reset to 0 after a sync */
+ int surface_state_index; /* next unused entry in card_state->surface_state; reset to 0 after a sync */
+};
+
+/**
+ * Sets up the SF state pointing at an SF kernel.
+ *
+ * The SF kernel does coord interp: for each attribute,
+ * calculate dA/dx and dA/dy. Hand these interpolation coefficients
+ * back to SF which then hands pixels off to WM.
+ *
+ * @param sf_state      SF unit state to fill in. It is zeroed first, so any
+ *                      field not assigned below is left at 0.
+ * @param kernel_offset offset of the SF kernel to run. Must be a multiple
+ *                      of 64 (asserted); it is stored shifted right by 6.
+ */
+static void
+sf_state_init (struct brw_sf_unit_state *sf_state, int kernel_offset)
+{
+ memset(sf_state, 0, sizeof(*sf_state));
+ sf_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
+ sf_state->sf1.single_program_flow = 1;
+ sf_state->sf1.binding_table_entry_count = 0;
+ sf_state->sf1.thread_priority = 0;
+ sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
+ sf_state->sf1.illegal_op_exception_enable = 1;
+ sf_state->sf1.mask_stack_exception_enable = 1;
+ sf_state->sf1.sw_exception_enable = 1;
+ sf_state->thread2.per_thread_scratch_space = 0;
+ /* scratch space is not used in our kernel */
+ sf_state->thread2.scratch_space_base_pointer = 0;
+ sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
+ sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
+ sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
+ /* don't smash vertex header, read start from dw8 */
+ sf_state->thread3.urb_entry_read_offset = 1;
+ sf_state->thread3.dispatch_grf_start_reg = 3;
+ sf_state->thread4.max_threads = SF_MAX_THREADS - 1; /* stored as count - 1 */
+ sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1; /* stored as size - 1 */
+ sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
+ sf_state->thread4.stats_enable = 1;
+ sf_state->sf5.viewport_transform = FALSE; /* skip viewport */
+ sf_state->sf6.cull_mode = BRW_CULLMODE_NONE;
+ sf_state->sf6.scissor = 0;
+ sf_state->sf7.trifan_pv = 2;
+ sf_state->sf6.dest_org_vbias = 0x8;
+ sf_state->sf6.dest_org_hbias = 0x8;
+
+ assert((kernel_offset & 63) == 0);
+ sf_state->thread0.kernel_start_pointer = kernel_offset >> 6;
+}
+
+static void
+sampler_state_init (struct brw_sampler_state *sampler_state,
+ sampler_state_filter_t filter,
+ sampler_state_extend_t extend,
+ int default_color_offset)
+{
+ /* Fills in one sampler state; the PS kernel uses this sampler.
+ *
+ * The structure is zeroed first. filter selects the min/mag filter and
+ * extend selects the wrap mode applied to all three coordinates; any
+ * value not listed explicitly lands on the `default:` label and is
+ * treated as NEAREST / EXTEND_NONE. default_color_offset must be a
+ * multiple of 32 (asserted) and is stored shifted right by 5.
+ */
+ memset(sampler_state, 0, sizeof(*sampler_state));
+
+ sampler_state->ss0.lod_preclamp = 1; /* GL mode */
+ sampler_state->ss0.default_color_mode = 0; /* GL mode */
+
+ switch(filter) {
+ default:
+ case SAMPLER_STATE_FILTER_NEAREST:
+ sampler_state->ss0.min_filter = BRW_MAPFILTER_NEAREST;
+ sampler_state->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
+ break;
+ case SAMPLER_STATE_FILTER_BILINEAR:
+ sampler_state->ss0.min_filter = BRW_MAPFILTER_LINEAR;
+ sampler_state->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
+ break;
+ }
+
+ switch (extend) {
+ default:
+ case SAMPLER_STATE_EXTEND_NONE:
+ sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
+ sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
+ sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
+ break;
+ case SAMPLER_STATE_EXTEND_REPEAT:
+ sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_WRAP;
+ sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_WRAP;
+ sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP;
+ break;
+ }
+
+ assert((default_color_offset & 31) == 0);
+ sampler_state->ss2.default_color_pointer = default_color_offset >> 5;
+
+ sampler_state->ss3.chroma_key_enable = 0; /* disable chromakey */
+}
+
static void
cc_state_init (struct brw_cc_unit_state *cc_state,
int src_blend,
@@ -504,16 +634,67 @@ cc_state_init (struct brw_cc_unit_state *cc_state,
cc_state->cc6.dest_blend_factor = dst_blend;
}
+static void
+wm_state_init (struct brw_wm_unit_state *wm_state,
+ Bool has_mask,
+ int scratch_offset,
+ int kernel_offset,
+ int sampler_state_offset)
+{ /* Fills in one WM unit state, which dispatches a PS kernel.
+ *
+ * wm_state is zeroed first. has_mask selects the binding table entry
+ * count and URB read length (see the end of the function). The three
+ * offsets are stored right-shifted and must be aligned accordingly
+ * (each is asserted): scratch_offset to 1024 bytes, sampler_state_offset
+ * to 32 bytes and kernel_offset to 64 bytes.
+ */
+ memset(wm_state, 0, sizeof (*wm_state));
+ wm_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
+ wm_state->thread1.single_program_flow = 0;
+
+ assert((scratch_offset & 1023) == 0);
+ wm_state->thread2.scratch_space_base_pointer = scratch_offset >> 10;
+
+ wm_state->thread2.per_thread_scratch_space = PS_SCRATCH_SPACE_LOG;
+ wm_state->thread3.const_urb_entry_read_length = 0;
+ wm_state->thread3.const_urb_entry_read_offset = 0;
+
+ wm_state->thread3.urb_entry_read_offset = 0;
+ /* wm kernel uses urb from 3, see wm_program in compiler module */
+ wm_state->thread3.dispatch_grf_start_reg = 3; /* must match kernel */
+
+ wm_state->wm4.stats_enable = 1; /* statistic */
+ assert((sampler_state_offset & 31) == 0);
+ wm_state->wm4.sampler_state_pointer = sampler_state_offset >> 5;
+ wm_state->wm4.sampler_count = 1; /* 1-4 samplers used */
+ wm_state->wm5.max_threads = PS_MAX_THREADS - 1; /* stored as count - 1 */
+ wm_state->wm5.transposed_urb_read = 0;
+ wm_state->wm5.thread_dispatch_enable = 1;
+ /* just use 16-pixel dispatch (4 subspans), don't need to change kernel
+ * start point
+ */
+ wm_state->wm5.enable_16_pix = 1;
+ wm_state->wm5.enable_8_pix = 0;
+ wm_state->wm5.early_depth_test = 1;
+
+ assert((kernel_offset & 63) == 0);
+ wm_state->thread0.kernel_start_pointer = kernel_offset >> 6;
+
+ /* Each pair of attributes (src/mask coords) is two URB entries */
+ if (has_mask) {
+ wm_state->thread1.binding_table_entry_count = 3; /* 2 tex and fb */
+ wm_state->thread3.urb_entry_read_length = 4;
+ } else {
+ wm_state->thread1.binding_table_entry_count = 2; /* 1 tex and fb */
+ wm_state->thread3.urb_entry_read_length = 2;
+ }
+}
+
/**
* Called at EnterVT to fill in our state buffer with any static information.
*/
static void
-gen4_state_init (gen4_state_t *state, uint32_t state_base_offset)
+gen4_state_init (struct gen4_render_state *render_state)
{
- int i, j;
+ int i, j, k, l;
+ gen4_state_t *card_state = render_state->card_state;
+ uint32_t state_base_offset = render_state->card_state_offset;
#define KERNEL_COPY(kernel) \
- memcpy(state->kernel, kernel ## _static, sizeof(kernel ## _static))
+ memcpy(card_state->kernel, kernel ## _static, sizeof(kernel ## _static))
KERNEL_COPY (sip_kernel);
KERNEL_COPY (sf_kernel);
@@ -526,26 +707,90 @@ gen4_state_init (gen4_state_t *state, uint32_t state_base_offset)
KERNEL_COPY (ps_kernel_maskca_srcalpha_projective);
KERNEL_COPY (ps_kernel_masknoca_affine);
KERNEL_COPY (ps_kernel_masknoca_projective);
+#undef KERNEL_COPY
- memset(&state->sampler_default_color, 0,
- sizeof(state->sampler_default_color));
- state->sampler_default_color.color[0] = 0.0; /* R */
- state->sampler_default_color.color[1] = 0.0; /* G */
- state->sampler_default_color.color[2] = 0.0; /* B */
- state->sampler_default_color.color[3] = 0.0; /* A */
+ /* Set up the vertex shader to be disabled (passthrough) */
+ memset(&card_state->vs_state, 0, sizeof(card_state->vs_state));
+ card_state->vs_state.thread4.nr_urb_entries = URB_VS_ENTRIES;
+ card_state->vs_state.thread4.urb_entry_allocation_size =
+ URB_VS_ENTRY_SIZE - 1;
+ card_state->vs_state.vs6.vs_enable = 0;
+ card_state->vs_state.vs6.vert_cache_disable = 1;
+
+ /* Set up the sampler default color (always transparent black) */
+ memset(&card_state->sampler_default_color, 0,
+ sizeof(card_state->sampler_default_color));
+ card_state->sampler_default_color.color[0] = 0.0; /* R */
+ card_state->sampler_default_color.color[1] = 0.0; /* G */
+ card_state->sampler_default_color.color[2] = 0.0; /* B */
+ card_state->sampler_default_color.color[3] = 0.0; /* A */
+
+ card_state->cc_viewport.min_depth = -1.e35;
+ card_state->cc_viewport.max_depth = 1.e35;
+
+ sf_state_init (&card_state->sf_state,
+ state_base_offset +
+ offsetof (gen4_state_t, sf_kernel));
+ sf_state_init (&card_state->sf_state_mask,
+ state_base_offset +
+ offsetof (gen4_state_t, sf_kernel_mask));
+
+ for (i = 0; i < SAMPLER_STATE_FILTER_COUNT; i++) {
+ for (j = 0; j < SAMPLER_STATE_EXTEND_COUNT; j++) {
+ for (k = 0; k < SAMPLER_STATE_FILTER_COUNT; k++) {
+ for (l = 0; l < SAMPLER_STATE_EXTEND_COUNT; l++) {
+ sampler_state_init (&card_state->sampler_state[i][j][k][l][0],
+ i, j,
+ state_base_offset +
+ offsetof (gen4_state_t,
+ sampler_default_color));
+ sampler_state_init (&card_state->sampler_state[i][j][k][l][1],
+ k, l,
+ state_base_offset +
+ offsetof (gen4_state_t,
+ sampler_default_color));
+ }
+ }
+ }
+ }
- state->cc_viewport.min_depth = -1.e35;
- state->cc_viewport.max_depth = 1.e35;
for (i = 0; i < BRW_BLENDFACTOR_COUNT; i++) {
for (j = 0; j < BRW_BLENDFACTOR_COUNT; j++) {
- cc_state_init (&state->cc_state[i][j].state, i, j,
+ cc_state_init (&card_state->cc_state[i][j].state, i, j,
state_base_offset +
offsetof (gen4_state_t, cc_viewport));
}
}
-#undef KERNEL_COPY
+#define SETUP_WM_STATE(kernel, has_mask) \
+ wm_state_init(&card_state->wm_state_ ## kernel [i][j][k][l], \
+ has_mask, \
+ state_base_offset + offsetof(gen4_state_t, \
+ wm_scratch), \
+ state_base_offset + offsetof(gen4_state_t, \
+ ps_kernel_ ## kernel), \
+ state_base_offset + offsetof(gen4_state_t, \
+ sampler_state[i][j][k][l]));
+
+
+ for (i = 0; i < SAMPLER_STATE_FILTER_COUNT; i++) {
+ for (j = 0; j < SAMPLER_STATE_EXTEND_COUNT; j++) {
+ for (k = 0; k < SAMPLER_STATE_FILTER_COUNT; k++) {
+ for (l = 0; l < SAMPLER_STATE_EXTEND_COUNT; l++) {
+ SETUP_WM_STATE (nomask_affine, FALSE);
+ SETUP_WM_STATE (nomask_projective, FALSE);
+ SETUP_WM_STATE (maskca_affine, TRUE);
+ SETUP_WM_STATE (maskca_projective, TRUE);
+ SETUP_WM_STATE (maskca_srcalpha_affine, TRUE);
+ SETUP_WM_STATE (maskca_srcalpha_projective, TRUE);
+ SETUP_WM_STATE (masknoca_affine, TRUE);
+ SETUP_WM_STATE (masknoca_projective, TRUE);
+ }
+ }
+ }
+ }
+#undef SETUP_WM_STATE
}
static uint32_t
@@ -559,9 +804,95 @@ i965_get_card_format(PicturePtr pPict)
if (i965_tex_formats[i].fmt == pPict->format)
break;
}
+ assert(i != sizeof(i965_tex_formats) / sizeof(i965_tex_formats[0]));
+
return i965_tex_formats[i].card_fmt;
}
+static sampler_state_filter_t
+sampler_state_filter_from_picture (int filter)
+{ /* Maps a PictFilter* value to the matching sampler_state_filter_t index. */
+ switch (filter) {
+ case PictFilterNearest:
+ return SAMPLER_STATE_FILTER_NEAREST;
+ case PictFilterBilinear:
+ return SAMPLER_STATE_FILTER_BILINEAR;
+ default:
+ return -1; /* unsupported filter. NOTE(review): -1 is not an enumerator; a `< 0` test on the result only works if the compiler gives this enum a signed type -- verify at the call sites */
+ }
+}
+
+static sampler_state_extend_t
+sampler_state_extend_from_picture (int repeat)
+{ /* Maps a Repeat* value to the matching sampler_state_extend_t index. */
+ switch (repeat) {
+ case RepeatNone:
+ return SAMPLER_STATE_EXTEND_NONE;
+ case RepeatNormal:
+ return SAMPLER_STATE_EXTEND_REPEAT;
+ default:
+ return -1; /* unsupported repeat mode. NOTE(review): -1 is not an enumerator; a `< 0` test on the result only works if the compiler gives this enum a signed type -- verify at the call sites */
+ }
+}
+
+/**
+ * Fills in the surface state entry at ss for the given picture and pixmap,
+ * and returns the offset of that entry within the state buffer.
+ *
+ * The state is assembled in a local temporary and then copied to ss with a
+ * single memcpy.
+ *
+ * @param pScrn    screen whose gen4 render state owns the state buffer.
+ * @param ss       surface state entry to write. Its offset from the start of
+ *                 render_state->card_state must be a multiple of 32
+ *                 (asserted).
+ * @param pPicture picture supplying the surface format.
+ * @param pPixmap  pixmap supplying base address, size, pitch and tiling.
+ * @param is_dst   TRUE to take the format from i965_get_dest_format(),
+ *                 FALSE to take it from i965_get_card_format().
+ *
+ * @return the byte offset of ss from the start of card_state.
+ */
+static unsigned int
+i965_set_picture_surface_state(ScrnInfoPtr pScrn, struct brw_surface_state *ss,
+ PicturePtr pPicture, PixmapPtr pPixmap,
+ Bool is_dst)
+{
+ I830Ptr pI830 = I830PTR(pScrn);
+ struct gen4_render_state *render_state = pI830->gen4_render_state;
+ gen4_state_t *card_state = render_state->card_state;
+ struct brw_surface_state local_ss;
+ uint32_t offset;
+
+ /* Since ss is a pointer to WC memory, do all of our bit operations
+ * into a local temporary first.
+ */
+ memset(&local_ss, 0, sizeof(local_ss));
+ local_ss.ss0.surface_type = BRW_SURFACE_2D;
+ if (is_dst) {
+ uint32_t dst_format = 0;
+
+ /* Make the call outside assert(): when NDEBUG is defined the assert
+ * argument is not evaluated at all, which would skip the lookup and
+ * leave dst_format unset.
+ */
+ if (!i965_get_dest_format(pPicture, &dst_format))
+ assert(!"unsupported destination picture format");
+ local_ss.ss0.surface_format = dst_format;
+ } else {
+ local_ss.ss0.surface_format = i965_get_card_format(pPicture);
+ }
+
+ local_ss.ss0.data_return_format = BRW_SURFACERETURNFORMAT_FLOAT32;
+ local_ss.ss0.writedisable_alpha = 0;
+ local_ss.ss0.writedisable_red = 0;
+ local_ss.ss0.writedisable_green = 0;
+ local_ss.ss0.writedisable_blue = 0;
+ local_ss.ss0.color_blend = 1;
+ local_ss.ss0.vert_line_stride = 0;
+ local_ss.ss0.vert_line_stride_ofs = 0;
+ local_ss.ss0.mipmap_layout_mode = 0;
+ local_ss.ss0.render_cache_read_mode = 0;
+ local_ss.ss1.base_addr = intel_get_pixmap_offset(pPixmap);
+
+ local_ss.ss2.mip_count = 0;
+ local_ss.ss2.render_target_rotation = 0;
+ /* height, width and pitch are stored as the value minus one */
+ local_ss.ss2.height = pPixmap->drawable.height - 1;
+ local_ss.ss2.width = pPixmap->drawable.width - 1;
+ local_ss.ss3.pitch = intel_get_pixmap_pitch(pPixmap) - 1;
+ local_ss.ss3.tile_walk = 0; /* Tiled X */
+ local_ss.ss3.tiled_surface = i830_pixmap_tiled(pPixmap);
+
+ memcpy(ss, &local_ss, sizeof(local_ss));
+
+ offset = (char *)ss - (char *)card_state;
+ assert((offset & 31) == 0);
+
+ return offset;
+}
+
Bool
i965_prepare_composite(int op, PicturePtr pSrcPicture,
PicturePtr pMaskPicture, PicturePtr pDstPicture,
@@ -569,36 +900,26 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
{
ScrnInfoPtr pScrn = xf86Screens[pSrcPicture->pDrawable->pScreen->myNum];
I830Ptr pI830 = I830PTR(pScrn);
- uint32_t src_offset, src_pitch, src_tile_format = 0, src_tiled = 0;
- uint32_t mask_offset = 0, mask_pitch = 0, mask_tile_format = 0,
- mask_tiled = 0;
- uint32_t dst_format, dst_offset, dst_pitch, dst_tile_format = 0,
- dst_tiled = 0;
+ struct gen4_render_state *render_state= pI830->gen4_render_state;
+ gen4_state_t *card_state = render_state->card_state;
+ struct brw_surface_state_padded *ss;
+ uint32_t sf_state_offset;
+ sampler_state_filter_t src_filter, mask_filter;
+ sampler_state_extend_t src_extend, mask_extend;
Bool is_affine_src, is_affine_mask, is_affine;
+ int urb_vs_start, urb_vs_size;
+ int urb_gs_start, urb_gs_size;
+ int urb_clip_start, urb_clip_size;
+ int urb_sf_start, urb_sf_size;
+ int urb_cs_start, urb_cs_size;
+ char *state_base;
+ int state_base_offset;
+ uint32_t src_blend, dst_blend;
+ uint32_t *binding_table;
IntelEmitInvarientState(pScrn);
*pI830->last_3d = LAST_3D_RENDER;
- src_offset = intel_get_pixmap_offset(pSrc);
- src_pitch = intel_get_pixmap_pitch(pSrc);
- if (i830_pixmap_tiled(pSrc)) {
- src_tiled = 1;
- src_tile_format = 0; /* Tiled X */
- }
- dst_offset = intel_get_pixmap_offset(pDst);
- dst_pitch = intel_get_pixmap_pitch(pDst);
- if (i830_pixmap_tiled(pDst)) {
- dst_tiled = 1;
- dst_tile_format = 0; /* Tiled X */
- }
- if (pMask) {
- mask_offset = intel_get_pixmap_offset(pMask);
- mask_pitch = intel_get_pixmap_pitch(pMask);
- if (i830_pixmap_tiled(pMask)) {
- mask_tiled = 1;
- mask_tile_format = 0; /* Tiled X */
- }
- }
pI830->scale_units[0][0] = pSrc->drawable.width;
pI830->scale_units[0][1] = pSrc->drawable.height;
@@ -619,81 +940,10 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
is_affine = is_affine_src && is_affine_mask;
- /* setup 3d pipeline state */
-
- binding_table_entries = 2; /* default no mask */
-
- /* Set up our layout of state in framebuffer. First the general state: */
- next_offset = offsetof(gen4_state_t, other_state);
- vs_offset = ALIGN(next_offset, 64);
- next_offset = vs_offset + sizeof(*vs_state);
-
- sf_offset = ALIGN(next_offset, 32);
- next_offset = sf_offset + sizeof(*sf_state);
-
- wm_offset = ALIGN(next_offset, 32);
- next_offset = wm_offset + sizeof(*wm_state);
-
- wm_scratch_offset = ALIGN(next_offset, 1024);
- next_offset = wm_scratch_offset + PS_SCRATCH_SPACE * PS_MAX_THREADS;
-
- /* for texture sampler */
- src_sampler_offset = ALIGN(next_offset, 32);
- next_offset = src_sampler_offset + sizeof(*src_sampler_state);
-
- if (pMask) {
- mask_sampler_offset = ALIGN(next_offset, 32);
- next_offset = mask_sampler_offset + sizeof(*mask_sampler_state);
- }
- /* Align VB to native size of elements, for safety */
- vb_offset = ALIGN(next_offset, 32);
- next_offset = vb_offset + vb_size;
-
- /* And then the general state: */
- dest_surf_offset = ALIGN(next_offset, 32);
- next_offset = dest_surf_offset + sizeof(*dest_surf_state);
-
- src_surf_offset = ALIGN(next_offset, 32);
- next_offset = src_surf_offset + sizeof(*src_surf_state);
-
- if (pMask) {
- mask_surf_offset = ALIGN(next_offset, 32);
- next_offset = mask_surf_offset + sizeof(*mask_surf_state);
- binding_table_entries = 3;
- }
-
- binding_table_offset = ALIGN(next_offset, 32);
- next_offset = binding_table_offset + (binding_table_entries * 4);
-
- total_state_size = next_offset;
- assert(total_state_size < sizeof(gen4_state_t));
-
state_base_offset = pI830->gen4_render_state_mem->offset;
assert((state_base_offset & 63) == 0);
state_base = (char *)(pI830->FbBase + state_base_offset);
- binding_table = (void *)(state_base + binding_table_offset);
-
- vb = (void *)(state_base + vb_offset);
-
- /* Set up a default static partitioning of the URB, which is supposed to
- * allow anything we would want to do, at potentially lower performance.
- */
-#define URB_CS_ENTRY_SIZE 0
-#define URB_CS_ENTRIES 0
-
-#define URB_VS_ENTRY_SIZE 1 // each 512-bit row
-#define URB_VS_ENTRIES 8 // we needs at least 8 entries
-
-#define URB_GS_ENTRY_SIZE 0
-#define URB_GS_ENTRIES 0
-
-#define URB_CLIP_ENTRY_SIZE 0
-#define URB_CLIP_ENTRIES 0
-
-#define URB_SF_ENTRY_SIZE 2
-#define URB_SF_ENTRIES 1
-
urb_vs_start = 0;
urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
urb_gs_start = urb_vs_start + urb_vs_size;
@@ -705,337 +955,69 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
urb_cs_start = urb_sf_start + urb_sf_size;
urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;
- /* Because we only have a single static buffer for our state currently,
- * we have to sync before updating it every time.
- */
- i830WaitSync(pScrn);
-
i965_get_blend_cntl(op, pMaskPicture, pDstPicture->format,
&src_blend, &dst_blend);
- /* Set up the state buffer for the destination surface */
- dest_surf_state = &dest_surf_state_local;
- memset(dest_surf_state, 0, sizeof(*dest_surf_state));
- dest_surf_state->ss0.surface_type = BRW_SURFACE_2D;
- dest_surf_state->ss0.data_return_format = BRW_SURFACERETURNFORMAT_FLOAT32;
- if (!i965_get_dest_format(pDstPicture, &dst_format))
- return FALSE;
- dest_surf_state->ss0.surface_format = dst_format;
-
- dest_surf_state->ss0.writedisable_alpha = 0;
- dest_surf_state->ss0.writedisable_red = 0;
- dest_surf_state->ss0.writedisable_green = 0;
- dest_surf_state->ss0.writedisable_blue = 0;
- dest_surf_state->ss0.color_blend = 1;
- dest_surf_state->ss0.vert_line_stride = 0;
- dest_surf_state->ss0.vert_line_stride_ofs = 0;
- dest_surf_state->ss0.mipmap_layout_mode = 0;
- dest_surf_state->ss0.render_cache_read_mode = 0;
-
- dest_surf_state->ss1.base_addr = dst_offset;
- dest_surf_state->ss2.height = pDst->drawable.height - 1;
- dest_surf_state->ss2.width = pDst->drawable.width - 1;
- dest_surf_state->ss2.mip_count = 0;
- dest_surf_state->ss2.render_target_rotation = 0;
- dest_surf_state->ss3.pitch = dst_pitch - 1;
- dest_surf_state->ss3.tile_walk = dst_tile_format;
- dest_surf_state->ss3.tiled_surface = dst_tiled;
-
- dest_surf_state = (void *)(state_base + dest_surf_offset);
- memcpy (dest_surf_state, &dest_surf_state_local, sizeof (dest_surf_state_local));
-
- /* Set up the source surface state buffer */
- src_surf_state = &src_surf_state_local;
- memset(src_surf_state, 0, sizeof(*src_surf_state));
- src_surf_state->ss0.surface_type = BRW_SURFACE_2D;
- src_surf_state->ss0.surface_format = i965_get_card_format(pSrcPicture);
-
- src_surf_state->ss0.writedisable_alpha = 0;
- src_surf_state->ss0.writedisable_red = 0;
- src_surf_state->ss0.writedisable_green = 0;
- src_surf_state->ss0.writedisable_blue = 0;
- src_surf_state->ss0.color_blend = 1;
- src_surf_state->ss0.vert_line_stride = 0;
- src_surf_state->ss0.vert_line_stride_ofs = 0;
- src_surf_state->ss0.mipmap_layout_mode = 0;
- src_surf_state->ss0.render_cache_read_mode = 0;
-
- src_surf_state->ss1.base_addr = src_offset;
- src_surf_state->ss2.width = pSrc->drawable.width - 1;
- src_surf_state->ss2.height = pSrc->drawable.height - 1;
- src_surf_state->ss2.mip_count = 0;
- src_surf_state->ss2.render_target_rotation = 0;
- src_surf_state->ss3.pitch = src_pitch - 1;
- src_surf_state->ss3.tile_walk = src_tile_format;
- src_surf_state->ss3.tiled_surface = src_tiled;
-
- src_surf_state = (void *)(state_base + src_surf_offset);
- memcpy (src_surf_state, &src_surf_state_local, sizeof (src_surf_state_local));
-
- /* setup mask surface */
- if (pMask) {
- mask_surf_state = &mask_surf_state_local;
- memset(mask_surf_state, 0, sizeof(*mask_surf_state));
- mask_surf_state->ss0.surface_type = BRW_SURFACE_2D;
- mask_surf_state->ss0.surface_format =
- i965_get_card_format(pMaskPicture);
-
- mask_surf_state->ss0.writedisable_alpha = 0;
- mask_surf_state->ss0.writedisable_red = 0;
- mask_surf_state->ss0.writedisable_green = 0;
- mask_surf_state->ss0.writedisable_blue = 0;
- mask_surf_state->ss0.color_blend = 1;
- mask_surf_state->ss0.vert_line_stride = 0;
- mask_surf_state->ss0.vert_line_stride_ofs = 0;
- mask_surf_state->ss0.mipmap_layout_mode = 0;
- mask_surf_state->ss0.render_cache_read_mode = 0;
-
- mask_surf_state->ss1.base_addr = mask_offset;
- mask_surf_state->ss2.width = pMask->drawable.width - 1;
- mask_surf_state->ss2.height = pMask->drawable.height - 1;
- mask_surf_state->ss2.mip_count = 0;
- mask_surf_state->ss2.render_target_rotation = 0;
- mask_surf_state->ss3.pitch = mask_pitch - 1;
- mask_surf_state->ss3.tile_walk = mask_tile_format;
- mask_surf_state->ss3.tiled_surface = mask_tiled;
-
- mask_surf_state = (void *)(state_base + mask_surf_offset);
- memcpy (mask_surf_state, &mask_surf_state_local, sizeof (mask_surf_state_local));
- }
-
- /* Set up a binding table for our surfaces. Only the PS will use it */
- binding_table[0] = state_base_offset + dest_surf_offset;
- binding_table[1] = state_base_offset + src_surf_offset;
- if (pMask)
- binding_table[2] = state_base_offset + mask_surf_offset;
-
- /* PS kernel use this sampler */
- src_sampler_state = &src_sampler_state_local;
- memset(src_sampler_state, 0, sizeof(*src_sampler_state));
- src_sampler_state->ss0.lod_preclamp = 1; /* GL mode */
- switch(pSrcPicture->filter) {
- case PictFilterNearest:
- src_sampler_state->ss0.min_filter = BRW_MAPFILTER_NEAREST;
- src_sampler_state->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
- break;
- case PictFilterBilinear:
- src_sampler_state->ss0.min_filter = BRW_MAPFILTER_LINEAR;
- src_sampler_state->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
- break;
- default:
- I830FALLBACK("Bad filter 0x%x\n", pSrcPicture->filter);
- }
-
- src_sampler_state->ss0.default_color_mode = 0; /* GL mode */
-
- if (!pSrcPicture->repeat) {
- src_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
- src_sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
- src_sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
- src_sampler_state->ss2.default_color_pointer =
- (state_base_offset +
- offsetof(gen4_state_t, sampler_default_color)) >> 5;
- } else {
- src_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_WRAP;
- src_sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_WRAP;
- src_sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP;
- }
- src_sampler_state->ss3.chroma_key_enable = 0; /* disable chromakey */
-
- src_sampler_state = (void *)(state_base + src_sampler_offset);
- memcpy (src_sampler_state, &src_sampler_state_local, sizeof (src_sampler_state_local));
-
- if (pMask) {
- mask_sampler_state = &mask_sampler_state_local;
- memset(mask_sampler_state, 0, sizeof(*mask_sampler_state));
- mask_sampler_state->ss0.lod_preclamp = 1; /* GL mode */
- switch(pMaskPicture->filter) {
- case PictFilterNearest:
- mask_sampler_state->ss0.min_filter = BRW_MAPFILTER_NEAREST;
- mask_sampler_state->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
- break;
- case PictFilterBilinear:
- mask_sampler_state->ss0.min_filter = BRW_MAPFILTER_LINEAR;
- mask_sampler_state->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
- break;
- default:
- I830FALLBACK("Bad filter 0x%x\n", pMaskPicture->filter);
- }
-
- mask_sampler_state->ss0.default_color_mode = 0; /* GL mode */
- if (!pMaskPicture->repeat) {
- mask_sampler_state->ss1.r_wrap_mode =
- BRW_TEXCOORDMODE_CLAMP_BORDER;
- mask_sampler_state->ss1.s_wrap_mode =
- BRW_TEXCOORDMODE_CLAMP_BORDER;
- mask_sampler_state->ss1.t_wrap_mode =
- BRW_TEXCOORDMODE_CLAMP_BORDER;
- mask_sampler_state->ss2.default_color_pointer =
- (state_base_offset +
- offsetof(gen4_state_t, sampler_default_color)) >> 5;
- } else {
- mask_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_WRAP;
- mask_sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_WRAP;
- mask_sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP;
- }
- mask_sampler_state->ss3.chroma_key_enable = 0; /* disable chromakey */
-
- mask_sampler_state = (void *)(state_base + mask_sampler_offset);
- memcpy (mask_sampler_state, &mask_sampler_state_local, sizeof (mask_sampler_state_local));
+ if ((render_state->binding_table_index + 3 >=
+ ARRAY_SIZE(card_state->binding_table)) ||
+ (render_state->surface_state_index + 3 >=
+ ARRAY_SIZE(card_state->surface_state)))
+ {
+ i830WaitSync(pScrn);
+ render_state->binding_table_index = 0;
+ render_state->surface_state_index = 0;
}
- /* Set up the vertex shader to be disabled (passthrough) */
- vs_state = &vs_state_local;
- memset(vs_state, 0, sizeof(*vs_state));
- vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;
- vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
- vs_state->vs6.vs_enable = 0;
- vs_state->vs6.vert_cache_disable = 1;
-
- vs_state = (void *)(state_base + vs_offset);
- memcpy (vs_state, &vs_state_local, sizeof (vs_state_local));
-
- /* Set up the SF kernel to do coord interp: for each attribute,
- * calculate dA/dx and dA/dy. Hand these interpolation coefficients
- * back to SF which then hands pixels off to WM.
+ binding_table = card_state->binding_table +
+ render_state->binding_table_index;
+ ss = card_state->surface_state + render_state->surface_state_index;
+ /* We only use 2 or 3 entries, but the table has to be 32-byte
+ * aligned.
*/
- sf_state = &sf_state_local;
- memset(sf_state, 0, sizeof(*sf_state));
+ render_state->binding_table_index += 8;
+ render_state->surface_state_index += (pMask != NULL) ? 3 : 2;
+
+ /* Set up and bind the state buffer for the destination surface */
+ binding_table[0] = state_base_offset +
+ i965_set_picture_surface_state(pScrn,
+ &ss[0].state,
+ pDstPicture, pDst, TRUE);
+
+ /* Set up and bind the source surface state buffer */
+ binding_table[1] = state_base_offset +
+ i965_set_picture_surface_state(pScrn,
+ &ss[1].state,
+ pSrcPicture, pSrc, FALSE);
if (pMask) {
- sf_state->thread0.kernel_start_pointer = (state_base_offset +
- offsetof(gen4_state_t, sf_kernel_mask)) >> 6;
+ /* Set up and bind the mask surface state buffer */
+ binding_table[2] = state_base_offset +
+ i965_set_picture_surface_state(pScrn,
+ &ss[2].state,
+ pMaskPicture, pMask,
+ FALSE);
} else {
- sf_state->thread0.kernel_start_pointer = (state_base_offset +
- offsetof(gen4_state_t, sf_kernel)) >> 6;
+ binding_table[2] = 0;
}
- sf_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
- sf_state->sf1.single_program_flow = 1;
- sf_state->sf1.binding_table_entry_count = 0;
- sf_state->sf1.thread_priority = 0;
- sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
- sf_state->sf1.illegal_op_exception_enable = 1;
- sf_state->sf1.mask_stack_exception_enable = 1;
- sf_state->sf1.sw_exception_enable = 1;
- sf_state->thread2.per_thread_scratch_space = 0;
- /* scratch space is not used in our kernel */
- sf_state->thread2.scratch_space_base_pointer = 0;
- sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
- sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
- sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
- /* don't smash vertex header, read start from dw8 */
- sf_state->thread3.urb_entry_read_offset = 1;
- sf_state->thread3.dispatch_grf_start_reg = 3;
- sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
- sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
- sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
- sf_state->thread4.stats_enable = 1;
- sf_state->sf5.viewport_transform = FALSE; /* skip viewport */
- sf_state->sf6.cull_mode = BRW_CULLMODE_NONE;
- sf_state->sf6.scissor = 0;
- sf_state->sf7.trifan_pv = 2;
- sf_state->sf6.dest_org_vbias = 0x8;
- sf_state->sf6.dest_org_hbias = 0x8;
- sf_state = (void *)(state_base + sf_offset);
- memcpy (sf_state, &sf_state_local, sizeof (sf_state_local));
-
- /* Set up the PS kernel (dispatched by WM) */
- wm_state = &wm_state_local;
- memset(wm_state, 0, sizeof (*wm_state));
- if (pMask) {
- if (pMaskPicture->componentAlpha &&
- PICT_FORMAT_RGB(pMaskPicture->format))
- {
- if (i965_blend_op[op].src_alpha) {
- if (is_affine) {
- wm_state->thread0.kernel_start_pointer =
- (state_base_offset +
- offsetof(gen4_state_t,
- ps_kernel_maskca_srcalpha_affine)) >> 6;
- } else {
- wm_state->thread0.kernel_start_pointer =
- (state_base_offset +
- offsetof(gen4_state_t,
- ps_kernel_maskca_srcalpha_projective)) >> 6;
- }
- } else {
- if (is_affine) {
- wm_state->thread0.kernel_start_pointer =
- (state_base_offset +
- offsetof(gen4_state_t,
- ps_kernel_maskca_affine)) >> 6;
- } else {
- wm_state->thread0.kernel_start_pointer =
- (state_base_offset +
- offsetof(gen4_state_t,
- ps_kernel_maskca_projective)) >> 6;
- }
- }
- } else {
- if (is_affine) {
- wm_state->thread0.kernel_start_pointer =
- (state_base_offset +
- offsetof(gen4_state_t,
- ps_kernel_masknoca_affine)) >> 6;
- } else {
- wm_state->thread0.kernel_start_pointer =
- (state_base_offset +
- offsetof(gen4_state_t,
- ps_kernel_masknoca_projective)) >> 6;
- }
- }
+ src_filter = sampler_state_filter_from_picture (pSrcPicture->filter);
+ if (src_filter < 0)
+ I830FALLBACK ("Bad src filter 0x%x\n", pSrcPicture->filter);
+ src_extend = sampler_state_extend_from_picture (pSrcPicture->repeat);
+ if (src_extend < 0)
+ I830FALLBACK ("Bad src repeat 0x%x\n", pSrcPicture->repeat);
+
+ if (pMaskPicture) {
+ mask_filter = sampler_state_filter_from_picture (pMaskPicture->filter);
+ if (mask_filter < 0)
+ I830FALLBACK ("Bad mask filter 0x%x\n", pMaskPicture->filter);
+ mask_extend = sampler_state_extend_from_picture (pMaskPicture->repeat);
+ if (mask_extend < 0)
+ I830FALLBACK ("Bad mask repeat 0x%x\n", pMaskPicture->repeat);
} else {
- if (is_affine) {
- wm_state->thread0.kernel_start_pointer =
- (state_base_offset +
- offsetof(gen4_state_t,
- ps_kernel_nomask_affine)) >> 6;
- } else {
- wm_state->thread0.kernel_start_pointer =
- (state_base_offset +
- offsetof(gen4_state_t,
- ps_kernel_nomask_projective)) >> 6;
- }
+ mask_filter = SAMPLER_STATE_FILTER_NEAREST;
+ mask_extend = SAMPLER_STATE_EXTEND_NONE;
}
- wm_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
- wm_state->thread1.single_program_flow = 0;
- if (!pMask)
- wm_state->thread1.binding_table_entry_count = 2; /* 1 tex and fb */
- else
- wm_state->thread1.binding_table_entry_count = 3; /* 2 tex and fb */
-
- wm_state->thread2.scratch_space_base_pointer = (state_base_offset +
- wm_scratch_offset)>>10;
- wm_state->thread2.per_thread_scratch_space = PS_SCRATCH_SPACE_LOG;
- wm_state->thread3.const_urb_entry_read_length = 0;
- wm_state->thread3.const_urb_entry_read_offset = 0;
- /* Each pair of attributes (src/mask coords) is one URB entry */
- if (pMask)
- wm_state->thread3.urb_entry_read_length = 4;
- else
- wm_state->thread3.urb_entry_read_length = 2;
- wm_state->thread3.urb_entry_read_offset = 0;
- /* wm kernel use urb from 3, see wm_program in compiler module */
- wm_state->thread3.dispatch_grf_start_reg = 3; /* must match kernel */
-
- wm_state->wm4.stats_enable = 1; /* statistic */
- wm_state->wm4.sampler_state_pointer = (state_base_offset +
- src_sampler_offset) >> 5;
- wm_state->wm4.sampler_count = 1; /* 1-4 samplers used */
- wm_state->wm5.max_threads = PS_MAX_THREADS - 1;
- wm_state->wm5.transposed_urb_read = 0;
- wm_state->wm5.thread_dispatch_enable = 1;
- /* just use 16-pixel dispatch (4 subspans), don't need to change kernel
- * start point
- */
- wm_state->wm5.enable_16_pix = 1;
- wm_state->wm5.enable_8_pix = 0;
- wm_state->wm5.early_depth_test = 1;
-
- wm_state = (void *)(state_base + wm_offset);
- memcpy (wm_state, &wm_state_local, sizeof (wm_state_local));
-
/* Begin the long sequence of commands needed to set up the 3D
* rendering pipe
*/
@@ -1096,7 +1078,8 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
OUT_BATCH(0); /* clip */
OUT_BATCH(0); /* sf */
/* Only the PS uses the binding table */
- OUT_BATCH(state_base_offset + binding_table_offset); /* ps */
+ assert((((unsigned char *)binding_table - pI830->FbBase) & 31) == 0);
+ OUT_BATCH((unsigned char *)binding_table - pI830->FbBase);
/* The drawing rectangle clipping is always on. Set it to values that
* shouldn't do any clipping.
@@ -1114,12 +1097,66 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
/* Set the pointers to the 3d pipeline state */
OUT_BATCH(BRW_3DSTATE_PIPELINED_POINTERS | 5);
- OUT_BATCH(state_base_offset + vs_offset); /* 32 byte aligned */
+ assert((offsetof(gen4_state_t, vs_state) & 31) == 0);
+ OUT_BATCH(state_base_offset + offsetof(gen4_state_t, vs_state));
OUT_BATCH(BRW_GS_DISABLE); /* disable GS, resulting in passthrough */
OUT_BATCH(BRW_CLIP_DISABLE); /* disable CLIP, resulting in passthrough */
- OUT_BATCH(state_base_offset + sf_offset); /* 32 byte aligned */
- OUT_BATCH(state_base_offset + wm_offset); /* 32 byte aligned */
+
+ if (pMask) {
+ sf_state_offset = state_base_offset +
+ offsetof(gen4_state_t, sf_state_mask);
+ } else {
+ sf_state_offset = state_base_offset +
+ offsetof(gen4_state_t, sf_state);
+ }
+ assert((sf_state_offset & 31) == 0);
+ OUT_BATCH(sf_state_offset);
+
+ /* Shorthand for long array lookup */
+#define OUT_WM_KERNEL(kernel) do { \
+ uint32_t offset = state_base_offset + \
+ offsetof(gen4_state_t, \
+ wm_state_ ## kernel \
+ [src_filter] \
+ [src_extend] \
+ [mask_filter] \
+ [mask_extend]); \
+ assert((offset & 31) == 0); \
+ OUT_BATCH(offset); \
+} while (0)
+
+ if (pMask) {
+ if (pMaskPicture->componentAlpha &&
+ PICT_FORMAT_RGB(pMaskPicture->format))
+ {
+ if (i965_blend_op[op].src_alpha) {
+ if (is_affine)
+ OUT_WM_KERNEL(maskca_srcalpha_affine);
+ else
+ OUT_WM_KERNEL(maskca_srcalpha_projective);
+ } else {
+ if (is_affine)
+ OUT_WM_KERNEL(maskca_affine);
+ else
+ OUT_WM_KERNEL(maskca_projective);
+ }
+ } else {
+ if (is_affine)
+ OUT_WM_KERNEL(masknoca_affine);
+ else
+ OUT_WM_KERNEL(masknoca_projective);
+ }
+ } else {
+ if (is_affine)
+ OUT_WM_KERNEL(nomask_affine);
+ else
+ OUT_WM_KERNEL(nomask_projective);
+ }
+#undef OUT_WM_KERNEL
+
/* 64 byte aligned */
+ assert((offsetof(gen4_state_t,
+ cc_state[src_blend][dst_blend]) & 63) == 0);
OUT_BATCH(state_base_offset +
offsetof(gen4_state_t, cc_state[src_blend][dst_blend]));
@@ -1173,7 +1210,7 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
OUT_BATCH((0 << VB0_BUFFER_INDEX_SHIFT) |
VB0_VERTEXDATA |
((4 * (2 + nelem * selem)) << VB0_BUFFER_PITCH_SHIFT));
- OUT_BATCH(state_base_offset + vb_offset);
+ OUT_BATCH(state_base_offset + offsetof(gen4_state_t, vb));
OUT_BATCH(3);
OUT_BATCH(0); // ignore for VERTEXDATA, but still there
@@ -1231,9 +1268,11 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
{
ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
I830Ptr pI830 = I830PTR(pScrn);
+ gen4_state_t *card_state = pI830->gen4_render_state->card_state;
Bool has_mask;
Bool is_affine_src, is_affine_mask, is_affine;
float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3];
+ float *vb = card_state->vb;
int i;
is_affine_src = i830_transform_is_affine (pI830->transform[0]);
@@ -1357,7 +1396,7 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
if (!is_affine)
vb[i++] = mask_w[0];
}
- assert (i * 4 <= vb_size);
+ assert (i * 4 <= sizeof(card_state->vb));
{
BEGIN_BATCH(6);
@@ -1406,17 +1445,18 @@ void
gen4_render_state_init(ScrnInfoPtr pScrn)
{
I830Ptr pI830 = I830PTR(pScrn);
- struct gen4_render_state *state;
+ struct gen4_render_state *render_state;
if (pI830->gen4_render_state == NULL)
- pI830->gen4_render_state = calloc(sizeof(*state), 1);
+ pI830->gen4_render_state = calloc(sizeof(*render_state), 1);
- state = pI830->gen4_render_state;
+ render_state = pI830->gen4_render_state;
- state->state_offset = pI830->gen4_render_state_mem->offset;
- state->state_addr = pI830->FbBase + pI830->gen4_render_state_mem->offset;
+ render_state->card_state_offset = pI830->gen4_render_state_mem->offset;
+ render_state->card_state = (gen4_state_t *)
+ (pI830->FbBase + render_state->card_state_offset);
- gen4_state_init((gen4_state_t *)state->state_addr, state->state_offset);
+ gen4_state_init(render_state);
}
/**
@@ -1427,16 +1467,7 @@ gen4_render_state_cleanup(ScrnInfoPtr pScrn)
{
I830Ptr pI830 = I830PTR(pScrn);
- pI830->gen4_render_state->state_addr = NULL;
-}
-
-/**
- * Called when the hardware is idled and flushed, so we know we can
- * reuse the buffer contents.
- */
-void
-gen4_render_state_reset(ScrnInfoPtr pScrn)
-{
+ pI830->gen4_render_state->card_state = NULL;
}
unsigned int
diff --git a/src/i965_video.c b/src/i965_video.c
index 464f2e31..485c89a0 100644
--- a/src/i965_video.c
+++ b/src/i965_video.c
@@ -171,8 +171,8 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
int urb_sf_start, urb_sf_size;
int urb_cs_start, urb_cs_size;
struct brw_surface_state *dest_surf_state;
- struct brw_surface_state *src_surf_state[3];
- struct brw_sampler_state *src_sampler_state[3];
+ struct brw_surface_state *src_surf_state[6];
+ struct brw_sampler_state *src_sampler_state[6];
struct brw_vs_unit_state *vs_state;
struct brw_sf_unit_state *sf_state;
struct brw_wm_unit_state *wm_state;
@@ -185,7 +185,7 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
float src_scale_x, src_scale_y;
uint32_t *binding_table;
Bool first_output = TRUE;
- int dest_surf_offset, src_surf_offset[3], src_sampler_offset[3], vs_offset;
+ int dest_surf_offset, src_surf_offset[6], src_sampler_offset[6], vs_offset;
int sf_offset, wm_offset, cc_offset, vb_offset, cc_viewport_offset;
int wm_scratch_offset;
int sf_kernel_offset, ps_kernel_offset, sip_kernel_offset;
@@ -197,10 +197,10 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
int src_surf;
int n_src_surf;
uint32_t src_surf_format;
- uint32_t src_surf_base[3];
- int src_width[3];
- int src_height[3];
- int src_pitch[3];
+ uint32_t src_surf_base[6];
+ int src_width[6];
+ int src_height[6];
+ int src_pitch[6];
int wm_binding_table_entries;
const uint32_t *ps_kernel_static;
int ps_kernel_static_size;
@@ -219,8 +219,11 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
#endif
src_surf_base[0] = pPriv->YBuf0offset;
- src_surf_base[1] = pPriv->VBuf0offset;
- src_surf_base[2] = pPriv->UBuf0offset;
+ src_surf_base[1] = pPriv->YBuf0offset;
+ src_surf_base[2] = pPriv->VBuf0offset;
+ src_surf_base[3] = pPriv->VBuf0offset;
+ src_surf_base[4] = pPriv->UBuf0offset;
+ src_surf_base[5] = pPriv->UBuf0offset;
#if 0
ErrorF ("base 0 0x%x base 1 0x%x base 2 0x%x\n",
src_surf_base[0], src_surf_base[1], src_surf_base[2]);
@@ -250,13 +253,13 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
src_surf_format = BRW_SURFACEFORMAT_R8_UNORM;
ps_kernel_static = &ps_kernel_planar_static[0][0];
ps_kernel_static_size = sizeof (ps_kernel_planar_static);
- src_width[0] = width;
- src_height[0] = height;
- src_pitch[0] = video_pitch * 2;
- src_width[1] = src_width[2] = width / 2;
- src_height[1] = src_height[2] = height / 2;
- src_pitch[1] = src_pitch[2] = video_pitch;
- n_src_surf = 3;
+ src_width[1] = src_width[0] = width;
+ src_height[1] = src_height[0] = height; /* surfaces 0 and 1 both describe the Y plane */
+ src_pitch[1] = src_pitch[0] = video_pitch * 2;
+ src_width[4] = src_width[5] = src_width[2] = src_width[3] = width / 2;
+ src_height[4] = src_height[5] = src_height[2] = src_height[3] = height / 2;
+ src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] = video_pitch;
+ n_src_surf = 6;
break;
default:
return;